From 3f55768754ed558606441f87369fae7ec6540608 Mon Sep 17 00:00:00 2001
From: Tal <83217276+talwat@users.noreply.github.com>
Date: Thu, 7 Aug 2025 15:18:04 +0200
Subject: [PATCH] feat: start work on chillhop scraper

Only the basic GET request with caching has actually been implemented,
but the rest shouldn't be too complicated.
---
 .gitignore               |  1 +
 src/main.rs              |  9 +++++--
 src/scrapers.rs          |  1 +
 src/scrapers/chillhop.rs | 53 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 2 deletions(-)
 create mode 100644 src/scrapers/chillhop.rs

diff --git a/.gitignore b/.gitignore
index ea8c4bf..ad08475 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 /target
+/cache
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index 0a0d9e7..5af5687 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,6 +5,8 @@ use clap::{Parser, Subcommand};
 
 use std::path::PathBuf;
 
+use crate::scrapers::Sources;
+
 mod messages;
 mod play;
 mod player;
@@ -97,10 +99,13 @@ async fn main() -> eyre::Result<()> {
         match command {
             // TODO: Actually distinguish between sources.
             Commands::Scrape {
-                source: _,
+                source,
                 extension,
                 include_full,
-            } => scrapers::lofigirl::scrape(extension, include_full).await?,
+            } => match source {
+                Sources::Lofigirl => scrapers::lofigirl::scrape(extension, include_full).await?,
+                Sources::Chillhop => scrapers::chillhop::scrape().await,
+            },
         }
     } else {
         play::play(cli).await?;
diff --git a/src/scrapers.rs b/src/scrapers.rs
index 1729657..95707e3 100644
--- a/src/scrapers.rs
+++ b/src/scrapers.rs
@@ -1,5 +1,6 @@
 use clap::ValueEnum;
 
+pub mod chillhop;
 pub mod lofigirl;
 
 #[derive(Clone, Copy, PartialEq, Eq, Debug, ValueEnum)]
diff --git a/src/scrapers/chillhop.rs b/src/scrapers/chillhop.rs
new file mode 100644
index 0000000..945674d
--- /dev/null
+++ b/src/scrapers/chillhop.rs
@@ -0,0 +1,53 @@
+use std::path::{Path, PathBuf};
+
+use reqwest::Client;
+use tokio::{
+    fs::{self, File},
+    io::AsyncWriteExt,
+};
+
+struct Release {
+    pub tracks: Vec<String>,
+    pub author: String,
+}
+
+struct Data {
+    pub releases: Vec<Release>,
+}
+
+/// Sends a GET request, with caching.
+async fn get(client: &Client, path: &str) -> String {
+    let cache = PathBuf::from(format!("./cache/chillhop/{path}.html"));
+    if let Ok(x) = fs::read_to_string(&cache).await {
+        x
+    } else {
+        let resp = client
+            .get(format!("https://chillhop.com/{path}"))
+            .send()
+            .await
+            .unwrap();
+        let text = resp.text().await.unwrap();
+
+        let parent = cache.parent();
+        if let Some(x) = parent {
+            if x != Path::new("") {
+                fs::create_dir_all(x).await.unwrap();
+            }
+        }
+
+        let mut file = File::create(&cache).await.unwrap();
+        file.write_all(text.as_bytes()).await.unwrap();
+
+        text
+    }
+}
+
+pub async fn scrape() {
+    const PAGE_COUNT: usize = 40;
+    const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36";
+
+    fs::create_dir_all("./cache/chillhop").await.unwrap();
+    let client = Client::builder().user_agent(USER_AGENT).build().unwrap();
+
+    get(&client, "releases/?page=30").await;
+}
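
A note on the unwraps in `get`: since `main` already returns `eyre::Result<()>`, the helper could bubble errors up instead of panicking on a failed request or write. A minimal sketch of that variant, reusing the same imports as the file above (with `tokio::fs::write` standing in for the manual `File::create` + `write_all` pair); this is a possible follow-up, not part of the patch:

    /// Sends a GET request, with caching, propagating errors to the caller.
    async fn get(client: &Client, path: &str) -> eyre::Result<String> {
        let cache = PathBuf::from(format!("./cache/chillhop/{path}.html"));

        // Serve from the cache when a copy already exists.
        if let Ok(cached) = fs::read_to_string(&cache).await {
            return Ok(cached);
        }

        let text = client
            .get(format!("https://chillhop.com/{path}"))
            .send()
            .await?
            .text()
            .await?;

        // Create intermediate directories for nested paths like "releases/?page=30".
        if let Some(parent) = cache.parent().filter(|p| *p != Path::new("")) {
            fs::create_dir_all(parent).await?;
        }
        fs::write(&cache, &text).await?;

        Ok(text)
    }

The `Sources::Chillhop` arm in main.rs would then take the same trailing `?` as the lofigirl one.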
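
As for "the rest": that would mean fetching all `PAGE_COUNT` listing pages and parsing each cached document into `Release` values. A rough sketch of the parsing half using the `scraper` crate (an extra dependency); the `a.release` selector and the href-based paths are guesses for illustration, not taken from chillhop.com's actual markup:

    use scraper::{Html, Selector};

    /// Extracts release page paths from one cached listing page.
    fn parse_releases(html: &str) -> Vec<String> {
        let document = Html::parse_document(html);
        // Selector::parse only fails on invalid CSS, so the unwrap is
        // safe for a fixed selector string.
        let selector = Selector::parse("a.release").unwrap();

        document
            .select(&selector)
            .filter_map(|element| element.value().attr("href"))
            .map(str::to_owned)
            .collect()
    }

Each returned path could then be fed back through `get` to fetch the release page itself and fill in `tracks` and `author`.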