mirror of https://github.com/talwat/lowfi
synced 2025-08-17 15:12:37 +00:00

fix: separate get function from chillhop scraper

This commit is contained in:
parent 4efd8e9e34
commit fa0d1f52c3
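As orientation for the hunks below: the commit moves the cached HTTP `get` helper out of the chillhop scraper into the shared `scrapers` module and renames the `Sources` enum to `Source`, which now also knows its cache directory and base URL. The self-contained sketch below shows how a path maps to a cache file and request URL under that scheme; the enum and match arms mirror the diff, while `main` and its output are purely illustrative (the real helper also performs the reqwest call and tokio file I/O shown further down).

```rust
// Sketch only: mirrors the `Source` enum and helpers added by this commit,
// minus the `ValueEnum` derive and the actual network/caching code.
#[derive(Clone, Copy, Debug)]
enum Source {
    Lofigirl,
    Chillhop,
}

impl Source {
    fn cache_dir(&self) -> &'static str {
        match self {
            Source::Lofigirl => "lofigirl",
            Source::Chillhop => "chillhop",
        }
    }

    fn url(&self) -> &'static str {
        match self {
            Source::Chillhop => "https://chillhop.com",
            Source::Lofigirl => "https://lofigirl.com/wp-content/uploads",
        }
    }
}

fn main() {
    // Both scrapers now resolve cache paths and request URLs the same way,
    // so the shared `get` only needs a path plus a `Source` value.
    let source = Source::Chillhop;
    let trimmed = "releases/?page=1".trim_matches('/');
    println!("cache:   ./cache/{}/{trimmed}.html", source.cache_dir());
    println!("request: {}/{trimmed}", source.url());
}
```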
@@ -1,4 +1,3 @@
-https://stream.chillhop.com/mp3/
 9476!Apple Juice
 8448!Tôzen
 8878!Swiss
@@ -1372,4 +1371,4 @@ https://stream.chillhop.com/mp3/
 82333!Driving
 85214!My Steeze
 82331!Filaments
 85273!Shoegaze
@@ -15,7 +15,7 @@ mod tracks;
 mod scrapers;
 
 #[cfg(feature = "scrape")]
-use crate::scrapers::Sources;
+use crate::scrapers::Source;
 /// An extremely simple lofi player.
 #[derive(Parser, Clone)]
 #[command(about, version)]
@@ -70,7 +70,7 @@ enum Commands {
     #[cfg(feature = "scrape")]
     Scrape {
         // The source to scrape from.
-        source: scrapers::Sources,
+        source: scrapers::Source,
 
         /// The file extension to search for, defaults to mp3.
         #[clap(long, short, default_value = "mp3")]
@@ -106,8 +106,8 @@ async fn main() -> eyre::Result<()> {
                 extension,
                 include_full,
             } => match source {
-                Sources::Lofigirl => scrapers::lofigirl::scrape(extension, include_full).await?,
-                Sources::Chillhop => scrapers::chillhop::scrape().await?,
+                Source::Lofigirl => scrapers::lofigirl::scrape(extension, include_full).await?,
+                Source::Chillhop => scrapers::chillhop::scrape().await?,
             },
         }
     } else {
@@ -1,10 +1,81 @@
+use std::path::{Path, PathBuf};
+
 use clap::ValueEnum;
+use eyre::bail;
+use reqwest::Client;
+use tokio::{
+    fs::{self, File},
+    io::AsyncWriteExt,
+};
 
 pub mod chillhop;
 pub mod lofigirl;
 
 #[derive(Clone, Copy, PartialEq, Eq, Debug, ValueEnum)]
-pub enum Sources {
+pub enum Source {
     Lofigirl,
     Chillhop,
 }
+
+impl Source {
+    pub fn cache_dir(&self) -> &'static str {
+        match self {
+            Source::Lofigirl => "lofigirl",
+            Source::Chillhop => "chillhop",
+        }
+    }
+
+    pub fn url(&self) -> &'static str {
+        match self {
+            Source::Chillhop => "https://chillhop.com",
+            Source::Lofigirl => "https://lofigirl.com/wp-content/uploads",
+        }
+    }
+}
+
+/// Sends a get request, with caching.
+async fn get(client: &Client, path: &str, source: Source) -> eyre::Result<String> {
+    let trimmed = path.trim_matches('/');
+    let cache = PathBuf::from(format!("./cache/{}/{trimmed}.html", source.cache_dir()));
+
+    if let Ok(x) = fs::read_to_string(&cache).await {
+        Ok(x)
+    } else {
+        let resp = client
+            .get(format!("{}/{trimmed}", source.url()))
+            .send()
+            .await?;
+
+        let status = resp.status();
+
+        if status == 429 {
+            bail!("rate limit reached: {path}");
+        }
+
+        if status != 404 && !status.is_success() && !status.is_redirection() {
+            bail!("non success code {}: {path}", resp.status().as_u16());
+        }
+
+        let text = resp.text().await?;
+
+        let parent = cache.parent();
+        if let Some(x) = parent {
+            if x != Path::new("") {
+                fs::create_dir_all(x).await?;
+            }
+        }
+
+        let mut file = File::create(&cache).await?;
+        file.write_all(text.as_bytes()).await?;
+
+        if status.is_redirection() {
+            bail!("redirect: {path}")
+        }
+
+        if status == 404 {
+            bail!("not found: {path}")
+        }
+
+        Ok(text)
+    }
+}
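The new `get` is private to the `scrapers` module, so only the `chillhop` and `lofigirl` submodules can call it, as the remaining hunks do. A hypothetical caller (the name `fetch_release_page` is made up for illustration and assumes the code lives inside the `scrapers` module tree) would look roughly like this:

```rust
// Hypothetical usage sketch, not part of this commit. Assumes it lives under
// the `scrapers` module, since `get` is not `pub`.
use reqwest::Client;

use crate::scrapers::{get, Source};

async fn fetch_release_page(client: &Client, page: usize) -> eyre::Result<String> {
    // First call fetches https://chillhop.com/releases/?page=N and writes the
    // body to ./cache/chillhop/releases/?page=N.html; later calls read the cache.
    get(client, &format!("releases/?page={page}"), Source::Chillhop).await
}
```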
@@ -1,13 +1,10 @@
-use eyre::{bail, eyre};
+use eyre::eyre;
 use futures::stream::FuturesUnordered;
 use futures::StreamExt;
 use indicatif::ProgressBar;
 use lazy_static::lazy_static;
+use std::fmt;
 use std::str::FromStr;
-use std::{
-    fmt,
-    path::{Path, PathBuf},
-};
 
 use reqwest::Client;
 use scraper::{Html, Selector};
@@ -15,10 +12,9 @@ use serde::{
     de::{self, Visitor},
     Deserialize, Deserializer,
 };
-use tokio::{
-    fs::{self, File},
-    io::AsyncWriteExt,
-};
+use tokio::fs;
+
+use crate::scrapers::{get, Source};
 
 lazy_static! {
     static ref RELEASES: Selector = Selector::parse(".table-body > a").unwrap();
@@ -70,7 +66,7 @@ impl Release {
         client: Client,
         bar: ProgressBar,
     ) -> Result<Self, ReleaseError> {
-        let content = get(&client, &path).await?;
+        let content = get(&client, &path, Source::Chillhop).await?;
         let html = Html::parse_document(&content);
 
         let textarea = html
@@ -89,60 +85,13 @@ impl Release {
     }
 }
 
-/// Sends a get request, with caching.
-async fn get(client: &Client, path: &str) -> eyre::Result<String> {
-    let trimmed = path.trim_matches('/');
-    let cache = PathBuf::from(format!("./cache/chillhop/{trimmed}.html"));
-
-    if let Ok(x) = fs::read_to_string(&cache).await {
-        Ok(x)
-    } else {
-        let resp = client
-            .get(format!("https://chillhop.com/{trimmed}"))
-            .send()
-            .await?;
-
-        let status = resp.status();
-
-        if status == 429 {
-            bail!("rate limit reached: {path}");
-        }
-
-        if status != 404 && !status.is_success() && !status.is_redirection() {
-            bail!("non success code {}: {path}", resp.status().as_u16());
-        }
-
-        let text = resp.text().await?;
-
-        let parent = cache.parent();
-        if let Some(x) = parent {
-            if x != Path::new("") {
-                fs::create_dir_all(x).await?;
-            }
-        }
-
-        let mut file = File::create(&cache).await?;
-        file.write_all(text.as_bytes()).await?;
-
-        if status.is_redirection() {
-            bail!("redirect: {path}")
-        }
-
-        if status == 404 {
-            bail!("not found: {path}")
-        }
-
-        Ok(text)
-    }
-}
-
 async fn scan_page(
     number: usize,
     client: &Client,
     bar: ProgressBar,
 ) -> eyre::Result<Vec<impl futures::Future<Output = Result<Release, ReleaseError>>>> {
     let path = format!("releases/?page={number}");
-    let content = get(client, &path).await?;
+    let content = get(client, &path, Source::Chillhop).await?;
     let html = Html::parse_document(&content);
 
     let elements = html.select(&RELEASES);
@@ -5,19 +5,19 @@
 use futures::{stream::FuturesOrdered, StreamExt};
 use lazy_static::lazy_static;
+use reqwest::Client;
 use scraper::{Html, Selector};
 
-const BASE_URL: &str = "https://lofigirl.com/wp-content/uploads/";
+use crate::scrapers::{get, Source};
 
 lazy_static! {
     static ref SELECTOR: Selector = Selector::parse("html > body > pre > a").unwrap();
 }
 
-async fn parse(path: &str) -> eyre::Result<Vec<String>> {
-    let response = reqwest::get(format!("{}{}", BASE_URL, path)).await?;
-    let document = response.text().await?;
+async fn parse(client: &Client, path: &str) -> eyre::Result<Vec<String>> {
+    let document = get(client, path, super::Source::Lofigirl).await?;
 
     let html = Html::parse_document(&document);
 
     Ok(html
         .select(&SELECTOR)
         .skip(5)
@@ -30,9 +30,10 @@ async fn parse(path: &str) -> eyre::Result<Vec<String>> {
 /// It's a bit hacky, and basically works by checking all of the years, then months, and then all of the files.
 /// This is done as a way to avoid recursion, since async rust really hates recursive functions.
 async fn scan(extension: &str, include_full: bool) -> eyre::Result<Vec<String>> {
+    let client = Client::new();
     let extension = &format!(".{}", extension);
 
-    let items = parse("").await?;
+    let items = parse(&client, "/").await?;
 
     let mut years: Vec<u32> = items
         .iter()
@@ -48,19 +49,20 @@ async fn scan(extension: &str, include_full: bool) -> eyre::Result<Vec<String>>
     let mut futures = FuturesOrdered::new();
 
     for year in years {
-        let months = parse(&year.to_string()).await?;
+        let months = parse(&client, &year.to_string()).await?;
 
         for month in months {
+            let client = client.clone();
             futures.push_back(async move {
                 let path = format!("{}/{}", year, month);
 
-                let items = parse(&path).await.unwrap();
+                let items = parse(&client, &path).await.unwrap();
                 items
                     .into_iter()
                     .filter_map(|x| {
                         if x.ends_with(extension) {
                             if include_full {
-                                Some(format!("{BASE_URL}{path}{x}"))
+                                Some(format!("{}/{path}{x}", Source::Lofigirl.url()))
                             } else {
                                 Some(format!("{path}{x}"))
                             }
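The doc comment on `scan` above explains the approach: instead of an async-recursive directory walk, it enumerates years, then months within each year, then files within each month. A minimal synchronous sketch of that flattening follows; the real code fetches and scrapes the directory listings over HTTP via `parse`, filters the years into `u32`s, and runs each month through `FuturesOrdered`, while here `parse` is stubbed with static data so the example is self-contained.

```rust
// Illustration of the "years -> months -> files" flattening used by scan()
// to avoid async recursion. `parse` is a stub standing in for the real
// HTTP-backed directory-listing scraper.
fn parse(path: &str) -> Vec<String> {
    match path {
        "/" => vec!["2023/".into(), "2024/".into()],
        "2023" => vec!["03/".into()],
        "2024" => vec!["01/".into(), "02/".into()],
        _ => vec!["track-a.mp3".into(), "cover.jpg".into()],
    }
}

fn main() {
    let mut tracks = Vec::new();

    // One loop per directory level instead of a recursive walk.
    for year in parse("/") {
        let year = year.trim_end_matches('/').to_string();
        for month in parse(&year) {
            let path = format!("{}/{}", year, month);
            for item in parse(&path) {
                if item.ends_with(".mp3") {
                    tracks.push(format!("{path}{item}"));
                }
            }
        }
    }

    println!("{tracks:?}"); // ["2023/03/track-a.mp3", "2024/01/track-a.mp3", ...]
}
```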