mirror of
https://github.com/talwat/lowfi
synced 2026-04-27 20:33:28 +00:00
fix: remove archive scraper
This commit is contained in:
parent
c605a9fa8a
commit
980cbbce3f
@ -121,7 +121,6 @@ async fn main() -> eyre::Result<()> {
|
|||||||
if let Some(command) = &args.command {
|
if let Some(command) = &args.command {
|
||||||
return match command {
|
return match command {
|
||||||
Commands::Scrape { source } => match source {
|
Commands::Scrape { source } => match source {
|
||||||
Source::Archive => scrapers::archive::scrape().await,
|
|
||||||
Source::Lofigirl => scrapers::lofigirl::scrape().await,
|
Source::Lofigirl => scrapers::lofigirl::scrape().await,
|
||||||
Source::Chillhop => scrapers::chillhop::scrape().await,
|
Source::Chillhop => scrapers::chillhop::scrape().await,
|
||||||
},
|
},
|
||||||
|
|||||||
@ -11,7 +11,6 @@ use tokio::{
|
|||||||
io::AsyncWriteExt,
|
io::AsyncWriteExt,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub mod archive;
|
|
||||||
pub mod chillhop;
|
pub mod chillhop;
|
||||||
pub mod lofigirl;
|
pub mod lofigirl;
|
||||||
|
|
||||||
@ -19,7 +18,6 @@ pub mod lofigirl;
|
|||||||
#[derive(Clone, Copy, PartialEq, Eq, Debug, ValueEnum)]
|
#[derive(Clone, Copy, PartialEq, Eq, Debug, ValueEnum)]
|
||||||
pub enum Source {
|
pub enum Source {
|
||||||
Lofigirl,
|
Lofigirl,
|
||||||
Archive,
|
|
||||||
Chillhop,
|
Chillhop,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -28,7 +26,6 @@ impl Source {
|
|||||||
pub fn cache_dir(&self) -> &'static str {
|
pub fn cache_dir(&self) -> &'static str {
|
||||||
match self {
|
match self {
|
||||||
Source::Lofigirl => "lofigirl",
|
Source::Lofigirl => "lofigirl",
|
||||||
Source::Archive => "archive",
|
|
||||||
Source::Chillhop => "chillhop",
|
Source::Chillhop => "chillhop",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -37,7 +34,6 @@ impl Source {
|
|||||||
pub fn url(&self) -> &'static str {
|
pub fn url(&self) -> &'static str {
|
||||||
match self {
|
match self {
|
||||||
Source::Chillhop => "https://chillhop.com",
|
Source::Chillhop => "https://chillhop.com",
|
||||||
Source::Archive => "https://ia601004.us.archive.org/31/items/lofigirl",
|
|
||||||
Source::Lofigirl => "https://lofigirl.com/wp-content/uploads",
|
Source::Lofigirl => "https://lofigirl.com/wp-content/uploads",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,74 +0,0 @@
|
|||||||
//! Has all of the functions for the `scrape` command.
|
|
||||||
//!
|
|
||||||
//! This command is completely optional, and as such isn't subject to the same
|
|
||||||
//! quality standards as the rest of the codebase.
|
|
||||||
|
|
||||||
use std::sync::LazyLock;
|
|
||||||
|
|
||||||
use futures_util::{stream::FuturesOrdered, StreamExt};
|
|
||||||
use reqwest::Client;
|
|
||||||
use scraper::{Html, Selector};
|
|
||||||
|
|
||||||
use crate::scrapers::{get, Source};
|
|
||||||
|
|
||||||
static SELECTOR: LazyLock<Selector> =
|
|
||||||
LazyLock::new(|| Selector::parse("html > body > pre > a").unwrap());
|
|
||||||
|
|
||||||
async fn parse(client: &Client, path: &str) -> eyre::Result<Vec<String>> {
|
|
||||||
let document = get(client, path, super::Source::Lofigirl).await?;
|
|
||||||
let html = Html::parse_document(&document);
|
|
||||||
|
|
||||||
Ok(html
|
|
||||||
.select(&SELECTOR)
|
|
||||||
.skip(1)
|
|
||||||
.map(|x| String::from(x.attr("href").unwrap()))
|
|
||||||
.collect())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This function basically just scans the entire file server, and returns a list of paths to mp3 files.
|
|
||||||
///
|
|
||||||
/// It's a bit hacky, and basically works by checking all of the years, then months, and then all of the files.
|
|
||||||
/// This is done as a way to avoid recursion, since async rust really hates recursive functions.
|
|
||||||
async fn scan() -> eyre::Result<Vec<String>> {
|
|
||||||
let client = Client::new();
|
|
||||||
|
|
||||||
let mut releases = parse(&client, "/").await?;
|
|
||||||
releases.truncate(releases.len() - 4);
|
|
||||||
|
|
||||||
// A little bit of async to run all of the months concurrently.
|
|
||||||
let mut futures = FuturesOrdered::new();
|
|
||||||
|
|
||||||
for release in releases {
|
|
||||||
let client = client.clone();
|
|
||||||
futures.push_back(async move {
|
|
||||||
let items = parse(&client, &release).await.unwrap();
|
|
||||||
items
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(|x| {
|
|
||||||
if x.ends_with(".mp3") {
|
|
||||||
Some(format!("{release}{x}"))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect::<Vec<String>>()
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut files = Vec::new();
|
|
||||||
while let Some(mut result) = futures.next().await {
|
|
||||||
files.append(&mut result);
|
|
||||||
}
|
|
||||||
|
|
||||||
eyre::Result::Ok(files)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn scrape() -> eyre::Result<()> {
|
|
||||||
println!("{}/", Source::Lofigirl.url());
|
|
||||||
let files = scan().await?;
|
|
||||||
for file in files {
|
|
||||||
println!("{file}");
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
@ -9,7 +9,7 @@ impl Clock {
|
|||||||
/// Small shorthand for getting the local time now, and formatting it.
|
/// Small shorthand for getting the local time now, and formatting it.
|
||||||
#[inline]
|
#[inline]
|
||||||
fn now() -> chrono::format::DelayedFormat<chrono::format::StrftimeItems<'static>> {
|
fn now() -> chrono::format::DelayedFormat<chrono::format::StrftimeItems<'static>> {
|
||||||
chrono::Local::now().format("%H:%M:%S")
|
chrono::Local::now().format("%H:%M")
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Checks if the last update was long enough ago, and if so,
|
/// Checks if the last update was long enough ago, and if so,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user