mirror of https://github.com/talwat/lowfi
synced 2025-08-13 13:04:16 +00:00

feat: add archive scraper
feat: add more information to errors

This commit is contained in:
parent bdd508bfbb
commit 6f15f9226f

data/lofigirl-new.txt (new file, 2459 lines)
File diff suppressed because it is too large.

src/main.rs (17 changed lines)
@@ -71,14 +71,6 @@ enum Commands {
     Scrape {
         // The source to scrape from.
         source: scrapers::Source,
-
-        /// The file extension to search for, defaults to mp3.
-        #[clap(long, short, default_value = "mp3")]
-        extension: String,
-
-        /// Whether to include the full HTTP URL or just the distinguishing part.
-        #[clap(long, short)]
-        include_full: bool,
     },
 }

@@ -101,12 +93,9 @@ async fn main() -> eyre::Result<()> {
     match command {
-        // TODO: Actually distinguish between sources.
         #[cfg(feature = "scrape")]
-        Commands::Scrape {
-            source,
-            extension,
-            include_full,
-        } => match source {
-            Source::Lofigirl => scrapers::lofigirl::scrape(extension, include_full).await?,
+        Commands::Scrape { source } => match source {
+            Source::Archive => scrapers::archive::scrape().await?,
+            Source::Lofigirl => scrapers::lofigirl::scrape().await?,
             Source::Chillhop => scrapers::chillhop::scrape().await?,
         },
     }
@@ -41,7 +41,8 @@ pub use error::Error;
 pub mod mpris;
 
 /// The time to wait in between errors.
-const TIMEOUT: Duration = Duration::from_secs(3);
+/// TODO: Make this configurable.
+const TIMEOUT: Duration = Duration::from_secs(5);
 
 /// Main struct responsible for queuing up & playing tracks.
 // TODO: Consider refactoring [Player] from being stored in an [Arc], into containing many smaller [Arc]s.

@@ -49,7 +49,7 @@ impl Downloader {
             Ok(track) => self.player.tracks.write().await.push_back(track),
             Err(error) if !error.is_timeout() => {
                 if debug {
-                    panic!("{}", error)
+                    panic!("{error}")
                 }
 
                 sleep(TIMEOUT).await;

@@ -10,7 +10,7 @@ use crate::{
 impl Player {
     /// Fetches the next track from the queue, or a random track if the queue is empty.
     /// This will also set the current track to the fetched track's info.
-    async fn fetch(&self) -> Result<tracks::DecodedTrack, tracks::TrackError> {
+    async fn fetch(&self) -> Result<tracks::DecodedTrack, tracks::Error> {
         // TODO: Consider replacing this with `unwrap_or_else` when async closures are stablized.
         let track = self.tracks.write().await.pop_front();
         let track = if let Some(track) = track {

@@ -66,7 +66,7 @@ impl Player {
             Err(error) => {
                 if !error.is_timeout() {
                     if debug {
-                        panic!("{error:?}")
+                        panic!("{error}")
                     }
 
                     sleep(TIMEOUT).await;
@@ -8,12 +8,14 @@ use tokio::{
     io::AsyncWriteExt,
 };
 
+pub mod archive;
 pub mod chillhop;
 pub mod lofigirl;
 
 #[derive(Clone, Copy, PartialEq, Eq, Debug, ValueEnum)]
 pub enum Source {
     Lofigirl,
+    Archive,
     Chillhop,
 }

@@ -21,6 +23,7 @@ impl Source {
     pub fn cache_dir(&self) -> &'static str {
         match self {
             Source::Lofigirl => "lofigirl",
+            Source::Archive => "archive",
             Source::Chillhop => "chillhop",
         }
     }

@@ -28,6 +31,7 @@ impl Source {
     pub fn url(&self) -> &'static str {
         match self {
             Source::Chillhop => "https://chillhop.com",
+            Source::Archive => "https://ia601004.us.archive.org/31/items/lofigirl",
             Source::Lofigirl => "https://lofigirl.com/wp-content/uploads",
         }
     }
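For orientation, an illustration that is not part of the commit: the scrapers print paths relative to the source, and Source::url() presumably supplies the base those paths get joined onto when a track is later downloaded. A minimal sketch of that assumed joining scheme; the full_url helper and the relative path are hypothetical.

// Sketch only; full_url is a hypothetical helper, not something in lowfi.
fn full_url(base: &str, relative: &str) -> String {
    format!("{base}/{relative}")
}

fn main() {
    // Base taken from Source::Archive.url() in the diff above; the relative
    // path is a made-up example of what the scraper might print.
    let base = "https://ia601004.us.archive.org/31/items/lofigirl";
    let relative = "2023/05/example-track.mp3";
    assert_eq!(
        full_url(base, relative),
        "https://ia601004.us.archive.org/31/items/lofigirl/2023/05/example-track.mp3"
    );
}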
src/scrapers/archive.rs (new file, 74 lines)
@@ -0,0 +1,74 @@
+//! Has all of the functions for the `scrape` command.
+//!
+//! This command is completely optional, and as such isn't subject to the same
+//! quality standards as the rest of the codebase.
+
+use futures::{stream::FuturesOrdered, StreamExt};
+use lazy_static::lazy_static;
+use reqwest::Client;
+use scraper::{Html, Selector};
+
+use crate::scrapers::{get, Source};
+
+lazy_static! {
+    static ref SELECTOR: Selector = Selector::parse("html > body > pre > a").unwrap();
+}
+
+async fn parse(client: &Client, path: &str) -> eyre::Result<Vec<String>> {
+    let document = get(client, path, super::Source::Lofigirl).await?;
+    let html = Html::parse_document(&document);
+
+    Ok(html
+        .select(&SELECTOR)
+        .skip(1)
+        .map(|x| String::from(x.attr("href").unwrap()))
+        .collect())
+}
+
+/// This function basically just scans the entire file server, and returns a list of paths to mp3 files.
+///
+/// It's a bit hacky, and basically works by checking all of the years, then months, and then all of the files.
+/// This is done as a way to avoid recursion, since async rust really hates recursive functions.
+async fn scan() -> eyre::Result<Vec<String>> {
+    let client = Client::new();
+
+    let mut releases = parse(&client, "/").await?;
+    releases.truncate(releases.len() - 4);
+
+    // A little bit of async to run all of the months concurrently.
+    let mut futures = FuturesOrdered::new();
+
+    for release in releases {
+        let client = client.clone();
+        futures.push_back(async move {
+            let items = parse(&client, &release).await.unwrap();
+            items
+                .into_iter()
+                .filter_map(|x| {
+                    if x.ends_with(".mp3") {
+                        Some(format!("{release}{x}"))
+                    } else {
+                        None
+                    }
+                })
+                .collect::<Vec<String>>()
+        });
+    }
+
+    let mut files = Vec::new();
+    while let Some(mut result) = futures.next().await {
+        files.append(&mut result);
+    }
+
+    eyre::Result::Ok(files)
+}
+
+pub async fn scrape() -> eyre::Result<()> {
+    println!("{}/", Source::Lofigirl.url());
+    let files = scan().await?;
+    for file in files {
+        println!("{file}");
+    }
+
+    Ok(())
+}
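A side note, not from the commit: scan() above fans out one future per release directory with FuturesOrdered, which polls them concurrently but yields results in push order, so the printed track list keeps the server's ordering. A minimal, self-contained sketch of that pattern, with made-up release names and a stub standing in for parse():

use futures::{executor::block_on, stream::FuturesOrdered, StreamExt};

// Stand-in for parse(&client, &release); a real scraper would fetch and parse HTML here.
async fn list_release(release: String) -> Vec<String> {
    vec![format!("{release}track.mp3")]
}

fn main() {
    block_on(async {
        let releases = ["2021/01/", "2021/02/", "2021/03/"];

        // One future per release, polled concurrently.
        let mut futures = FuturesOrdered::new();
        for release in releases {
            futures.push_back(list_release(release.to_string()));
        }

        // Results are yielded in push order, so the output stays sorted by release.
        let mut files = Vec::new();
        while let Some(mut items) = futures.next().await {
            files.append(&mut items);
        }

        assert_eq!(
            files,
            ["2021/01/track.mp3", "2021/02/track.mp3", "2021/03/track.mp3"]
        );
    });
}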
@@ -8,7 +8,7 @@ use lazy_static::lazy_static;
 use reqwest::Client;
 use scraper::{Html, Selector};
 
-use crate::scrapers::{get, Source};
+use crate::scrapers::get;
 
 lazy_static! {
     static ref SELECTOR: Selector = Selector::parse("html > body > pre > a").unwrap();

@@ -29,10 +29,8 @@ async fn parse(client: &Client, path: &str) -> eyre::Result<Vec<String>> {
 ///
 /// It's a bit hacky, and basically works by checking all of the years, then months, and then all of the files.
 /// This is done as a way to avoid recursion, since async rust really hates recursive functions.
-async fn scan(extension: &str, include_full: bool) -> eyre::Result<Vec<String>> {
+async fn scan() -> eyre::Result<Vec<String>> {
     let client = Client::new();
-    let extension = &format!(".{}", extension);
 
     let items = parse(&client, "/").await?;
 
     let mut years: Vec<u32> = items

@@ -60,12 +58,8 @@ async fn scan(extension: &str, include_full: bool) -> eyre::Result<Vec<String>>
             items
                 .into_iter()
                 .filter_map(|x| {
-                    if x.ends_with(extension) {
-                        if include_full {
-                            Some(format!("{}/{path}{x}", Source::Lofigirl.url()))
-                        } else {
-                            Some(format!("{path}{x}"))
-                        }
+                    if x.ends_with(".mp3") {
+                        Some(format!("{path}{x}"))
                     } else {
                         None
                     }

@@ -83,8 +77,8 @@ async fn scan(extension: &str, include_full: bool) -> eyre::Result<Vec<String>>
     eyre::Result::Ok(files)
 }
 
-pub async fn scrape(extension: String, include_full: bool) -> eyre::Result<()> {
-    let files = scan(&extension, include_full).await?;
+pub async fn scrape() -> eyre::Result<()> {
+    let files = scan().await?;
     for file in files {
         println!("{file}");
     }
@@ -20,41 +20,15 @@ use std::{io::Cursor, path::Path, time::Duration};
 use bytes::Bytes;
 use inflector::Inflector as _;
 use rodio::{Decoder, Source as _};
-use thiserror::Error;
-use tokio::io;
 use unicode_segmentation::UnicodeSegmentation;
 use url::form_urlencoded;
 
+pub mod error;
 pub mod list;
 
-/// The error type for the track system, which is used to handle errors that occur
-/// while downloading, decoding, or playing tracks.
-#[derive(Debug, Error)]
-pub enum TrackError {
-    #[error("timeout")]
-    Timeout,
-
-    #[error("unable to decode")]
-    Decode(#[from] rodio::decoder::DecoderError),
-
-    #[error("invalid name")]
-    InvalidName,
-
-    #[error("invalid file path")]
-    InvalidPath,
-
-    #[error("unable to read file")]
-    File(#[from] io::Error),
-
-    #[error("unable to fetch data")]
-    Request(#[from] reqwest::Error),
-}
-
-impl TrackError {
-    pub const fn is_timeout(&self) -> bool {
-        matches!(self, Self::Timeout)
-    }
-}
+pub use error::Error;
+
+use crate::tracks::error::Context;
 
 /// Just a shorthand for a decoded [Bytes].
 pub type DecodedData = Decoder<Cursor<Bytes>>;

@@ -92,7 +66,7 @@ impl QueuedTrack {
     /// This will actually decode and format the track,
     /// returning a [`DecodedTrack`] which can be played
     /// and also has a duration & formatted name.
-    pub fn decode(self) -> eyre::Result<DecodedTrack, TrackError> {
+    pub fn decode(self) -> eyre::Result<DecodedTrack, Error> {
         DecodedTrack::new(self)
     }
 }

@@ -134,13 +108,13 @@ impl Info {
     /// Formats a name with [Inflector].
     /// This will also strip the first few numbers that are
     /// usually present on most lofi tracks.
-    fn format_name(name: &str) -> eyre::Result<String, TrackError> {
+    fn format_name(name: &str) -> eyre::Result<String, Error> {
         let path = Path::new(name);
 
         let stem = path
             .file_stem()
             .and_then(|x| x.to_str())
-            .ok_or(TrackError::InvalidName)?;
+            .ok_or((name, error::Kind::InvalidName))?;
         let formatted = Self::decode_url(stem)
             .to_lowercase()
             .to_title_case()

@@ -181,7 +155,7 @@ impl Info {
         name: TrackName,
         full_path: String,
         decoded: &DecodedData,
-    ) -> eyre::Result<Self, TrackError> {
+    ) -> eyre::Result<Self, Error> {
         let (display_name, custom_name) = match name {
             TrackName::Raw(raw) => (Self::format_name(&raw)?, false),
             TrackName::Formatted(custom) => (custom, true),

@@ -210,11 +184,12 @@ pub struct DecodedTrack {
 impl DecodedTrack {
     /// Creates a new track.
     /// This is equivalent to [`QueuedTrack::decode`].
-    pub fn new(track: QueuedTrack) -> eyre::Result<Self, TrackError> {
+    pub fn new(track: QueuedTrack) -> eyre::Result<Self, Error> {
         let data = Decoder::builder()
             .with_byte_len(track.data.len().try_into().unwrap())
             .with_data(Cursor::new(track.data))
-            .build()?;
+            .build()
+            .track(track.full_path.clone())?;
 
         let info = Info::new(track.name, track.full_path, &data)?;
 
src/tracks/error.rs (new file, 61 lines)
@@ -0,0 +1,61 @@
+#[derive(Debug, thiserror::Error)]
+pub enum Kind {
+    #[error("timeout")]
+    Timeout,
+
+    #[error("unable to decode: {0}")]
+    Decode(#[from] rodio::decoder::DecoderError),
+
+    #[error("invalid name")]
+    InvalidName,
+
+    #[error("invalid file path")]
+    InvalidPath,
+
+    #[error("unable to read file: {0}")]
+    File(#[from] std::io::Error),
+
+    #[error("unable to fetch data: {0}")]
+    Request(#[from] reqwest::Error),
+}
+
+#[derive(Debug, thiserror::Error)]
+#[error("{kind}\ntrack: {track}")]
+pub struct Error {
+    pub track: String,
+
+    #[source]
+    pub kind: Kind,
+}
+
+impl Error {
+    pub const fn is_timeout(&self) -> bool {
+        matches!(self.kind, Kind::Timeout)
+    }
+}
+
+impl<T, E> From<(T, E)> for Error
+where
+    T: Into<String>,
+    Kind: From<E>,
+{
+    fn from((track, err): (T, E)) -> Self {
+        Error {
+            track: track.into(),
+            kind: Kind::from(err),
+        }
+    }
+}
+
+pub trait Context<T> {
+    fn track(self, name: impl Into<String>) -> Result<T, Error>;
+}
+
+impl<T, E> Context<T> for Result<T, E>
+where
+    (String, E): Into<Error>,
+{
+    fn track(self, name: impl Into<String>) -> Result<T, Error> {
+        self.map_err(|e| (name.into(), e).into())
+    }
+}
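For orientation, a sketch that is not part of the commit: the new error type pairs the offending track with an underlying Kind, and callers attach that context either by building a (track, Kind) tuple and letting the question-mark operator convert it through From<(T, E)>, or by calling .track(...) on any Result whose error converts into Kind. A compact, self-contained sketch of the same pattern, with a trimmed-down Kind and made-up file paths:

use std::io;

#[derive(Debug)]
enum Kind {
    InvalidName,
    File(io::Error),
}

#[derive(Debug)]
struct Error {
    track: String,
    kind: Kind,
}

impl From<io::Error> for Kind {
    fn from(err: io::Error) -> Self {
        Kind::File(err)
    }
}

// Pairs any Into<String> track name with any error that converts into Kind.
impl<T: Into<String>, E> From<(T, E)> for Error
where
    Kind: From<E>,
{
    fn from((track, err): (T, E)) -> Self {
        Error {
            track: track.into(),
            kind: Kind::from(err),
        }
    }
}

// Extension trait mirroring Context::track from the diff above.
trait Context<T> {
    fn track(self, name: impl Into<String>) -> Result<T, Error>;
}

impl<T, E> Context<T> for Result<T, E>
where
    (String, E): Into<Error>,
{
    fn track(self, name: impl Into<String>) -> Result<T, Error> {
        self.map_err(|e| (name.into(), e).into())
    }
}

fn file_stem(path: &str) -> Result<String, Error> {
    let stem = std::path::Path::new(path)
        .file_stem()
        .and_then(|s| s.to_str())
        // ok_or builds a (track, Kind) tuple; `?` converts it via From<(T, E)>.
        .ok_or((path, Kind::InvalidName))?;
    Ok(stem.to_owned())
}

fn read_track(path: &str) -> Result<Vec<u8>, Error> {
    // Any io::Error gets wrapped together with the track path that caused it.
    std::fs::read(path).track(path)
}

fn main() {
    // Happy path: the error machinery stays out of the way.
    println!("{:?}", file_stem("2023/05/example-track.mp3"));

    // Failure path: the error reports which track it came from.
    if let Err(error) = read_track("does-not-exist.mp3") {
        println!("track: {} ({:?})", error.track, error.kind);
    }
}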
@@ -7,7 +7,10 @@ use rand::Rng as _;
 use reqwest::Client;
 use tokio::fs;
 
-use crate::{data_dir, tracks::TrackError};
+use crate::{
+    data_dir,
+    tracks::{self, error::Context},
+};
 
 use super::QueuedTrack;
 

@@ -52,7 +55,11 @@ impl List {
     }
 
     /// Downloads a raw track, but doesn't decode it.
-    async fn download(&self, track: &str, client: &Client) -> Result<(Bytes, String), TrackError> {
+    async fn download(
+        &self,
+        track: &str,
+        client: &Client,
+    ) -> Result<(Bytes, String), tracks::Error> {
         // If the track has a protocol, then we should ignore the base for it.
         let full_path = if track.contains("://") {
             track.to_owned()

@@ -62,28 +69,31 @@ impl List {
 
         let data: Bytes = if let Some(x) = full_path.strip_prefix("file://") {
             let path = if x.starts_with('~') {
-                let home_path = dirs::home_dir().ok_or(TrackError::InvalidPath)?;
-                let home = home_path.to_str().ok_or(TrackError::InvalidPath)?;
+                let home_path =
+                    dirs::home_dir().ok_or((track, tracks::error::Kind::InvalidPath))?;
+                let home = home_path
+                    .to_str()
+                    .ok_or((track, tracks::error::Kind::InvalidPath))?;
 
                 x.replace('~', home)
             } else {
                 x.to_owned()
             };
 
-            let result = tokio::fs::read(path).await?;
+            let result = tokio::fs::read(path.clone()).await.track(track)?;
             result.into()
         } else {
             let response = match client.get(full_path.clone()).send().await {
                 Ok(x) => Ok(x),
                 Err(x) => {
                     if x.is_timeout() {
-                        Err(TrackError::Timeout)
+                        Err((track, tracks::error::Kind::Timeout))
                     } else {
-                        Err(TrackError::Request(x))
+                        Err((track, tracks::error::Kind::Request(x)))
                     }
                 }
             }?;
-            response.bytes().await?
+            response.bytes().await.track(track)?
         };
 
         Ok((data, full_path))

@@ -93,7 +103,7 @@ impl List {
     ///
     /// The Result's error is a bool, which is true if a timeout error occured,
     /// and false otherwise. This tells lowfi if it shouldn't wait to try again.
-    pub async fn random(&self, client: &Client) -> Result<QueuedTrack, TrackError> {
+    pub async fn random(&self, client: &Client) -> Result<QueuedTrack, tracks::Error> {
         let (path, custom_name) = self.random_path();
         let (data, full_path) = self.download(&path, client).await?;
 