From 5bb5ed9fdb6fcd2f7b6fffe6680c4b72c92a416f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20BERTHO?= Date: Tue, 21 Jul 2020 20:37:26 +0200 Subject: [PATCH] Add search in a website --- TODO.md | 2 +- src/main.rs | 28 +++++++++++++++++++++++----- src/parser.rs | 16 ++++++++++++++-- src/price_checker.rs | 15 ++++++++++++++- 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/TODO.md b/TODO.md index 0629b47..1f867e4 100644 --- a/TODO.md +++ b/TODO.md @@ -3,7 +3,7 @@ * [x] Récupération prix darty avec [scraper](https://crates.io/crates/scraper) et [reqwest](https://crates.io/crates/reqwest) * [x] Ajout du support de la Fnac, … * [x] Récupération URL ligne de commande avec [clap](https://crates.io/crates/clap) -* [ ] Ajout de SearchParser pour rechercher un article sur tous les parseurs - recherche sur un seul parser +* [ ] Ajout de SearchParser pour rechercher un article sur tous les parseurs * [x] Commande de liste des différents parseurs * [ ] Ajout des pays avec [celes](https://crates.io/crates/celes) : recherche uniquement sur les parser du pays et parseur multi pays (amazon par exemple) * [ ] Lecture des URLs depuis un fichier avec [toml](https://crates.io/crates/toml) diff --git a/src/main.rs b/src/main.rs index 786e54e..5888e9f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -19,40 +19,58 @@ fn main() -> Result<()> { .author("Rémi BERTHO ") .about("Check price") .subcommand( - SubCommand::with_name("check") - .about("Check from an URL") + SubCommand::with_name("get") + .about("Get a price from an URL") .arg(Arg::with_name("URL").required(true).multiple(true).help("The URL to get price")), ) .subcommand( SubCommand::with_name("search") .about("Search an object") + .arg(Arg::with_name("parser") + .short("w") + .long("website") + .value_name("WEBSITE") + .help("Website name") + .takes_value(true)) .arg(Arg::with_name("name").required(true).multiple(true).help("The name of the object")), ) .subcommand(SubCommand::with_name("list").about("List the parsers")) .get_matches(); let price_checker = PriceChecker::new().unwrap(); match matches.subcommand() { - ("check", Some(check_matches)) => { + ("get", Some(check_matches)) => { for url_str in check_matches.values_of("URL").unwrap() { let url = Url::parse(url_str)?; println!("{}", price_checker.get_price(&url)?); } } ("search", Some(check_matches)) => { + if let Some(parser) = check_matches.value_of("parser") + { + for name in check_matches.values_of("name").unwrap() { + if let Some(url) = price_checker.search(parser, name)? { + let price = price_checker.get_price(&url)?; + println!(" - {}", name); + println!(" * {}", url); + println!(" * {}", price); + } + } + } else { for name in check_matches.values_of("name").unwrap() { - let res = price_checker.search(name)?; + let res = price_checker.search_all(name)?; if res.is_empty() { println!("«{}» not found", name); } else { for (parser_name, url) in res { let price = price_checker.get_price(&url)?; - println!(" - {}", parser_name); + println!(" - {} : {}", name, parser_name); println!(" * {}", url); println!(" * {}", price); } } } } + } ("list", _) => { println!("Price parsers:"); for parser in price_checker.parser_list.get_price().iter() { diff --git a/src/parser.rs b/src/parser.rs index 4bb8178..b0259fd 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -5,7 +5,7 @@ pub mod fnac; pub mod ldlc; use crate::price_result::PriceResult; -use anyhow::Result; +use anyhow::{anyhow, Result}; use arraygen::Arraygen; use scraper::Html; use url::Url; @@ -27,7 +27,7 @@ pub trait PriceParser: Parser { fn parse_price(&self, html: &Html) -> Result; } -pub trait SearchParser: Parser { +pub trait SearchParser: PriceParser { /// Return the search URL fn search_url(&self, name: &str) -> Url; /// Return the first occurence of result of the page if any @@ -39,11 +39,13 @@ macro_rules! gen_list { #[derive(Arraygen, Debug)] #[gen_array(pub fn get_price: & dyn PriceParser)] #[gen_array(pub fn get_search: & dyn SearchParser)] + #[gen_array(pub fn get_all: & dyn Parser)] pub struct List { $( $( #[in_array($array)] )* + #[in_array(get_all)] $module: $module::$name ),* } @@ -71,6 +73,16 @@ gen_list!( [amazon::Amazon: get_price, get_search] ); +impl List { + pub fn get_parser(&self, name: &str) -> Result<&dyn SearchParser> { + Ok(*self + .get_search() + .iter() + .find(|&&parser| parser.name().to_lowercase() == name.to_lowercase()) + .ok_or(anyhow!("Cannot find the parser {}", name))?) + } +} + #[test] fn test_parser_list() { let parser_list = List::new().unwrap(); diff --git a/src/price_checker.rs b/src/price_checker.rs index 9371a3b..17467da 100644 --- a/src/price_checker.rs +++ b/src/price_checker.rs @@ -45,7 +45,7 @@ impl PriceChecker { } /// Search an object in all parsers - pub fn search(&self, name: &str) -> Result> { + pub fn search_all(&self, name: &str) -> Result> { let mut urls = Vec::new(); for parser in self.parser_list.get_search().iter() { let search_url = parser.search_url(name); @@ -56,6 +56,19 @@ impl PriceChecker { } Ok(urls) } + + /// Search an object in a parser + pub fn search(&self, parser_name: &str, name: &str) -> Result> { + let parser = self.parser_list.get_parser(parser_name)?; + let search_url = parser.search_url(name); + let document = self.get_html(&search_url)?; + if let Some(url) = parser.search(&document)? { + Ok(Some(url)) + } + else { + Ok(None) + } + } } #[test]