Add search in a website

This commit is contained in:
Rémi BERTHO 2020-07-21 20:37:26 +02:00
parent cf6106155a
commit 5bb5ed9fdb
Signed by: dalan
GPG key ID: EE3B917931C07B64
4 changed files with 52 additions and 9 deletions

View file

@ -3,7 +3,7 @@
* [x] Récupération prix darty avec [scraper](https://crates.io/crates/scraper) et [reqwest](https://crates.io/crates/reqwest) * [x] Récupération prix darty avec [scraper](https://crates.io/crates/scraper) et [reqwest](https://crates.io/crates/reqwest)
* [x] Ajout du support de la Fnac, … * [x] Ajout du support de la Fnac, …
* [x] Récupération URL ligne de commande avec [clap](https://crates.io/crates/clap) * [x] Récupération URL ligne de commande avec [clap](https://crates.io/crates/clap)
* [ ] Ajout de SearchParser pour rechercher un article sur tous les parseurs - recherche sur un seul parser * [ ] Ajout de SearchParser pour rechercher un article sur tous les parseurs
* [x] Commande de liste des différents parseurs * [x] Commande de liste des différents parseurs
* [ ] Ajout des pays avec [celes](https://crates.io/crates/celes) : recherche uniquement sur les parser du pays et parseur multi pays (amazon par exemple) * [ ] Ajout des pays avec [celes](https://crates.io/crates/celes) : recherche uniquement sur les parser du pays et parseur multi pays (amazon par exemple)
* [ ] Lecture des URLs depuis un fichier avec [toml](https://crates.io/crates/toml) * [ ] Lecture des URLs depuis un fichier avec [toml](https://crates.io/crates/toml)

View file

@ -19,40 +19,58 @@ fn main() -> Result<()> {
.author("Rémi BERTHO <remi.bertho@dalan.fr>") .author("Rémi BERTHO <remi.bertho@dalan.fr>")
.about("Check price") .about("Check price")
.subcommand( .subcommand(
SubCommand::with_name("check") SubCommand::with_name("get")
.about("Check from an URL") .about("Get a price from an URL")
.arg(Arg::with_name("URL").required(true).multiple(true).help("The URL to get price")), .arg(Arg::with_name("URL").required(true).multiple(true).help("The URL to get price")),
) )
.subcommand( .subcommand(
SubCommand::with_name("search") SubCommand::with_name("search")
.about("Search an object") .about("Search an object")
.arg(Arg::with_name("parser")
.short("w")
.long("website")
.value_name("WEBSITE")
.help("Website name")
.takes_value(true))
.arg(Arg::with_name("name").required(true).multiple(true).help("The name of the object")), .arg(Arg::with_name("name").required(true).multiple(true).help("The name of the object")),
) )
.subcommand(SubCommand::with_name("list").about("List the parsers")) .subcommand(SubCommand::with_name("list").about("List the parsers"))
.get_matches(); .get_matches();
let price_checker = PriceChecker::new().unwrap(); let price_checker = PriceChecker::new().unwrap();
match matches.subcommand() { match matches.subcommand() {
("check", Some(check_matches)) => { ("get", Some(check_matches)) => {
for url_str in check_matches.values_of("URL").unwrap() { for url_str in check_matches.values_of("URL").unwrap() {
let url = Url::parse(url_str)?; let url = Url::parse(url_str)?;
println!("{}", price_checker.get_price(&url)?); println!("{}", price_checker.get_price(&url)?);
} }
} }
("search", Some(check_matches)) => { ("search", Some(check_matches)) => {
if let Some(parser) = check_matches.value_of("parser")
{
for name in check_matches.values_of("name").unwrap() {
if let Some(url) = price_checker.search(parser, name)? {
let price = price_checker.get_price(&url)?;
println!(" - {}", name);
println!(" * {}", url);
println!(" * {}", price);
}
}
} else {
for name in check_matches.values_of("name").unwrap() { for name in check_matches.values_of("name").unwrap() {
let res = price_checker.search(name)?; let res = price_checker.search_all(name)?;
if res.is_empty() { if res.is_empty() {
println!("«{}» not found", name); println!("«{}» not found", name);
} else { } else {
for (parser_name, url) in res { for (parser_name, url) in res {
let price = price_checker.get_price(&url)?; let price = price_checker.get_price(&url)?;
println!(" - {}", parser_name); println!(" - {} : {}", name, parser_name);
println!(" * {}", url); println!(" * {}", url);
println!(" * {}", price); println!(" * {}", price);
} }
} }
} }
} }
}
("list", _) => { ("list", _) => {
println!("Price parsers:"); println!("Price parsers:");
for parser in price_checker.parser_list.get_price().iter() { for parser in price_checker.parser_list.get_price().iter() {

View file

@ -5,7 +5,7 @@ pub mod fnac;
pub mod ldlc; pub mod ldlc;
use crate::price_result::PriceResult; use crate::price_result::PriceResult;
use anyhow::Result; use anyhow::{anyhow, Result};
use arraygen::Arraygen; use arraygen::Arraygen;
use scraper::Html; use scraper::Html;
use url::Url; use url::Url;
@ -27,7 +27,7 @@ pub trait PriceParser: Parser {
fn parse_price(&self, html: &Html) -> Result<PriceResult>; fn parse_price(&self, html: &Html) -> Result<PriceResult>;
} }
pub trait SearchParser: Parser { pub trait SearchParser: PriceParser {
/// Return the search URL /// Return the search URL
fn search_url(&self, name: &str) -> Url; fn search_url(&self, name: &str) -> Url;
Return the first occurrence of a result on the page, if any
@ -39,11 +39,13 @@ macro_rules! gen_list {
#[derive(Arraygen, Debug)] #[derive(Arraygen, Debug)]
#[gen_array(pub fn get_price: & dyn PriceParser)] #[gen_array(pub fn get_price: & dyn PriceParser)]
#[gen_array(pub fn get_search: & dyn SearchParser)] #[gen_array(pub fn get_search: & dyn SearchParser)]
#[gen_array(pub fn get_all: & dyn Parser)]
pub struct List { pub struct List {
$( $(
$( $(
#[in_array($array)] #[in_array($array)]
)* )*
#[in_array(get_all)]
$module: $module::$name $module: $module::$name
),* ),*
} }
@ -71,6 +73,16 @@ gen_list!(
[amazon::Amazon: get_price, get_search] [amazon::Amazon: get_price, get_search]
); );
impl List {
pub fn get_parser(&self, name: &str) -> Result<&dyn SearchParser> {
Ok(*self
.get_search()
.iter()
.find(|&&parser| parser.name().to_lowercase() == name.to_lowercase())
.ok_or(anyhow!("Cannot find the parser {}", name))?)
}
}
#[test] #[test]
fn test_parser_list() { fn test_parser_list() {
let parser_list = List::new().unwrap(); let parser_list = List::new().unwrap();

View file

@ -45,7 +45,7 @@ impl PriceChecker {
} }
/// Search an object in all parsers /// Search an object in all parsers
pub fn search(&self, name: &str) -> Result<Vec<(&'static str, Url)>> { pub fn search_all(&self, name: &str) -> Result<Vec<(&'static str, Url)>> {
let mut urls = Vec::new(); let mut urls = Vec::new();
for parser in self.parser_list.get_search().iter() { for parser in self.parser_list.get_search().iter() {
let search_url = parser.search_url(name); let search_url = parser.search_url(name);
@ -56,6 +56,19 @@ impl PriceChecker {
} }
Ok(urls) Ok(urls)
} }
/// Search for an object using a single parser.
///
/// `parser_name` is resolved through `parser_list.get_parser`. The parser's
/// search URL for `name` is then fetched with `get_html`, and the resulting
/// document is handed to the parser's `search`.
///
/// Returns `Ok(Some(url))` with the URL the parser extracted from the result
/// page, or `Ok(None)` when the parser found none.
///
/// # Errors
/// Propagates any error from the parser lookup, the page fetch, or the
/// parser's `search`.
pub fn search(&self, parser_name: &str, name: &str) -> Result<Option<Url>> {
    let parser = self.parser_list.get_parser(parser_name)?;
    let search_url = parser.search_url(name);
    let document = self.get_html(&search_url)?;
    // Pass the parser's `Option` straight through; `?` keeps the same error
    // conversion the original performed.
    Ok(parser.search(&document)?)
}
} }
#[test] #[test]