Ajout du search parser amazon

This commit is contained in:
Rémi BERTHO 2020-06-23 18:29:55 +02:00
parent e37425de5d
commit ba2cd87ec4
Signed by: dalan
GPG key ID: EE3B917931C07B64
9 changed files with 99 additions and 34 deletions

View file

@ -3,8 +3,9 @@
* [x] Récupération prix darty avec [scraper](https://crates.io/crates/scraper) et [reqwest](https://crates.io/crates/reqwest) * [x] Récupération prix darty avec [scraper](https://crates.io/crates/scraper) et [reqwest](https://crates.io/crates/reqwest)
* [x] Ajout du support de la Fnac, … * [x] Ajout du support de la Fnac, …
* [x] Récupération URL ligne de commande avec [clap](https://crates.io/crates/clap) * [x] Récupération URL ligne de commande avec [clap](https://crates.io/crates/clap)
* [ ] Ajout de SearchParser pour recherché un article sur tous les parseurs * [ ] Ajout de SearchParser pour rechercher un article sur tous les parseurs
* [ ] Ajout des pays avec [celes](https://crates.io/crates/celes) : recherche uniquement sur les parser du pays et parseur multi pays (amazon) * [ ] Commande de liste des différents parseurs
* [ ] Ajout des pays avec [celes](https://crates.io/crates/celes) : recherche uniquement sur les parser du pays et parseur multi pays (amazon par exemple)
* [ ] Lecture des URLs depuis un fichier avec [toml](https://crates.io/crates/toml) * [ ] Lecture des URLs depuis un fichier avec [toml](https://crates.io/crates/toml)
* [ ] Parallélisation des requêtes avec [rayon](https://crates.io/crates/rayon) ou reqwest asynchrone * [ ] Parallélisation des requêtes avec [rayon](https://crates.io/crates/rayon) ou reqwest asynchrone
* [ ] Écriture dans un fichier ODS avec [calamine](https://crates.io/crates/calamine) * [ ] Écriture dans un fichier ODS avec [calamine](https://crates.io/crates/calamine)

View file

@ -24,13 +24,34 @@ fn main() -> Result<()> {
.required(true) .required(true)
.multiple(true) .multiple(true)
.help("The URL to get price"))) .help("The URL to get price")))
.subcommand(SubCommand::with_name("search")
.about("Search an object")
.arg(Arg::with_name("name")
.required(true)
.multiple(true)
.help("The name of the object")))
.get_matches(); .get_matches();
let price_checker = PriceChecker::new().unwrap();
match matches.subcommand() { match matches.subcommand() {
("check", Some(check_matches)) => { ("check", Some(check_matches)) => {
let price_checker = PriceChecker::new().unwrap();
for url_str in check_matches.values_of("URL").unwrap() { for url_str in check_matches.values_of("URL").unwrap() {
let url = Url::parse(url_str)?; let url = Url::parse(url_str)?;
println!("{}", price_checker.get_price(url)?) ; println!("{}", price_checker.get_price(&url)?) ;
}
},
("search", Some(check_matches)) => {
for name in check_matches.values_of("name").unwrap() {
let res = price_checker.search(name)? ;
if res.is_empty() {
println!("«{}» not found", name);
} else {
for (parser_name, url) in res {
let price = price_checker.get_price(&url)?;
println!(" - {}", parser_name) ;
println!(" * {}", url);
println!(" * {}", price);
}
}
} }
}, },
_ => { _ => {

View file

@ -14,7 +14,7 @@ pub trait Parser{
/// Create the parser /// Create the parser
fn new() -> Result<Self> where Self : Sized; fn new() -> Result<Self> where Self : Sized;
/// Get the name /// Get the name
fn name() -> &'static str where Self : Sized; fn name(&self) -> &'static str;
} }
/// Trait needed to get price from a specific website /// Trait needed to get price from a specific website
@ -22,15 +22,21 @@ pub trait PriceParser : Parser{
/// Indicate if it can parse this URL /// Indicate if it can parse this URL
fn can_parse(&self, url : &Url) -> bool; fn can_parse(&self, url : &Url) -> bool;
/// Parse the html into a price /// Parse the html into a price
fn parse(&self, html : &Html) -> Result<PriceResult>; fn parse_price(&self, html : &Html) -> Result<PriceResult>;
} }
// @todo Macro générateur liste et tests pub trait SearchParser : Parser {
/// Return the search URL
fn search_url(&self, name: &str) -> Url;
/// Return the URL of the first result of the page, if any
fn search(&self, html : &Html) -> Result<Option<Url>>;
}
macro_rules! gen_list { macro_rules! gen_list {
( $([$module:ident::$name:ident : $($array:ident),*]),* ) => { ( $([$module:ident::$name:ident : $($array:ident),*]),* ) => {
#[derive(Arraygen, Debug)] #[derive(Arraygen, Debug)]
#[gen_array(pub fn get_price: & dyn PriceParser)] #[gen_array(pub fn get_price: & dyn PriceParser)]
#[gen_array(pub fn get_search: & dyn SearchParser)]
pub struct List { pub struct List {
$( $(
$( $(
@ -60,11 +66,12 @@ gen_list!(
[fnac::Fnac : get_price], [fnac::Fnac : get_price],
[du_bruit_dans_la_cuisine::DuBruitDansLaCuisine : get_price], [du_bruit_dans_la_cuisine::DuBruitDansLaCuisine : get_price],
[ldlc::LDLC : get_price], [ldlc::LDLC : get_price],
[amazon::Amazon : get_price] [amazon::Amazon : get_price, get_search]
); );
#[test] #[test]
fn test_parser_list() { fn test_parser_list() {
let parser_list = List::new().unwrap(); let parser_list = List::new().unwrap();
assert_eq!(parser_list.get_price().len(), 5); assert_eq!(parser_list.get_price().len(), 5);
assert_eq!(parser_list.get_search().len(), 1);
} }

View file

@ -1,4 +1,4 @@
use super::{Parser, PriceParser}; use super::{Parser, PriceParser, SearchParser};
use crate::PriceResult; use crate::PriceResult;
use scraper::{Selector, Html}; use scraper::{Selector, Html};
use url::Url; use url::Url;
@ -9,19 +9,23 @@ use anyhow::{Result, anyhow};
pub struct Amazon { pub struct Amazon {
price_selector: Selector, price_selector: Selector,
name_selector: Selector, name_selector: Selector,
product_selector: Selector product_selector: Selector,
search_selector_1: Selector,
search_selector_2: Selector
} }
impl Parser for Amazon { impl Parser for Amazon {
fn new() -> Result<Self> { fn new() -> Result<Self> {
Ok(Amazon { Ok(Amazon {
price_selector: Selector::parse(r"#priceblock_ourprice").unwrap(), price_selector: Selector::parse(r".a-color-price").unwrap(),
name_selector: Selector::parse(r"#productTitle").unwrap(), name_selector: Selector::parse(r"#productTitle").unwrap(),
product_selector: Selector::parse(r".nav-search-label").unwrap() product_selector: Selector::parse(r".nav-search-label").unwrap(),
search_selector_1: Selector::parse(r".rush-component[data-component-type=s-product-image]").unwrap(),
search_selector_2: Selector::parse(r".a-link-normal").unwrap()
}) })
} }
fn name() -> &'static str { fn name(&self) -> &'static str {
"Amazon" "Amazon"
} }
} }
@ -32,7 +36,7 @@ impl PriceParser for Amazon {
url.host_str().unwrap_or("") == "www.amazon.fr" url.host_str().unwrap_or("") == "www.amazon.fr"
} }
fn parse(&self, html : &Html) -> Result<PriceResult> { fn parse_price(&self, html : &Html) -> Result<PriceResult> {
// Get price // Get price
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?; let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
let mut price_text_it = price_element.text(); let mut price_text_it = price_element.text();
@ -54,10 +58,25 @@ impl PriceParser for Amazon {
} }
} }
impl SearchParser for Amazon {
    /// Build the amazon.fr search URL for `name`.
    ///
    /// `name` is sent as the `k` query parameter and is form-urlencoded, so
    /// characters such as `&`, `#` or `+` stay part of the search term
    /// instead of being interpreted as URL syntax.
    fn search_url(&self, name: &str) -> Url {
        Url::parse_with_params("https://www.amazon.fr/s", &[("k", name)])
            .expect("the amazon.fr search base URL is a valid constant")
    }

    /// Return the product URL of the first result of a search page, if any.
    ///
    /// # Returns
    /// * `Ok(Some(url))` - URL of the first product found in the page.
    /// * `Ok(None)` - the page contains no product-image component, i.e. the
    ///   search gave no result.
    ///
    /// # Errors
    /// Fails when a product component is present but holds no link, or when
    /// the link has no `href` attribute: the page layout is then not the one
    /// this parser expects.
    fn search(&self, html: &Html) -> Result<Option<Url>> {
        // No product component at all means "nothing found", not a failure.
        let first_result = match html.select(&self.search_selector_1).next() {
            Some(element) => element,
            None => return Ok(None),
        };
        let link = first_result
            .select(&self.search_selector_2)
            .next()
            .ok_or_else(|| anyhow!("No search element 2"))?;
        let href = link
            .value()
            .attr("href")
            .ok_or_else(|| anyhow!("No link element"))?;

        let mut url = Url::parse("https://www.amazon.fr")
            .expect("the amazon.fr base URL is a valid constant");
        // Keep only what precedes the first "/ref" segment of the link.
        url.set_path(href.split("/ref").next().unwrap_or(href));
        Ok(Some(url))
    }
}
#[test] #[test]
fn test_parser_fnac() { fn test_parser_fnac() {
let fnac_parser = Amazon::new().unwrap(); let fnac_parser = Amazon::new().unwrap();
assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp").unwrap())); assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap()));
assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp").unwrap())); assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap()));
assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()) == false); assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()) == false);
} }

View file

@ -21,7 +21,7 @@ impl Parser for Darty {
}) })
} }
fn name() -> &'static str { fn name(&self) -> &'static str {
"Darty" "Darty"
} }
} }
@ -31,7 +31,7 @@ impl PriceParser for Darty {
url.host_str().unwrap_or("") == "www.darty.com" url.host_str().unwrap_or("") == "www.darty.com"
} }
fn parse(&self, html : &Html) -> Result<PriceResult> { fn parse_price(&self, html : &Html) -> Result<PriceResult> {
// Get price // Get price
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?; let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
let mut price_text_it = price_element.text(); let mut price_text_it = price_element.text();

View file

@ -19,7 +19,7 @@ impl Parser for DuBruitDansLaCuisine {
}) })
} }
fn name() -> &'static str { fn name(&self) -> &'static str {
"Du bruit dans la Cuisine" "Du bruit dans la Cuisine"
} }
} }
@ -29,7 +29,7 @@ impl PriceParser for DuBruitDansLaCuisine {
url.host_str().unwrap_or("") == "www.dubruitdanslacuisine.fr" url.host_str().unwrap_or("") == "www.dubruitdanslacuisine.fr"
} }
fn parse(&self, html : &Html) -> Result<PriceResult> { fn parse_price(&self, html : &Html) -> Result<PriceResult> {
// Get price // Get price
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?; let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
let mut price_text_it = price_element.text(); let mut price_text_it = price_element.text();

View file

@ -21,7 +21,7 @@ impl Parser for Fnac{
}) })
} }
fn name() -> &'static str { fn name(&self) -> &'static str {
"Fnac" "Fnac"
} }
} }
@ -31,7 +31,7 @@ impl PriceParser for Fnac {
url.host_str().unwrap_or("") == "www.fnac.com" url.host_str().unwrap_or("") == "www.fnac.com"
} }
fn parse(&self, html : &Html) -> Result<PriceResult> { fn parse_price(&self, html : &Html) -> Result<PriceResult> {
// Get price // Get price
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?; let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
let mut price_text_it = price_element.text(); let mut price_text_it = price_element.text();

View file

@ -19,7 +19,7 @@ impl Parser for LDLC {
}) })
} }
fn name() -> &'static str { fn name(&self) -> &'static str {
"LDLC" "LDLC"
} }
} }
@ -29,7 +29,7 @@ impl PriceParser for LDLC {
url.host_str().unwrap_or("") == "www.ldlc.com" url.host_str().unwrap_or("") == "www.ldlc.com"
} }
fn parse(&self, html : &Html) -> Result<PriceResult> { fn parse_price(&self, html : &Html) -> Result<PriceResult> {
// Get price // Get price
let price_element = html.select(&self.price_selector).nth(4).ok_or(anyhow!("No price element"))?; let price_element = html.select(&self.price_selector).nth(4).ok_or(anyhow!("No price element"))?;
let mut price_text_it = price_element.text(); let mut price_text_it = price_element.text();

View file

@ -26,13 +26,30 @@ impl PriceChecker {
}) })
} }
/// Get a price from an URL fn get_html(&self, url : &Url) -> Result<Html> {
pub fn get_price(&self, url : Url) -> Result<PriceResult> {
let parser = *self.parser_list.get_price().iter().find(|p| p.can_parse(&url)).ok_or(anyhow!("No parser can parse {}", url))?;
let response = self.client.get(url.clone()).send()?; let response = self.client.get(url.clone()).send()?;
let text = response.text()?; let text = response.text()?;
let document = Html::parse_document(&text); Ok(Html::parse_document(&text))
Ok(parser.parse(&document)?) }
/// Get a price from an URL.
///
/// Picks the first price parser whose `can_parse` accepts `url`, downloads
/// the page with `get_html` and hands the document to that parser.
///
/// # Errors
/// Fails when no parser accepts `url`, when the page cannot be fetched, or
/// when the selected parser cannot extract a price from the document.
pub fn get_price(&self, url : &Url) -> Result<PriceResult> {
    // The error message is only formatted when no parser matches.
    let parser = *self
        .parser_list
        .get_price()
        .iter()
        .find(|p| p.can_parse(url))
        .ok_or_else(|| anyhow!("No parser can parse {}", url))?;
    let document = self.get_html(url)?;
    parser.parse_price(&document)
}
/// Search an object in all parsers
pub fn search(&self, name: &str) -> Result<Vec<(&'static str, Url)>> {
let mut urls = Vec::new();
for parser in self.parser_list.get_search().iter() {
let search_url = parser.search_url(name);
let document = self.get_html(&search_url)?;
if let Some(url) = parser.search(&document)? {
urls.push((parser.name(), url));
}
}
Ok(urls)
} }
} }
@ -41,29 +58,29 @@ fn test_price_checker() {
let price_checker = PriceChecker::new().unwrap(); let price_checker = PriceChecker::new().unwrap();
// Test darty // Test darty
let price_result = price_checker.get_price(Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap()).unwrap(); let price_result = price_checker.get_price(&Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap()).unwrap();
assert!(price_result.name != ""); assert!(price_result.name != "");
assert!(price_result.price != 0.); assert!(price_result.price != 0.);
assert!(price_result.product != ""); assert!(price_result.product != "");
// Test fnac // Test fnac
let price_result = price_checker.get_price(Url::parse("https://www.fnac.com/a12584732/Kaamelott-Les-Six-Livres-L-integrale-de-la-serie-Coffret-Blu-ray-Alexandre-Astier-Blu-ray").unwrap()).unwrap(); let price_result = price_checker.get_price(&Url::parse("https://www.fnac.com/a12584732/Kaamelott-Les-Six-Livres-L-integrale-de-la-serie-Coffret-Blu-ray-Alexandre-Astier-Blu-ray").unwrap()).unwrap();
assert!(price_result.name != ""); assert!(price_result.name != "");
assert!(price_result.price != 0.); assert!(price_result.price != 0.);
assert!(price_result.product != ""); assert!(price_result.product != "");
// Test du bruit dans la cuisine // Test du bruit dans la cuisine
let price_result = price_checker.get_price(Url::parse("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p").unwrap()).unwrap(); let price_result = price_checker.get_price(&Url::parse("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p").unwrap()).unwrap();
assert!(price_result.name != ""); assert!(price_result.name != "");
assert!(price_result.price != 0.); assert!(price_result.price != 0.);
// LDLC // LDLC
let price_result = price_checker.get_price(Url::parse("https://www.ldlc.com/fiche/PB00335410.html").unwrap()).unwrap(); let price_result = price_checker.get_price(&Url::parse("https://www.ldlc.com/fiche/PB00335410.html").unwrap()).unwrap();
assert!(price_result.name != ""); assert!(price_result.name != "");
assert!(price_result.price != 0.); assert!(price_result.price != 0.);
// Amazon // Amazon
let price_result = price_checker.get_price(Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp&th=1").unwrap()).unwrap(); let price_result = price_checker.get_price(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp&th=1").unwrap()).unwrap();
assert!(price_result.name != ""); assert!(price_result.name != "");
assert!(price_result.price != 0.); assert!(price_result.price != 0.);
assert!(price_result.product != ""); assert!(price_result.product != "");