From f47b5f3b33c87525144b33fd9fae49568fda29d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20BERTHO?=
Date: Sun, 21 Jun 2020 10:36:13 +0200
Subject: [PATCH] Add amazon

---
Note: the added lines below were revised during review (documentation,
test naming, price trimming); the `index` blob ids are those of the
original commit and no longer match the patched content.

 src/parser.rs        | 10 ++++++--
 src/parser/amazon.rs | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/price_checker.rs |  6 +++++
 3 files changed, 79 insertions(+), 2 deletions(-)
 create mode 100644 src/parser/amazon.rs

diff --git a/src/parser.rs b/src/parser.rs
index 660975b..083b85e 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -2,6 +2,7 @@ pub mod darty;
 pub mod fnac;
 pub mod du_bruit_dans_la_cuisine;
 pub mod ldlc;
+pub mod amazon;
 
 extern crate arraygen;
 extern crate url;
@@ -22,6 +23,8 @@ pub trait PriceParser{
     fn parse(&self, html : &Html) -> Result<PriceResult>;
 }
 
+// @todo Macro to generate the parser list and its tests
+
 #[derive(Arraygen, Debug)]
 #[gen_array(pub fn get_price: & dyn PriceParser)]
 /// Represent the list of all the parser
@@ -34,6 +37,8 @@ pub struct List {
     du_bruit_dans_la_cuisine: du_bruit_dans_la_cuisine::DuBruitDansLaCuisine,
     #[in_array(get_price)]
     ldlc: ldlc::LDLC,
+    #[in_array(get_price)]
+    amazon: amazon::Amazon,
 }
 
 impl List {
@@ -43,7 +48,8 @@ impl List {
             darty: darty::Darty::new()?,
             fnac: fnac::Fnac::new()?,
             du_bruit_dans_la_cuisine: du_bruit_dans_la_cuisine::DuBruitDansLaCuisine::new()?,
-            ldlc: ldlc::LDLC::new()?
+            ldlc: ldlc::LDLC::new()?,
+            amazon: amazon::Amazon::new()?
         })
     }
 }
@@ -51,5 +57,5 @@ impl List {
 #[test]
 fn test_parser_list() {
     let parser_list = List::new().unwrap();
-    assert_eq!(parser_list.get_price().len(), 4);
+    assert_eq!(parser_list.get_price().len(), 5);
 }
\ No newline at end of file
diff --git a/src/parser/amazon.rs b/src/parser/amazon.rs
new file mode 100644
index 0000000..3ff2360
--- /dev/null
+++ b/src/parser/amazon.rs
@@ -0,0 +1,65 @@
+use super::PriceParser;
+use crate::PriceResult;
+use scraper::{Selector, Html};
+use url::Url;
+use anyhow::{Result, anyhow};
+
+#[derive(Debug)]
+/// Parser for the Amazon website (`www.amazon.fr`)
+pub struct Amazon {
+    /// Selector of the element holding the price
+    price_selector: Selector,
+    /// Selector of the element holding the product title
+    name_selector: Selector,
+    /// Selector of the element whose text is reported as the product
+    product_selector: Selector
+}
+
+impl PriceParser for Amazon {
+    /// Create the parser and compile its CSS selectors
+    fn new() -> Result<Self> {
+        // The selectors are literals, so a parse failure here is a programming error
+        Ok(Amazon {
+            price_selector: Selector::parse(r"#priceblock_ourprice").unwrap(),
+            name_selector: Selector::parse(r"#productTitle").unwrap(),
+            product_selector: Selector::parse(r".nav-search-label").unwrap()
+        })
+    }
+
+    /// Tell whether `url` is handled: its host must be exactly `www.amazon.fr`
+    fn can_parse(&self, url : &Url) -> bool {
+        url.host_str() == Some("www.amazon.fr")
+    }
+
+    /// Read the price, the name and the product from a product page
+    ///
+    /// Returns an error when one of the three elements is missing or when the
+    /// price text cannot be parsed as a number.
+    fn parse(&self, html : &Html) -> Result<PriceResult> {
+        // Get price: trim first so a "€" followed by whitespace is still removed, then use '.' as the decimal separator
+        let price_element = html.select(&self.price_selector).next().ok_or_else(|| anyhow!("No price element"))?;
+        let price : f64 = price_element.text().next().unwrap_or("0.").trim().trim_end_matches('€').trim().replace(',', ".").parse()?;
+
+        // Get name
+        let name_element = html.select(&self.name_selector).next().ok_or_else(|| anyhow!("No name element"))?;
+        let name = name_element.text().next().unwrap_or("").trim().replace('\n', "-");
+
+        // Get product
+        let family_element = html.select(&self.product_selector).next().ok_or_else(|| anyhow!("No product element"))?;
+        let family = family_element.text().next().unwrap_or("").trim().replace('\n', "-");
+
+        Ok(PriceResult {
+            name,
+            product: family,
+            price
+        })
+    }
+}
+
+#[test]
+fn test_parser_amazon() {
+    let amazon_parser = Amazon::new().unwrap();
+    assert!(amazon_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp").unwrap()));
+    assert!(amazon_parser.can_parse(&Url::parse("https://www.amazon.fr/dp/B071JM699B").unwrap()));
+    assert!(!amazon_parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()));
+}
\ No newline at end of file
diff --git a/src/price_checker.rs b/src/price_checker.rs
index fd831a6..841e2e9 100644
--- a/src/price_checker.rs
+++ b/src/price_checker.rs
@@ -61,4 +61,10 @@ fn test_price_checker() {
     let price_result = price_checker.get_price(Url::parse("https://www.ldlc.com/fiche/PB00335410.html").unwrap()).unwrap();
     assert!(price_result.name != "");
     assert!(price_result.price != 0.);
+
+    // Amazon
+    let price_result = price_checker.get_price(Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp&th=1").unwrap()).unwrap();
+    assert!(price_result.name != "");
+    assert!(price_result.price != 0.);
+    assert!(price_result.product != "");
 }
\ No newline at end of file