Add amazon
This commit is contained in:
parent
ed00ce7dec
commit
f47b5f3b33
3 changed files with 70 additions and 2 deletions
|
@ -2,6 +2,7 @@ pub mod darty;
|
|||
pub mod fnac;
|
||||
pub mod du_bruit_dans_la_cuisine;
|
||||
pub mod ldlc;
|
||||
pub mod amazon;
|
||||
|
||||
extern crate arraygen;
|
||||
extern crate url;
|
||||
|
@ -22,6 +23,8 @@ pub trait PriceParser{
|
|||
fn parse(&self, html : &Html) -> Result<PriceResult>;
|
||||
}
|
||||
|
||||
// @todo Macro generating the parser list and its tests
|
||||
|
||||
#[derive(Arraygen, Debug)]
|
||||
#[gen_array(pub fn get_price: & dyn PriceParser)]
|
||||
/// Represents the list of all the parsers
|
||||
|
@ -34,6 +37,8 @@ pub struct List {
|
|||
du_bruit_dans_la_cuisine: du_bruit_dans_la_cuisine::DuBruitDansLaCuisine,
|
||||
#[in_array(get_price)]
|
||||
ldlc: ldlc::LDLC,
|
||||
#[in_array(get_price)]
|
||||
amazon: amazon::Amazon,
|
||||
}
|
||||
|
||||
impl List {
|
||||
|
@ -43,7 +48,8 @@ impl List {
|
|||
darty: darty::Darty::new()?,
|
||||
fnac: fnac::Fnac::new()?,
|
||||
du_bruit_dans_la_cuisine: du_bruit_dans_la_cuisine::DuBruitDansLaCuisine::new()?,
|
||||
ldlc: ldlc::LDLC::new()?
|
||||
ldlc: ldlc::LDLC::new()?,
|
||||
amazon: amazon::Amazon::new()?
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -51,5 +57,5 @@ impl List {
|
|||
/// Checks that the generated `get_price` array exposes every registered parser.
#[test]
fn test_parser_list() {
    let parser_list = List::new().unwrap();
    // One entry per parser built by `List::new`:
    // darty, fnac, du_bruit_dans_la_cuisine, ldlc and amazon.
    assert_eq!(parser_list.get_price().len(), 5);
}
|
56
src/parser/amazon.rs
Normal file
56
src/parser/amazon.rs
Normal file
|
@ -0,0 +1,56 @@
|
|||
use super::PriceParser;
|
||||
use crate::PriceResult;
|
||||
use scraper::{Selector, Html};
|
||||
use url::Url;
|
||||
use anyhow::{Result, anyhow};
|
||||
|
||||
#[derive(Debug)]
|
||||
/// Parser for the darty website
|
||||
pub struct Amazon {
|
||||
price_selector: Selector,
|
||||
name_selector: Selector,
|
||||
product_selector: Selector
|
||||
}
|
||||
|
||||
impl PriceParser for Amazon {
|
||||
fn new() -> Result<Self> {
|
||||
Ok(Amazon {
|
||||
price_selector: Selector::parse(r"#priceblock_ourprice").unwrap(),
|
||||
name_selector: Selector::parse(r"#productTitle").unwrap(),
|
||||
product_selector: Selector::parse(r".nav-search-label").unwrap()
|
||||
})
|
||||
}
|
||||
|
||||
fn can_parse(&self, url : &Url) -> bool {
|
||||
url.host_str().unwrap_or("") == "www.amazon.fr"
|
||||
}
|
||||
|
||||
fn parse(&self, html : &Html) -> Result<PriceResult> {
|
||||
// Get price
|
||||
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
|
||||
let mut price_text_it = price_element.text();
|
||||
let price : f64 = price_text_it.next().unwrap_or("0.").trim_end_matches("€").trim().replace(',', ".").parse()?;
|
||||
|
||||
// Get name
|
||||
let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?;
|
||||
let name = name_element.text().next().unwrap_or("").trim().replace('\n', "-");
|
||||
|
||||
// Get product
|
||||
let family_element = html.select(&self.product_selector).next().ok_or(anyhow!("No product element"))?;
|
||||
let family = family_element.text().next().unwrap_or("").trim().replace('\n', "-");
|
||||
|
||||
Ok(PriceResult {
|
||||
name: name.to_owned(),
|
||||
product: family.to_owned(),
|
||||
price
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks that the Amazon parser accepts `www.amazon.fr` URLs and rejects other hosts.
#[test]
fn test_parser_amazon() {
    let amazon_parser = Amazon::new().unwrap();
    // Full product URL with a query string
    assert!(amazon_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp").unwrap()));
    // Short product URL on the same host
    assert!(amazon_parser.can_parse(&Url::parse("https://www.amazon.fr/dp/B071JM699B").unwrap()));
    // Another Amazon marketplace must not be accepted
    assert!(!amazon_parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()));
}
|
|
@ -61,4 +61,10 @@ fn test_price_checker() {
|
|||
let price_result = price_checker.get_price(Url::parse("https://www.ldlc.com/fiche/PB00335410.html").unwrap()).unwrap();
|
||||
assert!(price_result.name != "");
|
||||
assert!(price_result.price != 0.);
|
||||
|
||||
// Amazon
|
||||
let price_result = price_checker.get_price(Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp&th=1").unwrap()).unwrap();
|
||||
assert!(price_result.name != "");
|
||||
assert!(price_result.price != 0.);
|
||||
assert!(price_result.product != "");
|
||||
}
|
Loading…
Reference in a new issue