use super::PriceParser; use crate::PriceResult; use scraper::{Selector, Html}; use url::Url; use anyhow::{Result, anyhow}; #[derive(Debug)] /// Parser for the darty website pub struct Amazon { price_selector: Selector, name_selector: Selector, product_selector: Selector } impl PriceParser for Amazon { fn new() -> Result { Ok(Amazon { price_selector: Selector::parse(r"#priceblock_ourprice").unwrap(), name_selector: Selector::parse(r"#productTitle").unwrap(), product_selector: Selector::parse(r".nav-search-label").unwrap() }) } fn can_parse(&self, url : &Url) -> bool { url.host_str().unwrap_or("") == "www.amazon.fr" } fn parse(&self, html : &Html) -> Result { // Get price let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?; let mut price_text_it = price_element.text(); let price : f64 = price_text_it.next().unwrap_or("0.").trim_end_matches("€").trim().replace(',', ".").parse()?; // Get name let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?; let name = name_element.text().next().unwrap_or("").trim().replace('\n', "-"); // Get product let family_element = html.select(&self.product_selector).next().ok_or(anyhow!("No product element"))?; let family = family_element.text().next().unwrap_or("").trim().replace('\n', "-"); Ok(PriceResult { name: name.to_owned(), product: family.to_owned(), price }) } } #[test] fn test_parser_fnac() { let fnac_parser = Amazon::new().unwrap(); assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp").unwrap())); assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp").unwrap())); assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()) == false); }