use super::{Parser, PriceParser, SearchParser}; use crate::PriceResult; use anyhow::{anyhow, Result}; use scraper::{Html, Selector}; use url::Url; #[derive(Debug)] /// Parser for the darty website pub struct Amazon { price_selector: Selector, name_selector: Selector, product_selector: Selector, search_selector_1: Selector, search_selector_2: Selector, } impl Parser for Amazon { fn new() -> Result { Ok(Amazon { price_selector: Selector::parse(r".a-color-price").unwrap(), name_selector: Selector::parse(r"#productTitle").unwrap(), product_selector: Selector::parse(r".nav-search-label").unwrap(), search_selector_1: Selector::parse(r".rush-component[data-component-type=s-product-image]").unwrap(), search_selector_2: Selector::parse(r".a-link-normal").unwrap(), }) } fn name(&self) -> &'static str { "Amazon" } } impl PriceParser for Amazon { fn can_parse(&self, url: &Url) -> bool { url.host_str().unwrap_or("") == "www.amazon.fr" } fn parse_price(&self, html: &Html) -> Result { // Get price let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?; let mut price_text_it = price_element.text(); let price: f64 = price_text_it .next() .unwrap_or("0.") .trim() .trim_end_matches("€") .trim() .replace(',', ".") .parse()?; // Get name let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?; let name = name_element.text().next().unwrap_or("").trim().replace('\n', "-"); // Get product let family_element = html.select(&self.product_selector).next().ok_or(anyhow!("No product element"))?; let family = family_element.text().next().unwrap_or("").trim().replace('\n', "-"); Ok(PriceResult { name: name.to_owned(), product: family.to_owned(), price, }) } } impl SearchParser for Amazon { fn search_url(&self, name: &str) -> Url { Url::parse(&format!("https://www.amazon.fr/s?k={}", name)).unwrap() } fn search(&self, html: &Html) -> Result> { if let Some(search_element_1) = html.select(&self.search_selector_1).next() { let search_element_2 = search_element_1 .select(&self.search_selector_2) .next() .ok_or(anyhow!("No search element 2"))?; let path_url = search_element_2.value().attr("href").ok_or(anyhow!("No link element"))?; let mut url = Url::parse("https://www.amazon.fr").unwrap(); url.set_path(path_url.split("/ref").next().unwrap_or(path_url)); Ok(Option::Some(url)) } else { Ok(None) } } } #[test] fn test_parser_fnac() { let fnac_parser = Amazon::new().unwrap(); assert!(fnac_parser.can_parse( &Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap() )); assert!(fnac_parser.can_parse( &Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap() )); assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()) == false); }