use super::{Parser, PriceParser, SearchParser}; use crate::PriceResult; use scraper::{Selector, Html}; use url::Url; use anyhow::{Result, anyhow}; #[derive(Debug)] /// Parser for the darty website pub struct Amazon { price_selector: Selector, name_selector: Selector, product_selector: Selector, search_selector_1: Selector, search_selector_2: Selector } impl Parser for Amazon { fn new() -> Result { Ok(Amazon { price_selector: Selector::parse(r".a-color-price").unwrap(), name_selector: Selector::parse(r"#productTitle").unwrap(), product_selector: Selector::parse(r".nav-search-label").unwrap(), search_selector_1: Selector::parse(r".rush-component[data-component-type=s-product-image]").unwrap(), search_selector_2: Selector::parse(r".a-link-normal").unwrap() }) } fn name(&self) -> &'static str { "Amazon" } } impl PriceParser for Amazon { fn can_parse(&self, url : &Url) -> bool { url.host_str().unwrap_or("") == "www.amazon.fr" } fn parse_price(&self, html : &Html) -> Result { // Get price let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?; let mut price_text_it = price_element.text(); let price : f64 = price_text_it.next().unwrap_or("0.").trim_end_matches("€").trim().replace(',', ".").parse()?; // Get name let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?; let name = name_element.text().next().unwrap_or("").trim().replace('\n', "-"); // Get product let family_element = html.select(&self.product_selector).next().ok_or(anyhow!("No product element"))?; let family = family_element.text().next().unwrap_or("").trim().replace('\n', "-"); Ok(PriceResult { name: name.to_owned(), product: family.to_owned(), price }) } } impl SearchParser for Amazon { fn search_url(&self, name: &str) -> Url { Url::parse(& format!("https://www.amazon.fr/s?k={}", name)).unwrap() } fn search(&self, html : &Html) -> Result> { let search_element_1 = html.select(&self.search_selector_1).next().ok_or(anyhow!("No search element 1"))?; let search_element_2 = search_element_1.select(&self.search_selector_2).next().ok_or(anyhow!("No search element 2"))?; let path_url = search_element_2.value().attr("href").ok_or(anyhow!("No link element"))?; let mut url = Url::parse("https://www.amazon.fr").unwrap(); url.set_path(path_url.split("/ref").next().unwrap_or(path_url)); Ok(Option::Some(url)) } } #[test] fn test_parser_fnac() { let fnac_parser = Amazon::new().unwrap(); assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap())); assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap())); assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()) == false); }