price_checker/src/parser/amazon.rs

82 lines
2.9 KiB
Rust

use super::{Parser, PriceParser, SearchParser};
use crate::PriceResult;
use scraper::{Selector, Html};
use url::Url;
use anyhow::{Result, anyhow};
#[derive(Debug)]
/// Parser for the Amazon website (`www.amazon.fr`).
///
/// Holds the CSS selectors used to read a product page and a search-results
/// page. They are compiled once when the parser is constructed.
pub struct Amazon {
/// Selects the price element on a product page (`.a-color-price`).
price_selector: Selector,
/// Selects the product title on a product page (`#productTitle`).
name_selector: Selector,
/// Selects the element whose text is reported as the product family
/// (`.nav-search-label`).
product_selector: Selector,
/// Selects the product-image component of a search result.
search_selector_1: Selector,
/// Selects the link inside the element matched by `search_selector_1`
/// (`.a-link-normal`).
search_selector_2: Selector
}
impl Parser for Amazon {
fn new() -> Result<Self> {
Ok(Amazon {
price_selector: Selector::parse(r".a-color-price").unwrap(),
name_selector: Selector::parse(r"#productTitle").unwrap(),
product_selector: Selector::parse(r".nav-search-label").unwrap(),
search_selector_1: Selector::parse(r".rush-component[data-component-type=s-product-image]").unwrap(),
search_selector_2: Selector::parse(r".a-link-normal").unwrap()
})
}
fn name(&self) -> &'static str {
"Amazon"
}
}
impl PriceParser for Amazon {
    /// Returns `true` only for URLs whose host is exactly `www.amazon.fr`.
    fn can_parse(&self, url: &Url) -> bool {
        url.host_str() == Some("www.amazon.fr")
    }

    /// Extracts the price, name and product family from a product page.
    ///
    /// The price text is normalised before parsing: a leading `EUR` or a
    /// leading/trailing `€` is removed, whitespace (including the no-break
    /// spaces used as thousands separators) is dropped, and the decimal comma
    /// becomes a dot. So `"1 299,00 €"` parses as `1299.0`.
    ///
    /// # Errors
    ///
    /// Fails when the price, name or product element is missing from the
    /// page, or when the normalised price text is not a valid number.
    fn parse_price(&self, html: &Html) -> Result<PriceResult> {
        // Get price
        let price_element = html
            .select(&self.price_selector)
            .next()
            .ok_or_else(|| anyhow!("No price element"))?;
        // An element without any text node keeps the historical fallback of 0.
        let raw_price = price_element.text().next().unwrap_or("0.");
        let price: f64 = raw_price
            .trim()
            .trim_start_matches("EUR")
            .trim_matches('€')
            .chars()
            .filter(|c| !c.is_whitespace())
            .map(|c| if c == ',' { '.' } else { c })
            .collect::<String>()
            .parse()?;

        // Get name
        let name_element = html
            .select(&self.name_selector)
            .next()
            .ok_or_else(|| anyhow!("No name element"))?;
        let name = name_element
            .text()
            .next()
            .unwrap_or("")
            .trim()
            .replace('\n', "-");

        // Get product
        let family_element = html
            .select(&self.product_selector)
            .next()
            .ok_or_else(|| anyhow!("No product element"))?;
        let family = family_element
            .text()
            .next()
            .unwrap_or("")
            .trim()
            .replace('\n', "-");

        // `name` and `family` are already owned `String`s; move them in.
        Ok(PriceResult {
            name,
            product: family,
            price,
        })
    }
}
impl SearchParser for Amazon {
    /// Builds the amazon.fr search URL for `name`.
    ///
    /// The term is appended as a properly encoded `k` query parameter, so
    /// characters such as `&`, `#`, `+` or `=` in a product name stay part of
    /// the search term instead of corrupting the URL.
    fn search_url(&self, name: &str) -> Url {
        let mut url =
            Url::parse("https://www.amazon.fr/s").expect("static search URL is valid");
        url.query_pairs_mut().append_pair("k", name);
        url
    }

    /// Returns the product URL of the first result on a search page.
    ///
    /// The link's `href` is cut at the first `/ref` so the tracking suffix is
    /// dropped; the remainder becomes the path of an `https://www.amazon.fr`
    /// URL.
    ///
    /// # Errors
    ///
    /// Fails when the page has no product-image component, when that component
    /// holds no link, or when the link has no `href` attribute. A page without
    /// results is therefore reported as an error, not as `Ok(None)`.
    fn search(&self, html: &Html) -> Result<Option<Url>> {
        let result_card = html
            .select(&self.search_selector_1)
            .next()
            .ok_or_else(|| anyhow!("No search element 1"))?;
        let link = result_card
            .select(&self.search_selector_2)
            .next()
            .ok_or_else(|| anyhow!("No search element 2"))?;
        let href = link
            .value()
            .attr("href")
            .ok_or_else(|| anyhow!("No link element"))?;

        let product_path = href.split("/ref").next().unwrap_or(href);
        let mut url =
            Url::parse("https://www.amazon.fr").expect("static base URL is valid");
        url.set_path(product_path);
        Ok(Some(url))
    }
}
/// Checks that the Amazon parser accepts `www.amazon.fr` URLs and rejects
/// other Amazon domains.
// NOTE(review): the function name mentions "fnac" although it exercises the
// Amazon parser; it is kept unchanged so existing test filters keep matching.
#[test]
fn test_parser_fnac() {
    let amazon_parser = Amazon::new().unwrap();
    // A product page on the French store is accepted.
    assert!(amazon_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap()));
    // Any other page on the same host is accepted as well.
    assert!(amazon_parser.can_parse(&Url::parse("https://www.amazon.fr/s?k=crayons").unwrap()));
    // A different Amazon domain is rejected.
    assert!(!amazon_parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()));
}