2020-06-23 16:29:55 +00:00
|
|
|
use super::{Parser, PriceParser, SearchParser};
|
2020-06-21 08:36:13 +00:00
|
|
|
use crate::PriceResult;
|
2020-07-20 20:14:05 +00:00
|
|
|
use anyhow::{anyhow, Result};
|
|
|
|
use scraper::{Html, Selector};
|
2020-06-21 08:36:13 +00:00
|
|
|
use url::Url;
|
|
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
|
/// Parser for the darty website
|
|
|
|
pub struct Amazon {
|
|
|
|
price_selector: Selector,
|
|
|
|
name_selector: Selector,
|
2020-06-23 16:29:55 +00:00
|
|
|
product_selector: Selector,
|
|
|
|
search_selector_1: Selector,
|
2020-07-20 20:14:05 +00:00
|
|
|
search_selector_2: Selector,
|
2020-06-21 08:36:13 +00:00
|
|
|
}
|
|
|
|
|
2020-06-21 09:32:40 +00:00
|
|
|
impl Parser for Amazon {
|
2020-06-21 08:36:13 +00:00
|
|
|
fn new() -> Result<Self> {
|
|
|
|
Ok(Amazon {
|
2020-06-23 16:29:55 +00:00
|
|
|
price_selector: Selector::parse(r".a-color-price").unwrap(),
|
2020-06-21 08:36:13 +00:00
|
|
|
name_selector: Selector::parse(r"#productTitle").unwrap(),
|
2020-06-23 16:29:55 +00:00
|
|
|
product_selector: Selector::parse(r".nav-search-label").unwrap(),
|
|
|
|
search_selector_1: Selector::parse(r".rush-component[data-component-type=s-product-image]").unwrap(),
|
2020-07-20 20:14:05 +00:00
|
|
|
search_selector_2: Selector::parse(r".a-link-normal").unwrap(),
|
2020-06-21 08:36:13 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2020-06-23 16:29:55 +00:00
|
|
|
fn name(&self) -> &'static str {
|
2020-06-21 09:32:40 +00:00
|
|
|
"Amazon"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl PriceParser for Amazon {
|
2020-07-20 20:14:05 +00:00
|
|
|
fn can_parse(&self, url: &Url) -> bool {
|
2020-06-21 08:36:13 +00:00
|
|
|
url.host_str().unwrap_or("") == "www.amazon.fr"
|
|
|
|
}
|
|
|
|
|
2020-07-20 20:14:05 +00:00
|
|
|
fn parse_price(&self, html: &Html) -> Result<PriceResult> {
|
2020-06-21 08:36:13 +00:00
|
|
|
// Get price
|
|
|
|
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
|
|
|
|
let mut price_text_it = price_element.text();
|
2020-07-21 16:52:52 +00:00
|
|
|
let price: f64 = price_text_it
|
2020-07-20 20:14:05 +00:00
|
|
|
.next()
|
|
|
|
.unwrap_or("0.")
|
2020-07-21 16:43:29 +00:00
|
|
|
.trim()
|
2020-07-20 20:14:05 +00:00
|
|
|
.trim_end_matches("€")
|
|
|
|
.trim()
|
2020-07-21 16:52:52 +00:00
|
|
|
.replace(',', ".")
|
|
|
|
.parse()?;
|
2020-06-21 08:36:13 +00:00
|
|
|
|
|
|
|
// Get name
|
|
|
|
let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?;
|
|
|
|
let name = name_element.text().next().unwrap_or("").trim().replace('\n', "-");
|
|
|
|
|
|
|
|
// Get product
|
|
|
|
let family_element = html.select(&self.product_selector).next().ok_or(anyhow!("No product element"))?;
|
|
|
|
let family = family_element.text().next().unwrap_or("").trim().replace('\n', "-");
|
|
|
|
|
|
|
|
Ok(PriceResult {
|
|
|
|
name: name.to_owned(),
|
|
|
|
product: family.to_owned(),
|
2020-07-20 20:14:05 +00:00
|
|
|
price,
|
2020-06-21 08:36:13 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-23 16:29:55 +00:00
|
|
|
impl SearchParser for Amazon {
|
|
|
|
fn search_url(&self, name: &str) -> Url {
|
2020-07-20 20:14:05 +00:00
|
|
|
Url::parse(&format!("https://www.amazon.fr/s?k={}", name)).unwrap()
|
2020-06-23 16:29:55 +00:00
|
|
|
}
|
|
|
|
|
2020-07-20 20:14:05 +00:00
|
|
|
fn search(&self, html: &Html) -> Result<Option<Url>> {
|
2020-07-20 19:56:49 +00:00
|
|
|
if let Some(search_element_1) = html.select(&self.search_selector_1).next() {
|
2020-07-20 20:14:05 +00:00
|
|
|
let search_element_2 = search_element_1
|
|
|
|
.select(&self.search_selector_2)
|
|
|
|
.next()
|
|
|
|
.ok_or(anyhow!("No search element 2"))?;
|
2020-07-20 19:56:49 +00:00
|
|
|
let path_url = search_element_2.value().attr("href").ok_or(anyhow!("No link element"))?;
|
|
|
|
let mut url = Url::parse("https://www.amazon.fr").unwrap();
|
|
|
|
url.set_path(path_url.split("/ref").next().unwrap_or(path_url));
|
|
|
|
Ok(Option::Some(url))
|
|
|
|
} else {
|
|
|
|
Ok(None)
|
|
|
|
}
|
2020-06-23 16:29:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-21 08:36:13 +00:00
|
|
|
/// Checks that `Amazon::can_parse` accepts `www.amazon.fr` URLs only.
///
/// NOTE(review): the `fnac` in the test name looks like a copy-paste
/// leftover; the name is kept so existing test filters keep working.
#[test]
fn test_parser_fnac() {
    let parser = Amazon::new().unwrap();

    // Product page on the supported host.
    assert!(parser.can_parse(
        &Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap()
    ));
    // Search page on the supported host.
    assert!(parser.can_parse(&Url::parse("https://www.amazon.fr/s?k=crayons").unwrap()));
    // Any other host is rejected.
    assert!(!parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()));
}
|