price_checker/src/parser/darty.rs

64 lines
2.2 KiB
Rust
Raw Normal View History

2020-06-21 09:32:40 +00:00
use super::{Parser, PriceParser};
2020-05-11 19:21:57 +00:00
use crate::PriceResult;
use scraper::{Selector, Html};
2020-05-23 14:19:04 +00:00
use url::Url;
2020-05-24 08:30:41 +00:00
use anyhow::{Result, anyhow};
2020-05-11 19:21:57 +00:00
#[derive(Debug)]
2020-05-24 16:16:32 +00:00
/// Parser for the darty website
2020-05-11 19:21:57 +00:00
pub struct Darty {
price_selector: Selector,
name_selector: Selector,
product_selector: Selector
}
2020-06-21 09:32:40 +00:00
impl Parser for Darty {
2020-05-24 08:30:41 +00:00
fn new() -> Result<Self> {
Ok(Darty {
2020-05-11 19:21:57 +00:00
price_selector: Selector::parse(r#".darty_prix"#).unwrap(),
name_selector: Selector::parse(r#".product_name"#).unwrap(),
product_selector: Selector::parse(r#".product_family"#).unwrap()
2020-05-24 08:30:41 +00:00
})
2020-05-11 19:21:57 +00:00
}
2020-06-21 09:32:40 +00:00
fn name() -> &'static str {
"Darty"
}
}
impl PriceParser for Darty {
2020-05-23 14:19:04 +00:00
fn can_parse(&self, url : &Url) -> bool {
2020-05-24 08:30:41 +00:00
url.host_str().unwrap_or("") == "www.darty.com"
2020-05-11 19:21:57 +00:00
}
2020-05-24 08:30:41 +00:00
fn parse(&self, html : &Html) -> Result<PriceResult> {
2020-05-11 19:21:57 +00:00
// Get price
2020-05-24 08:30:41 +00:00
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
2020-05-11 19:21:57 +00:00
let mut price_text_it = price_element.text();
2020-05-24 08:30:41 +00:00
let price_ent : u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse()?;
let price_dec : u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse()?;
2020-05-11 19:21:57 +00:00
let price = price_ent as f64 + (price_dec as f64) / 100.;
// Get name
2020-05-24 08:30:41 +00:00
let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?;
let name = name_element.text().next().unwrap_or("").trim().replace('\n', "-");
2020-05-11 19:21:57 +00:00
// Get product
2020-05-24 08:30:41 +00:00
let family_element = html.select(&self.product_selector).next().ok_or(anyhow!("No product element"))?;
let family = family_element.text().next().unwrap_or("").trim().replace('\n', "-");
2020-05-11 19:21:57 +00:00
2020-05-24 08:30:41 +00:00
Ok(PriceResult {
name: name.to_owned(),
product: family.to_owned(),
price
})
}
2020-05-24 16:16:32 +00:00
}
// Only URLs hosted on `www.darty.com` must be accepted, whatever the scheme.
#[test]
fn test_parser_darty() {
    let parser = Darty::new().unwrap();

    let accepted = [
        "https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html",
        "http://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html",
    ];
    for raw in accepted.iter() {
        let url = Url::parse(raw).unwrap();
        assert!(parser.can_parse(&url));
    }

    let rejected = Url::parse("https://www.fnace.com").unwrap();
    assert!(!parser.can_parse(&rejected));
}