extern crate reqwest; extern crate scraper; use reqwest::blocking::Client; use scraper::Html; use crate::parser; use crate::price_result::PriceResult; use url::Url; use anyhow::{Result, anyhow}; const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"; /// Struct used to get price from a website pub struct PriceChecker { client: Client, parser_list: parser::List } impl PriceChecker { /// Create a new PriceChecker pub fn new() -> Result { let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build()?; Ok(PriceChecker { client, parser_list: parser::List::new()? }) } /// Get a price from an URL pub fn get_price(&self, url : Url) -> Result { let parser = *self.parser_list.get_price().iter().find(|p| p.can_parse(&url)).ok_or(anyhow!("No parser can parse {}", url))?; let response = self.client.get(url.clone()).send()?; let text = response.text()?; let document = Html::parse_document(&text); Ok(parser.parse(&document)?) } } #[test] fn test_price_checker() { let price_checker = PriceChecker::new().unwrap(); // Test darty let price_result = price_checker.get_price(Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap()).unwrap(); assert!(price_result.name != ""); assert!(price_result.price != 0.); assert!(price_result.product != ""); // Test fnac let price_result = price_checker.get_price(Url::parse("https://www.fnac.com/a12584732/Kaamelott-Les-Six-Livres-L-integrale-de-la-serie-Coffret-Blu-ray-Alexandre-Astier-Blu-ray").unwrap()).unwrap(); assert!(price_result.name != ""); assert!(price_result.price != 0.); assert!(price_result.product != ""); // Test du bruis dans la cuisine let price_result = price_checker.get_price(Url::parse("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p").unwrap()).unwrap(); assert!(price_result.name != ""); assert!(price_result.price != 0.); // LDLC let price_result = price_checker.get_price(Url::parse("https://www.ldlc.com/fiche/PB00335410.html").unwrap()).unwrap(); assert!(price_result.name != ""); assert!(price_result.price != 0.); // Amazon let price_result = price_checker.get_price(Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp&th=1").unwrap()).unwrap(); assert!(price_result.name != ""); assert!(price_result.price != 0.); assert!(price_result.product != ""); }