extern crate reqwest; extern crate scraper; use crate::parser; use crate::price_result::PriceResult; use anyhow::{anyhow, Result}; use reqwest::blocking::Client; use scraper::Html; use url::Url; const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"; /// Struct used to get price from a website pub struct PriceChecker { client: Client, pub parser_list: parser::List, } impl PriceChecker { /// Create a new PriceChecker pub fn new() -> Result { let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build()?; Ok(PriceChecker { client, parser_list: parser::List::new()?, }) } fn get_html(&self, url: &Url) -> Result { let response = self.client.get(url.clone()).send()?; let text = response.text()?; Ok(Html::parse_document(&text)) } /// Get a price from an URL pub fn get_price(&self, url: &Url) -> Result { let parser = *self .parser_list .get_price() .iter() .find(|p| p.can_parse(&url)) .ok_or(anyhow!("No parser can parse {}", url))?; let document = self.get_html(url)?; Ok(parser.parse_price(&document)?) } /// Search an object in all parsers pub fn search(&self, name: &str) -> Result> { let mut urls = Vec::new(); for parser in self.parser_list.get_search().iter() { let search_url = parser.search_url(name); let document = self.get_html(&search_url)?; if let Some(url) = parser.search(&document)? { urls.push((parser.name(), url)); } } Ok(urls) } } #[test] fn test_price_checker() { let price_checker = PriceChecker::new().unwrap(); // Test darty let price_result = price_checker.get_price(&Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap()).unwrap(); assert!(price_result.name != ""); assert!(price_result.price != 0.); assert!(price_result.product != ""); // Test fnac let price_result = price_checker .get_price( &Url::parse( "https://www.fnac.com/a12584732/Kaamelott-Les-Six-Livres-L-integrale-de-la-serie-Coffret-Blu-ray-Alexandre-Astier-Blu-ray", ) .unwrap(), ) .unwrap(); assert!(price_result.name != ""); assert!(price_result.price != 0.); assert!(price_result.product != ""); // Test du bruis dans la cuisine let price_result = price_checker .get_price(&Url::parse("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p").unwrap()) .unwrap(); assert!(price_result.name != ""); assert!(price_result.price != 0.); // LDLC let price_result = price_checker .get_price(&Url::parse("https://www.ldlc.com/fiche/PB00335410.html").unwrap()) .unwrap(); assert!(price_result.name != ""); assert!(price_result.price != 0.); // Amazon let price_result = price_checker .get_price( &Url::parse( "https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp&th=1", ) .unwrap(), ) .unwrap(); assert!(price_result.name != ""); assert!(price_result.price != 0.); assert!(price_result.product != ""); }