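//! Price checker: downloads web pages over HTTP and delegates price extraction
//! and product search to the parsers registered in `crate::parser`.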
extern crate reqwest;
|
|
extern crate scraper;
|
|
|
|
use crate::parser;
|
|
use crate::price_result::PriceResult;
|
|
use anyhow::{anyhow, Result};
|
|
use reqwest::blocking::Client;
|
|
use scraper::Html;
|
|
use url::Url;
|
|
|
|
/// `User-Agent` header value sent with every request made by the HTTP client
/// built in `PriceChecker::new` (a desktop Firefox 78 on Linux identification string).
const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0";
|
|
|
|
/// Struct used to get price from a website
|
|
pub struct PriceChecker {
|
|
client: Client,
|
|
pub parser_list: parser::List,
|
|
}
|
|
|
|
impl PriceChecker {
|
|
/// Create a new PriceChecker
|
|
pub fn new() -> Result<Self> {
|
|
let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build()?;
|
|
Ok(PriceChecker {
|
|
client,
|
|
parser_list: parser::List::new()?,
|
|
})
|
|
}
|
|
|
|
fn get_html(&self, url: &Url) -> Result<Html> {
|
|
let response = self.client.get(url.clone()).send()?;
|
|
let text = response.text()?;
|
|
Ok(Html::parse_document(&text))
|
|
}
|
|
|
|
/// Get a price from an URL
|
|
pub fn get_price(&self, url: &Url) -> Result<PriceResult> {
|
|
let parser = *self
|
|
.parser_list
|
|
.get_price()
|
|
.iter()
|
|
.find(|p| p.can_parse(&url))
|
|
.ok_or(anyhow!("No parser can parse {}", url))?;
|
|
let document = self.get_html(url)?;
|
|
Ok(parser.parse_price(&document)?)
|
|
}
|
|
|
|
/// Search an object in all parsers
|
|
pub fn search_all(&self, name: &str) -> Result<Vec<(&'static str, Url)>> {
|
|
let mut urls = Vec::new();
|
|
for parser in self.parser_list.get_search().iter() {
|
|
let search_url = parser.search_url(name);
|
|
let document = self.get_html(&search_url)?;
|
|
if let Some(url) = parser.search(&document)? {
|
|
urls.push((parser.name(), url));
|
|
}
|
|
}
|
|
Ok(urls)
|
|
}
|
|
|
|
/// Search an object in a parser
|
|
pub fn search(&self, parser_name: &str, name: &str) -> Result<Option<Url>> {
|
|
let parser = self.parser_list.get_parser(parser_name)?;
|
|
let search_url = parser.search_url(name);
|
|
let document = self.get_html(&search_url)?;
|
|
if let Some(url) = parser.search(&document)? {
|
|
Ok(Some(url))
|
|
}
|
|
else {
|
|
Ok(None)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// End-to-end check of `PriceChecker::get_price` against live product pages.
///
/// Every lookup sends a real HTTP request; the test panics if building the
/// checker, parsing a URL, or any price lookup fails.
#[test]
fn test_price_checker() {
    let price_checker = PriceChecker::new().unwrap();
    // Parse the URL and fetch its price, panicking on any failure.
    let fetch = |raw_url: &str| {
        let page = Url::parse(raw_url).unwrap();
        price_checker.get_price(&page).unwrap()
    };

    // Darty
    let darty = fetch("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html");
    assert!(darty.name != "");
    assert!(darty.price != 0.);
    assert!(darty.product != "");

    // Fnac
    let fnac = fetch(
        "https://www.fnac.com/a12584732/Kaamelott-Les-Six-Livres-L-integrale-de-la-serie-Coffret-Blu-ray-Alexandre-Astier-Blu-ray",
    );
    assert!(fnac.name != "");
    assert!(fnac.price != 0.);
    assert!(fnac.product != "");

    // Du Bruit dans la Cuisine (the product field is not checked for this site)
    let du_bruit = fetch("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p");
    assert!(du_bruit.name != "");
    assert!(du_bruit.price != 0.);

    // LDLC (the product field is not checked for this site)
    let ldlc = fetch("https://www.ldlc.com/fiche/PB00335410.html");
    assert!(ldlc.name != "");
    assert!(ldlc.price != 0.);

    // Amazon
    let amazon = fetch(
        "https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp&th=1",
    );
    assert!(amazon.name != "");
    assert!(amazon.price != 0.);
    assert!(amazon.product != "");
}
|