price_checker/src/price_checker.rs

124 lines
3.4 KiB
Rust
Raw Normal View History

2020-05-11 19:21:57 +00:00
extern crate reqwest;
2020-05-23 14:19:04 +00:00
extern crate scraper;
2020-05-11 19:21:57 +00:00
use crate::parser;
use crate::price_result::PriceResult;
2020-07-20 20:14:05 +00:00
use anyhow::{anyhow, Result};
use reqwest::blocking::Client;
use scraper::Html;
2020-05-23 14:19:04 +00:00
use url::Url;
2020-05-11 19:21:57 +00:00
/// `User-Agent` header value installed on the HTTP client built in
/// `PriceChecker::new` (a desktop Firefox 78 on Linux identification string).
const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0";
2020-05-24 16:16:32 +00:00
/// Struct used to get price from a website
2020-05-11 19:21:57 +00:00
pub struct PriceChecker {
client: Client,
2020-07-20 20:14:05 +00:00
pub parser_list: parser::List,
2020-05-11 19:21:57 +00:00
}
impl PriceChecker {
2020-05-24 16:16:32 +00:00
/// Create a new PriceChecker
2020-05-24 08:30:41 +00:00
pub fn new() -> Result<Self> {
let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build()?;
Ok(PriceChecker {
2020-05-11 19:21:57 +00:00
client,
2020-07-20 20:14:05 +00:00
parser_list: parser::List::new()?,
2020-05-24 08:30:41 +00:00
})
2020-05-11 19:21:57 +00:00
}
2020-07-20 20:14:05 +00:00
fn get_html(&self, url: &Url) -> Result<Html> {
2020-05-24 08:30:41 +00:00
let response = self.client.get(url.clone()).send()?;
let text = response.text()?;
2020-06-23 16:29:55 +00:00
Ok(Html::parse_document(&text))
}
/// Get a price from an URL
2020-07-20 20:14:05 +00:00
pub fn get_price(&self, url: &Url) -> Result<PriceResult> {
let parser = *self
.parser_list
.get_price()
.iter()
.find(|p| p.can_parse(&url))
.ok_or(anyhow!("No parser can parse {}", url))?;
2020-06-23 16:29:55 +00:00
let document = self.get_html(url)?;
Ok(parser.parse_price(&document)?)
}
/// Search an object in all parsers
2020-07-21 18:37:26 +00:00
pub fn search_all(&self, name: &str) -> Result<Vec<(&'static str, Url)>> {
2020-06-23 16:29:55 +00:00
let mut urls = Vec::new();
for parser in self.parser_list.get_search().iter() {
let search_url = parser.search_url(name);
let document = self.get_html(&search_url)?;
if let Some(url) = parser.search(&document)? {
urls.push((parser.name(), url));
}
}
Ok(urls)
2020-05-11 19:21:57 +00:00
}
2020-07-21 18:37:26 +00:00
/// Search an object in a parser
pub fn search(&self, parser_name: &str, name: &str) -> Result<Option<Url>> {
let parser = self.parser_list.get_parser(parser_name)?;
let search_url = parser.search_url(name);
let document = self.get_html(&search_url)?;
if let Some(url) = parser.search(&document)? {
Ok(Some(url))
}
else {
Ok(None)
}
}
2020-05-24 16:16:32 +00:00
}
/// End-to-end check of `PriceChecker::get_price` against live product pages.
///
/// This test performs real HTTP requests, so it needs network access and
/// depends on the listed pages still existing.
#[test]
fn test_price_checker() {
    let checker = PriceChecker::new().unwrap();
    // Parse `raw_url`, fetch its price, and panic on any failure.
    let fetch = |raw_url: &str| {
        let url = Url::parse(raw_url).unwrap();
        checker.get_price(&url).unwrap()
    };

    // Darty
    let darty = fetch("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html");
    assert!(darty.name != "");
    assert!(darty.price != 0.);
    assert!(darty.product != "");

    // Fnac
    let fnac = fetch(
        "https://www.fnac.com/a12584732/Kaamelott-Les-Six-Livres-L-integrale-de-la-serie-Coffret-Blu-ray-Alexandre-Astier-Blu-ray",
    );
    assert!(fnac.name != "");
    assert!(fnac.price != 0.);
    assert!(fnac.product != "");

    // Du Bruit dans la Cuisine (the product field is not checked for this site)
    let du_bruit = fetch("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p");
    assert!(du_bruit.name != "");
    assert!(du_bruit.price != 0.);

    // LDLC (the product field is not checked for this site)
    let ldlc = fetch("https://www.ldlc.com/fiche/PB00335410.html");
    assert!(ldlc.name != "");
    assert!(ldlc.price != 0.);

    // Amazon
    let amazon = fetch(
        "https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp&th=1",
    );
    assert!(amazon.name != "");
    assert!(amazon.price != 0.);
    assert!(amazon.product != "");
}