Add error handling

Rémi BERTHO 2020-05-24 10:30:41 +02:00
parent bddd41dde8
commit 4bd5e1eb5e
Signed by: dalan
GPG key ID: EE3B917931C07B64
7 changed files with 56 additions and 40 deletions

Cargo.lock generated
View file

@@ -1,5 +1,11 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "anyhow"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
[[package]]
name = "arraygen"
version = "0.1.13"
@@ -738,6 +744,7 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
name = "price_checker"
version = "0.1.0"
dependencies = [
"anyhow",
"arraygen",
"reqwest",
"scraper",

View file

@@ -10,4 +10,6 @@ edition = "2018"
reqwest = { version = "0.10", features = ["blocking"]}
scraper = "0.12.0"
arraygen = "0.1.11"
url = "2.1.1"
url = "2.1.1"
anyhow = "1.0"
#thiserror = "1.0"
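
The new dependency is anyhow, while thiserror is left commented out as the alternative that was not taken here. For contrast, a hypothetical sketch of what a dedicated thiserror error type for this crate could look like (nothing below is part of this commit; the variant names are invented):

```rust
use thiserror::Error;

// Hypothetical error enum: thiserror forces the failure cases to be
// enumerated up front, whereas anyhow wraps any std::error::Error ad hoc.
#[derive(Error, Debug)]
pub enum PriceError {
    #[error("no parser can handle {0}")]
    UnsupportedUrl(url::Url),
    #[error("price element not found in the page")]
    MissingPrice,
    #[error(transparent)]
    Http(#[from] reqwest::Error),
}
```

For a small binary crate like this one, anyhow's single opaque error type is usually the lighter choice; thiserror pays off when callers need to match on specific failures.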

View file

@@ -1,6 +1,6 @@
# TODO
* [ ] Fetch Darty prices with [scraper](https://crates.io/crates/scraper) and [reqwest](https://crates.io/crates/reqwest)
* [x] Fetch Darty prices with [scraper](https://crates.io/crates/scraper) and [reqwest](https://crates.io/crates/reqwest)
* [ ] Add support for Fnac, …
* [ ] Fetch the URL from the command line with [clap](https://crates.io/crates/clap)
* [ ] Add a SearchParser to look up an item across all parsers

View file

@@ -1,3 +1,5 @@
extern crate anyhow;
pub mod price_result;
pub mod parser;
pub mod price_checker;
@@ -5,11 +7,13 @@ pub mod price_checker;
use price_result::PriceResult;
use price_checker::PriceChecker;
use url::Url;
use anyhow::{Context, Result};
fn main() {
let price_checker = PriceChecker::new();
fn main() -> Result<()> {
let price_checker = PriceChecker::new().context("Cannot create price checker")?;
let price_result = price_checker.get_price(Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap());
println!("{}", price_result);
let price_result = price_checker.get_price(Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html")?)?;
println!("{}", price_result);
Ok(())
}
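
The main function now returns anyhow::Result<()>, so the unwrap() calls give way to `?` and context() attaches a readable label to whichever error bubbles up. A minimal standalone illustration of the same pattern (the URL and messages are placeholders, not taken from the project):

```rust
use anyhow::{Context, Result};
use url::Url;

fn main() -> Result<()> {
    // Url::parse returns Result<Url, ParseError>; `?` converts the error
    // into anyhow::Error and context() labels it for the final report.
    let url = Url::parse("https://www.example.com/some/product")
        .context("Cannot parse the product URL")?;
    println!("host: {}", url.host_str().unwrap_or(""));
    Ok(())
}
```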

View file

@@ -7,11 +7,12 @@ use scraper::Html;
use crate::price_result::PriceResult;
use arraygen::Arraygen;
use url::Url;
use anyhow::Result;
pub trait PriceParser{
fn new() -> Self where Self :Sized;
fn new() -> Result<Self> where Self :Sized;
fn can_parse(&self, url : &Url) -> bool;
fn parse(&self, html : &Html) -> PriceResult;
fn parse(&self, html : &Html) -> Result<PriceResult>;
}
#[derive(Arraygen, Debug)]
@@ -22,9 +23,9 @@ pub struct List {
}
impl List {
pub fn new() -> Self {
List {
darty: darty::Darty::new()
}
pub fn new() -> Result<Self> {
Ok(List {
darty: darty::Darty::new()?
})
}
}
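
Because PriceParser::new now returns Result<Self>, every constructor in the chain can use `?`, and a failure during parser construction bubbles all the way up to the caller. A stripped-down sketch of that chaining, with the types reduced to stubs:

```rust
use anyhow::Result;

struct Darty;

impl Darty {
    // Stub: the real constructor can fail while building its selectors.
    fn new() -> Result<Self> {
        Ok(Darty)
    }
}

struct List {
    darty: Darty,
}

impl List {
    fn new() -> Result<Self> {
        Ok(List {
            darty: Darty::new()?, // an Err here returns early from List::new
        })
    }
}

fn main() -> Result<()> {
    let _list = List::new()?;
    Ok(())
}
```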

View file

@@ -2,6 +2,7 @@ use super::PriceParser;
use crate::PriceResult;
use scraper::{Selector, Html};
use url::Url;
use anyhow::{Result, anyhow};
#[derive(Debug)]
pub struct Darty {
@@ -11,38 +12,38 @@ pub struct Darty {
}
impl PriceParser for Darty {
fn new() -> Self {
Darty {
fn new() -> Result<Self> {
Ok(Darty {
price_selector: Selector::parse(r#".darty_prix"#).unwrap(),
name_selector: Selector::parse(r#".product_name"#).unwrap(),
product_selector: Selector::parse(r#".product_family"#).unwrap()
}
})
}
fn can_parse(&self, url : &Url) -> bool {
url.host_str().unwrap() == "www.darty.com"
url.host_str().unwrap_or("") == "www.darty.com"
}
fn parse(&self, html : &Html) -> PriceResult {
fn parse(&self, html : &Html) -> Result<PriceResult> {
// Get price
let price_element = html.select(&self.price_selector).next().unwrap();
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
let mut price_text_it = price_element.text();
let price_ent : u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse().unwrap();
let price_dec : u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse().unwrap();
let price_ent : u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse()?;
let price_dec : u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse()?;
let price = price_ent as f64 + (price_dec as f64) / 100.;
// Get name
let name_element = html.select(&self.name_selector).next().unwrap();
let name = name_element.text().next().unwrap().trim().replace('\n', "-");
let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?;
let name = name_element.text().next().unwrap_or("").trim().replace('\n', "-");
// Get product
let family_element = html.select(&self.product_selector).next().unwrap();
let family = family_element.text().next().unwrap().trim().replace('\n', "-");
let family_element = html.select(&self.product_selector).next().ok_or(anyhow!("No product element"))?;
let family = family_element.text().next().unwrap_or("").trim().replace('\n', "-");
PriceResult {
name: name.to_owned(),
product: family.to_owned(),
price
}
}
Ok(PriceResult {
name: name.to_owned(),
product: family.to_owned(),
price
})
}
}
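
The scraping unwrap() calls are replaced by two conversions: a missing element (an Option) becomes an error via ok_or(anyhow!(…)), and str::parse errors propagate with `?`. The same pattern on a plain slice instead of an HTML document, as a self-contained sketch (the input strings are illustrative):

```rust
use anyhow::{anyhow, Result};

// Turn text fragments such as "1299," and "99€" into a price, failing
// cleanly instead of panicking when a fragment is missing or malformed.
fn parse_price(fragments: &[&str]) -> Result<f64> {
    let mut it = fragments.iter();
    let euros: u32 = it
        .next()
        .ok_or(anyhow!("No price element"))? // Option -> Result
        .trim_end_matches(',')
        .parse()?; // ParseIntError converts into anyhow::Error
    let cents: u32 = it.next().unwrap_or(&"0").trim_end_matches('€').parse()?;
    Ok(euros as f64 + cents as f64 / 100.)
}

fn main() -> Result<()> {
    println!("{}", parse_price(&["1299,", "99€"])?);
    Ok(())
}
```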

View file

@@ -6,6 +6,7 @@ use scraper::Html;
use crate::parser;
use crate::price_result::PriceResult;
use url::Url;
use anyhow::{Result, anyhow};
const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0";
@@ -15,19 +16,19 @@ pub struct PriceChecker {
}
impl PriceChecker {
pub fn new() -> Self {
let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build().unwrap();
PriceChecker {
pub fn new() -> Result<Self> {
let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build()?;
Ok(PriceChecker {
client,
parser_list: parser::List::new()
}
parser_list: parser::List::new()?
})
}
pub fn get_price(&self, url : Url) -> PriceResult {
let response = self.client.get(url.clone()).send().unwrap();
let text = response.text().unwrap();
pub fn get_price(&self, url : Url) -> Result<PriceResult> {
let response = self.client.get(url.clone()).send()?;
let text = response.text()?;
let document = Html::parse_document(&text);
let parser = *self.parser_list.get_price().iter().find(|p| p.can_parse(&url)).unwrap();
parser.parse(&document)
let parser = *self.parser_list.get_price().iter().find(|p| p.can_parse(&url)).ok_or(anyhow!("No parser can parse {}", url))?;
Ok(parser.parse(&document)?)
}
}
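
With get_price returning Result<PriceResult>, an HTTP failure, an unsupported host, or a scraping error now reaches main instead of panicking, and anyhow reports the whole context chain. A hypothetical failure path showing roughly what that report looks like (names and messages are made up):

```rust
use anyhow::{anyhow, Context, Result};

// Stand-in for a lookup that finds no suitable parser for the host.
fn find_parser(host: &str) -> Result<&'static str> {
    Err(anyhow!("No parser can parse {}", host))
}

fn main() -> Result<()> {
    let parser = find_parser("www.example.com").context("Cannot get the price")?;
    println!("{}", parser);
    Ok(())
}

// Running this prints something like:
//   Error: Cannot get the price
//
//   Caused by:
//       No parser can parse www.example.com
```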