Add error handling

This commit is contained in:
Rémi BERTHO 2020-05-24 10:30:41 +02:00
parent bddd41dde8
commit 4bd5e1eb5e
Signed by: dalan
GPG key ID: EE3B917931C07B64
7 changed files with 56 additions and 40 deletions

7
Cargo.lock generated
View file

@ -1,5 +1,11 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
[[package]]
name = "anyhow"
version = "1.0.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f"
[[package]] [[package]]
name = "arraygen" name = "arraygen"
version = "0.1.13" version = "0.1.13"
@ -738,6 +744,7 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
name = "price_checker" name = "price_checker"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow",
"arraygen", "arraygen",
"reqwest", "reqwest",
"scraper", "scraper",

View file

@ -10,4 +10,6 @@ edition = "2018"
reqwest = { version = "0.10", features = ["blocking"]} reqwest = { version = "0.10", features = ["blocking"]}
scraper = "0.12.0" scraper = "0.12.0"
arraygen = "0.1.11" arraygen = "0.1.11"
url = "2.1.1" url = "2.1.1"
anyhow = "1.0"
#thiserror = "1.0"

View file

@ -1,6 +1,6 @@
# TODO # TODO
* [ ] Récupération prix darty avec [scraper](https://crates.io/crates/scraper) et [reqwest](https://crates.io/crates/reqwest) * [x] Récupération prix darty avec [scraper](https://crates.io/crates/scraper) et [reqwest](https://crates.io/crates/reqwest)
* [ ] Ajout du support de la Fnac, … * [ ] Ajout du support de la Fnac, …
* [ ] Récupération URL ligne de commande avec [clap](https://crates.io/crates/clap) * [ ] Récupération URL ligne de commande avec [clap](https://crates.io/crates/clap)
* [ ] Ajout de SearchParser pour rechercher un article sur tous les parseurs * [ ] Ajout de SearchParser pour rechercher un article sur tous les parseurs

View file

@ -1,3 +1,5 @@
extern crate anyhow;
pub mod price_result; pub mod price_result;
pub mod parser; pub mod parser;
pub mod price_checker; pub mod price_checker;
@ -5,11 +7,13 @@ pub mod price_checker;
use price_result::PriceResult; use price_result::PriceResult;
use price_checker::PriceChecker; use price_checker::PriceChecker;
use url::Url; use url::Url;
use anyhow::{Context, Result};
fn main() { fn main() -> Result<()> {
let price_checker = PriceChecker::new(); let price_checker = PriceChecker::new().context("Cannot create price checker")?;
let price_result = price_checker.get_price(Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap()); let price_result = price_checker.get_price(Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html")?)?;
println!("{}", price_result); println!("{}", price_result);
Ok(())
} }

View file

@ -7,11 +7,12 @@ use scraper::Html;
use crate::price_result::PriceResult; use crate::price_result::PriceResult;
use arraygen::Arraygen; use arraygen::Arraygen;
use url::Url; use url::Url;
use anyhow::Result;
pub trait PriceParser{ pub trait PriceParser{
fn new() -> Self where Self :Sized; fn new() -> Result<Self> where Self :Sized;
fn can_parse(&self, url : &Url) -> bool; fn can_parse(&self, url : &Url) -> bool;
fn parse(&self, html : &Html) -> PriceResult; fn parse(&self, html : &Html) -> Result<PriceResult>;
} }
#[derive(Arraygen, Debug)] #[derive(Arraygen, Debug)]
@ -22,9 +23,9 @@ pub struct List {
} }
impl List { impl List {
pub fn new() -> Self { pub fn new() -> Result<Self> {
List { Ok(List {
darty: darty::Darty::new() darty: darty::Darty::new()?
} })
} }
} }

View file

@ -2,6 +2,7 @@ use super::PriceParser;
use crate::PriceResult; use crate::PriceResult;
use scraper::{Selector, Html}; use scraper::{Selector, Html};
use url::Url; use url::Url;
use anyhow::{Result, anyhow};
#[derive(Debug)] #[derive(Debug)]
pub struct Darty { pub struct Darty {
@ -11,38 +12,38 @@ pub struct Darty {
} }
impl PriceParser for Darty { impl PriceParser for Darty {
fn new() -> Self { fn new() -> Result<Self> {
Darty { Ok(Darty {
price_selector: Selector::parse(r#".darty_prix"#).unwrap(), price_selector: Selector::parse(r#".darty_prix"#).unwrap(),
name_selector: Selector::parse(r#".product_name"#).unwrap(), name_selector: Selector::parse(r#".product_name"#).unwrap(),
product_selector: Selector::parse(r#".product_family"#).unwrap() product_selector: Selector::parse(r#".product_family"#).unwrap()
} })
} }
fn can_parse(&self, url : &Url) -> bool { fn can_parse(&self, url : &Url) -> bool {
url.host_str().unwrap() == "www.darty.com" url.host_str().unwrap_or("") == "www.darty.com"
} }
fn parse(&self, html : &Html) -> PriceResult { fn parse(&self, html : &Html) -> Result<PriceResult> {
// Get price // Get price
let price_element = html.select(&self.price_selector).next().unwrap(); let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
let mut price_text_it = price_element.text(); let mut price_text_it = price_element.text();
let price_ent : u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse().unwrap(); let price_ent : u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse()?;
let price_dec : u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse().unwrap(); let price_dec : u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse()?;
let price = price_ent as f64 + (price_dec as f64) / 100.; let price = price_ent as f64 + (price_dec as f64) / 100.;
// Get name // Get name
let name_element = html.select(&self.name_selector).next().unwrap(); let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?;
let name = name_element.text().next().unwrap().trim().replace('\n', "-"); let name = name_element.text().next().unwrap_or("").trim().replace('\n', "-");
// Get product // Get product
let family_element = html.select(&self.product_selector).next().unwrap(); let family_element = html.select(&self.product_selector).next().ok_or(anyhow!("No product element"))?;
let family = family_element.text().next().unwrap().trim().replace('\n', "-"); let family = family_element.text().next().unwrap_or("").trim().replace('\n', "-");
PriceResult { Ok(PriceResult {
name: name.to_owned(), name: name.to_owned(),
product: family.to_owned(), product: family.to_owned(),
price price
} })
} }
} }

View file

@ -6,6 +6,7 @@ use scraper::Html;
use crate::parser; use crate::parser;
use crate::price_result::PriceResult; use crate::price_result::PriceResult;
use url::Url; use url::Url;
use anyhow::{Result, anyhow};
const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"; const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0";
@ -15,19 +16,19 @@ pub struct PriceChecker {
} }
impl PriceChecker { impl PriceChecker {
pub fn new() -> Self { pub fn new() -> Result<Self> {
let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build().unwrap(); let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build()?;
PriceChecker { Ok(PriceChecker {
client, client,
parser_list: parser::List::new() parser_list: parser::List::new()?
} })
} }
pub fn get_price(&self, url : Url) -> PriceResult { pub fn get_price(&self, url : Url) -> Result<PriceResult> {
let response = self.client.get(url.clone()).send().unwrap(); let response = self.client.get(url.clone()).send()?;
let text = response.text().unwrap(); let text = response.text()?;
let document = Html::parse_document(&text); let document = Html::parse_document(&text);
let parser = *self.parser_list.get_price().iter().find(|p| p.can_parse(&url)).unwrap(); let parser = *self.parser_list.get_price().iter().find(|p| p.can_parse(&url)).ok_or(anyhow!("No parser can parse {}", url))?;
parser.parse(&document) Ok(parser.parse(&document)?)
} }
} }