From 4bd5e1eb5edf6071095176807390b4978f162bce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20BERTHO?= Date: Sun, 24 May 2020 10:30:41 +0200 Subject: [PATCH] Add error handling --- Cargo.lock | 7 +++++++ Cargo.toml | 4 +++- TODO.md | 2 +- src/main.rs | 12 ++++++++---- src/parser.rs | 13 +++++++------ src/parser/darty.rs | 37 +++++++++++++++++++------------------ src/price_checker.rs | 21 +++++++++++---------- 7 files changed, 56 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a7e38ef..7e71ea8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,11 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +[[package]] +name = "anyhow" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85bb70cc08ec97ca5450e6eba421deeea5f172c0fc61f78b5357b2a8e8be195f" + [[package]] name = "arraygen" version = "0.1.13" @@ -738,6 +744,7 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" name = "price_checker" version = "0.1.0" dependencies = [ + "anyhow", "arraygen", "reqwest", "scraper", diff --git a/Cargo.toml b/Cargo.toml index 89ca7f7..9652a2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,4 +10,6 @@ edition = "2018" reqwest = { version = "0.10", features = ["blocking"]} scraper = "0.12.0" arraygen = "0.1.11" -url = "2.1.1" \ No newline at end of file +url = "2.1.1" +anyhow = "1.0" +#thiserror = "1.0" \ No newline at end of file diff --git a/TODO.md b/TODO.md index 84ef52f..012cdf0 100644 --- a/TODO.md +++ b/TODO.md @@ -1,6 +1,6 @@ # TODO -* [ ] Récupération prix darty avec [scraper](https://crates.io/crates/scraper) et [reqwest](https://crates.io/crates/reqwest) +* [x] Récupération prix darty avec [scraper](https://crates.io/crates/scraper) et [reqwest](https://crates.io/crates/reqwest) * [ ] Ajout du support de la Fnac, … * [ ] Récupération URL ligne de commande avec [clap](https://crates.io/crates/clap) * [ ] Ajout de SearchParser pour 
recherché un article sur tous les parseurs diff --git a/src/main.rs b/src/main.rs index da37f42..345c321 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +extern crate anyhow; + pub mod price_result; pub mod parser; pub mod price_checker; @@ -5,11 +7,13 @@ pub mod price_checker; use price_result::PriceResult; use price_checker::PriceChecker; use url::Url; +use anyhow::{Context, Result}; -fn main() { - let price_checker = PriceChecker::new(); +fn main() -> Result<()> { + let price_checker = PriceChecker::new().context("Cannot create price checker")?; - let price_result = price_checker.get_price(Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap()); - println!("{}", price_result); + let price_result = price_checker.get_price(Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html")?)?; + println!("{}", price_result); + Ok(()) } diff --git a/src/parser.rs b/src/parser.rs index f1f4ff6..4222a69 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -7,11 +7,12 @@ use scraper::Html; use crate::price_result::PriceResult; use arraygen::Arraygen; use url::Url; +use anyhow::Result; pub trait PriceParser{ - fn new() -> Self where Self :Sized; + fn new() -> Result<Self> where Self :Sized; fn can_parse(&self, url : &Url) -> bool; - fn parse(&self, html : &Html) -> PriceResult; + fn parse(&self, html : &Html) -> Result<PriceResult>; } #[derive(Arraygen, Debug)] @@ -22,9 +23,9 @@ pub struct List { } impl List { - pub fn new() -> Self { - List { - darty: darty::Darty::new() - } + pub fn new() -> Result<Self> { + Ok(List { + darty: darty::Darty::new()? 
+ }) } } \ No newline at end of file diff --git a/src/parser/darty.rs b/src/parser/darty.rs index 59a550e..bcb2332 100644 --- a/src/parser/darty.rs +++ b/src/parser/darty.rs @@ -2,6 +2,7 @@ use super::PriceParser; use crate::PriceResult; use scraper::{Selector, Html}; use url::Url; +use anyhow::{Result, anyhow}; #[derive(Debug)] pub struct Darty { @@ -11,38 +12,38 @@ pub struct Darty { } impl PriceParser for Darty { - fn new() -> Self { - Darty { + fn new() -> Result<Self> { + Ok(Darty { price_selector: Selector::parse(r#".darty_prix"#).unwrap(), name_selector: Selector::parse(r#".product_name"#).unwrap(), product_selector: Selector::parse(r#".product_family"#).unwrap() - } + }) } fn can_parse(&self, url : &Url) -> bool { - url.host_str().unwrap() == "www.darty.com" + url.host_str().unwrap_or("") == "www.darty.com" } - fn parse(&self, html : &Html) -> PriceResult { + fn parse(&self, html : &Html) -> Result<PriceResult> { // Get price - let price_element = html.select(&self.price_selector).next().unwrap(); + let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?; let mut price_text_it = price_element.text(); - let price_ent : u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse().unwrap(); - let price_dec : u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse().unwrap(); + let price_ent : u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse()?; + let price_dec : u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse()?; let price = price_ent as f64 + (price_dec as f64) / 100.; // Get name - let name_element = html.select(&self.name_selector).next().unwrap(); - let name = name_element.text().next().unwrap().trim().replace('\n', "-"); + let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?; + let name = name_element.text().next().unwrap_or("").trim().replace('\n', "-"); // Get product - let family_element = 
html.select(&self.product_selector).next().unwrap(); - let family = family_element.text().next().unwrap().trim().replace('\n', "-"); + let family_element = html.select(&self.product_selector).next().ok_or(anyhow!("No product element"))?; + let family = family_element.text().next().unwrap_or("").trim().replace('\n', "-"); - PriceResult { - name: name.to_owned(), - product: family.to_owned(), - price - } - } + Ok(PriceResult { + name: name.to_owned(), + product: family.to_owned(), + price + }) + } } \ No newline at end of file diff --git a/src/price_checker.rs b/src/price_checker.rs index 1a8b0d1..c3ee861 100644 --- a/src/price_checker.rs +++ b/src/price_checker.rs @@ -6,6 +6,7 @@ use scraper::Html; use crate::parser; use crate::price_result::PriceResult; use url::Url; +use anyhow::{Result, anyhow}; const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0"; @@ -15,19 +16,19 @@ pub struct PriceChecker { } impl PriceChecker { - pub fn new() -> Self { - let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build().unwrap(); - PriceChecker { + pub fn new() -> Result<Self> { + let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build()?; + Ok(PriceChecker { client, - parser_list: parser::List::new() - } + parser_list: parser::List::new()? + }) } - pub fn get_price(&self, url : Url) -> PriceResult { - let response = self.client.get(url.clone()).send().unwrap(); - let text = response.text().unwrap(); + pub fn get_price(&self, url : Url) -> Result<PriceResult> { + let response = self.client.get(url.clone()).send()?; + let text = response.text()?; let document = Html::parse_document(&text); - let parser = *self.parser_list.get_price().iter().find(|p| p.can_parse(&url)).unwrap(); - parser.parse(&document) + let parser = *self.parser_list.get_price().iter().find(|p| p.can_parse(&url)).ok_or(anyhow!("No parser can parse {}", url))?; + Ok(parser.parse(&document)?) } } \ No newline at end of file