Add format
This commit is contained in:
parent
05c114cca4
commit
6233a754df
11 changed files with 173 additions and 140 deletions
14
TODO.md~
14
TODO.md~
|
@ -1,14 +0,0 @@
|
||||||
# TODO
|
|
||||||
|
|
||||||
* [x] Récupération prix darty avec [scraper](https://crates.io/crates/scraper) et [reqwest](https://crates.io/crates/reqwest)
|
|
||||||
* [x] Ajout du support de la Fnac, …
|
|
||||||
* [x] Récupération URL ligne de commande avec [clap](https://crates.io/crates/clap)
|
|
||||||
* [ ] Ajout de SearchParser pour rechercher un article sur tous les parseurs
|
|
||||||
* [ ] Commande de liste des différents parseurs
|
|
||||||
* [ ] Ajout des pays avec [celes](https://crates.io/crates/celes) : recherche uniquement sur les parser du pays et parseur multi pays (amazon par exemple)
|
|
||||||
* [ ] Lecture des URLs depuis un fichier avec [toml](https://crates.io/crates/toml)
|
|
||||||
* [ ] Parallélisation des requêtes avec [rayon](https://crates.io/crates/rayon) ou reqwest asynchrone
|
|
||||||
* [ ] Écriture dans un fichier ODS avec [calamine](https://crates.io/crates/calamine)
|
|
||||||
* [ ] Notification si baisse de prix avec [notify-rust](https://crates.io/crates/notify-rust)
|
|
||||||
* [ ] Mail si baisse de prix avec [lettre](https://crates.io/crates/lettre)
|
|
||||||
* [ ] Lancement de la recherche régulière avec [Clokwerk](https://crates.io/crates/clokwerk)
|
|
6
src/.rustfmt.toml
Normal file
6
src/.rustfmt.toml
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
max_width = 140
|
||||||
|
hard_tabs = true
|
||||||
|
tab_spaces = 4
|
||||||
|
newline_style = "Unix"
|
||||||
|
use_field_init_shorthand = true
|
||||||
|
edition = "2018"
|
66
src/main.rs
66
src/main.rs
|
@ -1,61 +1,58 @@
|
||||||
extern crate anyhow;
|
extern crate anyhow;
|
||||||
|
extern crate arraygen;
|
||||||
extern crate clap;
|
extern crate clap;
|
||||||
extern crate url;
|
extern crate url;
|
||||||
extern crate arraygen;
|
|
||||||
|
|
||||||
pub mod price_result;
|
|
||||||
pub mod parser;
|
pub mod parser;
|
||||||
pub mod price_checker;
|
pub mod price_checker;
|
||||||
|
pub mod price_result;
|
||||||
|
|
||||||
use price_result::PriceResult;
|
|
||||||
use price_checker::PriceChecker;
|
|
||||||
use url::Url;
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use clap::{Arg, App, SubCommand};
|
use clap::{App, Arg, SubCommand};
|
||||||
|
use price_checker::PriceChecker;
|
||||||
|
use price_result::PriceResult;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
fn main() -> Result<()> {
|
fn main() -> Result<()> {
|
||||||
let matches = App::new("Price checker")
|
let matches = App::new("Price checker")
|
||||||
.version("0.1")
|
.version("0.1")
|
||||||
.author("Rémi BERTHO <remi.bertho@dalan.fr>")
|
.author("Rémi BERTHO <remi.bertho@dalan.fr>")
|
||||||
.about("Check price")
|
.about("Check price")
|
||||||
.subcommand(SubCommand::with_name("check")
|
.subcommand(
|
||||||
.about("Check from an URL")
|
SubCommand::with_name("check")
|
||||||
.arg(Arg::with_name("URL")
|
.about("Check from an URL")
|
||||||
.required(true)
|
.arg(Arg::with_name("URL").required(true).multiple(true).help("The URL to get price")),
|
||||||
.multiple(true)
|
)
|
||||||
.help("The URL to get price")))
|
.subcommand(
|
||||||
.subcommand(SubCommand::with_name("search")
|
SubCommand::with_name("search")
|
||||||
.about("Search an object")
|
.about("Search an object")
|
||||||
.arg(Arg::with_name("name")
|
.arg(Arg::with_name("name").required(true).multiple(true).help("The name of the object")),
|
||||||
.required(true)
|
)
|
||||||
.multiple(true)
|
.subcommand(SubCommand::with_name("list").about("List the parsers"))
|
||||||
.help("The name of the object")))
|
.get_matches();
|
||||||
.subcommand(SubCommand::with_name("list")
|
|
||||||
.about("List the parsers"))
|
|
||||||
.get_matches();
|
|
||||||
let price_checker = PriceChecker::new().unwrap();
|
let price_checker = PriceChecker::new().unwrap();
|
||||||
match matches.subcommand() {
|
match matches.subcommand() {
|
||||||
("check", Some(check_matches)) => {
|
("check", Some(check_matches)) => {
|
||||||
for url_str in check_matches.values_of("URL").unwrap() {
|
for url_str in check_matches.values_of("URL").unwrap() {
|
||||||
let url = Url::parse(url_str)?;
|
let url = Url::parse(url_str)?;
|
||||||
println!("{}", price_checker.get_price(&url)?) ;
|
println!("{}", price_checker.get_price(&url)?);
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
("search", Some(check_matches)) => {
|
("search", Some(check_matches)) => {
|
||||||
for name in check_matches.values_of("name").unwrap() {
|
for name in check_matches.values_of("name").unwrap() {
|
||||||
let res = price_checker.search(name)? ;
|
let res = price_checker.search(name)?;
|
||||||
if res.is_empty() {
|
if res.is_empty() {
|
||||||
println!("«{}» not found", name);
|
println!("«{}» not found", name);
|
||||||
} else {
|
} else {
|
||||||
for (parser_name, url) in res {
|
for (parser_name, url) in res {
|
||||||
let price = price_checker.get_price(&url)?;
|
let price = price_checker.get_price(&url)?;
|
||||||
println!(" - {}", parser_name) ;
|
println!(" - {}", parser_name);
|
||||||
println!(" * {}", url);
|
println!(" * {}", url);
|
||||||
println!(" * {}", price);
|
println!(" * {}", price);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
("list", _) => {
|
("list", _) => {
|
||||||
println!("Price parsers:");
|
println!("Price parsers:");
|
||||||
for parser in price_checker.parser_list.get_price().iter() {
|
for parser in price_checker.parser_list.get_price().iter() {
|
||||||
|
@ -65,11 +62,10 @@ fn main() -> Result<()> {
|
||||||
for parser in price_checker.parser_list.get_search().iter() {
|
for parser in price_checker.parser_list.get_search().iter() {
|
||||||
println!(" - {}", parser.name());
|
println!(" - {}", parser.name());
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
_ => {
|
_ => {
|
||||||
println!("{}", matches.usage());
|
println!("{}", matches.usage());
|
||||||
},
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,35 +1,37 @@
|
||||||
pub mod darty;
|
|
||||||
pub mod fnac;
|
|
||||||
pub mod du_bruit_dans_la_cuisine;
|
|
||||||
pub mod ldlc;
|
|
||||||
pub mod amazon;
|
pub mod amazon;
|
||||||
|
pub mod darty;
|
||||||
|
pub mod du_bruit_dans_la_cuisine;
|
||||||
|
pub mod fnac;
|
||||||
|
pub mod ldlc;
|
||||||
|
|
||||||
use scraper::Html;
|
|
||||||
use crate::price_result::PriceResult;
|
use crate::price_result::PriceResult;
|
||||||
use arraygen::Arraygen;
|
|
||||||
use url::Url;
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use arraygen::Arraygen;
|
||||||
|
use scraper::Html;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
pub trait Parser{
|
pub trait Parser {
|
||||||
/// Create the parser
|
/// Create the parser
|
||||||
fn new() -> Result<Self> where Self : Sized;
|
fn new() -> Result<Self>
|
||||||
|
where
|
||||||
|
Self: Sized;
|
||||||
/// Get the name
|
/// Get the name
|
||||||
fn name(&self) -> &'static str;
|
fn name(&self) -> &'static str;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Trait needed to get price from a specific website
|
/// Trait needed to get price from a specific website
|
||||||
pub trait PriceParser : Parser{
|
pub trait PriceParser: Parser {
|
||||||
/// Indicate if it can parse this URL
|
/// Indicate if it can parse this URL
|
||||||
fn can_parse(&self, url : &Url) -> bool;
|
fn can_parse(&self, url: &Url) -> bool;
|
||||||
/// Parse the html into a price
|
/// Parse the html into a price
|
||||||
fn parse_price(&self, html : &Html) -> Result<PriceResult>;
|
fn parse_price(&self, html: &Html) -> Result<PriceResult>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait SearchParser : Parser {
|
pub trait SearchParser: Parser {
|
||||||
/// Return the search URL
|
/// Return the search URL
|
||||||
fn search_url(&self, name: &str) -> Url;
|
fn search_url(&self, name: &str) -> Url;
|
||||||
/// Return the first occurrence of a result in the page, if any
|
/// Return the first occurrence of a result in the page, if any
|
||||||
fn search(&self, html : &Html) -> Result<Option<Url>>;
|
fn search(&self, html: &Html) -> Result<Option<Url>>;
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! gen_list {
|
macro_rules! gen_list {
|
||||||
|
@ -62,11 +64,11 @@ macro_rules! gen_list {
|
||||||
}
|
}
|
||||||
|
|
||||||
gen_list!(
|
gen_list!(
|
||||||
[darty::Darty : get_price],
|
[darty::Darty: get_price],
|
||||||
[fnac::Fnac : get_price],
|
[fnac::Fnac: get_price],
|
||||||
[du_bruit_dans_la_cuisine::DuBruitDansLaCuisine : get_price],
|
[du_bruit_dans_la_cuisine::DuBruitDansLaCuisine: get_price],
|
||||||
[ldlc::LDLC : get_price],
|
[ldlc::LDLC: get_price],
|
||||||
[amazon::Amazon : get_price, get_search]
|
[amazon::Amazon: get_price, get_search]
|
||||||
);
|
);
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -74,4 +76,4 @@ fn test_parser_list() {
|
||||||
let parser_list = List::new().unwrap();
|
let parser_list = List::new().unwrap();
|
||||||
assert_eq!(parser_list.get_price().len(), 5);
|
assert_eq!(parser_list.get_price().len(), 5);
|
||||||
assert_eq!(parser_list.get_search().len(), 1);
|
assert_eq!(parser_list.get_search().len(), 1);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
use super::{Parser, PriceParser, SearchParser};
|
use super::{Parser, PriceParser, SearchParser};
|
||||||
use crate::PriceResult;
|
use crate::PriceResult;
|
||||||
use scraper::{Selector, Html};
|
use anyhow::{anyhow, Result};
|
||||||
|
use scraper::{Html, Selector};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use anyhow::{Result, anyhow};
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
/// Parser for the Amazon website
|
/// Parser for the Amazon website
|
||||||
|
@ -11,7 +11,7 @@ pub struct Amazon {
|
||||||
name_selector: Selector,
|
name_selector: Selector,
|
||||||
product_selector: Selector,
|
product_selector: Selector,
|
||||||
search_selector_1: Selector,
|
search_selector_1: Selector,
|
||||||
search_selector_2: Selector
|
search_selector_2: Selector,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Parser for Amazon {
|
impl Parser for Amazon {
|
||||||
|
@ -21,7 +21,7 @@ impl Parser for Amazon {
|
||||||
name_selector: Selector::parse(r"#productTitle").unwrap(),
|
name_selector: Selector::parse(r"#productTitle").unwrap(),
|
||||||
product_selector: Selector::parse(r".nav-search-label").unwrap(),
|
product_selector: Selector::parse(r".nav-search-label").unwrap(),
|
||||||
search_selector_1: Selector::parse(r".rush-component[data-component-type=s-product-image]").unwrap(),
|
search_selector_1: Selector::parse(r".rush-component[data-component-type=s-product-image]").unwrap(),
|
||||||
search_selector_2: Selector::parse(r".a-link-normal").unwrap()
|
search_selector_2: Selector::parse(r".a-link-normal").unwrap(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -31,16 +31,21 @@ impl Parser for Amazon {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PriceParser for Amazon {
|
impl PriceParser for Amazon {
|
||||||
|
fn can_parse(&self, url: &Url) -> bool {
|
||||||
fn can_parse(&self, url : &Url) -> bool {
|
|
||||||
url.host_str().unwrap_or("") == "www.amazon.fr"
|
url.host_str().unwrap_or("") == "www.amazon.fr"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_price(&self, html : &Html) -> Result<PriceResult> {
|
fn parse_price(&self, html: &Html) -> Result<PriceResult> {
|
||||||
// Get price
|
// Get price
|
||||||
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
|
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
|
||||||
let mut price_text_it = price_element.text();
|
let mut price_text_it = price_element.text();
|
||||||
let price : f64 = price_text_it.next().unwrap_or("0.").trim_end_matches("€").trim().replace(',', ".").parse()?;
|
let price: f64 = price_text_it
|
||||||
|
.next()
|
||||||
|
.unwrap_or("0.")
|
||||||
|
.trim_end_matches("€")
|
||||||
|
.trim()
|
||||||
|
.replace(',', ".")
|
||||||
|
.parse()?;
|
||||||
|
|
||||||
// Get name
|
// Get name
|
||||||
let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?;
|
let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?;
|
||||||
|
@ -53,19 +58,22 @@ impl PriceParser for Amazon {
|
||||||
Ok(PriceResult {
|
Ok(PriceResult {
|
||||||
name: name.to_owned(),
|
name: name.to_owned(),
|
||||||
product: family.to_owned(),
|
product: family.to_owned(),
|
||||||
price
|
price,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SearchParser for Amazon {
|
impl SearchParser for Amazon {
|
||||||
fn search_url(&self, name: &str) -> Url {
|
fn search_url(&self, name: &str) -> Url {
|
||||||
Url::parse(& format!("https://www.amazon.fr/s?k={}", name)).unwrap()
|
Url::parse(&format!("https://www.amazon.fr/s?k={}", name)).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn search(&self, html : &Html) -> Result<Option<Url>> {
|
fn search(&self, html: &Html) -> Result<Option<Url>> {
|
||||||
if let Some(search_element_1) = html.select(&self.search_selector_1).next() {
|
if let Some(search_element_1) = html.select(&self.search_selector_1).next() {
|
||||||
let search_element_2 = search_element_1.select(&self.search_selector_2).next().ok_or(anyhow!("No search element 2"))?;
|
let search_element_2 = search_element_1
|
||||||
|
.select(&self.search_selector_2)
|
||||||
|
.next()
|
||||||
|
.ok_or(anyhow!("No search element 2"))?;
|
||||||
let path_url = search_element_2.value().attr("href").ok_or(anyhow!("No link element"))?;
|
let path_url = search_element_2.value().attr("href").ok_or(anyhow!("No link element"))?;
|
||||||
let mut url = Url::parse("https://www.amazon.fr").unwrap();
|
let mut url = Url::parse("https://www.amazon.fr").unwrap();
|
||||||
url.set_path(path_url.split("/ref").next().unwrap_or(path_url));
|
url.set_path(path_url.split("/ref").next().unwrap_or(path_url));
|
||||||
|
@ -79,7 +87,11 @@ impl SearchParser for Amazon {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parser_fnac() {
|
fn test_parser_fnac() {
|
||||||
let fnac_parser = Amazon::new().unwrap();
|
let fnac_parser = Amazon::new().unwrap();
|
||||||
assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap()));
|
assert!(fnac_parser.can_parse(
|
||||||
assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap()));
|
&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap()
|
||||||
|
));
|
||||||
|
assert!(fnac_parser.can_parse(
|
||||||
|
&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B").unwrap()
|
||||||
|
));
|
||||||
assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()) == false);
|
assert!(fnac_parser.can_parse(&Url::parse("https://www.amazon.com").unwrap()) == false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,15 +1,15 @@
|
||||||
use super::{Parser, PriceParser};
|
use super::{Parser, PriceParser};
|
||||||
use crate::PriceResult;
|
use crate::PriceResult;
|
||||||
use scraper::{Selector, Html};
|
use anyhow::{anyhow, Result};
|
||||||
|
use scraper::{Html, Selector};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use anyhow::{Result, anyhow};
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
/// Parser for the darty website
|
/// Parser for the darty website
|
||||||
pub struct Darty {
|
pub struct Darty {
|
||||||
price_selector: Selector,
|
price_selector: Selector,
|
||||||
name_selector: Selector,
|
name_selector: Selector,
|
||||||
product_selector: Selector
|
product_selector: Selector,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Parser for Darty {
|
impl Parser for Darty {
|
||||||
|
@ -17,7 +17,7 @@ impl Parser for Darty {
|
||||||
Ok(Darty {
|
Ok(Darty {
|
||||||
price_selector: Selector::parse(r#".darty_prix"#).unwrap(),
|
price_selector: Selector::parse(r#".darty_prix"#).unwrap(),
|
||||||
name_selector: Selector::parse(r#".product_name"#).unwrap(),
|
name_selector: Selector::parse(r#".product_name"#).unwrap(),
|
||||||
product_selector: Selector::parse(r#".product_family"#).unwrap()
|
product_selector: Selector::parse(r#".product_family"#).unwrap(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,16 +27,16 @@ impl Parser for Darty {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PriceParser for Darty {
|
impl PriceParser for Darty {
|
||||||
fn can_parse(&self, url : &Url) -> bool {
|
fn can_parse(&self, url: &Url) -> bool {
|
||||||
url.host_str().unwrap_or("") == "www.darty.com"
|
url.host_str().unwrap_or("") == "www.darty.com"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_price(&self, html : &Html) -> Result<PriceResult> {
|
fn parse_price(&self, html: &Html) -> Result<PriceResult> {
|
||||||
// Get price
|
// Get price
|
||||||
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
|
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
|
||||||
let mut price_text_it = price_element.text();
|
let mut price_text_it = price_element.text();
|
||||||
let price_ent : u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse()?;
|
let price_ent: u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse()?;
|
||||||
let price_dec : u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse()?;
|
let price_dec: u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse()?;
|
||||||
let price = price_ent as f64 + (price_dec as f64) / 100.;
|
let price = price_ent as f64 + (price_dec as f64) / 100.;
|
||||||
|
|
||||||
// Get name
|
// Get name
|
||||||
|
@ -50,7 +50,7 @@ impl PriceParser for Darty {
|
||||||
Ok(PriceResult {
|
Ok(PriceResult {
|
||||||
name: name.to_owned(),
|
name: name.to_owned(),
|
||||||
product: family.to_owned(),
|
product: family.to_owned(),
|
||||||
price
|
price,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -61,4 +61,4 @@ fn test_parser_darty() {
|
||||||
assert!(darty_parser.can_parse(&Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap()));
|
assert!(darty_parser.can_parse(&Url::parse("https://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap()));
|
||||||
assert!(darty_parser.can_parse(&Url::parse("http://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap()));
|
assert!(darty_parser.can_parse(&Url::parse("http://www.darty.com/nav/achat/gros_electromenager/refrigerateur-congelateur-refrigerateur-cong/refrigerateur-congelateur_bas/samsung_rb33n300nsa_ef.html").unwrap()));
|
||||||
assert!(darty_parser.can_parse(&Url::parse("https://www.fnace.com").unwrap()) == false);
|
assert!(darty_parser.can_parse(&Url::parse("https://www.fnace.com").unwrap()) == false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
use super::{PriceParser, Parser};
|
use super::{Parser, PriceParser};
|
||||||
use crate::PriceResult;
|
use crate::PriceResult;
|
||||||
use scraper::{Selector, Html};
|
use anyhow::{anyhow, Result};
|
||||||
|
use scraper::{Html, Selector};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use anyhow::{Result, anyhow};
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
/// Parser for the Du Bruit dans la Cuisine website
|
/// Parser for the Du Bruit dans la Cuisine website
|
||||||
|
@ -25,15 +25,21 @@ impl Parser for DuBruitDansLaCuisine {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PriceParser for DuBruitDansLaCuisine {
|
impl PriceParser for DuBruitDansLaCuisine {
|
||||||
fn can_parse(&self, url : &Url) -> bool {
|
fn can_parse(&self, url: &Url) -> bool {
|
||||||
url.host_str().unwrap_or("") == "www.dubruitdanslacuisine.fr"
|
url.host_str().unwrap_or("") == "www.dubruitdanslacuisine.fr"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_price(&self, html : &Html) -> Result<PriceResult> {
|
fn parse_price(&self, html: &Html) -> Result<PriceResult> {
|
||||||
// Get price
|
// Get price
|
||||||
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
|
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
|
||||||
let mut price_text_it = price_element.text();
|
let mut price_text_it = price_element.text();
|
||||||
let price : f64 = price_text_it.next().unwrap_or("0.").trim_end_matches("€").trim().replace(',', ".").parse()?;
|
let price: f64 = price_text_it
|
||||||
|
.next()
|
||||||
|
.unwrap_or("0.")
|
||||||
|
.trim_end_matches("€")
|
||||||
|
.trim()
|
||||||
|
.replace(',', ".")
|
||||||
|
.parse()?;
|
||||||
|
|
||||||
// Get name
|
// Get name
|
||||||
let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?;
|
let name_element = html.select(&self.name_selector).next().ok_or(anyhow!("No name element"))?;
|
||||||
|
@ -42,7 +48,7 @@ impl PriceParser for DuBruitDansLaCuisine {
|
||||||
Ok(PriceResult {
|
Ok(PriceResult {
|
||||||
name: name.to_owned(),
|
name: name.to_owned(),
|
||||||
product: "Cuisine".to_owned(),
|
product: "Cuisine".to_owned(),
|
||||||
price
|
price,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -53,4 +59,4 @@ fn test_parser_du_bruit_dans_la_cuisine() {
|
||||||
assert!(parser.can_parse(&Url::parse("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p").unwrap()));
|
assert!(parser.can_parse(&Url::parse("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p").unwrap()));
|
||||||
assert!(parser.can_parse(&Url::parse("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p").unwrap()));
|
assert!(parser.can_parse(&Url::parse("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p").unwrap()));
|
||||||
assert!(parser.can_parse(&Url::parse("https://www.dubrutdanslacuisine.fr/").unwrap()) == false);
|
assert!(parser.can_parse(&Url::parse("https://www.dubrutdanslacuisine.fr/").unwrap()) == false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,23 +1,23 @@
|
||||||
use super::{Parser, PriceParser};
|
use super::{Parser, PriceParser};
|
||||||
use crate::PriceResult;
|
use crate::PriceResult;
|
||||||
use scraper::{Selector, Html};
|
use anyhow::{anyhow, Result};
|
||||||
|
use scraper::{Html, Selector};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use anyhow::{Result, anyhow};
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
/// Parser for the Fnac website
|
/// Parser for the Fnac website
|
||||||
pub struct Fnac {
|
pub struct Fnac {
|
||||||
price_selector: Selector,
|
price_selector: Selector,
|
||||||
name_selector: Selector,
|
name_selector: Selector,
|
||||||
product_selector: Selector
|
product_selector: Selector,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Parser for Fnac{
|
impl Parser for Fnac {
|
||||||
fn new() -> Result<Self> {
|
fn new() -> Result<Self> {
|
||||||
Ok(Fnac {
|
Ok(Fnac {
|
||||||
price_selector: Selector::parse(r#".f-priceBox-price.checked"#).unwrap(),
|
price_selector: Selector::parse(r#".f-priceBox-price.checked"#).unwrap(),
|
||||||
name_selector: Selector::parse(r#".f-productHeader-Title"#).unwrap(),
|
name_selector: Selector::parse(r#".f-productHeader-Title"#).unwrap(),
|
||||||
product_selector: Selector::parse(r#".f-productHeader-subTitleLink"#).unwrap()
|
product_selector: Selector::parse(r#".f-productHeader-subTitleLink"#).unwrap(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -27,16 +27,16 @@ impl Parser for Fnac{
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PriceParser for Fnac {
|
impl PriceParser for Fnac {
|
||||||
fn can_parse(&self, url : &Url) -> bool {
|
fn can_parse(&self, url: &Url) -> bool {
|
||||||
url.host_str().unwrap_or("") == "www.fnac.com"
|
url.host_str().unwrap_or("") == "www.fnac.com"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_price(&self, html : &Html) -> Result<PriceResult> {
|
fn parse_price(&self, html: &Html) -> Result<PriceResult> {
|
||||||
// Get price
|
// Get price
|
||||||
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
|
let price_element = html.select(&self.price_selector).next().ok_or(anyhow!("No price element"))?;
|
||||||
let mut price_text_it = price_element.text();
|
let mut price_text_it = price_element.text();
|
||||||
let price_ent : u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse()?;
|
let price_ent: u32 = price_text_it.next().unwrap_or("0").trim_end_matches(',').parse()?;
|
||||||
let price_dec : u32 = price_text_it.next().unwrap_or("0").trim_start_matches('€').parse()?;
|
let price_dec: u32 = price_text_it.next().unwrap_or("0").trim_start_matches('€').parse()?;
|
||||||
let price = price_ent as f64 + (price_dec as f64) / 100.;
|
let price = price_ent as f64 + (price_dec as f64) / 100.;
|
||||||
|
|
||||||
// Get name
|
// Get name
|
||||||
|
@ -50,7 +50,7 @@ impl PriceParser for Fnac {
|
||||||
Ok(PriceResult {
|
Ok(PriceResult {
|
||||||
name: name.to_owned(),
|
name: name.to_owned(),
|
||||||
product: family.to_owned(),
|
product: family.to_owned(),
|
||||||
price
|
price,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -58,7 +58,9 @@ impl PriceParser for Fnac {
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parser_fnac() {
|
fn test_parser_fnac() {
|
||||||
let fnac_parser = Fnac::new().unwrap();
|
let fnac_parser = Fnac::new().unwrap();
|
||||||
assert!(fnac_parser.can_parse(&Url::parse("https://www.fnac.com/Apple-iPhone-XS-64-Go-5-8-Argent/a12849718/w-4?CtoPid=488371").unwrap()));
|
assert!(
|
||||||
|
fnac_parser.can_parse(&Url::parse("https://www.fnac.com/Apple-iPhone-XS-64-Go-5-8-Argent/a12849718/w-4?CtoPid=488371").unwrap())
|
||||||
|
);
|
||||||
assert!(fnac_parser.can_parse(&Url::parse("http://www.fnac.com/Apple-iPhone-XS-64-Go-5-8-Argent/a12849718/w-4?CtoPid=488371").unwrap()));
|
assert!(fnac_parser.can_parse(&Url::parse("http://www.fnac.com/Apple-iPhone-XS-64-Go-5-8-Argent/a12849718/w-4?CtoPid=488371").unwrap()));
|
||||||
assert!(fnac_parser.can_parse(&Url::parse("https://www.fnace.com").unwrap()) == false);
|
assert!(fnac_parser.can_parse(&Url::parse("https://www.fnace.com").unwrap()) == false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
use super::{PriceParser, Parser};
|
use super::{Parser, PriceParser};
|
||||||
use crate::PriceResult;
|
use crate::PriceResult;
|
||||||
use scraper::{Selector, Html};
|
use anyhow::{anyhow, Result};
|
||||||
|
use scraper::{Html, Selector};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use anyhow::{Result, anyhow};
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
/// Parser for the LDLC website
|
/// Parser for the LDLC website
|
||||||
|
@ -25,16 +25,16 @@ impl Parser for LDLC {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PriceParser for LDLC {
|
impl PriceParser for LDLC {
|
||||||
fn can_parse(&self, url : &Url) -> bool {
|
fn can_parse(&self, url: &Url) -> bool {
|
||||||
url.host_str().unwrap_or("") == "www.ldlc.com"
|
url.host_str().unwrap_or("") == "www.ldlc.com"
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_price(&self, html : &Html) -> Result<PriceResult> {
|
fn parse_price(&self, html: &Html) -> Result<PriceResult> {
|
||||||
// Get price
|
// Get price
|
||||||
let price_element = html.select(&self.price_selector).nth(4).ok_or(anyhow!("No price element"))?;
|
let price_element = html.select(&self.price_selector).nth(4).ok_or(anyhow!("No price element"))?;
|
||||||
let mut price_text_it = price_element.text();
|
let mut price_text_it = price_element.text();
|
||||||
let price_ent : u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse()?;
|
let price_ent: u32 = price_text_it.next().unwrap_or("0").trim_end_matches('€').parse()?;
|
||||||
let price_dec : u32 = price_text_it.next().unwrap_or("0").parse()?;
|
let price_dec: u32 = price_text_it.next().unwrap_or("0").parse()?;
|
||||||
let price = price_ent as f64 + (price_dec as f64) / 100.;
|
let price = price_ent as f64 + (price_dec as f64) / 100.;
|
||||||
|
|
||||||
// Get name
|
// Get name
|
||||||
|
@ -44,7 +44,7 @@ impl PriceParser for LDLC {
|
||||||
Ok(PriceResult {
|
Ok(PriceResult {
|
||||||
name: name.to_owned(),
|
name: name.to_owned(),
|
||||||
product: "High-tech".to_owned(),
|
product: "High-tech".to_owned(),
|
||||||
price
|
price,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -55,4 +55,4 @@ fn test_parser_du_bruit_dans_la_cuisine() {
|
||||||
assert!(parser.can_parse(&Url::parse("https://www.ldlc.com/fiche/PB00335410.html").unwrap()));
|
assert!(parser.can_parse(&Url::parse("https://www.ldlc.com/fiche/PB00335410.html").unwrap()));
|
||||||
assert!(parser.can_parse(&Url::parse("http://www.ldlc.com/fiche/PB00335410.html").unwrap()));
|
assert!(parser.can_parse(&Url::parse("http://www.ldlc.com/fiche/PB00335410.html").unwrap()));
|
||||||
assert!(parser.can_parse(&Url::parse("https://www.ldlv.com").unwrap()) == false);
|
assert!(parser.can_parse(&Url::parse("https://www.ldlv.com").unwrap()) == false);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,19 +1,19 @@
|
||||||
extern crate reqwest;
|
extern crate reqwest;
|
||||||
extern crate scraper;
|
extern crate scraper;
|
||||||
|
|
||||||
use reqwest::blocking::Client;
|
|
||||||
use scraper::Html;
|
|
||||||
use crate::parser;
|
use crate::parser;
|
||||||
use crate::price_result::PriceResult;
|
use crate::price_result::PriceResult;
|
||||||
|
use anyhow::{anyhow, Result};
|
||||||
|
use reqwest::blocking::Client;
|
||||||
|
use scraper::Html;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
use anyhow::{Result, anyhow};
|
|
||||||
|
|
||||||
const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0";
|
const USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0";
|
||||||
|
|
||||||
/// Struct used to get price from a website
|
/// Struct used to get price from a website
|
||||||
pub struct PriceChecker {
|
pub struct PriceChecker {
|
||||||
client: Client,
|
client: Client,
|
||||||
pub parser_list: parser::List
|
pub parser_list: parser::List,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PriceChecker {
|
impl PriceChecker {
|
||||||
|
@ -22,19 +22,24 @@ impl PriceChecker {
|
||||||
let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build()?;
|
let client = reqwest::blocking::Client::builder().user_agent(USER_AGENT).build()?;
|
||||||
Ok(PriceChecker {
|
Ok(PriceChecker {
|
||||||
client,
|
client,
|
||||||
parser_list: parser::List::new()?
|
parser_list: parser::List::new()?,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_html(&self, url : &Url) -> Result<Html> {
|
fn get_html(&self, url: &Url) -> Result<Html> {
|
||||||
let response = self.client.get(url.clone()).send()?;
|
let response = self.client.get(url.clone()).send()?;
|
||||||
let text = response.text()?;
|
let text = response.text()?;
|
||||||
Ok(Html::parse_document(&text))
|
Ok(Html::parse_document(&text))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get a price from an URL
|
/// Get a price from an URL
|
||||||
pub fn get_price(&self, url : &Url) -> Result<PriceResult> {
|
pub fn get_price(&self, url: &Url) -> Result<PriceResult> {
|
||||||
let parser = *self.parser_list.get_price().iter().find(|p| p.can_parse(&url)).ok_or(anyhow!("No parser can parse {}", url))?;
|
let parser = *self
|
||||||
|
.parser_list
|
||||||
|
.get_price()
|
||||||
|
.iter()
|
||||||
|
.find(|p| p.can_parse(&url))
|
||||||
|
.ok_or(anyhow!("No parser can parse {}", url))?;
|
||||||
let document = self.get_html(url)?;
|
let document = self.get_html(url)?;
|
||||||
Ok(parser.parse_price(&document)?)
|
Ok(parser.parse_price(&document)?)
|
||||||
}
|
}
|
||||||
|
@ -64,24 +69,42 @@ fn test_price_checker() {
|
||||||
assert!(price_result.product != "");
|
assert!(price_result.product != "");
|
||||||
|
|
||||||
// Test fnac
|
// Test fnac
|
||||||
let price_result = price_checker.get_price(&Url::parse("https://www.fnac.com/a12584732/Kaamelott-Les-Six-Livres-L-integrale-de-la-serie-Coffret-Blu-ray-Alexandre-Astier-Blu-ray").unwrap()).unwrap();
|
let price_result = price_checker
|
||||||
|
.get_price(
|
||||||
|
&Url::parse(
|
||||||
|
"https://www.fnac.com/a12584732/Kaamelott-Les-Six-Livres-L-integrale-de-la-serie-Coffret-Blu-ray-Alexandre-Astier-Blu-ray",
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
assert!(price_result.name != "");
|
assert!(price_result.name != "");
|
||||||
assert!(price_result.price != 0.);
|
assert!(price_result.price != 0.);
|
||||||
assert!(price_result.product != "");
|
assert!(price_result.product != "");
|
||||||
|
|
||||||
// Test du bruit dans la cuisine
|
// Test du bruit dans la cuisine
|
||||||
let price_result = price_checker.get_price(&Url::parse("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p").unwrap()).unwrap();
|
let price_result = price_checker
|
||||||
|
.get_price(&Url::parse("https://www.dubruitdanslacuisine.fr/tapis-a-patisserie-40-62-14377-p").unwrap())
|
||||||
|
.unwrap();
|
||||||
assert!(price_result.name != "");
|
assert!(price_result.name != "");
|
||||||
assert!(price_result.price != 0.);
|
assert!(price_result.price != 0.);
|
||||||
|
|
||||||
// LDLC
|
// LDLC
|
||||||
let price_result = price_checker.get_price(&Url::parse("https://www.ldlc.com/fiche/PB00335410.html").unwrap()).unwrap();
|
let price_result = price_checker
|
||||||
|
.get_price(&Url::parse("https://www.ldlc.com/fiche/PB00335410.html").unwrap())
|
||||||
|
.unwrap();
|
||||||
assert!(price_result.name != "");
|
assert!(price_result.name != "");
|
||||||
assert!(price_result.price != 0.);
|
assert!(price_result.price != 0.);
|
||||||
|
|
||||||
// Amazon
|
// Amazon
|
||||||
let price_result = price_checker.get_price(&Url::parse("https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp&th=1").unwrap()).unwrap();
|
let price_result = price_checker
|
||||||
|
.get_price(
|
||||||
|
&Url::parse(
|
||||||
|
"https://www.amazon.fr/AmazonBasics-Bo%C3%AEte-crayons-papier-pr%C3%A9taill%C3%A9s/dp/B071JM699B?ref_=ast_sto_dp&th=1",
|
||||||
|
)
|
||||||
|
.unwrap(),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
assert!(price_result.name != "");
|
assert!(price_result.name != "");
|
||||||
assert!(price_result.price != 0.);
|
assert!(price_result.price != 0.);
|
||||||
assert!(price_result.product != "");
|
assert!(price_result.product != "");
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,11 +8,11 @@ pub struct PriceResult {
|
||||||
/// The product type
|
/// The product type
|
||||||
pub product: String,
|
pub product: String,
|
||||||
/// The price
|
/// The price
|
||||||
pub price: f64
|
pub price: f64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for PriceResult {
|
impl fmt::Display for PriceResult {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
write!(f, "Product «{}: {}» price {}€", self.product, self.name, self.price)
|
write!(f, "Product «{}: {}» price {}€", self.product, self.name, self.price)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue