Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error handling #24

Merged
merged 4 commits into from
Mar 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions src/http/reqwest/async_reqwest.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
use reqwest::{Client, Request};
use reqwest::{Method, Error};
use reqwest::Method;
use reqwest::Error as ReqwestError;
use reqwest::header::HeaderValue;
use url::{Origin, Url};
use reqwest::header::USER_AGENT;
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
use crate::parser::{ParseResult, parse_fetched_robots_txt};
use crate::model::FetchedRobotsTxt;
use crate::model::{Error, ErrorKind};
use std::pin::Pin;
use futures::task::{Context, Poll};
use futures::Future;
use futures::future::TryFutureExt;
use futures::future::ok as future_ok;

type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>;
type FetchFuture = Box<dyn Future<Output=Result<(ResponseInfo, String), ReqwestError>>>;

impl RobotsTxtClient for Client {
type Result = RobotsTxtResponse;
type Result = Result<RobotsTxtResponse, Error>;
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
let url = format!("{}/robots.txt", origin.unicode_serialization());
let url = Url::parse(&url).expect("Unable to parse robots.txt url");
let url = Url::parse(&url).map_err(|err| Error {kind: ErrorKind::Url(err)})?;
let mut request = Request::new(Method::GET, url);
let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
let response = self
Expand All @@ -29,11 +31,11 @@ impl RobotsTxtClient for Client {
return future_ok((response_info, response_text));
});
});
let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), Error>>>> = Box::pin(response);
return RobotsTxtResponse {
let response: Pin<Box<dyn Future<Output=Result<(ResponseInfo, String), ReqwestError>>>> = Box::pin(response);
Ok(RobotsTxtResponse {
origin,
response,
}
})
}
}

Expand All @@ -55,7 +57,7 @@ impl RobotsTxtResponse {
}

impl Future for RobotsTxtResponse {
type Output = Result<ParseResult<FetchedRobotsTxt>, Error>;
type Output = Result<ParseResult<FetchedRobotsTxt>, ReqwestError>;

fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
let self_mut = self.get_mut();
Expand All @@ -73,4 +75,4 @@ impl Future for RobotsTxtResponse {
},
}
}
}
}
11 changes: 6 additions & 5 deletions src/http/reqwest/sync_reqwest.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
use reqwest::blocking::{Client, Request};
use reqwest::{Method, Error};
use reqwest::Method;
use reqwest::header::HeaderValue;
use url::{Origin, Url};
use reqwest::header::USER_AGENT;
use crate::http::{RobotsTxtClient, DEFAULT_USER_AGENT};
use crate::parser::{ParseResult, parse_fetched_robots_txt};
use crate::model::FetchedRobotsTxt;
use crate::model::{Error, ErrorKind};

impl RobotsTxtClient for Client {
type Result = Result<ParseResult<FetchedRobotsTxt>, Error>;
fn fetch_robots_txt(&self, origin: Origin) -> Self::Result {
let url = format!("{}/robots.txt", origin.unicode_serialization());
let url = Url::parse(&url).expect("Unable to parse robots.txt url");
let url = Url::parse(&url).map_err(|err| Error {kind: ErrorKind::Url(err)})?;
let mut request = Request::new(Method::GET, url);
let _ = request.headers_mut().insert(USER_AGENT, HeaderValue::from_static(DEFAULT_USER_AGENT));
let response = self.execute(request)?;
let response = self.execute(request).map_err(|err| Error {kind: ErrorKind::Http(err)})?;
let status_code = response.status().as_u16();
let text = response.text()?;
let text = response.text().map_err(|err| Error {kind: ErrorKind::Http(err)})?;
let robots_txt = parse_fetched_robots_txt(origin, status_code, &text);
return Ok(robots_txt);
}
}
}
4 changes: 3 additions & 1 deletion src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,6 @@ pub (crate) use self::fetched_robots_txt::FetchedRobotsTxtContainer;
mod fetched_robots_txt;
pub use self::robots_txt::RobotsTxt;
mod path;
pub (crate) use self::path::Path;
pub (crate) use self::path::Path;
mod errors;
pub use self::errors::{Error, ErrorKind};
23 changes: 23 additions & 0 deletions src/model/errors.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
use std::fmt;

/// Error returned when fetching a robots.txt file fails.
#[derive(Debug)]
pub struct Error {
// The specific failure category; match on this to distinguish URL vs HTTP errors.
pub kind: ErrorKind,
}

/// The kinds of failure that can occur while retrieving robots.txt.
#[derive(Debug)]
pub enum ErrorKind {
// The robots.txt URL built from the origin could not be parsed.
Url(url::ParseError),
// The HTTP request failed (connection, request execution, or body read).
Http(reqwest::Error),
}

impl fmt::Display for Error {
    /// Formats the error by delegating to the wrapped underlying error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match &self.kind {
            ErrorKind::Url(err) => write!(f, "{}", err),
            ErrorKind::Http(err) => write!(f, "{}", err),
        }
    }
}

impl std::error::Error for Error {}
18 changes: 16 additions & 2 deletions tests/test_reqwest_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,28 @@ use robotparser::service::RobotsTxtService;
use reqwest::Client;
use url::Url;
use tokio::runtime::Runtime;
use url::{Host, Origin};

#[test]
// NOTE(review): network-dependent test — it fetches python.org's live robots.txt.
fn test_reqwest_async() {
    let mut runtime = Runtime::new().unwrap();
    let client = Client::new();
    let robots_txt_url = Url::parse("https://www.python.org/robots.txt").unwrap();
    // fetch_robots_txt now returns Result<RobotsTxtResponse, Error>; unwrap the
    // outer Result before driving the response future on the runtime.
    let robots_txt_response = runtime.block_on(client.fetch_robots_txt(robots_txt_url.origin()).unwrap());
    let robots_txt = robots_txt_response.unwrap().get_result();
    let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
    assert!(robots_txt.can_fetch("*", &fetch_url));
    // A path disallowed by python.org's robots.txt must be rejected.
    let fetch_url = Url::parse("http://www.python.org/webstats/").unwrap();
    assert!(!robots_txt.can_fetch("*", &fetch_url));
}

#[test]
// Renamed from test_reqwest_blocking_panic_url: this is the async test file,
// so the copy-pasted "blocking" name was misleading.
fn test_reqwest_async_panic_url() {
    let client = Client::new();
    // "python.org::" is an invalid host, so URL construction must fail and
    // fetch_robots_txt must return Err instead of panicking.
    let host = Host::Domain("python.org::".into());
    let origin = Origin::Tuple("https".into(), host, 80);
    assert!(client.fetch_robots_txt(origin).is_err());
}
14 changes: 14 additions & 0 deletions tests/test_reqwest_blocking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use robotparser::http::RobotsTxtClient;
use robotparser::service::RobotsTxtService;
use reqwest::blocking::Client;
use url::Url;
use url::{Host, Origin};

#[test]
fn test_reqwest_blocking() {
Expand All @@ -10,4 +11,17 @@ fn test_reqwest_blocking() {
let robots_txt = client.fetch_robots_txt(robots_txt_url.origin()).unwrap().get_result();
let fetch_url = Url::parse("https://www.python.org/robots.txt").unwrap();
assert!(robots_txt.can_fetch("*", &fetch_url));
let fetch_url = Url::parse("https://www.python.org/webstats/").unwrap();
assert!(!robots_txt.can_fetch("*", &fetch_url));
}

#[test]
fn test_reqwest_blocking_panic_url() {
    let client = Client::new();
    // "python.org::" is an invalid host, so URL construction must fail and
    // fetch_robots_txt must return Err instead of panicking.
    let host = Host::Domain("python.org::".into());
    let origin = Origin::Tuple("https".into(), host, 80);
    // assert!(is_err()) replaces the match { Ok => assert!(false), Err => assert!(true) }
    // anti-pattern; the failure message also becomes clearer.
    assert!(client.fetch_robots_txt(origin).is_err());
}