// Files
// pixiv_downloader/src/webclient.rs
//
// 622 lines
// 20 KiB
// Rust

use crate::cookies::Cookie;
use crate::cookies::ManagedCookieJar;
use crate::error::PixivDownloaderError;
use crate::ext::atomic::AtomicQuick;
use crate::ext::json::ToJson;
use crate::ext::replace::ReplaceWith2;
use crate::ext::rw_lock::GetRwLock;
use crate::formdata::FormData;
use crate::gettext;
use crate::list::NonTailList;
use crate::opthelper::get_helper;
use json::JsonValue;
use proc_macros::print_error;
use reqwest::multipart::Form;
use reqwest::{Client, ClientBuilder, IntoUrl, Request, Response};
use serde::ser::Serialize;
use std::collections::HashMap;
use std::default::Default;
use std::sync::atomic::AtomicI64;
use std::sync::Arc;
use std::sync::RwLock;
use std::sync::RwLockReadGuard;
use std::sync::RwLockWriteGuard;
use std::time::Duration;
/// Conversion of a value into an optional map of HTTP headers.
pub trait ToHeaders {
    /// Returns the HTTP headers as a `name -> value` map, or [None] when the
    /// value carries no headers.
    fn to_headers(&self) -> Option<HashMap<String, String>>;
}

/// An optional map already has the target shape, so a copy of it is returned.
impl ToHeaders for Option<HashMap<String, String>> {
    fn to_headers(&self) -> Option<HashMap<String, String>> {
        self.as_ref().cloned()
    }
}

/// A plain map always yields `Some`, even when the map is empty.
impl ToHeaders for HashMap<String, String> {
    fn to_headers(&self) -> Option<HashMap<String, String>> {
        Some(self.to_owned())
    }
}
/// A JSON object is read as a header map; any other JSON value yields [None].
impl ToHeaders for JsonValue {
    fn to_headers(&self) -> Option<HashMap<String, String>> {
        if !self.is_object() {
            return None;
        }
        let headers: HashMap<String, String> = self
            .entries()
            .map(|(name, value)| {
                // String values are used verbatim; everything else is
                // serialized to its JSON text.
                let text = match value.as_str() {
                    Some(s) => s.to_owned(),
                    None => value.dump(),
                };
                (name.to_owned(), text)
            })
            .collect();
        Some(headers)
    }
}
/// Build the value of the `Cookie` header for a URL.
/// * `c` - Client whose cookie jar is consulted
/// * `url` - URL the request is going to be sent to
///
/// Expired cookies are purged from the jar first. The names of the cookies
/// that matched are written to the `webclient` debug log.
pub fn gen_cookie_header<U: IntoUrl>(c: &WebClient, url: U) -> String {
    c.get_cookies_as_mut().jar.get_mut().check_expired();
    let addr = url.as_str();
    let mut header = String::new();
    let mut names = String::new();
    for cookie in c.get_cookies().jar.get_ref().iter() {
        if !cookie.matched(addr) {
            continue;
        }
        if !header.is_empty() {
            // NOTE(review): entries are joined with a single space, so
            // `get_name_value` presumably already ends with `;` — confirm.
            header.push_str(" ");
            names.push_str(", ");
        }
        header.push_str(cookie.get_name_value().as_str());
        names.push_str(cookie.name());
    }
    log::debug!(target: "webclient", "Cookie List: {}", names);
    header
}
/// Hook that can inspect or rewrite a [Request] before it is executed.
pub trait ReqMiddleware {
    /// Process the request `r`.
    /// * `r` - The request built so far
    /// * `c` - A [Client] handle the middleware may use
    ///
    /// Returns the request that should be used from here on, or an error.
    fn handle(&self, r: Request, c: Client) -> Result<Request, PixivDownloaderError>;
}

/// Allows a middleware shared through an [Arc] to be used as a middleware itself.
impl<T: ReqMiddleware> ReqMiddleware for Arc<T> {
    fn handle(&self, r: Request, c: Client) -> Result<Request, PixivDownloaderError> {
        // Forward to the wrapped middleware.
        T::handle(&**self, r, c)
    }
}
/// A Web Client
///
/// Wraps a [Client] together with default headers, a cookie jar, a retry
/// policy, request middlewares and a request timeout. Every setting lives
/// behind a lock or an atomic, so it can be changed through `&self`.
pub struct WebClient {
/// Basic Web Client, used to build and execute requests
client: Client,
/// HTTP headers added to every request
headers: RwLock<HashMap<String, String>>,
/// Cookies; used to generate the `Cookie` header and updated from `Set-Cookie` headers
cookies: RwLock<ManagedCookieJar>,
/// Retry times, 0 means disable, < 0 means always retry
retry: Arc<AtomicI64>,
/// Retry interval; indexed by the number of the retry (starting from 0)
retry_interval: RwLock<Option<NonTailList<Duration>>>,
/// Request middlewares, applied in insertion order to every built request
req_middlewares: RwLock<Vec<Box<dyn ReqMiddleware + Send + Sync>>>,
/// Set request timeout. The timeout is applied from when the request starts connecting until
/// the response body has finished. [None] means no timeout is set on the request.
timeout: RwLock<Option<Duration>>,
}
impl WebClient {
/// Create a new instance of client
///
/// This function will not handle any basic options, please use [Self::default()] instead.
pub fn new(client: Client) -> Self {
Self {
client,
headers: RwLock::new(HashMap::new()),
cookies: RwLock::new(ManagedCookieJar::new()),
retry: Arc::new(AtomicI64::new(3)),
retry_interval: RwLock::new(None),
req_middlewares: RwLock::new(Vec::new()),
timeout: RwLock::new(None),
}
}
/// Run the request through every registered middleware, in registration order.
///
/// Each middleware receives the request returned by the previous one and a
/// clone of the underlying client. The first error stops the chain and is returned.
fn handle_req_middlewares(&self, r: Request) -> Result<Request, PixivDownloaderError> {
    self.req_middlewares
        .get_ref()
        .iter()
        .try_fold(r, |req, middleware| middleware.handle(req, self.client.clone()))
}
/// Register a request middleware.
/// * `m` - The middleware; it is appended after the already registered ones
pub fn add_req_middleware(&self, m: Box<dyn ReqMiddleware + Send + Sync>) {
self.req_middlewares.get_mut().push(m);
}
/// Lock the cookie jar for writing and return the guard.
pub fn get_cookies_as_mut(&self) -> RwLockWriteGuard<'_, ManagedCookieJar> {
    self.cookies.get_mut()
}
/// Lock the cookie jar for reading and return the guard.
pub fn get_cookies(&self) -> RwLockReadGuard<'_, ManagedCookieJar> {
    self.cookies.get_ref()
}
/// Lock the default HTTP headers for writing and return the guard.
pub fn get_headers_as_mut(&self) -> RwLockWriteGuard<'_, HashMap<String, String>> {
    self.headers.get_mut()
}
/// Lock the default HTTP headers for reading and return the guard.
pub fn get_headers(&self) -> RwLockReadGuard<'_, HashMap<String, String>> {
    self.headers.get_ref()
}
/// Return the retry times: 0 means retrying is disabled, a value < 0 means always retry.
pub fn get_retry(&self) -> i64 {
self.retry.qload()
}
/// Lock the retry interval list for writing and return the guard.
pub fn get_retry_interval_as_mut(&self) -> RwLockWriteGuard<'_, Option<NonTailList<Duration>>> {
    self.retry_interval.get_mut()
}
/// Lock the retry interval list for reading and return the guard.
pub fn get_retry_interval(&self) -> RwLockReadGuard<'_, Option<NonTailList<Duration>>> {
    self.retry_interval.get_ref()
}
/// Lock the request timeout for reading and return the guard.
pub fn get_timeout(&self) -> RwLockReadGuard<'_, Option<Duration>> {
    self.timeout.get_ref()
}
/// Store the cookies sent by the server in a [Response].
/// * `r` - reference to a [Response]
///
/// Every `Set-Cookie` header is parsed relative to the response URL and added
/// to the cookie jar. Headers that are not valid strings or can not be parsed
/// are skipped with a warning.
pub fn handle_set_cookie(&self, r: &Response) {
    let origin = r.url();
    for raw in r.headers().get_all("Set-Cookie") {
        let text = match raw.to_str() {
            Ok(text) => text,
            Err(e) => {
                log::warn!("{} {}", gettext("Failed to convert to string:"), e);
                continue;
            }
        };
        if let Some(cookie) = Cookie::from_set_cookie(origin.as_str(), text) {
            self.get_cookies_as_mut().jar.get_mut().add(cookie);
        } else {
            log::warn!("{}", gettext("Failed to parse Set-Cookie header."));
        }
    }
}
/// Load cookies from a file into the cookie jar.
/// * `file_name`: File name
///
/// Returns true if the file was read successfully.
/// # Note
/// If reading failed, all entries in the current [ManagedCookieJar] are removed.
pub fn read_cookies(&self, file_name: &str) -> bool {
    let mut cookies = self.get_cookies_as_mut();
    if cookies.read(file_name) {
        return true;
    }
    cookies.jar.get_mut().clear();
    false
}
/// Set a default HTTP header that is sent with every request.
/// * `key` - The name of the HTTP header
/// * `value` - The value of the HTTP header
///
/// Returns the previous value of the header if there was one.
pub fn set_header(&self, key: &str, value: &str) -> Option<String> {
    let mut headers = self.get_headers_as_mut();
    headers.insert(key.to_owned(), value.to_owned())
}
/// Set the retry times: 0 disables retrying, a value < 0 means always retry.
pub fn set_retry(&self, retry: i64) {
    self.retry.qstore(retry);
}
/// Set request timeout. The timeout is applied from when the request starts connecting until
/// the response body has finished.
/// * `timeout` - The new timeout; [None] means requests are built without a timeout
pub fn set_timeout(&self, timeout: Option<Duration>) {
self.timeout.replace_with2(timeout);
}
/// Create a client configured like [Self::default()], but with the request
/// timeout cleared.
pub fn with_no_timeout() -> Self {
    let client = Self::default();
    client.set_timeout(None);
    client
}
/// Send GET requests with parameters
/// * `param` - GET parameters. Should be a JSON object/array. If value in map is not a string, will dump it
/// # Examples
/// ```
/// let client = WebClient::new();
/// client.set_verbose(true);
/// client.get_with_param("https://test.com/a", json::object!{"data": "param1"}, None);
/// client.get_with_param("https://test.com/a", json::object!{"daa": {"ad": "test"}}, None);
/// client.get_with_param("https://test.com/a", json::array![["daa", "param1"]], None);
/// ```
/// It will GET `https://test.com/a?data=param1`, `https://test.com/a?daa=%7B%22ad%22%3A%22test%22%7D`, `https://test.com/a?daa=param1`
pub async fn get_with_param<U: IntoUrl + Clone, J: ToJson, H: ToHeaders + Clone>(
&self,
url: U,
param: J,
headers: H,
) -> Option<Response> {
let u = url.into_url();
if u.is_err() {
log::error!("{} \"{}\"", gettext("Can not parse URL:"), u.unwrap_err());
return None;
}
let mut u = u.unwrap();
let obj = param.to_json();
if obj.is_none() {
return self.get(u, headers).await;
}
let obj = obj.unwrap();
if !obj.is_object() && !obj.is_array() {
log::error!(
"{} \"{}\"",
gettext("Parameters should be object or array:"),
obj
);
return None;
}
{
let mut query = u.query_pairs_mut();
if obj.is_object() {
for (k, v) in obj.entries() {
let s: String;
if v.is_string() {
s = String::from(v.as_str().unwrap());
} else {
s = v.dump();
}
query.append_pair(k, s.as_str());
}
} else {
for v in obj.members() {
if !v.is_object() {
log::error!("{} \"{}\"", gettext("Parameters should be array:"), v);
return None;
}
if v.len() < 2 {
log::error!("{} \"{}\"", gettext("Parameters need at least a value:"), v);
return None;
}
let okey = &v[0];
let key: String;
if okey.is_string() {
key = String::from(okey.as_str().unwrap());
} else {
key = okey.dump();
}
let mut mems = v.members();
mems.next();
for val in mems {
let s: String;
if val.is_string() {
s = String::from(val.as_str().unwrap());
} else {
s = val.dump();
}
query.append_pair(key.as_str(), s.as_str());
}
}
}
}
self.get(u.as_str(), headers).await
}
/// Send GET requests, retrying on failure
/// * `url` - URL to request
/// * `headers` - Extra HTTP headers for this request
///
/// The request is attempted once and then retried up to [Self::get_retry()] times
/// (forever if that value is < 0). Between attempts the client sleeps for the
/// configured retry interval; when no retry interval is configured it retries
/// immediately.
///
/// Returns the first response received, or [None] if every attempt failed.
pub async fn get<U: IntoUrl + Clone, H: ToHeaders + Clone>(
    &self,
    url: U,
    headers: H,
) -> Option<Response> {
    let retry = self.get_retry();
    let mut count = 0i64;
    while retry < 0 || count <= retry {
        if let Some(resp) = self._aget2(url.clone(), headers.clone()).await {
            return Some(resp);
        }
        count += 1;
        if retry < 0 || count <= retry {
            // Copy the interval out so the lock guard is released before sleeping.
            // A client created by `WebClient::new` has no interval list; treat
            // that as "no delay" instead of panicking.
            let wait = self
                .get_retry_interval()
                .as_ref()
                .map(|list| list[(count - 1).try_into().unwrap()]);
            if let Some(t) = wait {
                if !t.is_zero() {
                    log::info!(
                        "{}",
                        gettext("Retry after <num> seconds.")
                            .replace("<num>", format!("{}", t.as_secs_f64()).as_str())
                            .as_str()
                    );
                    tokio::time::sleep(t).await;
                }
            }
        }
        log::info!(
            "{}",
            gettext("Retry <count> times now.")
                .replace("<count>", format!("{}", count).as_str())
                .as_str()
        );
    }
    None
}
/// Send a single GET request, without any retry.
///
/// The request is built by [Self::_aget()] and executed; cookies set by the
/// response are stored and the status is written to the `webclient` debug log.
// NOTE(review): `print_error!` is a project macro; presumably it logs the
// message and makes this function return `None` on failure — confirm.
pub async fn _aget2<U: IntoUrl, H: ToHeaders>(&self, url: U, headers: H) -> Option<Response> {
    let request = print_error!(
        gettext("Failed to generate request:"),
        self._aget(url, headers)
    );
    let response = print_error!(
        gettext("Error when request:"),
        self.client.execute(request).await
    );
    self.handle_set_cookie(&response);
    log::debug!(target: "webclient", "{}", response.status());
    Some(response)
}
/// Build a GET request without sending it.
/// * `url` - URL to request
/// * `headers` - Extra HTTP headers, added after the client's default headers
///
/// The `Cookie` header and the timeout are applied when present, then the
/// request is passed through the registered middlewares.
pub fn _aget<U: IntoUrl, H: ToHeaders>(
    &self,
    url: U,
    headers: H,
) -> Result<Request, PixivDownloaderError> {
    let addr = url.as_str();
    log::debug!(target: "webclient", "GET {}", addr);
    let mut builder = self.client.get(addr);
    for (name, value) in self.get_headers().iter() {
        builder = builder.header(name, value);
        log::debug!(target: "webclient", "{}: {}", name, value);
    }
    if let Some(extra) = headers.to_headers() {
        for (name, value) in extra.iter() {
            builder = builder.header(name, value);
            log::debug!(target: "webclient", "{}: {}", name, value);
        }
    }
    let cookie = gen_cookie_header(self, addr);
    if !cookie.is_empty() {
        builder = builder.header("Cookie", cookie.as_str());
    }
    if let Some(limit) = self.get_timeout().as_ref() {
        builder = builder.timeout(*limit);
    }
    let request = builder.build()?;
    self.handle_req_middlewares(request)
}
/// Send POST requests with an optional URL-encoded form, retrying on failure
/// * `url` - URL to request
/// * `headers` - Extra HTTP headers for this request
/// * `form` - Form data; [None] sends the request without a form body
///
/// The request is attempted once and then retried up to [Self::get_retry()] times
/// (forever if that value is < 0). Between attempts the client sleeps for the
/// configured retry interval; when no retry interval is configured it retries
/// immediately.
///
/// Returns the first response received, or [None] if every attempt failed.
pub async fn post<U: IntoUrl + Clone, H: ToHeaders + Clone, S: Serialize + Clone>(
    &self,
    url: U,
    headers: H,
    form: Option<S>,
) -> Option<Response> {
    let retry = self.get_retry();
    let mut count = 0i64;
    while retry < 0 || count <= retry {
        let resp = self
            ._apost2(url.clone(), headers.clone(), form.clone())
            .await;
        if resp.is_some() {
            return resp;
        }
        count += 1;
        if retry < 0 || count <= retry {
            // Copy the interval out so the lock guard is released before sleeping.
            // A client created by `WebClient::new` has no interval list; treat
            // that as "no delay" instead of panicking.
            let wait = self
                .get_retry_interval()
                .as_ref()
                .map(|list| list[(count - 1).try_into().unwrap()]);
            if let Some(t) = wait {
                if !t.is_zero() {
                    log::info!(
                        "{}",
                        gettext("Retry after <num> seconds.")
                            .replace("<num>", format!("{}", t.as_secs_f64()).as_str())
                            .as_str()
                    );
                    tokio::time::sleep(t).await;
                }
            }
        }
        log::info!(
            "{}",
            gettext("Retry <count> times now.")
                .replace("<count>", format!("{}", count).as_str())
                .as_str()
        );
    }
    None
}
/// Send POST requests with a multipart form, retrying on failure
/// * `url` - URL to request
/// * `headers` - Extra HTTP headers for this request
/// * `form` - Form data; converted to a fresh multipart form for every attempt
///
/// The request is attempted once and then retried up to [Self::get_retry()] times
/// (forever if that value is < 0). Between attempts the client sleeps for the
/// configured retry interval; when no retry interval is configured it retries
/// immediately.
///
/// Returns the first response received, or [None] if every attempt failed.
// NOTE(review): `print_error!` is a project macro; presumably it logs the
// message and makes this function return `None` when the form can not be
// generated — confirm.
pub async fn post_multipart<U: IntoUrl + Clone, H: ToHeaders + Clone>(
    &self,
    url: U,
    headers: H,
    form: FormData,
) -> Option<Response> {
    let retry = self.get_retry();
    let mut count = 0i64;
    while retry < 0 || count <= retry {
        let f = print_error!(gettext("Failed to generate form:"), form.to_form().await);
        let resp = self
            ._apost_multipart2(url.clone(), headers.clone(), f)
            .await;
        if resp.is_some() {
            return resp;
        }
        count += 1;
        if retry < 0 || count <= retry {
            // Copy the interval out so the lock guard is released before sleeping.
            // A client created by `WebClient::new` has no interval list; treat
            // that as "no delay" instead of panicking.
            let wait = self
                .get_retry_interval()
                .as_ref()
                .map(|list| list[(count - 1).try_into().unwrap()]);
            if let Some(t) = wait {
                if !t.is_zero() {
                    log::info!(
                        "{}",
                        gettext("Retry after <num> seconds.")
                            .replace("<num>", format!("{}", t.as_secs_f64()).as_str())
                            .as_str()
                    );
                    tokio::time::sleep(t).await;
                }
            }
        }
        log::info!(
            "{}",
            gettext("Retry <count> times now.")
                .replace("<count>", format!("{}", count).as_str())
                .as_str()
        );
    }
    None
}
/// Send a single POST request with an optional URL-encoded form, without any retry.
///
/// The request is built by [Self::_apost()] and executed; cookies set by the
/// response are stored and the status is written to the `webclient` debug log.
// NOTE(review): `print_error!` is a project macro; presumably it logs the
// message and makes this function return `None` on failure — confirm.
pub async fn _apost2<U: IntoUrl, H: ToHeaders, S: Serialize>(
    &self,
    url: U,
    headers: H,
    form: Option<S>,
) -> Option<Response> {
    let request = print_error!(
        gettext("Failed to generate request:"),
        self._apost(url, headers, form)
    );
    let response = print_error!(
        gettext("Error when request:"),
        self.client.execute(request).await
    );
    self.handle_set_cookie(&response);
    log::debug!(target: "webclient", "{}", response.status());
    Some(response)
}
/// Build a POST request with an optional URL-encoded form, without sending it.
/// * `url` - URL to request
/// * `headers` - Extra HTTP headers, added after the client's default headers
/// * `form` - Form data; [None] builds the request without a form body
///
/// The `Cookie` header and the timeout are applied when present, then the
/// request is passed through the registered middlewares.
pub fn _apost<U: IntoUrl, H: ToHeaders, S: Serialize>(
    &self,
    url: U,
    headers: H,
    form: Option<S>,
) -> Result<Request, PixivDownloaderError> {
    let addr = url.as_str();
    log::debug!(target: "webclient", "POST {}", addr);
    let mut builder = self.client.post(addr);
    for (name, value) in self.get_headers().iter() {
        builder = builder.header(name, value);
        log::debug!(target: "webclient", "{}: {}", name, value);
    }
    if let Some(extra) = headers.to_headers() {
        for (name, value) in extra.iter() {
            builder = builder.header(name, value);
            log::debug!(target: "webclient", "{}: {}", name, value);
        }
    }
    let cookie = gen_cookie_header(self, addr);
    if !cookie.is_empty() {
        builder = builder.header("Cookie", cookie.as_str());
    }
    if let Some(body) = form {
        builder = builder.form(&body);
    }
    if let Some(limit) = self.get_timeout().as_ref() {
        builder = builder.timeout(*limit);
    }
    let request = builder.build()?;
    self.handle_req_middlewares(request)
}
/// Send a single POST request with a multipart form, without any retry.
///
/// The request is built by [Self::_apost_multipart()] and executed; cookies set
/// by the response are stored and the status is written to the `webclient` debug log.
// NOTE(review): `print_error!` is a project macro; presumably it logs the
// message and makes this function return `None` on failure — confirm.
pub async fn _apost_multipart2<U: IntoUrl, H: ToHeaders>(
    &self,
    url: U,
    headers: H,
    form: Form,
) -> Option<Response> {
    let request = print_error!(
        gettext("Failed to generate request:"),
        self._apost_multipart(url, headers, form)
    );
    let response = print_error!(
        gettext("Error when request:"),
        self.client.execute(request).await
    );
    self.handle_set_cookie(&response);
    log::debug!(target: "webclient", "{}", response.status());
    Some(response)
}
/// Build a POST request with a multipart form, without sending it.
/// * `url` - URL to request
/// * `headers` - Extra HTTP headers, added after the client's default headers
/// * `form` - The multipart form used as request body
///
/// The `Cookie` header and the timeout are applied when present, then the
/// request is passed through the registered middlewares.
pub fn _apost_multipart<U: IntoUrl, H: ToHeaders>(
    &self,
    url: U,
    headers: H,
    form: Form,
) -> Result<Request, PixivDownloaderError> {
    let addr = url.as_str();
    log::debug!(target: "webclient", "POST {}", addr);
    let mut builder = self.client.post(addr);
    for (name, value) in self.get_headers().iter() {
        builder = builder.header(name, value);
        log::debug!(target: "webclient", "{}: {}", name, value);
    }
    if let Some(extra) = headers.to_headers() {
        for (name, value) in extra.iter() {
            builder = builder.header(name, value);
            log::debug!(target: "webclient", "{}: {}", name, value);
        }
    }
    let cookie = gen_cookie_header(self, addr);
    if !cookie.is_empty() {
        builder = builder.header("Cookie", cookie.as_str());
    }
    builder = builder.multipart(form);
    if let Some(limit) = self.get_timeout().as_ref() {
        builder = builder.timeout(*limit);
    }
    let request = builder.build()?;
    self.handle_req_middlewares(request)
}
}
/// Builds a client configured from the program options returned by `get_helper()`.
impl Default for WebClient {
    /// Create a client with the proxy chain, connect timeout, retry times,
    /// retry interval and request timeout taken from the options.
    ///
    /// # Panics
    /// Panics if the underlying HTTP client can not be built.
    fn default() -> Self {
        let opt = get_helper();
        let mut builder = ClientBuilder::new();
        let chain = opt.proxy_chain();
        if !chain.is_empty() {
            // Let the proxy chain pick the proxy for every URL.
            builder = builder.proxy(reqwest::Proxy::custom(move |url| chain.r#match(url)));
        }
        let http = builder
            .connect_timeout(opt.connect_timeout())
            .build()
            .unwrap();
        let client = Self::new(http);
        // Keep the built-in retry times when the options do not set one.
        if let Some(retry) = opt.retry() {
            client.set_retry(retry);
        }
        client
            .get_retry_interval_as_mut()
            .replace(opt.retry_interval());
        client.set_timeout(Some(opt.client_timeout()));
        client
    }
}