From bf17c29ea12e62cc7774b08eb24eaa073ab002c5 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Thu, 10 Mar 2022 22:38:04 +0800 Subject: [PATCH] add author-name-filters settings --- Cargo.lock | 35 +++++++ Cargo.toml | 1 + Language/pixiv_downloader.pot | 63 ++++++++---- Language/pixiv_downloader.zh_CN.po | 65 ++++++++---- src/author_name_filter.rs | 156 +++++++++++++++++++++++++++++ src/data/data.rs | 16 ++- src/data/json.rs | 4 +- src/download.rs | 2 +- src/main.rs | 3 + src/opthelper.rs | 15 +++ src/settings_list.rs | 2 + src/stdext.rs | 14 +++ 12 files changed, 330 insertions(+), 46 deletions(-) create mode 100644 src/author_name_filter.rs create mode 100644 src/stdext.rs diff --git a/Cargo.lock b/Cargo.lock index d8e5d7b..2f19a2f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -269,6 +269,12 @@ dependencies = [ "cc", ] +[[package]] +name = "convert_case" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" + [[package]] name = "core-foundation" version = "0.9.3" @@ -306,6 +312,19 @@ dependencies = [ "regex", ] +[[package]] +name = "derive_more" +version = "0.99.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "rustc_version", + "syn", +] + [[package]] name = "digest" version = "0.8.1" @@ -1076,6 +1095,7 @@ dependencies = [ "chrono", "cmake", "dateparser", + "derive_more", "futures-util", "getopts", "gettext", @@ -1250,6 +1270,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + [[package]] name = "rustls" version = "0.20.4" @@ -1320,6 +1349,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d" + [[package]] name = "serde" version = "1.0.136" diff --git a/Cargo.toml b/Cargo.toml index 8231dda..dda0b17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ edition = "2018" c_fixed_string = { version = "0.2", optional = true } chrono = "0.4" dateparser = "0.1.6" +derive_more = "0.99" futures-util = "0.3" getopts = "0.2" gettext = "0.4" diff --git a/Language/pixiv_downloader.pot b/Language/pixiv_downloader.pot index 5498358..87e4ece 100644 --- a/Language/pixiv_downloader.pot +++ b/Language/pixiv_downloader.pot @@ -2,7 +2,7 @@ msgid "" msgstr "" "Project-Id-Version: pixiv_downloader\n" -"POT-Creation-Date: 2022-03-10 17:17+0800\n" +"POT-Creation-Date: 2022-03-10 22:29+0800\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -13,6 +13,35 @@ msgstr "" "X-Poedit-Basepath: ../src\n" "X-Poedit-SearchPath-0: .\n" +#: author_name_filter.rs:21 +msgid "Failed to parse regex:" +msgstr "" + +#: author_name_filter.rs:46 retry_interval.rs:22 +msgid "Failed to get JSON object." +msgstr "" + +#: author_name_filter.rs:48 author_name_filter.rs:130 retry_interval.rs:61 +#: retry_interval.rs:65 +msgid "Unsupported JSON type." +msgstr "" + +#: author_name_filter.rs:120 +msgid "Failed to get filter's type." +msgstr "" + +#: author_name_filter.rs:121 +msgid "Failed to get filter's rule." +msgstr "" + +#: author_name_filter.rs:127 +msgid "Unknown filter's type:" +msgstr "" + +#: author_name_filter.rs:139 +msgid "Failed parse author name filters:" +msgstr "" + #: cookies.rs:75 cookies.rs:217 msgid "Warning: Failed to parse URL:" msgstr "" @@ -81,7 +110,7 @@ msgstr "" msgid "Failed to write file:" msgstr "" -#: data/data.rs:70 +#: data/data.rs:78 msgid "Failed to unescape string:" msgstr "" @@ -158,11 +187,11 @@ msgstr "" msgid "The location of config file." msgstr "" -#: opts.rs:136 settings_list.rs:10 +#: opts.rs:136 settings_list.rs:11 msgid "The location of cookies file. Used for web API." msgstr "" -#: opts.rs:142 settings_list.rs:11 +#: opts.rs:142 settings_list.rs:12 msgid "The language of translated tags." msgstr "" @@ -178,15 +207,15 @@ msgstr "" msgid "Skip overwrite existing file." msgstr "" -#: opts.rs:151 settings_list.rs:12 +#: opts.rs:151 settings_list.rs:13 msgid "Max retry count if request failed." msgstr "" -#: opts.rs:157 settings_list.rs:13 +#: opts.rs:157 settings_list.rs:14 msgid "The interval (in seconds) between two retries." msgstr "" -#: opts.rs:160 settings_list.rs:14 +#: opts.rs:160 settings_list.rs:15 msgid "Use data from webpage first." msgstr "" @@ -274,18 +303,10 @@ msgstr "" msgid "Warning: Failed to save cookies file:" msgstr "" -#: retry_interval.rs:22 -msgid "Failed to get JSON object." -msgstr "" - #: retry_interval.rs:37 retry_interval.rs:55 msgid "Failed to parse JSON number." msgstr "" -#: retry_interval.rs:61 retry_interval.rs:65 -msgid "Unsupported JSON type." -msgstr "" - #: settings.rs:29 msgid "Multiple type" msgstr "" @@ -322,10 +343,14 @@ msgstr "" msgid "Failed to flush file:" msgstr "" -#: settings_list.rs:9 +#: settings_list.rs:10 msgid "Pixiv's refresh tokens. Used to login." msgstr "" +#: settings_list.rs:16 +msgid "Remove the part which after these parttens." +msgstr "" + #: utils.rs:30 msgid "Do you want to delete file \"\"?" msgstr "" @@ -374,14 +399,14 @@ msgstr "" msgid "Error when downloading file:" msgstr "" -#: main.rs:68 +#: main.rs:71 msgid "Failed to save config file:" msgstr "" -#: main.rs:79 +#: main.rs:82 msgid "All available settings:" msgstr "" -#: main.rs:111 +#: main.rs:114 msgid "Can not read config file:" msgstr "" diff --git a/Language/pixiv_downloader.zh_CN.po b/Language/pixiv_downloader.zh_CN.po index fa296d4..d58bc85 100644 --- a/Language/pixiv_downloader.zh_CN.po +++ b/Language/pixiv_downloader.zh_CN.po @@ -1,8 +1,8 @@ msgid "" msgstr "" "Project-Id-Version: pixiv_downloader\n" -"POT-Creation-Date: 2022-03-10 17:17+0800\n" -"PO-Revision-Date: 2022-03-10 17:20+0800\n" +"POT-Creation-Date: 2022-03-10 22:29+0800\n" +"PO-Revision-Date: 2022-03-10 22:30+0800\n" "Last-Translator: lifegpc \n" "Language-Team: \n" "Language: zh_CN\n" @@ -14,6 +14,35 @@ msgstr "" "X-Poedit-Basepath: ../src\n" "X-Poedit-SearchPath-0: .\n" +#: author_name_filter.rs:21 +msgid "Failed to parse regex:" +msgstr "无法解析正则:" + +#: author_name_filter.rs:46 retry_interval.rs:22 +msgid "Failed to get JSON object." +msgstr "无法获取 JSON 对象。" + +#: author_name_filter.rs:48 author_name_filter.rs:130 retry_interval.rs:61 +#: retry_interval.rs:65 +msgid "Unsupported JSON type." +msgstr "不支持的 JSON 类型。" + +#: author_name_filter.rs:120 +msgid "Failed to get filter's type." +msgstr "无法获取过滤器类型。" + +#: author_name_filter.rs:121 +msgid "Failed to get filter's rule." +msgstr "无法获取过滤器规则。" + +#: author_name_filter.rs:127 +msgid "Unknown filter's type:" +msgstr "未知的过滤器类型:" + +#: author_name_filter.rs:139 +msgid "Failed parse author name filters:" +msgstr "无法解析作者名称过滤器:" + #: cookies.rs:75 cookies.rs:217 msgid "Warning: Failed to parse URL:" msgstr "警告:无法解析 URL:" @@ -82,7 +111,7 @@ msgstr "无法创建文件:" msgid "Failed to write file:" msgstr "无法写入文件:" -#: data/data.rs:70 +#: data/data.rs:78 msgid "Failed to unescape string:" msgstr "无法反转义字符串:" @@ -159,11 +188,11 @@ msgstr "打印帮助信息。" msgid "The location of config file." msgstr "设置文件的位置。" -#: opts.rs:136 settings_list.rs:10 +#: opts.rs:136 settings_list.rs:11 msgid "The location of cookies file. Used for web API." msgstr "cookies 文件的位置。用于网页 API。" -#: opts.rs:142 settings_list.rs:11 +#: opts.rs:142 settings_list.rs:12 msgid "The language of translated tags." msgstr "翻译后的标签语言。" @@ -179,15 +208,15 @@ msgstr "覆盖已有文件。" msgid "Skip overwrite existing file." msgstr "跳过覆盖已有文件。" -#: opts.rs:151 settings_list.rs:12 +#: opts.rs:151 settings_list.rs:13 msgid "Max retry count if request failed." msgstr "请求失败时最大重试次数。" -#: opts.rs:157 settings_list.rs:13 +#: opts.rs:157 settings_list.rs:14 msgid "The interval (in seconds) between two retries." msgstr "两次尝试的间隔时间(单位:秒)。" -#: opts.rs:160 settings_list.rs:14 +#: opts.rs:160 settings_list.rs:15 msgid "Use data from webpage first." msgstr "优先使用来自网页的数据。" @@ -275,18 +304,10 @@ msgstr "作品页面数据:" msgid "Warning: Failed to save cookies file:" msgstr "警告:无法保存 cookies 文件:" -#: retry_interval.rs:22 -msgid "Failed to get JSON object." -msgstr "无法获取 JSON 对象。" - #: retry_interval.rs:37 retry_interval.rs:55 msgid "Failed to parse JSON number." msgstr "无法解析 JSON 数字。" -#: retry_interval.rs:61 retry_interval.rs:65 -msgid "Unsupported JSON type." -msgstr "不支持的 JSON 类型。" - #: settings.rs:29 msgid "Multiple type" msgstr "多种类型" @@ -327,10 +348,14 @@ msgstr "无法将设置转换为 JSON 对象。" msgid "Failed to flush file:" msgstr "无法刷新文件缓冲区:" -#: settings_list.rs:9 +#: settings_list.rs:10 msgid "Pixiv's refresh tokens. Used to login." msgstr "Pixiv 的 refresh tokens。用于登录。" +#: settings_list.rs:16 +msgid "Remove the part which after these parttens." +msgstr "移除匹配的部分。" + #: utils.rs:30 msgid "Do you want to delete file \"\"?" msgstr "你想要删除文件 吗?" @@ -379,14 +404,14 @@ msgstr "请求时发生错误:" msgid "Error when downloading file:" msgstr "下载文件时发生错误:" -#: main.rs:68 +#: main.rs:71 msgid "Failed to save config file:" msgstr "无法保存设置文件:" -#: main.rs:79 +#: main.rs:82 msgid "All available settings:" msgstr "所有可用的设置:" -#: main.rs:111 +#: main.rs:114 msgid "Can not read config file:" msgstr "无法读取设置文件:" diff --git a/src/author_name_filter.rs b/src/author_name_filter.rs new file mode 100644 index 0000000..79b5426 --- /dev/null +++ b/src/author_name_filter.rs @@ -0,0 +1,156 @@ +use crate::data::json::ToJson; +use crate::gettext; +use crate::stdext::TryErr; +use json::JsonValue; +use regex::Regex; +use std::cmp::PartialEq; +use std::convert::From; +use std::convert::TryFrom; +use std::fmt::Display; + +#[derive(Debug, derive_more::From, PartialEq)] +pub enum AuthorNameFilterError { + String(String), + Regex(regex::Error), +} + +impl Display for AuthorNameFilterError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::String(s) => { f.write_str(s) } + Self::Regex(r) => { f.write_fmt(format_args!("{} {}", gettext("Failed to parse regex:"), r)) } + } + } +} + +impl From<&str> for AuthorNameFilterError { + fn from(s: &str) -> Self { + Self::String(String::from(s)) + } +} + +#[derive(Clone, Debug, derive_more::From)] +pub enum AuthorNameFilter { + Simple(String), + Regex(Regex), +} + +/// Used to filter the author name +pub trait AuthorFiler { + /// Used to filter the author name + fn filter(&self, author: &str) -> String; +} + +impl AuthorNameFilter { + pub fn from_json(v: T) -> Result, AuthorNameFilterError> { + let v = v.to_json().try_err(gettext("Failed to get JSON object."))?; + if !v.is_array() { + Err(gettext("Unsupported JSON type."))?; + } + let mut re = Vec::new(); + for j in v.members() { + re.push(Self::try_from(j)?); + } + Ok(re) + } +} + +impl AuthorFiler for AuthorNameFilter { + fn filter(&self, author: &str) -> String { + match self { + Self::Simple(s) => { + match author.find(s) { + Some(i) => { String::from(&author[..i]) } + None => { String::from(author) } + } + } + Self::Regex(r) => { + r.replace(author, "").to_owned().to_string() + } + } + } +} + +impl AuthorFiler for Vec { + fn filter(&self, author: &str) -> String { + let ori = String::from(author); + for i in self { + let r = i.filter(author); + if r != ori { + return r; + } + } + return ori; + } +} + +impl From<&str> for AuthorNameFilter { + fn from(s: &str) -> Self { + Self::Simple(String::from(s)) + } +} + +impl PartialEq for AuthorNameFilter { + fn eq(&self, other: &Self) -> bool { + match self { + Self::Simple(s) => { + match other { + Self::Regex(_) => { false } + Self::Simple(t) => { s == t } + } + } + Self::Regex(r) => { + match other { + Self::Simple(_) => { false } + Self::Regex(s) => { + r.as_str() == s.as_str() + } + } + } + } + } +} + +impl TryFrom<&JsonValue> for AuthorNameFilter { + type Error = AuthorNameFilterError; + fn try_from(j: &JsonValue) -> Result { + if j.is_string() { + return Ok(Self::from(j.as_str().unwrap())); + } else if j.is_object() { + let t = (&j["type"]).as_str().try_err(gettext("Failed to get filter's type."))?.to_lowercase(); + let rule = (&j["rule"]).as_str().try_err(gettext("Failed to get filter's rule."))?; + if t == "simple" { + return Ok(Self::from(rule)); + } else if t == "regex" { + return Ok(Self::from(Regex::new(rule)?)); + } else { + Err(format!("{} {}", gettext("Unknown filter's type:"), t.as_str()))?; + } + } else { + Err(gettext("Unsupported JSON type."))?; + }; + return Err(Self::Error::from("")); + } +} + +pub fn check_author_name_filters(v: &JsonValue) -> bool { + let r = AuthorNameFilter::from_json(v); + if r.is_err() { + println!("{} {}", gettext("Failed parse author name filters:"), r.as_ref().unwrap_err()); + } + r.is_ok() +} + +#[test] +fn test_author_name_filter() { + assert!(AuthorNameFilter::from("s") == AuthorNameFilter::from("s")); + assert!(AuthorNameFilter::from(Regex::new("s").unwrap()) == AuthorNameFilter::from(Regex::new("s").unwrap())); + let l = AuthorNameFilter::from_json(json::array!["🌸"]).unwrap(); + assert_eq!(l, vec![AuthorNameFilter::from("🌸")]); + assert_eq!(l[0].filter("moco🌸お仕事募集中"), String::from("moco")); + let r = AuthorNameFilter::from(Regex::new(".?お仕事募集中").unwrap()); + assert_eq!(r.filter("moco🌸お仕事募集中"), String::from("moco")); + let l = AuthorNameFilter::from_json(json::array![{"type": "simple", "rule": "🌸"}, {"type": "regex", "rule": ".?お仕事募集中"}]).unwrap(); + assert_eq!(l, vec![AuthorNameFilter::from("🌸"), AuthorNameFilter::from(r)]); + assert_eq!(l.filter("moco<お仕事募集中🌸お仕事募集中"), String::from("moco<お仕事募集中")); +} diff --git a/src/data/data.rs b/src/data/data.rs index a8d0038..320bbf1 100644 --- a/src/data/data.rs +++ b/src/data/data.rs @@ -1,4 +1,6 @@ +use crate::author_name_filter::AuthorFiler; use crate::gettext; +use crate::opthelper::OptHelper; use crate::pixiv_link::ToPixivID; use crate::pixiv_link::PixivID; use json::JsonValue; @@ -6,7 +8,7 @@ use std::convert::TryInto; use xml::unescape; /// Pixiv's basic data -pub struct PixivData { +pub struct PixivData<'a> { /// ID pub id: PixivID, /// The title @@ -14,10 +16,11 @@ pub struct PixivData { /// The author pub author: Option, pub description: Option, + helper: OptHelper<'a>, } -impl PixivData { - pub fn new(id: T) -> Option { +impl<'a> PixivData<'a> { + pub fn new(id: T, helper: OptHelper<'a>) -> Option { let i = id.to_pixiv_id(); if i.is_none() { return None; @@ -27,6 +30,7 @@ impl PixivData { title: None, author: None, description: None, + helper: helper, }) } @@ -54,7 +58,11 @@ impl PixivData { if self.author.is_none() || allow_overwrite { let author = value["userName"].as_str(); if author.is_some() { - self.author = Some(String::from(author.unwrap())); + let au = author.unwrap(); + match self.helper.author_name_filters() { + Some(l) => { self.author = Some(l.filter(au)) } + None => { self.author = Some(String::from(author.unwrap())); } + } } } if self.description.is_none() || allow_overwrite { diff --git a/src/data/json.rs b/src/data/json.rs index 094ba8f..015bf49 100644 --- a/src/data/json.rs +++ b/src/data/json.rs @@ -74,13 +74,13 @@ impl JSONDataFile { } } -impl From for JSONDataFile { +impl<'a> From> for JSONDataFile { fn from(p: PixivData) -> Self { JSONDataFile::from(&p) } } -impl From<&PixivData> for JSONDataFile { +impl<'a> From<&'a PixivData<'a>> for JSONDataFile { fn from(p: &PixivData) -> Self { let mut f = Self { id: p.id.clone(), diff --git a/src/download.rs b/src/download.rs index fa7b2c6..92f51b5 100644 --- a/src/download.rs +++ b/src/download.rs @@ -74,7 +74,7 @@ impl Main { } let base = PathBuf::from("."); let json_file = base.join(format!("{}.json", id)); - let mut datas = PixivData::new(id).unwrap(); + let mut datas = PixivData::new(id, pw.helper.clone()).unwrap(); if ajax_ver { datas.from_web_page_ajax_data(&re, true); } else { diff --git a/src/main.rs b/src/main.rs index 2759880..85bea78 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ extern crate c_fixed_string; extern crate chrono; extern crate dateparser; +extern crate derive_more; extern crate futures_util; extern crate json; #[cfg(feature = "int-enum")] @@ -21,6 +22,7 @@ extern crate xml; #[cfg(feature = "exif")] #[doc(hidden)] mod _exif; +mod author_name_filter; mod cookies; mod data; mod download; @@ -37,6 +39,7 @@ mod pixiv_web; mod retry_interval; mod settings; mod settings_list; +mod stdext; mod utils; mod webclient; diff --git a/src/opthelper.rs b/src/opthelper.rs index ad2b992..4fee114 100644 --- a/src/opthelper.rs +++ b/src/opthelper.rs @@ -1,3 +1,4 @@ +use crate::author_name_filter::AuthorNameFilter; use crate::opts::CommandOpts; use crate::list::NonTailList; use crate::retry_interval::parse_retry_interval_from_json; @@ -12,9 +13,17 @@ pub struct OptHelper<'a> { /// Settings settings: &'a SettingStore, default_retry_interval: NonTailList, + _author_name_filters: Option>, } impl<'a> OptHelper<'a> { + pub fn author_name_filters(&self) -> Option<&Vec> { + if self.settings.have("author-name-filters") { + return self._author_name_filters.as_ref(); + } + None + } + /// return cookies location, no any check pub fn cookies(&self) -> Option { if self.opt.cookies.is_some() { @@ -40,10 +49,16 @@ impl<'a> OptHelper<'a> { pub fn new(opt: &'a CommandOpts, settings: &'a SettingStore) -> Self { let mut l = NonTailList::default(); l += Duration::new(3, 0); + let _author_name_filters = if settings.have("author-name-filters") { + Some(AuthorNameFilter::from_json(settings.get("author-name-filters").unwrap()).unwrap()) + } else { + None + }; Self { opt, settings, default_retry_interval: l, + _author_name_filters: _author_name_filters, } } diff --git a/src/settings_list.rs b/src/settings_list.rs index f4e8f3d..ed7a427 100644 --- a/src/settings_list.rs +++ b/src/settings_list.rs @@ -1,3 +1,4 @@ +use crate::author_name_filter::check_author_name_filters; use crate::gettext; use crate::retry_interval::check_retry_interval; use crate::settings::SettingDes; @@ -12,6 +13,7 @@ pub fn get_settings_list() -> Vec { SettingDes::new("retry", gettext("Max retry count if request failed."), JsonValueType::Number, Some(check_u64)).unwrap(), SettingDes::new("retry-interval", gettext("The interval (in seconds) between two retries."), JsonValueType::Multiple, Some(check_retry_interval)).unwrap(), SettingDes::new("use-webpage", gettext("Use data from webpage first."), JsonValueType::Boolean, None).unwrap(), + SettingDes::new("author-name-filters", gettext("Remove the part which after these parttens."), JsonValueType::Array, Some(check_author_name_filters)).unwrap(), ] } diff --git a/src/stdext.rs b/src/stdext.rs new file mode 100644 index 0000000..f098aa6 --- /dev/null +++ b/src/stdext.rs @@ -0,0 +1,14 @@ +/// Try with custom error message +pub trait TryErr { + /// try with custom error message + fn try_err(&self, err: E) -> Result; +} + +impl, E> TryErr for Option { + fn try_err(&self, v: E) -> Result { + match self { + Some(r) => { Ok(r.to_owned()) } + None => { Err(v) } + } + } +}