add author-name-filters settings

This commit is contained in:
2022-03-10 22:38:04 +08:00
parent 54fdca5b15
commit bf17c29ea1
12 changed files with 330 additions and 46 deletions

35
Cargo.lock generated
View File

@@ -269,6 +269,12 @@ dependencies = [
"cc",
]
[[package]]
name = "convert_case"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e"
[[package]]
name = "core-foundation"
version = "0.9.3"
@@ -306,6 +312,19 @@ dependencies = [
"regex",
]
[[package]]
name = "derive_more"
version = "0.99.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321"
dependencies = [
"convert_case",
"proc-macro2",
"quote",
"rustc_version",
"syn",
]
[[package]]
name = "digest"
version = "0.8.1"
@@ -1076,6 +1095,7 @@ dependencies = [
"chrono",
"cmake",
"dateparser",
"derive_more",
"futures-util",
"getopts",
"gettext",
@@ -1250,6 +1270,15 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc_version"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366"
dependencies = [
"semver",
]
[[package]]
name = "rustls"
version = "0.20.4"
@@ -1320,6 +1349,12 @@ dependencies = [
"libc",
]
[[package]]
name = "semver"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a3381e03edd24287172047536f20cabde766e2cd3e65e6b00fb3af51c4f38d"
[[package]]
name = "serde"
version = "1.0.136"

View File

@@ -9,6 +9,7 @@ edition = "2018"
c_fixed_string = { version = "0.2", optional = true }
chrono = "0.4"
dateparser = "0.1.6"
derive_more = "0.99"
futures-util = "0.3"
getopts = "0.2"
gettext = "0.4"

View File

@@ -2,7 +2,7 @@
msgid ""
msgstr ""
"Project-Id-Version: pixiv_downloader\n"
"POT-Creation-Date: 2022-03-10 17:17+0800\n"
"POT-Creation-Date: 2022-03-10 22:29+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <[email protected]>\n"
@@ -13,6 +13,35 @@ msgstr ""
"X-Poedit-Basepath: ../src\n"
"X-Poedit-SearchPath-0: .\n"
#: author_name_filter.rs:21
msgid "Failed to parse regex:"
msgstr ""
#: author_name_filter.rs:46 retry_interval.rs:22
msgid "Failed to get JSON object."
msgstr ""
#: author_name_filter.rs:48 author_name_filter.rs:130 retry_interval.rs:61
#: retry_interval.rs:65
msgid "Unsupported JSON type."
msgstr ""
#: author_name_filter.rs:120
msgid "Failed to get filter's type."
msgstr ""
#: author_name_filter.rs:121
msgid "Failed to get filter's rule."
msgstr ""
#: author_name_filter.rs:127
msgid "Unknown filter's type:"
msgstr ""
#: author_name_filter.rs:139
msgid "Failed parse author name filters:"
msgstr ""
#: cookies.rs:75 cookies.rs:217
msgid "Warning: Failed to parse URL:"
msgstr ""
@@ -81,7 +110,7 @@ msgstr ""
msgid "Failed to write file:"
msgstr ""
#: data/data.rs:70
#: data/data.rs:78
msgid "Failed to unescape string:"
msgstr ""
@@ -158,11 +187,11 @@ msgstr ""
msgid "The location of config file."
msgstr ""
#: opts.rs:136 settings_list.rs:10
#: opts.rs:136 settings_list.rs:11
msgid "The location of cookies file. Used for web API."
msgstr ""
#: opts.rs:142 settings_list.rs:11
#: opts.rs:142 settings_list.rs:12
msgid "The language of translated tags."
msgstr ""
@@ -178,15 +207,15 @@ msgstr ""
msgid "Skip overwrite existing file."
msgstr ""
#: opts.rs:151 settings_list.rs:12
#: opts.rs:151 settings_list.rs:13
msgid "Max retry count if request failed."
msgstr ""
#: opts.rs:157 settings_list.rs:13
#: opts.rs:157 settings_list.rs:14
msgid "The interval (in seconds) between two retries."
msgstr ""
#: opts.rs:160 settings_list.rs:14
#: opts.rs:160 settings_list.rs:15
msgid "Use data from webpage first."
msgstr ""
@@ -274,18 +303,10 @@ msgstr ""
msgid "Warning: Failed to save cookies file:"
msgstr ""
#: retry_interval.rs:22
msgid "Failed to get JSON object."
msgstr ""
#: retry_interval.rs:37 retry_interval.rs:55
msgid "Failed to parse JSON number."
msgstr ""
#: retry_interval.rs:61 retry_interval.rs:65
msgid "Unsupported JSON type."
msgstr ""
#: settings.rs:29
msgid "Multiple type"
msgstr ""
@@ -322,10 +343,14 @@ msgstr ""
msgid "Failed to flush file:"
msgstr ""
#: settings_list.rs:9
#: settings_list.rs:10
msgid "Pixiv's refresh tokens. Used to login."
msgstr ""
#: settings_list.rs:16
msgid "Remove the part which after these parttens."
msgstr ""
#: utils.rs:30
msgid "Do you want to delete file \"<file>\"?"
msgstr ""
@@ -374,14 +399,14 @@ msgstr ""
msgid "Error when downloading file:"
msgstr ""
#: main.rs:68
#: main.rs:71
msgid "Failed to save config file:"
msgstr ""
#: main.rs:79
#: main.rs:82
msgid "All available settings:"
msgstr ""
#: main.rs:111
#: main.rs:114
msgid "Can not read config file:"
msgstr ""

View File

@@ -1,8 +1,8 @@
msgid ""
msgstr ""
"Project-Id-Version: pixiv_downloader\n"
"POT-Creation-Date: 2022-03-10 17:17+0800\n"
"PO-Revision-Date: 2022-03-10 17:20+0800\n"
"POT-Creation-Date: 2022-03-10 22:29+0800\n"
"PO-Revision-Date: 2022-03-10 22:30+0800\n"
"Last-Translator: lifegpc <[email protected]>\n"
"Language-Team: \n"
"Language: zh_CN\n"
@@ -14,6 +14,35 @@ msgstr ""
"X-Poedit-Basepath: ../src\n"
"X-Poedit-SearchPath-0: .\n"
#: author_name_filter.rs:21
msgid "Failed to parse regex:"
msgstr "无法解析正则:"
#: author_name_filter.rs:46 retry_interval.rs:22
msgid "Failed to get JSON object."
msgstr "无法获取 JSON 对象。"
#: author_name_filter.rs:48 author_name_filter.rs:130 retry_interval.rs:61
#: retry_interval.rs:65
msgid "Unsupported JSON type."
msgstr "不支持的 JSON 类型。"
#: author_name_filter.rs:120
msgid "Failed to get filter's type."
msgstr "无法获取过滤器类型。"
#: author_name_filter.rs:121
msgid "Failed to get filter's rule."
msgstr "无法获取过滤器规则。"
#: author_name_filter.rs:127
msgid "Unknown filter's type:"
msgstr "未知的过滤器类型:"
#: author_name_filter.rs:139
msgid "Failed parse author name filters:"
msgstr "无法解析作者名称过滤器:"
#: cookies.rs:75 cookies.rs:217
msgid "Warning: Failed to parse URL:"
msgstr "警告:无法解析 URL:"
@@ -82,7 +111,7 @@ msgstr "无法创建文件:"
msgid "Failed to write file:"
msgstr "无法写入文件:"
#: data/data.rs:70
#: data/data.rs:78
msgid "Failed to unescape string:"
msgstr "无法反转义字符串:"
@@ -159,11 +188,11 @@ msgstr "打印帮助信息。"
msgid "The location of config file."
msgstr "设置文件的位置。"
#: opts.rs:136 settings_list.rs:10
#: opts.rs:136 settings_list.rs:11
msgid "The location of cookies file. Used for web API."
msgstr "cookies 文件的位置。用于网页 API。"
#: opts.rs:142 settings_list.rs:11
#: opts.rs:142 settings_list.rs:12
msgid "The language of translated tags."
msgstr "翻译后的标签语言。"
@@ -179,15 +208,15 @@ msgstr "覆盖已有文件。"
msgid "Skip overwrite existing file."
msgstr "跳过覆盖已有文件。"
#: opts.rs:151 settings_list.rs:12
#: opts.rs:151 settings_list.rs:13
msgid "Max retry count if request failed."
msgstr "请求失败时最大重试次数。"
#: opts.rs:157 settings_list.rs:13
#: opts.rs:157 settings_list.rs:14
msgid "The interval (in seconds) between two retries."
msgstr "两次尝试的间隔时间(单位:秒)。"
#: opts.rs:160 settings_list.rs:14
#: opts.rs:160 settings_list.rs:15
msgid "Use data from webpage first."
msgstr "优先使用来自网页的数据。"
@@ -275,18 +304,10 @@ msgstr "作品页面数据:"
msgid "Warning: Failed to save cookies file:"
msgstr "警告:无法保存 cookies 文件:"
#: retry_interval.rs:22
msgid "Failed to get JSON object."
msgstr "无法获取 JSON 对象。"
#: retry_interval.rs:37 retry_interval.rs:55
msgid "Failed to parse JSON number."
msgstr "无法解析 JSON 数字。"
#: retry_interval.rs:61 retry_interval.rs:65
msgid "Unsupported JSON type."
msgstr "不支持的 JSON 类型。"
#: settings.rs:29
msgid "Multiple type"
msgstr "多种类型"
@@ -327,10 +348,14 @@ msgstr "无法将设置转换为 JSON 对象。"
msgid "Failed to flush file:"
msgstr "无法刷新文件缓冲区:"
#: settings_list.rs:9
#: settings_list.rs:10
msgid "Pixiv's refresh tokens. Used to login."
msgstr "Pixiv 的 refresh tokens。用于登录。"
#: settings_list.rs:16
msgid "Remove the part which after these parttens."
msgstr "移除匹配的部分。"
#: utils.rs:30
msgid "Do you want to delete file \"<file>\"?"
msgstr "你想要删除文件 <file> 吗?"
@@ -379,14 +404,14 @@ msgstr "请求时发生错误:"
msgid "Error when downloading file:"
msgstr "下载文件时发生错误:"
#: main.rs:68
#: main.rs:71
msgid "Failed to save config file:"
msgstr "无法保存设置文件:"
#: main.rs:79
#: main.rs:82
msgid "All available settings:"
msgstr "所有可用的设置:"
#: main.rs:111
#: main.rs:114
msgid "Can not read config file:"
msgstr "无法读取设置文件:"

156
src/author_name_filter.rs Normal file
View File

@@ -0,0 +1,156 @@
use crate::data::json::ToJson;
use crate::gettext;
use crate::stdext::TryErr;
use json::JsonValue;
use regex::Regex;
use std::cmp::PartialEq;
use std::convert::From;
use std::convert::TryFrom;
use std::fmt::Display;
/// Errors that can occur while building author name filters.
///
/// `derive_more::From` generates a `From` conversion for each variant's
/// payload type, so both payloads can be propagated with `?`.
#[derive(Debug, derive_more::From, PartialEq)]
pub enum AuthorNameFilterError {
/// A plain error message.
String(String),
/// An error reported by the `regex` crate.
Regex(regex::Error),
}
impl Display for AuthorNameFilterError {
    /// Writes the user-facing description of the error.
    ///
    /// Plain messages are written verbatim; regex errors are prefixed with
    /// the translated "Failed to parse regex:" text.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Regex(source) => write!(f, "{} {}", gettext("Failed to parse regex:"), source),
            Self::String(message) => f.write_str(message),
        }
    }
}
impl From<&str> for AuthorNameFilterError {
    /// Wraps a borrowed message in the `String` variant by copying it.
    fn from(s: &str) -> Self {
        let message = s.to_owned();
        Self::String(message)
    }
}
/// A single rule used to clean up an author name.
///
/// `derive_more::From` generates a `From` conversion for each variant's
/// payload type.
#[derive(Clone, Debug, derive_more::From)]
pub enum AuthorNameFilter {
/// A literal marker string.
Simple(String),
/// A compiled regular expression.
Regex(Regex),
}
/// Something that can clean up an author name.
pub trait AuthorFiler {
/// Returns the filtered form of `author` as a newly allocated `String`.
///
/// The input is only borrowed and is never modified.
fn filter(&self, author: &str) -> String;
}
impl AuthorNameFilter {
pub fn from_json<T: ToJson>(v: T) -> Result<Vec<Self>, AuthorNameFilterError> {
let v = v.to_json().try_err(gettext("Failed to get JSON object."))?;
if !v.is_array() {
Err(gettext("Unsupported JSON type."))?;
}
let mut re = Vec::new();
for j in v.members() {
re.push(Self::try_from(j)?);
}
Ok(re)
}
}
impl AuthorFiler for AuthorNameFilter {
    /// Applies this single rule to `author`.
    ///
    /// * `Simple`: keeps only the text before the first occurrence of the
    ///   marker; the name is returned unchanged when the marker is absent.
    /// * `Regex`: removes the first match of the pattern (later matches are
    ///   left in place).
    fn filter(&self, author: &str) -> String {
        match self {
            Self::Regex(pattern) => pattern.replace(author, "").into_owned(),
            Self::Simple(marker) => {
                // `find` yields a byte offset on a char boundary, so slicing is safe.
                let end = author.find(marker).unwrap_or(author.len());
                author[..end].to_owned()
            }
        }
    }
}
impl<T: AuthorFiler> AuthorFiler for Vec<T> {
    /// Tries every filter in order against the original name and returns the
    /// first result that differs from it.
    ///
    /// Filters are not chained: each one sees the untouched `author`. When no
    /// filter changes the name (or the list is empty) a copy of `author` is
    /// returned.
    fn filter(&self, author: &str) -> String {
        self.iter()
            .map(|rule| rule.filter(author))
            .find(|candidate| candidate.as_str() != author)
            .unwrap_or_else(|| author.to_owned())
    }
}
impl From<&str> for AuthorNameFilter {
    /// Builds a `Simple` filter whose marker is a copy of `s`.
    fn from(s: &str) -> Self {
        let marker = s.to_owned();
        Self::Simple(marker)
    }
}
impl PartialEq for AuthorNameFilter {
    /// Two filters are equal when they are the same variant and carry the
    /// same text: the marker for `Simple`, the pattern source (`as_str`) for
    /// `Regex`. Filters of different variants are never equal.
    fn eq(&self, other: &Self) -> bool {
        match (self, other) {
            (Self::Simple(lhs), Self::Simple(rhs)) => lhs == rhs,
            (Self::Regex(lhs), Self::Regex(rhs)) => lhs.as_str() == rhs.as_str(),
            (Self::Simple(_), Self::Regex(_)) | (Self::Regex(_), Self::Simple(_)) => false,
        }
    }
}
impl TryFrom<&JsonValue> for AuthorNameFilter {
type Error = AuthorNameFilterError;
fn try_from(j: &JsonValue) -> Result<Self, Self::Error> {
if j.is_string() {
return Ok(Self::from(j.as_str().unwrap()));
} else if j.is_object() {
let t = (&j["type"]).as_str().try_err(gettext("Failed to get filter's type."))?.to_lowercase();
let rule = (&j["rule"]).as_str().try_err(gettext("Failed to get filter's rule."))?;
if t == "simple" {
return Ok(Self::from(rule));
} else if t == "regex" {
return Ok(Self::from(Regex::new(rule)?));
} else {
Err(format!("{} {}", gettext("Unknown filter's type:"), t.as_str()))?;
}
} else {
Err(gettext("Unsupported JSON type."))?;
};
return Err(Self::Error::from(""));
}
}
/// Reports whether `v` can be parsed by `AuthorNameFilter::from_json`.
///
/// On failure the translated "Failed parse author name filters:" prefix and
/// the error are printed to standard output, and `false` is returned.
pub fn check_author_name_filters(v: &JsonValue) -> bool {
    match AuthorNameFilter::from_json(v) {
        Ok(_) => true,
        Err(problem) => {
            println!("{} {}", gettext("Failed parse author name filters:"), problem);
            false
        }
    }
}
#[test]
fn test_author_name_filter() {
    // Equality compares the marker text / the pattern source.
    assert_eq!(AuthorNameFilter::from("s"), AuthorNameFilter::from("s"));
    assert_eq!(
        AuthorNameFilter::from(Regex::new("s").unwrap()),
        AuthorNameFilter::from(Regex::new("s").unwrap())
    );

    // A bare JSON string is a simple filter that cuts the name at the marker.
    let bare = AuthorNameFilter::from_json(json::array!["🌸"]).unwrap();
    assert_eq!(bare, vec![AuthorNameFilter::from("🌸")]);
    assert_eq!(bare[0].filter("moco🌸お仕事募集中"), "moco");

    // A regex filter removes the first match.
    let suffix = AuthorNameFilter::from(Regex::new(".?お仕事募集中").unwrap());
    assert_eq!(suffix.filter("moco🌸お仕事募集中"), "moco");

    // Object form; in a list, the first filter that changes the name wins.
    let tagged = AuthorNameFilter::from_json(json::array![{"type": "simple", "rule": "🌸"}, {"type": "regex", "rule": ".?お仕事募集中"}]).unwrap();
    assert_eq!(tagged, vec![AuthorNameFilter::from("🌸"), suffix]);
    assert_eq!(tagged.filter("moco<お仕事募集中🌸お仕事募集中"), "moco<お仕事募集中");
}

View File

@@ -1,4 +1,6 @@
use crate::author_name_filter::AuthorFiler;
use crate::gettext;
use crate::opthelper::OptHelper;
use crate::pixiv_link::ToPixivID;
use crate::pixiv_link::PixivID;
use json::JsonValue;
@@ -6,7 +8,7 @@ use std::convert::TryInto;
use xml::unescape;
/// Pixiv's basic data
pub struct PixivData {
pub struct PixivData<'a> {
/// ID
pub id: PixivID,
/// The title
@@ -14,10 +16,11 @@ pub struct PixivData {
/// The author
pub author: Option<String>,
pub description: Option<String>,
helper: OptHelper<'a>,
}
impl PixivData {
pub fn new<T: ToPixivID>(id: T) -> Option<Self> {
impl<'a> PixivData<'a> {
pub fn new<T: ToPixivID>(id: T, helper: OptHelper<'a>) -> Option<Self> {
let i = id.to_pixiv_id();
if i.is_none() {
return None;
@@ -27,6 +30,7 @@ impl PixivData {
title: None,
author: None,
description: None,
helper: helper,
})
}
@@ -54,7 +58,11 @@ impl PixivData {
if self.author.is_none() || allow_overwrite {
let author = value["userName"].as_str();
if author.is_some() {
self.author = Some(String::from(author.unwrap()));
let au = author.unwrap();
match self.helper.author_name_filters() {
Some(l) => { self.author = Some(l.filter(au)) }
None => { self.author = Some(String::from(author.unwrap())); }
}
}
}
if self.description.is_none() || allow_overwrite {

View File

@@ -74,13 +74,13 @@ impl JSONDataFile {
}
}
impl From<PixivData> for JSONDataFile {
impl<'a> From<PixivData<'a>> for JSONDataFile {
fn from(p: PixivData) -> Self {
JSONDataFile::from(&p)
}
}
impl From<&PixivData> for JSONDataFile {
impl<'a> From<&'a PixivData<'a>> for JSONDataFile {
fn from(p: &PixivData) -> Self {
let mut f = Self {
id: p.id.clone(),

View File

@@ -74,7 +74,7 @@ impl Main {
}
let base = PathBuf::from(".");
let json_file = base.join(format!("{}.json", id));
let mut datas = PixivData::new(id).unwrap();
let mut datas = PixivData::new(id, pw.helper.clone()).unwrap();
if ajax_ver {
datas.from_web_page_ajax_data(&re, true);
} else {

View File

@@ -2,6 +2,7 @@
extern crate c_fixed_string;
extern crate chrono;
extern crate dateparser;
extern crate derive_more;
extern crate futures_util;
extern crate json;
#[cfg(feature = "int-enum")]
@@ -21,6 +22,7 @@ extern crate xml;
#[cfg(feature = "exif")]
#[doc(hidden)]
mod _exif;
mod author_name_filter;
mod cookies;
mod data;
mod download;
@@ -37,6 +39,7 @@ mod pixiv_web;
mod retry_interval;
mod settings;
mod settings_list;
mod stdext;
mod utils;
mod webclient;

View File

@@ -1,3 +1,4 @@
use crate::author_name_filter::AuthorNameFilter;
use crate::opts::CommandOpts;
use crate::list::NonTailList;
use crate::retry_interval::parse_retry_interval_from_json;
@@ -12,9 +13,17 @@ pub struct OptHelper<'a> {
/// Settings
settings: &'a SettingStore,
default_retry_interval: NonTailList<Duration>,
_author_name_filters: Option<Vec<AuthorNameFilter>>,
}
impl<'a> OptHelper<'a> {
/// Returns the parsed author name filters, or `None` when the
/// `author-name-filters` setting is not present.
pub fn author_name_filters(&self) -> Option<&Vec<AuthorNameFilter>> {
    if !self.settings.have("author-name-filters") {
        return None;
    }
    self._author_name_filters.as_ref()
}
/// return cookies location, no any check
pub fn cookies(&self) -> Option<String> {
if self.opt.cookies.is_some() {
@@ -40,10 +49,16 @@ impl<'a> OptHelper<'a> {
pub fn new(opt: &'a CommandOpts, settings: &'a SettingStore) -> Self {
    // The default retry interval list holds a single 3 second entry.
    let mut default_retry_interval = NonTailList::default();
    default_retry_interval += Duration::new(3, 0);
    // Parse the filters once up front. Both unwraps panic if the stored
    // setting is missing or cannot be parsed.
    let _author_name_filters = if settings.have("author-name-filters") {
        let raw = settings.get("author-name-filters").unwrap();
        let parsed = AuthorNameFilter::from_json(raw).unwrap();
        Some(parsed)
    } else {
        None
    };
    Self {
        opt,
        settings,
        default_retry_interval,
        _author_name_filters,
    }
}

View File

@@ -1,3 +1,4 @@
use crate::author_name_filter::check_author_name_filters;
use crate::gettext;
use crate::retry_interval::check_retry_interval;
use crate::settings::SettingDes;
@@ -12,6 +13,7 @@ pub fn get_settings_list() -> Vec<SettingDes> {
SettingDes::new("retry", gettext("Max retry count if request failed."), JsonValueType::Number, Some(check_u64)).unwrap(),
SettingDes::new("retry-interval", gettext("The interval (in seconds) between two retries."), JsonValueType::Multiple, Some(check_retry_interval)).unwrap(),
SettingDes::new("use-webpage", gettext("Use data from webpage first."), JsonValueType::Boolean, None).unwrap(),
SettingDes::new("author-name-filters", gettext("Remove the part which after these parttens."), JsonValueType::Array, Some(check_author_name_filters)).unwrap(),
]
}

14
src/stdext.rs Normal file
View File

@@ -0,0 +1,14 @@
/// Conversion into a `Result` with a caller-supplied error value.
///
/// `T` is the success type and `E` the error type of the produced `Result`.
pub trait TryErr<T, E> {
/// Returns `Ok` with the contained value, or `Err(err)` when there is none.
///
/// The receiver is only borrowed.
fn try_err(&self, err: E) -> Result<T, E>;
}
impl<T: ToOwned + ToOwned<Owned = T>, E> TryErr<T, E> for Option<T> {
    /// Maps `Some(x)` to `Ok` holding an owned copy of `x` (made with
    /// `to_owned`), and `None` to `Err(v)`. The option itself is left untouched.
    fn try_err(&self, v: E) -> Result<T, E> {
        self.as_ref().map(|inner| inner.to_owned()).ok_or(v)
    }
}