diff --git a/Cargo.lock b/Cargo.lock index 18cf73b..4c49508 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "RustyXML" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5" + [[package]] name = "adler" version = "1.0.2" @@ -1052,6 +1058,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" name = "pixiv_downloader" version = "0.0.1" dependencies = [ + "RustyXML", "bindgen", "c_fixed_string", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 8db5c29..08762d8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,7 @@ json = "0.12" utf16string = { version= "0.2", optional = true } regex = "1" reqwest = { version = "0.11", features = ["brotli", "deflate", "gzip", "rustls-tls", "socks", "stream"] } +RustyXML = "0.3" spin_on = "0.1.1" tokio = { version = "1.17", features = ["rt", "macros", "rt-multi-thread", "time"] } diff --git a/Language/pixiv_downloader.pot b/Language/pixiv_downloader.pot index 16d270e..abeb2d4 100644 --- a/Language/pixiv_downloader.pot +++ b/Language/pixiv_downloader.pot @@ -2,7 +2,7 @@ msgid "" msgstr "" "Project-Id-Version: pixiv_downloader\n" -"POT-Creation-Date: 2022-02-27 17:47+0800\n" +"POT-Creation-Date: 2022-03-02 21:58+0800\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -69,54 +69,63 @@ msgstr "" msgid "Can not parse expired time:" msgstr "" -#: cookies.rs:366 data/json.rs:39 settings.rs:392 webclient.rs:257 +#: cookies.rs:366 data/json.rs:53 settings.rs:392 webclient.rs:257 msgid "Failed to remove file:" msgstr "" -#: cookies.rs:372 data/json.rs:50 settings.rs:399 webclient.rs:263 +#: cookies.rs:372 data/json.rs:64 settings.rs:399 webclient.rs:263 msgid "Failed to create file:" msgstr "" -#: cookies.rs:379 data/json.rs:56 settings.rs:405 webclient.rs:276 +#: cookies.rs:379 data/json.rs:70 settings.rs:405 webclient.rs:276 msgid "Failed to write file:" msgstr "" -#: download.rs:15 pixiv_web.rs:53 +#: data/data.rs:62 +msgid "Failed to unescape string:" +msgstr "" + +#: download.rs:18 pixiv_web.rs:53 msgid "Failed to initialize pixiv web api client." msgstr "" -#: download.rs:22 +#: download.rs:25 msgid "Warning: Web api client not logined, some future may not work." msgstr "" -#: download.rs:45 +#: download.rs:49 msgid "Failed to get page count." msgstr "" -#: download.rs:54 +#: download.rs:58 msgid "Failed to get pages' data." msgstr "" -#: download.rs:61 +#: download.rs:67 msgid "Failed to save metadata to JSON file." msgstr "" -#: download.rs:69 +#: download.rs:75 download.rs:117 msgid "Failed to get original picture's link." msgstr "" -#: download.rs:75 +#: download.rs:81 download.rs:123 msgid "Failed to get file name from url:" msgstr "" -#: download.rs:82 download.rs:88 pixiv_web.rs:148 +#: download.rs:88 download.rs:94 download.rs:130 download.rs:136 +#: pixiv_web.rs:148 msgid "Failed to download image:" msgstr "" -#: download.rs:93 +#: download.rs:99 download.rs:141 msgid "Downloaded image:" msgstr "" +#: download.rs:108 download.rs:150 +msgid "Failed to add exif data to image:" +msgstr "" + #: opts.rs:76 msgid "Warning: The specified config file not found." msgstr "" @@ -185,18 +194,22 @@ msgstr "" msgid "Unknown config subcommand." msgstr "" -#: parser/metadata.rs:54 pixiv_web.rs:107 -msgid "Failed to parse JSON:" -msgstr "" - -#: parser/metadata.rs:76 +#: parser/description.rs:127 parser/metadata.rs:76 msgid "Failed to parse HTML:" msgstr "" -#: parser/metadata.rs:81 +#: parser/description.rs:132 parser/metadata.rs:81 msgid "Some errors occured during parsing:" msgstr "" +#: parser/description.rs:143 +msgid "There are some nodes still in stack:" +msgstr "" + +#: parser/metadata.rs:54 pixiv_web.rs:107 +msgid "Failed to parse JSON:" +msgstr "" + #: pixiv_web.rs:68 pixiv_web.rs:73 msgid "Failed to get main page:" msgstr "" @@ -277,15 +290,15 @@ msgstr "" msgid "Pixiv's refresh tokens. Used to login." msgstr "" -#: utils.rs:29 +#: utils.rs:30 msgid "Do you want to delete file \"\"?" msgstr "" -#: utils.rs:49 webclient.rs:143 +#: utils.rs:51 webclient.rs:143 msgid "Can not parse URL:" msgstr "" -#: utils.rs:56 +#: utils.rs:58 msgid "Failed to get file name from path:" msgstr "" @@ -317,14 +330,14 @@ msgstr "" msgid "Error when downloading file:" msgstr "" -#: main.rs:50 +#: main.rs:62 msgid "Failed to save config file:" msgstr "" -#: main.rs:61 +#: main.rs:73 msgid "All available settings:" msgstr "" -#: main.rs:93 +#: main.rs:105 msgid "Can not read config file:" msgstr "" diff --git a/Language/pixiv_downloader.zh_CN.po b/Language/pixiv_downloader.zh_CN.po index 70c962a..4fef741 100644 --- a/Language/pixiv_downloader.zh_CN.po +++ b/Language/pixiv_downloader.zh_CN.po @@ -1,8 +1,8 @@ msgid "" msgstr "" "Project-Id-Version: pixiv_downloader\n" -"POT-Creation-Date: 2022-02-27 17:47+0800\n" -"PO-Revision-Date: 2022-02-27 17:48+0800\n" +"POT-Creation-Date: 2022-03-02 21:58+0800\n" +"PO-Revision-Date: 2022-03-02 22:02+0800\n" "Last-Translator: lifegpc \n" "Language-Team: \n" "Language: zh_CN\n" @@ -70,54 +70,63 @@ msgstr "无效的Cookie:" msgid "Can not parse expired time:" msgstr "无法解析过期时间:" -#: cookies.rs:366 data/json.rs:39 settings.rs:392 webclient.rs:257 +#: cookies.rs:366 data/json.rs:53 settings.rs:392 webclient.rs:257 msgid "Failed to remove file:" msgstr "无法删除文件:" -#: cookies.rs:372 data/json.rs:50 settings.rs:399 webclient.rs:263 +#: cookies.rs:372 data/json.rs:64 settings.rs:399 webclient.rs:263 msgid "Failed to create file:" msgstr "无法创建文件:" -#: cookies.rs:379 data/json.rs:56 settings.rs:405 webclient.rs:276 +#: cookies.rs:379 data/json.rs:70 settings.rs:405 webclient.rs:276 msgid "Failed to write file:" msgstr "无法写入文件:" -#: download.rs:15 pixiv_web.rs:53 +#: data/data.rs:62 +msgid "Failed to unescape string:" +msgstr "无法反转义字符串:" + +#: download.rs:18 pixiv_web.rs:53 msgid "Failed to initialize pixiv web api client." msgstr "无法初始化 Pixiv 网页 API 客户端。" -#: download.rs:22 +#: download.rs:25 msgid "Warning: Web api client not logined, some future may not work." msgstr "警告:Web API 客户端未登录,一些功能可能无法工作。" -#: download.rs:45 +#: download.rs:49 msgid "Failed to get page count." msgstr "无法获取页数。" -#: download.rs:54 +#: download.rs:58 msgid "Failed to get pages' data." msgstr "无法获取每页数据。" -#: download.rs:61 +#: download.rs:67 msgid "Failed to save metadata to JSON file." msgstr "无法将元数据保存到 JSON 文件。" -#: download.rs:69 +#: download.rs:75 download.rs:117 msgid "Failed to get original picture's link." msgstr "无法获取原图链接。" -#: download.rs:75 +#: download.rs:81 download.rs:123 msgid "Failed to get file name from url:" msgstr "无法从 URL 获取文件名:" -#: download.rs:82 download.rs:88 pixiv_web.rs:148 +#: download.rs:88 download.rs:94 download.rs:130 download.rs:136 +#: pixiv_web.rs:148 msgid "Failed to download image:" msgstr "无法下载图片:" -#: download.rs:93 +#: download.rs:99 download.rs:141 msgid "Downloaded image:" msgstr "已下载图片:" +#: download.rs:108 download.rs:150 +msgid "Failed to add exif data to image:" +msgstr "无法往图片增加 EXIF 数据:" + #: opts.rs:76 msgid "Warning: The specified config file not found." msgstr "警告:没有找到指定的设置文件。" @@ -186,18 +195,22 @@ msgstr "没有指定更详细的指令。" msgid "Unknown config subcommand." msgstr "未知的 config 子指令。" -#: parser/metadata.rs:54 pixiv_web.rs:107 -msgid "Failed to parse JSON:" -msgstr "无法解析 JSON:" - -#: parser/metadata.rs:76 +#: parser/description.rs:127 parser/metadata.rs:76 msgid "Failed to parse HTML:" msgstr "无法解析 HTML:" -#: parser/metadata.rs:81 +#: parser/description.rs:132 parser/metadata.rs:81 msgid "Some errors occured during parsing:" msgstr "在解析中发生了一些错误:" +#: parser/description.rs:143 +msgid "There are some nodes still in stack:" +msgstr "堆栈中依旧有一些节点:" + +#: parser/metadata.rs:54 pixiv_web.rs:107 +msgid "Failed to parse JSON:" +msgstr "无法解析 JSON:" + #: pixiv_web.rs:68 pixiv_web.rs:73 msgid "Failed to get main page:" msgstr "无法获取主页:" @@ -282,15 +295,15 @@ msgstr "无法刷新文件缓冲区:" msgid "Pixiv's refresh tokens. Used to login." msgstr "Pixiv 的 refresh tokens。用于登录。" -#: utils.rs:29 +#: utils.rs:30 msgid "Do you want to delete file \"\"?" msgstr "你想要删除文件 吗?" -#: utils.rs:49 webclient.rs:143 +#: utils.rs:51 webclient.rs:143 msgid "Can not parse URL:" msgstr "无法解析 URL:" -#: utils.rs:56 +#: utils.rs:58 msgid "Failed to get file name from path:" msgstr "无法从路径获取文件名:" @@ -322,14 +335,14 @@ msgstr "请求时发生错误:" msgid "Error when downloading file:" msgstr "下载文件时发生错误:" -#: main.rs:50 +#: main.rs:62 msgid "Failed to save config file:" msgstr "无法保存设置文件:" -#: main.rs:61 +#: main.rs:73 msgid "All available settings:" msgstr "所有可用的设置:" -#: main.rs:93 +#: main.rs:105 msgid "Can not read config file:" msgstr "无法读取设置文件:" diff --git a/src/data/data.rs b/src/data/data.rs index 3141a24..382bbaf 100644 --- a/src/data/data.rs +++ b/src/data/data.rs @@ -1,7 +1,9 @@ +use crate::gettext; use crate::pixiv_link::ToPixivID; use crate::pixiv_link::PixivID; use json::JsonValue; use std::convert::TryInto; +use xml::unescape; /// Pixiv's basic data pub struct PixivData { @@ -11,6 +13,7 @@ pub struct PixivData { pub title: Option, /// The author pub author: Option, + pub description: Option, } impl PixivData { @@ -23,6 +26,7 @@ impl PixivData { id: i.unwrap(), title: None, author: None, + description: None, }) } @@ -45,5 +49,20 @@ impl PixivData { self.author = Some(String::from(author.unwrap())); } } + if self.description.is_none() || allow_overwrite { + let mut description = value["illust"][ids.as_str()]["description"].as_str(); + if description.is_none() { + description = value["illust"][ids.as_str()]["illustComment"].as_str(); + } + if description.is_some() { + let re = unescape(description.unwrap()); + match re { + Ok(s) => { self.description = Some(s); } + Err(s) => { + println!("{} {}", gettext("Failed to unescape string:"), s.as_str()); + } + } + } + } } } diff --git a/src/data/exif.rs b/src/data/exif.rs index ce80d09..38da491 100644 --- a/src/data/exif.rs +++ b/src/data/exif.rs @@ -4,6 +4,7 @@ use crate::exif::ExifImage; use crate::exif::ExifKey; use crate::exif::ExifTypeID; use crate::exif::ExifValue; +use crate::parser::description::parse_description; use std::convert::TryFrom; use std::ffi::OsStr; use utf16string::LittleEndian; @@ -48,12 +49,31 @@ fn add_image_author(data: &mut ExifData, d: &PixivData) -> Result<(), ()> { Ok(()) } +fn add_image_comment(data: &mut ExifData, d: &PixivData) -> Result<(), ()> { + if d.description.is_none() { + return Ok(()); + } + let desc = parse_description(d.description.as_ref().unwrap()); + let desc = if desc.is_some() { + desc.as_ref().unwrap() + } else { + d.description.as_ref().unwrap() + }; + let key = ExifKey::try_from("Exif.Image.XPComment")?; + let mut value = ExifValue::try_from(ExifTypeID::BYTE)?; + let s: WString = WString::from(desc); + value.read(s.as_bytes(), None)?; + data.add(&key, &value)?; + Ok(()) +} + pub fn add_exifdata_to_image + ?Sized>(file_name: &S, data: &PixivData) -> Result<(), ()> { let mut f = ExifImage::new(file_name)?; let mut d = ExifData::new()?; add_image_id(&mut d, data)?; add_image_title(&mut d, data)?; add_image_author(&mut d, data)?; + add_image_comment(&mut d, data)?; f.set_exif_data(&d)?; f.write_metadata()?; Ok(()) diff --git a/src/data/json.rs b/src/data/json.rs index 99994f3..9e80563 100644 --- a/src/data/json.rs +++ b/src/data/json.rs @@ -1,5 +1,6 @@ use crate::data::data::PixivData; use crate::gettext; +use crate::parser::description::parse_description; use crate::pixiv_link::PixivID; use crate::pixiv_link::ToPixivID; use json::JsonValue; @@ -91,6 +92,13 @@ impl From<&PixivData> for JSONDataFile { if p.author.is_some() { f.add("author", p.author.as_ref().unwrap()).unwrap(); } + if p.description.is_some() { + f.add("description", p.description.as_ref().unwrap()).unwrap(); + let pd = parse_description(p.description.as_ref().unwrap()); + if pd.is_some() { + f.add("parsed_description", pd.unwrap()).unwrap(); + } + } f } } @@ -121,3 +129,9 @@ impl ToJson for &String { Some(JsonValue::String((*self).to_string())) } } + +impl ToJson for String { + fn to_json(&self) -> Option { + Some(JsonValue::String(self.to_string())) + } +} diff --git a/src/main.rs b/src/main.rs index fe1f315..8055231 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,6 +13,7 @@ extern crate regex; extern crate reqwest; #[cfg(feature = "utf16string")] extern crate utf16string; +extern crate xml; #[cfg(feature = "exif")] #[doc(hidden)] diff --git a/src/parser/description.rs b/src/parser/description.rs new file mode 100644 index 0000000..164f072 --- /dev/null +++ b/src/parser/description.rs @@ -0,0 +1,174 @@ +use crate::gettext; +use html_parser::Dom; +use html_parser::Node; +use std::collections::HashMap; +use std::default::Default; + +/// Reprent a node +#[derive(Debug)] +struct DescriptionNode { + /// Tag name + pub tag: String, + /// Output data in this node + pub data: String, + /// Attributes + pub attrs: HashMap, +} + +impl Default for DescriptionNode { + fn default() -> Self { + Self { + tag: String::from(""), + data: String::from(""), + attrs: HashMap::new(), + } + } +} + +impl DescriptionNode { + pub fn add_attr(&mut self, k: &str, v: &str) -> Option { + self.attrs.insert(String::from(k), String::from(v)) + } + + pub fn is_link(&self) -> bool { + if self.tag != "a" { + return false; + } + if !self.attrs.contains_key("href") { + return false; + } + let href = self.attrs.get("href").unwrap(); + if href.as_str() == self.data.as_str() { + return false; + } + true + } + + pub fn to_link(&self) -> String { + format!( + "[{}]({})", + self.data.as_str(), + self.attrs.get("href").unwrap() + ) + } +} + +/// A simple HTML parser to parse description HTML +pub struct DescriptionParser { + /// Current nodes stack + nodes: Vec, + /// Output + pub data: String, +} + +impl DescriptionParser { + pub fn new() -> Self { + Self { + nodes: Vec::new(), + data: String::from(""), + } + } + + pub fn iter(&mut self, node: &Node) { + match node { + Node::Comment(_) => {} + Node::Text(s) => { + if self.nodes.len() == 0 { + self.data += s; + } else { + self.nodes.last_mut().unwrap().data += s; + } + } + Node::Element(e) => { + let tag = e.name.as_str(); + if tag == "script" || tag == "style" { + return; + } else if tag == "br" { + if self.nodes.len() == 0 { + self.data += "\n"; + } else { + self.nodes.last_mut().unwrap().data += "\n"; + } + return; + } + let mut node = DescriptionNode::default(); + node.tag = tag.to_string(); + if tag == "a" { + let href = e.attributes.get("href"); + if href.is_some() { + let href = href.unwrap(); + if href.is_some() { + node.add_attr("href", href.as_ref().unwrap()); + } + } + } + self.nodes.push(node); + for n in e.children.iter() { + self.iter(n); + } + let node = self.nodes.pop().unwrap(); + let s = if node.is_link() { + node.to_link() + } else { + node.data + }; + if self.nodes.len() == 0 { + self.data += s.as_str(); + } else { + self.nodes.last_mut().unwrap().data += s.as_str(); + } + } + } + } + + pub fn parse(&mut self, desc: &str) -> Result<(), ()> { + let r = Dom::parse(desc); + if r.is_err() { + println!("{} {}", gettext("Failed to parse HTML:"), r.unwrap_err()); + return Err(()); + } + let dom = r.unwrap(); + if dom.errors.len() > 0 { + println!("{}", gettext("Some errors occured during parsing:")); + for i in dom.errors.iter() { + println!("{}", i); + } + } + for node in dom.children.iter() { + self.iter(node) + } + if self.nodes.len() != 0 { + println!( + "{} {:?}", + gettext("There are some nodes still in stack:"), + self.nodes + ); + return Err(()); + } + Ok(()) + } +} + +pub fn parse_description(desc: &str) -> Option { + let mut p = DescriptionParser::new(); + match p.parse(desc) { + Ok(_) => Some(p.data), + Err(_) => None, + } +} + +#[test] +fn test_parse_description() { + assert_eq!( + Some(String::from("a [example](https://a.com)")), + parse_description("a example") + ); + assert_eq!( + Some(String::from("a https://a.com")), + parse_description("a https://a.com") + ); + assert_eq!( + Some(String::from("a [a\n[bc](a.com)d](b.com)\ndata")), + parse_description("a a
bcd
data") + ); +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8adefde..e007d8a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,3 +1,4 @@ extern crate html_parser; +pub mod description; pub mod metadata;