add support to output description information

This commit is contained in:
2022-03-02 22:03:31 +08:00
parent 8f6fb37340
commit ded0e77956
10 changed files with 314 additions and 51 deletions

7
Cargo.lock generated
View File

@@ -2,6 +2,12 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "RustyXML"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5"
[[package]]
name = "adler"
version = "1.0.2"
@@ -1052,6 +1058,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
name = "pixiv_downloader"
version = "0.0.1"
dependencies = [
"RustyXML",
"bindgen",
"c_fixed_string",
"chrono",

View File

@@ -19,6 +19,7 @@ json = "0.12"
utf16string = { version= "0.2", optional = true }
regex = "1"
reqwest = { version = "0.11", features = ["brotli", "deflate", "gzip", "rustls-tls", "socks", "stream"] }
RustyXML = "0.3"
spin_on = "0.1.1"
tokio = { version = "1.17", features = ["rt", "macros", "rt-multi-thread", "time"] }

View File

@@ -2,7 +2,7 @@
msgid ""
msgstr ""
"Project-Id-Version: pixiv_downloader\n"
"POT-Creation-Date: 2022-02-27 17:47+0800\n"
"POT-Creation-Date: 2022-03-02 21:58+0800\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <[email protected]>\n"
@@ -69,54 +69,63 @@ msgstr ""
msgid "Can not parse expired time:"
msgstr ""
#: cookies.rs:366 data/json.rs:39 settings.rs:392 webclient.rs:257
#: cookies.rs:366 data/json.rs:53 settings.rs:392 webclient.rs:257
msgid "Failed to remove file:"
msgstr ""
#: cookies.rs:372 data/json.rs:50 settings.rs:399 webclient.rs:263
#: cookies.rs:372 data/json.rs:64 settings.rs:399 webclient.rs:263
msgid "Failed to create file:"
msgstr ""
#: cookies.rs:379 data/json.rs:56 settings.rs:405 webclient.rs:276
#: cookies.rs:379 data/json.rs:70 settings.rs:405 webclient.rs:276
msgid "Failed to write file:"
msgstr ""
#: download.rs:15 pixiv_web.rs:53
#: data/data.rs:62
msgid "Failed to unescape string:"
msgstr ""
#: download.rs:18 pixiv_web.rs:53
msgid "Failed to initialize pixiv web api client."
msgstr ""
#: download.rs:22
#: download.rs:25
msgid "Warning: Web api client not logined, some future may not work."
msgstr ""
#: download.rs:45
#: download.rs:49
msgid "Failed to get page count."
msgstr ""
#: download.rs:54
#: download.rs:58
msgid "Failed to get pages' data."
msgstr ""
#: download.rs:61
#: download.rs:67
msgid "Failed to save metadata to JSON file."
msgstr ""
#: download.rs:69
#: download.rs:75 download.rs:117
msgid "Failed to get original picture's link."
msgstr ""
#: download.rs:75
#: download.rs:81 download.rs:123
msgid "Failed to get file name from url:"
msgstr ""
#: download.rs:82 download.rs:88 pixiv_web.rs:148
#: download.rs:88 download.rs:94 download.rs:130 download.rs:136
#: pixiv_web.rs:148
msgid "Failed to download image:"
msgstr ""
#: download.rs:93
#: download.rs:99 download.rs:141
msgid "Downloaded image:"
msgstr ""
#: download.rs:108 download.rs:150
msgid "Failed to add exif data to image:"
msgstr ""
#: opts.rs:76
msgid "Warning: The specified config file not found."
msgstr ""
@@ -185,18 +194,22 @@ msgstr ""
msgid "Unknown config subcommand."
msgstr ""
#: parser/metadata.rs:54 pixiv_web.rs:107
msgid "Failed to parse JSON:"
msgstr ""
#: parser/metadata.rs:76
#: parser/description.rs:127 parser/metadata.rs:76
msgid "Failed to parse HTML:"
msgstr ""
#: parser/metadata.rs:81
#: parser/description.rs:132 parser/metadata.rs:81
msgid "Some errors occured during parsing:"
msgstr ""
#: parser/description.rs:143
msgid "There are some nodes still in stack:"
msgstr ""
#: parser/metadata.rs:54 pixiv_web.rs:107
msgid "Failed to parse JSON:"
msgstr ""
#: pixiv_web.rs:68 pixiv_web.rs:73
msgid "Failed to get main page:"
msgstr ""
@@ -277,15 +290,15 @@ msgstr ""
msgid "Pixiv's refresh tokens. Used to login."
msgstr ""
#: utils.rs:29
#: utils.rs:30
msgid "Do you want to delete file \"<file>\"?"
msgstr ""
#: utils.rs:49 webclient.rs:143
#: utils.rs:51 webclient.rs:143
msgid "Can not parse URL:"
msgstr ""
#: utils.rs:56
#: utils.rs:58
msgid "Failed to get file name from path:"
msgstr ""
@@ -317,14 +330,14 @@ msgstr ""
msgid "Error when downloading file:"
msgstr ""
#: main.rs:50
#: main.rs:62
msgid "Failed to save config file:"
msgstr ""
#: main.rs:61
#: main.rs:73
msgid "All available settings:"
msgstr ""
#: main.rs:93
#: main.rs:105
msgid "Can not read config file:"
msgstr ""

View File

@@ -1,8 +1,8 @@
msgid ""
msgstr ""
"Project-Id-Version: pixiv_downloader\n"
"POT-Creation-Date: 2022-02-27 17:47+0800\n"
"PO-Revision-Date: 2022-02-27 17:48+0800\n"
"POT-Creation-Date: 2022-03-02 21:58+0800\n"
"PO-Revision-Date: 2022-03-02 22:02+0800\n"
"Last-Translator: lifegpc <[email protected]>\n"
"Language-Team: \n"
"Language: zh_CN\n"
@@ -70,54 +70,63 @@ msgstr "无效的Cookie:"
msgid "Can not parse expired time:"
msgstr "无法解析过期时间:"
#: cookies.rs:366 data/json.rs:39 settings.rs:392 webclient.rs:257
#: cookies.rs:366 data/json.rs:53 settings.rs:392 webclient.rs:257
msgid "Failed to remove file:"
msgstr "无法删除文件:"
#: cookies.rs:372 data/json.rs:50 settings.rs:399 webclient.rs:263
#: cookies.rs:372 data/json.rs:64 settings.rs:399 webclient.rs:263
msgid "Failed to create file:"
msgstr "无法创建文件:"
#: cookies.rs:379 data/json.rs:56 settings.rs:405 webclient.rs:276
#: cookies.rs:379 data/json.rs:70 settings.rs:405 webclient.rs:276
msgid "Failed to write file:"
msgstr "无法写入文件:"
#: download.rs:15 pixiv_web.rs:53
#: data/data.rs:62
msgid "Failed to unescape string:"
msgstr "无法反转义字符串:"
#: download.rs:18 pixiv_web.rs:53
msgid "Failed to initialize pixiv web api client."
msgstr "无法初始化 Pixiv 网页 API 客户端。"
#: download.rs:22
#: download.rs:25
msgid "Warning: Web api client not logined, some future may not work."
msgstr "警告:Web API 客户端未登录,一些功能可能无法工作。"
#: download.rs:45
#: download.rs:49
msgid "Failed to get page count."
msgstr "无法获取页数。"
#: download.rs:54
#: download.rs:58
msgid "Failed to get pages' data."
msgstr "无法获取每页数据。"
#: download.rs:61
#: download.rs:67
msgid "Failed to save metadata to JSON file."
msgstr "无法将元数据保存到 JSON 文件。"
#: download.rs:69
#: download.rs:75 download.rs:117
msgid "Failed to get original picture's link."
msgstr "无法获取原图链接。"
#: download.rs:75
#: download.rs:81 download.rs:123
msgid "Failed to get file name from url:"
msgstr "无法从 URL 获取文件名:"
#: download.rs:82 download.rs:88 pixiv_web.rs:148
#: download.rs:88 download.rs:94 download.rs:130 download.rs:136
#: pixiv_web.rs:148
msgid "Failed to download image:"
msgstr "无法下载图片:"
#: download.rs:93
#: download.rs:99 download.rs:141
msgid "Downloaded image:"
msgstr "已下载图片:"
#: download.rs:108 download.rs:150
msgid "Failed to add exif data to image:"
msgstr "无法往图片增加 EXIF 数据:"
#: opts.rs:76
msgid "Warning: The specified config file not found."
msgstr "警告:没有找到指定的设置文件。"
@@ -186,18 +195,22 @@ msgstr "没有指定更详细的指令。"
msgid "Unknown config subcommand."
msgstr "未知的 config 子指令。"
#: parser/metadata.rs:54 pixiv_web.rs:107
msgid "Failed to parse JSON:"
msgstr "无法解析 JSON:"
#: parser/metadata.rs:76
#: parser/description.rs:127 parser/metadata.rs:76
msgid "Failed to parse HTML:"
msgstr "无法解析 HTML:"
#: parser/metadata.rs:81
#: parser/description.rs:132 parser/metadata.rs:81
msgid "Some errors occured during parsing:"
msgstr "在解析中发生了一些错误:"
#: parser/description.rs:143
msgid "There are some nodes still in stack:"
msgstr "堆栈中依旧有一些节点:"
#: parser/metadata.rs:54 pixiv_web.rs:107
msgid "Failed to parse JSON:"
msgstr "无法解析 JSON:"
#: pixiv_web.rs:68 pixiv_web.rs:73
msgid "Failed to get main page:"
msgstr "无法获取主页:"
@@ -282,15 +295,15 @@ msgstr "无法刷新文件缓冲区:"
msgid "Pixiv's refresh tokens. Used to login."
msgstr "Pixiv 的 refresh tokens。用于登录。"
#: utils.rs:29
#: utils.rs:30
msgid "Do you want to delete file \"<file>\"?"
msgstr "你想要删除文件 <file> 吗?"
#: utils.rs:49 webclient.rs:143
#: utils.rs:51 webclient.rs:143
msgid "Can not parse URL:"
msgstr "无法解析 URL:"
#: utils.rs:56
#: utils.rs:58
msgid "Failed to get file name from path:"
msgstr "无法从路径获取文件名:"
@@ -322,14 +335,14 @@ msgstr "请求时发生错误:"
msgid "Error when downloading file:"
msgstr "下载文件时发生错误:"
#: main.rs:50
#: main.rs:62
msgid "Failed to save config file:"
msgstr "无法保存设置文件:"
#: main.rs:61
#: main.rs:73
msgid "All available settings:"
msgstr "所有可用的设置:"
#: main.rs:93
#: main.rs:105
msgid "Can not read config file:"
msgstr "无法读取设置文件:"

View File

@@ -1,7 +1,9 @@
use crate::gettext;
use crate::pixiv_link::ToPixivID;
use crate::pixiv_link::PixivID;
use json::JsonValue;
use std::convert::TryInto;
use xml::unescape;
/// Pixiv's basic data
pub struct PixivData {
@@ -11,6 +13,7 @@ pub struct PixivData {
pub title: Option<String>,
/// The author
pub author: Option<String>,
pub description: Option<String>,
}
impl PixivData {
@@ -23,6 +26,7 @@ impl PixivData {
id: i.unwrap(),
title: None,
author: None,
description: None,
})
}
@@ -45,5 +49,20 @@ impl PixivData {
self.author = Some(String::from(author.unwrap()));
}
}
if self.description.is_none() || allow_overwrite {
let mut description = value["illust"][ids.as_str()]["description"].as_str();
if description.is_none() {
description = value["illust"][ids.as_str()]["illustComment"].as_str();
}
if description.is_some() {
let re = unescape(description.unwrap());
match re {
Ok(s) => { self.description = Some(s); }
Err(s) => {
println!("{} {}", gettext("Failed to unescape string:"), s.as_str());
}
}
}
}
}
}

View File

@@ -4,6 +4,7 @@ use crate::exif::ExifImage;
use crate::exif::ExifKey;
use crate::exif::ExifTypeID;
use crate::exif::ExifValue;
use crate::parser::description::parse_description;
use std::convert::TryFrom;
use std::ffi::OsStr;
use utf16string::LittleEndian;
@@ -48,12 +49,31 @@ fn add_image_author(data: &mut ExifData, d: &PixivData) -> Result<(), ()> {
Ok(())
}
/// Store the work's description in the `Exif.Image.XPComment` tag.
///
/// Nothing is written (and `Ok(())` is returned) when `d` has no description.
/// The text produced by `parse_description` is preferred; when that returns
/// `None` the raw description is written instead. The text is converted to a
/// `WString<LittleEndian>` and its bytes are stored in a `BYTE`-typed value.
///
/// Any failure from the exif helpers is propagated as `Err(())`.
fn add_image_comment(data: &mut ExifData, d: &PixivData) -> Result<(), ()> {
    let raw = match d.description.as_ref() {
        Some(raw) => raw,
        None => return Ok(()),
    };
    // Fall back to the unparsed description when parsing fails.
    let parsed = parse_description(raw);
    let text = parsed.as_ref().unwrap_or(raw);
    let key = ExifKey::try_from("Exif.Image.XPComment")?;
    let mut value = ExifValue::try_from(ExifTypeID::BYTE)?;
    let encoded: WString<LittleEndian> = WString::from(text);
    value.read(encoded.as_bytes(), None)?;
    data.add(&key, &value)?;
    Ok(())
}
pub fn add_exifdata_to_image<S: AsRef<OsStr> + ?Sized>(file_name: &S, data: &PixivData) -> Result<(), ()> {
let mut f = ExifImage::new(file_name)?;
let mut d = ExifData::new()?;
add_image_id(&mut d, data)?;
add_image_title(&mut d, data)?;
add_image_author(&mut d, data)?;
add_image_comment(&mut d, data)?;
f.set_exif_data(&d)?;
f.write_metadata()?;
Ok(())

View File

@@ -1,5 +1,6 @@
use crate::data::data::PixivData;
use crate::gettext;
use crate::parser::description::parse_description;
use crate::pixiv_link::PixivID;
use crate::pixiv_link::ToPixivID;
use json::JsonValue;
@@ -91,6 +92,13 @@ impl From<&PixivData> for JSONDataFile {
if p.author.is_some() {
f.add("author", p.author.as_ref().unwrap()).unwrap();
}
if p.description.is_some() {
f.add("description", p.description.as_ref().unwrap()).unwrap();
let pd = parse_description(p.description.as_ref().unwrap());
if pd.is_some() {
f.add("parsed_description", pd.unwrap()).unwrap();
}
}
f
}
}
@@ -121,3 +129,9 @@ impl ToJson for &String {
Some(JsonValue::String((*self).to_string()))
}
}
impl ToJson for String {
    /// Wrap a copy of this string in `JsonValue::String`; always returns `Some`.
    fn to_json(&self) -> Option<JsonValue> {
        let owned = self.clone();
        Some(JsonValue::String(owned))
    }
}

View File

@@ -13,6 +13,7 @@ extern crate regex;
extern crate reqwest;
#[cfg(feature = "utf16string")]
extern crate utf16string;
extern crate xml;
#[cfg(feature = "exif")]
#[doc(hidden)]

174
src/parser/description.rs Normal file
View File

@@ -0,0 +1,174 @@
use crate::gettext;
use html_parser::Dom;
use html_parser::Node;
use std::collections::HashMap;
use std::default::Default;
/// Represents a node (one HTML element) while a description is converted.
#[derive(Debug)]
struct DescriptionNode {
    /// Tag name
    pub tag: String,
    /// Output text collected for this node
    pub data: String,
    /// Attributes stored for this node
    pub attrs: HashMap<String, String>,
}

impl Default for DescriptionNode {
    /// A node with an empty tag name, no text and no attributes.
    fn default() -> Self {
        Self {
            tag: String::new(),
            data: String::new(),
            attrs: HashMap::new(),
        }
    }
}

impl DescriptionNode {
    /// Store attribute `k` with value `v`.
    ///
    /// Returns the value previously stored under `k`, if there was one.
    pub fn add_attr(&mut self, k: &str, v: &str) -> Option<String> {
        self.attrs.insert(k.to_owned(), v.to_owned())
    }

    /// Whether this node should be rendered in `[text](href)` form.
    ///
    /// That is the case only for an `a` tag that has an `href` attribute
    /// whose value differs from the node's own text.
    pub fn is_link(&self) -> bool {
        self.tag == "a"
            && self
                .attrs
                .get("href")
                .map_or(false, |href| href.as_str() != self.data.as_str())
    }

    /// Render this node as `[text](href)`.
    ///
    /// # Panics
    ///
    /// Panics when no `href` attribute is stored; call `is_link` first.
    pub fn to_link(&self) -> String {
        let href = self.attrs.get("href").unwrap();
        format!("[{}]({})", self.data, href)
    }
}
/// A simple HTML parser that turns description HTML into plain text
pub struct DescriptionParser {
    /// Stack of the elements that are currently being visited
    nodes: Vec<DescriptionNode>,
    /// Text produced so far
    pub data: String,
}

impl DescriptionParser {
    /// Create a parser with an empty node stack and empty output.
    pub fn new() -> Self {
        Self {
            nodes: Vec::new(),
            data: String::new(),
        }
    }

    /// Append `text` to the node on top of the stack, or to the final
    /// output when the stack is empty.
    fn append(&mut self, text: &str) {
        match self.nodes.last_mut() {
            Some(top) => top.data.push_str(text),
            None => self.data.push_str(text),
        }
    }

    /// Visit `node` and, recursively, its children.
    ///
    /// * Comments are ignored.
    /// * Text is appended as-is.
    /// * `script` and `style` elements are skipped together with their children.
    /// * `br` elements become a single `"\n"`.
    /// * Any other element is pushed on the stack while its children are
    ///   visited; afterwards its collected text (or its `[text](href)` form
    ///   when `is_link` says so) is appended to the enclosing node or output.
    pub fn iter(&mut self, node: &Node) {
        match node {
            Node::Comment(_) => {}
            Node::Text(text) => self.append(text),
            Node::Element(element) => {
                let tag = element.name.as_str();
                match tag {
                    "script" | "style" => return,
                    "br" => {
                        self.append("\n");
                        return;
                    }
                    _ => {}
                }
                let mut current = DescriptionNode::default();
                current.tag = tag.to_string();
                if tag == "a" {
                    // Only an href attribute that carries a value is recorded.
                    if let Some(Some(href)) = element.attributes.get("href") {
                        current.add_attr("href", href);
                    }
                }
                self.nodes.push(current);
                for child in element.children.iter() {
                    self.iter(child);
                }
                // The node pushed above is back on top once its children are done.
                let finished = self.nodes.pop().unwrap();
                let rendered = if finished.is_link() {
                    finished.to_link()
                } else {
                    finished.data
                };
                self.append(rendered.as_str());
            }
        }
    }

    /// Parse `desc` as HTML and append the resulting text to `self.data`.
    ///
    /// Errors reported inside the parsed DOM are printed but do not abort the
    /// conversion. `Err(())` is returned (after printing a message) when the
    /// HTML cannot be parsed at all, or when nodes remain on the stack after
    /// all children were visited.
    pub fn parse(&mut self, desc: &str) -> Result<(), ()> {
        let dom = match Dom::parse(desc) {
            Ok(dom) => dom,
            Err(e) => {
                println!("{} {}", gettext("Failed to parse HTML:"), e);
                return Err(());
            }
        };
        if !dom.errors.is_empty() {
            println!("{}", gettext("Some errors occured during parsing:"));
            for err in dom.errors.iter() {
                println!("{}", err);
            }
        }
        for child in dom.children.iter() {
            self.iter(child)
        }
        if !self.nodes.is_empty() {
            println!(
                "{} {:?}",
                gettext("There are some nodes still in stack:"),
                self.nodes
            );
            return Err(());
        }
        Ok(())
    }
}
/// Convert description HTML into text using a fresh [`DescriptionParser`].
///
/// Returns `Some(text)` when parsing succeeds and `None` when
/// [`DescriptionParser::parse`] reports an error.
pub fn parse_description(desc: &str) -> Option<String> {
    let mut parser = DescriptionParser::new();
    if parser.parse(desc).is_ok() {
        Some(parser.data)
    } else {
        None
    }
}
// Exercises `parse_description` on the three link shapes it handles.
#[test]
fn test_parse_description() {
// A link whose text differs from its href is rendered as `[text](href)`.
assert_eq!(
Some(String::from("a [example](https://a.com)")),
parse_description("a <a href=\"https://a.com\">example</a>")
);
// A link whose text equals its href is emitted as the bare text.
assert_eq!(
Some(String::from("a https://a.com")),
parse_description("a <a href=\"https://a.com\">https://a.com</a>")
);
// Nested links are rendered inside-out, and both `<br/>` and `<br>` become "\n".
assert_eq!(
Some(String::from("a [a\n[bc](a.com)d](b.com)\ndata")),
parse_description("a <a href=\"b.com\">a<br/><a href=\"a.com\">bc</a>d</a><br>data")
);
}

View File

@@ -1,3 +1,4 @@
extern crate html_parser;
pub mod description;
pub mod metadata;