diff --git a/Cargo.toml b/Cargo.toml index a591ef6..f3d10e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ modular-bitfield = "0.11" multipart = { features = ["server"], git = 'https://github.com/lifegpc/multipart', optional = true, default-features = false } openssl = { version = "0.10", optional = true } parse-size = "1" -percent-encoding = { version = "*", optional = true } +percent-encoding = "*" proc_macros = { path = "proc_macros" } rand = { version = "0", optional = true } regex = "1" @@ -66,7 +66,7 @@ db_all = ["db", "db_sqlite"] db_sqlite = ["rusqlite"] docker = [] exif = ["bindgen", "c_fixed_string", "cmake", "link-cplusplus", "utf16string"] -server = ["async-trait", "base64", "db", "hex", "hyper", "multipart", "openssl", "serde_json", "rand", "serde_urlencoded", "percent-encoding"] +server = ["async-trait", "base64", "db", "hex", "hyper", "multipart", "openssl", "serde_json", "rand", "serde_urlencoded"] ugoira = ["avdict", "bindgen", "cmake", "link-cplusplus"] [patch.crates-io] diff --git a/src/log_cfg.rs b/src/log_cfg.rs index 29edec4..36a1de2 100644 --- a/src/log_cfg.rs +++ b/src/log_cfg.rs @@ -1,5 +1,4 @@ use crate::ext::rw_lock::GetRwLock; -use lazy_static::lazy_static; use log::LevelFilter; use log4rs::append::console::ConsoleAppender; use log4rs::config::{Appender, Logger, Root}; @@ -8,6 +7,7 @@ use log4rs::{init_config, Config, Handle}; use std::sync::RwLock; lazy_static! { + #[doc(hidden)] static ref HANDLE: RwLock> = RwLock::new(None); } diff --git a/src/parser/description.rs b/src/parser/description.rs index 8c2dd41..d2fbb54 100644 --- a/src/parser/description.rs +++ b/src/parser/description.rs @@ -4,9 +4,33 @@ use crate::pixiv_link::remove_track; use html5ever::tendril::TendrilSink; use html5ever::{parse_document, ParseOpts}; use markup5ever_rcdom::{Node, NodeData, RcDom}; +use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC}; use std::collections::HashMap; use std::default::Default; +const URLENCODE: &AsciiSet = &NON_ALPHANUMERIC + .remove(b':') + .remove(b'/') + .remove(b'?') + .remove(b'#') + .remove(b'[') + .remove(b']') + .remove(b'@') + .remove(b'!') + .remove(b'$') + .remove(b'&') + .remove(b'\'') + .remove(b'(') + .remove(b')') + .remove(b'*') + .remove(b'+') + .remove(b',') + .remove(b';') + .remove(b'=') + .remove(b'%') + .remove(b' ') + .remove(b'.'); + /// Reprent a node #[derive(Debug)] struct DescriptionNode { @@ -87,12 +111,14 @@ impl DescriptionNode { s } - pub fn to_link(&self) -> String { - format!( - "[{}]({})", - self.data.as_str(), - self.attrs.get("href").unwrap() - ) + pub fn to_link(&self, ascii: bool) -> String { + let href = self.attrs.get("href").unwrap(); + let href = if ascii { + percent_encode(href.as_bytes(), URLENCODE).to_string() + } else { + href.clone() + }; + format!("[{}]({})", self.data.as_str(), href) } pub fn to_paragraph(&self) -> String { @@ -106,14 +132,41 @@ impl DescriptionNode { } } +pub struct DescriptionParserBuilder { + /// Markdown mode + md_mode: bool, + /// Ensure link is ASCII + _ensure_link_ascii: bool, +} + +#[allow(dead_code)] +impl DescriptionParserBuilder { + pub fn new(md_mode: bool) -> Self { + Self { + md_mode, + _ensure_link_ascii: false, + } + } + + /// Ensure link is ASCII + pub fn ensure_link_ascii(mut self) -> Self { + self._ensure_link_ascii = true; + self + } + + pub fn build(self) -> DescriptionParser { + DescriptionParser::from(self) + } +} + /// A simple HTML parser to parse description HTML pub struct DescriptionParser { /// Current nodes stack nodes: Vec, /// Output pub data: String, - /// Markdown mode - md_mode: bool, + /// Options + opts: DescriptionParserBuilder, } impl DescriptionParser { @@ -121,7 +174,7 @@ impl DescriptionParser { Self { nodes: Vec::new(), data: String::from(""), - md_mode, + opts: DescriptionParserBuilder::new(md_mode), } } @@ -140,7 +193,7 @@ impl DescriptionParser { if tag == "script" || tag == "style" { return; } else if tag == "br" { - let br = if self.md_mode { " \n" } else { "\n" }; + let br = if self.opts.md_mode { " \n" } else { "\n" }; if self.nodes.len() == 0 { self.data += br; } else { @@ -165,28 +218,28 @@ impl DescriptionParser { } let node = self.nodes.pop().unwrap(); let mut is_paragraph = false; - let s = if node.is_link(self.md_mode) { - node.to_link() - } else if self.md_mode && node.is_headline() { + let s = if node.is_link(self.opts.md_mode) { + node.to_link(self.opts._ensure_link_ascii) + } else if self.opts.md_mode && node.is_headline() { node.to_headline() - } else if self.md_mode && node.is_paragraph() { + } else if self.opts.md_mode && node.is_paragraph() { is_paragraph = true; node.to_paragraph() - } else if self.md_mode && node.is_strong() { + } else if self.opts.md_mode && node.is_strong() { node.to_strong() - } else if self.md_mode && node.is_em() { + } else if self.opts.md_mode && node.is_em() { node.to_em() } else { node.data }; if self.nodes.len() == 0 { - while self.md_mode && is_paragraph && !self.data.ends_with("\n\n") { + while self.opts.md_mode && is_paragraph && !self.data.ends_with("\n\n") { self.data += "\n"; } self.data += s.as_str(); } else { let n = self.nodes.last_mut().unwrap(); - while self.md_mode && is_paragraph && !n.data.ends_with("\n\n") { + while self.opts.md_mode && is_paragraph && !n.data.ends_with("\n\n") { n.data += "\n"; } n.data += s.as_str(); @@ -222,6 +275,21 @@ impl DescriptionParser { } Ok(()) } + + #[allow(dead_code)] + pub fn builder(md_mode: bool) -> DescriptionParserBuilder { + DescriptionParserBuilder::new(md_mode) + } +} + +impl From for DescriptionParser { + fn from(opts: DescriptionParserBuilder) -> Self { + Self { + nodes: Vec::new(), + data: String::from(""), + opts, + } + } } pub fn parse_description + ?Sized>(desc: &S) -> Option { @@ -285,3 +353,14 @@ fn test_convert_description_to_md() { convert_description_to_md("

Head

D

He

Be

test

Link

").unwrap() ); } + +#[test] +fn test_ensure_link_ascii() { + let mut p = DescriptionParser::builder(true).ensure_link_ascii().build(); + p.parse("测试") + .unwrap(); + assert_eq!( + String::from("[测试](https://test:pass@www.test.com/ad/%E6%B5%8B%E8%AF%95?p=1&t=*)"), + p.data + ); +} diff --git a/src/server/push/task/pixiv_send_message.rs b/src/server/push/task/pixiv_send_message.rs index d07bd0a..127fd46 100644 --- a/src/server/push/task/pixiv_send_message.rs +++ b/src/server/push/task/pixiv_send_message.rs @@ -393,7 +393,7 @@ impl<'a> RunContext<'a> { } } if let Some(desc) = self.desc() { - let mut p = DescriptionParser::new(true); + let mut p = DescriptionParser::builder(true).ensure_link_ascii().build(); p.parse(desc)?; while !text.ends_with("\n\n") { text.push_str("\n"); @@ -473,7 +473,7 @@ impl<'a> RunContext<'a> { } } if let Some(desc) = self.desc() { - let mut p = DescriptionParser::new(true); + let mut p = DescriptionParser::builder(true).ensure_link_ascii().build(); p.parse(desc)?; while !text.ends_with("\n\n") { text.push_str("\n");