diff --git a/src/parser/description.rs b/src/parser/description.rs
index ad5295a..1b67184 100644
--- a/src/parser/description.rs
+++ b/src/parser/description.rs
@@ -1,6 +1,7 @@
use crate::error::PixivDownloaderError;
use crate::gettext;
use crate::pixiv_link::remove_track;
+use fancy_regex::Regex;
use html5ever::tendril::TendrilSink;
use html5ever::{parse_document, ParseOpts};
use markup5ever_rcdom::{Node, NodeData, RcDom};
@@ -8,6 +9,11 @@ use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC};
use std::collections::HashMap;
use std::default::Default;
+lazy_static! { // Shared regex used to insert a space after colons (see `RE.replace_all(.., ": ")`).
+ #[doc(hidden)] // Pattern: a `:` directly followed by a non-space character; the lookahead leaves that character unconsumed.
+ static ref RE: Regex = Regex::new(":(?=[^ ])").unwrap(); // NOTE(review): the option driving this is named "fullwidth colon", yet this literal is an ASCII `:` — confirm the intended character.
+}
+
const URLENCODE: &AsciiSet = &NON_ALPHANUMERIC
.remove(b':')
.remove(b'/')
@@ -139,6 +145,8 @@ pub struct DescriptionParserBuilder {
_ensure_link_ascii: bool,
/// Telegram HTML Mode
tg_html_mode: bool,
+ /// Whether to insert a space after each `:` that is directly followed by a non-space character
+ _add_space_after_fullwidth_colon: bool,
}
#[allow(dead_code)]
@@ -148,9 +156,16 @@ impl DescriptionParserBuilder {
md_mode,
_ensure_link_ascii: false,
tg_html_mode,
+ _add_space_after_fullwidth_colon: false,
}
}
+ /// Turn on insertion of a space after each `:` that is directly followed by a
+ /// non-space character.
+ ///
+ /// Consumes the builder and returns it with the option enabled, so the call can
+ /// be chained with other builder methods.
+ pub fn add_space_after_fullwidth_colon(self) -> Self {
+     let mut builder = self;
+     builder._add_space_after_fullwidth_colon = true;
+     builder
+ }
+
/// Ensure link is ASCII
pub fn ensure_link_ascii(mut self) -> Self {
self._ensure_link_ascii = true;
@@ -222,11 +237,16 @@ impl DescriptionParser {
let node = self.nodes.pop().unwrap();
let mut is_paragraph = false;
let s = if self.opts.tg_html_mode {
+ let data = if self.opts._add_space_after_fullwidth_colon {
+ RE.replace_all(&node.data, ": ").into_owned()
+ } else {
+ node.data.clone()
+ };
if node.tag == "a" && node.is_link(true) {
format!(
"{}",
node.attrs.get("href").unwrap(),
- node.data
+ data
)
} else if node.tag.is_empty()
|| node.tag == "a"
@@ -234,9 +254,9 @@ impl DescriptionParser {
|| node.tag == "body"
|| node.tag == "head"
{
- node.data
+ data
} else {
- format!("<{}>{}{}>", node.tag, node.data, node.tag)
+ format!("<{}>{}{}>", node.tag, data, node.tag)
}
} else if node.is_link(self.opts.md_mode) {
node.to_link(self.opts._ensure_link_ascii)
@@ -334,7 +354,9 @@ pub fn convert_description_to_md + ?Sized>(
pub fn convert_description_to_tg_html + ?Sized>(
desc: &S,
) -> Result {
- let mut p = DescriptionParser::new(false, true);
+ let mut p = DescriptionParser::builder(false, true) // flags are presumably (md_mode, tg_html_mode) — verify against `builder`
+ .add_space_after_fullwidth_colon() // opt in to inserting a space after colons
+ .build();
p.parse(desc)?; // a parse error is returned to the caller via `?`
Ok(p.data) // on success, return the parser's `data` field
}
@@ -398,7 +420,7 @@ fn test_ensure_link_ascii() {
#[test]
fn test_convert_description_to_tg_html() {
assert_eq!(
- String::from("ご依頼・お仕事について:https://lit.link/en/hamiyamiko\nVGen:https://vgen.co/hamiyamiko\nFanbox:https://hamiya.fanbox.cc/\nX(Twitter):twitter/hamiyamiko"),
+ String::from("ご依頼・お仕事について: https://lit.link/en/hamiyamiko\nVGen: https://vgen.co/hamiyamiko\nFanbox: https://hamiya.fanbox.cc/\nX(Twitter): twitter/hamiyamiko"),
convert_description_to_tg_html("ご依頼・お仕事について:https://lit.link/en/hamiyamiko
VGen:https://vgen.co/hamiyamiko
Fanbox:https://hamiya.fanbox.cc/
X(Twitter):twitter/hamiyamiko").unwrap(),
);
assert_eq!(