From 1814cf330ddb0140598491e76e40ec3e1f70474a Mon Sep 17 00:00:00 2001 From: lifegpc Date: Sat, 21 Sep 2024 02:38:51 +0000 Subject: [PATCH] add space to full space colon --- src/parser/description.rs | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/src/parser/description.rs b/src/parser/description.rs index ad5295a..1b67184 100644 --- a/src/parser/description.rs +++ b/src/parser/description.rs @@ -1,6 +1,7 @@ use crate::error::PixivDownloaderError; use crate::gettext; use crate::pixiv_link::remove_track; +use fancy_regex::Regex; use html5ever::tendril::TendrilSink; use html5ever::{parse_document, ParseOpts}; use markup5ever_rcdom::{Node, NodeData, RcDom}; @@ -8,6 +9,11 @@ use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC}; use std::collections::HashMap; use std::default::Default; +lazy_static! { + #[doc(hidden)] + static ref RE: Regex = Regex::new("：(?=[^ ])").unwrap(); +} + const URLENCODE: &AsciiSet = &NON_ALPHANUMERIC .remove(b':') .remove(b'/') @@ -139,6 +145,8 @@ pub struct DescriptionParserBuilder { _ensure_link_ascii: bool, /// Telegram HTML Mode tg_html_mode: bool, + /// Add space after `：` + _add_space_after_fullwidth_colon: bool, } #[allow(dead_code)] @@ -148,9 +156,16 @@ impl DescriptionParserBuilder { md_mode, _ensure_link_ascii: false, tg_html_mode, + _add_space_after_fullwidth_colon: false, } } + /// Add space after `：` + pub fn add_space_after_fullwidth_colon(mut self) -> Self { + self._add_space_after_fullwidth_colon = true; + self + } + /// Ensure link is ASCII pub fn ensure_link_ascii(mut self) -> Self { self._ensure_link_ascii = true; @@ -222,11 +237,16 @@ impl DescriptionParser { let node = self.nodes.pop().unwrap(); let mut is_paragraph = false; let s = if self.opts.tg_html_mode { + let data = if self.opts._add_space_after_fullwidth_colon { + RE.replace_all(&node.data, "： ").into_owned() + } else { + node.data.clone() + }; if node.tag == "a" && node.is_link(true) { format!( 
"{}", node.attrs.get("href").unwrap(), - node.data + data ) } else if node.tag.is_empty() || node.tag == "a" @@ -234,9 +254,9 @@ impl DescriptionParser { || node.tag == "body" || node.tag == "head" { - node.data + data } else { - format!("<{}>{}", node.tag, node.data, node.tag) + format!("<{}>{}", node.tag, data, node.tag) } } else if node.is_link(self.opts.md_mode) { node.to_link(self.opts._ensure_link_ascii) @@ -334,7 +354,9 @@ pub fn convert_description_to_md + ?Sized>( pub fn convert_description_to_tg_html + ?Sized>( desc: &S, ) -> Result { - let mut p = DescriptionParser::new(false, true); + let mut p = DescriptionParser::builder(false, true) + .add_space_after_fullwidth_colon() + .build(); p.parse(desc)?; Ok(p.data) } @@ -398,7 +420,7 @@ fn test_ensure_link_ascii() { #[test] fn test_convert_description_to_tg_html() { assert_eq!( - String::from("ご依頼・お仕事について：https://lit.link/en/hamiyamiko\nVGen：https://vgen.co/hamiyamiko\nFanbox：https://hamiya.fanbox.cc/\nX(Twitter)：twitter/hamiyamiko"), + String::from("ご依頼・お仕事について： https://lit.link/en/hamiyamiko\nVGen： https://vgen.co/hamiyamiko\nFanbox： https://hamiya.fanbox.cc/\nX(Twitter)： twitter/hamiyamiko"), convert_description_to_tg_html("ご依頼・お仕事について：https://lit.link/en/hamiyamiko
VGen：https://vgen.co/hamiyamiko
Fanbox：https://hamiya.fanbox.cc/
X(Twitter)：twitter/hamiyamiko").unwrap(), ); assert_eq!(