diff --git a/pixiv_json_to_html.py b/pixiv_json_to_html.py new file mode 100644 index 0000000..f3edd5b --- /dev/null +++ b/pixiv_json_to_html.py @@ -0,0 +1,125 @@ +from os.path import join, isfile, basename, abspath +from json import load +from html import escape + + +def pixiv_json_to_html(dir_path): + json_path = join(dir_path, 'data.json') + with open(json_path, 'r', encoding='utf-8') as f: + data = load(f) + raw = data['raw'] + title = raw['title'] + html_data = f'{escape(title)}\n' + html_data += f'

{escape(title)}

\n' + typ = raw['type'] + img_idx = 1 + used = set() + pbase = basename(abspath(dir_path)) + if typ == 'article': + blocks = raw['body']['blocks'] + imageMap = raw['body']['imageMap'] + fileMap = raw['body']['fileMap'] + urlEmbedMap = raw['body']['urlEmbedMap'] + for block in blocks: + block_type = block['type'] + if block_type == 'p': + text = block['text'] + styles = [] + if 'styles' in block: + styles = block['styles'] + events = [] + for style in styles: + if style['type'] == 'bold': + start = style['offset'] + end = style['offset'] + style['length'] + events.append((start, '', False)) + events.append((end, '', True)) + else: + raise ValueError(f'Unsupported style type: {style["type"]}') + events.sort(key=lambda x: (x[0], not x[2])) + output = "" + last_idx = 0 + for pos, tag, is_closing in events: + output += escape(text[last_idx:pos]) + output += tag + last_idx = pos + output += escape(text[last_idx:]) + html_data += f'

{output}

\n' + elif block_type == 'header': + for key in block: + if key not in ['type', 'text']: + print(block) + raise ValueError(f'Unsupported header block key: {key}') + text = block['text'] + html_data += f'

{escape(text)}

\n' + elif block_type == 'image': + image_id = block['imageId'] + image_url = imageMap[image_id]['originalUrl'] + image_ext = imageMap[image_id]['extension'] + image_path = join(dir_path, f'{pbase}_{img_idx}.{image_ext}') + if not isfile(image_path): + print(image_path) + image_path = join(dir_path, f'{image_id}.{image_ext}') + image_path = basename(image_path) + html_data += f'

\n' + if image_id not in used: + used.add(image_id) + img_idx += 1 + elif block_type == 'url_embed': + urlEmbedId = block['urlEmbedId'] + embedData = urlEmbedMap[urlEmbedId] + embedType = embedData['type'] + if embedType == 'fanbox.post': + postInfo = embedData['postInfo'] + postTitle = postInfo['title'] + postUrl = f'https://www.fanbox.cc/{postInfo['creatorId']}/posts/{postInfo['id']}' + postCover = None + if 'cover' in postInfo and postInfo['cover']['type'] == 'cover_image': + postCover = postInfo['cover']['url'] + postExcerpt = postInfo['excerpt'] + html_data += f'

{escape(postTitle)}

' + if postCover: + html_data += f'

' + html_data += f'

{escape(postExcerpt)}

\n' + elif embedType == 'html.card': + embed_html_data = embedData['html'] + html_data += f'
{embed_html_data}
\n' + elif embedType == 'html': + embed_html_data = embedData['html'] + html_data += f'
{embed_html_data}
\n' + elif embedType == 'default': + url = embedData['url'] + html_data += f'