mirror of
https://github.com/lifegpc/pythonscript.git
synced 2026-06-05 11:08:49 +08:00
Add pixiv_json_to_html
This commit is contained in:
125
pixiv_json_to_html.py
Normal file
125
pixiv_json_to_html.py
Normal file
@@ -0,0 +1,125 @@
|
||||
from os.path import join, isfile, basename, abspath
|
||||
from json import load
|
||||
from html import escape
|
||||
|
||||
|
||||
def pixiv_json_to_html(dir_path):
|
||||
json_path = join(dir_path, 'data.json')
|
||||
with open(json_path, 'r', encoding='utf-8') as f:
|
||||
data = load(f)
|
||||
raw = data['raw']
|
||||
title = raw['title']
|
||||
html_data = f'<!DOCTYPE html><html><head><meta charset="utf-8"><title>{escape(title)}</title></head><body>\n'
|
||||
html_data += f'<h1>{escape(title)}</h1>\n'
|
||||
typ = raw['type']
|
||||
img_idx = 1
|
||||
used = set()
|
||||
pbase = basename(abspath(dir_path))
|
||||
if typ == 'article':
|
||||
blocks = raw['body']['blocks']
|
||||
imageMap = raw['body']['imageMap']
|
||||
fileMap = raw['body']['fileMap']
|
||||
urlEmbedMap = raw['body']['urlEmbedMap']
|
||||
for block in blocks:
|
||||
block_type = block['type']
|
||||
if block_type == 'p':
|
||||
text = block['text']
|
||||
styles = []
|
||||
if 'styles' in block:
|
||||
styles = block['styles']
|
||||
events = []
|
||||
for style in styles:
|
||||
if style['type'] == 'bold':
|
||||
start = style['offset']
|
||||
end = style['offset'] + style['length']
|
||||
events.append((start, '<b>', False))
|
||||
events.append((end, '</b>', True))
|
||||
else:
|
||||
raise ValueError(f'Unsupported style type: {style["type"]}')
|
||||
events.sort(key=lambda x: (x[0], not x[2]))
|
||||
output = ""
|
||||
last_idx = 0
|
||||
for pos, tag, is_closing in events:
|
||||
output += escape(text[last_idx:pos])
|
||||
output += tag
|
||||
last_idx = pos
|
||||
output += escape(text[last_idx:])
|
||||
html_data += f'<p>{output}</p>\n'
|
||||
elif block_type == 'header':
|
||||
for key in block:
|
||||
if key not in ['type', 'text']:
|
||||
print(block)
|
||||
raise ValueError(f'Unsupported header block key: {key}')
|
||||
text = block['text']
|
||||
html_data += f'<h2>{escape(text)}</h2>\n'
|
||||
elif block_type == 'image':
|
||||
image_id = block['imageId']
|
||||
image_url = imageMap[image_id]['originalUrl']
|
||||
image_ext = imageMap[image_id]['extension']
|
||||
image_path = join(dir_path, f'{pbase}_{img_idx}.{image_ext}')
|
||||
if not isfile(image_path):
|
||||
print(image_path)
|
||||
image_path = join(dir_path, f'{image_id}.{image_ext}')
|
||||
image_path = basename(image_path)
|
||||
html_data += f'<p><img src="{escape(image_path)}" data-original-url="{escape(image_url)}"></p>\n'
|
||||
if image_id not in used:
|
||||
used.add(image_id)
|
||||
img_idx += 1
|
||||
elif block_type == 'url_embed':
|
||||
urlEmbedId = block['urlEmbedId']
|
||||
embedData = urlEmbedMap[urlEmbedId]
|
||||
embedType = embedData['type']
|
||||
if embedType == 'fanbox.post':
|
||||
postInfo = embedData['postInfo']
|
||||
postTitle = postInfo['title']
|
||||
postUrl = f'https://www.fanbox.cc/{postInfo['creatorId']}/posts/{postInfo['id']}'
|
||||
postCover = None
|
||||
if 'cover' in postInfo and postInfo['cover']['type'] == 'cover_image':
|
||||
postCover = postInfo['cover']['url']
|
||||
postExcerpt = postInfo['excerpt']
|
||||
html_data += f'<div class="url-embed"><h2><a href="{escape(postUrl)}">{escape(postTitle)}</a></h2>'
|
||||
if postCover:
|
||||
html_data += f'<p><img src="{escape(postCover)}"></p>'
|
||||
html_data += f'<p>{escape(postExcerpt)}</p></div>\n'
|
||||
elif embedType == 'html.card':
|
||||
embed_html_data = embedData['html']
|
||||
html_data += f'<div class="url-embed">{embed_html_data}</div>\n'
|
||||
elif embedType == 'html':
|
||||
embed_html_data = embedData['html']
|
||||
html_data += f'<div class="url-embed">{embed_html_data}</div>\n'
|
||||
elif embedType == 'default':
|
||||
url = embedData['url']
|
||||
html_data += f'<div class="url-embed"><iframe src="{escape(url)}" /></div>\n'
|
||||
else:
|
||||
raise ValueError(f'Unsupported embed type: {embedType}')
|
||||
else:
|
||||
print(block)
|
||||
raise ValueError(f'Unsupported block type: {block_type}')
|
||||
elif typ == 'image':
|
||||
text = raw['body']['text']
|
||||
images = raw['body']['images']
|
||||
html_data += f'<p>{escape(text).replace('\n', '<br>')}</p>\n'
|
||||
for image in images:
|
||||
image_id = image['id']
|
||||
image_url = image['originalUrl']
|
||||
image_ext = image['extension']
|
||||
image_path = join(dir_path, f'{pbase}_{img_idx}.{image_ext}')
|
||||
if not isfile(image_path):
|
||||
print(image_path)
|
||||
image_path = join(dir_path, f'{image_id}.{image_ext}')
|
||||
image_path = basename(image_path)
|
||||
html_data += f'<p><img src="{escape(image_path)}" data-original-url="{escape(image_url)}"></p>\n'
|
||||
img_idx += 1
|
||||
else:
|
||||
raise ValueError(f'Unsupported body type: {typ}')
|
||||
html_data += '</body></html>\n'
|
||||
with open(join(dir_path, 'output.html'), 'w', encoding='utf-8') as f:
|
||||
f.write(html_data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser(description='Convert Pixiv JSON to HTML')
|
||||
parser.add_argument('dir', help='Directory containing data.json')
|
||||
args = parser.parse_args()
|
||||
pixiv_json_to_html(args.dir)
|
||||
Reference in New Issue
Block a user