mirror of
https://github.com/lifegpc/pythonscript.git
synced 2026-06-06 11:28:58 +08:00
add split_novel
This commit is contained in:
41
split_novel.py
Normal file
41
split_novel.py
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
|
||||||
|
from re import compile
|
||||||
|
from os.path import join, splitext
|
||||||
|
from os import makedirs
|
||||||
|
|
||||||
|
|
||||||
|
arg = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
||||||
|
arg.add_argument('PATH', help='The path to input file.')
|
||||||
|
arg.add_argument('-o', '--output', help="The path to output directory.",
|
||||||
|
metavar='OUTPUT', dest='output')
|
||||||
|
arg.add_argument('-e', '--encoding', help='File encoding.',
|
||||||
|
default='UTF-8', metavar='ENCODING', dest='encoding')
|
||||||
|
arg.add_argument('-r', '--regex', help='Chapter detect regex.',
|
||||||
|
default=r'^第 ?\d+ ?章', metavar='REGEX', dest='regex')
|
||||||
|
args = arg.parse_intermixed_args()
|
||||||
|
print(args)
|
||||||
|
regex = compile(args.regex)
|
||||||
|
if args.output is None:
|
||||||
|
args.output = splitext(args.PATH)[0]
|
||||||
|
makedirs(args.output, exist_ok=True)
|
||||||
|
cur = []
|
||||||
|
ind = 1
|
||||||
|
with open(args.PATH, 'r', encoding=args.encoding) as f:
|
||||||
|
while True:
|
||||||
|
now = f.readline()
|
||||||
|
if not now.endswith('\n'):
|
||||||
|
break
|
||||||
|
now = now.rstrip('\n')
|
||||||
|
if regex.search(now) is not None:
|
||||||
|
print(now, len(cur))
|
||||||
|
if len(cur):
|
||||||
|
with open(join(args.output, f'{ind:06}.txt'), 'w', encoding=args.encoding) as o:
|
||||||
|
o.write('\n'.join(cur))
|
||||||
|
cur = []
|
||||||
|
ind += 1
|
||||||
|
cur.append(now)
|
||||||
|
if len(cur):
|
||||||
|
with open(join(args.output, f'{ind:06}.txt'), 'w', encoding=args.encoding) as o:
|
||||||
|
o.write('\n'.join(cur))
|
||||||
|
cur = []
|
||||||
|
ind += 1
|
||||||
Reference in New Issue
Block a user