add split_novel

This commit is contained in:
2022-06-25 18:43:51 +08:00
parent 80225d0e0c
commit b7245e1ef9

41
split_novel.py Normal file
View File

@@ -0,0 +1,41 @@
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from re import compile
from os.path import join, splitext
from os import makedirs
arg = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
arg.add_argument('PATH', help='The path to input file.')
arg.add_argument('-o', '--output', help="The path to output directory.",
metavar='OUTPUT', dest='output')
arg.add_argument('-e', '--encoding', help='File encoding.',
default='UTF-8', metavar='ENCODING', dest='encoding')
arg.add_argument('-r', '--regex', help='Chapter detect regex.',
default=r'^第 ?\d+ ?章', metavar='REGEX', dest='regex')
args = arg.parse_intermixed_args()
print(args)
regex = compile(args.regex)
if args.output is None:
args.output = splitext(args.PATH)[0]
makedirs(args.output, exist_ok=True)
cur = []
ind = 1
with open(args.PATH, 'r', encoding=args.encoding) as f:
while True:
now = f.readline()
if not now.endswith('\n'):
break
now = now.rstrip('\n')
if regex.search(now) is not None:
print(now, len(cur))
if len(cur):
with open(join(args.output, f'{ind:06}.txt'), 'w', encoding=args.encoding) as o:
o.write('\n'.join(cur))
cur = []
ind += 1
cur.append(now)
if len(cur):
with open(join(args.output, f'{ind:06}.txt'), 'w', encoding=args.encoding) as o:
o.write('\n'.join(cur))
cur = []
ind += 1