mirror of
https://github.com/lifegpc/pythonscript.git
synced 2026-06-05 11:08:49 +08:00
add split_novel
This commit is contained in:
41
split_novel.py
Normal file
41
split_novel.py
Normal file
@@ -0,0 +1,41 @@
|
||||
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
|
||||
from re import compile
|
||||
from os.path import join, splitext
|
||||
from os import makedirs
|
||||
|
||||
|
||||
arg = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
|
||||
arg.add_argument('PATH', help='The path to input file.')
|
||||
arg.add_argument('-o', '--output', help="The path to output directory.",
|
||||
metavar='OUTPUT', dest='output')
|
||||
arg.add_argument('-e', '--encoding', help='File encoding.',
|
||||
default='UTF-8', metavar='ENCODING', dest='encoding')
|
||||
arg.add_argument('-r', '--regex', help='Chapter detect regex.',
|
||||
default=r'^第 ?\d+ ?章', metavar='REGEX', dest='regex')
|
||||
args = arg.parse_intermixed_args()
|
||||
print(args)
|
||||
regex = compile(args.regex)
|
||||
if args.output is None:
|
||||
args.output = splitext(args.PATH)[0]
|
||||
makedirs(args.output, exist_ok=True)
|
||||
cur = []
|
||||
ind = 1
|
||||
with open(args.PATH, 'r', encoding=args.encoding) as f:
|
||||
while True:
|
||||
now = f.readline()
|
||||
if not now.endswith('\n'):
|
||||
break
|
||||
now = now.rstrip('\n')
|
||||
if regex.search(now) is not None:
|
||||
print(now, len(cur))
|
||||
if len(cur):
|
||||
with open(join(args.output, f'{ind:06}.txt'), 'w', encoding=args.encoding) as o:
|
||||
o.write('\n'.join(cur))
|
||||
cur = []
|
||||
ind += 1
|
||||
cur.append(now)
|
||||
if len(cur):
|
||||
with open(join(args.output, f'{ind:06}.txt'), 'w', encoding=args.encoding) as o:
|
||||
o.write('\n'.join(cur))
|
||||
cur = []
|
||||
ind += 1
|
||||
Reference in New Issue
Block a user