diff --git a/.gitignore b/.gitignore index e72a38f..494fa2e 100644 --- a/.gitignore +++ b/.gitignore @@ -129,3 +129,6 @@ dmypy.json .pyre/ .vscode/ + +# Cython generated files +*.c diff --git a/example.yaml b/example.yaml index 671d74d..75794d0 100644 --- a/example.yaml +++ b/example.yaml @@ -1,9 +1,11 @@ dest: /path/to/store/backup/files # The programs will store database and backup files in this location +enable_pcre2: false # Optional. Default value: false. Try to use PCRE2 first. PCRE2 may be a little slower than internal regex library. remove_old_files: true # Optional. Default value: true. Remove unneeded backup files which already deleted in source tree when backuping files. ignore_hidden_files: true # Optional. Default value: true. Whether to ignore files which its name starts with ".". Only effect folder which type is "path". programs: - name: Your program name # This name is used to identify different application. base: /path/to/save/path # Must be absoulte path. + enable_pcre2: false # Optional. remove_old_files: true # Optional. ignore_hidden_files: true # Optional. files: @@ -11,11 +13,20 @@ programs: - type: path path: folder # path to a file/folder. All subfolders will include if it is a folder. Must be relative path if name not found. name: folder2 # optional. path to the backup files. Shoule be a relative path + enable_pcre2: false remove_old_files: true # Optional. ignore_hidden_files: true # Optional. + excludes: # Optional. Exclude some files/folders + - data.db # Relative path + - /path/to/data.db # Absolute path + - type: wildcards + rule: "*/*.db" + - type: regex + rule: "\\d+\\.db" - type: leveldb # module plyvel is needed to support this type. This will store leveldb database to a single file database (sqlite3) path: leveldb # path to leveldb. Must be relative path. name: dest # optional. path to the backup files. Shoule be a relative path + enable_pcre2: false remove_old_files: true # Optional. domains: # optional. 
Just backup minor domains in localstorage database. Only chromium is tested. - some domain diff --git a/game_backuper/_Python.pxd b/game_backuper/_Python.pxd new file mode 100644 index 0000000..27739a0 --- /dev/null +++ b/game_backuper/_Python.pxd @@ -0,0 +1,5 @@ +cdef extern from "Python.h": + void Py_INCREF(object o) + void Py_DECREF(object o) + const char* PyUnicode_AsUTF8(object unicode) + const char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t *size) diff --git a/game_backuper/_pcre2.h b/game_backuper/_pcre2.h new file mode 100644 index 0000000..9d01197 --- /dev/null +++ b/game_backuper/_pcre2.h @@ -0,0 +1,2 @@ +#define PCRE2_CODE_UNIT_WIDTH 8 +#include "pcre2.h" diff --git a/game_backuper/_pcre2.pxd b/game_backuper/_pcre2.pxd new file mode 100644 index 0000000..6065f22 --- /dev/null +++ b/game_backuper/_pcre2.pxd @@ -0,0 +1,35 @@ +from libc.stddef cimport size_t +from libc.stdint cimport uint8_t, uint32_t + + +cdef extern from "_pcre2.h": + ctypedef uint8_t PCRE2_UCHAR + ctypedef const uint8_t* PCRE2_SPTR8 + ctypedef PCRE2_SPTR8 PCRE2_SPTR + ctypedef size_t PCRE2_SIZE + ctypedef struct pcre2_compile_context: + pass + ctypedef struct pcre2_code: + pass + ctypedef struct pcre2_match_data: + pass + ctypedef struct pcre2_general_context: + pass + ctypedef struct pcre2_match_context: + pass + + pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext) + void pcre2_code_free(pcre2_code *code) + int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, PCRE2_SIZE bufflen) + PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data) + uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data) + PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data) + int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, pcre2_match_data *match_data, pcre2_match_context *mcontext) + int 
pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where) + pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code, pcre2_general_context *gcontext) + void pcre2_match_data_free(pcre2_match_data *match_data) + int pcre2_substring_get_bynumber(pcre2_match_data *match_data, uint32_t number, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen) + void pcre2_substring_free(PCRE2_UCHAR *buffer) + int pcre2_substring_length_bynumber(pcre2_match_data *match_data, uint32_t number, PCRE2_SIZE *length) + void pcre2_substring_list_free(PCRE2_SPTR *list) + int pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr) diff --git a/game_backuper/_pcre2.pyx b/game_backuper/_pcre2.pyx new file mode 100644 index 0000000..e4dc556 --- /dev/null +++ b/game_backuper/_pcre2.pyx @@ -0,0 +1,440 @@ +from ._pcre2 cimport * +from ._Python cimport * +from enum import IntFlag +from libc.stdlib cimport malloc, free +from libc.string cimport strcpy +from cpython.mem cimport PyMem_Free +try: + from functools import cached_property +except ImportError: + cached_property = property + + +cdef extern from "_pcre2.h": + const uint32_t _PCRE2_ANCHORED "PCRE2_ANCHORED" + const uint32_t _PCRE2_ALLOW_EMPTY_CLASS "PCRE2_ALLOW_EMPTY_CLASS" + const uint32_t _PCRE2_ALT_BSUX "PCRE2_ALT_BSUX" + const uint32_t _PCRE2_ALT_CIRCUMFLEX "PCRE2_ALT_CIRCUMFLEX" + const uint32_t _PCRE2_ALT_VERBNAMES "PCRE2_ALT_VERBNAMES" + const uint32_t _PCRE2_AUTO_CALLOUT "PCRE2_AUTO_CALLOUT" + const uint32_t _PCRE2_CASELESS "PCRE2_CASELESS" + const uint32_t _PCRE2_COPY_MATCHED_SUBJECT "PCRE2_COPY_MATCHED_SUBJECT" + const uint32_t _PCRE2_DOLLAR_ENDONLY "PCRE2_DOLLAR_ENDONLY" + const uint32_t _PCRE2_DOTALL "PCRE2_DOTALL" + const uint32_t _PCRE2_DUPNAMES "PCRE2_DUPNAMES" + const uint32_t _PCRE2_ENDANCHORED "PCRE2_ENDANCHORED" + const uint32_t _PCRE2_EXTENDED "PCRE2_EXTENDED" + const uint32_t _PCRE2_FIRSTLINE "PCRE2_FIRSTLINE" + const uint32_t _PCRE2_LITERAL 
"PCRE2_LITERAL" + const uint32_t _PCRE2_MATCH_INVALID_UTF "PCRE2_MATCH_INVALID_UTF" + const uint32_t _PCRE2_MATCH_UNSET_BACKREF "PCRE2_MATCH_UNSET_BACKREF" + const uint32_t _PCRE2_MULTILINE "PCRE2_MULTILINE" + const uint32_t _PCRE2_NEVER_BACKSLASH_C "PCRE2_NEVER_BACKSLASH_C" + const uint32_t _PCRE2_NEVER_UCP "PCRE2_NEVER_UCP" + const uint32_t _PCRE2_NEVER_UTF "PCRE2_NEVER_UTF" + const uint32_t _PCRE2_NOTBOL "PCRE2_NOTBOL" + const uint32_t _PCRE2_NOTEOL "PCRE2_NOTEOL" + const uint32_t _PCRE2_NOTEMPTY "PCRE2_NOTEMPTY" + const uint32_t _PCRE2_NOTEMPTY_ATSTART "PCRE2_NOTEMPTY_ATSTART" + const uint32_t _PCRE2_NO_AUTO_CAPTURE "PCRE2_NO_AUTO_CAPTURE" + const uint32_t _PCRE2_NO_AUTO_POSSESS "PCRE2_NO_AUTO_POSSESS" + const uint32_t _PCRE2_NO_DOTSTAR_ANCHOR "PCRE2_NO_DOTSTAR_ANCHOR" + const uint32_t _PCRE2_NO_JIT "PCRE2_NO_JIT" + const uint32_t _PCRE2_NO_START_OPTIMIZE "PCRE2_NO_START_OPTIMIZE" + const uint32_t _PCRE2_NO_UTF_CHECK "PCRE2_NO_UTF_CHECK" + const uint32_t _PCRE2_PARTIAL_HARD "PCRE2_PARTIAL_HARD" + const uint32_t _PCRE2_PARTIAL_SOFT "PCRE2_PARTIAL_SOFT" + const uint32_t _PCRE2_UCP "PCRE2_UCP" + const uint32_t _PCRE2_UNGREEDY "PCRE2_UNGREEDY" + const uint32_t _PCRE2_USE_OFFSET_LIMIT "PCRE2_USE_OFFSET_LIMIT" + const uint32_t _PCRE2_UTF "PCRE2_UTF" + const size_t PCRE2_ZERO_TERMINATED + const int PCRE2_ERROR_NOMEMORY + const int PCRE2_ERROR_UNSET + const int PCRE2_INFO_NAMECOUNT + const int PCRE2_INFO_NAMEENTRYSIZE + const int PCRE2_INFO_NAMETABLE + + +class Option(IntFlag): + PCRE2_ANCHORED = _PCRE2_ANCHORED + PCRE2_ALLOW_EMPTY_CLASS = _PCRE2_ALLOW_EMPTY_CLASS + PCRE2_ALT_BSUX = _PCRE2_ALT_BSUX + PCRE2_ALT_CIRCUMFLEX = _PCRE2_ALT_CIRCUMFLEX + PCRE2_ALT_VERBNAMES = _PCRE2_ALT_VERBNAMES + PCRE2_AUTO_CALLOUT = _PCRE2_AUTO_CALLOUT + PCRE2_CASELESS = _PCRE2_CASELESS + PCRE2_DOLLAR_ENDONLY = _PCRE2_DOLLAR_ENDONLY + PCRE2_DOTALL = _PCRE2_DOTALL + PCRE2_DUPNAMES = _PCRE2_DUPNAMES + PCRE2_ENDANCHORED = _PCRE2_ENDANCHORED + PCRE2_EXTENDED = _PCRE2_EXTENDED + PCRE2_FIRSTLINE 
= _PCRE2_FIRSTLINE + PCRE2_LITERAL = _PCRE2_LITERAL + PCRE2_MATCH_INVALID_UTF = _PCRE2_MATCH_INVALID_UTF + PCRE2_MATCH_UNSET_BACKREF = _PCRE2_MATCH_UNSET_BACKREF + PCRE2_MULTILINE = _PCRE2_MULTILINE + PCRE2_NEVER_BACKSLASH_C = _PCRE2_NEVER_BACKSLASH_C + PCRE2_NEVER_UCP = _PCRE2_NEVER_UCP + PCRE2_NEVER_UTF = _PCRE2_NEVER_UTF + PCRE2_NO_AUTO_CAPTURE = _PCRE2_NO_AUTO_CAPTURE + PCRE2_NO_AUTO_POSSESS = _PCRE2_NO_AUTO_POSSESS + PCRE2_NO_DOTSTAR_ANCHOR = _PCRE2_NO_DOTSTAR_ANCHOR + PCRE2_NO_START_OPTIMIZE = _PCRE2_NO_START_OPTIMIZE + PCRE2_NO_UTF_CHECK = _PCRE2_NO_UTF_CHECK + PCRE2_UCP = _PCRE2_UCP + PCRE2_UNGREEDY = _PCRE2_UNGREEDY + PCRE2_USE_OFFSET_LIMIT = _PCRE2_USE_OFFSET_LIMIT + PCRE2_UTF = _PCRE2_UTF + + +class MatchOption(IntFlag): + PCRE2_ANCHORED = _PCRE2_ANCHORED + PCRE2_COPY_MATCHED_SUBJECT = _PCRE2_COPY_MATCHED_SUBJECT + PCRE2_ENDANCHORED = _PCRE2_ENDANCHORED + PCRE2_NOTBOL = _PCRE2_NOTBOL + PCRE2_NOTEOL = _PCRE2_NOTEOL + PCRE2_NOTEMPTY = _PCRE2_NOTEMPTY + PCRE2_NOTEMPTY_ATSTART = _PCRE2_NOTEMPTY_ATSTART + PCRE2_NO_JIT = _PCRE2_NO_JIT + PCRE2_NO_UTF_CHECK = _PCRE2_NO_UTF_CHECK + PCRE2_PARTIAL_HARD = _PCRE2_PARTIAL_HARD + PCRE2_PARTIAL_SOFT = _PCRE2_PARTIAL_SOFT + + +cdef class Match: + cdef pcre2_match_data* data + cdef object inp + cdef object r + def __cinit__(self): + self.data = NULL + + def __dealloc__(self): + if self.data != NULL: + pcre2_match_data_free(self.data) + self.data = NULL + Py_DECREF(self.r) + Py_DECREF(self.inp) + + def __getitem__(self, uint32_t i): + if self.data == NULL: + raise ValueError(u'No matched data.') + cdef uint32_t count = pcre2_get_ovector_count(self.data) + if count <= 0: + raise ValueError(u'No match') + if i >= count: + raise IndexError(u'No such group') + cdef PCRE2_UCHAR *buf + cdef PCRE2_SIZE le + cdef PCRE2_UCHAR* errbuf + cdef size_t errsize = sizeof(PCRE2_UCHAR) * 1024 + cdef int re = pcre2_substring_get_bynumber(self.data, i, &buf, &le) + if re == 0: + s = buf.decode() + pcre2_substring_free(buf) + return s + elif 
re == PCRE2_ERROR_UNSET: + return None + elif re == PCRE2_ERROR_NOMEMORY: + raise MemoryError() + else: + errbuf = malloc(errsize) + if errbuf == NULL: + raise MemoryError() + s = None + if pcre2_get_error_message(re, errbuf, errsize) > 0: + s = errbuf.decode() + free(errbuf) + raise ValueError(s if s else u'Can not get substring.') + + def __init__(self, unicode inp, r): + self.inp = inp + Py_INCREF(inp) + self.r = r + Py_INCREF(r) + + def end(self) -> int: + if self.data == NULL: + raise ValueError(u'No matched data.') + cdef uint32_t count = pcre2_get_ovector_count(self.data) + if count <= 0: + raise ValueError(u'No match') + cdef PCRE2_SIZE* vect = pcre2_get_ovector_pointer(self.data) + return vect[1] + + @cached_property + def endpos(self) -> int: + return self.end() + + def group(self) -> str: + if self.data == NULL: + raise ValueError(u'No matched data.') + cdef uint32_t count = pcre2_get_ovector_count(self.data) + if count <= 0: + raise ValueError(u'No match') + cdef PCRE2_UCHAR *buf + cdef PCRE2_SIZE le + cdef PCRE2_UCHAR* errbuf + cdef size_t errsize = sizeof(PCRE2_UCHAR) * 1024 + cdef int re = pcre2_substring_get_bynumber(self.data, 0, &buf, &le) + if re == 0: + s = buf.decode() + pcre2_substring_free(buf) + return s + elif re == PCRE2_ERROR_NOMEMORY: + raise MemoryError() + else: + errbuf = malloc(errsize) + if errbuf == NULL: + raise MemoryError() + s = None + if pcre2_get_error_message(re, errbuf, errsize) > 0: + s = errbuf.decode() + free(errbuf) + raise ValueError(s if s else u'Can not get substring.') + + def groupdict(self): + if self.data == NULL: + raise ValueError(u'No matched data.') + cdef uint32_t count = pcre2_get_ovector_count(self.data) + if count <= 0: + raise ValueError(u'No match') + cdef PCRE2_UCHAR **li + cdef re = pcre2_substring_list_get(self.data, &li, NULL) + if re == PCRE2_ERROR_NOMEMORY: + raise MemoryError() + elif re != 0: + raise ValueError(u'Unexpected error') + d = {} + cdef size_t i = 1 + t = self.r.namedtable() + while 
li[i] != NULL: + if i not in t: + i += 1 + continue + if li[i][0] == 0 and pcre2_substring_length_bynumber(self.data, i, NULL) != 0: + if t[i] not in d: + d[t[i]] = None + else: + if t[i] not in d or d[t[i]] is None: + d[t[i]] = li[i].decode() + i += 1 + while i < count: + if i in t: + if t[i] not in d: + d[t[i]] = None + i += 1 + pcre2_substring_list_free(li) + return d + + def groups(self): + if self.data == NULL: + raise ValueError(u'No matched data.') + cdef uint32_t count = pcre2_get_ovector_count(self.data) + if count <= 0: + raise ValueError(u'No match') + cdef PCRE2_UCHAR **li + cdef re = pcre2_substring_list_get(self.data, &li, NULL) + if re == PCRE2_ERROR_NOMEMORY: + raise MemoryError() + elif re != 0: + raise ValueError(u'Unexpected error') + cdef size_t i = 1 + l = [] + while li[i] != NULL: + if li[i][0] == 0 and pcre2_substring_length_bynumber(self.data, i, NULL) != 0: + l.append(None) + else: + l.append(li[i].decode()) + i += 1 + while i < count: + l.append(None) + i += 1 + pcre2_substring_list_free(li) + return tuple(l) + + @cached_property + def lastgroup(self) -> str: + regs = self.regs + cdef uint32_t le = len(regs) + cdef uint32_t i = le - 1 + while i >= 0: + if regs[i][0] == -1: + i -= 1 + continue + break + if i == 0: + return None + t = self.r.namedtable() + if i in t: + return t[i] + + @cached_property + def lastindex(self) -> int: + if self.data == NULL: + raise ValueError(u'No matched data.') + cdef uint32_t count = pcre2_get_ovector_count(self.data) + if count <= 0: + raise ValueError(u'No match') + return count - 1 + + @cached_property + def pos(self) -> int: + return self.start() + + @cached_property + def re(self): + return self.r + + @cached_property + def regs(self): + if self.data == NULL: + raise ValueError(u'No matched data.') + cdef uint32_t count = pcre2_get_ovector_count(self.data) + if count <= 0: + raise ValueError(u'No match') + cdef PCRE2_SIZE* vect = pcre2_get_ovector_pointer(self.data) + l = [] + cdef uint32_t i = 0 + 
while i < count: + if vect[i * 2] == -1: + l.append((-1, -1)) + else: + l.append((vect[i * 2], vect[i * 2 + 1])) + i += 1 + return tuple(l) + + def span(self) -> (int, int): + if self.data == NULL: + raise ValueError(u'No matched data.') + cdef uint32_t count = pcre2_get_ovector_count(self.data) + if count <= 0: + raise ValueError(u'No match') + cdef PCRE2_SIZE* vect = pcre2_get_ovector_pointer(self.data) + return (vect[0], vect[1]) + + def start(self) -> int: + if self.data == NULL: + raise ValueError(u'No matched data.') + cdef uint32_t count = pcre2_get_ovector_count(self.data) + if count <= 0: + raise ValueError(u'No match') + cdef PCRE2_SIZE* vect = pcre2_get_ovector_pointer(self.data) + return vect[0] + + @cached_property + def string(self) -> str: + return self.inp + + cdef set_data(self, pcre2_match_data* data): + if data == NULL: + raise ValueError(u'data is NULL.') + self.data = data + + +cdef class PCRE2: + cdef pcre2_code* code + def __cinit__(self): + self.code = NULL + + def __dealloc__(self): + if self.code != NULL: + pcre2_code_free(self.code) + self.code = NULL + + def __init__(self, unicode inp, opt: Option = None): + if inp is None: + raise ValueError(u'Empty pattern.') + cdef uint32_t opts = _PCRE2_UTF | _PCRE2_ALT_BSUX + cdef int err + cdef PCRE2_SIZE erroffset + cdef PCRE2_UCHAR* errbuf + cdef size_t errsize = sizeof(PCRE2_UCHAR) * 1024 + if isinstance(opt, Option): + opts = opt.value + elif isinstance(opt, int): + opts = opt + cdef pcre2_code* re = pcre2_compile(PyUnicode_AsUTF8(inp), + PCRE2_ZERO_TERMINATED, opts, &err, + &erroffset, NULL) + if re is NULL: + errbuf = malloc(errsize) + if errbuf == NULL: + raise MemoryError() + s = None + if pcre2_get_error_message(err, errbuf, errsize) > 0: + s = u"Error at offset %d: %s" % (erroffset, errbuf.decode()) + free(errbuf) + raise ValueError(s if s else u'Not invalid') + self.code = re + + def match(self, unicode inp, opt: MatchOption = None, PCRE2_SIZE startoffset = 0, int search_only = 0) -> 
Match: + if inp is None: + raise ValueError(u'Empty input.') + if self.code == NULL: + raise ValueError(u'pattern is NULL.') + cdef uint32_t opts = 0 + if isinstance(opt, MatchOption): + opts = opt.value + elif isinstance(opt, int): + opts = opt + cdef pcre2_match_data* data = pcre2_match_data_create_from_pattern(self.code, NULL) + cdef PCRE2_UCHAR* errbuf + cdef size_t errsize = sizeof(PCRE2_UCHAR) * 1024 + if data == NULL: + raise MemoryError() + cdef int re = pcre2_match(self.code, PyUnicode_AsUTF8(inp), + PCRE2_ZERO_TERMINATED, startoffset, opts, data, NULL) + if re <= 0: + pcre2_match_data_free(data) + data = NULL + if re == -1: # No match + if search_only: + return False + else: + return None + elif re == 0: + raise ValueError(u'The vector of offsets is too small') + else: + errbuf = malloc(errsize) + if errbuf == NULL: + raise MemoryError() + s = None + if pcre2_get_error_message(re, errbuf, errsize) > 0: + s = errbuf.decode() + free(errbuf) + raise ValueError(s if s else u'Can not match') + if search_only: + pcre2_match_data_free(data) + return True + m = Match(inp, self) + m.set_data(data) + return m + + def namedtable(self): + if self.code == NULL: + raise ValueError(u'pattern is NULL.') + cdef uint32_t count + cdef uint32_t ensize + if pcre2_pattern_info(self.code, PCRE2_INFO_NAMECOUNT, &count) != 0: + raise ValueError(u'Can not get namedtable') + if pcre2_pattern_info(self.code, PCRE2_INFO_NAMEENTRYSIZE, &ensize) != 0: + raise ValueError(u'Can not get namedtable') + if count == 0 or ensize == 0: # no named groups is not an error + return {} # lets Match.groupdict()/lastgroup work on patterns without names + cdef PCRE2_SPTR buf + if pcre2_pattern_info(self.code, PCRE2_INFO_NAMETABLE, &buf) != 0: + raise ValueError(u'Can not get namedtable') + cdef uint32_t i = 0 + cdef size_t ind = 0 + cdef char* tmp = malloc(ensize) + if tmp == NULL: + raise MemoryError() + d = {} + while i < count: + ind = i * ensize + strcpy(tmp, buf + ind + 2) + d[buf[ind] * 256 + buf[ind + 1]] = tmp.decode() + i += 1 + free(tmp) + return d diff
--git a/game_backuper/config.py b/game_backuper/config.py index c6d63a5..6591679 100644 --- a/game_backuper/config.py +++ b/game_backuper/config.py @@ -3,7 +3,7 @@ try: from yaml import CSafeLoader as SafeLoader except Exception: from yaml import SafeLoader -from os.path import join, relpath, isfile, isdir, isabs +from os.path import join, relpath, isfile, isdir, isabs, abspath from typing import List, Union from game_backuper.file import listdirs from collections import namedtuple @@ -11,11 +11,27 @@ try: from functools import cached_property except ImportError: cached_property = property +from game_backuper.regexp import Regex, wildcards_to_regex class BasicOption: '''Basic options which is included in config, program and files.''' _remove_old_files = None + _enable_pcre2 = None + + @cached_property + def enable_pcre2(self) -> bool: + if self._enable_pcre2 is not None: + return self._enable_pcre2 + prog = getattr(self, "_prog", None) + if prog is not None: + if prog._enable_pcre2 is not None: + return prog._enable_pcre2 + cfg = getattr(self, "_cfg", None) + if cfg is not None: + if cfg._enable_pcre2 is not None: + return cfg._enable_pcre2 + return False @cached_property def remove_old_files(self) -> bool: @@ -33,6 +49,18 @@ class BasicOption: def parse_all(self, data=None): self.parse_remove_old_files(data) + self.parse_enable_pcre2(data) + + def parse_enable_pcre2(self, data=None): + if data is None: + data = getattr(self, 'data') + if 'enable_pcre2' in data: + v = data['enable_pcre2'] + if isinstance(v, bool): + self._enable_pcre2 = v + else: + raise TypeError('enable_pcre2 option must be a boolean.') + del v def parse_remove_old_files(self, data=None): if data is None: @@ -133,6 +161,53 @@ class ConfigPath(BasicOption, NFBasicOption, BasicConfig): self.parse_all() self.parse_all_nf() + @property + def excludes(self) -> List[Union[str, Regex]]: + t = getattr(self, "_ConfigPath__excludes", None) # mangled name of self.__excludes; a plain "__excludes" string never hits the cache + if t is not None: + return t + del t + if 'excludes' in self.data: + if
isinstance(self.data['excludes'], list): + r = [] + for i in self.data["excludes"]: + if isinstance(i, str): + r.append(i) + elif isinstance(i, dict): + t = i['type'] + if t == 'wildcards': + r.append(wildcards_to_regex(i['rule'], use_pcre2=self.enable_pcre2)) # noqa: E501 + elif t == "regex": + r.append(Regex(i['rule'], + use_pcre2=self.enable_pcre2)) + self.__excludes = r + return r + + def is_exclude(self, b: str, loc: str) -> bool: + e = self.excludes + if e is None: + return False + if isabs(loc): + bl = abspath(loc) + rl = relpath(loc, b) + else: + bl = abspath(join(b, loc)) + rl = relpath(join(b, loc), b) + for i in e: + if isinstance(i, str): + if isabs(i): + if abspath(i) == bl: + return True + else: + if relpath(join(b, i), b) == rl: + return True + elif isinstance(i, Regex): + if i.match_only(rl): + return True + elif bl != loc and i.match_only(bl): + return True + return False + class ConfigOLeveldb(BasicOption, NFBasicOption, BasicConfig): def __init__(self, data, cfg, prog): @@ -218,78 +293,44 @@ class Program(BasicOption, NFBasicOption): return self._files.copy() r = [] self._files = r.copy() - for i in self.data[ke]: + for i in self.all_configs: b = self.base - if isinstance(i, str): - if isabs(i): - raise ValueError('Absolute path must need a name.') - bp = join(b, i) + if isinstance(i, ConfigPath): + if isabs(i.path): + bp = i.path + else: + bp = join(b, i.path) + name = i.real_name if isfile(bp): - tname = relpath(join(b, i), b) - r.append(ConfigNormalFile(tname, bp)) + tname = relpath(join(b, name), b) + tmp = ConfigNormalFile(tname, bp) del tname + tmp.parse_all(i.data) + r.append(tmp) elif isdir(bp): top = NFBasicOption(self._cfg, self) + top.parse_ignore_hidden_files(i.data) ll = listdirs(bp, top.ignore_hidden_files) del top for ii in ll: - r.append(ConfigNormalFile(relpath(ii, b), ii)) - elif isinstance(i, dict): - t = i['type'] - if t == 'path': - if isabs(i['path']): - if 'name' not in i or not isinstance(i['name'], str) or i['name'] == 
'': # noqa: E501 - raise ValueError('Absolute path must need a name.') - bp = i['path'] - name = i['name'] - else: - bp = join(b, i['path']) - name = i['path'] - if 'name' in i and isinstance(i['name'], str): - if i['name'] != '': - name = i['name'] - if isfile(bp): - tname = relpath(join(b, name), b) - tmp = ConfigNormalFile(tname, bp) + if i.is_exclude(bp, ii): + continue + tname = relpath(join(b, join(name, relpath(ii, bp))), b) # noqa: E501 + tmp = ConfigNormalFile(tname, ii) del tname - tmp.parse_all(i) + tmp.parse_all(i.data) r.append(tmp) - elif isdir(bp): - top = NFBasicOption(self._cfg, self) - top.parse_ignore_hidden_files(i) - ll = listdirs(bp, top.ignore_hidden_files) - del top - for ii in ll: - tname = relpath(join(b, join(name, relpath(ii, bp))), b) # noqa: E501 - tmp = ConfigNormalFile(tname, ii) - del tname - tmp.parse_all(i) - r.append(tmp) - elif t == 'leveldb': - if isabs(i['path']): - if 'name' not in i or not isinstance(i['name'], str) or i['name'] == '': # noqa: E501 - raise ValueError('Absolute path must need a name.') - p = i['path'] - n = i['name'] - else: - p = join(b, i['path']) - n = i['path'] - if 'name' in i and isinstance(i['name'], str): - if i['name'] != '': - n = i['name'] - dms = None - if 'domains' in i and isinstance(i['domains'], list): - dms = [] - for ii in i['domains']: - if isinstance(ii, str) and len(ii) > 0: - dms.append(ii.encode()) - if len(dms) == 0: - dms = None - tname = relpath(join(b, n), b) - tmp = ConfigLeveldb(tname, p, dms) - del tname - tmp.parse_all(i) - r.append(tmp) + elif isinstance(i, ConfigOLeveldb): + if isabs(i.path): + p = i.path + else: + p = join(b, i.path) + name = i.real_name + tname = relpath(join(b, name), b) + tmp = ConfigLeveldb(tname, p, i.domains) + del tname + tmp.parse_all(i.data) + r.append(tmp) for i in r: i._cfg = self._cfg i._prog = self diff --git a/game_backuper/regexp.py b/game_backuper/regexp.py new file mode 100644 index 0000000..85a5c32 --- /dev/null +++ b/game_backuper/regexp.py 
@@ -0,0 +1,51 @@ +try: + from game_backuper._pcre2 import PCRE2, Option as PCRE2Option, MatchOption + have_pcre2 = True +except ImportError: + have_pcre2 = False +from enum import IntFlag +from re import I as REI, compile as re_comp + + +class RegexFlag(IntFlag): + I = 1 # noqa: E741 + IGNORECASE = 1 + + +class Regex: + def __init__(self, r: str, flags: RegexFlag = 0, use_pcre2: bool = False): + if have_pcre2 and use_pcre2: + opt = PCRE2Option.PCRE2_UTF | PCRE2Option.PCRE2_ALT_BSUX # PCRE2() drops its own defaults when an option is passed, so restate them # noqa: E501 + if flags & RegexFlag.I: + opt = opt | PCRE2Option.PCRE2_CASELESS + self._re = PCRE2(r, opt) # pass the translated flags; they were previously ignored + self._use_pcre2 = True + else: + if use_pcre2: + from sys import stderr + stderr.write("Can not load pcre2.\n") + self._use_pcre2 = False + opt = 0 + if flags & RegexFlag.I: + opt = opt | REI + self._re = re_comp(r, opt) # pass the translated flags; they were previously ignored + + def match(self, s: str, startpos: int = 0): + if self._use_pcre2: + return self._re.match(s, MatchOption.PCRE2_ANCHORED, startpos) + else: + return self._re.match(s, startpos) + + def match_only(self, s: str, startpos: int = 0) -> bool: + if self._use_pcre2: + return self._re.match(s, MatchOption.PCRE2_ANCHORED, startpos, True) # noqa: E501 + else: + return False if self._re.match(s, startpos) is None else True + + +def wildcards_to_regex(s: str, **k): + for i in ['\\', '$', '(', ')', '+', '.', '[', '^', '{', '|']: + s = s.replace(i, f"\\{i}") + s = s.replace("*", ".*") + s = s.replace("?", ".") + return Regex(s, **k) diff --git a/setup.py b/setup.py index 2b3290c..5865bfb 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,15 @@ # flake8: noqa import sys from game_backuper import __version__ +from setuptools import Extension +try: + from Cython.Build import cythonize +except ImportError: + def cythonize(li, **k): # accept compiler_directives=... so setup() still runs without Cython + return [] + +ext_modules = [Extension("game_backuper._pcre2", ["game_backuper/_pcre2.pyx"], libraries=["pcre2-8"])] + if "py2exe" in sys.argv: from distutils.core import setup import py2exe @@ -50,5 +59,6 @@ setup( long_description="A game backuper", keywords="backup", packages=["game_backuper"], +
ext_modules=cythonize(ext_modules, compiler_directives={'language_level': "3"}), **params )