add regex and excludes
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -129,3 +129,6 @@ dmypy.json
|
||||
.pyre/
|
||||
|
||||
.vscode/
|
||||
|
||||
# Cython generated files
|
||||
*.c
|
||||
|
||||
11
example.yaml
11
example.yaml
@@ -1,9 +1,11 @@
|
||||
dest: /path/to/store/backup/files # The programs will store database and backup files in this location
|
||||
enable_pcre2: false # Optional. Default value: false. Try to use PCRE2 first. PCRE2 may be a little slower than internal regex library.
|
||||
remove_old_files: true # Optional. Default value: true. When backing up, remove backup files whose source files have already been deleted from the source tree.
|
||||
ignore_hidden_files: true # Optional. Default value: true. Whether to ignore files whose names start with ".". Only affects entries whose type is "path".
|
||||
programs:
|
||||
- name: Your program name # This name is used to identify different application.
|
||||
base: /path/to/save/path # Must be an absolute path.
|
||||
enable_pcre2: false # Optional.
|
||||
remove_old_files: true # Optional.
|
||||
ignore_hidden_files: true # Optional.
|
||||
files:
|
||||
@@ -11,11 +13,20 @@ programs:
|
||||
- type: path
|
||||
path: folder # Path to a file/folder. If it is a folder, all of its subfolders are included. Must be a relative path if no name is given.
|
||||
name: folder2 # Optional. Path to the backup files. Should be a relative path.
|
||||
enable_pcre2: false
|
||||
remove_old_files: true # Optional.
|
||||
ignore_hidden_files: true # Optional.
|
||||
excludes: # Optional. Exclude some files/folders.
|
||||
- data.db # Relative path
|
||||
- /path/to/data.db # Absolute path
|
||||
- type: wildcards
|
||||
rule: "*/*.db"
|
||||
- type: regex
|
||||
rule: "\\d+\\.db"
|
||||
- type: leveldb # module plyvel is needed to support this type. This will store leveldb database to a single file database (sqlite3)
|
||||
path: leveldb # path to leveldb. Must be relative path.
|
||||
name: dest # Optional. Path to the backup files. Should be a relative path.
|
||||
enable_pcre2: false
|
||||
remove_old_files: true # Optional.
|
||||
domains: # optional. Just backup minor domains in localstorage database. Only chromium is tested.
|
||||
- some domain
|
||||
|
||||
5
game_backuper/_Python.pxd
Normal file
5
game_backuper/_Python.pxd
Normal file
@@ -0,0 +1,5 @@
|
||||
cdef extern from "Python.h":
|
||||
void Py_INCREF(object o)
|
||||
void Py_DECREF(object o)
|
||||
const char* PyUnicode_AsUTF8(object unicode)
|
||||
const char* PyUnicode_AsUTF8AndSize(object unicode, Py_ssize_t *size)
|
||||
2
game_backuper/_pcre2.h
Normal file
2
game_backuper/_pcre2.h
Normal file
@@ -0,0 +1,2 @@
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include "pcre2.h"
|
||||
35
game_backuper/_pcre2.pxd
Normal file
35
game_backuper/_pcre2.pxd
Normal file
@@ -0,0 +1,35 @@
|
||||
from libc.stddef cimport size_t
|
||||
from libc.stdint cimport uint8_t, uint32_t
|
||||
|
||||
|
||||
cdef extern from "_pcre2.h":
|
||||
ctypedef uint8_t PCRE2_UCHAR
|
||||
ctypedef const uint8_t* PCRE2_SPTR8
|
||||
ctypedef PCRE2_SPTR8 PCRE2_SPTR
|
||||
ctypedef size_t PCRE2_SIZE
|
||||
ctypedef struct pcre2_compile_context:
|
||||
pass
|
||||
ctypedef struct pcre2_code:
|
||||
pass
|
||||
ctypedef struct pcre2_match_data:
|
||||
pass
|
||||
ctypedef struct pcre2_general_context:
|
||||
pass
|
||||
ctypedef struct pcre2_match_context:
|
||||
pass
|
||||
|
||||
pcre2_code *pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE length, uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
|
||||
void pcre2_code_free(pcre2_code *code)
|
||||
int pcre2_get_error_message(int errorcode, PCRE2_UCHAR *buffer, PCRE2_SIZE bufflen)
|
||||
PCRE2_SPTR pcre2_get_mark(pcre2_match_data *match_data)
|
||||
uint32_t pcre2_get_ovector_count(pcre2_match_data *match_data)
|
||||
PCRE2_SIZE *pcre2_get_ovector_pointer(pcre2_match_data *match_data)
|
||||
int pcre2_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options, pcre2_match_data *match_data, pcre2_match_context *mcontext)
|
||||
int pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
|
||||
pcre2_match_data *pcre2_match_data_create_from_pattern(const pcre2_code *code, pcre2_general_context *gcontext)
|
||||
void pcre2_match_data_free(pcre2_match_data *match_data)
|
||||
int pcre2_substring_get_bynumber(pcre2_match_data *match_data, uint32_t number, PCRE2_UCHAR **bufferptr, PCRE2_SIZE *bufflen)
|
||||
void pcre2_substring_free(PCRE2_UCHAR *buffer)
|
||||
int pcre2_substring_length_bynumber(pcre2_match_data *match_data, uint32_t number, PCRE2_SIZE *length)
|
||||
void pcre2_substring_list_free(PCRE2_SPTR *list)
|
||||
int pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr, PCRE2_SIZE **lengthsptr)
|
||||
440
game_backuper/_pcre2.pyx
Normal file
440
game_backuper/_pcre2.pyx
Normal file
@@ -0,0 +1,440 @@
|
||||
from ._pcre2 cimport *
|
||||
from ._Python cimport *
|
||||
from enum import IntFlag
|
||||
from libc.stdlib cimport malloc, free
|
||||
from libc.string cimport strcpy
|
||||
from cpython.mem cimport PyMem_Free
|
||||
try:
|
||||
from functools import cached_property
|
||||
except ImportError:
|
||||
cached_property = property
|
||||
|
||||
|
||||
cdef extern from "_pcre2.h":
|
||||
const uint32_t _PCRE2_ANCHORED "PCRE2_ANCHORED"
|
||||
const uint32_t _PCRE2_ALLOW_EMPTY_CLASS "PCRE2_ALLOW_EMPTY_CLASS"
|
||||
const uint32_t _PCRE2_ALT_BSUX "PCRE2_ALT_BSUX"
|
||||
const uint32_t _PCRE2_ALT_CIRCUMFLEX "PCRE2_ALT_CIRCUMFLEX"
|
||||
const uint32_t _PCRE2_ALT_VERBNAMES "PCRE2_ALT_VERBNAMES"
|
||||
const uint32_t _PCRE2_AUTO_CALLOUT "PCRE2_AUTO_CALLOUT"
|
||||
const uint32_t _PCRE2_CASELESS "PCRE2_CASELESS"
|
||||
const uint32_t _PCRE2_COPY_MATCHED_SUBJECT "PCRE2_COPY_MATCHED_SUBJECT"
|
||||
const uint32_t _PCRE2_DOLLAR_ENDONLY "PCRE2_DOLLAR_ENDONLY"
|
||||
const uint32_t _PCRE2_DOTALL "PCRE2_DOTALL"
|
||||
const uint32_t _PCRE2_DUPNAMES "PCRE2_DUPNAMES"
|
||||
const uint32_t _PCRE2_ENDANCHORED "PCRE2_ENDANCHORED"
|
||||
const uint32_t _PCRE2_EXTENDED "PCRE2_EXTENDED"
|
||||
const uint32_t _PCRE2_FIRSTLINE "PCRE2_FIRSTLINE"
|
||||
const uint32_t _PCRE2_LITERAL "PCRE2_LITERAL"
|
||||
const uint32_t _PCRE2_MATCH_INVALID_UTF "PCRE2_MATCH_INVALID_UTF"
|
||||
const uint32_t _PCRE2_MATCH_UNSET_BACKREF "PCRE2_MATCH_UNSET_BACKREF"
|
||||
const uint32_t _PCRE2_MULTILINE "PCRE2_MULTILINE"
|
||||
const uint32_t _PCRE2_NEVER_BACKSLASH_C "PCRE2_NEVER_BACKSLASH_C"
|
||||
const uint32_t _PCRE2_NEVER_UCP "PCRE2_NEVER_UCP"
|
||||
const uint32_t _PCRE2_NEVER_UTF "PCRE2_NEVER_UTF"
|
||||
const uint32_t _PCRE2_NOTBOL "PCRE2_NOTBOL"
|
||||
const uint32_t _PCRE2_NOTEOL "PCRE2_NOTEOL"
|
||||
const uint32_t _PCRE2_NOTEMPTY "PCRE2_NOTEMPTY"
|
||||
const uint32_t _PCRE2_NOTEMPTY_ATSTART "PCRE2_NOTEMPTY_ATSTART"
|
||||
const uint32_t _PCRE2_NO_AUTO_CAPTURE "PCRE2_NO_AUTO_CAPTURE"
|
||||
const uint32_t _PCRE2_NO_AUTO_POSSESS "PCRE2_NO_AUTO_POSSESS"
|
||||
const uint32_t _PCRE2_NO_DOTSTAR_ANCHOR "PCRE2_NO_DOTSTAR_ANCHOR"
|
||||
const uint32_t _PCRE2_NO_JIT "PCRE2_NO_JIT"
|
||||
const uint32_t _PCRE2_NO_START_OPTIMIZE "PCRE2_NO_START_OPTIMIZE"
|
||||
const uint32_t _PCRE2_NO_UTF_CHECK "PCRE2_NO_UTF_CHECK"
|
||||
const uint32_t _PCRE2_PARTIAL_HARD "PCRE2_PARTIAL_HARD"
|
||||
const uint32_t _PCRE2_PARTIAL_SOFT "PCRE2_PARTIAL_SOFT"
|
||||
const uint32_t _PCRE2_UCP "PCRE2_UCP"
|
||||
const uint32_t _PCRE2_UNGREEDY "PCRE2_UNGREEDY"
|
||||
const uint32_t _PCRE2_USE_OFFSET_LIMIT "PCRE2_USE_OFFSET_LIMIT"
|
||||
const uint32_t _PCRE2_UTF "PCRE2_UTF"
|
||||
const size_t PCRE2_ZERO_TERMINATED
|
||||
const int PCRE2_ERROR_NOMEMORY
|
||||
const int PCRE2_ERROR_UNSET
|
||||
const int PCRE2_INFO_NAMECOUNT
|
||||
const int PCRE2_INFO_NAMEENTRYSIZE
|
||||
const int PCRE2_INFO_NAMETABLE
|
||||
|
||||
|
||||
class Option(IntFlag):
|
||||
PCRE2_ANCHORED = _PCRE2_ANCHORED
|
||||
PCRE2_ALLOW_EMPTY_CLASS = _PCRE2_ALLOW_EMPTY_CLASS
|
||||
PCRE2_ALT_BSUX = _PCRE2_ALT_BSUX
|
||||
PCRE2_ALT_CIRCUMFLEX = _PCRE2_ALT_CIRCUMFLEX
|
||||
PCRE2_ALT_VERBNAMES = _PCRE2_ALT_VERBNAMES
|
||||
PCRE2_AUTO_CALLOUT = _PCRE2_AUTO_CALLOUT
|
||||
PCRE2_CASELESS = _PCRE2_CASELESS
|
||||
PCRE2_DOLLAR_ENDONLY = _PCRE2_DOLLAR_ENDONLY
|
||||
PCRE2_DOTALL = _PCRE2_DOTALL
|
||||
PCRE2_DUPNAMES = _PCRE2_DUPNAMES
|
||||
PCRE2_ENDANCHORED = _PCRE2_ENDANCHORED
|
||||
PCRE2_EXTENDED = _PCRE2_EXTENDED
|
||||
PCRE2_FIRSTLINE = _PCRE2_FIRSTLINE
|
||||
PCRE2_LITERAL = _PCRE2_LITERAL
|
||||
PCRE2_MATCH_INVALID_UTF = _PCRE2_MATCH_INVALID_UTF
|
||||
PCRE2_MATCH_UNSET_BACKREF = _PCRE2_MATCH_UNSET_BACKREF
|
||||
PCRE2_MULTILINE = _PCRE2_MULTILINE
|
||||
PCRE2_NEVER_BACKSLASH_C = _PCRE2_NEVER_BACKSLASH_C
|
||||
PCRE2_NEVER_UCP = _PCRE2_NEVER_UCP
|
||||
PCRE2_NEVER_UTF = _PCRE2_NEVER_UTF
|
||||
PCRE2_NO_AUTO_CAPTURE = _PCRE2_NO_AUTO_CAPTURE
|
||||
PCRE2_NO_AUTO_POSSESS = _PCRE2_NO_AUTO_POSSESS
|
||||
PCRE2_NO_DOTSTAR_ANCHOR = _PCRE2_NO_DOTSTAR_ANCHOR
|
||||
PCRE2_NO_START_OPTIMIZE = _PCRE2_NO_START_OPTIMIZE
|
||||
PCRE2_NO_UTF_CHECK = _PCRE2_NO_UTF_CHECK
|
||||
PCRE2_UCP = _PCRE2_UCP
|
||||
PCRE2_UNGREEDY = _PCRE2_UNGREEDY
|
||||
PCRE2_USE_OFFSET_LIMIT = _PCRE2_USE_OFFSET_LIMIT
|
||||
PCRE2_UTF = _PCRE2_UTF
|
||||
|
||||
|
||||
class MatchOption(IntFlag):
|
||||
PCRE2_ANCHORED = _PCRE2_ANCHORED
|
||||
PCRE2_COPY_MATCHED_SUBJECT = _PCRE2_COPY_MATCHED_SUBJECT
|
||||
PCRE2_ENDANCHORED = _PCRE2_ENDANCHORED
|
||||
PCRE2_NOTBOL = _PCRE2_NOTBOL
|
||||
PCRE2_NOTEOL = _PCRE2_NOTEOL
|
||||
PCRE2_NOTEMPTY = _PCRE2_NOTEMPTY
|
||||
PCRE2_NOTEMPTY_ATSTART = _PCRE2_NOTEMPTY_ATSTART
|
||||
PCRE2_NO_JIT = _PCRE2_NO_JIT
|
||||
PCRE2_NO_UTF_CHECK = _PCRE2_NO_UTF_CHECK
|
||||
PCRE2_PARTIAL_HARD = _PCRE2_PARTIAL_HARD
|
||||
PCRE2_PARTIAL_SOFT = _PCRE2_PARTIAL_SOFT
|
||||
|
||||
|
||||
cdef class Match:
    '''Result of a successful :meth:`PCRE2.match`, modelled on ``re.Match``.

    The object owns the ``pcre2_match_data`` block handed over through
    ``set_data`` and frees it on deallocation.  It also keeps references to
    the subject string and the pattern object: the subject must outlive the
    match data because substrings are extracted from its UTF-8 buffer, and
    the pattern supplies the group-name table.

    All offsets (``start``, ``end``, ``span``, ``regs``) are the raw values
    PCRE2 reports for the UTF-8 encoded subject.
    NOTE(review): for non-ASCII subjects these are byte offsets, not ``str``
    indices -- confirm that callers expect this.
    '''
    cdef pcre2_match_data* data
    cdef object inp
    cdef object r

    def __cinit__(self):
        self.data = NULL

    def __dealloc__(self):
        # ``inp`` and ``r`` are ordinary ``cdef object`` attributes; Cython
        # releases them itself, so only the C resource is freed here.
        if self.data != NULL:
            pcre2_match_data_free(self.data)
            self.data = NULL

    def __init__(self, unicode inp, r):
        '''Remember the subject string ``inp`` and the pattern object ``r``.'''
        self.inp = inp
        self.r = r

    cdef uint32_t _group_count(self) except 0:
        '''Return the number of offset pairs; raise ValueError without a match.'''
        if self.data == NULL:
            raise ValueError(u'No matched data.')
        cdef uint32_t count = pcre2_get_ovector_count(self.data)
        if count <= 0:
            raise ValueError(u'No match')
        return count

    cdef object _error_text(self, int code):
        '''Return PCRE2's message for ``code`` or None if there is none.'''
        cdef size_t size = sizeof(PCRE2_UCHAR) * 1024
        cdef PCRE2_UCHAR* errbuf = <PCRE2_UCHAR*> malloc(size)
        if errbuf == NULL:
            raise MemoryError()
        try:
            if pcre2_get_error_message(code, errbuf, size) > 0:
                return errbuf.decode()
            return None
        finally:
            # Released even when decoding raises.
            free(errbuf)

    def __getitem__(self, uint32_t i):
        '''Return the text of group ``i`` or None if that group is unset.

        Raises IndexError for an unknown group number, MemoryError when
        PCRE2 cannot allocate the substring and ValueError for any other
        PCRE2 failure.
        '''
        cdef uint32_t count = self._group_count()
        if i >= count:
            raise IndexError(u'No such group')
        cdef PCRE2_UCHAR *buf = NULL
        cdef PCRE2_SIZE le = 0
        cdef int rc = pcre2_substring_get_bynumber(self.data, i, &buf, &le)
        if rc == 0:
            try:
                return buf.decode()
            finally:
                pcre2_substring_free(buf)
        if rc == PCRE2_ERROR_UNSET:
            return None
        if rc == PCRE2_ERROR_NOMEMORY:
            raise MemoryError()
        msg = self._error_text(rc)
        raise ValueError(msg if msg else u'Can not get substring.')

    def end(self) -> int:
        '''Return the end offset of the whole match.'''
        self._group_count()
        cdef PCRE2_SIZE* vect = pcre2_get_ovector_pointer(self.data)
        return vect[1]

    @property
    def endpos(self) -> int:
        '''Same value as :meth:`end`.'''
        return self.end()

    def group(self) -> str:
        '''Return the text of the whole match (group 0).'''
        return self[0]

    def groupdict(self):
        '''Return a dict mapping each group name to its text (None if unset).

        With duplicate names the first group that took part in the match
        wins.
        '''
        cdef uint32_t count = self._group_count()
        # Look the names up before allocating the substring list so that a
        # failure here cannot leak the list.
        names = self.r.namedtable()
        cdef PCRE2_UCHAR **li = NULL
        cdef int rc = pcre2_substring_list_get(self.data, &li, NULL)
        if rc == PCRE2_ERROR_NOMEMORY:
            raise MemoryError()
        elif rc != 0:
            raise ValueError(u'Unexpected error')
        cdef size_t i = 1
        d = {}
        try:
            while li[i] != NULL:
                name = names.get(i)
                if name is not None:
                    # An empty list entry is either an empty match or an
                    # unset group; only the length query tells them apart.
                    if li[i][0] == 0 and pcre2_substring_length_bynumber(self.data, <uint32_t>i, NULL) != 0:
                        d.setdefault(name, None)
                    elif d.get(name) is None:
                        d[name] = li[i].decode()
                i += 1
        finally:
            pcre2_substring_list_free(li)
        # Groups past the end of the list did not take part in the match.
        while i < count:
            name = names.get(i)
            if name is not None:
                d.setdefault(name, None)
            i += 1
        return d

    def groups(self):
        '''Return a tuple with the text of groups 1..n (None for unset ones).'''
        cdef uint32_t count = self._group_count()
        cdef PCRE2_UCHAR **li = NULL
        cdef int rc = pcre2_substring_list_get(self.data, &li, NULL)
        if rc == PCRE2_ERROR_NOMEMORY:
            raise MemoryError()
        elif rc != 0:
            raise ValueError(u'Unexpected error')
        cdef size_t i = 1
        l = []
        try:
            while li[i] != NULL:
                if li[i][0] == 0 and pcre2_substring_length_bynumber(self.data, <uint32_t>i, NULL) != 0:
                    l.append(None)
                else:
                    l.append(li[i].decode())
                i += 1
        finally:
            pcre2_substring_list_free(li)
        while i < count:
            l.append(None)
            i += 1
        return tuple(l)

    @property
    def lastgroup(self) -> str:
        '''Return the name of the highest-numbered set group, or None.'''
        regs = self.regs
        # Signed index: the countdown must be able to stop at group 0.
        cdef Py_ssize_t i = len(regs) - 1
        while i > 0 and regs[i][0] == -1:
            i -= 1
        if i == 0:
            return None
        return self.r.namedtable().get(i)

    @property
    def lastindex(self) -> int:
        '''Return the number of the last offset pair (pair count minus one).

        NOTE(review): unlike ``re.Match.lastindex`` this does not skip unset
        groups and returns 0 instead of None -- confirm whether that is
        intended before relying on it.
        '''
        cdef uint32_t count = self._group_count()
        return count - 1

    @property
    def pos(self) -> int:
        '''Same value as :meth:`start`.'''
        return self.start()

    @property
    def re(self):
        '''The pattern object that produced this match.'''
        return self.r

    @property
    def regs(self):
        '''Return ``(start, end)`` for every group; ``(-1, -1)`` if unset.'''
        cdef uint32_t count = self._group_count()
        cdef PCRE2_SIZE* vect = pcre2_get_ovector_pointer(self.data)
        l = []
        cdef uint32_t i = 0
        while i < count:
            if vect[i * 2] == <PCRE2_SIZE> -1:
                l.append((-1, -1))
            else:
                l.append((vect[i * 2], vect[i * 2 + 1]))
            i += 1
        return tuple(l)

    def span(self) -> (int, int):
        '''Return ``(start, end)`` of the whole match.'''
        self._group_count()
        cdef PCRE2_SIZE* vect = pcre2_get_ovector_pointer(self.data)
        return (vect[0], vect[1])

    def start(self) -> int:
        '''Return the start offset of the whole match.'''
        self._group_count()
        cdef PCRE2_SIZE* vect = pcre2_get_ovector_pointer(self.data)
        return vect[0]

    @property
    def string(self) -> str:
        '''The subject string that was matched.'''
        return self.inp

    cdef set_data(self, pcre2_match_data* data):
        '''Take ownership of ``data``; raises ValueError if it is NULL.'''
        if data == NULL:
            raise ValueError(u'data is NULL.')
        if self.data != NULL and self.data != data:
            # Do not leak a block that was attached earlier.
            pcre2_match_data_free(self.data)
        self.data = data
|
||||
|
||||
|
||||
cdef extern from "Python.h":
    # Private alias of PyUnicode_AsUTF8AndSize declared with ``except NULL``
    # so that an encoding failure (e.g. a lone surrogate in a file name)
    # propagates as the Python exception CPython has already set instead of
    # handing a NULL pointer to PCRE2.
    const char* _pcre2_utf8_and_size "PyUnicode_AsUTF8AndSize"(object unicode, Py_ssize_t *size) except NULL


cdef class PCRE2:
    '''A compiled PCRE2 pattern (8-bit library, UTF-8 encoded input).'''
    cdef pcre2_code* code

    def __cinit__(self):
        self.code = NULL

    def __dealloc__(self):
        if self.code != NULL:
            pcre2_code_free(self.code)
            self.code = NULL

    cdef object _error_text(self, int code):
        '''Return PCRE2's message for ``code`` or None if there is none.'''
        cdef size_t size = sizeof(PCRE2_UCHAR) * 1024
        cdef PCRE2_UCHAR* errbuf = <PCRE2_UCHAR*> malloc(size)
        if errbuf == NULL:
            raise MemoryError()
        try:
            if pcre2_get_error_message(code, errbuf, size) > 0:
                return errbuf.decode()
            return None
        finally:
            # Released even when decoding raises.
            free(errbuf)

    def __init__(self, unicode inp, opt: Option = None):
        '''Compile the pattern ``inp``.

        ``opt`` (an ``Option`` or plain int) replaces the default options
        ``PCRE2_UTF | PCRE2_ALT_BSUX`` entirely; it is not OR-ed with them.
        Raises ValueError for a None or invalid pattern.
        '''
        if inp is None:
            raise ValueError(u'Empty pattern.')
        cdef uint32_t opts = _PCRE2_UTF | _PCRE2_ALT_BSUX
        cdef int err = 0
        cdef PCRE2_SIZE erroffset = 0
        cdef Py_ssize_t size = 0
        # Option is an IntFlag, so one int check covers both accepted kinds.
        if isinstance(opt, int):
            opts = opt
        # Pass the real length so a pattern containing NUL is not truncated.
        cdef const char* raw = _pcre2_utf8_and_size(inp, &size)
        cdef pcre2_code* compiled = pcre2_compile(<PCRE2_SPTR>raw,
                                                  <PCRE2_SIZE>size, opts, &err,
                                                  &erroffset, NULL)
        if compiled == NULL:
            detail = self._error_text(err)
            if detail:
                raise ValueError(u"Error at offset %d: %s" % (erroffset, detail))
            raise ValueError(u'Invalid pattern.')
        if self.code != NULL:
            # __init__ was called again: release the previous pattern.
            pcre2_code_free(self.code)
        self.code = compiled

    def match(self, unicode inp, opt: MatchOption = None, PCRE2_SIZE startoffset = 0, int search_only = 0) -> Match:
        '''Match the pattern against ``inp``.

        ``opt`` is a ``MatchOption`` or int passed to ``pcre2_match``;
        ``startoffset`` is handed to PCRE2 unchanged, i.e. it is an offset
        into the UTF-8 encoding of ``inp``.

        Returns a ``Match`` (or True when ``search_only`` is non-zero) on
        success and None (or False when ``search_only``) when nothing
        matches.  Raises ValueError for a None subject, an uncompiled
        pattern or a PCRE2 matching error, MemoryError on allocation failure.
        '''
        if inp is None:
            raise ValueError(u'Empty input.')
        if self.code == NULL:
            raise ValueError(u'pattern is NULL.')
        cdef uint32_t opts = 0
        cdef Py_ssize_t size = 0
        cdef Match m
        if isinstance(opt, int):
            opts = opt
        # The buffer is cached inside ``inp``; the Match keeps ``inp`` alive.
        cdef const char* subject = _pcre2_utf8_and_size(inp, &size)
        cdef pcre2_match_data* data = pcre2_match_data_create_from_pattern(self.code, NULL)
        if data == NULL:
            raise MemoryError()
        cdef int rc = pcre2_match(self.code, <PCRE2_SPTR>subject,
                                  <PCRE2_SIZE>size, startoffset, opts, data, NULL)
        if rc <= 0:
            pcre2_match_data_free(data)
            data = NULL
            if rc == -1:  # No match
                return False if search_only else None
            if rc == 0:
                raise ValueError(u'The vector of offsets is too small')
            msg = self._error_text(rc)
            raise ValueError(msg if msg else u'Can not match')
        if search_only:
            pcre2_match_data_free(data)
            return True
        try:
            m = Match(inp, self)
        except:
            # Ownership was not transferred yet, so free the block here.
            pcre2_match_data_free(data)
            raise
        m.set_data(data)
        return m

    def namedtable(self):
        '''Return a dict mapping group numbers to group names.

        A pattern without named groups yields an empty dict.  Raises
        ValueError if the pattern is not compiled or PCRE2 cannot report
        the table.
        '''
        if self.code == NULL:
            raise ValueError(u'pattern is NULL.')
        cdef uint32_t count = 0
        cdef uint32_t ensize = 0
        cdef PCRE2_SPTR buf = NULL
        cdef uint32_t i = 0
        cdef size_t ind = 0
        cdef char* entry
        if pcre2_pattern_info(self.code, PCRE2_INFO_NAMECOUNT, &count) != 0:
            raise ValueError(u'Can not get namedtable')
        d = {}
        if count == 0:
            # No named groups is a normal situation, not an error.
            return d
        if pcre2_pattern_info(self.code, PCRE2_INFO_NAMEENTRYSIZE, &ensize) != 0:
            raise ValueError(u'Can not get namedtable')
        if ensize <= 0:
            raise ValueError(u'Can not get namedtable')
        if pcre2_pattern_info(self.code, PCRE2_INFO_NAMETABLE, &buf) != 0 or buf == NULL:
            raise ValueError(u'Can not get namedtable')
        while i < count:
            ind = <size_t>i * ensize
            # Entry layout: two bytes of group number (high byte first)
            # followed by the zero-terminated name.
            entry = <char*>buf + ind + 2
            d[buf[ind] * 256 + buf[ind + 1]] = entry.decode()
            i += 1
        return d
|
||||
@@ -3,7 +3,7 @@ try:
|
||||
from yaml import CSafeLoader as SafeLoader
|
||||
except Exception:
|
||||
from yaml import SafeLoader
|
||||
from os.path import join, relpath, isfile, isdir, isabs
|
||||
from os.path import join, relpath, isfile, isdir, isabs, abspath
|
||||
from typing import List, Union
|
||||
from game_backuper.file import listdirs
|
||||
from collections import namedtuple
|
||||
@@ -11,11 +11,27 @@ try:
|
||||
from functools import cached_property
|
||||
except ImportError:
|
||||
cached_property = property
|
||||
from game_backuper.regexp import Regex, wildcards_to_regex
|
||||
|
||||
|
||||
class BasicOption:
|
||||
'''Basic options which is included in config, program and files.'''
|
||||
_remove_old_files = None
|
||||
_enable_pcre2 = None
|
||||
|
||||
@cached_property
def enable_pcre2(self) -> bool:
    '''Whether PCRE2 should be preferred for regular expressions.

    The object's own setting wins; otherwise the first explicit setting
    found on ``_prog`` and then ``_cfg`` is used.  Defaults to False.
    '''
    own = self._enable_pcre2
    if own is not None:
        return own
    # Walk up the inheritance chain: program first, then global config.
    for owner_attr in ("_prog", "_cfg"):
        owner = getattr(self, owner_attr, None)
        if owner is None:
            continue
        inherited = owner._enable_pcre2
        if inherited is not None:
            return inherited
    return False
|
||||
|
||||
@cached_property
|
||||
def remove_old_files(self) -> bool:
|
||||
@@ -33,6 +49,18 @@ class BasicOption:
|
||||
|
||||
def parse_all(self, data=None):
|
||||
self.parse_remove_old_files(data)
|
||||
self.parse_enable_pcre2(data)
|
||||
|
||||
def parse_enable_pcre2(self, data=None):
    '''Read the optional ``enable_pcre2`` key into ``self._enable_pcre2``.

    ``data`` defaults to ``self.data``.  A missing key leaves the current
    value untouched; a non-boolean value raises TypeError.
    '''
    source = self.data if data is None else data
    if 'enable_pcre2' not in source:
        return
    value = source['enable_pcre2']
    if not isinstance(value, bool):
        raise TypeError('enable_pcre2 option must be a boolean.')
    self._enable_pcre2 = value
|
||||
|
||||
def parse_remove_old_files(self, data=None):
|
||||
if data is None:
|
||||
@@ -133,6 +161,53 @@ class ConfigPath(BasicOption, NFBasicOption, BasicConfig):
|
||||
self.parse_all()
|
||||
self.parse_all_nf()
|
||||
|
||||
@property
def excludes(self) -> List[Union[str, Regex]]:
    '''Exclude rules parsed from the ``excludes`` key of ``self.data``.

    Plain strings are kept as paths; ``{'type': 'wildcards', 'rule': ...}``
    and ``{'type': 'regex', 'rule': ...}`` entries are compiled to
    ``Regex`` objects.  Entries of any other kind are ignored.  Returns
    None when ``excludes`` is missing or is not a list.

    The parsed list is cached on the instance so the patterns are compiled
    only once.
    '''
    # The cache must be read and written under the same attribute name.
    # ``self.__excludes`` is name-mangled inside a class body while
    # ``getattr(self, "__excludes")`` is not, so the two never met.
    cached = getattr(self, "_excludes_cache", None)
    if cached is not None:
        return cached
    if 'excludes' not in self.data:
        return None
    raw = self.data['excludes']
    if not isinstance(raw, list):
        return None
    rules = []
    for entry in raw:
        if isinstance(entry, str):
            rules.append(entry)
        elif isinstance(entry, dict):
            kind = entry['type']
            if kind == 'wildcards':
                rules.append(wildcards_to_regex(entry['rule'], use_pcre2=self.enable_pcre2))  # noqa: E501
            elif kind == "regex":
                rules.append(Regex(entry['rule'],
                                   use_pcre2=self.enable_pcre2))
    self._excludes_cache = rules
    return rules
|
||||
|
||||
def is_exclude(self, b: str, loc: str) -> bool:
    '''Tell whether ``loc`` is covered by one of the exclude rules.

    ``b`` is the base directory that relative paths refer to and ``loc``
    is the candidate path (absolute, or relative to ``b``).

    A string rule excludes the path it names and, when it names a folder,
    everything below that folder.  Absolute rules are compared with the
    absolute form of ``loc``, relative rules with its form relative to
    ``b``.  ``Regex`` rules are tried against the relative form and, when
    ``loc`` was not already that exact absolute path, the absolute form.
    '''
    from os.path import sep

    rules = self.excludes
    if rules is None:
        return False
    full = loc if isabs(loc) else join(b, loc)
    bl = abspath(full)
    rl = relpath(full, b)

    def covers(excluded: str, target: str) -> bool:
        # Exact hit, or ``target`` lies inside the excluded folder.  The
        # separator is appended so "cache" does not swallow "cachefile".
        return target == excluded or target.startswith(excluded.rstrip(sep) + sep)  # noqa: E501

    for rule in rules:
        if isinstance(rule, str):
            if isabs(rule):
                if covers(abspath(rule), bl):
                    return True
            elif covers(relpath(join(b, rule), b), rl):
                return True
        elif isinstance(rule, Regex):
            if rule.match_only(rl):
                return True
            # NOTE(review): when ``loc`` is already a normalized absolute
            # path, ``bl == loc`` and the absolute form is never tried --
            # confirm this asymmetry is intended.
            if bl != loc and rule.match_only(bl):
                return True
    return False
|
||||
|
||||
|
||||
class ConfigOLeveldb(BasicOption, NFBasicOption, BasicConfig):
|
||||
def __init__(self, data, cfg, prog):
|
||||
@@ -218,78 +293,44 @@ class Program(BasicOption, NFBasicOption):
|
||||
return self._files.copy()
|
||||
r = []
|
||||
self._files = r.copy()
|
||||
for i in self.data[ke]:
|
||||
for i in self.all_configs:
|
||||
b = self.base
|
||||
if isinstance(i, str):
|
||||
if isabs(i):
|
||||
raise ValueError('Absolute path must need a name.')
|
||||
bp = join(b, i)
|
||||
if isinstance(i, ConfigPath):
|
||||
if isabs(i.path):
|
||||
bp = i.path
|
||||
else:
|
||||
bp = join(b, i.path)
|
||||
name = i.real_name
|
||||
if isfile(bp):
|
||||
tname = relpath(join(b, i), b)
|
||||
r.append(ConfigNormalFile(tname, bp))
|
||||
tname = relpath(join(b, name), b)
|
||||
tmp = ConfigNormalFile(tname, bp)
|
||||
del tname
|
||||
tmp.parse_all(i.data)
|
||||
r.append(tmp)
|
||||
elif isdir(bp):
|
||||
top = NFBasicOption(self._cfg, self)
|
||||
top.parse_ignore_hidden_files(i.data)
|
||||
ll = listdirs(bp, top.ignore_hidden_files)
|
||||
del top
|
||||
for ii in ll:
|
||||
r.append(ConfigNormalFile(relpath(ii, b), ii))
|
||||
elif isinstance(i, dict):
|
||||
t = i['type']
|
||||
if t == 'path':
|
||||
if isabs(i['path']):
|
||||
if 'name' not in i or not isinstance(i['name'], str) or i['name'] == '': # noqa: E501
|
||||
raise ValueError('Absolute path must need a name.')
|
||||
bp = i['path']
|
||||
name = i['name']
|
||||
else:
|
||||
bp = join(b, i['path'])
|
||||
name = i['path']
|
||||
if 'name' in i and isinstance(i['name'], str):
|
||||
if i['name'] != '':
|
||||
name = i['name']
|
||||
if isfile(bp):
|
||||
tname = relpath(join(b, name), b)
|
||||
tmp = ConfigNormalFile(tname, bp)
|
||||
if i.is_exclude(bp, ii):
|
||||
continue
|
||||
tname = relpath(join(b, join(name, relpath(ii, bp))), b) # noqa: E501
|
||||
tmp = ConfigNormalFile(tname, ii)
|
||||
del tname
|
||||
tmp.parse_all(i)
|
||||
tmp.parse_all(i.data)
|
||||
r.append(tmp)
|
||||
elif isdir(bp):
|
||||
top = NFBasicOption(self._cfg, self)
|
||||
top.parse_ignore_hidden_files(i)
|
||||
ll = listdirs(bp, top.ignore_hidden_files)
|
||||
del top
|
||||
for ii in ll:
|
||||
tname = relpath(join(b, join(name, relpath(ii, bp))), b) # noqa: E501
|
||||
tmp = ConfigNormalFile(tname, ii)
|
||||
del tname
|
||||
tmp.parse_all(i)
|
||||
r.append(tmp)
|
||||
elif t == 'leveldb':
|
||||
if isabs(i['path']):
|
||||
if 'name' not in i or not isinstance(i['name'], str) or i['name'] == '': # noqa: E501
|
||||
raise ValueError('Absolute path must need a name.')
|
||||
p = i['path']
|
||||
n = i['name']
|
||||
else:
|
||||
p = join(b, i['path'])
|
||||
n = i['path']
|
||||
if 'name' in i and isinstance(i['name'], str):
|
||||
if i['name'] != '':
|
||||
n = i['name']
|
||||
dms = None
|
||||
if 'domains' in i and isinstance(i['domains'], list):
|
||||
dms = []
|
||||
for ii in i['domains']:
|
||||
if isinstance(ii, str) and len(ii) > 0:
|
||||
dms.append(ii.encode())
|
||||
if len(dms) == 0:
|
||||
dms = None
|
||||
tname = relpath(join(b, n), b)
|
||||
tmp = ConfigLeveldb(tname, p, dms)
|
||||
del tname
|
||||
tmp.parse_all(i)
|
||||
r.append(tmp)
|
||||
elif isinstance(i, ConfigOLeveldb):
|
||||
if isabs(i.path):
|
||||
p = i.path
|
||||
else:
|
||||
p = join(b, i.path)
|
||||
name = i.real_name
|
||||
tname = relpath(join(b, name), b)
|
||||
tmp = ConfigLeveldb(tname, p, i.domains)
|
||||
del tname
|
||||
tmp.parse_all(i.data)
|
||||
r.append(tmp)
|
||||
for i in r:
|
||||
i._cfg = self._cfg
|
||||
i._prog = self
|
||||
|
||||
51
game_backuper/regexp.py
Normal file
51
game_backuper/regexp.py
Normal file
@@ -0,0 +1,51 @@
|
||||
try:
|
||||
from game_backuper._pcre2 import PCRE2, Option as PCRE2Option, MatchOption
|
||||
have_pcre2 = True
|
||||
except ImportError:
|
||||
have_pcre2 = False
|
||||
from enum import IntFlag
|
||||
from re import I as REI, compile as re_comp
|
||||
|
||||
|
||||
class RegexFlag(IntFlag):
|
||||
I = 1 # noqa: E741
|
||||
IGNORECASE = 1
|
||||
|
||||
|
||||
class Regex:
    '''Regular expression backed by PCRE2 when available, else by ``re``.

    ``flags`` accepts ``RegexFlag`` values; ``use_pcre2`` asks for the PCRE2
    backend and falls back to ``re`` (with a note on stderr) when the
    extension module could not be imported.
    '''

    def __init__(self, r: str, flags: RegexFlag = 0, use_pcre2: bool = False):
        ignore_case = bool(flags & RegexFlag.I)
        if have_pcre2 and use_pcre2:
            if ignore_case:
                # Options passed to PCRE2() replace its defaults instead of
                # extending them, so the defaults are restated here.
                opt = (PCRE2Option.PCRE2_UTF | PCRE2Option.PCRE2_ALT_BSUX
                       | PCRE2Option.PCRE2_CASELESS)
                self._re = PCRE2(r, opt)
            else:
                self._re = PCRE2(r)
            self._use_pcre2 = True
        else:
            if use_pcre2:
                from sys import stderr
                stderr.write("Can not load pcre2.\n")
            self._use_pcre2 = False
            # The flag was computed but never handed to the compiler before.
            self._re = re_comp(r, REI if ignore_case else 0)

    def match(self, s: str, startpos: int = 0):
        '''Match at ``startpos`` and return the backend's match object or None.

        NOTE(review): the PCRE2 backend receives ``startpos`` as its raw
        start offset, which may differ from a ``str`` index for non-ASCII
        input -- confirm against callers.
        '''
        if self._use_pcre2:
            return self._re.match(s, MatchOption.PCRE2_ANCHORED, startpos)
        return self._re.match(s, startpos)

    def match_only(self, s: str, startpos: int = 0) -> bool:
        '''Return True if the pattern matches at ``startpos``, else False.'''
        if self._use_pcre2:
            return self._re.match(s, MatchOption.PCRE2_ANCHORED, startpos, True)  # noqa: E501
        return self._re.match(s, startpos) is not None
|
||||
|
||||
|
||||
def wildcards_to_regex(s: str, **k):
    '''Translate a wildcard pattern into a ``Regex``.

    ``*`` matches any run of characters and ``?`` a single character; every
    other regex metacharacter is taken literally.  The result has to match
    the whole text, so ``*.db`` no longer accepts ``x.db.bak``.  Keyword
    arguments are forwarded to ``Regex``.
    '''
    # One pass over the text: escape metacharacters, expand the wildcards.
    table = {ord(c): '\\' + c for c in '\\$()+.[^{|'}
    table[ord('*')] = '.*'
    table[ord('?')] = '.'
    # Matching is anchored at the start only, so anchor the end here.
    return Regex(s.translate(table) + '\\Z', **k)
|
||||
10
setup.py
10
setup.py
@@ -1,6 +1,15 @@
|
||||
# flake8: noqa
|
||||
import sys
|
||||
from game_backuper import __version__
|
||||
from setuptools import Extension
|
||||
try:
    from Cython.Build import cythonize
except ImportError:
    def cythonize(li, *args, **kwargs):
        '''Fallback used when Cython is not installed: build no extensions.

        Accepts (and ignores) the same extra arguments as the real
        ``cythonize`` so that the ``setup()`` call below keeps working.
        '''
        return []
|
||||
|
||||
ext_modules = [Extension("game_backuper._pcre2", ["game_backuper/_pcre2.pyx"], libraries=["pcre2-8"])]
|
||||
|
||||
if "py2exe" in sys.argv:
|
||||
from distutils.core import setup
|
||||
import py2exe
|
||||
@@ -50,5 +59,6 @@ setup(
|
||||
long_description="A game backuper",
|
||||
keywords="backup",
|
||||
packages=["game_backuper"],
|
||||
ext_modules=cythonize(ext_modules, compiler_directives={'language_level': "3"}),
|
||||
**params
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user