From 3ad22a14d23ee4a4e163723415d25676cc49ed69 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Sat, 9 Jan 2021 08:37:37 +0800 Subject: [PATCH] update the method to calculate the hash --- RSSEntry.py | 12 +++++++++++- readset.py | 20 +++++++++++++++++++- rssbot.py | 6 +++++- rsschecker.py | 11 +++++++++-- 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/RSSEntry.py b/RSSEntry.py index 426f584..f63b389 100644 --- a/RSSEntry.py +++ b/RSSEntry.py @@ -49,7 +49,17 @@ def calHash(url: dict, item: dict) -> HashEntry: hasht = hasht + item['title'] if 'link' in item and item['link'] is not None: hasht = hasht + item['link'] - if 'description' in item and item['description'] is not None: + matched = False + if 'published' in item and item['published'] is not None: + hasht = hasht + item['published'] + matched = True + if 'updated' in item and item['updated'] is not None: + hasht = hasht + item['updated'] + matched = True + if 'pubDate' in item and item['pubDate'] is not None: + hasht = hasht + item['pubDate'] + matched = True + if not matched and 'description' in item and item['description'] is not None: hasht = hasht + item['description'] hashed = sha256WithBase64(hasht) return HashEntry(id=hashd, hash=hashed) diff --git a/readset.py b/readset.py index 1c4e876..e841f1a 100644 --- a/readset.py +++ b/readset.py @@ -13,6 +13,10 @@ # # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +from typing import List +from getopt import getopt + + class settings: def __init__(self, fn: str = None): if fn is not None: @@ -33,5 +37,19 @@ class settings: ) and int(d['minTTL']) >= 1 else 5 self._maxTTL = int(d['maxTTL']) if 'maxTTL' in d and d['maxTTL'].isnumeric( ) and int(d['maxTTL']) >= self._minTTL else max(1440, self._minTTL) - self._maxRetryCount = int(d['maxRetryCount']) if 'maxRetryCount' in d and d['maxRetryCount'].isnumeric() and int(d['maxRetryCount']) >= 0 else 3 + self._maxRetryCount = int(d['maxRetryCount']) if 'maxRetryCount' in d and d['maxRetryCount'].isnumeric( + ) and int(d['maxRetryCount']) >= 0 else 3 self._telegramBotApiServer = d['telegramBotApiServer'] if 'telegramBotApiServer' in d else 'https://api.telegram.org' + + +class commandline: + def __init__(self, commandline: List[str] = None): + self._rebuildHashlist = False + if commandline is not None: + self.parse(commandline) + + def parse(self, commandline: List[str]): + cml = getopt(commandline, '', ['rebuild-hashlist']) + for i in cml[0]: + if i[0] == '--rebuild-hashlist': + self._rebuildHashlist = True diff --git a/rssbot.py b/rssbot.py index d5f7cb4..930a31b 100644 --- a/rssbot.py +++ b/rssbot.py @@ -16,7 +16,7 @@ from database import database, userStatus, RSSConfig from RSSEntry import HashEntry, HashEntries, calHash from os.path import exists -from readset import settings +from readset import settings, commandline from requests import Session from traceback import format_exc from threading import Thread @@ -32,6 +32,7 @@ from re import search, I from rsschecker import RSSCheckerThread from rsslist import getInlineKeyBoardForRSSList, InlineKeyBoardForRSSList, getInlineKeyBoardForRSSInList, getTextContentForRSSInList from usercheck import checkUserPermissionsInChat, UserPermissionsInChatCheckResult +import sys def getMediaInfo(m: dict, config: RSSConfig = RSSConfig()) -> str: @@ -246,6 +247,9 @@ class main: if self._setting._token is None: print('没有机器人token') return -1 + self._commandLine = commandline() + if len(sys.argv) > 1: + self._commandLine.parse(sys.argv[1:]) self._telegramBotApiServer = self._setting._telegramBotApiServer self._db = database(self) if not exists('settings.txt'): diff --git a/rsschecker.py b/rsschecker.py index 1b02427..7f78f63 100644 --- a/rsschecker.py +++ b/rsschecker.py @@ -15,7 +15,7 @@ # along with this program. If not, see . from threading import Thread from time import sleep, time -from RSSEntry import RSSEntry, calHash, ChatEntry +from RSSEntry import RSSEntry, calHash, ChatEntry, HashEntries from traceback import format_exc from rssparser import RSSParser @@ -23,7 +23,7 @@ from rssparser import RSSParser class RSSCheckerThread(Thread): def __loop(self): for rss in self._main._db.getAllRSSList(): - if self.__needUpdate(rss): + if self.__needUpdate(rss) or self._main._commandLine._rebuildHashlist: try: p = RSSParser() p.parse(rss.url) @@ -32,8 +32,14 @@ class RSSCheckerThread(Thread): meta = p.m itemList = p.itemList[:self._main._setting._maxCount] itemList.reverse() + if self._main._commandLine._rebuildHashlist: + rss.hashList = HashEntries( + self._main._setting._maxCount) for item in itemList: hashEntry = calHash(rss.url, item) + if self._main._commandLine._rebuildHashlist: + rss.hashList.add(hashEntry) + continue if not rss.hashList.has(hashEntry): rss.hashList.add(hashEntry) for info in rss.chatList: @@ -54,6 +60,7 @@ class RSSCheckerThread(Thread): rss.title, rss.url, updateTime, rss.hashList, p.ttl) except: print(format_exc()) + self._main._commandLine._rebuildHashlist = False def __init__(self, m): Thread.__init__(self)