diff --git a/RSSEntry.py b/RSSEntry.py
index 91a4007..872af4c 100644
--- a/RSSEntry.py
+++ b/RSSEntry.py
@@ -15,6 +15,9 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from json import loads
from config import RSSConfig
+from time import time_ns
+from typing import List
+from hashl import sha256WithBase64
class ChatEntry:
@@ -28,8 +31,66 @@ class ChatEntry:
self.config = RSSConfig()
+class HashEntry:
+    """Fingerprint of one feed item, as stored in the ``hashList`` table.
+
+    ``data`` is an indexable row read positionally as ``(id, hash, time)``;
+    a ``None`` column counts as absent.  The ``id`` and ``hash`` keyword
+    arguments, when given, take precedence over the row values.  ``time``
+    falls back to the current ``time_ns()`` reading when the row has none.
+    """
+
+    def __init__(self, data=None, id: str = None, hash: str = None):
+        # No row at all is handled like a row of three empty columns.
+        if data is None:
+            row_id = row_hash = row_time = None
+        else:
+            row_id, row_hash, row_time = data[0], data[1], data[2]
+        self.id = row_id if id is None else id
+        self.hash = row_hash if hash is None else hash
+        self.time = time_ns() if row_time is None else row_time
+
+
+def calHash(url: str, item: dict) -> HashEntry:
+    """Build the fingerprint of one feed item.
+
+    The entry ``id`` is the digest of the feed URL alone; the entry ``hash``
+    is the digest of the URL followed by the item's ``title``, ``link`` and
+    ``description`` (in that order, skipping missing or ``None`` fields).
+
+    :param url: feed URL; it is concatenated with strings, so it must be a
+        ``str`` (it was previously annotated as ``dict`` by mistake).
+    :param item: parsed feed item.
+    :return: a ``HashEntry`` with ``id`` and ``hash`` set.
+    """
+    # Plain concatenation with no separator is kept on purpose: changing the
+    # digest input would change every resulting hash.
+    parts = [url]
+    for key in ('title', 'link', 'description'):
+        value = item.get(key)
+        if value is not None:
+            parts.append(value)
+    return HashEntry(id=sha256WithBase64(url), hash=sha256WithBase64(''.join(parts)))
+
+
+class HashEntries:
+    """Bounded, time-ordered collection of ``HashEntry`` objects.
+
+    At most ``maxCount`` entries are kept; when the limit is exceeded the
+    entries with the smallest ``time`` are discarded first.  A limit that is
+    ``None`` or below 1 is replaced by 100, both at construction and in
+    ``setMaxCount``.
+    """
+
+    def __init__(self, maxCount: int = 100):
+        self.__list = []
+        self.__maxCount = self.__validCount(maxCount)
+
+    @staticmethod
+    def __validCount(maxCount) -> int:
+        """Return ``maxCount`` if it is a usable limit (>= 1), else 100."""
+        return maxCount if maxCount is not None and maxCount >= 1 else 100
+
+    def __removeMax(self):
+        """Sort by time and drop the oldest entries beyond the limit."""
+        self.__sort()
+        excess = len(self.__list) - self.__maxCount
+        if excess > 0:
+            # One slice deletion instead of removing elements one by one.
+            del self.__list[:excess]
+
+    def __sort(self, reverse: bool = False):
+        """Order the entries by ``time`` (ascending unless ``reverse``)."""
+        self.__list.sort(key=lambda d: d.time, reverse=reverse)
+
+    def add(self, d: "HashEntry"):
+        """Append ``d`` unless it lacks an id/hash or is already present."""
+        if d.hash is None or d.id is None:
+            return
+        if any(v.hash == d.hash and v.id == d.id for v in self.__list):
+            return
+        self.__list.append(d)
+        self.__removeMax()
+
+    def getList(self) -> List["HashEntry"]:
+        """Return a copy of the entries, oldest first, trimmed to the limit."""
+        self.__removeMax()
+        return list(self.__list)
+
+    def setMaxCount(self, maxCount: int):
+        """Change the limit (invalid values fall back to 100) and trim."""
+        self.__maxCount = self.__validCount(maxCount)
+        self.__removeMax()
+
+
class RSSEntry:
- def __init__(self, data=None):
+ def __init__(self, data=None, maxCount: int = 100):
self.title = None
if data is not None and data[0] is not None:
self.title = data[0]
@@ -46,3 +107,4 @@ class RSSEntry:
if data is not None and data[4] is not None:
self.id = data[4]
self.chatList = []
+ self.hashList = HashEntries(maxCount)
diff --git a/config.py b/config.py
index 3b95a19..5ca7780 100644
--- a/config.py
+++ b/config.py
@@ -21,10 +21,12 @@ class RSSConfig:
self.disable_web_page_preview = False
self.show_RSS_title = True
self.show_Content_title = True
+ self.show_content = True
+ self.send_media = True
if d is not None:
for k in d.keys():
if hasattr(self, k):
setattr(self, k, d[k])
def toJson(self):
- return dumps({'disable_web_page_preview': self.disable_web_page_preview, 'show_RSS_title': self.show_RSS_title, 'show_Content_title': self.show_Content_title}, ensure_ascii=False)
+ return dumps({'disable_web_page_preview': self.disable_web_page_preview, 'show_RSS_title': self.show_RSS_title, 'show_Content_title': self.show_Content_title, 'show_content': self.show_content, 'send_media': self.send_media}, ensure_ascii=False)
diff --git a/database.py b/database.py
index 20715af..28a3a77 100644
--- a/database.py
+++ b/database.py
@@ -15,7 +15,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import sqlite3
from config import RSSConfig
-from RSSEntry import RSSEntry, ChatEntry
+from RSSEntry import RSSEntry, ChatEntry, HashEntry, HashEntries
from typing import List
from enum import Enum, unique
from threading import Lock
@@ -84,13 +84,15 @@ PRIMARY KEY (hash)
)''')
self._db.commit()
- def __init__(self):
+ def __init__(self, m):
self._version = [1, 0, 0, 0]
self._value_lock = Lock()
self._db = sqlite3.connect('data.db', check_same_thread=False)
ok = self.__check_database()
if not ok:
self.__create_table()
+ from rssbot import main
+ self._main: main = m
def __removeRSSEntry(self, id: str) -> bool:
try:
@@ -106,7 +108,7 @@ PRIMARY KEY (hash)
f'INSERT INTO config VALUES ({self._version[0]}, {self._version[1]}, {self._version[2]}, {self._version[3]});')
self._db.commit()
- def addRSSList(self, title: str, url: str, chatId: int, config: RSSConfig, ttl: int = None):
+ def addRSSList(self, title: str, url: str, chatId: int, config: RSSConfig, ttl: int = None, hashEntries: HashEntries = None):
with self._value_lock:
try:
hashd = sha256WithBase64(url)
@@ -115,20 +117,37 @@ PRIMARY KEY (hash)
has_data = False
for i in cur:
has_data = True
+ break
if has_data:
- self._db.execute(f'DELETE FROM RSSList WHERE id="{hashd}"')
- self._db.execute(
- f"INSERT INTO RSSList VALUES ('{dealtext(title)}', '{dealtext(url)}', {ttl if ttl is not None else 'null'}, null, '{hashd}')")
+ self._db.execute(
+ f"UPDATE RSSList SET title='{dealtext(title)}', ttl={ttl if ttl is not None else 'null'} WHERE id='{hashd}'")
+ else:
+ self._db.execute(
+ f"INSERT INTO RSSList VALUES ('{dealtext(title)}', '{dealtext(url)}', {ttl if ttl is not None else 'null'}, null, '{hashd}')")
cur = self._db.execute(
f'SELECT * FROM chatList WHERE id="{hashd}" AND chatId={chatId}')
- has_data = False
+ has_data2 = False
for i in cur:
- has_data = True
- if has_data:
+ has_data2 = True
+ break
+ if has_data2:
self._db.execute(
f'DELETE FROM chatList WHERE id="{hashd}" AND chatId={chatId}')
self._db.execute(
f"INSERT INTO chatList VALUES ({chatId}, '{hashd}', '{dealtext(config.toJson())}')")
+ if hashEntries is not None and not has_data:
+ cur = self._db.execute(
+ f"SELECT * FROM hashList WHERE id='{hashd}'")
+ has_data3 = False
+ for i in cur:
+ has_data3 = True
+ break
+ if has_data3:
+ self._db.execute(
+ f"DELETE FROM hashList WHERE ID='{hashd}'")
+ for v in hashEntries.getList():
+ self._db.execute(
+ f"INSERT INTO hashList VALUES ('{v.id}', '{v.hash}', {v.time})")
self._db.commit()
return True
except:
@@ -139,12 +158,16 @@ PRIMARY KEY (hash)
cur = self._db.execute(f'SELECT * FROM RSSList;')
r = []
for i in cur:
- temp = RSSEntry(i)
+ temp = RSSEntry(i, self._main._setting._maxCount)
cur2 = self._db.execute(
f'SELECT * FROM chatList WHERE id="{temp.id}"')
for i2 in cur2:
temp2 = ChatEntry(i2)
temp.chatList.append(temp2)
+ cur3 = self._db.execute(
+ f"SELECT * FROM hashList WHERE id='{temp.id}' ORDER BY time")
+ for i3 in cur3:
+ temp.hashList.add(HashEntry(i3))
if len(temp.chatList) == 0:
self.__removeRSSEntry(temp.id)
else:
diff --git a/readset.py b/readset.py
index 72b6982..864feba 100644
--- a/readset.py
+++ b/readset.py
@@ -27,3 +27,4 @@ class settings:
if len(l) == 2:
d[l[0]] = l[1]
self._token = d['token'] if 'token' in d else None
+ self._maxCount = int(d['maxCount']) if 'maxCount' in d and d['maxCount'].isnumeric() else 100
diff --git a/rssbot.py b/rssbot.py
index 95cdf0c..072f6dc 100644
--- a/rssbot.py
+++ b/rssbot.py
@@ -14,6 +14,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from database import database, userStatus, RSSConfig
+from RSSEntry import HashEntry, HashEntries, calHash
from os.path import exists
from readset import settings
from requests import Session
@@ -46,10 +47,14 @@ def getMediaInfo(m: dict, config: RSSConfig = RSSConfig()) -> str:
s = f"""{s}\n群/频道ID:{m['chatId']}"""
elif 'userId' in m and m['userId'] is not None:
s = f"""{s}\n订阅的账号"""
+ if '_type' in m and m['_type'] is not None:
+ s = f"""{s}\n类型:{m['_type']}"""
s = f"{s}\n设置:"
s = f"{s}\n禁用预览:{config.disable_web_page_preview}"
s = f"{s}\n显示RSS标题:{config.show_RSS_title}"
s = f"{s}\n显示内容标题:{config.show_Content_title}"
+ s = f"{s}\n显示内容:{config.show_content}"
+ s = f"{s}\n发送媒体:{config.send_media}"
return s
@@ -63,6 +68,8 @@ class InlineKeyBoardCallBack(Enum):
DisableWebPagePreview = 6
ShowRSSTitle = 7
ShowContentTitle = 8
+ ShowContent = 9
+ SendMedia = 10
def getInlineKeyBoardWhenRSS(hashd: str, m: dict) -> str:
@@ -108,6 +115,14 @@ def getInlineKeyBoardWhenRSS2(hashd: str, config: RSSConfig) -> str:
temp = '隐藏内容标题' if config.show_Content_title else '显示内容标题'
d[i].append(
{'text': temp, 'callback_data': f'0,{hashd},{InlineKeyBoardCallBack.ShowContentTitle.value}'})
+ temp = '隐藏内容' if config.show_content else '显示内容'
+ d[i].append(
+ {'text': temp, 'callback_data': f'0,{hashd},{InlineKeyBoardCallBack.ShowContent.value}'})
+ d.append([])
+ i = i + 1
+ temp = '禁用发送媒体' if config.send_media else '启用发送媒体'
+ d[i].append(
+ {'text': temp, 'callback_data': f'0,{hashd},{InlineKeyBoardCallBack.SendMedia.value}'})
d[i].append(
{'text': '返回', 'callback_data': f'0,{hashd},{InlineKeyBoardCallBack.BackToNormalPage.value}'})
return {'inline_keyboard': d}
@@ -148,14 +163,14 @@ class main:
elif 'link' in content and content['link'] is not None and content['link'] != '':
text.addtotext(
f"""{escape(content['link'])}""")
- if 'description' in content and content['description'] is not None and content['description'] != '':
+ if config.show_content and 'description' in content and content['description'] is not None and content['description'] != '':
text.addtotext(content['description'])
def getListCount(content: dict, key: str):
- if key not in content and content[key] is None:
+ if key not in content or content[key] is None:
return 0
return len(content[key])
- if getListCount(content, 'imgList') == 0 and getListCount(content, 'videoList') == 0:
+ if not config.send_media or (getListCount(content, 'imgList') == 0 and getListCount(content, 'videoList') == 0):
if config.disable_web_page_preview:
di['disable_web_page_preview'] = True
di['text'] = text.tostr()
@@ -218,14 +233,14 @@ class main:
self._upi = i['update_id'] + 1
def start(self):
- self._db = database()
- if not exists('settings.txt'):
- print('找不到settings.txt')
- return -1
self._setting = settings('settings.txt')
if self._setting._token is None:
print('没有机器人token')
return -1
+ self._db = database(self)
+ if not exists('settings.txt'):
+ print('找不到settings.txt')
+ return -1
self._r = Session()
self._me = self._request('getMe')
self._rssMetaList = rssMetaList()
@@ -390,9 +405,9 @@ class messageHandle(Thread):
continue
if chatMember['status'] not in ['creator', 'administrator']:
continue
- if re2['type'] == 'channel' and ('can_post_messages' not in chatMember or not chatMember['can_post_messages']):
+ if re2['type'] == 'channel' and chatMember['status'] == 'administrator' and ('can_post_messages' not in chatMember or not chatMember['can_post_messages']):
continue
- if re2['type'] == 'channel' and ('can_edit_messages' not in chatMember or not chatMember['can_edit_messages']):
+ if re2['type'] == 'channel' and chatMember['status'] == 'administrator' and ('can_edit_messages' not in chatMember or not chatMember['can_edit_messages']):
continue
chatM = chatMember
if chatM is None:
@@ -564,7 +579,13 @@ class callbackQueryHandle(Thread):
return
config = self._rssMeta.config
ttl = self._rssMeta.meta['ttl'] if 'ttl' in self._rssMeta.meta else None
- suc = self._main._db.addRSSList(title, url, chatId, config, ttl)
+ hashEntries = HashEntries(self._main._setting._maxCount)
+ tempList = self._rssMeta.itemList.copy()
+ tempList.reverse()
+ for v in tempList[-100:]:
+ hashEntries.add(calHash(url, v))
+ suc = self._main._db.addRSSList(
+ title, url, chatId, config, ttl, hashEntries)
if suc:
self.answer('订阅成功!')
else:
@@ -677,6 +698,32 @@ class callbackQueryHandle(Thread):
self._main._request("editMessageText", "post", json=di)
self.answer()
return
+ elif self._inlineKeyBoardCommand == InlineKeyBoardCallBack.ShowContent:
+ self._rssMeta.config.show_content = not self._rssMeta.config.show_content
+ di = {'chat_id': self._rssMeta.chatId,
+ 'message_id': self._rssMeta.messageId}
+ di['text'] = getMediaInfo(
+ self._rssMeta.meta, self._rssMeta.config)
+ di['parse_mode'] = 'HTML'
+ di['disable_web_page_preview'] = True
+ di['reply_markup'] = getInlineKeyBoardWhenRSS2(
+ self._hashd, self._rssMeta.config)
+ self._main._request("editMessageText", "post", json=di)
+ self.answer()
+ return
+ elif self._inlineKeyBoardCommand == InlineKeyBoardCallBack.SendMedia:
+ self._rssMeta.config.send_media = not self._rssMeta.config.send_media
+ di = {'chat_id': self._rssMeta.chatId,
+ 'message_id': self._rssMeta.messageId}
+ di['text'] = getMediaInfo(
+ self._rssMeta.meta, self._rssMeta.config)
+ di['parse_mode'] = 'HTML'
+ di['disable_web_page_preview'] = True
+ di['reply_markup'] = getInlineKeyBoardWhenRSS2(
+ self._hashd, self._rssMeta.config)
+ self._main._request("editMessageText", "post", json=di)
+ self.answer()
+ return
else:
self.answer('未知的按钮。')
return
diff --git a/rssparser.py b/rssparser.py
index 93da76f..a072bc5 100644
--- a/rssparser.py
+++ b/rssparser.py
@@ -15,20 +15,24 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from xml.dom import minidom
from html.parser import HTMLParser
-from html import escape
+from html import escape, unescape
import sys
import requests
from traceback import format_exc
+from urllib.parse import urljoin
class HTMLSimpleParser(HTMLParser):
- def __init__(self):
+ def __init__(self, baseUrl: str=None):
self.data = ''
self.istag = False
self.tagContent = ''
self.tagAttrs = ''
self.imgList = []
self.videoList = []
+ self.baseUrl = ''
+ if baseUrl is not None:
+ self.baseUrl = baseUrl
HTMLParser.__init__(self)
def handle_startendtag(self, tag, attrs):
@@ -42,16 +46,16 @@ class HTMLSimpleParser(HTMLParser):
elif tag == 'img':
for key, value in attrs:
if key == 'src':
- self.imgList.append(value)
+ self.imgList.append(urljoin(self.baseUrl, value))
break
return
elif tag == 'video':
p = {}
for key, value in attrs:
if key == 'src':
- p['src'] = value
+ p['src'] = urljoin(self.baseUrl, value)
if key == 'poster':
- p['poster'] = value
+ p['poster'] = urljoin(self.baseUrl, value)
if 'src' in p:
self.videoList.append(p)
return
@@ -61,18 +65,20 @@ class HTMLSimpleParser(HTMLParser):
if tag == 'a':
for key, value in attrs:
if key == 'href':
- self.tagAttrs = f'{self.tagAttrs} href="{value}"'
+ self.tagAttrs = f'{self.tagAttrs} href="{urljoin(self.baseUrl, value)}"'
def handle_data(self, data):
if self.istag:
self.tagContent = self.tagContent + data
else:
- self.data = self.data + data
+ self.data = self.data + escape(data)
def handle_endtag(self, tag):
self.istag = False
if tag in ['a', 'b', 'i', 'u', 's', 'strong', 'em', 'ins', 'strike', 'del', 'code', 'pre']:
- self.data = f"{self.data}<{tag}{self.tagAttrs}>{self.tagContent}{tag}>"
+ self.data = f"{self.data}<{tag}{self.tagAttrs}>{escape(self.tagContent)}{tag}>"
+ elif tag not in ['img', 'video', 'br']:
+ self.data = f"{self.data}{escape(self.tagContent)}"
self.tagAttrs = ''
@@ -90,13 +96,19 @@ class RSSParser:
if i.nodeName == 'entry':
itemList.append(self.__dealItemAtom(i))
elif i.nodeName == 'link':
- if 'href' in i.attributes:
+ typ = 'text/html'
+ if 'type' in i.attributes:
+ typ = i.attributes['type'].nodeValue
+ if 'href' in i.attributes and typ == 'text/html':
m[i.nodeName] = i.attributes['href'].nodeValue
elif i.nodeName == 'author':
- if len(i.childNodes) == 1 and i.firstChild.nodeName == 'name':
- name = i.firstChild
- if len(name.childNodes) == 1 and name.firstChild.nodeName == '#cdata-section':
- m['author'] = name.firstChild.nodeValue
+ for k in i.childNodes:
+ if k.nodeName == 'name':
+ m['author'] = k.nodeValue
+ break
+ elif len(k.childNodes) == 1 and k.firstChild.nodeName == '#cdata-section':
+ m['author'] = k.firstChild.nodeValue
+ break
else:
if len(i.childNodes) == 0:
m[i.nodeName] = i.nodeValue
@@ -115,7 +127,7 @@ class RSSParser:
self._type = 'atom'
return True
- def __checkasrss3(self):
+ def __checkasrss2(self):
self._root = self.xmldoc.documentElement
if self._root.localName != 'rss' or len(self._root.childNodes) != 1:
return False
@@ -147,16 +159,29 @@ class RSSParser:
if 'ttl' in m and m['ttl'] is not None and m['ttl'].isnumeric():
self.ttl = int(m['ttl'])
self.itemList = itemList
- self._type = 'rss3.0'
+ self._type = 'rss2.0'
return True
def __dealItem(self, node):
m = {}
for i in node.childNodes:
- if len(i.childNodes) == 0:
+ if i.nodeName == 'link':
+ if len(i.childNodes) == 0:
+ m[i.nodeName] = i.nodeValue
+ else:
+ m[i.nodeName] = ''
+ for k in i.childNodes:
+ m[i.nodeName] = m[i.nodeName] + k.toxml()
+ break
+ for i in node.childNodes:
+ if i.nodeName == 'link':
+ continue
+ elif len(i.childNodes) == 0:
m[i.nodeName] = i.nodeValue
elif len(i.childNodes) == 1 and i.firstChild.nodeName == '#cdata-section':
p = HTMLSimpleParser()
+ if 'link' in m and m['link'] is not None:
+ p.baseUrl = m['link']
p.feed(i.firstChild.nodeValue)
if p.data == '' and i.firstChild.nodeValue.find('<') == -1:
m[i.nodeName] = i.firstChild.nodeValue
@@ -174,28 +199,95 @@ class RSSParser:
def __dealItemAtom(self, node):
m = {}
for i in node.childNodes:
- if i.nodeName == 'author':
- if len(i.childNodes) == 1 and i.firstChild.nodeName == 'name':
- name = i.firstChild
- if len(name.childNodes) == 1 and name.firstChild.nodeName == '#cdata-section':
- m['author'] = name.firstChild.nodeValue
- elif i.nodeName == 'link':
+ if i.nodeName == 'link':
if 'href' in i.attributes:
m[i.nodeName] = i.attributes['href'].nodeValue
+ for i in node.childNodes:
+ if i.nodeName == 'author':
+ for k in i.childNodes:
+ if k.nodeName == 'name':
+ if k.nodeValue is not None:
+ m['author'] = k.nodeValue
+ break
+ elif len(k.childNodes) == 1 and k.firstChild.nodeName == '#cdata-section':
+ m['author'] = k.firstChild.nodeValue
+ break
+ elif i.nodeName == 'link':
+ continue
+ elif i.nodeName in ['title', 'content', 'summary']:
+ typ = 'text'
+ if 'type' in i.attributes:
+ if i.attributes['type'].nodeValue in ['text', 'html', 'xhtml']:
+ typ = i.attributes['type'].nodeValue
+ if len(i.childNodes) == 1 and i.firstChild.nodeName == '#cdata-section':
+ p = HTMLSimpleParser()
+ if 'link' in m and m['link'] is not None:
+ p.baseUrl = m['link']
+ p.feed(i.firstChild.nodeValue)
+ if p.data == '' and i.firstChild.nodeValue.find('<') == -1:
+ m[i.nodeName] = i.firstChild.nodeValue
+ else:
+ m[i.nodeName] = p.data
+ if i.nodeName in ['content', 'summary']:
+ m['imgList'] = p.imgList
+ m['videoList'] = p.videoList
+ m['description'] = m[i.nodeName]
+ del m[i.nodeName]
+ elif i.nodeValue is None and len(i.childNodes) == 0:
+ continue
+ elif typ == 'text':
+ s = ''
+ if i.nodeValue is not None:
+ s = i.nodeValue
+ else:
+ for k in i.childNodes:
+ s = s + k.toxml()
+ m[i.nodeName] = unescape(s)
+ elif typ == 'html':
+ s = ''
+ if i.nodeValue is not None:
+ s = i.nodeValue
+ else:
+ for k in i.childNodes:
+ s = s + k.toxml()
+ p = HTMLSimpleParser()
+ if 'link' in m and m['link'] is not None:
+ p.baseUrl = m['link']
+ p.feed(unescape(s))
+ if p.data == '' and i.firstChild.nodeValue.find('<') == -1:
+ m[i.nodeName] = i.firstChild.nodeValue
+ else:
+ m[i.nodeName] = p.data
+ if i.nodeName in ['content', 'summary']:
+ m['imgList'] = p.imgList
+ m['videoList'] = p.videoList
+ m['description'] = m[i.nodeName]
+ del m[i.nodeName]
+ elif typ == 'xhtml':
+ p = HTMLSimpleParser()
+ if 'link' in m and m['link'] is not None:
+ p.baseUrl = m['link']
+ p.feed(i.firstChild.toxml())
+ if p.data == '' and i.firstChild.nodeValue.find('<') == -1:
+ m[i.nodeName] = i.firstChild.nodeValue
+ else:
+ m[i.nodeName] = p.data
+ if i.nodeName in ['content', 'summary']:
+ m['imgList'] = p.imgList
+ m['videoList'] = p.videoList
+ m['description'] = m[i.nodeName]
+ del m[i.nodeName]
elif len(i.childNodes) == 0:
m[i.nodeName] = i.nodeValue
elif len(i.childNodes) == 1 and i.firstChild.nodeName == '#cdata-section':
p = HTMLSimpleParser()
+ if 'link' in m and m['link'] is not None:
+ p.baseUrl = m['link']
p.feed(i.firstChild.nodeValue)
if p.data == '' and i.firstChild.nodeValue.find('<') == -1:
m[i.nodeName] = i.firstChild.nodeValue
else:
m[i.nodeName] = p.data
- if i.nodeName == 'content':
- m['imgList'] = p.imgList
- m['videoList'] = p.videoList
- m['description'] = m['content']
- del m['content']
else:
m[i.nodeName] = ''
for k in i.childNodes:
@@ -203,14 +295,15 @@ class RSSParser:
return m
def check(self):
- try:
- checked = self.__checkasrss3()
- if not checked:
- checked = self.__checkasratom()
- return checked
- except:
- print(format_exc())
- return False
+ for f in [self.__checkasrss2, self.__checkasratom]:
+ try:
+ if f():
+ self.m['_type'] = self._type
+ return True
+ except:
+ print(format_exc())
+ pass
+ return False
def normalize(self):
self.removeblank(self.xmldoc.documentElement)
@@ -243,4 +336,7 @@ if __name__ == "__main__":
fn = sys.argv[1]
p = RSSParser()
p.parse(fn)
- p.check()
+ if p.check():
+ print(p._type)
+ else:
+ print('解析失败')