From 7d0aa5abc2d215ccc58cca328534c25000bf07d9 Mon Sep 17 00:00:00 2001
From: lifegpc <g1710431395@gmail.com>
Date: Thu, 7 Jan 2021 14:17:02 +0800
Subject: [PATCH] Add item-hash tracking, content/media toggles and Atom parsing fixes

---
 RSSEntry.py  |  64 +++++++++++++++++++-
 config.py    |   4 +-
 database.py  |  43 ++++++++++---
 readset.py   |   1 +
 rssbot.py    |  67 +++++++++++++++++---
 rssparser.py | 168 ++++++++++++++++++++++++++++++++++++++++-----------
 6 files changed, 289 insertions(+), 58 deletions(-)

diff --git a/RSSEntry.py b/RSSEntry.py
index 91a4007..872af4c 100644
--- a/RSSEntry.py
+++ b/RSSEntry.py
@@ -15,6 +15,9 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 from json import loads
 from config import RSSConfig
+from time import time_ns
+from typing import List
+from hashl import sha256WithBase64
 
 
 class ChatEntry:
@@ -28,8 +31,66 @@ class ChatEntry:
             self.config = RSSConfig()
 
 
+class HashEntry:
+    """A seen feed item: feed id, content hash and a timestamp (default time_ns())."""
+    def __init__(self, data=None, id: str = None, hash: str = None):
+        # `data` is an indexable (id, hash, time) row such as a hashList
+        # record; explicit `id`/`hash` arguments take precedence over it.
+        row = (None, None, None) if data is None else (data[0], data[1], data[2])
+        self.id = row[0] if id is None else id
+        self.hash = row[1] if hash is None else hash
+        self.time = time_ns() if row[2] is None else row[2]
+
+
+def calHash(url: str, item: dict) -> HashEntry:
+    """Build the HashEntry that identifies `item` within the feed at `url`.
+
+    The id hashes the feed URL alone; the content hash covers the URL followed
+    by the item's title, link and description (missing or None fields skipped).
+    """
+    hasht = url
+    for key in ('title', 'link', 'description'):
+        if item.get(key) is not None:
+            hasht = hasht + item[key]
+    return HashEntry(id=sha256WithBase64(url), hash=sha256WithBase64(hasht))
+
+
+class HashEntries:
+    """Bounded collection of HashEntry objects that keeps only the newest ones."""
+
+    def __init__(self, maxCount: int = 100):
+        self.__list = []
+        self.setMaxCount(maxCount)
+
+    def __removeMax(self):
+        # Oldest entries sort first, so the overflow is always a prefix.
+        self.__sort()
+        del self.__list[:max(0, len(self.__list) - self.__maxCount)]
+
+    def __sort(self, reverse: bool = False):
+        self.__list.sort(key=lambda d: d.time, reverse=reverse)
+
+    def add(self, d: HashEntry):
+        """Store `d` unless it lacks an id/hash or an equal pair is present."""
+        if d.hash is None or d.id is None or any(
+                v.hash == d.hash and v.id == d.id for v in self.__list):
+            return
+        self.__list.append(d)
+        self.__removeMax()
+
+    def getList(self) -> List[HashEntry]:
+        """Return a copy of the entries, oldest first, trimmed to the cap."""
+        self.__removeMax()
+        return list(self.__list)
+
+    def setMaxCount(self, maxCount: int):
+        """Set the cap; None or values below 1 fall back to 100."""
+        self.__maxCount = maxCount if maxCount is not None and maxCount >= 1 else 100
+        self.__removeMax()
+
+
 class RSSEntry:
-    def __init__(self, data=None):
+    def __init__(self, data=None, maxCount: int = 100):
         self.title = None
         if data is not None and data[0] is not None:
             self.title = data[0]
@@ -46,3 +107,4 @@ class RSSEntry:
         if data is not None and data[4] is not None:
             self.id = data[4]
         self.chatList = []
+        self.hashList = HashEntries(maxCount)
diff --git a/config.py b/config.py
index 3b95a19..5ca7780 100644
--- a/config.py
+++ b/config.py
@@ -21,10 +21,12 @@ class RSSConfig:
         self.disable_web_page_preview = False
         self.show_RSS_title = True
         self.show_Content_title = True
+        self.show_content = True
+        self.send_media = True
         if d is not None:
             for k in d.keys():
                 if hasattr(self, k):
                     setattr(self, k, d[k])
 
     def toJson(self):
-        return dumps({'disable_web_page_preview': self.disable_web_page_preview, 'show_RSS_title': self.show_RSS_title, 'show_Content_title': self.show_Content_title}, ensure_ascii=False)
+        return dumps({'disable_web_page_preview': self.disable_web_page_preview, 'show_RSS_title': self.show_RSS_title, 'show_Content_title': self.show_Content_title, 'show_content': self.show_content, 'send_media': self.send_media}, ensure_ascii=False)
diff --git a/database.py b/database.py
index 20715af..28a3a77 100644
--- a/database.py
+++ b/database.py
@@ -15,7 +15,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 import sqlite3
 from config import RSSConfig
-from RSSEntry import RSSEntry, ChatEntry
+from RSSEntry import RSSEntry, ChatEntry, HashEntry, HashEntries
 from typing import List
 from enum import Enum, unique
 from threading import Lock
@@ -84,13 +84,15 @@ PRIMARY KEY (hash)
 )''')
         self._db.commit()
 
-    def __init__(self):
+    def __init__(self, m):
         self._version = [1, 0, 0, 0]
         self._value_lock = Lock()
         self._db = sqlite3.connect('data.db', check_same_thread=False)
         ok = self.__check_database()
         if not ok:
             self.__create_table()
+        from rssbot import main
+        self._main: main = m
 
     def __removeRSSEntry(self, id: str) -> bool:
         try:
@@ -106,7 +108,7 @@ PRIMARY KEY (hash)
             f'INSERT INTO config VALUES ({self._version[0]}, {self._version[1]}, {self._version[2]}, {self._version[3]});')
         self._db.commit()
 
-    def addRSSList(self, title: str, url: str, chatId: int, config: RSSConfig, ttl: int = None):
+    def addRSSList(self, title: str, url: str, chatId: int, config: RSSConfig, ttl: int = None, hashEntries: HashEntries = None):
         with self._value_lock:
             try:
                 hashd = sha256WithBase64(url)
@@ -115,20 +117,37 @@ PRIMARY KEY (hash)
                 has_data = False
                 for i in cur:
                     has_data = True
+                    break
                 if has_data:
-                    self._db.execute(f'DELETE FROM RSSList WHERE id="{hashd}"')
-                self._db.execute(
-                    f"INSERT INTO RSSList VALUES ('{dealtext(title)}', '{dealtext(url)}', {ttl if ttl is not None else 'null'}, null, '{hashd}')")
+                    self._db.execute(
+                        f"UPDATE RSSList SET title='{dealtext(title)}', ttl={ttl if ttl is not None else 'null'} WHERE id='{hashd}'")
+                else:
+                    self._db.execute(
+                        f"INSERT INTO RSSList VALUES ('{dealtext(title)}', '{dealtext(url)}', {ttl if ttl is not None else 'null'}, null, '{hashd}')")
                 cur = self._db.execute(
                     f'SELECT * FROM chatList WHERE id="{hashd}" AND chatId={chatId}')
-                has_data = False
+                has_data2 = False
                 for i in cur:
-                    has_data = True
-                if has_data:
+                    has_data2 = True
+                    break
+                if has_data2:
                     self._db.execute(
                         f'DELETE FROM chatList WHERE id="{hashd}" AND chatId={chatId}')
                 self._db.execute(
                     f"INSERT INTO chatList VALUES ({chatId}, '{hashd}', '{dealtext(config.toJson())}')")
+                if hashEntries is not None and not has_data:
+                    cur = self._db.execute(
+                        f"SELECT * FROM hashList WHERE id='{hashd}'")
+                    has_data3 = False
+                    for i in cur:
+                        has_data3 = True
+                        break
+                    if has_data3:
+                        self._db.execute(
+                            f"DELETE FROM hashList WHERE ID='{hashd}'")
+                    for v in hashEntries.getList():
+                        self._db.execute(
+                            f"INSERT INTO hashList VALUES ('{v.id}', '{v.hash}', {v.time})")
                 self._db.commit()
                 return True
             except:
@@ -139,12 +158,16 @@ PRIMARY KEY (hash)
             cur = self._db.execute(f'SELECT * FROM RSSList;')
             r = []
             for i in cur:
-                temp = RSSEntry(i)
+                temp = RSSEntry(i, self._main._setting._maxCount)
                 cur2 = self._db.execute(
                     f'SELECT * FROM chatList WHERE id="{temp.id}"')
                 for i2 in cur2:
                     temp2 = ChatEntry(i2)
                     temp.chatList.append(temp2)
+                cur3 = self._db.execute(
+                    f"SELECT * FROM hashList WHERE id='{temp.id}' ORDER BY time")
+                for i3 in cur3:
+                    temp.hashList.add(HashEntry(i3))
                 if len(temp.chatList) == 0:
                     self.__removeRSSEntry(temp.id)
                 else:
diff --git a/readset.py b/readset.py
index 72b6982..864feba 100644
--- a/readset.py
+++ b/readset.py
@@ -27,3 +27,4 @@ class settings:
                 if len(l) == 2:
                     d[l[0]] = l[1]
         self._token = d['token'] if 'token' in d else None
+        self._maxCount = int(d['maxCount']) if 'maxCount' in d and d['maxCount'].isnumeric() else 100
diff --git a/rssbot.py b/rssbot.py
index 95cdf0c..072f6dc 100644
--- a/rssbot.py
+++ b/rssbot.py
@@ -14,6 +14,7 @@
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 from database import database, userStatus, RSSConfig
+from RSSEntry import HashEntry, HashEntries, calHash
 from os.path import exists
 from readset import settings
 from requests import Session
@@ -46,10 +47,14 @@ def getMediaInfo(m: dict, config: RSSConfig = RSSConfig()) -> str:
         s = f"""{s}\n群/频道ID：{m['chatId']}"""
     elif 'userId' in m and m['userId'] is not None:
         s = f"""{s}\n<a href="tg://user?id={m['userId']}">订阅的账号</a>"""
+    if '_type' in m and m['_type'] is not None:
+        s = f"""{s}\n类型：{m['_type']}"""
     s = f"{s}\n设置："
     s = f"{s}\n禁用预览：{config.disable_web_page_preview}"
     s = f"{s}\n显示RSS标题：{config.show_RSS_title}"
     s = f"{s}\n显示内容标题：{config.show_Content_title}"
+    s = f"{s}\n显示内容：{config.show_content}"
+    s = f"{s}\n发送媒体：{config.send_media}"
     return s
 
 
@@ -63,6 +68,8 @@ class InlineKeyBoardCallBack(Enum):
     DisableWebPagePreview = 6
     ShowRSSTitle = 7
     ShowContentTitle = 8
+    ShowContent = 9
+    SendMedia = 10
 
 
 def getInlineKeyBoardWhenRSS(hashd: str, m: dict) -> str:
@@ -108,6 +115,14 @@ def getInlineKeyBoardWhenRSS2(hashd: str, config: RSSConfig) -> str:
     temp = '隐藏内容标题' if config.show_Content_title else '显示内容标题'
     d[i].append(
         {'text': temp, 'callback_data': f'0,{hashd},{InlineKeyBoardCallBack.ShowContentTitle.value}'})
+    temp = '隐藏内容' if config.show_content else '显示内容'
+    d[i].append(
+        {'text': temp, 'callback_data': f'0,{hashd},{InlineKeyBoardCallBack.ShowContent.value}'})
+    d.append([])
+    i = i + 1
+    temp = '禁用发送媒体' if config.send_media else '启用发送媒体'
+    d[i].append(
+        {'text': temp, 'callback_data': f'0,{hashd},{InlineKeyBoardCallBack.SendMedia.value}'})
     d[i].append(
         {'text': '返回', 'callback_data': f'0,{hashd},{InlineKeyBoardCallBack.BackToNormalPage.value}'})
     return {'inline_keyboard': d}
@@ -148,14 +163,14 @@ class main:
         elif 'link' in content and content['link'] is not None and content['link'] != '':
             text.addtotext(
                 f"""<a href="{content['link']}">{escape(content['link'])}</a>""")
-        if 'description' in content and content['description'] is not None and content['description'] != '':
+        if config.show_content and 'description' in content and content['description'] is not None and content['description'] != '':
             text.addtotext(content['description'])
 
         def getListCount(content: dict, key: str):
-            if key not in content and content[key] is None:
+            if key not in content or content[key] is None:
                 return 0
             return len(content[key])
-        if getListCount(content, 'imgList') == 0 and getListCount(content, 'videoList') == 0:
+        if not config.send_media or (getListCount(content, 'imgList') == 0 and getListCount(content, 'videoList') == 0):
             if config.disable_web_page_preview:
                 di['disable_web_page_preview'] = True
             di['text'] = text.tostr()
@@ -218,14 +233,14 @@ class main:
                 self._upi = i['update_id'] + 1
 
     def start(self):
-        self._db = database()
         if not exists('settings.txt'):
             print('找不到settings.txt')
             return -1
         self._setting = settings('settings.txt')
         if self._setting._token is None:
             print('没有机器人token')
             return -1
+        self._db = database(self)  # created after settings: the database reads self._setting via this reference
         self._r = Session()
         self._me = self._request('getMe')
         self._rssMetaList = rssMetaList()
@@ -390,9 +405,9 @@ class messageHandle(Thread):
                                         continue
                                     if chatMember['status'] not in ['creator', 'administrator']:
                                         continue
-                                    if re2['type'] == 'channel' and ('can_post_messages' not in chatMember or not chatMember['can_post_messages']):
+                                    if re2['type'] == 'channel' and chatMember['status'] == 'administrator' and ('can_post_messages' not in chatMember or not chatMember['can_post_messages']):
                                         continue
-                                    if re2['type'] == 'channel' and ('can_edit_messages' not in chatMember or not chatMember['can_edit_messages']):
+                                    if re2['type'] == 'channel' and chatMember['status'] == 'administrator' and ('can_edit_messages' not in chatMember or not chatMember['can_edit_messages']):
                                         continue
                                     chatM = chatMember
                                 if chatM is None:
@@ -564,7 +579,13 @@ class callbackQueryHandle(Thread):
                     return
                 config = self._rssMeta.config
                 ttl = self._rssMeta.meta['ttl'] if 'ttl' in self._rssMeta.meta else None
-                suc = self._main._db.addRSSList(title, url, chatId, config, ttl)
+                hashEntries = HashEntries(self._main._setting._maxCount)
+                tempList = self._rssMeta.itemList.copy()
+                tempList.reverse()
+                for v in tempList[-100:]:
+                    hashEntries.add(calHash(url, v))
+                suc = self._main._db.addRSSList(
+                    title, url, chatId, config, ttl, hashEntries)
                 if suc:
                     self.answer('订阅成功！')
                 else:
@@ -677,6 +698,32 @@ class callbackQueryHandle(Thread):
                 self._main._request("editMessageText", "post", json=di)
                 self.answer()
                 return
+            elif self._inlineKeyBoardCommand == InlineKeyBoardCallBack.ShowContent:
+                self._rssMeta.config.show_content = not self._rssMeta.config.show_content
+                di = {'chat_id': self._rssMeta.chatId,
+                      'message_id': self._rssMeta.messageId}
+                di['text'] = getMediaInfo(
+                    self._rssMeta.meta, self._rssMeta.config)
+                di['parse_mode'] = 'HTML'
+                di['disable_web_page_preview'] = True
+                di['reply_markup'] = getInlineKeyBoardWhenRSS2(
+                    self._hashd, self._rssMeta.config)
+                self._main._request("editMessageText", "post", json=di)
+                self.answer()
+                return
+            elif self._inlineKeyBoardCommand == InlineKeyBoardCallBack.SendMedia:
+                self._rssMeta.config.send_media = not self._rssMeta.config.send_media
+                di = {'chat_id': self._rssMeta.chatId,
+                      'message_id': self._rssMeta.messageId}
+                di['text'] = getMediaInfo(
+                    self._rssMeta.meta, self._rssMeta.config)
+                di['parse_mode'] = 'HTML'
+                di['disable_web_page_preview'] = True
+                di['reply_markup'] = getInlineKeyBoardWhenRSS2(
+                    self._hashd, self._rssMeta.config)
+                self._main._request("editMessageText", "post", json=di)
+                self.answer()
+                return
         else:
             self.answer('未知的按钮。')
             return
diff --git a/rssparser.py b/rssparser.py
index 93da76f..a072bc5 100644
--- a/rssparser.py
+++ b/rssparser.py
@@ -15,20 +15,24 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 from xml.dom import minidom
 from html.parser import HTMLParser
-from html import escape
+from html import escape, unescape
 import sys
 import requests
 from traceback import format_exc
+from urllib.parse import urljoin
 
 
 class HTMLSimpleParser(HTMLParser):
-    def __init__(self):
+    def __init__(self, baseUrl: str=None):
         self.data = ''
         self.istag = False
         self.tagContent = ''
         self.tagAttrs = ''
         self.imgList = []
         self.videoList = []
+        self.baseUrl = ''
+        if baseUrl is not None:
+            self.baseUrl = baseUrl
         HTMLParser.__init__(self)
 
     def handle_startendtag(self, tag, attrs):
@@ -42,16 +46,16 @@ class HTMLSimpleParser(HTMLParser):
         elif tag == 'img':
             for key, value in attrs:
                 if key == 'src':
-                    self.imgList.append(value)
+                    self.imgList.append(urljoin(self.baseUrl, value))
                     break
             return
         elif tag == 'video':
             p = {}
             for key, value in attrs:
                 if key == 'src':
-                    p['src'] = value
+                    p['src'] = urljoin(self.baseUrl, value)
                 if key == 'poster':
-                    p['poster'] = value
+                    p['poster'] = urljoin(self.baseUrl, value)
             if 'src' in p:
                 self.videoList.append(p)
             return
@@ -61,18 +65,20 @@ class HTMLSimpleParser(HTMLParser):
         if tag == 'a':
             for key, value in attrs:
                 if key == 'href':
-                    self.tagAttrs = f'{self.tagAttrs} href="{value}"'
+                    self.tagAttrs = f'{self.tagAttrs} href="{urljoin(self.baseUrl, value)}"'
 
     def handle_data(self, data):
         if self.istag:
             self.tagContent = self.tagContent + data
         else:
-            self.data = self.data + data
+            self.data = self.data + escape(data)
 
     def handle_endtag(self, tag):
         self.istag = False
         if tag in ['a', 'b', 'i', 'u', 's', 'strong', 'em', 'ins', 'strike', 'del', 'code', 'pre']:
-            self.data = f"{self.data}<{tag}{self.tagAttrs}>{self.tagContent}</{tag}>"
+            self.data = f"{self.data}<{tag}{self.tagAttrs}>{escape(self.tagContent)}</{tag}>"
+        elif tag not in ['img', 'video', 'br']:
+            self.data = f"{self.data}{escape(self.tagContent)}"
         self.tagAttrs = ''
 
 
@@ -90,13 +96,19 @@ class RSSParser:
             if i.nodeName == 'entry':
                 itemList.append(self.__dealItemAtom(i))
             elif i.nodeName == 'link':
-                if 'href' in i.attributes:
+                typ = 'text/html'
+                if 'type' in i.attributes:
+                    typ = i.attributes['type'].nodeValue
+                if 'href' in i.attributes and typ == 'text/html':
                     m[i.nodeName] = i.attributes['href'].nodeValue
             elif i.nodeName == 'author':
-                if len(i.childNodes) == 1 and i.firstChild.nodeName == 'name':
-                    name = i.firstChild
-                    if len(name.childNodes) == 1 and name.firstChild.nodeName == '#cdata-section':
-                        m['author'] = name.firstChild.nodeValue
+                for k in i.childNodes:
+                    # <name> is an element, so its own nodeValue is always None;
+                    # the author text is held by its single text/CDATA child.
+                    if k.nodeName == 'name' and len(k.childNodes) == 1 and \
+                            k.firstChild.nodeName in ('#text', '#cdata-section'):
+                        m['author'] = k.firstChild.nodeValue
+                        break
             else:
                 if len(i.childNodes) == 0:
                     m[i.nodeName] = i.nodeValue
@@ -115,7 +127,7 @@ class RSSParser:
         self._type = 'atom'
         return True
 
-    def __checkasrss3(self):
+    def __checkasrss2(self):
         self._root = self.xmldoc.documentElement
         if self._root.localName != 'rss' or len(self._root.childNodes) != 1:
             return False
@@ -147,16 +159,29 @@ class RSSParser:
         if 'ttl' in m and m['ttl'] is not None and m['ttl'].isnumeric():
             self.ttl = int(m['ttl'])
         self.itemList = itemList
-        self._type = 'rss3.0'
+        self._type = 'rss2.0'
         return True
 
     def __dealItem(self, node):
         m = {}
         for i in node.childNodes:
-            if len(i.childNodes) == 0:
+            if i.nodeName == 'link':
+                if len(i.childNodes) == 0:
+                    m[i.nodeName] = i.nodeValue
+                else:
+                    m[i.nodeName] = ''
+                    for k in i.childNodes:
+                        m[i.nodeName] = m[i.nodeName] + k.toxml()
+                break
+        for i in node.childNodes:
+            if i.nodeName == 'link':
+                continue
+            elif len(i.childNodes) == 0:
                 m[i.nodeName] = i.nodeValue
             elif len(i.childNodes) == 1 and i.firstChild.nodeName == '#cdata-section':
                 p = HTMLSimpleParser()
+                if 'link' in m and m['link'] is not None:
+                    p.baseUrl = m['link']
                 p.feed(i.firstChild.nodeValue)
                 if p.data == '' and i.firstChild.nodeValue.find('<') == -1:
                     m[i.nodeName] = i.firstChild.nodeValue
@@ -174,28 +199,95 @@ class RSSParser:
     def __dealItemAtom(self, node):
         m = {}
         for i in node.childNodes:
-            if i.nodeName == 'author':
-                if len(i.childNodes) == 1 and i.firstChild.nodeName == 'name':
-                    name = i.firstChild
-                    if len(name.childNodes) == 1 and name.firstChild.nodeName == '#cdata-section':
-                        m['author'] = name.firstChild.nodeValue
-            elif i.nodeName == 'link':
+            if i.nodeName == 'link':
                 if 'href' in i.attributes:
                     m[i.nodeName] = i.attributes['href'].nodeValue
+        for i in node.childNodes:
+            if i.nodeName == 'author':
+                for k in i.childNodes:
+                    if k.nodeName == 'name':
+                        if k.nodeValue is not None:
+                            m['author'] = k.nodeValue
+                            break
+                        elif len(k.childNodes) == 1 and k.firstChild.nodeName == '#cdata-section':
+                            m['author'] = k.firstChild.nodeValue
+                            break
+            elif i.nodeName == 'link':
+                continue
+            elif i.nodeName in ['title', 'content', 'summary']:
+                typ = 'text'
+                if 'type' in i.attributes:
+                    if i.attributes['type'].nodeValue in ['text', 'html', 'xhtml']:
+                        typ = i.attributes['type'].nodeValue
+                if len(i.childNodes) == 1 and i.firstChild.nodeName == '#cdata-section':
+                    p = HTMLSimpleParser()
+                    if 'link' in m and m['link'] is not None:
+                        p.baseUrl = m['link']
+                    p.feed(i.firstChild.nodeValue)
+                    if p.data == '' and i.firstChild.nodeValue.find('<') == -1:
+                        m[i.nodeName] = i.firstChild.nodeValue
+                    else:
+                        m[i.nodeName] = p.data
+                    if i.nodeName in ['content', 'summary']:
+                        m['imgList'] = p.imgList
+                        m['videoList'] = p.videoList
+                        m['description'] = m[i.nodeName]
+                        del m[i.nodeName]
+                elif i.nodeValue is None and len(i.childNodes) == 0:
+                    continue
+                elif typ == 'text':
+                    s = ''
+                    if i.nodeValue is not None:
+                        s = i.nodeValue
+                    else:
+                        for k in i.childNodes:
+                            s = s + k.toxml()
+                    m[i.nodeName] = unescape(s)
+                elif typ == 'html':
+                    s = ''
+                    if i.nodeValue is not None:
+                        s = i.nodeValue
+                    else:
+                        for k in i.childNodes:
+                            s = s + k.toxml()
+                    p = HTMLSimpleParser()
+                    if 'link' in m and m['link'] is not None:
+                        p.baseUrl = m['link']
+                    p.feed(unescape(s))
+                    if p.data == '' and i.firstChild.nodeValue.find('<') == -1:
+                        m[i.nodeName] = i.firstChild.nodeValue
+                    else:
+                        m[i.nodeName] = p.data
+                    if i.nodeName in ['content', 'summary']:
+                        m['imgList'] = p.imgList
+                        m['videoList'] = p.videoList
+                        m['description'] = m[i.nodeName]
+                        del m[i.nodeName]
+                elif typ == 'xhtml':
+                    p = HTMLSimpleParser(m.get('link'))
+                    p.feed(i.firstChild.toxml())
+                    # An element firstChild (the usual <div>) has nodeValue None.
+                    raw = i.firstChild.nodeValue
+                    if p.data == '' and raw is not None and raw.find('<') == -1:
+                        m[i.nodeName] = raw
+                    else:
+                        m[i.nodeName] = p.data
+                    if i.nodeName in ['content', 'summary']:
+                        m['imgList'] = p.imgList
+                        m['videoList'] = p.videoList
+                        m['description'] = m[i.nodeName]
+                        del m[i.nodeName]
             elif len(i.childNodes) == 0:
                 m[i.nodeName] = i.nodeValue
             elif len(i.childNodes) == 1 and i.firstChild.nodeName == '#cdata-section':
                 p = HTMLSimpleParser()
+                if 'link' in m and m['link'] is not None:
+                    p.baseUrl = m['link']
                 p.feed(i.firstChild.nodeValue)
                 if p.data == '' and i.firstChild.nodeValue.find('<') == -1:
                     m[i.nodeName] = i.firstChild.nodeValue
                 else:
                     m[i.nodeName] = p.data
-                if i.nodeName == 'content':
-                    m['imgList'] = p.imgList
-                    m['videoList'] = p.videoList
-                    m['description'] = m['content']
-                    del m['content']
             else:
                 m[i.nodeName] = ''
                 for k in i.childNodes:
@@ -203,14 +295,15 @@ class RSSParser:
         return m
 
     def check(self):
-        try:
-            checked = self.__checkasrss3()
-            if not checked:
-                checked = self.__checkasratom()
-            return checked
-        except:
-            print(format_exc())
-            return False
+        for checker in (self.__checkasrss2, self.__checkasratom):
+            try:
+                if checker():
+                    # Record which format matched before reporting success.
+                    self.m['_type'] = self._type
+                    return True
+            except Exception:  # log and fall through to the next format
+                print(format_exc())
+        return False
 
     def normalize(self):
         self.removeblank(self.xmldoc.documentElement)
@@ -243,4 +336,7 @@ if __name__ == "__main__":
         fn = sys.argv[1]
         p = RSSParser()
         p.parse(fn)
-        p.check()
+        if p.check():
+            print(p._type)
+        else:
+            print('解析失败')