58 lines
1.6 KiB
Python
58 lines
1.6 KiB
Python
from xml.dom import minidom
|
|
import sys
|
|
import requests
|
|
|
|
|
|
class RSSParser:
    """Load an RSS document with ``xml.dom.minidom`` and validate its layout.

    Intended use is ``parse()`` followed by ``check()``.

    Attributes:
        xmldoc: The parsed minidom document, or ``None`` until ``parse()``
            has loaded one.
        channel_info: Mapping filled in by a successful ``check()``; see
            that method for its contents.
    """

    # Seconds to wait for a remote feed; without a timeout ``requests.get``
    # can block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        # Defined up front so check() can tell "nothing loaded" apart from a
        # real document instead of failing with AttributeError.
        self.xmldoc = None
        self.channel_info = {}

    def check(self):
        """Return True if the loaded document is ``<rss>`` wrapping one ``<channel>``.

        The document element must have the local name ``rss`` and exactly one
        child node, and that child must have the local name ``channel``.

        On success ``channel_info`` is replaced by a dict mapping the local
        name of every non-``item`` element child of the channel to the value
        of its first child node (``None`` when the element is empty or its
        first child is not a text node).

        Returns:
            True when the layout matches, False otherwise -- including when
            no document has been loaded.
        """
        if self.xmldoc is None:
            return False

        self._root = self.xmldoc.documentElement
        if self._root.localName != 'rss' or len(self._root.childNodes) != 1:
            return False

        self._root2 = self._root.childNodes[0]
        if self._root2.localName != 'channel':
            return False

        info = {}
        for child in self._root2.childNodes:
            # Only element children carry a name; comments and stray text
            # would otherwise all collapse onto the key None.
            if child.nodeType != minidom.Node.ELEMENT_NODE:
                continue
            if child.localName == 'item':
                continue
            first = child.firstChild
            info[child.localName] = first.nodeValue if first is not None else None
        self.channel_info = info
        return True

    def normalize(self):
        """Strip whitespace from all text nodes, then normalize the document.

        ``minidom``'s ``normalize()`` discards text nodes that are empty, so
        after stripping, whitespace-only nodes (indentation, newlines between
        tags) disappear from the tree.
        """
        self.removeblank(self.xmldoc.documentElement)
        self.xmldoc.normalize()

    def parse(self, fn: str):
        """Load and normalize an RSS document from a local path or a URL.

        Args:
            fn: A filesystem path, or a URL (recognised by containing
                ``://``) that is fetched with ``requests``.

        Returns:
            True if a document was loaded and normalized; False on any
            failure (network error, non-200 response, unreadable file,
            malformed XML). On failure ``xmldoc`` keeps its previous value.
        """
        try:
            if '://' in fn:
                response = requests.get(fn, timeout=self.REQUEST_TIMEOUT)
                if response.status_code != 200:
                    # Do not fall through: a previously loaded document must
                    # not be reported as the result of this fetch.
                    return False
                # Hand over the raw bytes so the XML parser honours the
                # encoding declared in the document itself rather than the
                # HTTP layer's charset guess.
                self.xmldoc = minidom.parseString(response.content)
            else:
                self.xmldoc = minidom.parse(fn)
            self.normalize()
            return True
        except Exception:
            # Best-effort loader: every failure is reported as False, but
            # KeyboardInterrupt / SystemExit are allowed to propagate.
            return False

    def removeblank(self, node):
        """Recursively strip leading/trailing whitespace from text nodes under ``node``.

        Text nodes are modified in place; element children are descended
        into. Other node types are left untouched.
        """
        for child in node.childNodes:
            if child.nodeType == minidom.Node.TEXT_NODE:
                if child.nodeValue:
                    child.nodeValue = child.nodeValue.strip()
            elif child.nodeType == minidom.Node.ELEMENT_NODE:
                self.removeblank(child)
|
|
|
|
|
|
# Command-line entry point: load the file or URL given as the first argument
# and validate it as RSS. Exits non-zero (message on stderr) on any failure.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        fn = sys.argv[1]
        p = RSSParser()
        # parse() reports failure by returning False; stop here because
        # check() has no document to inspect in that case.
        if not p.parse(fn):
            sys.exit("cannot load RSS document: {}".format(fn))
        # check() signals a rejected document with an explicit False.
        if p.check() is False:
            sys.exit("not a valid RSS document: {}".format(fn))
    else:
        prog = sys.argv[0] if sys.argv else "script"
        sys.exit("usage: {} <rss-file-or-url>".format(prog))
|