Index: parsers/security.py
===================================================================
--- parsers/security.py (revision 16)
+++ parsers/security.py (working copy)
@@ -24,6 +24,7 @@
errata_p.__init__(self)
self.odir = "advisories/"
self.ofile = "security.xml"
+ self.synopsis = []
self.detail = []
self.cve_counter = 0
self.errata_counter = 0
@@ -41,7 +42,7 @@
if os.environ['http_proxy'] == '':
self.proxy = None
else:
- self.proxy = { 'http' : os.environ['http_proxy'] }
+ self.proxy = { 'http' : os.environ['http_proxy'], 'https' : os.environ['http_proxy'] }
except:
self.proxy = None
@@ -93,6 +94,13 @@
self.detail = result
+ # Get RHSA title (contains severity and synopsis)
+ result = re.search('
(.+?)
', page, re.I | re.M)
+ if result:
+ self.synopsis = result.group(1)
+ else:
+ self.synopsis = 'Unknown: Unknown'
+
def update_database(self, link):
"""
Updates the output XML file to reflect the new contents
@@ -111,15 +119,20 @@
# 6 - RPMs Required
# 7 - References
# 8 - Requirements (not used)
- self.get_rhsa_detail(link)
+ try:
+ self.get_rhsa_detail(link)
+ except urlgrabber.grabber.URLGrabError:
+ print 'Error grabbing link', link
+ return
# Hack to get past RPMs that outdate other rpms.
# FIXME: Check to see what RPM is outdated and update XML file
# as necessary
- for detail in self.detail:
- if detail.find("File outdated") > 0:
- #print "Encountered outdated RPM"
- return
+# if not __main__.all_advisories:
+# for detail in self.detail:
+# if detail.find("File outdated") > 0:
+# #print "Encountered outdated RPM"
+# return
self.advisory_url = link
@@ -162,7 +175,7 @@
self.parse_rights(w)
self.parse_type(w)
- self.parse_synopsis(w, self.detail[1])
+ self.parse_synopsis(w, self.synopsis)
self.parse_issue_date(w, self.strip_html(self.detail[0]))
self.parse_updated_on(w, self.strip_html(self.detail[0]))
@@ -268,30 +281,16 @@
"""
data = self.tags_to_space(data)
data = self.strip_html(data)
- block = data.split("\n")
+ line = data.strip().split(':')
- # Red hat doesnt separate out the severity information
- # in their webpages like they do in their email archives.
- # Therefore I'm looking for the severity string to
- # determine the severity level.
- for line in block:
- if line.find("moderate security") > -1:
- w.element("severity", "moderate")
- break
- elif line.find("important security") > -1:
- w.element("severity", "important")
- break
- elif line.find("critical security") > -1:
- w.element("severity", "critical")
- break
- elif line.find("low security") > -1:
- w.element("severity", "low")
- break
+ severity = line[0].strip().lower()
+ synopsis = ':'.join(line[1:]).strip()
+
+ if severity in ('low', 'moderate', 'important', 'critical'):
+ w.element('severity', severity)
else:
- w.element("severity", "unknown")
+ w.element('severity', 'unknown')
- synopsis = block[0].strip()
-
w.element("synopsis", synopsis, lang="en_US")
def tags_to_space(self, item):
@@ -464,7 +463,7 @@
# in the script to differentiate between releases
for i in block:
item = i.strip()
- if item == "":
+ if not item:
continue
else:
if self.at_arch(item[0:-1]):
@@ -508,9 +507,8 @@
self.parse_srpm_arch(w, block, item)
elif self.at_arch(item):
self.parse_arch(w, block, item)
- elif item == "":
- item = block.pop().replace(" ", '').strip()
- continue
+ elif not item:
+ pass
else:
block.append(item)
break
@@ -561,11 +559,11 @@
error = "new_arch"
break
- if filename == "":
+ elif not filename:
error = "new_arch"
break
- if not self.linked:
+ elif not self.linked:
junk = block.pop()
checksum = block.pop().replace(" ", '').strip()
@@ -594,18 +592,21 @@
error = "header"
break
- if self.at_arch(filename):
+ elif self.at_arch(filename):
error = "new_arch"
break
- if filename == "":
+ elif not filename:
error = "new_arch"
break
- if not self.linked:
+ elif not self.linked:
junk = block.pop()
- checksum = block.pop().replace(" ", '').strip()
+ try:
+ checksum = block.pop().replace(" ", '').strip()
+ except IndexError:
+ break
# Hack to make RH webpages that list IA-32 stuff
# equal the data that is actually listed in the
@@ -618,7 +619,7 @@
w.element("sum", checksum, type="md5")
w.end("file")
- if error == "header" or error == "new_arch":
+ if error in ('header', 'new_arch'):
block.append(filename)
return
Index: scrapers/site.py
===================================================================
--- scrapers/site.py (revision 16)
+++ scrapers/site.py (working copy)
@@ -63,7 +63,11 @@
self.releases = {}
p = re.compile('\')
- page = urlgrabber.urlread(self.release_list+"/errata", proxies=self.proxy)
+ try:
+ page = urlgrabber.urlread(self.release_list+"/errata", proxies=self.proxy)
+ except urlgrabber.grabber.URLGrabError:
+ print 'Error grabbing list of all releases from', self.release_list+"/errata"
+ return
result = p.split(page)
Index: aerrate.py
===================================================================
--- aerrate.py (revision 16)
+++ aerrate.py (working copy)
@@ -30,8 +30,11 @@
enhancements
Argument list
- --source Source to read from
+ --all Download al advisories (do not skip outdated advisories)
+ --source Source to read from (either archive, feed or site)
+ *Defaults to site*
+
--type The type of errata list to read from
--release Parse errata for specified release
@@ -42,11 +45,13 @@
source = 0
type = 0
release = 'none'
+ all_advisories = 0
print_releases = 0
# Try to parse any command line arguments
try:
opts, args = getopt.getopt(sys.argv[1:], "hstru", [ "help",
+ "all",
"source=",
"type=",
"printreleases",
@@ -63,24 +68,27 @@
if o in ("-h", "--help"):
usage()
sys.exit()
- if o in ("-s", "--source"):
+ elif o in ("-s", "--source"):
source = a
- if o in ("-t", "--type"):
+ elif o in ("-t", "--type"):
type = a
- if o in ("-p", "--printreleases"):
+ elif o in ("-a", "--all"):
+ all_advisories = 1
+ elif o in ("-p", "--printreleases"):
print_releases = 1
- if o in ("-r", "--release"):
+ elif o in ("-r", "--release"):
release = a
+ else:
+ print 'Option %s not understood.' %o
+ sys.exit(1)
# Print releases is one argument in particular
# that doesnt require the source argument.
# So if that argument has been sent, skip
# over this small argument checking block
if not print_releases:
- # The source argument is required always
if not source:
- usage()
- sys.exit(0)
+ source = "site"
# The site argument requires a type
# argument and a release arg be sent too
if source == "site":