Index: parsers/security.py =================================================================== --- parsers/security.py (revision 16) +++ parsers/security.py (working copy) @@ -24,6 +24,7 @@ errata_p.__init__(self) self.odir = "advisories/" self.ofile = "security.xml" + self.synopsis = [] self.detail = [] self.cve_counter = 0 self.errata_counter = 0 @@ -41,7 +42,7 @@ if os.environ['http_proxy'] == '': self.proxy = None else: - self.proxy = { 'http' : os.environ['http_proxy'] } + self.proxy = { 'http' : os.environ['http_proxy'], 'https' : os.environ['http_proxy'] } except: self.proxy = None @@ -93,6 +94,13 @@ self.detail = result + # Get RHSA title (contains severity and synopsis) + result = re.search('

(.+?)

', page, re.I | re.M) + if result: + self.synopsis = result.group(1) + else: + self.synopsis = 'Unknown: Unknown' + def update_database(self, link): """ Updates the output XML file to reflect the new contents @@ -111,15 +119,20 @@ # 6 - RPMs Required # 7 - References # 8 - Requirements (not used) - self.get_rhsa_detail(link) + try: + self.get_rhsa_detail(link) + except urlgrabber.grabber.URLGrabError: + print 'Error grabbing link', link + return # Hack to get past RPMs that outdate other rpms. # FIXME: Check to see what RPM is outdated and update XML file # as necessary - for detail in self.detail: - if detail.find("File outdated") > 0: - #print "Encountered outdated RPM" - return +# if not __main__.all_advisories: +# for detail in self.detail: +# if detail.find("File outdated") > 0: +# #print "Encountered outdated RPM" +# return self.advisory_url = link @@ -162,7 +175,7 @@ self.parse_rights(w) self.parse_type(w) - self.parse_synopsis(w, self.detail[1]) + self.parse_synopsis(w, self.synopsis) self.parse_issue_date(w, self.strip_html(self.detail[0])) self.parse_updated_on(w, self.strip_html(self.detail[0])) @@ -268,30 +281,16 @@ """ data = self.tags_to_space(data) data = self.strip_html(data) - block = data.split("\n") + line = data.strip().split(':') - # Red hat doesnt separate out the severity information - # in their webpages like they do in their email archives. - # Therefore I'm looking for the severity string to - # determine the severity level. 
- for line in block: - if line.find("moderate security") > -1: - w.element("severity", "moderate") - break - elif line.find("important security") > -1: - w.element("severity", "important") - break - elif line.find("critical security") > -1: - w.element("severity", "critical") - break - elif line.find("low security") > -1: - w.element("severity", "low") - break + severity = line[0].strip().lower() + synopsis = ':'.join(line[1:]).strip() + + if severity in ('low', 'moderate', 'important', 'critical'): + w.element('severity', severity) else: - w.element("severity", "unknown") + w.element('severity', 'unknown') - synopsis = block[0].strip() - w.element("synopsis", synopsis, lang="en_US") def tags_to_space(self, item): @@ -464,7 +463,7 @@ # in the script to differentiate between releases for i in block: item = i.strip() - if item == "": + if not item: continue else: if self.at_arch(item[0:-1]): @@ -508,9 +507,8 @@ self.parse_srpm_arch(w, block, item) elif self.at_arch(item): self.parse_arch(w, block, item) - elif item == "": - item = block.pop().replace(" ", '').strip() - continue + elif not item: + pass else: block.append(item) break @@ -561,11 +559,11 @@ error = "new_arch" break - if filename == "": + elif not filename: error = "new_arch" break - if not self.linked: + elif not self.linked: junk = block.pop() checksum = block.pop().replace(" ", '').strip() @@ -594,18 +592,21 @@ error = "header" break - if self.at_arch(filename): + elif self.at_arch(filename): error = "new_arch" break - if filename == "": + elif not filename: error = "new_arch" break - if not self.linked: + elif not self.linked: junk = block.pop() - checksum = block.pop().replace(" ", '').strip() + try: + checksum = block.pop().replace(" ", '').strip() + except IndexError: + break # Hack to make RH webpages that list IA-32 stuff # equal the data that is actually listed in the @@ -618,7 +619,7 @@ w.element("sum", checksum, type="md5") w.end("file") - if error == "header" or error == "new_arch": + if 
error in ('header', 'new_arch'): block.append(filename) return Index: scrapers/site.py =================================================================== --- scrapers/site.py (revision 16) +++ scrapers/site.py (working copy) @@ -63,7 +63,11 @@ self.releases = {} p = re.compile('\') - page = urlgrabber.urlread(self.release_list+"/errata", proxies=self.proxy) + try: + page = urlgrabber.urlread(self.release_list+"/errata", proxies=self.proxy) + except urlgrabber.grabber.URLGrabError: + print 'Error grabbing list of all releases from', self.release_list+"/errata" + return result = p.split(page) Index: aerrate.py =================================================================== --- aerrate.py (revision 16) +++ aerrate.py (working copy) @@ -30,8 +30,11 @@ enhancements Argument list - --source Source to read from + --all Download all advisories (do not skip outdated advisories) + --source Source to read from (either archive, feed or site) + *Defaults to site* + --type The type of errata list to read from --release Parse errata for specified release @@ -42,11 +45,13 @@ source = 0 type = 0 release = 'none' + all_advisories = 0 print_releases = 0 # Try to parse any command line arguments try: opts, args = getopt.getopt(sys.argv[1:], "hstru", [ "help", + "all", "source=", "type=", "printreleases", @@ -63,24 +68,27 @@ if o in ("-h", "--help"): usage() sys.exit() - if o in ("-s", "--source"): + elif o in ("-s", "--source"): source = a - if o in ("-t", "--type"): + elif o in ("-t", "--type"): type = a - if o in ("-p", "--printreleases"): + elif o in ("-a", "--all"): + all_advisories = 1 + elif o in ("-p", "--printreleases"): print_releases = 1 - if o in ("-r", "--release"): + elif o in ("-r", "--release"): release = a + else: + print 'Option %s not understood.' %o + sys.exit(1) # Print releases is one argument in particular # that doesnt require the source argument. 
# So if that argument has been sent, skip # over this small argument checking block if not print_releases: - # The source argument is required always if not source: - usage() - sys.exit(0) + source = "site" # The site argument requires a type # argument and a release arg be sent too if source == "site":