From 9d52c8454bbfb4d3ba420dec9816c2f6757ea46e Mon Sep 17 00:00:00 2001
From: iou1name
Date: Sun, 21 Oct 2018 02:29:24 -0400
Subject: [PATCH] update scrapers to handle new feeds

---
Review notes (this section is ignored when the patch is applied):
 * Line breaks and indentation were reconstructed from a copy of this patch
   whose whitespace had been collapsed. If the context lines do not match
   the target file, apply with `git apply --ignore-whitespace`.
 * scrape_zombie_knight: each entry now takes its link from the entry's own
   <link rel="alternate"> instead of the feed-level one, the timestamp regex
   is a raw string, and a timestamp that does not match the expected shape
   yields an empty date instead of raising AttributeError.
 * The hunk that only added a commented-out debug print to update_all_feeds
   was dropped.

 database.py | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/database.py b/database.py
index 083c54f..22e15c3 100644
--- a/database.py
+++ b/database.py
@@ -34,6 +34,8 @@ def scrape_feed(feed_url):
 
     if "factorio.com" in feed_url: # because screw making it generic
         return scrape_factorio(soup)
+    elif "zombieknight" in feed_url:
+        return scrape_zombie_knight(soup)
 
     meta = {}
     meta['title'] = soup.title.text
@@ -48,11 +50,13 @@ def scrape_feed(feed_url):
         entry_dict['link'] = entry.link.text
         try:
             date = entry.pubDate.text
-            date = time.strptime(date, '%a, %d %b %Y %H:%M:%S %z')
+            try:
+                date = time.strptime(date, '%a, %d %b %Y %H:%M:%S %z')
+            except ValueError:
+                date = time.strptime(date, '%a, %d %b %Y %H:%M:%S %Z')
+            entry_dict['date'] = time.strftime('%Y-%m-%d', date)
         except AttributeError:
-            date = entry_dict['date'] = entry.find('dc:date').text
-            date=time.strptime(date[:-3]+date[-2:], '%Y-%m-%dT%H:%M:%S%z')
-        entry_dict['date'] = time.strftime('%Y-%m-%d', date)
+            entry_dict['date'] = ""
         entry_dict['description'] = entry.description.text[:200]
         # TODO: html sanitation
         feed_entries.append(entry_dict)
@@ -60,6 +64,36 @@ def scrape_feed(feed_url):
     return feed
 
 
+def scrape_zombie_knight(soup):
+    """Handles the special case that is Zombie Knight Saga."""
+    meta = {}
+    meta['title'] = soup.title.text
+    meta['html_url'] = soup.find("link", rel='alternate').get('href')
+    meta['description'] = soup.subtitle.text
+
+    entries = soup.find_all('entry')
+    feed_entries = []
+    for entry in entries[:20]:
+        entry_dict = {}
+        entry_dict['title'] = entry.title.text
+        # Use the entry's own alternate link, not the feed-level one.
+        entry_dict['link'] = entry.find("link", rel='alternate').get('href')
+        # Drop the fractional seconds and the colon in the UTC offset so
+        # that strptime's %z accepts the timestamp.
+        match = re.search(r"(.*)\.\d{3}(.\d{2}):(\d{2})", entry.updated.text)
+        if match:
+            stamp = "".join(match.groups())
+            date = time.strptime(stamp, '%Y-%m-%dT%H:%M:%S%z')
+            entry_dict['date'] = time.strftime('%Y-%m-%d', date)
+        else:
+            entry_dict['date'] = ""
+        entry_dict['description'] = entry.content.text[:200]
+        # TODO: html sanitation
+        feed_entries.append(entry_dict)
+    feed = {'meta': meta, 'entries': feed_entries}
+    return feed
+
+
 def scrape_factorio(soup):
     """Handles the special case that is the Factorio development blog."""
     meta = {}