update scrapers to handle new feeds
This commit is contained in:
parent
34515fba31
commit
9d52c8454b
35
database.py
35
database.py
|
@@ -34,6 +34,8 @@ def scrape_feed(feed_url):
|
||||||
|
|
||||||
if "factorio.com" in feed_url: # because screw making it generic
|
if "factorio.com" in feed_url: # because screw making it generic
|
||||||
return scrape_factorio(soup)
|
return scrape_factorio(soup)
|
||||||
|
elif "zombieknight" in feed_url:
|
||||||
|
return scrape_zombie_knight(soup)
|
||||||
|
|
||||||
meta = {}
|
meta = {}
|
||||||
meta['title'] = soup.title.text
|
meta['title'] = soup.title.text
|
||||||
|
@@ -48,11 +50,13 @@ def scrape_feed(feed_url):
|
||||||
entry_dict['link'] = entry.link.text
|
entry_dict['link'] = entry.link.text
|
||||||
try:
|
try:
|
||||||
date = entry.pubDate.text
|
date = entry.pubDate.text
|
||||||
|
try:
|
||||||
date = time.strptime(date, '%a, %d %b %Y %H:%M:%S %z')
|
date = time.strptime(date, '%a, %d %b %Y %H:%M:%S %z')
|
||||||
except AttributeError:
|
except ValueError:
|
||||||
date = entry_dict['date'] = entry.find('dc:date').text
|
date = time.strptime(date, '%a, %d %b %Y %H:%M:%S %Z')
|
||||||
date=time.strptime(date[:-3]+date[-2:], '%Y-%m-%dT%H:%M:%S%z')
|
|
||||||
entry_dict['date'] = time.strftime('%Y-%m-%d', date)
|
entry_dict['date'] = time.strftime('%Y-%m-%d', date)
|
||||||
|
except AttributeError:
|
||||||
|
entry_dict['date'] = ""
|
||||||
entry_dict['description'] = entry.description.text[:200]
|
entry_dict['description'] = entry.description.text[:200]
|
||||||
# TODO: html sanitation
|
# TODO: html sanitation
|
||||||
feed_entries.append(entry_dict)
|
feed_entries.append(entry_dict)
|
||||||
|
@@ -60,6 +64,30 @@ def scrape_feed(feed_url):
|
||||||
return feed
|
return feed
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_zombie_knight(soup):
    """Handles the special case that is Zombie Knight Saga.

    Reads the feed-level metadata and up to the first 20 ``entry`` elements
    from an already-parsed feed document.

    Args:
        soup: Parsed feed document. Only attribute-style tag access
            (``soup.title``), ``find`` and ``find_all`` are used on it, so it
            is presumably a BeautifulSoup object -- confirm against the caller.

    Returns:
        A dict of the form ``{'meta': meta, 'entries': feed_entries}``.
        ``meta`` has the keys ``title``, ``html_url`` and ``description``;
        every entry has the keys ``title``, ``link``, ``date`` (formatted as
        ``YYYY-MM-DD``, or ``""`` when the timestamp is missing or cannot be
        parsed) and ``description`` (first 200 characters of the content).
    """
    meta = {}
    meta['title'] = soup.title.text
    meta['html_url'] = soup.find("link", rel='alternate').get('href')
    meta['description'] = soup.subtitle.text

    feed_entries = []
    for entry in soup.find_all('entry')[:20]:
        entry_dict = {}
        entry_dict['title'] = entry.title.text

        # Look the link up on the entry itself: searching the whole document
        # returns the first (feed-level) alternate link, which gave every
        # entry the same URL. Fall back to the feed URL if the entry has none.
        entry_link = entry.find("link", rel='alternate')
        if entry_link is not None:
            entry_dict['link'] = entry_link.get('href')
        else:
            entry_dict['link'] = meta['html_url']

        try:
            date = entry.updated.text
            # Drop the ".mmm" fraction and the colon inside the UTC offset,
            # e.g. "2017-03-05T12:34:56.789-08:00" -> "2017-03-05T12:34:56-0800",
            # so that the result can be parsed with %z.
            date = "".join(
                re.search(r"(.*)\.\d{3}(.\d{2}):(\d{2})", date).groups()
            )
            date = time.strptime(date, '%Y-%m-%dT%H:%M:%S%z')
            entry_dict['date'] = time.strftime('%Y-%m-%d', date)
        except (AttributeError, ValueError):
            # AttributeError: no <updated> tag, or the timestamp did not have
            # the expected shape (re.search returned None).
            # ValueError: strptime rejected the rebuilt timestamp.
            # One bad entry should not abort the whole feed.
            entry_dict['date'] = ""

        entry_dict['description'] = entry.content.text[:200]
        # TODO: html sanitation
        feed_entries.append(entry_dict)

    feed = {'meta': meta, 'entries': feed_entries}
    return feed
|
||||||
|
|
||||||
|
|
||||||
def scrape_factorio(soup):
|
def scrape_factorio(soup):
|
||||||
"""Handles the special case that is the Factorio development blog."""
|
"""Handles the special case that is the Factorio development blog."""
|
||||||
meta = {}
|
meta = {}
|
||||||
|
@@ -92,6 +120,7 @@ def update_feed(feed_url):
|
||||||
def update_all_feeds():
    """Refresh each watched feed in turn.

    Walks ``config.FEEDS`` in order and hands every URL to ``update_feed``.
    """
    for watched_url in config.FEEDS:
        update_feed(watched_url)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user