diff --git a/scrape_quest.py b/scrape_quest.py index 06a03fc..0aaada1 100644 --- a/scrape_quest.py +++ b/scrape_quest.py @@ -32,7 +32,9 @@ def scrape_posts(root_dir): filepath = os.path.join(root_dir, dir_name, file) print("Processing:", filepath) with open(filepath, 'r') as file: - soup = bs4.BeautifulSoup(file.read(), 'html.parser') + data = file.read() + data = re.sub(r'', '', data) + soup = bs4.BeautifulSoup(data, 'html.parser') mobiles = soup.find_all(class_='mobile') for tag in mobiles: @@ -74,11 +76,11 @@ def scrape_posts(root_dir): links = list(set(links)) # heuristics - tags = [] + tags = set() if name in QM_NAMES: - tags.append('qm_post') + tags.add('qm_post') # also counts shitposters and broken tripcodes - tags.append('story_post') + tags.add('story_post') # assume every QM post is also a story post, until # proven otherwise if "dropped my trip" in post_body.lower(): @@ -91,28 +93,34 @@ def scrape_posts(root_dir): ) ) # dropped trip doesn't necessarily mean story_post - tags.append('dropped_trip') + tags.add('dropped_trip') if len(links) > 1: - tags.append('tally_post') + tags.add('tally_post') # also counts Q&A posts if 'story_post' in tags: tags.remove('story_post') + if 'writin' in post_body.lower(): + tags.add('tally_post') + if 'story_post' in tags: + tags.remove('story_post') if posts.index(post) == 0: - tags.append('op_post') + tags.add('op_post') if "Welcome to Banished Quest!" in post_body: if 'story_post' in tags: tags.remove('story_post') if re.search(r'ro+l+ me', post_body.lower()): - tags.append('dice_call') + tags.add('dice_call') if 'story_post' in tags: tags.remove('story_post') + if re.search(r'roll .*3d10', post_body.lower()): + tags.add('dice_call') if 'final destination' in post_body.lower(): - tags.append('final_destination') + tags.add('final_destination') if 'story_post' in tags: tags.remove('story_post') if 'story_post' in tags: if len(re.findall(r'\n>', post_body)) > 1: - tags.append('vote_choices') + tags.add('vote_choices') # database insert cur.execute( diff --git a/static/voyage.css b/static/voyage.css index a83c800..20efeb1 100644 --- a/static/voyage.css +++ b/static/voyage.css @@ -20,7 +20,7 @@ body { .tag_button { color: blue; - font-size: 1em; + font-size: 1.5em; } .tag_button:hover {