fifth commit
This commit is contained in:
parent
715fc2af8e
commit
b15de6f803
|
@ -32,7 +32,9 @@ def scrape_posts(root_dir):
|
|||
filepath = os.path.join(root_dir, dir_name, file)
|
||||
print("Processing:", filepath)
|
||||
with open(filepath, 'r') as file:
|
||||
soup = bs4.BeautifulSoup(file.read(), 'html.parser')
|
||||
data = file.read()
|
||||
data = re.sub(r'<wbr ?\/?>', '', data)
|
||||
soup = bs4.BeautifulSoup(data, 'html.parser')
|
||||
|
||||
mobiles = soup.find_all(class_='mobile')
|
||||
for tag in mobiles:
|
||||
|
@ -74,11 +76,11 @@ def scrape_posts(root_dir):
|
|||
links = list(set(links))
|
||||
|
||||
# heuristics
|
||||
tags = []
|
||||
tags = set()
|
||||
if name in QM_NAMES:
|
||||
tags.append('qm_post')
|
||||
tags.add('qm_post')
|
||||
# also counts shitposters and broken tripcodes
|
||||
tags.append('story_post')
|
||||
tags.add('story_post')
|
||||
# assume every QM post is also a story post, until
|
||||
# proven otherwise
|
||||
if "dropped my trip" in post_body.lower():
|
||||
|
@ -91,28 +93,34 @@ def scrape_posts(root_dir):
|
|||
)
|
||||
)
|
||||
# dropped trip doesn't necessarily mean story_post
|
||||
tags.append('dropped_trip')
|
||||
tags.add('dropped_trip')
|
||||
if len(links) > 1:
|
||||
tags.append('tally_post')
|
||||
tags.add('tally_post')
|
||||
# also counts Q&A posts
|
||||
if 'story_post' in tags:
|
||||
tags.remove('story_post')
|
||||
if 'writin' in post_body.lower():
|
||||
tags.add('tally_post')
|
||||
if 'story_post' in tags:
|
||||
tags.remove('story_post')
|
||||
if posts.index(post) == 0:
|
||||
tags.append('op_post')
|
||||
tags.add('op_post')
|
||||
if "Welcome to Banished Quest!" in post_body:
|
||||
if 'story_post' in tags:
|
||||
tags.remove('story_post')
|
||||
if re.search(r'ro+l+ me', post_body.lower()):
|
||||
tags.append('dice_call')
|
||||
tags.add('dice_call')
|
||||
if 'story_post' in tags:
|
||||
tags.remove('story_post')
|
||||
if re.search(r'roll .*3d10', post_body.lower()):
|
||||
tags.add('dice_call')
|
||||
if 'final destination' in post_body.lower():
|
||||
tags.append('final_destination')
|
||||
tags.add('final_destination')
|
||||
if 'story_post' in tags:
|
||||
tags.remove('story_post')
|
||||
if 'story_post' in tags:
|
||||
if len(re.findall(r'\n>', post_body)) > 1:
|
||||
tags.append('vote_choices')
|
||||
tags.add('vote_choices')
|
||||
|
||||
# database insert
|
||||
cur.execute(
|
||||
|
|
|
@ -20,7 +20,7 @@ body {
|
|||
|
||||
.tag_button {
|
||||
color: blue;
|
||||
font-size: 1em;
|
||||
font-size: 1.5em;
|
||||
}
|
||||
|
||||
.tag_button:hover {
|
||||
|
|
Loading…
Reference in New Issue
Block a user