Add email and user ID fields to scraped posts

This commit is contained in:
iou1name 2020-01-20 13:18:47 -05:00
parent 2c6adac225
commit 8385103796
2 changed files with 17 additions and 9 deletions

View File

@@ -52,13 +52,19 @@ def scrape_posts(root_dir):
for post in posts:
# information gathering
post_id = int(post.get('id')[2:])
name = post.find(class_='name').text
tripcode = post.find(class_='postertrip')
if tripcode:
tripcode = tripcode.text
subject = post.find(class_='subject')
if subject:
subject = subject.text
name = post.find(class_='name').text
user_email = post.find(class_='useremail')
if user_email:
user_email = user_email.get('href')
tripcode = post.find(class_='postertrip')
if tripcode:
tripcode = tripcode.text
user_id = post.find(class_='hand')
if user_id:
user_id = user_id.text
post_time = int(post.find(class_='dateTime').get('data-utc'))
post_time = datetime.datetime.utcfromtimestamp(post_time)
post_time = post_time.replace(tzinfo=datetime.timezone.utc)
@@ -124,9 +130,9 @@ def scrape_posts(root_dir):
# database insert
cur.execute(
"INSERT INTO post VALUES (%s,%s,%s,%s,%s,%s,%s)",
(thread_id, post_id, name, tripcode, subject,
post_time, post_body)
"INSERT INTO post VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
(thread_id, post_id, subject, name, user_email, tripcode,
user_id, post_time, post_body)
)
for link in links:
cur.execute("INSERT INTO link VALUES (%s,%s)",

View File

@@ -7,9 +7,11 @@ CREATE TABLE IF NOT EXISTS thread (
CREATE TABLE IF NOT EXISTS post (
thread_id INTEGER REFERENCES thread(id) ON DELETE CASCADE NOT NULL,
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
tripcode TEXT,
subject TEXT,
name TEXT NOT NULL,
email TEXT,
tripcode TEXT,
user_id TEXT,
time TIMESTAMP WITH TIME ZONE NOT NULL,
body TEXT NOT NULL
);