add email and id fields
This commit is contained in:
parent
2c6adac225
commit
8385103796
|
@ -52,13 +52,19 @@ def scrape_posts(root_dir):
|
|||
for post in posts:
|
||||
# information gathering
|
||||
post_id = int(post.get('id')[2:])
|
||||
name = post.find(class_='name').text
|
||||
tripcode = post.find(class_='postertrip')
|
||||
if tripcode:
|
||||
tripcode = tripcode.text
|
||||
subject = post.find(class_='subject')
|
||||
if subject:
|
||||
subject = subject.text
|
||||
name = post.find(class_='name').text
|
||||
user_email = post.find(class_='useremail')
|
||||
if user_email:
|
||||
user_email = user_email.get('href')
|
||||
tripcode = post.find(class_='postertrip')
|
||||
if tripcode:
|
||||
tripcode = tripcode.text
|
||||
user_id = post.find(class_='hand')
|
||||
if user_id:
|
||||
user_id = user_id.text
|
||||
post_time = int(post.find(class_='dateTime').get('data-utc'))
|
||||
post_time = datetime.datetime.utcfromtimestamp(post_time)
|
||||
post_time = post_time.replace(tzinfo=datetime.timezone.utc)
|
||||
|
@ -124,9 +130,9 @@ def scrape_posts(root_dir):
|
|||
|
||||
# database insert
|
||||
cur.execute(
|
||||
"INSERT INTO post VALUES (%s,%s,%s,%s,%s,%s,%s)",
|
||||
(thread_id, post_id, name, tripcode, subject,
|
||||
post_time, post_body)
|
||||
"INSERT INTO post VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
|
||||
(thread_id, post_id, subject, name, user_email, tripcode,
|
||||
user_id, post_time, post_body)
|
||||
)
|
||||
for link in links:
|
||||
cur.execute("INSERT INTO link VALUES (%s,%s)",
|
||||
|
|
|
@ -7,9 +7,11 @@ CREATE TABLE IF NOT EXISTS thread (
|
|||
CREATE TABLE IF NOT EXISTS post (
|
||||
thread_id INTEGER REFERENCES thread(id) ON DELETE CASCADE NOT NULL,
|
||||
id INTEGER PRIMARY KEY,
|
||||
name TEXT NOT NULL,
|
||||
tripcode TEXT,
|
||||
subject TEXT,
|
||||
name TEXT NOT NULL,
|
||||
email TEXT,
|
||||
tripcode TEXT,
|
||||
user_id TEXT,
|
||||
time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||
body TEXT NOT NULL
|
||||
);
|
||||
|
|
Loading…
Reference in New Issue
Block a user