Add email and user_id fields to scraped posts and the post table
This commit is contained in:
parent
2c6adac225
commit
8385103796
|
@ -52,13 +52,19 @@ def scrape_posts(root_dir):
|
||||||
for post in posts:
|
for post in posts:
|
||||||
# information gathering
|
# information gathering
|
||||||
post_id = int(post.get('id')[2:])
|
post_id = int(post.get('id')[2:])
|
||||||
name = post.find(class_='name').text
|
|
||||||
tripcode = post.find(class_='postertrip')
|
|
||||||
if tripcode:
|
|
||||||
tripcode = tripcode.text
|
|
||||||
subject = post.find(class_='subject')
|
subject = post.find(class_='subject')
|
||||||
if subject:
|
if subject:
|
||||||
subject = subject.text
|
subject = subject.text
|
||||||
|
name = post.find(class_='name').text
|
||||||
|
user_email = post.find(class_='useremail')
|
||||||
|
if user_email:
|
||||||
|
user_email = user_email.get('href')
|
||||||
|
tripcode = post.find(class_='postertrip')
|
||||||
|
if tripcode:
|
||||||
|
tripcode = tripcode.text
|
||||||
|
user_id = post.find(class_='hand')
|
||||||
|
if user_id:
|
||||||
|
user_id = user_id.text
|
||||||
post_time = int(post.find(class_='dateTime').get('data-utc'))
|
post_time = int(post.find(class_='dateTime').get('data-utc'))
|
||||||
post_time = datetime.datetime.utcfromtimestamp(post_time)
|
post_time = datetime.datetime.utcfromtimestamp(post_time)
|
||||||
post_time = post_time.replace(tzinfo=datetime.timezone.utc)
|
post_time = post_time.replace(tzinfo=datetime.timezone.utc)
|
||||||
|
@ -124,9 +130,9 @@ def scrape_posts(root_dir):
|
||||||
|
|
||||||
# database insert
|
# database insert
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"INSERT INTO post VALUES (%s,%s,%s,%s,%s,%s,%s)",
|
"INSERT INTO post VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)",
|
||||||
(thread_id, post_id, name, tripcode, subject,
|
(thread_id, post_id, subject, name, user_email, tripcode,
|
||||||
post_time, post_body)
|
user_id, post_time, post_body)
|
||||||
)
|
)
|
||||||
for link in links:
|
for link in links:
|
||||||
cur.execute("INSERT INTO link VALUES (%s,%s)",
|
cur.execute("INSERT INTO link VALUES (%s,%s)",
|
||||||
|
|
|
@ -7,9 +7,11 @@ CREATE TABLE IF NOT EXISTS thread (
|
||||||
CREATE TABLE IF NOT EXISTS post (
|
CREATE TABLE IF NOT EXISTS post (
|
||||||
thread_id INTEGER REFERENCES thread(id) ON DELETE CASCADE NOT NULL,
|
thread_id INTEGER REFERENCES thread(id) ON DELETE CASCADE NOT NULL,
|
||||||
id INTEGER PRIMARY KEY,
|
id INTEGER PRIMARY KEY,
|
||||||
name TEXT NOT NULL,
|
|
||||||
tripcode TEXT,
|
|
||||||
subject TEXT,
|
subject TEXT,
|
||||||
|
name TEXT NOT NULL,
|
||||||
|
email TEXT,
|
||||||
|
tripcode TEXT,
|
||||||
|
user_id TEXT,
|
||||||
time TIMESTAMP WITH TIME ZONE NOT NULL,
|
time TIMESTAMP WITH TIME ZONE NOT NULL,
|
||||||
body TEXT NOT NULL
|
body TEXT NOT NULL
|
||||||
);
|
);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user