-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdataCollection.py
47 lines (42 loc) · 1.24 KB
/
dataCollection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import praw
import config
import json
import os
import urllib
#import getusers
def botLogin():
    """Build the PRAW client from the values stored in ``config``.

    Returns:
        The ``praw.Reddit`` instance constructed with the username,
        password, client id, client secret and user agent read from the
        ``config`` module.
    """
    # Gather the credentials first so the constructor call stays short.
    credentials = {
        "username": config.username,
        "password": config.password,
        "client_id": config.client_id,
        "client_secret": config.client_secret,
        "user_agent": config.user_agent,
    }
    return praw.Reddit(**credentials)
def get_comments(author, redditObj):
    """Collect the body text of every comment listed for *author*.

    Args:
        author: Name handed to ``redditObj.redditor``.
        redditObj: Client object exposing ``redditor(name).comments.new``.

    Returns:
        A list holding the ``body`` attribute of each item yielded by
        ``comments.new(limit=None)``, in iteration order.
    """
    listing = redditObj.redditor(author).comments.new(limit=None)
    return [entry.body for entry in listing]
# Script body: download the comments of every author listed in authors.json
# that does not yet have a file in data/, and store them as JSON.
obj = botLogin()

# Create the output directory up front so a first run does not fail in
# os.listdir() or when the per-author files are written.
os.makedirs('data', exist_ok=True)

# Each file name in data/ is treated as an author that was already handled.
users_already_processed = set(os.listdir('data'))

# Only the top-level keys of authors.json are used (iterating the loaded
# dict yields its keys). The context manager closes the file handle, which
# the previous bare open() call never did.
with open('authors.json') as authors_file:
    authors = set(json.load(authors_file))

users_to_process = authors.difference(users_already_processed)
print(len(users_to_process)," users to process")

for author in users_to_process:
    data = {}
    # The former `while fail:` wrapper reset its flag on both the success
    # and the error path, so it always ran exactly once; a plain
    # try/except expresses the same single attempt.
    try:
        print(author)
        data["comments"] = get_comments(author, obj)
    except Exception as excpt:
        # Best effort: record the failure and move on to the next author.
        with open("user_errors.txt","a") as f:
            f.write(author+" "+str(excpt)+"\n")
    # The file is written even after a failure (then it holds an empty
    # object), so the author is skipped on the next run.
    # NOTE(review): indentation was lost in the reviewed copy; writing once
    # per author after the try/except is the assumed placement - confirm.
    with open("./data/"+author,'w+') as f:
        json.dump(data,f,indent=4)