# lookup_users_ids.py
import time
import sys
import json
import cPickle as pickle
import os
import datetime
import csv
import codecs
import cStringIO
import urllib
import oauth2
def load_users(path):
    """Load the distinct user names listed one per line in a UTF-8 text file.

    Each line is stripped of surrounding whitespace and stored as a UTF-8
    encoded byte string.  Blank (whitespace-only) lines are skipped so that
    an empty screen name is never handed to the lookup code; they are still
    included in the reported line count.

    Prints a one-line summary (distinct names vs. lines read) to stdout.

    :param path: path of the user-names file.
    :return: sorted list of distinct, UTF-8 encoded user names.
    """
    userset = set()
    line_count = 0
    with codecs.open(path, 'r', encoding="utf-8") as f:
        # Iterate the file directly instead of materialising readlines().
        for line in f:
            line_count += 1
            name = line.strip()
            if name:
                userset.add(name.encode("utf-8"))
    # Single-argument print(...) renders the same text on Python 2 and 3.
    print("%d distinct user names loaded (out of %d lines)." % (len(userset), line_count))
    return sorted(userset)
def load_tokens(loc):
    """Read the first set of OAuth credentials from a comma-separated file.

    Every credential line has four comma-separated fields, in this order:
    consumer key, consumer secret, access-token key, access-token secret.
    Blank lines and lines starting with ``#`` are ignored.  Only the first
    credential line is used; reading stops there.

    :param loc: path of the tokens file.
    :return: dict with the keys ``CLIENT_KEY``, ``CLIENT_SECRET``,
        ``ATOKEN_KEY`` and ``ATOKEN_SECRET``.
    :raises ValueError: if the first credential line does not have exactly
        four fields, or if the file contains no credential line at all.
    """
    # The context manager closes the handle even if parsing fails.
    with open(loc, "r") as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            parts = [x.strip() for x in line.split(",")]
            if len(parts) != 4:
                # Do not echo the line itself: it contains secrets.
                raise ValueError(
                    "Malformed token line in %s: expected 4 comma-separated "
                    "fields, got %d" % (loc, len(parts)))
            consumer_key, consumer_secret, auth_key, auth_secret = parts
            return {
                "CLIENT_KEY": consumer_key,
                "CLIENT_SECRET": consumer_secret,
                "ATOKEN_KEY": auth_key,
                "ATOKEN_SECRET": auth_secret,
            }
    raise ValueError("No credentials found in token file: %s" % loc)
def request(url):
    """Issue one OAuth-signed GET request for *url*.

    Credentials are read from the module-level ``tokens`` dict (keys
    ``CLIENT_KEY``, ``CLIENT_SECRET``, ``ATOKEN_KEY``, ``ATOKEN_SECRET``).

    :param url: full URL to fetch.
    :return: ``(resp, content)`` as returned by ``oauth2.Client.request``.
        If the call itself raises, the exception is printed and the
        placeholder ``({'status': 404}, '[]')`` is returned instead, so the
        caller handles it like any other non-200 response.
    """
    # Credential/client setup is deliberately outside the try block: a
    # missing ``tokens`` global or key is a configuration error and must
    # surface as such instead of being disguised as an HTTP 404.
    consumer = oauth2.Consumer(key=tokens["CLIENT_KEY"], secret=tokens["CLIENT_SECRET"])
    token = oauth2.Token(key=tokens["ATOKEN_KEY"], secret=tokens["ATOKEN_SECRET"])
    client = oauth2.Client(consumer, token)
    try:
        resp, content = client.request(url, method="GET")
    except Exception as ex:
        # Best effort: report the failure and hand back a fake 404 response.
        print("Exception for %s : %s" % (url, ex))
        resp = {'status': 404}
        content = '[]'
    return resp, content
def execute_request(url, WAIT_PERIOD, max_errors):
    """Fetch *url* via ``request`` until it succeeds and return the parsed JSON.

    Status handling:

    * 200 -- the body is decoded with ``json.loads`` and returned.
    * 429 -- the ``application/rate_limit_status`` endpoint is queried; if
      it reports no remaining ``/users/show`` calls, the function sleeps
      until the reported ``reset`` time (treated as a Unix timestamp) plus
      one second, otherwise (or if that response cannot be interpreted)
      for 60 seconds.  These retries do not count towards *max_errors*.
    * anything else -- sleep *WAIT_PERIOD* seconds and count one error.

    :param url: URL to fetch.
    :param WAIT_PERIOD: seconds to sleep after a non-200/non-429 response.
    :param max_errors: number of such responses after which to give up.
    :return: the decoded JSON body of the 200 response.
    :raises Exception: once *max_errors* error responses have been seen.
    """
    error_count = 0
    while True:
        r, c = request(url)
        st = int(r['status'])
        if st == 200:
            return json.loads(c)
        elif st == 429:
            print("Checking rate limit...")
            wait_time = 60
            r, c = request('https://api.twitter.com/1.1/application/rate_limit_status.json')
            try:
                limits = json.loads(c)['resources']['users']['/users/show']
                if int(limits['remaining']) == 0:
                    # Clamp to at least one second: a reset time already in
                    # the past (e.g. clock skew) would otherwise produce a
                    # negative argument for time.sleep().
                    wait_time = max(int(limits['reset']) - time.time() + 1, 1)
            except (ValueError, KeyError, TypeError):
                # Unparseable or unexpectedly shaped status response: keep
                # the 60 second default.  Narrow on purpose so Ctrl-C and
                # SystemExit are not swallowed.
                pass
            print('Rate limit reached, waiting %i seconds' % wait_time)
            time.sleep(wait_time)
        else:
            print('Error %i, waiting %i seconds, error count: %i' % (st, WAIT_PERIOD, error_count))
            time.sleep(WAIT_PERIOD)
            error_count += 1
            if error_count >= max_errors:
                raise Exception("Max errors reached, skipping request.")
def update_data(screen_name2user_id, screen_name):
    """Look up the id of *screen_name* and record it in *screen_name2user_id*.

    Calls the ``users/show`` endpoint through ``execute_request`` (using the
    module-level ``WAIT_PERIOD`` and at most 5 errors) and stores the ``id``
    field of the response under the key *screen_name*.  Any failure is
    printed and otherwise ignored, leaving the mapping unchanged.

    :param screen_name2user_id: dict updated in place (screen name -> id).
    :param screen_name: the screen name to look up.
    """
    # Percent-encode the name so characters such as '&', '#' or spaces
    # cannot corrupt the query string.  For names made only of letters,
    # digits and underscores the URL is identical to plain concatenation.
    query = urllib.urlencode([("screen_name", screen_name),
                              ("include_entities", "false")])
    url = 'https://api.twitter.com/1.1/users/show.json?' + query
    try:
        response = execute_request(url, WAIT_PERIOD, 5)
        screen_name2user_id[screen_name] = response["id"]
    except Exception as ex:
        # Best effort: one failed lookup must not abort the whole run.
        print("Exception for name: %s : %s" % (screen_name, ex))
#### main ########################################################################
# Usage: <script> tokens_file user_names_file target_info_file
#
# Resolves every screen name listed in user_names_file to its numeric id and
# writes "<id>\t<screen_name>" lines, sorted by screen name, to
# target_info_file.  Names that could not be resolved are listed on stdout.

# constants
WAIT_PERIOD = 2 # time until retry for a failed Twitter API call

# load parameters
if len(sys.argv) != 4:
    # sys.exit(<str>) prints the message to stderr and exits with status 1,
    # so calling shells/scripts can detect the usage error.
    sys.exit("Wrong number of arguments! Try: tokens_file user_names_file target_info_file")
TOKENS_FILE = sys.argv[1]
USER_NAMES_FILE = sys.argv[2]
TARGET_FILE = sys.argv[3]

# load tokens and userlist
# NOTE: `tokens` must stay a module-level name; request() reads it as a global.
tokens = load_tokens(TOKENS_FILE)
user_names = load_users(USER_NAMES_FILE)

screen_name2user_id = {}
wrong_users = []
for position, name in enumerate(user_names, 1):
    print("Processing name %d : %s" % (position, name))
    update_data(screen_name2user_id, name)
    # update_data() only adds an entry when the lookup succeeded.
    if name not in screen_name2user_id:
        wrong_users.append(name)

print("%d users not found:" % len(wrong_users))
for u in sorted(wrong_users):
    print("\t%s" % u)

with open(TARGET_FILE, "w") as f:
    for screen_name in sorted(screen_name2user_id):
        f.write(str(screen_name2user_id[screen_name])+"\t"+screen_name+"\n")
print("Done")