-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawl.py
109 lines (98 loc) · 3.39 KB
/
crawl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import json
import argparse
import requests
def get_groups(apikey):
    """Fetch all Meetup groups within 25 km of central Budapest.

    Uses the Meetup v2 /groups endpoint and follows the API's
    "next" pagination link until it is exhausted.

    Args:
        apikey: Meetup API key string.

    Returns:
        A list of group dicts containing the fields requested via
        the "only" parameter (rating, members, id, name, urlname).

    Raises:
        requests.HTTPError: if the API responds with an error status.
    """
    group_results = []
    params = {
        "country": "hu",
        "state": "hu",
        "city": "Budapest",
        "format": "json",
        "lon": 19.0799999237,
        "lat": 47.5099983215,
        "radius": 25.0,
        "page": 25,  # results per request
        "desc": False,
        "offset": 0,
        "key": apikey,
        "only": ",".join(["rating", "members", "id", "name", "urlname"])
    }
    nurl = "https://api.meetup.com/2/groups"
    while nurl:
        resp = requests.get(nurl, params=params)
        # Fail loudly on HTTP errors instead of trying to JSON-parse
        # an error page further down.
        resp.raise_for_status()
        # The "next" URL already embeds the full query string, so only
        # send params on the first request.
        params = None
        res = resp.json()
        group_results.extend(res.get("results", []))
        # None is the idiomatic "no more pages" sentinel; an empty or
        # missing "next" link ends the loop either way.
        nurl = res.get("meta", {}).get("next") or None
    return group_results
def get_members(groups, apikey):
    """Print one ';'-separated line per member of each given group.

    For every group urlname, pages through the Meetup v2 /members
    endpoint and prints each member's requested fields joined with
    ';', with the group urlname appended as the last field.

    Args:
        groups: iterable of Meetup group urlnames.
        apikey: Meetup API key string.

    Raises:
        requests.HTTPError: if the API responds with an error status.
    """
    for group in groups:
        params = {
            "offset": 0,
            "format": "json",
            "page": 50,  # results per request
            "desc": False,
            "order": "name",
            "key": apikey,
            "group_urlname": group,
            "only":
            ",".join(["country", "city", "joined", "name", "id", "visited"])
        }
        nurl = "https://api.meetup.com/2/members"
        while nurl:
            resp = requests.get(nurl, params=params)
            # Surface HTTP errors instead of feeding an error page to
            # the JSON parser below.
            resp.raise_for_status()
            # "next" URLs carry the query string; send params only once.
            params = None
            if not resp.text:
                # Some groups come back with an empty body; skip them
                # and move on to the next group (best-effort crawl).
                break
            res = resp.json()
            for member in res.get("results", []):
                element = ";".join(str(value) for value in member.values())
                print(element + ";" + group)
            nurl = res.get("meta", {}).get("next") or None
# Meetup group urlnames (the slug in meetup.com/<urlname>) for Budapest
# data/analytics-related communities; these are the groups crawled by the
# "members" command.
GROUPIDS = [
    "budapest_data_science", "HUG-MSSQL", "Hungarian-nlp", "KURT_Akademia",
    "Big-Data-Meetup-Budapest", "Budapest-Database-Meetup",
    "Budapest-Cassandra-Users", "Pro-Bono-Analytics",
    "Budapest-Users-of-R-Network", "MongoDB-Budapest", "Budapest-BI-Meetup",
    "Budapest-Data-Visualization-Meetup", "Budapest-network-science",
    "Big-Data-Budapest", "Budapest-Machine-Learning-Meetup",
    "Budapest-Data-Projects-Meetup", "Budapest-Analytics-Rockstars",
    "neo4j-budapest-users", "Snowplow-Analytics-Budapest",
    "Budapest-Spark-Meetup", "Budapest-NOSQL",
    "Budapest-dataSTREAM-Meetup-Series", "futureofdata-budapest",
    "Excel-tippek-es-trukkok-tips-and-tricks-Budapest-Meetup",
    "R-Ladies-Budapest",
    "NLP-Nyilt-est-Karizmatikus-kommunikacio-NLP-bevezető", "Budapest-GraphQL",
    "DataFest", "PyData-Budapest", "Budapest-Deep-Learning-Reading-Seminar",
    "Neurons-AI-Hungary-Budapest"
]
def main():
    """Entry point: parse CLI arguments and run the requested crawl.

    Requires --apikey; the positional command selects whether to dump
    groups (CSV to stdout) or crawl members of the GROUPIDS list.
    """
    parser = argparse.ArgumentParser(description='Meetup API Crawler')
    parser.add_argument(
        '--apikey',
        type=str,
        required=True,
        help='Meetup API Key. Check: https://secure.meetup.com/meetup_api/key/'
    )
    parser.add_argument(
        'command',
        type=str,
        help='The crawl command to execute',
        choices=["groups", "members"])
    args = parser.parse_args()

    if args.command == "groups":
        # One comma-separated line per group record.
        for group in get_groups(args.apikey):
            print(",".join(str(field) for field in group.values()))
    elif args.command == "members":
        get_members(GROUPIDS, args.apikey)


if __name__ == "__main__":
    main()