Skip to content
This repository has been archived by the owner on Jul 8, 2024. It is now read-only.

Commit

Permalink
Merge pull request #64 from MichaelKarpe/master
Browse files Browse the repository at this point in the history
Adds minimum engagement and "none of these words" filters
  • Loading branch information
Mottl authored May 4, 2020
2 parents 291866b + 8e4ec94 commit 54a8e73
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 17 deletions.
41 changes: 41 additions & 0 deletions GetOldTweets3/manager/TweetCriteria.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@ def setUsername(self, username):
self.username = username
return self

def setExcludeWords(self, excludeWords):
    """Set word(s) to exclude from tweets

    Parameters
    ----------
    excludeWords : str, list or iterable
        A single word or a collection of words.
        Example: ["red", "blue", "yellow", "green"]

    Returns
    -------
    self, so that setter calls can be chained.
    """
    # A bare string is itself iterable; wrap it so that it is treated as
    # one word instead of being split into single characters.
    if isinstance(excludeWords, str):
        excludeWords = [excludeWords]
    # The search query is built with list concatenation
    # (`[''] + excludeWords`), which fails for tuples, sets or generators,
    # so always store a real list.
    self.excludeWords = list(excludeWords)
    return self

def setSince(self, since):
"""Set a lower bound date in UTC
Parameters
Expand All @@ -46,6 +57,36 @@ def setUntil(self, until):
self.until = until
return self

def setMinReplies(self, minReplies):
    """Set the minimum number of replies of tweets to search

    Parameters
    ----------
    minReplies : int or str
        for example: 42

    Returns
    -------
    self, so that setter calls can be chained.
    """
    # The search query is assembled by string concatenation
    # (' min_replies:' + minReplies), so an int would raise TypeError there.
    self.minReplies = str(minReplies)
    return self

def setMinFaves(self, minFaves):
    """Set the minimum number of favorites of tweets to search

    Parameters
    ----------
    minFaves : int or str
        for example: 42

    Returns
    -------
    self, so that setter calls can be chained.
    """
    # The search query is assembled by string concatenation
    # (' min_faves:' + minFaves), so an int would raise TypeError there.
    self.minFaves = str(minFaves)
    return self

def setMinRetweets(self, minRetweets):
    """Set the minimum number of retweets of tweets to search

    Parameters
    ----------
    minRetweets : int or str
        for example: 42

    Returns
    -------
    self, so that setter calls can be chained.
    """
    # The search query is assembled by string concatenation
    # (' min_retweets:' + minRetweets), so an int would raise TypeError there.
    self.minRetweets = str(minRetweets)
    return self

def setNear(self, near):
"""Set location to search nearby
Parameters
Expand Down
12 changes: 12 additions & 0 deletions GetOldTweets3/manager/TweetManager.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,9 @@ def getJsonResponse(tweetCriteria, refreshCursor, cookieJar, proxy, useragent=No
if hasattr(tweetCriteria, 'querySearch'):
urlGetData += tweetCriteria.querySearch

if hasattr(tweetCriteria, 'excludeWords'):
urlGetData += ' -'.join([''] + tweetCriteria.excludeWords)

if hasattr(tweetCriteria, 'username'):
if not hasattr(tweetCriteria.username, '__iter__'):
tweetCriteria.username = [tweetCriteria.username]
Expand All @@ -312,6 +315,15 @@ def getJsonResponse(tweetCriteria, refreshCursor, cookieJar, proxy, useragent=No
if hasattr(tweetCriteria, 'until'):
urlGetData += ' until:' + tweetCriteria.until

if hasattr(tweetCriteria, 'minReplies'):
urlGetData += ' min_replies:' + tweetCriteria.minReplies

if hasattr(tweetCriteria, 'minFaves'):
urlGetData += ' min_faves:' + tweetCriteria.minFaves

if hasattr(tweetCriteria, 'minRetweets'):
urlGetData += ' min_retweets:' + tweetCriteria.minRetweets

if hasattr(tweetCriteria, 'lang'):
urlLang = 'l=' + tweetCriteria.lang + '&'
else:
Expand Down
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,28 @@ GetOldTweets3 --querysearch "bitcoin" --near "Berlin, Germany" --within 25km --m
GetOldTweets3 --querysearch "museum" --near "55.75, 37.61" --within 40km --maxtweets 10
```

**Example 8 - Get tweets by minimum number of replies:**
```bash
GetOldTweets3 --querysearch "bitcoin" --minreplies 10 --maxtweets 10
```

**Example 9 - Get tweets by minimum number of favorites:**
```bash
GetOldTweets3 --querysearch "bitcoin" --minfaves 10 --maxtweets 10
```

**Example 10 - Get tweets by minimum number of retweets:**
```bash
GetOldTweets3 --querysearch "bitcoin" --minretweets 10 --maxtweets 10
```

**Example 11 - Exclude tweets that contain any word from a list:**
```bash
GetOldTweets3 --querysearch "bitcoin" --exclude-words-from-file excludewords.txt --maxtweets 10
```

The words to exclude are listed in the `excludewords.txt` file, separated by whitespace (spaces, tabs or newlines).

## Python classes
- **Tweet:** Model class that describes a specific tweet.
- id (str)
Expand Down
71 changes: 54 additions & 17 deletions bin/GetOldTweets3
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""To use this script you can pass the following attributes:
--querysearch: a query text to be matched
--username: a username or a list of usernames (comma or space separated)
of a specific twitter account(s) (with or without @)
--username-from-file: a file with a list of usernames,
--since: a lower bound date in UTC (yyyy-mm-dd)
--until: an upper bound date in UTC (yyyy-mm-dd) (not included)
--near: a reference location area from where tweets were generated
--within: a distance radius from "near" location (e.g. 15mi)
--toptweets: only the tweets provided as top tweets by Twitter (no parameters required)
--maxtweets: the maximum number of tweets to retrieve
--lang: the language of tweets
--emoji: "ignore" (the default, discards emojis), "unicode"
or "named" (replaces with "Emoji[Name of emoji]")
--output: a filename to export the results (default is "output_got.csv").
Pass a '.gz' suffix to have it gzipped.
--debug: outputs debug information to standard error stream
--querysearch: a query text to be matched
--exclude-words-from-file: a file with a list of words to exclude
--username: a username or a list of usernames (comma or space separated)
of a specific twitter account(s) (with or without @)
--usernames-from-file: a file with a list of usernames
--since: a lower bound date in UTC (yyyy-mm-dd)
--until: an upper bound date in UTC (yyyy-mm-dd) (not included)
--minreplies: a minimum number of replies for a tweet
--minfaves: a minimum number of favorites for a tweet
--minretweets: a minimum number of retweets for a tweet
--near: a reference location area from where tweets were generated
--within: a distance radius from "near" location (e.g. 15mi)
--toptweets: only the tweets provided as top tweets by Twitter (no parameters required)
--maxtweets: the maximum number of tweets to retrieve
--lang: the language of tweets
--emoji: "ignore" (the default, discards emojis), "unicode"
or "named" (replaces with "Emoji[Name of emoji]")
--output: a filename to export the results (default is "output_got.csv")
Pass a '.gz' suffix to have it gzipped.
--debug: outputs debug information to standard error stream
Examples:
Expand All @@ -39,7 +43,19 @@ GetOldTweets3 --querysearch "bitcoin" --lang cn --maxtweets 10
GetOldTweets3 --querysearch "bitcoin" --near "Berlin, Germany" --within 25km --maxtweets 10
# Example 7 - Get tweets by geo coordinates:
GetOldTweets3 --querysearch "museum" --near "55.75, 37.61" --within 40km --maxtweets 10
GetOldTweets3 --querysearch "museum" --near "55.75, 37.61" --within 40km --maxtweets 10
# Example 8 - Get tweets by minimum number of replies:
GetOldTweets3 --querysearch "bitcoin" --minreplies 10 --maxtweets 10
# Example 9 - Get tweets by minimum number of favorites:
GetOldTweets3 --querysearch "bitcoin" --minfaves 10 --maxtweets 10
# Example 10 - Get tweets by minimum number of retweets:
GetOldTweets3 --querysearch "bitcoin" --minretweets 10 --maxtweets 10
# Example 11 - Get tweets by excluding tweets with any word of a list:
GetOldTweets3 --querysearch "bitcoin" --exclude-words-from-file excludewords.txt --maxtweets 10
"""

from functools import partial
Expand All @@ -64,10 +80,14 @@ def main(argv):

try:
opts, args = getopt.getopt(argv, "", ("querysearch=",
"exclude-words-from-file=",
"username=",
"usernames-from-file=",
"since=",
"until=",
"minreplies=",
"minfaves=",
"minretweets=",
"near=",
"within=",
"toptweets",
Expand All @@ -83,6 +103,7 @@ def main(argv):
debug = False
usernames = set()
username_files = set()
exclude_words_files = set()
for opt, arg in opts:
if opt == '--querysearch':
tweetCriteria.querySearch = arg
Expand All @@ -95,12 +116,24 @@ def main(argv):
elif opt == '--usernames-from-file':
username_files.add(arg)

elif opt == '--exclude-words-from-file':
exclude_words_files.add(arg)

elif opt == '--since':
tweetCriteria.since = arg

elif opt == '--until':
tweetCriteria.until = arg

elif opt == '--minreplies':
tweetCriteria.minReplies = arg

elif opt == '--minfaves':
tweetCriteria.minFaves = arg

elif opt == '--minretweets':
tweetCriteria.minRetweets = arg

elif opt == '--near':
geocode = arg.split(',')
try:
Expand Down Expand Up @@ -169,6 +202,10 @@ def main(argv):
usernames |= set(usernames_)
print("Found %i usernames in %s" % (len(usernames_), uf))

if exclude_words_files:
exclude_words = sum([open(ewf).read().split() for ewf in exclude_words_files], [])
tweetCriteria.excludeWords = exclude_words

if usernames:
if len(usernames) > 1:
tweetCriteria.username = usernames
Expand Down

0 comments on commit 54a8e73

Please sign in to comment.