我解决了这个问题,所以我将成为那些回答自己问题的网络英雄之一。
这是通过使用静态 Python 变量作为计数器和停止值来实现的(例如,在抓取 20 条推文后停止)。目前这是一个地理位置搜索,但您可以使用以下命令轻松地将其替换为主题标签搜索:getTweetsByHashtag()
method.
#!/usr/bin/env python
from tweepy import (Stream, OAuthHandler)
from tweepy.streaming import StreamListener
class Listener(StreamListener):
tweet_counter = 0 # Static variable
def login(self):
CONSUMER_KEY =
CONSUMER_SECRET =
ACCESS_TOKEN =
ACCESS_TOKEN_SECRET =
auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
return auth
def on_status(self, status):
Listener.tweet_counter += 1
print(str(Listener.tweet_counter) + '. Screen name = "%s" Tweet = "%s"'
%(status.author.screen_name, status.text.replace('\n', ' ')))
if Listener.tweet_counter < Listener.stop_at:
return True
else:
print('Max num reached = ' + str(Listener.tweet_counter))
return False
def getTweetsByGPS(self, stop_at_number, latitude_start, longitude_start, latitude_finish, longitude_finish):
try:
Listener.stop_at = stop_at_number # Create static variable
auth = self.login()
streaming_api = Stream(auth, Listener(), timeout=60) # Socket timeout value
streaming_api.filter(follow=None, locations=[latitude_start, longitude_start, latitude_finish, longitude_finish])
except KeyboardInterrupt:
print('Got keyboard interrupt')
def getTweetsByHashtag(self, stop_at_number, hashtag):
try:
Listener.stopAt = stop_at_number
auth = self.login()
streaming_api = Stream(auth, Listener(), timeout=60)
# Atlanta area.
streaming_api.filter(track=[hashtag])
except KeyboardInterrupt:
print('Got keyboard interrupt')
listener = Listener()
listener.getTweetsByGPS(20, -84.395198, 33.746876, -84.385585, 33.841601) # Atlanta area.