-
-
Notifications
You must be signed in to change notification settings - Fork 233
Open
Description
Yesterday my code scraped 5407 tweets containing the word '$MASK' for 25/06/2025. Today I ran the same code again and it scraped only 4 tweets for 25/06/2025.
Is this normal?
Here is my code:
# Search window as ISO dates; both values are interpolated into the query's
# since:/until: operators below.
START_DATE = "2025-06-23"
END_DATE = "2025-07-01"

# Terms to search for; they are OR-ed together inside the query.
KEYWORDS = ["catwifmask", "$MASK"]

# Build the search string: "(<term> OR <term> ...) since:<start> until:<end>".
_joined_terms = " OR ".join(KEYWORDS)
QUERY = f'({_joined_terms}) since:{START_DATE} until:{END_DATE}'
print(QUERY)
async def main():
    """Scrape tweets matching QUERY and aggregate per-day/hour/user stats.

    Logs in every account of the API pool, iterates over the search results
    for ``QUERY`` and, for each tweet that passes the filters below, updates:

    * ``count_by_day`` / ``score_by_day``   - keyed by the tweet's date
    * ``count_by_hour`` / ``score_by_hour`` - keyed by the tweet's datetime
      truncated to the hour
    * ``influencer_scores``                 - keyed by username
    * ``tweet_urls_by_user``                - username -> list of tweet URLs

    A tweet is skipped (with a message printed) when it has no date, when
    none of ``KEYWORDS`` occurs in its ``rawContent`` (case-insensitive), or
    when its date falls outside ``[START_DATE, END_DATE)``.

    The aggregates are local to this function in the code visible here;
    presumably the rest of the script reports them - not shown.
    """
    api = API()
    await api.pool.login_all()

    # Parse the window bounds once instead of on every tweet.
    window_start = date.fromisoformat(START_DATE)
    window_end = date.fromisoformat(END_DATE)
    # Lowercase the keywords once; the per-tweet check is case-insensitive.
    lowered_keywords = [term.lower() for term in KEYWORDS]

    count_by_day = defaultdict(int)
    score_by_day = defaultdict(int)
    count_by_hour = defaultdict(int)
    score_by_hour = defaultdict(int)
    influencer_scores = defaultdict(int)
    tweet_urls_by_user = defaultdict(list)

    async for tweet in api.search(QUERY):
        if not tweet.date:
            print("skipped tweet because it had no tweet.date")
            continue

        # Skip tweets that don't explicitly contain one of the keywords in
        # their own text (added to avoid tweets that contain the keyword
        # only in a comment).
        content = tweet.rawContent.lower()
        if not any(term in content for term in lowered_keywords):
            print("Skipped tweet because of RawContent")
            continue

        dt = tweet.date
        # NOTE(review): the day/hour buckets use whatever timezone tweet.date
        # carries (presumably UTC - confirm); a local-time expectation would
        # shift tweets across day boundaries.
        d = dt.date()
        h = dt.replace(minute=0, second=0, microsecond=0)  # truncate to hour

        # Half-open window: START_DATE inclusive, END_DATE exclusive.
        if not (window_start <= d < window_end):
            print("skipped tweet because dates are not matching")
            continue

        # Alternative weighting (disabled):
        # tweet_score = tweet.user.followersCount * 0.05 + tweet.likeCount
        tweet_score = (
            tweet.user.followersCount * 0.1
            + tweet.retweetCount * 50
            + tweet.likeCount * 10
        )

        count_by_day[d] += 1
        score_by_day[d] += tweet_score
        count_by_hour[h] += 1
        score_by_hour[h] += tweet_score

        username = tweet.user.username
        influencer_scores[username] += tweet_score
        url = f"https://twitter.com/{username}/status/{tweet.id}"
        tweet_urls_by_user[username].append(url)
Thank you very much for any help
Metadata
Metadata
Assignees
Labels
No labels