This post has been de-listed
It is no longer included in search results and normal feeds (front page, hot posts, subreddit posts, etc). It remains visible only via the author's post history.
Hello! I'm new to Python, so please forgive me if I'm asking something dumb :) I found a script online that uses Pushshift to pull all submissions matching a keyword and then writes them to a CSV file on my desktop. However, I would like to know how to pull all COMMENTS rather than submission titles. Is there a way to alter the script below to pull comments instead of titles? I've tried a couple of different things, but I keep getting a variety of errors. Thank you so much in advance for any and all help!
import requests
import json
import csv
import time
import datetime
##step 1
def getPushshiftData(query, after, before, sub):
    """Fetch one page of submissions from the Pushshift search endpoint.

    Args:
        query: Keyword sent as the ``title`` search parameter.
        after: Start of the time window, sent as the ``after`` parameter.
        before: End of the time window, sent as the ``before`` parameter.
        sub: Subreddit name, sent as the ``subreddit`` parameter.

    Returns:
        The value stored under the ``'data'`` key of the decoded JSON reply.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        requests.Timeout: No answer arrived within the timeout.
    """
    # Local import keeps this block self-contained; urlencode escapes spaces,
    # '&' and other reserved characters that raw concatenation would leave
    # in the URL and thereby corrupt the query string.
    from urllib.parse import urlencode

    params = urlencode({
        'title': query,
        'size': 1000,
        'after': after,
        'before': before,
        'subreddit': sub,
    })
    url = 'https://api.pushshift.io/reddit/search/submission/?' + params
    print(url)
    # Without a timeout a stalled connection would block the script forever.
    r = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    r.raise_for_status()
    data = json.loads(r.text)
    return data['data']
##step 2
def collectSubData(subm):
    """Extract the fields of interest from one submission and store them.

    Args:
        subm: Mapping for a single submission. The keys ``title``, ``url``,
            ``author``, ``id``, ``score``, ``created_utc``, ``num_comments``
            and ``permalink`` are required; ``link_flair_text`` is optional.

    Side effects:
        Writes ``subStats[<id>]`` (module-level dict) with a one-element list
        holding the tuple ``(id, title, url, author, score, created,
        num_comments, permalink, flair)``.

    Raises:
        KeyError: A required key is missing from ``subm``.
    """
    sub_id = subm['id']
    # Flair is the only optional field; "NaN" marks its absence.
    flair = subm.get('link_flair_text', "NaN")
    # fromtimestamp() without a tz yields a naive datetime in local time.
    created = datetime.datetime.fromtimestamp(subm['created_utc'])
    row = (
        sub_id,
        subm['title'],
        subm['url'],
        subm['author'],
        subm['score'],
        created,
        subm['num_comments'],
        subm['permalink'],
        flair,
    )
    # Stored as a single-item list: readers index it as subStats[key][0].
    subStats[sub_id] = [row]
##step 3
# Search scope: the subreddit to scan and the keyword to look for.
sub = "technology"
query = "privacy"
# Search window as epoch seconds, kept as strings.
after = '1546300800'  # 2019-01-01 00:00:00 UTC (window start)
before = '1577836800'  # 2020-01-01 00:00:00 UTC (window end)
# Accumulators: collected rows keyed by submission id, and a running count.
subStats = dict()
subCount = 0
##step 4
# Fetch the first page, then keep requesting pages until one comes back
# empty, i.e. nothing is left between 'after' and 'before'.
data = getPushshiftData(query, after, before, sub)
while data:
    for submission in data:
        collectSubData(submission)
        # Count every processed submission (increment, not reassignment).
        subCount += 1
    # Progress report: page size and creation time of the page's last item.
    print(len(data))
    print(str(datetime.datetime.fromtimestamp(data[-1]['created_utc'])))
    # Restart the next request from the creation time of the last item seen.
    after = data[-1]['created_utc']
    data = getPushshiftData(query, after, before, sub)
# The loop only exits on an empty page, so this prints that final page size.
print(len(data))
##step 5
# Summarise what was collected. Each subStats value is a one-element list
# whose tuple holds the title at index 1 and the creation time at index 5.
print(str(len(subStats)) + " submissions have added to list")
entries = list(subStats.values())
# Guard: with no results there is no first/last entry to index into.
if entries:
    first_row = entries[0][0]
    last_row = entries[-1][0]
    print("1st entry is:")
    print(first_row[1] + " created: " + str(first_row[5]))
    print("Last entry is:")
    print(last_row[1] + " created: " + str(last_row[5]))
##step 6
def updateSubs_file():
    """Write every collected submission to a CSV file named by the user.

    Prompts on stdin for a file name, creates (or overwrites) that file
    inside the hard-coded ``location`` directory, writes a header row and
    then one row per entry of the module-level ``subStats`` dict, and
    finally prints how many rows were written.

    Raises:
        OSError: The target file cannot be opened for writing.
    """
    upload_count = 0
    location = "/Users/username/Desktop/"
    print("input filename of submission file, please add .csv")
    filename = input()
    # 'location' already ends with a separator, so plain concatenation works.
    path = location + filename
    headers = ["Post ID", "Title", "Url", "Author", "Score", "Publish Date",
               "Total No. of Comments", "Permalink", "Flair"]
    # newline='' lets the csv module control line endings itself.
    with open(path, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.writer(handle, delimiter=',')
        writer.writerow(headers)
        for rows in subStats.values():
            # Each value is a one-element list wrapping the row tuple.
            writer.writerow(rows[0])
            upload_count += 1
    print(str(upload_count) + " submissions have been uploaded")
# Export the collected submissions to the CSV file chosen by the user.
updateSubs_file()
Subreddit
Post Details
- Posted
- 4 years ago
- Reddit URL
- View post on reddit.com
- External URL
- reddit.com/r/pushshift/c...