SI 506: Programming I (Python)
Here is my Final Project Code to create a Rest API with New York Times and the Guardian searching for Rugby articles and creating a CSV file.
To view my code on creating the CSV file above click on ‘Keep reading’
#Angela Chih
#SI 506_Final Project
import json
import requests
#Use that investigation to define a class `NYTArticle` and a class `GuardianArticle` that fulfill requirements, and that you can use as tools to complete the rest of the project.
#Access data from each source with data about articles, and create a list of instances of `NYTArticle` and a list of instances of `GuardianArticle`.
CACHE_FNAME = "cache_file_name.json"
# if I already have cached data, I want to load that data into a python dictionary
try:
cache_file = open(CACHE_FNAME, 'r')
cache_contents = cache_file.read()
cache_diction = json.loads(cache_contents)
cache_file.close()
# if I don't have cached data, I will create an empty dictionary to save data later
except:
cache_diction = {}
def params_unique_combination(baseurl, params_d, private_keys=["api_key"]):
alphabetized_keys = sorted(params_d.keys())
res = []
for k in alphabetized_keys:
if k not in private_keys:
res.append("{}-{}".format(k, params_d[k]))
return baseurl + "_".join(res)
def get_from_NYT(a):
# put together base url and parameters
baseurl = "https://api.nytimes.com/svc/search/v2/articlesearch.json"
params_diction = {}
params_diction["api_key"] = "TakenOffDueToRestrictions"
params_diction["q"] = a
unique_ident = params_unique_combination(baseurl,params_diction)
# see if the url identifier is already in cache dictionary
if unique_ident in cache_diction:
# if so, get data from cache dictionary and return
return cache_diction[unique_ident]
# if not, make a new API call using the requests module
else:
resp = requests.get(baseurl, params_diction)
# Or as:
# resp = requests.get(unique_ident)
# save the data in the cache dictionary, using unique url identifier
## as a key and the response data as value
cache_diction[unique_ident] = json.loads(resp.text)
# convert the cache dictionary to a JSON string
dumped_json_cache = json.dumps(cache_diction)
# save the JSON string in the cache file
fw = open(CACHE_FNAME,"w")
fw.write(dumped_json_cache)
fw.close() # Close the open file
# return the data
return cache_diction[unique_ident]
def get_from_Guard(a):
baseurl = "https://content.guardianapis.com/search"
params_diction = {}
params_diction["q"] = a
params_diction["show-tags"]= "contributor"
params_diction["api-key"] = "TakenOffDueToRestrictions"
unique_ident = params_unique_combination(baseurl,params_diction)
# see if the url identifier is already in cache dictionary
if unique_ident in cache_diction:
# if so, get data from cache dictionary and return
return cache_diction[unique_ident]
# if not, make a new API call using the requests module
else:
resp = requests.get(baseurl, params_diction)
# Or as:
# resp = requests.get(unique_ident)
# save the data in the cache dictionary, using unique url identifier
## as a key and the response data as value
cache_diction[unique_ident] = json.loads(resp.text)
# convert the cache dictionary to a JSON string
dumped_json_cache = json.dumps(cache_diction)
# save the JSON string in the cache file
fw = open(CACHE_FNAME,"w")
fw.write(dumped_json_cache)
fw.close() # Close the open file
# return the data
return cache_diction[unique_ident]
cache_NYT = get_from_NYT("Rugby")
cache_Guard = get_from_Guard("Rugby")
###Citation: Retrieved from Week 11 Discussion problem set
class NYT(object):
def __init__(self, times_diction):
self.title = times_diction["headline"]["main"].replace(",","")
try:
self.author = times_diction["byline"]["original"].replace("By","")
except:
self.author = "Not Available"
try:
self.subject = times_diction["news_desk"]
except:
self.subject = "Not Available"
self.publication = "The New York Times"
def words_title(self):
total_words = 0
title = self.title.split(' ')
for word in title:
total_words += 1
return total_words
def __str__(self):
return "{},{},{},{},{}".format(self.title, self.author, self.subject, self.words_title(), self.publication)
class Guard(object):
def __init__(self, guardian_diction):
self.title = guardian_diction["webTitle"].replace("\r\n","")
self.author = guardian_diction["tags"][0]["webTitle"]
self.subject = guardian_diction["sectionName"]
self.publication = "The Guardian"
def words_title(self):
total_words = 0
title = self.title.split(' ')
for word in title:
total_words += 1
return total_words
def __str__(self):
return "{},{},{},{},{}".format(self.title, self.author, self.subject, self.words_title(), self.publication)
list_NYT = []
for data in cache_NYT["response"]["docs"]:
list_NYT.append(NYT(data))
#print(str(list_NYT[0]))
list_Guard = []
for data in cache_Guard["response"]["results"]:
list_Guard.append(Guard(data))
#print(str(list_Guard[0]))
finalprj = open("articles.csv","w")
finalprj.write("Article Title, Author, Subject, Number of words in Title, Publication\n")
for data in list_NYT:
data_string = str(data)
finalprj.write(data_string)
finalprj.write('\n')
for data in list_Guard:
data_string = str(data)
finalprj.write(data_string)
finalprj.write('\n')
finalprj.close()
###Citation: Retrieved from SI506F18_ps11.py -- problem set #11
0 notes