Difference between revisions of "CSC111 Homework 11 2018"
(→Assignment) |
|||
Line 1: | Line 1: | ||
[[User:Thiebaut|D. Thiebaut]] ([[User talk:Thiebaut|talk]]) 20:58, 19 April 2018 (EDT) | [[User:Thiebaut|D. Thiebaut]] ([[User talk:Thiebaut|talk]]) 20:58, 19 April 2018 (EDT) | ||
---- | ---- | ||
+ | <onlydft> | ||
=Make-Up Homework 11= | =Make-Up Homework 11= | ||
<br /> | <br /> | ||
Line 54: | Line 55: | ||
* Downloading the csv file from the URL may take between 3 and 30 seconds, depending on how busy the network and the server are... | * Downloading the csv file from the URL may take between 3 and 30 seconds, depending on how busy the network and the server are... | ||
<br /> | <br /> | ||
+ | |||
+ | </onlydft> | ||
<showafterdate after="20180504 12:00" before="20180601 00:00"> | <showafterdate after="20180504 12:00" before="20180601 00:00"> | ||
<br /> | <br /> |
Latest revision as of 12:47, 1 June 2018
D. Thiebaut (talk) 20:58, 19 April 2018 (EDT)
<showafterdate after="20180504 12:00" before="20180601 00:00">
Solution Program
Source
# collegeScorecard.py
# Downloads a CSV file, counts how many of its rows mention each
# "City, ST" pair, and prints the 10 most frequent pairs.
# Original data:
# https://ed-public-download.app.cloud.gov/downloads/Most-Recent-Cohorts-Scorecard-Elements.csv

URL = "http://cs.smith.edu/~dthiebaut/111/collegeScorecard.csv"


def getLines( fileName ):
    '''Given a file name, reads the file (as UTF-8 text) and returns
    all the lines contained in the file, as a list of strings without
    their trailing newline.  A file ending with a newline yields a
    final empty string.'''
    # "with" guarantees the file is closed even if reading fails
    with open( fileName, 'r', encoding='utf-8' ) as file:
        return file.read().split( "\n" )


def getURLWriteToFile( URL, outputFileName ):
    '''Given a URL, gets the text stored in the file on the Web
    (decoded as UTF-8) and saves it into a local file.'''
    from urllib.request import urlopen  # library for fetching
                                        # Web pages

    # open the URL, get its contents, and store it in a string
    # called text; the connection is closed when the block exits
    with urlopen( URL ) as response:
        text = response.read().decode( 'utf-8' )

    # save the string to file, using the same encoding it was decoded
    # with, so the write does not depend on the machine's locale
    with open( outputFileName, "w", encoding='utf-8' ) as file:
        file.write( text )


def countCityStates( lines, cityIndex=4, stateIndex=5 ):
    '''Given a list of comma-separated data lines (header excluded),
    returns a dictionary mapping each "City, ST" string to the number
    of lines in which it appears.

    cityIndex and stateIndex are the positions of the city and state
    fields in a line.  Lines that are too short, or whose state field
    is not exactly 2 characters long, are skipped.'''
    cityStateDico = {}
    for line in lines:
        # NOTE: this is a plain split, not a real CSV parse.  A comma
        # inside a quoted field shifts the following fields; such
        # lines are normally rejected by the 2-character state test.
        fields = line.split( ',' )
        try:
            city = fields[cityIndex].strip()
            state = fields[stateIndex].strip()
        except IndexError:
            # line has too few fields (e.g. blank last line)
            continue

        # skip invalid states
        if len( state ) != 2:
            continue

        # create a string containing both city and state
        cityState = city + ", " + state

        # if it's not in the dictionary, add it, otherwise
        # increment the counter associated with it
        cityStateDico[ cityState ] = cityStateDico.get( cityState, 0 ) + 1

    return cityStateDico


def topCityStates( cityStateDico, n=10 ):
    '''Given a dictionary of counters keyed by "City, ST" strings,
    returns a list of at most n (counter, cityState) tuples, largest
    counter first.  Ties are ordered by reverse alphabetical order of
    the cityState string.'''
    # put the counter first in each tuple so that sorting the tuples
    # sorts by counter
    listCities = [ (count, cityState)
                   for cityState, count in cityStateDico.items() ]
    listCities.sort( reverse=True )

    # slicing (rather than indexing 0..n-1) is safe when there are
    # fewer than n entries
    return listCities[ :n]


def main():
    '''Downloads the CSV file, counts the lines per city/state, and
    prints the 10 most frequent "City, ST" pairs, one per line, as
    "counter City, ST".'''
    localFileName = "collegeScorecard.csv"

    # get the file from the URL, and save it locally
    getURLWriteToFile( URL, localFileName )

    # get the lines from the local file
    lines = getLines( localFileName )

    # count the number of times each city and state appears in a
    # line, skipping the header line
    cityStateDico = countCityStates( lines[1: ] )

    # display the first 10
    for count, cityState in topCityStates( cityStateDico, 10 ):
        print( count, cityState )


# only run the program when executed as a script, not when imported
if __name__ == "__main__":
    main()
Output
91 New York, NY 76 Chicago, IL 74 Houston, TX 59 Los Angeles, CA 52 San Antonio, TX 50 Miami, FL 48 Brooklyn, NY 46 Philadelphia, PA 42 Atlanta, GA 40 Dallas, TX
</showafterdate>