CSC111 Homework 11 2018

From dftwiki3
Jump to: navigation, search

D. Thiebaut (talk) 20:58, 19 April 2018 (EDT)



...

<showafterdate after="20180504 12:00" before="20180601 00:00">

Solution Program


Source


# collegeScorecard.py
# https://ed-public-download.app.cloud.gov/downloads/Most-Recent-Cohorts-Scorecard-Elements.csv
# (the address above is presumably where the data originally came
# from -- TODO confirm; the program itself only uses URL below)

# address of the CSV file that main() downloads and analyzes
URL = "http://cs.smith.edu/~dthiebaut/111/collegeScorecard.csv"

def getLines( fileName ):
    '''Given a file name, reads the whole file and returns its
    contents as a list of strings, one per line, without the
    newline characters.  The text is split on "\\n", so a file
    ending with a newline yields a final empty string.'''
    # the with-statement closes the file even if read() fails
    with open( fileName, 'r' ) as file:
        text = file.read()
    return text.split( "\n" )

def getURLWriteToFile( URL, outputFileName ):
    '''Given a URL, gets the text stored in the file on the Web
    and saves it into a local file.

    URL:            address of the resource to fetch; its bytes
                    are decoded as UTF-8.
    outputFileName: name of the local file to create (or
                    overwrite) with the decoded text.
    Returns nothing.'''
    from urllib.request import urlopen  # library for fetching
                                        # Web pages

    # open the URL, get its contents, and store them in a string
    # called text; the with-statement closes the connection
    with urlopen( URL ) as response:
        text = response.read().decode( 'utf-8' )

    # save the string to file; the with-statement guarantees the
    # file is flushed and closed before the function returns
    with open( outputFileName, "w" ) as outputFile:
        outputFile.write( text )

def main():
    '''Downloads the CSV file found at URL, counts how many lines
    mention each "city, state" pair, and prints the (at most) 10
    most frequent pairs, one per line, as "count city, state".'''
    # get the file from the URL, and save it locally
    getURLWriteToFile( URL, "collegeScorecard.csv" )

    # get the lines from the local file
    lines = getLines( "collegeScorecard.csv" )

    # indices of fields of interest
    cityIndex = 4
    stateIndex = 5

    # create a dictionary for counting the number of
    # times each city and state appears in a line.
    # The first line is the header, so it is skipped.
    cityStateDico = {}
    for line in lines[1: ]:
        # NOTE: this is a plain split on commas; a quoted field
        # that itself contains a comma shifts the columns.
        fields = line.split( ',' )

        # skip lines too short to hold both fields
        # (for instance the empty last line)
        if len( fields ) <= stateIndex:
            continue

        city = fields[cityIndex].strip()
        state = fields[stateIndex].strip()

        # skip invalid states
        if len( state ) != 2:
            continue

        # create a string containing both city and state
        cityState = city + ", " + state

        # if it's not in the dictionary, start it at 1, otherwise
        # increment the counter associated with it
        cityStateDico[ cityState ] = cityStateDico.get( cityState, 0 ) + 1

    # create a list of tuples for all the cities, with the
    # counter first, and then the city name, sorted from the
    # largest to the smallest
    listCities = sorted( ( ( count, cityState )
                           for cityState, count in cityStateDico.items() ),
                         reverse=True )

    # display the first 10 (or fewer, if there are not that many)
    for count, cityState in listCities[:10]:
        print( count, cityState )
        
        
# run the program
main()

Output


91 New York, NY
76 Chicago, IL
74 Houston, TX
59 Los Angeles, CA
52 San Antonio, TX
50 Miami, FL
48 Brooklyn, NY
46 Philadelphia, PA
42 Atlanta, GA
40 Dallas, TX


</showafterdate>