CSC111 Homework 8 Solution 2011

From dftwiki3
Jump to: navigation, search

--D. Thiebaut 08:26, 22 November 2011 (EST)


# hw8.py
# Julia Edwards
# 111a-ak
#
# This program takes the html version of the New York Times' 
# list "The Best 1000 Movies Ever Made" (from 1927 until 2002)
# and finds all the movies in the list that match a  given year 
# (the user's birth year). After finding all the movies from 
# that year, the program sorts, formats, and displays the titles
# so that they appear alphabetically in a list, one movie per 
# line, with correct capitalization and formatting of apostrophes. 
# Then the program will save this list in a file titled:
# "movies_nnnn.txt" where nnnn is the year the user enters.
#
# Note: If the user enters ANY 4-digit number between 1927 and
# 2002, the program will always run and display a list of movies. 
# However, if the year entered does not match the year of any of the 
# 1000 best, a blank list will appear and an empty file will be 
# written.
#
# Restrictions: An apostrophe is only restored when the letter that
# follows it is an "s" (for example, "Schindler's List" is formatted
# correctly). For any other letter a blank space appears where the
# apostrophe should be (so that "L'Age d'Or" appears as "L Age d Or").
# Other punctuation marks (such as ":" or "!") are also not displayed
# in the list of titles that is output.


def getYear():
    """Prompt for the user's birth year until a valid one is entered.

    A valid year is a whole number from 1927 to 2002 inclusive. Input that
    cannot be parsed as an integer prints "Invalid input."; an integer
    outside that range prints "You did not enter a valid year.". In both
    cases the user is asked again.

    Returns:
        The accepted year as a string (for example "1990").
    """
    while True:
        # Parse with int() rather than eval(): eval() would execute whatever
        # the user typed, and expressions such as "1/0" raise exceptions that
        # the prompt loop would not catch.
        try:
            year = int(input("What year were you born in? "))
        except ValueError:
            print("Invalid input.")
            continue
        # Any integer in this range necessarily has four digits.
        if year < 1927 or year > 2002:
            print("You did not enter a valid year.")
            continue
        return str(year)

def getMovies(year, movieFile):
    """Collect the raw HTML lines of the movies released in a given year.

    Prints a heading for the list, then scans movieFile line by line. A
    line is treated as a movie entry when it contains '<td><a href'; the
    four characters following its first '(' are taken as the release year.

    Args:
        year: Four-character year string to match (for example "1990").
        movieFile: Path of the HTML file to read.

    Returns:
        The matching lines, unmodified (still HTML), in file order.
    """
    print()
    print("Movies that came out in " + year + ":")
    print()
    moviesList = []
    # The with-block closes the file even if reading fails part-way through.
    with open(movieFile, "r") as htmlFile:
        for line in htmlFile:
            if "<td><a href" not in line:
                continue
            parenAt = line.find("(")
            if parenAt == -1:
                # No year in parentheses: without this guard the slice below
                # would compare the first four characters of the line instead.
                continue
            if line[parenAt + 1:parenAt + 5] == year:
                moviesList.append(line)
    return moviesList

def findMovies(year, moviesList):
    """Cut the title portion out of each HTML line and format the titles.

    For every line, the search starts at the position of "http" plus the
    length of "http://movies.nytimes.com/movie/"; the title is the text
    between the next two '/' characters found from there. The collected
    titles are handed to formatMovies() and its result is returned.

    The year argument is accepted but not used here.
    """
    prefixLength = len("http://movies.nytimes.com/movie/")
    rawTitles = []
    for htmlLine in moviesList:
        searchFrom = htmlLine.find("http") + prefixLength
        # The title sits between the next two slashes after the prefix.
        openSlash = htmlLine.find('/', searchFrom)
        closeSlash = htmlLine.find('/', openSlash + 1)
        rawTitles.append(htmlLine[openSlash + 1:closeSlash])
    return formatMovies(rawTitles)

def formatApostrophe(movie):
    """Re-attach possessive "s" parts to the word in front of them.

    Splitting a title on "-" leaves a possessive as a separate "s" element
    (for example ["Schindler", "s", "List"]). Every element that is exactly
    "s" and has a word before it is merged into that word with an
    apostrophe, giving ["Schindler's", "List"].

    Args:
        movie: List of the parts of one title. It is not modified.

    Returns:
        A new list of title parts with the possessives merged. A leading
        "s" (nothing to attach it to) is kept as its own part, and an empty
        input gives an empty list.
    """
    title = []
    for part in movie:
        if part == "s" and title:
            # Build the result in a separate list rather than removing
            # items from `movie` while indexing it, so a trailing "s" or
            # several possessives in one title are handled correctly.
            title[-1] = title[-1] + "'s"
        else:
            title.append(part)
    return title

def formatMovies(movies):
    """Sort, clean up, print and return the movie titles.

    The list is sorted in place. Each title then has surrounding
    whitespace removed, is split on "-", passed through formatApostrophe()
    and re-joined with single spaces. Every finished title is printed on
    its own line.

    Args:
        movies: List of raw titles whose words are separated by "-".

    Returns:
        A list of the formatted titles, each with ", " appended so the
        saved file is easier to read.
    """
    movies.sort()
    formattedMovie = []
    for rawTitle in movies:
        # str.strip() returns a new string; its result must be used for the
        # trimming to have any effect.
        words = rawTitle.strip().split("-")
        displayTitle = " ".join(formatApostrophe(words))
        print(displayTitle)
        formattedMovie.append(displayTitle + ", ")
    return formattedMovie

def saveMovies(year, movies):
    """Write the formatted titles to "movies_nnnn.txt".

    Prints a message naming the file, then writes every entry of movies
    to it back to back, with no separator added.

    Args:
        year: Year string used for the nnnn part of the file name.
        movies: List of strings to write.
    """
    print()
    print("Saving movies to file movies_" + year + ".txt")
    outputName = "movies_" + year + ".txt"
    # The with-block closes the file even if a write fails.
    with open(outputName, "w") as outputFile:
        outputFile.writelines(movies)
    
def main():
    """Run the program: check the data file, ask for a year, list and save.

    If "1000best.html" cannot be opened, an error message is printed and
    nothing else happens. Otherwise the year is requested, the matching
    movies are read, formatted (and printed), and saved to a text file.
    """
    sourceName = "1000best.html"
    # Open and immediately close the file just to confirm it is readable.
    try:
        probe = open(sourceName, "r")
        probe.close()
    except IOError:
        print("Error. Need file '1000best.html' to run.")
        return
    birthYear = getYear()
    htmlLines = getMovies(birthYear, sourceName)
    titles = findMovies(birthYear, htmlLines)
    saveMovies(birthYear, titles)

# Start the program.
# NOTE(review): this call is unguarded, so it also runs when the file is
# imported as a module — confirm whether a __main__ guard is wanted.
main()