Difference between revisions of "CSC111 Homework 8 Solution 2011"
(Created page with "--~~~~ ---- <source lang="python"> # hw8.py # Julia Edwards # 111a-ak # # This program takes the html version of the New York Times' # list "The Best 1000 Movies Ever Made" (f...") |
|||
Line 141: | Line 141: | ||
for movie in movies: | for movie in movies: | ||
file2.write(movie) | file2.write(movie) | ||
− | + | file2.close() | |
def main(): | def main(): |
Latest revision as of 09:27, 22 November 2011
--D. Thiebaut 08:26, 22 November 2011 (EST)
# hw8.py
# Julia Edwards
# 111a-ak
#
# This program takes the html version of the New York Times'
# list "The Best 1000 Movies Ever Made" (from 1927 until 2002)
# and finds all the movies in the list that match a given year
# (the user's birth year). After finding all the movies from
# that year, the program sorts, formats, and displays the titles
# so that they appear alphabetically in a list, one movie per
# line, with correct capitalization and formatting of apostrophes.
# Then the program will save this list in a file titled:
# "movies_nnnn.txt" where nnnn is the year the user enters.
#
# Note: If the user enters ANY 4-digit number between 1927 and
# 2002, the program will always run and display a list of movies.
# However, if the year entered does not match the year of any of the
# 1000 best, a blank list will appear and an empty file will be
# written.
#
# Restrictions: Apostrophes are only restored correctly when the
# letter following the apostrophe is an "s" (for example, "Schindler's
# List" will be formatted correctly). For any other letter, a blank
# space will appear where the apostrophe should be (so that
# "L'Age d'Or" appears as "L Age d Or"). Also, other punctuation marks
# (such as ":" or "!") will not be displayed in the list of titles
# that is output.
def getYear():
    """Ask for the user's birth year until an acceptable one is typed.

    An answer is accepted when it parses as a whole number between 1927
    and 2002 inclusive. Text that is not a whole number prints
    "Invalid input."; a number outside that range prints
    "You did not enter a valid year.". Either way the question is asked
    again.

    The answer is parsed with int() instead of eval(). eval() runs
    whatever the user types as Python code, and expressions such as
    "1/0" raised exceptions the old handler did not catch, which
    crashed the program instead of re-prompting.

    Returns:
        The accepted year as a string of four digits, e.g. "1985".
    """
    firstYear = 1927
    lastYear = 2002
    while True:
        answer = input("What year were you born in? ")
        try:
            year = int(answer)
        except ValueError:
            # Not a whole number (letters, empty line, expressions, ...).
            print("Invalid input.")
            continue
        if year < firstYear or year > lastYear:
            print("You did not enter a valid year.")
            continue
        # Every value in the accepted range has exactly four digits.
        return str(year)
def getMovies(year, movieFile):
    """Print the list heading and collect the html lines for one year.

    Only lines containing the marker '<td><a href' are examined. On such
    a line the four characters that follow the first "(" are compared
    with year; matching lines are kept exactly as read (html markup and
    line ending included).

    Args:
        year: the year to look for, as a string (e.g. "1985").
        movieFile: path of the html file to scan.

    Returns:
        A list of the matching lines, in file order. The list is empty
        when no line matches.
    """
    print()
    print("Movies that came out in " + year + ":")
    print()
    moviesList = []
    # The with-block closes the file even if reading a line fails.
    with open(movieFile, "r") as htmlFile:
        for line in htmlFile:
            if "<td><a href" not in line:
                continue
            parenAt = line.find("(")
            if line[parenAt + 1:parenAt + 5] == year:
                moviesList.append(line)
    return moviesList
def findMovies(year, moviesList):
    """Pull the title out of each html line and hand the titles on.

    For every line, the search starts just past a prefix the length of
    "http://movies.nytimes.com/movie/" (measured from the first "http").
    The title is the text between the next "/" and the "/" after it.
    The collected titles are passed to formatMovies() and its result is
    returned unchanged.

    Args:
        year: not used by this function.
        moviesList: html lines, as returned by getMovies().

    Returns:
        Whatever formatMovies() returns for the extracted titles.
    """
    prefixLength = len("http://movies.nytimes.com/movie/")
    titles = []
    for htmlLine in moviesList:
        afterPrefix = htmlLine.find("http") + prefixLength
        slashBeforeTitle = htmlLine.find('/', afterPrefix)
        slashAfterTitle = htmlLine.find('/', slashBeforeTitle + 1)
        titles.append(htmlLine[slashBeforeTitle + 1:slashAfterTitle])
    return formatMovies(titles)
def formatApostrophe(movie):
    """Re-attach possessive "s" parts to the word in front of them.

    A title arrives split into its words, e.g. ["Schindler", "s",
    "List"]. Every element that is exactly "s" and has a word before it
    is merged into that word with an apostrophe, giving
    ["Schindler's", "List"].

    Unlike the earlier version, this works on a fresh list and by
    position. It therefore handles an "s" at the end of the title,
    repeated words, and more than one possessive, and it leaves the
    caller's list untouched. An "s" that is the very first element has
    no word to attach to and is kept as is.

    Args:
        movie: list of the words of one title.

    Returns:
        A new list of words with the possessives joined.
    """
    title = []
    for part in movie:
        if part == "s" and title:
            title[-1] = title[-1] + "'s"
        else:
            title.append(part)
    return title
def formatMovies(movies):
    """Sort, clean up and print the titles, and return them for saving.

    The list is sorted in place. Each title then has surrounding
    whitespace removed, is split on "-", is passed through
    formatApostrophe(), and is re-joined with single spaces. The
    finished title is printed on its own line.

    The earlier version called strip() without keeping its result, so
    the whitespace was never actually removed; the stripped text is now
    what gets split.

    Args:
        movies: list of raw titles with "-" between the words. The list
            is sorted in place.

    Returns:
        A list of the formatted titles, each with ", " appended so the
        saved file is easier to read.
    """
    movies.sort()
    formattedMovie = []
    for rawTitle in movies:
        # str.strip() returns a new string; keep it.
        words = rawTitle.strip().split("-")
        title = " ".join(formatApostrophe(words))
        print(title)
        formattedMovie.append(title + ", ")
    return formattedMovie
def saveMovies(year, movies):
    """Write the formatted titles to "movies_nnnn.txt".

    nnnn is the year passed in. The file is created in the current
    working directory and overwritten if it already exists. The titles
    are written one after another exactly as given, with nothing added
    between them.

    Args:
        year: the year as a string; used to build the file name.
        movies: list of strings to write.
    """
    print()
    print("Saving movies to file movies_" + year + ".txt")
    txtfile = "movies_" + year + ".txt"
    # The with-block closes the file even if a write fails.
    with open(txtfile, "w") as outFile:
        outFile.writelines(movies)
def main():
    """Run the whole program once.

    First checks that "1000best.html" can be opened; if it cannot, an
    error message is printed and nothing else happens. Otherwise the
    user is asked for a year, the matching movies are read from the
    html file, formatted and printed, and finally saved to a text file.
    """
    try:
        # Open and close the file only to find out whether it is there.
        probe = open("1000best.html", "r")
        probe.close()
    except IOError:
        print("Error. Need file '1000best.html' to run.")
        return
    birthYear = getYear()
    htmlLines = getMovies(birthYear, "1000best.html")
    titlesToSave = findMovies(birthYear, htmlLines)
    saveMovies(birthYear, titlesToSave)
# Entry point: start the program as soon as this file is loaded.
main()