# CSC111 Lab 12 Solution
# --D. Thiebaut 13:30, 23 April 2010 (UTC)
# lab12sol.py
# -*- coding: iso-8859-15 -*-
# D. Thiebaut
# This program opens a text file, computes the frequency of occurrence
# of each character in the file, and outputs it to help identify the
# language the text is written in.
def getText( filename ):
    """Open a text file and return its entire contents as one string.

    filename: path of the file to read; it is opened in text mode ("r").

    Returns the text of the file.  Any error raised while opening or
    reading the file (for example when it does not exist) propagates to
    the caller.
    """
    # The with-statement closes the file even when read() raises, which
    # a bare open/read/close sequence does not guarantee.  The handle is
    # named f so that it does not shadow Python 2's builtin "file".
    with open( filename, "r" ) as f:
        return f.read()
def display( caption, D ):
    """Print a caption followed by the contents of dictionary D.

    caption: text printed on its own line, under a separator made of
             60 dashes.
    D:       the dictionary to show; it is printed via str( D ).

    The output ends with a blank line.  Nothing is returned.
    """
    # Every print receives exactly one parenthesized argument, so each
    # line is valid both as a Python 2 print statement and as a
    # Python 3 function call, and produces the same text in both.
    print( 60 * '-' )
    print( caption )
    # "%s %s" keeps the separator space that "print a, b" inserts
    # between its two items.
    print( "%s %s" % ( "D = ", str( D ) ) )
    print( "" )
def main( debug=False ):
    """Print the 10 most frequent Latin letters found in a text file.

    debug: when True the file "secret.txt" is read; otherwise the user
           is prompted for a file name.

    The text is lower-cased and every character is counted.  Only the
    letters a-z are then ranked, by decreasing count (letters with equal
    counts appear in reverse alphabetical order), and the first 10 are
    printed as one string.  Nothing is returned.
    """
    #--- open file and get its contents ---
    if debug:
        filename = "secret.txt"
    else:
        # raw_input exists only in Python 2; Python 3 renamed it input
        try:
            ask = raw_input
        except NameError:
            ask = input
        filename = ask( "filename? " )
    text = getText( filename ).lower()

    #--- compute # of occurrence of each character ---
    D = {}
    display( "before starting", D )
    for ch in text:
        # get() supplies 0 the first time ch is seen; it replaces
        # dict.has_key(), which is deprecated and absent in Python 3
        D[ ch ] = D.get( ch, 0 ) + 1
        # uncomment the next two lines to trace the count step by step
        #display( "ch = " + ch, D )
        #raw_input( "> " )

    #--- concentrate only on the letters of the Latin alphabet ---
    # (count, letter) tuples sort by count first, then by letter
    L = sorted( [ (D[k], k) for k in D if k in "abcdefghijklmnopqrstuvwxyz" ],
                reverse=True )
    mostFreq = ''.join( [ k for n, k in L[0:10] ] )
    # "%s %s" keeps the separator space that "print a, b" inserts
    print( "%s %s" % ( "most frequent characters: ", mostFreq ) )
# run the analysis in debug mode: reads "secret.txt" without prompting
main( debug=True )