CSC111 Lab 12 Solution

From dftwiki3
Jump to: navigation, search

--D. Thiebaut 13:30, 23 April 2010 (UTC)


# lab12sol.py                                                                                                                              
# -*- coding: iso-8859-15 -*-                                                                                                              
# D. Thiebaut                                                                                                                              
# This program opens a text file, computes the frequency of occurrence 
# of each character in the file, and outputs it to help identify the 
# language the text is written in.

def getText( filename ):
    """Return the whole contents of the text file *filename* as one string.

    The file is opened in text read mode ("r").  It is closed before the
    function returns, including when reading raises an exception.
    Errors from open() or read() (e.g. a missing file) propagate to the
    caller unchanged.
    """
    # The with-block releases the handle even if read() fails, and the
    # local name no longer shadows the Python 2 builtin "file".
    with open( filename, "r" ) as infile:
        return infile.read()

def display( caption, D ):
    """Print a separator line, *caption*, and the contents of dictionary D.

    Four lines are written to standard output: 60 dashes, the caption,
    "D =  " followed by str(D), and an empty line.
    """
    # Every print receives exactly one pre-built value, so the output is the
    # same whether print is the Python 2 statement or the Python 3 function.
    print( 60 * '-' )
    print( caption )
    # The two spaces after "=" are intentional: they keep the output identical
    # to the Python 2 form  print "D = ", str( D )  which puts a space
    # between its two items.
    print( "D =  " + str( D ) )
    print( "" )

def main( debug=False ):
    """Print the (up to) 10 most frequent Latin letters found in a text file.

    debug: when True the file "secret.txt" is analysed; when False the
           user is prompted for the name of the file.

    The text is lower-cased and every character is counted.  Only the
    letters a-z are then ranked, by decreasing count; letters with equal
    counts appear in reverse alphabetical order.  The ranking is printed
    as one string, most frequent letter first.
    """

    #--- open file and get its contents ---
    if debug:
        filename = "secret.txt"
    else:
        # raw_input exists only in Python 2; use input where it is missing.
        try:
            ask = raw_input
        except NameError:
            ask = input
        filename = ask( "filename?  " )
    text = getText( filename ).lower()

    #--- compute # of occurrence of each character ---
    D = {}
    display( "before starting", D )
    for ch in text:
        # get() supplies 0 for a character seen for the first time; it
        # replaces the deprecated dict.has_key() test.
        D[ ch ] = D.get( ch, 0 ) + 1
        # Uncomment the next two lines to trace the dictionary step by step.
        #display( "ch = " + ch, D )
        #raw_input( "> " )

    #--- concentrate only on the letters of the Latin alphabet ---
    letters = "abcdefghijklmnopqrstuvwxyz"
    # (count, letter) tuples sort by count first, then by letter; all tuples
    # are distinct, so one descending sort equals sort() followed by reverse().
    L = sorted( [ (D[k], k) for k in D if k in letters ], reverse=True )
    mostFreq = ''.join( [ k for n, k in L[0:10] ] )

    # Two spaces after the colon keep the output identical to the Python 2
    # form  print "most frequent characters: ", mostFreq
    print( "most frequent characters:  " + mostFreq )



# Run the analysis as soon as the file is executed, in debug mode
# (the input file is then "secret.txt" and no prompt is shown).
main( debug=True )