CSC111 Counting Frequencies of Occurrence

From dftwiki3
Revision as of 09:51, 8 December 2011 by Thiebaut (talk | contribs) (Created page with "--~~~~ ---- <source lang="python"> # dicoDemo.py # D. Thiebaut # demo of dictionary. # computes the frequency of occurrence of words and of letters # in a text. text = """enter ...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to: navigation, search

--D. Thiebaut 09:51, 8 December 2011 (EST)


# dicoDemo.py
# D. Thiebaut
# demo of dictionary.
# computes the frequency of occurrence of words and of letters
# in a text.

text = """enter some text captured from some source"""

# Normalize case so that "The" and "the" are counted as the same word.
text = text.lower()

#---       W O R D S       ---
#--- compute the frequency of occurrence of words ---
# freq maps each whitespace-separated word to the number of times it appears.
freq = {}
for word in text.split():
    # get() returns 0 for a word not seen yet, so this one statement handles
    # both the first occurrence and every later one
    freq[ word ] = freq.get( word, 0 ) + 1

#--- put dico in a list of pairs, where first item is the count ---
# The count comes first so that sorting the pairs orders them by count;
# pairs with equal counts are then ordered by the word itself.
# (The list is deliberately not named "list": that would hide the builtin.)
word_counts = [ ( freq[word], word ) for word in freq ]

#--- sort by most frequent to least frequent ---
word_counts.sort( reverse=True )      # most frequent word first

#--- display 10 most frequent ---
# Slicing is safe even when there are fewer than 10 distinct words.
for frequency, word in word_counts[:10]:
    print( word,'appears', frequency,'times' )

                 

#---       L E T T E R S       ---
#--- compute the frequency of occurrence of letters ---
# freq is rebuilt from scratch: it now maps each letter to its count.
freq = {}
for letter in text.lower():      # lower() so that 'A' and 'a' count together
    # skip blanks, digits and punctuation: only letters are counted
    if not letter.isalpha():
        continue
    # get() returns 0 for a letter not seen yet, so this one statement
    # handles both the first occurrence and every later one
    freq[ letter ] = freq.get( letter, 0 ) + 1

#--- put dico in a list of pairs, where first item is the count ---
# The count comes first so that sorting the pairs orders them by count;
# pairs with equal counts are then ordered by the letter itself.
# (The list is deliberately not named "list": that would hide the builtin.)
letter_counts = [ ( freq[letter], letter ) for letter in freq ]

#--- sort by most frequent to least frequent ---
letter_counts.sort( reverse=True )      # most frequent letter first

#--- display 10 most frequent ---
# Slicing is safe even when there are fewer than 10 distinct letters.
for frequency, letter in letter_counts[:10]:
    print( letter,'appears', frequency,'times' )