Latest revision as of 04:17, 28 June 2018

D. Thiebaut (talk) 10:35, 19 June 2018 (EDT)

Requirements

Install the python-docx library using pip3 (pip3 install python-docx)
Uses the csv library that is already part of Python 3

Source Code

# parseEcoModParticipants.py
# D. Thiebaut
from __future__ import print_function
import csv
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH

# Zero-based index of the csv column that holds the course name.
INDEXOFCOURSE = 10

# Image file inserted at the top of each generated document.
LOGO = "logo.png"

# Name of the file containing the csv data.
CSV = "data.csv"

# Full course titles.  Participants whose course is not listed here
# are ignored by the output functions.
courses = [
    "Machine Learning I",
    "Machine Learning II",
]

# Short version of each course title, in the same order as courses.
# The short title is used as the file name of the output docx document.
coursesShort = [
    "ML1",
    "ML2",
]


def printCoursesAsText( CourseParticipants ):
    '''Display the courses and participants as plain ASCII text.

    Used mostly for debugging.

    CourseParticipants is a dictionary where the key is the string
    representing the course and the value is a list of participants.
    Each participant is a sequence of csv fields laid out as:
    last name, first name, institution, address 1, address 2, city,
    zip, country, phone, email, followed by any further columns
    (which are not displayed).

    Courses whose title is not in the global list courses are skipped.
    Nothing is returned; everything is written to standard output.
    '''
    global courses

    # The course number is the position of the course in the dictionary,
    # so skipped (invalid) courses still consume a number.
    for j, course in enumerate( CourseParticipants ):

        # if course not valid, skip it
        if course not in courses:
            continue

        print( "\n" )
        print( "Course #{0}: {1}".format( j+1, course ) )

        for fields in CourseParticipants[ course ]:
            # Only the first 10 columns are displayed.  Pad short rows
            # with empty strings and drop extra columns, so that a row
            # with an unexpected number of fields does not raise a
            # ValueError during unpacking.
            lastName, firstName, institution, address1, address2, \
                city, _zipCode, country, phone, email \
                = ( list( fields ) + [ "" ] * 10 )[ :10 ]

            print( "{0}, {1}".format( lastName, firstName ) )
            print( "{0}".format( institution ) )
            if address2:
                print( "{0}, {1}, {2}, {3}".format( address1, address2, city, country ))
            else:
                print( "{0}, {1}, {2}".format( address1, city, country ))
            print( "{0}".format( phone ) )
            print( "{0}".format( email ) )
            print()
        
def printCoursesAsWordDoc( CourseParticipants ):
    '''Generate the docx documents, one per course.

    CourseParticipants is a dictionary, where the key is the string
    representing the course, and the value is a list of participants.
    Each participant is a sequence of fields, as defined by the columns
    of the csv file: last name, first name, institution, address 1,
    address 2, city, zip, country, phone, email, followed by any
    further columns (which are not used here).

    Courses whose title is not in the global list courses are skipped.
    For every other course, a document containing the logo, the course
    title and one paragraph per participant is saved as
    "<short course name>.docx", where the short name is the entry of
    coursesShort at the same index as the course in courses.  If
    coursesShort has no entry at that index, an error message is
    printed and no document is produced for that course.
    '''
    global courses

    # for each course, generate list of participants
    for course in CourseParticipants:

        # if course not valid, skip it
        if course not in courses:
            continue

        # Resolve the output file name first, so that no document is
        # built for a course that cannot be saved.
        index = courses.index( course )
        try:
            courseShortName = coursesShort[ index ]
        except IndexError:
            print( "### ERROR ###\nindex in short courses (",
                   index, ") out of range!" )
            print( "\n\n" )
            continue

        document = Document()

        # insert the logo, then right-align the last paragraph of the
        # document, i.e. the one that was just added for the picture
        document.add_picture( LOGO, width=Inches(2.00) )
        document.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.RIGHT
        document.add_paragraph()
        document.add_heading( course, level=1 )

        for fields in CourseParticipants[ course ]:
            # Only the first 10 columns are used.  Pad short rows with
            # empty strings and drop extra columns, so that a row with
            # an unexpected number of fields does not raise a
            # ValueError during unpacking.
            lastName, firstName, institution, address1, address2, \
                city, _zipCode, country, phone, email \
                = ( list( fields ) + [ "" ] * 10 )[ :10 ]

            # one paragraph per participant; the name is in bold
            paragraph = document.add_paragraph()
            paragraph.add_run(
                "{0}, {1}\n".format( lastName, firstName ) ).bold = True

            paragraph.add_run(
                "{0}\n".format( institution ) )

            # leave out the second address line when it is empty
            if address2:
                paragraph.add_run(
                    "{0}, {1}, {2}, {3}\n"
                    .format( address1, address2, city, country) )
            else:
                paragraph.add_run(
                    "{0}, {1}, {2}\n"
                    .format( address1, city, country) )

            paragraph.add_run( "{0}\n".format( phone ) )
            paragraph.add_run( "{0}".format( email ) )

        document.add_page_break()

        # the file name is the short version of the course name
        document.save( courseShortName + ".docx" )

                       
# define course dictionary:
# key is course
# value is list of participants.  Each participant is
# a list of fields
CourseParticipants = { }

# read the csv file named by the CSV constant and parse it
with open( CSV, newline='' ) as csvfile:

    # create a csv reader
    csvReader = csv.reader( csvfile, delimiter=',', quotechar='"' )
    for i, fields in enumerate( csvReader ):

        # debugging output: show every parsed line
        print( fields )

        # skip first line that contains headers
        if i == 0:
            continue

        # skip ill formed lines: a line needs more than 5 fields and
        # must be long enough to contain the course column, otherwise
        # fields[INDEXOFCOURSE] below would raise an IndexError
        if len( fields ) <= max( 5, INDEXOFCOURSE ):
            print( "Skipping line:", ", ".join( fields ) )
            continue

        # get the course from the correct field
        print( "INDEXOFCOURSE =", INDEXOFCOURSE )
        print( "fields[INDEXOFCOURSE] =", fields[INDEXOFCOURSE] )

        course = fields[INDEXOFCOURSE]
        print( "course = ", course )

        # add new participant to list associated with his/her course,
        # creating the list the first time the course is seen
        CourseParticipants.setdefault( course, [] ).append( fields )


# display the participants of each course as text on the screen
printCoursesAsText( CourseParticipants )

# generate one docx file per course
printCoursesAsWordDoc( CourseParticipants )

CSV file

Here's an example of the csv file:

Last name,First name,Institution,Address 1,Address 2,City,Zip,Country,Phone,Email,Course,ID,misc1,misc2
Smith,Joe,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 1212,joesmith@umass.edu,Machine Learning II,2,,
Jones,Alex,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 2121,alex@umass.edu,Machine Learning I,1,,

Output (Text)

Course #1: Machine Learning I
Jones, Alex
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 2121
alex@umass.edu

Travis, Martha
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 3232
mtravis@umass.edu



Course #2: Machine Learning II
Smith, Joe
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 1212
joesmith@umass.edu

DOCX Output

Here we show only one of the two files generated, the one with two students registered.

@@ Line 10: / Line 10: @@
 <br />
 ::<source lang="python">
-# parse_CSV_Generate_Docx.py
+# parseEcoModParticipants.py
 # D. Thiebaut
+from __future__ import print_function
 import csv
 from docx import Document
@@ Line 17: / Line 18: @@
 from docx.enum.text import WD_ALIGN_PARAGRAPH
-# The title of the courses, as they appear in the csv file
+INDEXOFCOURSE = 10   # column number where the course name is located
-courses=[ "Practical General Equilibrium",
+LOGO = "logo.png"    # the logo to include in each doc page
-           "Advanced Techniques in General Equilibrium",
+CSV  = "data.csv"    # the file containing the csv data
-          "Dynamic Stochastic General Equilibrium",
-          "Energy and Environmental Modeling",
+# the course titles
-          "Financial Social Accounting",
+courses=[ "Machine Learning I",
-          "Macroeconometric Modeling",
+           "Machine Learning II" ]
-          "Overlapping Generation General Equilibrium" ]
+# short version of the course titles.  This will be used as a file name
+# for the output docx document
+coursesShort = [ "ML1", "ML2" ]
-# the abbreviated versions of the courses. They will be used as the name
-# of the docx files
-coursesShort = [ "Practical_General",
-                 "Advanced_Techniques",
-                 "Dynamic_Stochastic",
-                 "Energy_and_Environmental",
-                 "Financial_Social",
-                 "Macroeconometric",
-                 "Overlapping_Generation" ]
 def printCoursesAsText( CourseParticipants ):
-     '''Takes a dictionary of courses and list of participants and
+     ''' Display the courses and participants as plain ASCII
-    outputs the courses and participants as ASCII text.'''
+    text.  Used mostly for debugging'''
      global courses
      # for each course, generate list of participants
@@ Line 65: / Line 61: @@
 def printCoursesAsWordDoc( CourseParticipants ):
-     '''Takes a dictionary of courses and list of participants and
+     '''Generate the docx document, one per course.   CourseParticipants
-     outputs a docx file for each course.'''
+    is a dictionary, where the key is the string representing the course,
+    and the value is a list of participants.  Each participants is a list
+     of fields, as defined by the columns of the csv file.
+    '''
      global courses
@@ Line 78: / Line 76: @@
          document = Document()
-         logo = document.add_picture('ecomod.png', width=Inches(2.00) )
+         logo = document.add_picture( LOGO, width=Inches(2.00) )
          last_paragraph = document.paragraphs[-1]
          last_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
@@ Line 135: / Line 133: @@
 # read the csv file and parse it
-with open('data.csv', newline='') as csvfile:
+with open( 'data.csv', newline='') as csvfile:
      # create a csv reader
      csvReader = csv.reader( csvfile, delimiter=',', quotechar='"')
      for i, fields in enumerate( csvReader ):
-        # skip ill formed lines
-         if len( fields ) <= 5: continue
+         print( fields )
          # skip first line that contains headers
-         if i==0: continue
+         if i==0:
+           continue
-         # course is Field 10
+         # skip ill formed lines
-         course = fields[10]
+        if len( fields ) <= 5:
+           print( "Skipping line:", ", ".join( fields ) )
+           continue
+        # get the course from the correct field
+        print( "INDEXOFCOURSE =", INDEXOFCOURSE )
+        print( "fields[INDEXOFCOURSE] =", fields[INDEXOFCOURSE] )
+         course = fields[INDEXOFCOURSE]
+        print( "course = ", course )
          # add new participant to list associated with his/her course
@@ Line 156: / Line 164: @@
-# now that the csv is parsed, generate the ASCII version
-# on the screen (takes a long time)
 printCoursesAsText( CourseParticipants )
-# generate a list of docx files, one for each course, with the
-# list of participants for eac
 printCoursesAsWordDoc( CourseParticipants )
 </source>
 <br />
+==CSC file==
 <br />
+Here's an example of the csv file:
 <br />
+::<source lang="text">
+Last name,First name,Institution,Address 1,Address 2,City,Zip,Country,Phone,Email,Course,ID,misc1,misc2
+Smith,Joe,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 1212,joesmith@umass.edu,Machine Learning II,2,\
+,
+Jones,Alex,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 2121,alex@umass.edu,Machine Learning I,1,,
+</source>
 <br />
+==Output (Text)==
+<br />
+::<source lang="text">
+Course #1: Machine Learning I
+Jones, Alex
+University of Massachusetts
+Dept. Computer Science, Amherst, Massachusetts, USA
+(413) 545 2121
+alex@umass.edu
+Travis, Martha
+University of Massachusetts
+Dept. Computer Science, Amherst, Massachusetts, USA
+(413) 545 3232
+mtravis@umass.edu
+Course #2: Machine Learning II
+Smith, Joe
+University of Massachusetts
+Dept. Computer Science, Amherst, Massachusetts, USA
+(413) 545 1212
+joesmith@umass.edu
+</source>
+<br />
+==DOCX Output==
 <br />
+Here we show only one of the two files generated, the one with two students registered.
 <br />
+[[Image:PythonDocx.png|650px|center]]
 <br />
 <br />
@@ Line 178: / Line 228: @@
 <br />
 <br />
-[[Category:Python]][[Category:Word]]
+[[Category:Python]][[Category:Word]][[Category:docx]]

Difference between revisions of "Tutorial: Python & DOCX"

Latest revision as of 04:17, 28 June 2018

Contents

Requirements

Source Code

CSV file

Output (Text)

DOCX Output

Navigation menu

Personal tools

Namespaces

Variants

Views

More

Search

Navigation

Tools