Difference between revisions of "Tutorial: Python & DOCX"

From dftwiki3
Jump to: navigation, search
(Source Code)
Line 10: Line 10:
 
<br />
 
<br />
 
::<source lang="python">
 
::<source lang="python">
# parse_CSV_Generate_Docx.py
+
# parseEcoModParticipants.py
 
# D. Thiebaut
 
# D. Thiebaut
 +
from __future__ import print_function
 
import csv
 
import csv
 
from docx import Document
 
from docx import Document
Line 17: Line 18:
 
from docx.enum.text import WD_ALIGN_PARAGRAPH
 
from docx.enum.text import WD_ALIGN_PARAGRAPH
  
# The title of the courses, as they appear in the csv file
+
INDEXOFCOURSE = 10  # column number where the course name is located
courses=[ "Practical General Equilibrium",
+
LOGO = "logo.png"    # the logo to include in each doc page
           "Advanced Techniques in General Equilibrium",
+
CSV  = "data.csv"    # the file containing the csv data
          "Dynamic Stochastic General Equilibrium",
+
 
          "Energy and Environmental Modeling",
+
# the course titles
          "Financial Social Accounting",
+
courses=[ "Machine Learning I",
          "Macroeconometric Modeling",
+
           "Machine Learning II" ]
          "Overlapping Generation General Equilibrium" ]
+
 
 +
# short version of the course titles.  This will be used as a file name
 +
# for the output docx document
 +
coursesShort = [ "ML1", "ML2" ]
  
# the abbreviated versions of the courses. They will be used as the name
 
# of the docx files
 
coursesShort = [ "Practical_General",
 
                "Advanced_Techniques",
 
                "Dynamic_Stochastic",
 
                "Energy_and_Environmental",
 
                "Financial_Social",
 
                "Macroeconometric",
 
                "Overlapping_Generation" ]
 
  
 
def printCoursesAsText( CourseParticipants ):
 
def printCoursesAsText( CourseParticipants ):
     '''Takes a dictionary of courses and list of participants and
+
     ''' Display the courses and participants as plain ASCII
    outputs the courses and participants as ASCII text.'''
+
    ''' text.  Used mostly for debugging
 
     global courses
 
     global courses
 
     # for each course, generate list of participants                   
 
     # for each course, generate list of participants                   
Line 65: Line 60:
 
          
 
          
 
def printCoursesAsWordDoc( CourseParticipants ):
 
def printCoursesAsWordDoc( CourseParticipants ):
     '''Takes a dictionary of courses and list of participants and
+
     '''Generate the docx document, one per course.  CourseParticipants
     outputs a docx file for each course.'''
+
    is a dictionary, where the key is the string representing the course,
 
+
    and the value is a list of participants.  Each participants is a list
 +
     of fields, as defined by the columns of the csv file.
 +
    '''
 
     global courses
 
     global courses
  
Line 78: Line 75:
 
         document = Document()
 
         document = Document()
 
          
 
          
         logo = document.add_picture('ecomod.png', width=Inches(2.00) )
+
         logo = document.add_picture( LOGO, width=Inches(2.00) )
 
         last_paragraph = document.paragraphs[-1]
 
         last_paragraph = document.paragraphs[-1]
 
         last_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
 
         last_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
Line 135: Line 132:
  
 
# read the csv file and parse it
 
# read the csv file and parse it
with open('data.csv', newline='') as csvfile:
+
with open( 'data.csv', newline='') as csvfile:
  
 
     # create a csv reader
 
     # create a csv reader
 
     csvReader = csv.reader( csvfile, delimiter=',', quotechar='"')
 
     csvReader = csv.reader( csvfile, delimiter=',', quotechar='"')
 
     for i, fields in enumerate( csvReader ):
 
     for i, fields in enumerate( csvReader ):
        # skip ill formed lines
+
 
         if len( fields ) <= 5: continue
+
         print( fields )
  
 
         # skip first line that contains headers
 
         # skip first line that contains headers
         if i==0: continue
+
         if i==0:  
 +
          continue
 
          
 
          
         # course is Field 10
+
         # skip ill formed lines
         course = fields[10]
+
        if len( fields ) <= 5:
 +
          print( "Skipping line:", ", ".join( fields ) )
 +
          continue
 +
 
 +
        # get the course from the correct field
 +
        print( "INDEXOFCOURSE =", INDEXOFCOURSE )
 +
        print( "fields[INDEXOFCOURSE] =", fields[INDEXOFCOURSE] )
 +
 
 +
         course = fields[INDEXOFCOURSE]
 +
        print( "course = ", course )
  
 
         # add new participant to list associated with his/her course
 
         # add new participant to list associated with his/her course
Line 156: Line 163:
  
  
# now that the csv is parsed, generate the ASCII version
 
# on the screen (takes a long time)
 
 
printCoursesAsText( CourseParticipants )
 
printCoursesAsText( CourseParticipants )
  
# generate a list of docx files, one for each course, with the
 
# list of participants for eac
 
 
printCoursesAsWordDoc( CourseParticipants )
 
printCoursesAsWordDoc( CourseParticipants )
 +
   
 +
  
 
</source>
 
</source>
  
 
<br />
 
<br />
 +
==CSV file==
 +
<br />
 +
Here's an example of the csv file:
 
<br />
 
<br />
 +
::<source lang="text">
 +
Last name,First name,Institution,Address 1,Address 2,City,Zip,Country,Phone,Email,Course,ID,misc1,misc2
 +
Smith,Joe,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 1212,joesmith@umass.edu,Machine Learning II,2,\
 +
,
 +
Jones,Alex,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 2121,alex@umass.edu,Machine Learning I,1,,
 +
 +
</source>
 
<br />
 
<br />
 
<br />
 
<br />

Revision as of 11:32, 19 June 2018

D. Thiebaut (talk) 10:35, 19 June 2018 (EDT)


Requirements


  • Install the Python docx library using pip3; the package is named python-docx (pip3 install python-docx) and is imported as docx
  • Uses the csv library that is already part of Python 3


Source Code


# parseEcoModParticipants.py
# D. Thiebaut
from __future__ import print_function
import csv
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH

# zero-based index of the csv column where the course name is located
INDEXOFCOURSE = 10

# image file inserted as the logo in each generated document
LOGO = "logo.png"

# the file containing the csv data
CSV = "data.csv"

# the full course titles, as they appear in the course column of the csv file
courses = [
    "Machine Learning I",
    "Machine Learning II",
]

# short version of the course titles, in the same order as `courses`.
# Each one is used as the base name of the docx file generated for
# the corresponding course.
coursesShort = [
    "ML1",
    "ML2",
]


def printCoursesAsText( CourseParticipants ):
    '''Display the courses and participants as plain ASCII text.
    Used mostly for debugging.

    CourseParticipants is a dictionary, where the key is the string
    representing the course, and the value is a list of participants.
    Each participant is a list of fields, in the order of the csv
    columns: last name, first name, institution, address 1, address 2,
    city, zip, country, phone, email, followed by columns that are not
    displayed.

    Courses that are not listed in the global list `courses` are skipped.
    Nothing is returned; everything is printed on the screen.
    '''
    # number of leading columns needed to describe a participant
    NOFIELDS = 10

    # for each course, generate list of participants
    for j, course in enumerate( CourseParticipants ):

        # if course not valid, skip it
        if course not in courses: continue

        print( "\n" )
        print( "Course #{0}: {1}".format( j+1, course ) )

        for fields in CourseParticipants[ course ]:
            # pad short rows with empty strings and ignore extra columns,
            # so that a row with an unexpected number of columns is still
            # displayed instead of raising a ValueError
            padded = list( fields ) + [ "" ] * ( NOFIELDS - len( fields ) )
            lastName, firstName, institution, address1, address2, \
                city, _, country, phone, email = padded[ :NOFIELDS ]

            print( "{0}, {1}".format( lastName, firstName ) )
            print( "{0}".format( institution ) )

            # the second address line is optional
            if len( address2 ) > 0:
                print( "{0}, {1}, {2}, {3}".format( address1, address2, city, country ))
            else:
                print( "{0}, {1}, {2}".format( address1, city, country ))
            print( "{0}".format( phone ) )
            print( "{0}".format( email ) )
            print()
        
def printCoursesAsWordDoc( CourseParticipants ):
    '''Generate the docx documents, one per course.

    CourseParticipants is a dictionary, where the key is the string
    representing the course, and the value is a list of participants.
    Each participant is a list of fields, in the order of the csv
    columns: last name, first name, institution, address 1, address 2,
    city, zip, country, phone, email, followed by columns that are not
    used.

    Courses that are not listed in the global list `courses` are skipped.
    Each document is saved in the current directory as
    <short name>.docx, where the short name is the entry of `coursesShort`
    at the same index as the course in `courses`.  If there is no such
    entry, an error message is printed and the course is skipped.
    Nothing is returned.
    '''
    # number of leading columns needed to describe a participant
    NOFIELDS = 10

    # for each course, generate list of participants
    for course in CourseParticipants:

        # if course not valid, skip it
        if course not in courses: continue

        # create name of file from short version of course name.  This is
        # done first, so that no document is built for a course that has
        # no file name to be saved under.
        index = courses.index( course )
        try:
            courseShortName = coursesShort[ index ]
        except IndexError:
            print( "### ERROR ###\nindex in short courses (",
                   index, ") out of range!" )
            print( "\n\n" )
            continue

        document = Document()

        # add_picture puts the logo in its own paragraph at the end of
        # the document, i.e. the last paragraph: right-align it
        document.add_picture( LOGO, width=Inches(2.00) )
        last_paragraph = document.paragraphs[-1]
        last_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        document.add_paragraph()
        document.add_heading( course, level=1)

        for fields in CourseParticipants[ course ]:
            # pad short rows with empty strings and ignore extra columns,
            # so that a row with an unexpected number of columns does not
            # raise a ValueError
            padded = list( fields ) + [ "" ] * ( NOFIELDS - len( fields ) )
            lastName, firstName, institution, address1, address2, \
                city, _, country, phone, email = padded[ :NOFIELDS ]

            # one paragraph per participant, name in bold
            paragraph = document.add_paragraph()
            paragraph.add_run(
                "{0}, {1}\n".format( lastName, firstName ) ).bold = True

            paragraph.add_run(
                "{0}\n".format( institution ) )

            # the second address line is optional
            if len( address2 ) > 0:
                paragraph.add_run(
                    "{0}, {1}, {2}, {3}\n"
                    .format( address1, address2, city, country) )
            else:
                paragraph.add_run(
                    "{0}, {1}, {2}\n"
                    .format( address1, city, country) )

            paragraph.add_run( "{0}\n".format( phone ) )
            paragraph.add_run( "{0}".format( email ) )

        # end the document with a page break, then save it
        document.add_page_break()
        document.save( courseShortName + ".docx" )

                       
# define course dictionary:
# key is course
# value is list of participants.  Each participant is
# a list of fields
CourseParticipants = { }

# read the csv file named by the CSV constant and parse it.
# newline='' is the setting the csv module asks for, so that it can
# handle the line endings itself.
with open( CSV, newline='' ) as csvfile:

    # create a csv reader
    csvReader = csv.reader( csvfile, delimiter=',', quotechar='"')
    for i, fields in enumerate( csvReader ):

        # debugging output: show every line as it is parsed
        print( fields )

        # skip first line that contains headers
        if i==0:
            continue

        # skip ill formed lines: a line must be long enough to contain
        # the course column, otherwise fields[INDEXOFCOURSE] would raise
        # an IndexError
        if len( fields ) <= INDEXOFCOURSE:
            print( "Skipping line:", ", ".join( fields ) )
            continue

        # get the course from the correct field
        print( "INDEXOFCOURSE =", INDEXOFCOURSE )
        print( "fields[INDEXOFCOURSE] =", fields[INDEXOFCOURSE] )

        course = fields[INDEXOFCOURSE]
        print( "course = ", course )

        # add new participant to list associated with his/her course,
        # creating the list the first time the course is seen
        CourseParticipants.setdefault( course, [] ).append( fields )


# display the parsed courses and participants on the screen
printCoursesAsText( CourseParticipants )

# generate the docx files, one for each course
printCoursesAsWordDoc( CourseParticipants )


CSV file


Here's an example of the csv file:

Last name,First name,Institution,Address 1,Address 2,City,Zip,Country,Phone,Email,Course,ID,misc1,misc2
Smith,Joe,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 1212,joesmith@umass.edu,Machine Learning II,2,\
,
Jones,Alex,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 2121,alex@umass.edu,Machine Learning I,1,,