Tutorial: Python & DOCX

From dftwiki3
Revision as of 10:35, 19 June 2018 by Thiebaut (talk | contribs) (Source Code)
Jump to: navigation, search

D. Thiebaut (talk) 10:35, 19 June 2018 (EDT)


Requirements


  • Install the python-docx library using pip3 (pip3 install python-docx)
  • Uses the csv library that is already part of Python 3


Source Code


# parse_CSV_Generate_Docx.py
# D. Thiebaut
import csv
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH

# Full course titles, spelled exactly as they appear in the csv file.
# Only participants registered for one of these titles are output.
courses = [
    "Practical General Equilibrium",
    "Advanced Techniques in General Equilibrium",
    "Dynamic Stochastic General Equilibrium",
    "Energy and Environmental Modeling",
    "Financial Social Accounting",
    "Macroeconometric Modeling",
    "Overlapping Generation General Equilibrium",
]

# Abbreviated course names, used as the base name of the generated docx
# files.  The entry at a given index is the short form of the title found
# at the same index in the list of full course titles.
coursesShort = [
    "Practical_General",
    "Advanced_Techniques",
    "Dynamic_Stochastic",
    "Energy_and_Environmental",
    "Financial_Social",
    "Macroeconometric",
    "Overlapping_Generation",
]

def printCoursesAsText( CourseParticipants ):
    '''Print every valid course and its participants as plain text.

    CourseParticipants is a dictionary whose keys are course titles and
    whose values are lists of participants.  Each participant is a list
    of csv fields in this order: last name, first name, institution,
    address1, address2, city, zip, country, phone, email, followed by
    the course and any remaining columns (which are not printed).

    Courses whose title is not in the global list courses are skipped.
    Returns nothing; all output goes to standard output.'''

    # for each course, generate list of participants.  The number shown
    # is the position of the course in the dictionary, so skipped courses
    # leave gaps in the numbering.
    for j, course in enumerate( CourseParticipants ):

        # if course not valid, skip it
        if course not in courses: continue

        print( "\n" )
        print( "Course #{0}: {1}".format( j+1, course ) )

        for fields in CourseParticipants[ course ]:
            # Keep only the 10 columns that are printed, padding short
            # rows with empty strings.  Unpacking a fixed number of names
            # from the whole row would raise ValueError as soon as a row
            # has a different number of columns, and would also overwrite
            # the loop variable course and the builtin zip.
            row = ( list( fields ) + [ "" ] * 10 )[ :10 ]
            lastName, firstName, institution, address1, address2, \
                city, _zipCode, country, phone, email = row

            print( "{0}, {1}".format( lastName, firstName ) )
            print( institution )

            # the second address line is optional
            if address2:
                print( "{0}, {1}, {2}, {3}".format( address1, address2, city, country ))
            else:
                print( "{0}, {1}, {2}".format( address1, city, country ))

            print( phone )
            print( email )
            print()
        
def printCoursesAsWordDoc( CourseParticipants ):
    '''Write one docx file per valid course, listing its participants.

    CourseParticipants is a dictionary whose keys are course titles and
    whose values are lists of participants.  Each participant is a list
    of csv fields in this order: last name, first name, institution,
    address1, address2, city, zip, country, phone, email, followed by
    the course and any remaining columns (which are not used).

    Courses whose title is not in the global list courses are skipped.
    Each document starts with the picture ecomod.png (read from the
    current directory) and is saved as <short course name>.docx, where
    the short name is the entry of coursesShort at the same index as the
    title in courses.  Returns nothing.'''

    # for each course, generate list of participants
    for course in CourseParticipants:

        # if course not valid, skip it
        if course not in courses: continue

        # create name of file from short version of course name.  This is
        # done first so that a course without a short name is reported
        # before any work is spent on building its document.
        index = courses.index( course )
        try:
            courseShortName = coursesShort[ index ]
        except IndexError:
            print( "### ERROR ###\nindex in short courses (",
                   index, ") out of range!" )
            print( "\n\n" )
            continue

        document = Document()

        # logo at the top of the page: the picture is in the last
        # paragraph of the document at this point, so right-align that one
        document.add_picture('ecomod.png', width=Inches(2.00) )
        document.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.RIGHT
        document.add_paragraph()
        document.add_heading( course, level=1)

        for fields in CourseParticipants[ course ]:
            # Keep only the 10 columns that are used, padding short rows
            # with empty strings.  Unpacking a fixed number of names from
            # the whole row would raise ValueError as soon as a row has a
            # different number of columns, and would also overwrite the
            # loop variable course (needed for the heading and the file
            # name) and the builtin zip.
            row = ( list( fields ) + [ "" ] * 10 )[ :10 ]
            lastName, firstName, institution, address1, address2, \
                city, _zipCode, country, phone, email = row

            # one paragraph per participant, with the name in bold
            paragraph = document.add_paragraph()
            paragraph.add_run(
                "{0}, {1}\n".format( lastName, firstName ) ).bold = True
            paragraph.add_run( "{0}\n".format( institution ) )

            # the second address line is optional
            if address2:
                paragraph.add_run(
                    "{0}, {1}, {2}, {3}\n"
                    .format( address1, address2, city, country) )
            else:
                paragraph.add_run(
                    "{0}, {1}, {2}\n"
                    .format( address1, city, country) )

            paragraph.add_run( "{0}\n".format( phone ) )
            paragraph.add_run( "{0}".format( email ) )

        document.add_page_break()
        document.save( courseShortName + ".docx" )

                       
# define course dictionary:
# key is course
# value is list of participants.  Each participant is
# a list of fields
CourseParticipants = { }

# read the csv file and parse it
with open('data.csv', newline='') as csvfile:

    # create a csv reader
    csvReader = csv.reader( csvfile, delimiter=',', quotechar='"')
    for i, fields in enumerate( csvReader ):
        # skip first line that contains headers
        if i==0: continue

        # skip ill formed lines: the course is read from Field 10, so a
        # line needs at least 11 fields.  A smaller threshold would let
        # lines with 6 to 10 fields through and fields[10] would then
        # raise IndexError.
        if len( fields ) <= 10: continue

        # course is Field 10
        course = fields[10]

        # add new participant to list associated with his/her course,
        # creating the list the first time the course is seen
        CourseParticipants.setdefault( course, [] ).append( fields )


# now that the csv is parsed, generate the ASCII version
# on the screen (takes a long time)
printCoursesAsText( CourseParticipants )

# generate a list of docx files, one for each course, with the
# list of participants for each course
printCoursesAsWordDoc( CourseParticipants )