Difference between revisions of "Tutorial: Python & DOCX"

From dftwiki3
Jump to: navigation, search
(DOCX Output)
(Output (Text))
Line 188: Line 188:
::<source lang="text">
::<source lang="text">
Course #1: Machine Learning II
Smith, Joe
Course #1: Machine Learning I
Jones, Alex
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 2121
Travis, Martha
University of Massachusetts
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 1212
(413) 545 3232
Course #2: Machine Learning I
Course #2: Machine Learning II
Jones, Alex
Smith, Joe
University of Massachusetts
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 2121
(413) 545 1212
<br />
<br />
==DOCX Output==
==DOCX Output==
<br />
<br />

Revision as of 01:31, 20 June 2018

D. Thiebaut (talk) 10:35, 19 June 2018 (EDT)


  • Install the python-docx library using pip3 (pip3 install python-docx); it provides the docx module imported below
  • Uses the csv library that is already part of Python 3

Source Code

# parseEcoModParticipants.py
# D. Thiebaut
from __future__ import print_function
import csv
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH

# Zero-based index of the csv column that holds the course name
INDEXOFCOURSE = 10
# Image file inserted at the top of every generated document
LOGO = "logo.png"
# Name of the file containing the csv data
CSV = "data.csv"

# Full course titles, spelled as they appear in the csv course column
courses = [
    "Machine Learning I",
    "Machine Learning II",
]

# Abbreviated course titles, parallel to `courses`.  Each one is used as
# the base name of the docx file generated for the matching course.
coursesShort = [
    "ML1",
    "ML2",
]

def printCoursesAsText( CourseParticipants ):
    '''Display the courses and their participants as plain ASCII text.

    Used mostly for debugging.

    CourseParticipants is a dictionary where the key is the string
    representing the course and the value is a list of participants.
    Each participant is a list of fields, in the order of the csv
    columns; only the first 10 fields are used here.

    Courses that are not listed in the module-level `courses` list are
    skipped.  Nothing is returned; everything goes to standard output.
    '''
    # for each course, generate list of participants
    for j, course in enumerate( CourseParticipants ):
        # if course not valid, skip it
        if course not in courses:
            continue

        print( "\n" )
        print( "Course #{0}: {1}".format( j+1, course ) )

        for fields in CourseParticipants[ course ]:
            # Slice rather than unpack the whole row: the columns after
            # the email are not needed, and this keeps the loop variable
            # `course` and the builtin `zip` from being rebound.
            lastName, firstName, institution, address1, address2, \
                city, _zipCode, country, phone, email = fields[:10]

            print( "{0}, {1}".format( lastName, firstName ) )
            print( "{0}".format( institution ) )

            # print exactly one address line, with or without address2
            if len( address2 ) > 0:
                print( "{0}, {1}, {2}, {3}".format( address1, address2, city, country ))
            else:
                print( "{0}, {1}, {2}".format( address1, city, country ))

            print( "{0}".format( phone ) )
            print( "{0}".format( email ) )

def printCoursesAsWordDoc( CourseParticipants ):
    '''Generate the docx documents, one per course.

    CourseParticipants is a dictionary, where the key is the string
    representing the course, and the value is a list of participants.
    Each participant is a list of fields, as defined by the columns of
    the csv file; only the first 10 fields are used here.

    Courses that are not listed in the module-level `courses` list are
    skipped.  For every other course a file named after the matching
    entry of `coursesShort`, with a ".docx" extension, is saved.  If a
    course has no matching short name, an error is printed and no file
    is written for that course.
    '''
    # for each course, generate list of participants
    for course in CourseParticipants:
        # if course not valid, skip it
        if course not in courses:
            continue

        # create name of file from short version of course name.  This
        # is done first so that no document is built when it cannot be
        # saved under a proper name.
        index = courses.index( course )
        try:
            courseShortName = coursesShort[ index ]
        except IndexError:
            print( "### ERROR ###\nindex in short courses (",
                   index, ") out of range!" )
            print( "\n\n" )
            continue

        document = Document()

        # the picture lands in the last paragraph of the document, which
        # is then right-aligned
        document.add_picture( LOGO, width=Inches(2.00) )
        last_paragraph = document.paragraphs[-1]
        last_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT

        document.add_heading( course, level=1)

        for fields in CourseParticipants[ course ]:
            # Slice rather than unpack the whole row: the columns after
            # the email are not needed, and this keeps the loop variable
            # `course` and the builtin `zip` from being rebound.
            lastName, firstName, institution, address1, address2, \
                city, _zipCode, country, phone, email = fields[:10]

            # one paragraph per participant; the name is in bold
            paragraph = document.add_paragraph()
            paragraph.add_run(
                "{0}, {1}\n".format( lastName, firstName ) ).bold = True
            paragraph.add_run(
                "{0}\n".format( institution ) )

            # add exactly one address line, with or without address2
            if len( address2 ) > 0:
                paragraph.add_run(
                    "{0}, {1}, {2}, {3}\n"
                    .format( address1, address2, city, country) )
            else:
                paragraph.add_run(
                    "{0}, {1}, {2}\n"
                    .format( address1, city, country) )

            paragraph.add_run( "{0}\n".format( phone ) )
            paragraph.add_run( "{0}".format( email ) )

        document.save( courseShortName + ".docx" )

# define course dictionary:
# key is course
# value is list of participants.  Each participant is
# a list of fields
CourseParticipants = { }

# read the csv file and parse it
with open( CSV, newline='') as csvfile:

    # create a csv reader
    csvReader = csv.reader( csvfile, delimiter=',', quotechar='"')
    for i, fields in enumerate( csvReader ):

        print( fields )

        # skip first line that contains headers
        if i == 0:
            continue

        # skip ill formed lines: a line must be long enough to contain
        # the course column, otherwise the lookup below would fail
        if len( fields ) <= INDEXOFCOURSE:
            print( "Skipping line:", ", ".join( fields ) )
            continue

        # get the course from the correct field
        print( "fields[INDEXOFCOURSE] =", fields[INDEXOFCOURSE] )

        course = fields[INDEXOFCOURSE]
        print( "course = ", course )

        # add new participant to list associated with his/her course,
        # creating the list the first time the course is seen
        CourseParticipants.setdefault( course, [] ).append( fields )

printCoursesAsText( CourseParticipants )

printCoursesAsWordDoc( CourseParticipants )

CSV file

Here's an example of the csv file:

Last name,First name,Institution,Address 1,Address 2,City,Zip,Country,Phone,Email,Course,ID,misc1,misc2
Smith,Joe,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 1212,joesmith@umass.edu,Machine Learning II,2,,
Jones,Alex,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 2121,alex@umass.edu,Machine Learning I,1,,

Output (Text)

Course #1: Machine Learning I
Jones, Alex
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 2121

Travis, Martha
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 3232

Course #2: Machine Learning II
Smith, Joe
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 1212

DOCX Output

Here we show only one of the two files generated, the one with two students registered.
