Difference between revisions of "Tutorial: Python & DOCX"

From dftwiki3
Jump to: navigation, search
(DOCX Output)
(Output (Text))
Line 188: Line 188:
 
::<source lang="text">
 
::<source lang="text">
  
Course #1: Machine Learning II
+
 
Smith, Joe
+
Course #1: Machine Learning I
 +
Jones, Alex
 +
University of Massachusetts
 +
Dept. Computer Science, Amherst, Massachusetts, USA
 +
(413) 545 2121
 +
alex@umass.edu
 +
 
 +
Travis, Martha
 
University of Massachusetts
 
University of Massachusetts
 
Dept. Computer Science, Amherst, Massachusetts, USA
 
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 1212
+
(413) 545 3232
joesmith@umass.edu
+
mtravis@umass.edu
  
  
  
Course #2: Machine Learning I
+
Course #2: Machine Learning II
Jones, Alex
+
Smith, Joe
 
University of Massachusetts
 
University of Massachusetts
 
Dept. Computer Science, Amherst, Massachusetts, USA
 
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 2121
+
(413) 545 1212
alex@umass.edu
+
joesmith@umass.edu
 +
 
  
  
 
</source>
 
</source>
 
<br />
 
<br />
 +
 
==DOCX Output==
 
==DOCX Output==
 
<br />
 
<br />

Revision as of 01:31, 20 June 2018

D. Thiebaut (talk) 10:35, 19 June 2018 (EDT)


Requirements


  • Install the python-docx library using pip3 (pip3 install python-docx); it provides the docx module imported below
  • Uses the csv library that is already part of Python 3


Source Code


# parseEcoModParticipants.py
# D. Thiebaut
from __future__ import print_function
import csv
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH

# Zero-based index of the csv column that holds the course name
INDEXOFCOURSE = 10
# Picture file inserted at the top of each generated document
LOGO = "logo.png"
# Name of the file containing the csv data
CSV = "data.csv"

# Full course titles.  Only participants registered for one of these
# titles are reported.
courses = [
    "Machine Learning I",
    "Machine Learning II",
]

# Short version of the course titles, listed in the same order as
# "courses".  Each one is used as the file name of the output docx
# document for the matching course.
coursesShort = [
    "ML1",
    "ML2",
]


def printCoursesAsText( CourseParticipants ):
    '''Display the courses and their participants as plain ASCII text.
    Used mostly for debugging.

    CourseParticipants is a dictionary, where the key is the string
    representing the course, and the value is a list of participants.
    Each participant is a list of fields, as defined by the columns of
    the csv file.  Only the first 10 columns (last name through email)
    are used; a row with fewer than 10 fields raises a ValueError.
    Courses that are not listed in the global "courses" list are skipped.
    '''
    # for each course, generate list of participants
    for j, course in enumerate( CourseParticipants ):

        # if course not valid, skip it
        if course not in courses: continue

        print( "\n" )
        # j counts every key of the dictionary, including the skipped ones,
        # so the displayed number is the position of the course in the
        # dictionary
        print( "Course #{0}: {1}".format( j+1, course ) )

        for fields in CourseParticipants[ course ]:
            # Take only the columns that are displayed.  Slicing accepts
            # rows with any number of trailing columns, and avoids
            # rebinding "course" or shadowing the builtin zip.
            lastName, firstName, institution, address1, address2, \
                city, _zipCode, country, phone, email = fields[:10]

            print( "{0}, {1}".format( lastName, firstName ) )
            print( "{0}".format( institution ) )
            # the second address line is optional
            if address2:
                print( "{0}, {1}, {2}, {3}".format( address1, address2, city, country ))
            else:
                print( "{0}, {1}, {2}".format( address1, city, country ))
            print( "{0}".format( phone ) )
            print( "{0}".format( email ) )
            print()
        
def printCoursesAsWordDoc( CourseParticipants ):
    '''Generate the docx documents, one per course.   CourseParticipants
    is a dictionary, where the key is the string representing the course,
    and the value is a list of participants.  Each participant is a list
    of fields, as defined by the columns of the csv file.  Only the first
    10 columns (last name through email) are used; a row with fewer than
    10 fields raises a ValueError.

    Courses that are not listed in the global "courses" list are skipped.
    Each document is saved in the current directory under the matching
    entry of "coursesShort", with a ".docx" extension.  If a course has no
    matching short name, an error is printed and no file is saved for it.
    '''
    # for each course, generate list of participants
    for course in CourseParticipants:

        # if course not valid, skip it
        if course not in courses: continue

        document = Document()

        # the logo goes at the top of the page, aligned to the right.
        # The alignment is set on the last paragraph of the document,
        # taken right after the picture is added.
        document.add_picture( LOGO, width=Inches(2.00) )
        last_paragraph = document.paragraphs[-1]
        last_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        document.add_paragraph()
        document.add_heading( course, level=1)

        for fields in CourseParticipants[ course ]:
            # Take only the columns that are displayed.  Slicing accepts
            # rows with any number of trailing columns, and avoids
            # rebinding "course" (needed below to find the file name) or
            # shadowing the builtin zip.
            lastName, firstName, institution, address1, address2, \
                city, _zipCode, country, phone, email = fields[:10]

            # one paragraph per participant; the name is in bold
            paragraph = document.add_paragraph()
            paragraph.add_run(
                "{0}, {1}\n".format( lastName, firstName ) ).bold = True

            paragraph.add_run(
                "{0}\n".format( institution ) )

            # the second address line is optional
            if address2:
                paragraph.add_run(
                    "{0}, {1}, {2}, {3}\n"
                    .format( address1, address2, city, country) )
            else:
                paragraph.add_run(
                    "{0}, {1}, {2}\n"
                    .format( address1, city, country) )

            paragraph.add_run( "{0}\n".format( phone ) )
            paragraph.add_run( "{0}".format( email ) )

        document.add_page_break()

        # create name of file from short version of course name.
        # course is known to be in courses (checked above), so only the
        # lookup in coursesShort can fail, when that list is too short.
        index = courses.index( course )
        try:
            courseShortName = coursesShort[ index ]
        except IndexError:
            print( "### ERROR ###\nindex in short courses (",
                   index, ") out of range!" )
            print( "\n\n" )
            continue

        document.save( courseShortName + ".docx" )

                       
# define course dictionary:
# key is course
# value is list of participants.  Each participant is
# a list of fields
CourseParticipants = { }

# read the csv file named by the CSV constant and parse it.
# newline='' is the setting the csv module documentation asks for
# when a file is handed to csv.reader.
with open( CSV, newline='' ) as csvfile:

    # create a csv reader
    csvReader = csv.reader( csvfile, delimiter=',', quotechar='"' )
    for i, fields in enumerate( csvReader ):

        # skip first line that contains headers
        if i == 0:
            continue

        # skip ill formed lines: a line must be long enough to
        # contain the course column
        if len( fields ) <= INDEXOFCOURSE:
            print( "Skipping line:", ", ".join( fields ) )
            continue

        # get the course from the correct field
        course = fields[INDEXOFCOURSE]

        # add new participant to list associated with his/her course,
        # creating the list the first time the course is seen
        CourseParticipants.setdefault( course, [] ).append( fields )


printCoursesAsText( CourseParticipants )

printCoursesAsWordDoc( CourseParticipants )


CSV file


Here's an example of the csv file:

Last name,First name,Institution,Address 1,Address 2,City,Zip,Country,Phone,Email,Course,ID,misc1,misc2
Smith,Joe,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 1212,joesmith@umass.edu,Machine Learning II,2,,
Jones,Alex,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 2121,alex@umass.edu,Machine Learning I,1,,


Output (Text)


Course #1: Machine Learning I
Jones, Alex
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 2121
alex@umass.edu

Travis, Martha
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 3232
mtravis@umass.edu



Course #2: Machine Learning II
Smith, Joe
University of Massachusetts
Dept. Computer Science, Amherst, Massachusetts, USA
(413) 545 1212
joesmith@umass.edu


DOCX Output


Here we show only one of the two files generated, the one with two students registered.

PythonDocx.png