Difference between revisions of "Tutorial: Python & DOCX"
(→CSC file=) |
(→DOCX Output) |
||
Line 208: | Line 208: | ||
<br /> | <br /> | ||
==DOCX Output== | ==DOCX Output== | ||
+ | <br /> | ||
+ | Here we show only one of the two files generated, the one with two students registered. | ||
<br /> | <br /> | ||
[[Image:PythonDocx.png|650px|center]] | [[Image:PythonDocx.png|650px|center]] |
Revision as of 01:26, 20 June 2018
D. Thiebaut (talk) 10:35, 19 June 2018 (EDT)
Requirements
- Install the Python docx library using pip3
- Uses the csv library that is already part of Python 3
Source Code
# parseEcoModParticipants.py
# D. Thiebaut
#
# Reads a csv file of course registrations, groups the participants by
# course, prints each group as plain text, and writes one docx document
# per known course.
from __future__ import print_function

import csv

from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Inches

INDEXOFCOURSE = 10      # column number where the course name is located
LOGO = "logo.png"       # the logo to include in each doc page
CSV = "data.csv"        # the file containing the csv data

# the course titles
courses = [ "Machine Learning I", "Machine Learning II" ]

# short version of the course titles.  This will be used as a file name
# for the output docx document
coursesShort = [ "ML1", "ML2" ]


def _participantLines( fields ):
    '''Return the 5 display lines (name, institution, address, phone,
    email) for one participant.

    fields is one row of the csv file.  Only its first 10 columns (last
    name through email) are used, so rows may carry any number of extra
    trailing columns.  Raises ValueError if the row has fewer than 10
    columns.
    '''
    lastName, firstName, institution, address1, address2, \
        city, _zipCode, country, phone, email = fields[:10]

    # the second address line is optional: leave it out when empty
    if len( address2 ) > 0:
        addressParts = [ address1, address2, city, country ]
    else:
        addressParts = [ address1, city, country ]

    return [ "{0}, {1}".format( lastName, firstName ),
             "{0}".format( institution ),
             ", ".join( addressParts ),
             "{0}".format( phone ),
             "{0}".format( email ) ]


def printCoursesAsText( CourseParticipants ):
    '''Display the courses and participants as plain ASCII text.
    Used mostly for debugging.

    CourseParticipants is a dictionary, where the key is the string
    representing the course, and the value is a list of participants.
    Each participant is a list of fields, as defined by the columns of
    the csv file.
    '''
    # for each course, generate list of participants
    for j, courseName in enumerate( CourseParticipants ):

        # if course not valid, skip it (it still counts in the numbering)
        if courseName not in courses:
            continue

        print( "\n" )
        print( "Course #{0}: {1}".format( j+1, courseName ) )

        for fields in CourseParticipants[ courseName ]:
            for line in _participantLines( fields ):
                print( line )
            print()


def printCoursesAsWordDoc( CourseParticipants ):
    '''Generate the docx document, one per course.

    CourseParticipants is a dictionary, where the key is the string
    representing the course, and the value is a list of participants.
    Each participant is a list of fields, as defined by the columns of
    the csv file.

    Each document is saved in the current directory under the short
    name of its course (taken from coursesShort) with a .docx extension.
    Courses that are not listed in courses are skipped.
    '''
    # for each course, generate list of participants
    for courseName, participants in CourseParticipants.items():

        # if course not valid, skip it
        if courseName not in courses:
            continue

        document = Document()

        # the logo goes at the top of the document; its paragraph is the
        # last one at this point, so that is the one to right-align
        document.add_picture( LOGO, width=Inches(2.00) )
        document.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.RIGHT

        document.add_paragraph()
        document.add_heading( courseName, level=1 )

        for fields in participants:
            name, institution, address, phone, email = \
                _participantLines( fields )

            # one paragraph per participant; name in bold, and no
            # newline after the last line (the email)
            paragraph = document.add_paragraph()
            paragraph.add_run( name + "\n" ).bold = True
            paragraph.add_run( institution + "\n" )
            paragraph.add_run( address + "\n" )
            paragraph.add_run( phone + "\n" )
            paragraph.add_run( email )

        document.add_page_break()

        # create name of file from short version of course name
        index = courses.index( courseName )
        try:
            courseShortName = coursesShort[ index ]
        except IndexError:
            # coursesShort is shorter than courses: nothing to name the
            # file with, so report it and move on to the next course
            print( "### ERROR ###\nindex in short courses (", index, ") out of range!" )
            print( "\n\n" )
            continue

        document.save( courseShortName + ".docx" )


def _readParticipants( csvPath ):
    '''Read the csv file csvPath and return the course dictionary:
    key is course, value is list of participants.  Each participant is
    a list of fields.

    The first line (headers) is skipped, and so is any line too short
    to contain the course column.  Every line read is echoed to the
    output as a trace.
    '''
    participantsByCourse = { }

    # newline='' is what the csv module requires for files it reads
    with open( csvPath, newline='' ) as csvfile:

        # create a csv reader
        csvReader = csv.reader( csvfile, delimiter=',', quotechar='"' )

        for i, fields in enumerate( csvReader ):
            print( fields )

            # skip first line that contains headers
            if i == 0:
                continue

            # skip ill formed lines: the course column must exist
            if len( fields ) <= INDEXOFCOURSE:
                print( "Skipping line:", ", ".join( fields ) )
                continue

            # get the course from the correct field
            print( "INDEXOFCOURSE =", INDEXOFCOURSE )
            print( "fields[INDEXOFCOURSE] =", fields[INDEXOFCOURSE] )
            course = fields[INDEXOFCOURSE]
            print( "course = ", course )

            # add new participant to list associated with his/her course
            participantsByCourse.setdefault( course, [] ).append( fields )

    return participantsByCourse


# define course dictionary:
# key is course
# value is list of participants.  Each participant is
# a list of fields
CourseParticipants = _readParticipants( CSV )

printCoursesAsText( CourseParticipants )
printCoursesAsWordDoc( CourseParticipants )
CSV file
Here's an example of the csv file:
Last name,First name,Institution,Address 1,Address 2,City,Zip,Country,Phone,Email,Course,ID,misc1,misc2 Smith,Joe,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 1212,joesmith@umass.edu,Machine Learning II,2,\ , Jones,Alex,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 2121,alex@umass.edu,Machine Learning I,1,,
Output (Text)
Course #1: Machine Learning II Smith, Joe University of Massachusetts Dept. Computer Science, Amherst, Massachusetts, USA (413) 545 1212 joesmith@umass.edu Course #2: Machine Learning I Jones, Alex University of Massachusetts Dept. Computer Science, Amherst, Massachusetts, USA (413) 545 2121 alex@umass.edu
DOCX Output
Here we show only one of the two files generated, the one with two students registered.