Difference between revisions of "Tutorial: Python & DOCX"
(Created page with "~~~~ ---- =Requirements= <br /> * Install the Python docx library using pip3 * Uses the csv library that is already part of Python 3 <br /> =Source Code= <br /> ::<source lan...") |
(→DOCX Output) |
||
(6 intermediate revisions by the same user not shown) | |||
Line 12: | Line 12: | ||
# parseEcoModParticipants.py | # parseEcoModParticipants.py | ||
# D. Thiebaut | # D. Thiebaut | ||
+ | from __future__ import print_function | ||
import csv | import csv | ||
from docx import Document | from docx import Document | ||
Line 17: | Line 18: | ||
from docx.enum.text import WD_ALIGN_PARAGRAPH | from docx.enum.text import WD_ALIGN_PARAGRAPH | ||
− | # | + | INDEXOFCOURSE = 10 # column number where the course name is located |
− | courses=[ " | + | LOGO = "logo.png" # the logo to include in each doc page |
− | " | + | CSV = "data.csv" # the file containing the csv data |
− | + | ||
− | + | # the course titles | |
− | + | courses=[ "Machine Learning I", | |
− | + | "Machine Learning II" ] | |
− | + | ||
+ | # short version of the course titles. This will be used as a file name | ||
+ | # for the output docx document | ||
+ | coursesShort = [ "ML1", "ML2" ] | ||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
− | |||
def printCoursesAsText( CourseParticipants ): | def printCoursesAsText( CourseParticipants ): | ||
− | ''' | + | ''' Display the courses and participants as plain ASCII |
− | + | text. Used mostly for debugging''' | |
+ | |||
global courses | global courses | ||
# for each course, generate list of participants | # for each course, generate list of participants | ||
Line 65: | Line 61: | ||
def printCoursesAsWordDoc( CourseParticipants ): | def printCoursesAsWordDoc( CourseParticipants ): | ||
− | ''' | + | '''Generate the docx document, one per course. CourseParticipants |
− | + | is a dictionary, where the key is the string representing the course, | |
− | + | and the value is a list of participants. Each participants is a list | |
+ | of fields, as defined by the columns of the csv file. | ||
+ | ''' | ||
global courses | global courses | ||
Line 78: | Line 76: | ||
document = Document() | document = Document() | ||
− | logo = document.add_picture( | + | logo = document.add_picture( LOGO, width=Inches(2.00) ) |
last_paragraph = document.paragraphs[-1] | last_paragraph = document.paragraphs[-1] | ||
last_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT | last_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT | ||
Line 135: | Line 133: | ||
# read the csv file and parse it | # read the csv file and parse it | ||
− | with open('data.csv', newline='') as csvfile: | + | with open( 'data.csv', newline='') as csvfile: |
# create a csv reader | # create a csv reader | ||
csvReader = csv.reader( csvfile, delimiter=',', quotechar='"') | csvReader = csv.reader( csvfile, delimiter=',', quotechar='"') | ||
for i, fields in enumerate( csvReader ): | for i, fields in enumerate( csvReader ): | ||
− | + | ||
− | + | print( fields ) | |
# skip first line that contains headers | # skip first line that contains headers | ||
− | if i==0: continue | + | if i==0: |
+ | continue | ||
− | # course | + | # skip ill formed lines |
− | course = fields[ | + | if len( fields ) <= 5: |
+ | print( "Skipping line:", ", ".join( fields ) ) | ||
+ | continue | ||
+ | |||
+ | # get the course from the correct field | ||
+ | print( "INDEXOFCOURSE =", INDEXOFCOURSE ) | ||
+ | print( "fields[INDEXOFCOURSE] =", fields[INDEXOFCOURSE] ) | ||
+ | |||
+ | course = fields[INDEXOFCOURSE] | ||
+ | print( "course = ", course ) | ||
# add new participant to list associated with his/her course | # add new participant to list associated with his/her course | ||
Line 156: | Line 164: | ||
− | |||
− | |||
printCoursesAsText( CourseParticipants ) | printCoursesAsText( CourseParticipants ) | ||
− | |||
− | |||
printCoursesAsWordDoc( CourseParticipants ) | printCoursesAsWordDoc( CourseParticipants ) | ||
+ | |||
+ | |||
</source> | </source> | ||
<br /> | <br /> | ||
+ | ==CSV file== | ||
<br /> | <br /> | ||
+ | Here's an example of the csv file: | ||
<br /> | <br /> | ||
+ | ::<source lang="text"> | ||
+ | Last name,First name,Institution,Address 1,Address 2,City,Zip,Country,Phone,Email,Course,ID,misc1,misc2 | ||
+ | Smith,Joe,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 1212,joesmith@umass.edu,Machine Learning II,2,\ | ||
+ | , | ||
+ | Jones,Alex,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 2121,alex@umass.edu,Machine Learning I,1,, | ||
+ | |||
+ | </source> | ||
<br /> | <br /> | ||
+ | ==Output (Text)== | ||
+ | <br /> | ||
+ | ::<source lang="text"> | ||
+ | |||
+ | |||
+ | Course #1: Machine Learning I | ||
+ | Jones, Alex | ||
+ | University of Massachusetts | ||
+ | Dept. Computer Science, Amherst, Massachusetts, USA | ||
+ | (413) 545 2121 | ||
+ | alex@umass.edu | ||
+ | |||
+ | Travis, Martha | ||
+ | University of Massachusetts | ||
+ | Dept. Computer Science, Amherst, Massachusetts, USA | ||
+ | (413) 545 3232 | ||
+ | mtravis@umass.edu | ||
+ | |||
+ | |||
+ | |||
+ | Course #2: Machine Learning II | ||
+ | Smith, Joe | ||
+ | University of Massachusetts | ||
+ | Dept. Computer Science, Amherst, Massachusetts, USA | ||
+ | (413) 545 1212 | ||
+ | joesmith@umass.edu | ||
+ | |||
+ | |||
+ | |||
+ | </source> | ||
+ | <br /> | ||
+ | |||
+ | ==DOCX Output== | ||
<br /> | <br /> | ||
+ | Here we show only one of the two files generated, the one with two students registered. | ||
<br /> | <br /> | ||
+ | [[Image:PythonDocx.png|650px|center]] | ||
<br /> | <br /> | ||
<br /> | <br /> | ||
Line 178: | Line 228: | ||
<br /> | <br /> | ||
<br /> | <br /> | ||
− | [[Category:Python]][[Category:Word]] | + | [[Category:Python]][[Category:Word]][[Category:docx]] |
Latest revision as of 04:17, 28 June 2018
D. Thiebaut (talk) 10:35, 19 June 2018 (EDT)
Requirements
- Install the python-docx library (the package that provides the docx module) using pip3: pip3 install python-docx
- Uses the csv library that is already part of Python 3
Source Code
# parseEcoModParticipants.py
# D. Thiebaut
#
# Reads a csv file of course participants, groups the participants by
# course, prints the groups as plain text, and generates one docx document
# per known course.
from __future__ import print_function
import csv
from docx import Document
from docx.shared import Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH

INDEXOFCOURSE  = 10          # column number where the course name is located
NUMBEROFFIELDS = 14          # number of columns each participant row must have
LOGO           = "logo.png"  # the logo to include in each doc page
CSV            = "data.csv"  # the file containing the csv data

# the course titles
courses = [ "Machine Learning I",
            "Machine Learning II" ]

# short version of the course titles.  This will be used as a file name
# for the output docx document
coursesShort = [ "ML1", "ML2" ]


def _addressLine( address1, address2, city, country ):
    '''Return the address as one comma-separated string.  The second
    address line is included only when it is not empty.'''
    parts = [ address1, city, country ]
    if len( address2 ) > 0:
        parts.insert( 1, address2 )
    return ", ".join( parts )


def printCoursesAsText( CourseParticipants ):
    '''Display the courses and participants as plain ASCII text.
    Used mostly for debugging.

    CourseParticipants is a dictionary, where the key is the string
    representing the course, and the value is a list of participants.
    Each participant is a list of NUMBEROFFIELDS fields, as defined by
    the columns of the csv file.  Courses that are not listed in the
    global courses list are skipped.
    '''
    # for each course, generate list of participants
    for j, course in enumerate( CourseParticipants ):

        # if course not valid, skip it
        if course not in courses:
            continue

        print( "\n" )
        print( "Course #{0}: {1}".format( j+1, course ) )

        for fields in CourseParticipants[ course ]:
            # the zip code and course columns are not displayed; they are
            # bound to throw-away names so that neither the builtin zip
            # nor the loop variable course gets overwritten
            lastName, firstName, institution, address1, address2, \
                city, _zipCode, country, phone, email, _course, _, _, _ = fields

            print( "{0}, {1}".format( lastName, firstName ) )
            print( "{0}".format( institution ) )
            print( _addressLine( address1, address2, city, country ) )
            print( "{0}".format( phone ) )
            print( "{0}".format( email ) )
            print()


def printCoursesAsWordDoc( CourseParticipants ):
    '''Generate the docx documents, one per course.

    CourseParticipants is a dictionary, where the key is the string
    representing the course, and the value is a list of participants.
    Each participant is a list of NUMBEROFFIELDS fields, as defined by
    the columns of the csv file.  Courses that are not listed in the
    global courses list are skipped.  Each document is saved under the
    short name of its course (taken from coursesShort) followed by
    ".docx".
    '''
    # for each course, generate list of participants
    for course in CourseParticipants:

        # if course not valid, skip it
        if course not in courses:
            continue

        document = Document()

        # put the logo at the top of the document, and right-align the
        # paragraph that was just added for it
        document.add_picture( LOGO, width=Inches(2.00) )
        lastParagraph = document.paragraphs[-1]
        lastParagraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT

        document.add_paragraph()
        document.add_heading( course, level=1 )

        # one paragraph per participant, name in bold
        for fields in CourseParticipants[ course ]:
            lastName, firstName, institution, address1, address2, \
                city, _zipCode, country, phone, email, _course, _, _, _ = fields

            paragraph = document.add_paragraph()
            paragraph.add_run( "{0}, {1}\n".format( lastName, firstName ) ).bold = True
            paragraph.add_run( "{0}\n".format( institution ) )
            paragraph.add_run( "{0}\n".format(
                _addressLine( address1, address2, city, country ) ) )
            paragraph.add_run( "{0}\n".format( phone ) )
            paragraph.add_run( "{0}".format( email ) )

        document.add_page_break()

        # create name of file from short version of course name
        index = courses.index( course )
        try:
            courseShortName = coursesShort[ index ]
        except IndexError:
            # coursesShort has fewer entries than courses
            print( "### ERROR ###\nindex in short courses (", index, ") out of range!" )
            print( "\n\n" )
            continue

        document.save( courseShortName + ".docx" )


# define course dictionary:
# key is course
# value is list of participants.  Each participant is
# a list of fields
CourseParticipants = { }

# read the csv file and parse it
with open( CSV, newline='' ) as csvfile:
    # create a csv reader
    csvReader = csv.reader( csvfile, delimiter=',', quotechar='"' )

    for i, fields in enumerate( csvReader ):

        # trace of every row read (debugging output)
        print( fields )

        # skip first line that contains headers
        if i==0:
            continue

        # skip ill formed lines: every row must have exactly the number
        # of columns that the print functions unpack, otherwise indexing
        # the course column or unpacking the row would raise an exception
        if len( fields ) != NUMBEROFFIELDS:
            print( "Skipping line:", ", ".join( fields ) )
            continue

        # get the course from the correct field
        print( "INDEXOFCOURSE =", INDEXOFCOURSE )
        print( "fields[INDEXOFCOURSE] =", fields[INDEXOFCOURSE] )

        course = fields[INDEXOFCOURSE]
        print( "course = ", course )

        # add new participant to list associated with his/her course
        CourseParticipants.setdefault( course, [] ).append( fields )


printCoursesAsText( CourseParticipants )
printCoursesAsWordDoc( CourseParticipants )
CSV file
Here's an example of the csv file:
Last name,First name,Institution,Address 1,Address 2,City,Zip,Country,Phone,Email,Course,ID,misc1,misc2 Smith,Joe,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 1212,joesmith@umass.edu,Machine Learning II,2,\ , Jones,Alex,UMass,Dept. Computer Science,,Amherst,01002,"Massachusetts, USA",(413) 545 2121,alex@umass.edu,Machine Learning I,1,,
Output (Text)
Course #1: Machine Learning I Jones, Alex University of Massachusetts Dept. Computer Science, Amherst, Massachusetts, USA (413) 545 2121 alex@umass.edu Travis, Martha University of Massachusetts Dept. Computer Science, Amherst, Massachusetts, USA (413) 545 3232 mtravis@umass.edu Course #2: Machine Learning II Smith, Joe University of Massachusetts Dept. Computer Science, Amherst, Massachusetts, USA (413) 545 1212 joesmith@umass.edu
DOCX Output
Here we show only one of the two files generated, the one with two students registered.