Tutorial: Running Multithreaded Programs on AWS

From dftwiki3
Revision as of 14:43, 14 June 2012 by Thiebaut (talk | contribs) (Main Python Program)
Jump to: navigation, search

--D. Thiebaut 15:39, 14 June 2012 (EDT)


This tutorial is a quick overview of how to run a compiled C++ program on a multi-core machine. In this case the multi-core machine is an AWS (Amazon) instance, but it could just as well be any multi-core desktop machine.

Overall Block Diagram

FilterWiki10.png


Main Python Program




#! /usr/bin/env python2.6
# D. Thiebaut

import sys
import time
import multiprocessing 
import subprocess

def syntax():
    """Print the one-line command-line usage summary on standard output."""
    usage = "runMultipleFilterWiki10.py -start nn -end nn"
    # A single parenthesised expression prints the same text as the bare
    # print statement under Python 2.
    print( usage )
    
def runFilterWiki( id ):
    """Run runFilterwiki10.sh on one numbered split and print its output.

    The script is invoked as

        runFilterwiki10.sh url infile.<id> outfile.<id>

    with url = http://hadoop0.dyndns.org/wikipediagz/1line.split.<id>.gz

    id is the split number; it may be an int or a string.  It is
    converted with str() before being spliced into the URL and the file
    names, because concatenating an int to a str raises TypeError.

    The command line is echoed first; then the function blocks until the
    script exits and prints whatever the script wrote to its standard
    output.  The script's exit status is not examined.
    """
    split = str( id )
    url = "http://hadoop0.dyndns.org/wikipediagz/1line.split." + split + ".gz"
    command = [ "runFilterwiki10.sh", url, "infile." + split, "outfile." + split ]

    # Echo exactly what is about to be run (space-separated, one line).
    print( " ".join( command ) )

    # communicate() waits for the child to finish and returns
    # (stdout, stderr); only stdout is piped, so only [0] is meaningful.
    child = subprocess.Popen( command, stdout=subprocess.PIPE )
    output = child.communicate()[0]
    print( output )


def main():
    """Parse -start/-end from sys.argv and run one worker process per split.

    Usage: runMultipleFilterWiki10.py -start nn -end nn

    One multiprocessing.Process running runFilterWiki is started for
    every split number in range( start, end ), i.e. start is included
    and end is excluded.  All processes are started before any is
    joined, so they run concurrently; main() returns once every one of
    them has finished.

    If either option is missing, or its value is not an integer, the
    usage line is printed and nothing is run.
    """
    # Pair every argument with the one that follows it; an option given
    # as the very last argument has no value and is therefore ignored.
    # A repeated option keeps its last value.
    options = { "-start": None, "-end": None }
    for flag, value in zip( sys.argv, sys.argv[1:] ):
        if flag in options:
            options[flag] = value

    if options["-start"] is None or options["-end"] is None:
        syntax()
        return

    try:
        start = int( options["-start"] )
        end   = int( options["-end"] )
    except ValueError:
        # Non-numeric value: report usage instead of dying with a traceback.
        syntax()
        return

    print( "start =  %d" % start )
    print( "end   =  %d" % end )

    workers = []
    for i in range( start, end ):
        # The split number is handed over as a string because
        # runFilterWiki splices it into a URL and into file names.
        p = multiprocessing.Process( target=runFilterWiki, args=( str( i ), ) )
        p.start()
        workers.append( p )

    # Wait for every worker before returning.
    for p in workers:
        p.join()
# Run only when executed as a script.  multiprocessing may import the main
# module again inside each child process (it does so on Windows); without
# this guard every such import would call main() and start more workers.
if __name__ == "__main__":
    main()