Tutorial: Running Multithreaded Programs on AWS
--D. Thiebaut 15:39, 14 June 2012 (EDT)
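This tutorial is a quick overview of how to run several instances of a compiled C++ program in parallel on a multi-core machine. A Python driver script uses the multiprocessing module to start one process per input file, so the parallelism comes from separate processes rather than threads. In this case the multi-core machine is an AWS (Amazon) instance, but it could also be any multi-core desktop machine.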
Overall Block Diagram
Main Python Program: runMultipleFilterWiki10.py
#! /usr/bin/env python2.6
# D. Thiebaut
import sys
import time
import multiprocessing
import subprocess
def syntax():
    """Print the command-line usage of this script to standard output."""
    # The usage line names the script itself (runMultipleFilterWiki10.py).
    # The single parenthesized string prints identically on Python 2 and 3.
    print("runMultipleFilterWiki10.py -start nn -end nn")
def runFilterWiki( id ):
    """Run runFilterwiki10.sh on one split of the input data and print its output.

    The shell script is started as a child process with three arguments:
    the URL of the gzipped split, the local input file name "infile.<id>"
    and the local output file name "outfile.<id>".  This function blocks
    until the script terminates, then prints whatever the script wrote to
    its standard output.

    id: identifier of the split (int or str).  It is converted with str()
        and appended to the URL and file names, e.g.
        http://hadoop0.dyndns.org/wikipediagz/1line.split.<id>.gz
        NOTE(review): if the file names on the server are zero-padded,
        pass an already formatted string -- confirm against the server.

    Raises OSError if runFilterwiki10.sh cannot be started (for example
    when it is not found on the PATH).
    """
    # main() passes integers produced by range(); concatenating an int to a
    # str raises TypeError, so normalize to text once, up front.
    suffix = str( id )
    url = "http://hadoop0.dyndns.org/wikipediagz/1line.split." + suffix + ".gz"
    command = ["runFilterwiki10.sh", url, "infile." + suffix, "outfile." + suffix]

    # Echo the command line before running it (works on Python 2 and 3).
    print(" ".join( command ))

    # communicate() waits for the child and returns (stdout, stderr);
    # only stdout is piped, so element 0 is the captured output.
    output = subprocess.Popen( command, stdout=subprocess.PIPE ).communicate()[0]
    print(output)
def main():
    """Parse -start/-end from the command line and process the ids in parallel.

    Scans sys.argv for "-start nn" and "-end nn".  If either option is
    missing, or its value is not an integer, the usage line is printed and
    the function returns without doing any work.

    Otherwise one multiprocessing.Process running runFilterWiki( i ) is
    started for every i in range( start, end ) -- the end value itself is
    NOT processed -- and the function waits for all of them to finish.
    """
    start = None
    end = None
    argv = sys.argv
    for i, arg in enumerate( argv ):
        # An option only counts when a value follows it; if an option is
        # repeated, the last occurrence wins.
        if arg == "-start" and i+1 < len( argv ):
            start = argv[i+1]
        if arg == "-end" and i+1 < len( argv ):
            end = argv[i+1]

    if start is None or end is None:
        syntax()
        return

    try:
        start = int( start )
        end = int( end )
    except ValueError:
        # Non-numeric bounds: show the usage line instead of a traceback.
        syntax()
        return

    # Two spaces after "=" reproduce the output of the Python 2 statement
    # `print "start = ", start`, which inserts a space between its items.
    print("start =  %d" % start)
    print("end =  %d" % end)

    # Start every worker first so they run concurrently, then wait for all.
    workers = []
    for i in range( start, end ):
        p = multiprocessing.Process( target=runFilterWiki, args=( i, ) )
        p.start()
        workers.append( p )

    for p in workers:
        p.join()
# Run main() only when this file is executed as a script.  The guard keeps
# the module importable without side effects, which multiprocessing needs
# on platforms that start child processes by re-importing the main module.
if __name__ == "__main__":
    main()
Shell File: runFilterwiki10.sh
#! /bin/bash
# runFilterwiki10.sh
# D. Thiebaut
# Fetches a gzipped input file from a URL, runs ./filterwiki10 on it, and
# uploads the gzipped result.
#
# Usage: runFilterwiki10.sh urlOfInputFile localInputFileName localOutFileName
#
# Steps:
#   1. download the URL to <localInputFileName>.gz and gunzip it
#   2. run ./filterwiki10 -in <localInputFileName> -out <localOutFileName>
#   3. delete the input file, gzip the output file
#   4. upload <localOutFileName>.gz to the uploader page, then delete it
#
# Each command is echoed before it runs so the caller can follow progress.
# Exit status: 1 when the argument count is wrong; otherwise the status of
# the first step that fails, or 0 when every step succeeds.
#
USAGE="syntax: runFilterwiki10.sh urlOfInputFile LocalInputFileName localOutFileName"

#echo $#
if [ $# -ne 3 ]; then
    echo "$USAGE"
    exit 1
fi

# Stop at the first failing step so that a failed download or filter run
# does not fall through to processing and uploading missing/partial files.
set -e

url=$1
inFile=$2
outFile=$3

# All expansions are quoted so URLs or file names containing spaces or
# glob characters reach each command as a single argument.
echo "/usr/bin/env curl -s -o ${inFile}.gz -G $url"
/usr/bin/env curl -s -o "${inFile}.gz" -G "$url"

echo "gunzip ${inFile}.gz"
gunzip "${inFile}.gz"

echo "./filterwiki10 -in $inFile -out $outFile"
./filterwiki10 -in "$inFile" -out "$outFile"

# The uncompressed input is no longer needed once the filter has run.
rm "$inFile"

echo "gzip $outFile"
gzip "$outFile"

/usr/bin/env curl -s -F "uploadedfile=@${outFile}.gz" http://hadoop0.dyndns.org/uploader.php

# Remove the local copy of the result after the upload.
rm "${outFile}.gz"