Difference between revisions of "CSC352 Game of Life in Map-Reduce"
(→Reducer.py) |
|||
(3 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
--[[User:Thiebaut|D. Thiebaut]] ([[User talk:Thiebaut|talk]]) 16:47, 2 April 2017 (EDT) | --[[User:Thiebaut|D. Thiebaut]] ([[User talk:Thiebaut|talk]]) 16:47, 2 April 2017 (EDT) | ||
---- | ---- | ||
− | < | + | |
+ | =Reference= | ||
+ | <br /> | ||
+ | * [https://mitpress.mit.edu/sites/default/files/titles/content/ecal13/ch043.html Using MapReduce Streaming for Distributed Life Simulation on the Cloud], by Atanas Radenski. | ||
+ | <br /> | ||
=Introduction= | =Introduction= | ||
<br /> | <br /> | ||
Line 45: | Line 49: | ||
::<source lang="python"> | ::<source lang="python"> | ||
#!/usr/bin/env python | #!/usr/bin/env python | ||
+ | # mapper.py | ||
+ | # mapper for Conway's 2D Game of Life. | ||
+ | # The input data has two possible formats: | ||
+ | # 1) The first first generation defined by | ||
+ | # a python-type collection of quoted strings. | ||
+ | # We refer to it as "raw-life" | ||
+ | # | ||
+ | #0 " " | ||
+ | #1 " * " | ||
+ | #2 " * " | ||
+ | #3 " * " | ||
+ | #4 " " | ||
+ | # | ||
+ | # 2) the output comes from a previous map-reduce stage, | ||
+ | # in which case the reducer outputs the coordinates of | ||
+ | # live cells only: | ||
+ | # | ||
+ | # 2 4 | ||
+ | # 2 5 | ||
+ | # 2 6 | ||
+ | # | ||
+ | # Above, 3 cells are live, at coordinates (2,4), (2,5), and (2,6) | ||
+ | # | ||
+ | # For every live cell that it finds, the mapper sends 9 tupples: | ||
+ | # 001005 alive | ||
+ | # 000004 OneMoreNeighbor | ||
+ | # 000005 OneMoreNeighbor | ||
+ | # 000006 OneMoreNeighbor | ||
+ | # 001004 OneMoreNeighbor | ||
+ | # 001006 OneMoreNeighbor | ||
+ | # 002004 OneMoreNeighbor | ||
+ | # 002005 OneMoreNeighbor | ||
+ | # 002006 OneMoreNeighbor | ||
+ | # | ||
+ | # The first string contains the 0-padded coordinates of live cells and | ||
+ | # neighbor cells that have this live cell as a neighbor. | ||
+ | # | ||
+ | |||
from __future__ import print_function | from __future__ import print_function | ||
import sys | import sys | ||
Line 156: | Line 198: | ||
<br /> | <br /> | ||
+ | |||
=ShufflerSort.py= | =ShufflerSort.py= | ||
<br /> | <br /> | ||
Line 224: | Line 267: | ||
::<source lang="python"> | ::<source lang="python"> | ||
#!/usr/bin/env python | #!/usr/bin/env python | ||
+ | # reducer.py | ||
+ | # D. Thiebaut | ||
+ | # Reducer stage for the game of life. | ||
+ | # The input typically looks like this: | ||
+ | # | ||
+ | # 001005 alive | ||
+ | # 000004 OneMoreNeighbor | ||
+ | # 000005 OneMoreNeighbor | ||
+ | # 000006 OneMoreNeighbor | ||
+ | # 001004 OneMoreNeighbor | ||
+ | # 001006 OneMoreNeighbor | ||
+ | # 002004 OneMoreNeighbor | ||
+ | # 002005 OneMoreNeighbor | ||
+ | # 002006 OneMoreNeighbor | ||
+ | # | ||
+ | # where the first word is a string of 6 0-padded digits, the first 3 | ||
+ | # representing the row of a cell, and the last 3 its column. The | ||
+ | # second word indicates whether the cell is actually a live cell in | ||
+ | # the current generation, or whether its status is unknown, but there | ||
+ | # is a cell in the previous generation that is alive and is its | ||
+ | # neighbor. | ||
+ | # | ||
+ | # Since all the lines fed to the reducer are sorted, then lines | ||
+ | # with the same keys, representing the same cell, are together. | ||
+ | # | ||
+ | # 000004 OneMoreNeighbor | ||
+ | # 000005 OneMoreNeighbor | ||
+ | # 000006 OneMoreNeighbor | ||
+ | # 001004 OneMoreNeighbor | ||
+ | # 001004 OneMoreNeighbor | ||
+ | # 001005 alive | ||
+ | # 001005 OneMoreNeighbor | ||
+ | # 001006 OneMoreNeighbor | ||
+ | # 001006 OneMoreNeighbor | ||
+ | # 002004 OneMoreNeighbor | ||
+ | # 002004 OneMoreNeighbor | ||
+ | # 002004 OneMoreNeighbor | ||
+ | # 002005 alive | ||
+ | # 002005 OneMoreNeighbor | ||
+ | # 002005 OneMoreNeighbor | ||
+ | # 002006 OneMoreNeighbor | ||
+ | # 002006 OneMoreNeighbor | ||
+ | # 002006 OneMoreNeighbor | ||
+ | # 003004 OneMoreNeighbor | ||
+ | # 003004 OneMoreNeighbor | ||
+ | # 003005 alive | ||
+ | # 003005 OneMoreNeighbor | ||
+ | # 003006 OneMoreNeighbor | ||
+ | # 003006 OneMoreNeighbor | ||
+ | # 004004 OneMoreNeighbor | ||
+ | # 004005 OneMoreNeighbor | ||
+ | # 004006 OneMoreNeighbor | ||
+ | # | ||
+ | # It is then easy to divide the input lines into blocks with the same key, | ||
+ | # and count the number of neighbors, and know if the cell is alive or dead. | ||
+ | # If the cell should be alive in the next generation, then the reducer | ||
+ | # outputs its coordinates (row, column) as two numbers separated by a tab. | ||
+ | # For example: | ||
+ | # | ||
+ | # 2 4 | ||
+ | # 2 5 | ||
+ | # 2 6 | ||
+ | # | ||
+ | # | ||
+ | |||
from __future__ import print_function | from __future__ import print_function | ||
from operator import itemgetter | from operator import itemgetter | ||
Line 282: | Line 390: | ||
− | |||
<br /> | <br /> | ||
<br /> | <br /> |
Latest revision as of 10:00, 21 April 2017
--D. Thiebaut (talk) 16:47, 2 April 2017 (EDT)
Contents
Reference
- Using MapReduce Streaming for Distributed Life Simulation on the Cloud, by Atanas Radenski.
Introduction
- We map-reduce once to go from one generation to the next.
- The original array (see Data File section below) is transformed into a file where each line is numbered, and each row of the game of life is sandwiched between double quotes.
- To go from the data file to the first generation:
cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py 2 4 2 5 2 6
- the output is a horizontal line, on Row 2, spanning Columns 4, 5, and 6.
- To generate the second generation:
cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py | ./mapper.py | ./shuffleSort.py | ./reducer.py 1 5 2 5 3 5
- To generate the third generation:
cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py | ./mapper.py | ./shuffleSort.py | ./reducer.py \ | ./mapper.py | ./shuffleSort.py | ./reducer.py 2 4 2 5 2 6
Data File
The file is called life0.txt.
0 " " 1 " * " 2 " * " 3 " * " 4 " "
Mapper.py
#!/usr/bin/env python # mapper.py # mapper for Conway's 2D Game of Life. # The input data has two possible formats: # 1) The first first generation defined by # a python-type collection of quoted strings. # We refer to it as "raw-life" # #0 " " #1 " * " #2 " * " #3 " * " #4 " " # # 2) the output comes from a previous map-reduce stage, # in which case the reducer outputs the coordinates of # live cells only: # # 2 4 # 2 5 # 2 6 # # Above, 3 cells are live, at coordinates (2,4), (2,5), and (2,6) # # For every live cell that it finds, the mapper sends 9 tupples: # 001005 alive # 000004 OneMoreNeighbor # 000005 OneMoreNeighbor # 000006 OneMoreNeighbor # 001004 OneMoreNeighbor # 001006 OneMoreNeighbor # 002004 OneMoreNeighbor # 002005 OneMoreNeighbor # 002006 OneMoreNeighbor # # The first string contains the 0-padded coordinates of live cells and # neighbor cells that have this live cell as a neighbor. # from __future__ import print_function import sys # input comes from STDIN (standard input) for line in sys.stdin: # remove leading and trailing whitespace line = line.strip() #print( "line =", line ) # split the line into words words = line.split( None, 1 ) #print( "words = ", words ) isRawLife = line.find( "\"" ) != -1 # are we reading the original array of cells from a file? if isRawLife: lineNo = int( words[0] ) lineWidth = len( words[1] )-2 # for the double quotes lifeLine = words[1].replace( "\"", "" ) #print( "(%d, >%s<)" % ( lineNo, lifeLine ) ) for j in range( len( lifeLine ) ): if lifeLine[j] != ' ': print( "%03d%03d\t%s" % ( lineNo, j, "alive" ) ) for row in range( lineNo-1, lineNo+1+1 ): for col in range( j-1, j+1+1 ): if row < 0: continue if col < 0: continue if col > lineWidth: col = 0 if row == lineNo and col == j: continue print( "%03d%03d\t%s" % ( row, col, "OneMoreNeighbor" ) ) # or are we reading tuples output by the reducer? else: lineNo = int( words[0] ) j = int( words[1] ) print( "%03d%03d\t%s" % ( lineNo, j, "alive" ) ) for row in range( lineNo-1, lineNo+1+1 ): for col in range( j-1, j+1+1 ): if row < 0: continue if col < 0: continue if row == lineNo and col == j: continue print( "%03d%03d\t%s" % ( row, col, "OneMoreNeighbor" ) )
Mapper on Original Life File
cat life0.txt | ./mapper.py 001005 alive 000004 OneMoreNeighbor 000005 OneMoreNeighbor 000006 OneMoreNeighbor 001004 OneMoreNeighbor 001006 OneMoreNeighbor 002004 OneMoreNeighbor 002005 OneMoreNeighbor 002006 OneMoreNeighbor 002005 alive 001004 OneMoreNeighbor 001005 OneMoreNeighbor 001006 OneMoreNeighbor 002004 OneMoreNeighbor 002006 OneMoreNeighbor 003004 OneMoreNeighbor 003005 OneMoreNeighbor 003006 OneMoreNeighbor 003005 alive 002004 OneMoreNeighbor 002005 OneMoreNeighbor 002006 OneMoreNeighbor 003004 OneMoreNeighbor 003006 OneMoreNeighbor 004004 OneMoreNeighbor 004005 OneMoreNeighbor 004006 OneMoreNeighbor
Mapper on Output of Reduce
cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py | ./mapper.py 002004 alive 001003 OneMoreNeighbor 001004 OneMoreNeighbor 001005 OneMoreNeighbor 002003 OneMoreNeighbor 002005 OneMoreNeighbor 003003 OneMoreNeighbor 003004 OneMoreNeighbor 003005 OneMoreNeighbor 002005 alive 001004 OneMoreNeighbor 001005 OneMoreNeighbor 001006 OneMoreNeighbor 002004 OneMoreNeighbor 002006 OneMoreNeighbor 003004 OneMoreNeighbor 003005 OneMoreNeighbor 003006 OneMoreNeighbor 002006 alive 001005 OneMoreNeighbor 001006 OneMoreNeighbor 001007 OneMoreNeighbor 002005 OneMoreNeighbor 002007 OneMoreNeighbor 003005 OneMoreNeighbor 003006 OneMoreNeighbor 003007 OneMoreNeighbor
ShufflerSort.py
#!/usr/bin/env python from __future__ import print_function import sys sortNumeric = False if len( sys.argv ) > 1 and sys.argv[1] == "-n": sortNumeric = True L = [] # input comes from STDIN for line in sys.stdin: # remove leading and trailing whitespace line = line.strip() word, count = line.split('\t', 1) if sortNumeric: word = int( word ) L.append( (word, count) ) L.sort() for word, count in L: print( '%s\t%s' % (word, count) )
Output of ShuffleSort
cat life0.txt | ./mapper.py | ./shuffleSort.py 000004 OneMoreNeighbor 000005 OneMoreNeighbor 000006 OneMoreNeighbor 001004 OneMoreNeighbor 001004 OneMoreNeighbor 001005 OneMoreNeighbor 001005 alive 001006 OneMoreNeighbor 001006 OneMoreNeighbor 002004 OneMoreNeighbor 002004 OneMoreNeighbor 002004 OneMoreNeighbor 002005 OneMoreNeighbor 002005 OneMoreNeighbor 002005 alive 002006 OneMoreNeighbor 002006 OneMoreNeighbor 002006 OneMoreNeighbor 003004 OneMoreNeighbor 003004 OneMoreNeighbor 003005 OneMoreNeighbor 003005 alive 003006 OneMoreNeighbor 003006 OneMoreNeighbor 004004 OneMoreNeighbor 004005 OneMoreNeighbor 004006 OneMoreNeighbor
Reducer.py
#!/usr/bin/env python # reducer.py # D. Thiebaut # Reducer stage for the game of life. # The input typically looks like this: # # 001005 alive # 000004 OneMoreNeighbor # 000005 OneMoreNeighbor # 000006 OneMoreNeighbor # 001004 OneMoreNeighbor # 001006 OneMoreNeighbor # 002004 OneMoreNeighbor # 002005 OneMoreNeighbor # 002006 OneMoreNeighbor # # where the first word is a string of 6 0-padded digits, the first 3 # representing the row of a cell, and the last 3 its column. The # second word indicates whether the cell is actually a live cell in # the current generation, or whether its status is unknown, but there # is a cell in the previous generation that is alive and is its # neighbor. # # Since all the lines fed to the reducer are sorted, then lines # with the same keys, representing the same cell, are together. # # 000004 OneMoreNeighbor # 000005 OneMoreNeighbor # 000006 OneMoreNeighbor # 001004 OneMoreNeighbor # 001004 OneMoreNeighbor # 001005 alive # 001005 OneMoreNeighbor # 001006 OneMoreNeighbor # 001006 OneMoreNeighbor # 002004 OneMoreNeighbor # 002004 OneMoreNeighbor # 002004 OneMoreNeighbor # 002005 alive # 002005 OneMoreNeighbor # 002005 OneMoreNeighbor # 002006 OneMoreNeighbor # 002006 OneMoreNeighbor # 002006 OneMoreNeighbor # 003004 OneMoreNeighbor # 003004 OneMoreNeighbor # 003005 alive # 003005 OneMoreNeighbor # 003006 OneMoreNeighbor # 003006 OneMoreNeighbor # 004004 OneMoreNeighbor # 004005 OneMoreNeighbor # 004006 OneMoreNeighbor # # It is then easy to divide the input lines into blocks with the same key, # and count the number of neighbors, and know if the cell is alive or dead. # If the cell should be alive in the next generation, then the reducer # outputs its coordinates (row, column) as two numbers separated by a tab. # For example: # # 2 4 # 2 5 # 2 6 # # from __future__ import print_function from operator import itemgetter import sys lastCell = None isAlive = False cell = None noNeighbors = 0 # input comes from STDIN for line in sys.stdin: # remove leading and trailing whitespace line = line.strip() # parse the input we got from mapper.py cell, status = line.split('\t', 1) if cell != lastCell: # decide of fate of last cell if isAlive and 2 <= noNeighbors <= 3: # continue living print( "%d\t%d" % ( int( lastCell )/1000, int( lastCell )%1000 ) ) if not isAlive and noNeighbors == 3: # be born print( "%d\t%d" % ( int( lastCell )/1000, int( lastCell )%1000 ) ) noNeighbors = 0 isAlive = False # parse status of new cell if status.find( "alive" ) != -1: isAlive = True else: noNeighbors += 1 lastCell = cell # do not forget to output the last cell if needed! if isAlive and 2 <= noNeighbors <= 3: # continue living print( "%d\t%d" % ( int( lastCell )/1000, int( lastCell )%1000 ) ) if not isAlive and noNeighbors == 3: # be born print( "%d\t%d" % ( int( lastCell )/1000, int( lastCell )%1000 ) )
Output of Reducer
cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py 2 4 2 5 2 6