Difference between revisions of "CSC352 Game of Life in Map-Reduce"

From dftwiki3
Jump to: navigation, search
(Reducer.py)
 
(8 intermediate revisions by the same user not shown)
Line 1: Line 1:
 
--[[User:Thiebaut|D. Thiebaut]] ([[User talk:Thiebaut|talk]]) 16:47, 2 April 2017 (EDT)
 
--[[User:Thiebaut|D. Thiebaut]] ([[User talk:Thiebaut|talk]]) 16:47, 2 April 2017 (EDT)
 
----
 
----
<onlydft>
+
 +
=Reference=
 +
<br />
 +
* [https://mitpress.mit.edu/sites/default/files/titles/content/ecal13/ch043.html Using MapReduce Streaming for Distributed Life Simulation on the Cloud], by Atanas Radenski.
 +
<br />
 
=Introduction=
 
=Introduction=
 
<br />
 
<br />
Line 21: Line 25:
 
  3 5
 
  3 5
 
   
 
   
 +
* To generate the third generation:
 +
 +
'''cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py | ./mapper.py | ./shuffleSort.py | ./reducer.py \'''
 +
    '''| ./mapper.py | ./shuffleSort.py | ./reducer.py'''
 +
2 4
 +
2 5
 +
2 6
 +
 
  
 
=Data File=
 
=Data File=
Line 37: Line 49:
 
::<source lang="python">
 
::<source lang="python">
 
#!/usr/bin/env python
 
#!/usr/bin/env python
 +
# mapper.py
 +
# mapper for Conway's 2D Game of Life.
 +
# The input data has two possible formats:
 +
# 1) The first first generation defined by
 +
# a python-type collection of quoted strings.
 +
# We refer to it  as "raw-life"
 +
#
 +
#0      "          "
 +
#1      "    *    "
 +
#2      "    *    "
 +
#3      "    *    "
 +
#4      "          "
 +
#
 +
# 2) the output comes from a previous map-reduce stage,
 +
# in which case the reducer outputs the coordinates of
 +
# live cells only:
 +
#
 +
# 2  4
 +
# 2  5
 +
# 2  6
 +
#
 +
# Above, 3 cells are live, at coordinates (2,4), (2,5), and (2,6)
 +
#
 +
# For every live cell that it finds, the mapper sends 9 tupples:
 +
# 001005 alive
 +
# 000004 OneMoreNeighbor
 +
# 000005 OneMoreNeighbor
 +
# 000006 OneMoreNeighbor
 +
# 001004 OneMoreNeighbor
 +
# 001006 OneMoreNeighbor
 +
# 002004 OneMoreNeighbor
 +
# 002005 OneMoreNeighbor
 +
# 002006 OneMoreNeighbor
 +
#
 +
# The first string contains the 0-padded coordinates of live cells and
 +
# neighbor cells that have this live cell as a neighbor.
 +
#
 +
 
from __future__ import print_function
 
from __future__ import print_function
 
import sys
 
import sys
Line 53: Line 103:
 
     isRawLife = line.find( "\"" ) != -1
 
     isRawLife = line.find( "\"" ) != -1
  
 +
    # are we reading the original array of cells from a file?
 
     if isRawLife:
 
     if isRawLife:
 
       lineNo = int( words[0] )
 
       lineNo = int( words[0] )
Line 68: Line 119:
 
                       if row == lineNo and col == j: continue
 
                       if row == lineNo and col == j: continue
 
                       print( "%03d%03d\t%s" % ( row, col, "OneMoreNeighbor" ) )
 
                       print( "%03d%03d\t%s" % ( row, col, "OneMoreNeighbor" ) )
                       
+
   
 +
    # or are we reading tuples output by the reducer?                   
 
     else:
 
     else:
 
       lineNo = int( words[0] )
 
       lineNo = int( words[0] )
Line 82: Line 134:
 
</source>
 
</source>
 
<br />
 
<br />
 +
==Mapper on Original Life File==
 +
<br />
 +
'''cat life0.txt | ./mapper.py '''
 +
001005 alive
 +
000004 OneMoreNeighbor
 +
000005 OneMoreNeighbor
 +
000006 OneMoreNeighbor
 +
001004 OneMoreNeighbor
 +
001006 OneMoreNeighbor
 +
002004 OneMoreNeighbor
 +
002005 OneMoreNeighbor
 +
002006 OneMoreNeighbor
 +
002005 alive
 +
001004 OneMoreNeighbor
 +
001005 OneMoreNeighbor
 +
001006 OneMoreNeighbor
 +
002004 OneMoreNeighbor
 +
002006 OneMoreNeighbor
 +
003004 OneMoreNeighbor
 +
003005 OneMoreNeighbor
 +
003006 OneMoreNeighbor
 +
003005 alive
 +
002004 OneMoreNeighbor
 +
002005 OneMoreNeighbor
 +
002006 OneMoreNeighbor
 +
003004 OneMoreNeighbor
 +
003006 OneMoreNeighbor
 +
004004 OneMoreNeighbor
 +
004005 OneMoreNeighbor
 +
004006 OneMoreNeighbor
 +
 +
<br />
 +
==Mapper on Output of Reduce==
 +
<br />
 +
'''cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py | ./mapper.py'''
 +
002004 alive
 +
001003 OneMoreNeighbor
 +
001004 OneMoreNeighbor
 +
001005 OneMoreNeighbor
 +
002003 OneMoreNeighbor
 +
002005 OneMoreNeighbor
 +
003003 OneMoreNeighbor
 +
003004 OneMoreNeighbor
 +
003005 OneMoreNeighbor
 +
002005 alive
 +
001004 OneMoreNeighbor
 +
001005 OneMoreNeighbor
 +
001006 OneMoreNeighbor
 +
002004 OneMoreNeighbor
 +
002006 OneMoreNeighbor
 +
003004 OneMoreNeighbor
 +
003005 OneMoreNeighbor
 +
003006 OneMoreNeighbor
 +
002006 alive
 +
001005 OneMoreNeighbor
 +
001006 OneMoreNeighbor
 +
001007 OneMoreNeighbor
 +
002005 OneMoreNeighbor
 +
002007 OneMoreNeighbor
 +
003005 OneMoreNeighbor
 +
003006 OneMoreNeighbor
 +
003007 OneMoreNeighbor
 +
 
 +
<br />
 +
 
=ShufflerSort.py=
 
=ShufflerSort.py=
 
<br />
 
<br />
Line 113: Line 230:
  
 
</source>
 
</source>
 +
<br />
 +
==Output of ShuffleSort==
 +
<br />
 +
'''cat life0.txt | ./mapper.py | ./shuffleSort.py '''
 +
000004 OneMoreNeighbor
 +
000005 OneMoreNeighbor
 +
000006 OneMoreNeighbor
 +
001004 OneMoreNeighbor
 +
001004 OneMoreNeighbor
 +
001005 OneMoreNeighbor
 +
001005 alive
 +
001006 OneMoreNeighbor
 +
001006 OneMoreNeighbor
 +
002004 OneMoreNeighbor
 +
002004 OneMoreNeighbor
 +
002004 OneMoreNeighbor
 +
002005 OneMoreNeighbor
 +
002005 OneMoreNeighbor
 +
002005 alive
 +
002006 OneMoreNeighbor
 +
002006 OneMoreNeighbor
 +
002006 OneMoreNeighbor
 +
003004 OneMoreNeighbor
 +
003004 OneMoreNeighbor
 +
003005 OneMoreNeighbor
 +
003005 alive
 +
003006 OneMoreNeighbor
 +
003006 OneMoreNeighbor
 +
004004 OneMoreNeighbor
 +
004005 OneMoreNeighbor
 +
004006 OneMoreNeighbor
 +
 
 
<br />
 
<br />
 
=Reducer.py=
 
=Reducer.py=
Line 118: Line 267:
 
::<source lang="python">
 
::<source lang="python">
 
#!/usr/bin/env python
 
#!/usr/bin/env python
 +
# reducer.py
 +
# D. Thiebaut
 +
# Reducer stage for the game of life.
 +
# The input typically looks like this:
 +
#
 +
# 001005 alive
 +
# 000004 OneMoreNeighbor
 +
# 000005 OneMoreNeighbor
 +
# 000006 OneMoreNeighbor
 +
# 001004 OneMoreNeighbor
 +
# 001006 OneMoreNeighbor
 +
# 002004 OneMoreNeighbor
 +
# 002005 OneMoreNeighbor
 +
# 002006 OneMoreNeighbor
 +
#
 +
# where the first word is a string of 6 0-padded digits, the first 3
 +
# representing the row of a cell, and the last 3 its column.  The
 +
# second word indicates whether the cell is actually a live cell in
 +
# the current generation, or whether its status is unknown, but there
 +
# is a cell in the previous generation that is alive and is its
 +
# neighbor.
 +
#
 +
# Since all the lines fed to the reducer are sorted, then lines
 +
# with the same keys, representing the same cell, are together.
 +
#
 +
# 000004  OneMoreNeighbor
 +
# 000005  OneMoreNeighbor
 +
# 000006  OneMoreNeighbor
 +
# 001004  OneMoreNeighbor
 +
# 001004  OneMoreNeighbor
 +
# 001005  alive
 +
# 001005  OneMoreNeighbor
 +
# 001006  OneMoreNeighbor
 +
# 001006  OneMoreNeighbor
 +
# 002004  OneMoreNeighbor
 +
# 002004  OneMoreNeighbor
 +
# 002004  OneMoreNeighbor
 +
# 002005  alive
 +
# 002005  OneMoreNeighbor
 +
# 002005  OneMoreNeighbor
 +
# 002006  OneMoreNeighbor
 +
# 002006  OneMoreNeighbor
 +
# 002006  OneMoreNeighbor
 +
# 003004  OneMoreNeighbor
 +
# 003004  OneMoreNeighbor
 +
# 003005  alive
 +
# 003005  OneMoreNeighbor
 +
# 003006  OneMoreNeighbor
 +
# 003006  OneMoreNeighbor
 +
# 004004  OneMoreNeighbor
 +
# 004005  OneMoreNeighbor
 +
# 004006  OneMoreNeighbor
 +
#
 +
# It is then easy to divide the input lines into blocks with the same key,
 +
# and count the number of neighbors, and know if the cell is alive or dead.
 +
# If the cell should be alive in the next generation, then the reducer
 +
# outputs its coordinates (row, column) as two numbers separated by a tab.
 +
# For example:
 +
#
 +
# 2  4
 +
# 2  5
 +
# 2  6
 +
#
 +
#
 +
 
from __future__ import print_function
 
from __future__ import print_function
 
from operator import itemgetter
 
from operator import itemgetter
Line 165: Line 379:
 
   print( "%d\t%d" % ( int( lastCell )/1000, int( lastCell )%1000 ) )
 
   print( "%d\t%d" % ( int( lastCell )/1000, int( lastCell )%1000 ) )
 
</source>
 
</source>
 +
<br />
 +
==Output of Reducer==
 +
<br />
 +
 +
'''cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py  '''
 +
2 4
 +
2 5
 +
2 6
 +
 
 +
  
</onlydft>
+
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
<br />
 +
[[Category:CSC352]][[Category:MapReduce]][[Category:Hadoop]][[Category:Python]]

Latest revision as of 09:00, 21 April 2017

--D. Thiebaut (talk) 16:47, 2 April 2017 (EDT)


Reference



Introduction


  • We map-reduce once to go from one generation to the next.
  • The original array (see Data File section below) is transformed into a file where each line is numbered, and each row of the game of life is sandwiched between double quotes.
  • To go from the data file to the first generation:
cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py  
2	4
2	5
2	6

the output is a horizontal line, on Row 2, spanning Columns 4, 5, and 6.
  • To generate the second generation:
cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py | ./mapper.py | ./shuffleSort.py | ./reducer.py
1	5
2	5
3	5

  • To generate the third generation:
cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py | ./mapper.py | ./shuffleSort.py | ./reducer.py \
   | ./mapper.py | ./shuffleSort.py | ./reducer.py
2	4
2	5
2	6
 

Data File


The file is called life0.txt.

0       "           "
1       "     *     "
2       "     *     "
3       "     *     "
4       "           "


Mapper.py


#!/usr/bin/env python
# mapper.py 
# mapper for Conway's 2D Game of Life.
# The input data has two possible formats:
# 1) The first first generation defined by 
# a python-type collection of quoted strings.
# We refer to it  as "raw-life"
#
#0       "           "
#1       "     *     "
#2       "     *     "
#3       "     *     "
#4       "           "
#
# 2) the output comes from a previous map-reduce stage,
# in which case the reducer outputs the coordinates of 
# live cells only:
#
# 2  4
# 2  5
# 2  6
# 
# Above, 3 cells are live, at coordinates (2,4), (2,5), and (2,6)
#
# For every live cell that it finds, the mapper sends 9 tupples:
# 001005 alive
# 000004 OneMoreNeighbor
# 000005 OneMoreNeighbor
# 000006 OneMoreNeighbor
# 001004 OneMoreNeighbor
# 001006 OneMoreNeighbor
# 002004 OneMoreNeighbor
# 002005 OneMoreNeighbor
# 002006 OneMoreNeighbor 
#
# The first string contains the 0-padded coordinates of live cells and
# neighbor cells that have this live cell as a neighbor.
# 

from __future__ import print_function
import sys

# input comes from STDIN (standard input)
for line in sys.stdin:

    # remove leading and trailing whitespace
    line = line.strip()
    #print( "line =", line )

    # split the line into words
    words = line.split( None, 1 )

    #print( "words = ", words )
    isRawLife = line.find( "\"" ) != -1

    # are we reading the original array of cells from a file?
    if isRawLife:
       lineNo = int( words[0] )
       lineWidth = len( words[1] )-2 # for the double quotes
       lifeLine = words[1].replace( "\"", "" )
       #print( "(%d, >%s<)" % ( lineNo, lifeLine ) )
       for j in range( len( lifeLine ) ):
           if lifeLine[j] != ' ':
              print( "%03d%03d\t%s" % ( lineNo, j, "alive" ) )
              for row in range( lineNo-1, lineNo+1+1 ):
                  for col in range( j-1, j+1+1 ):
                      if row < 0: continue
                      if col < 0: continue
                      if col > lineWidth: col = 0
                      if row == lineNo and col == j: continue
                      print( "%03d%03d\t%s" % ( row, col, "OneMoreNeighbor" ) )
     
    # or are we reading tuples output by the reducer?                    
    else:
       lineNo = int( words[0] )
       j      = int( words[1] )
       print( "%03d%03d\t%s" % ( lineNo, j, "alive" ) )
       for row in range( lineNo-1, lineNo+1+1 ):
           for col in range( j-1, j+1+1 ):
               if row < 0: continue
               if col < 0: continue
               if row == lineNo and col == j: continue
               print( "%03d%03d\t%s" % ( row, col, "OneMoreNeighbor" ) )


Mapper on Original Life File


cat life0.txt | ./mapper.py 
001005	alive
000004	OneMoreNeighbor
000005	OneMoreNeighbor
000006	OneMoreNeighbor
001004	OneMoreNeighbor
001006	OneMoreNeighbor
002004	OneMoreNeighbor
002005	OneMoreNeighbor
002006	OneMoreNeighbor
002005	alive
001004	OneMoreNeighbor
001005	OneMoreNeighbor
001006	OneMoreNeighbor
002004	OneMoreNeighbor
002006	OneMoreNeighbor
003004	OneMoreNeighbor
003005	OneMoreNeighbor
003006	OneMoreNeighbor
003005	alive
002004	OneMoreNeighbor
002005	OneMoreNeighbor
002006	OneMoreNeighbor
003004	OneMoreNeighbor
003006	OneMoreNeighbor
004004	OneMoreNeighbor
004005	OneMoreNeighbor
004006	OneMoreNeighbor 


Mapper on Output of Reduce


cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py | ./mapper.py
002004	alive
001003	OneMoreNeighbor
001004	OneMoreNeighbor
001005	OneMoreNeighbor
002003	OneMoreNeighbor
002005	OneMoreNeighbor
003003	OneMoreNeighbor
003004	OneMoreNeighbor
003005	OneMoreNeighbor
002005	alive
001004	OneMoreNeighbor
001005	OneMoreNeighbor
001006	OneMoreNeighbor
002004	OneMoreNeighbor
002006	OneMoreNeighbor
003004	OneMoreNeighbor
003005	OneMoreNeighbor
003006	OneMoreNeighbor
002006	alive
001005	OneMoreNeighbor
001006	OneMoreNeighbor
001007	OneMoreNeighbor
002005	OneMoreNeighbor
002007	OneMoreNeighbor
003005	OneMoreNeighbor
003006	OneMoreNeighbor
003007	OneMoreNeighbor
 


ShufflerSort.py


#!/usr/bin/env python
from __future__ import print_function
import sys

sortNumeric = False
if len( sys.argv ) > 1 and sys.argv[1] == "-n":
   sortNumeric = True

L = []

# input comes from STDIN
for line in sys.stdin:
      # remove leading and trailing whitespace
      line = line.strip()

      word, count = line.split('\t', 1)
      if sortNumeric:
          word = int( word )

      L.append( (word, count) )


L.sort()

for word, count in L:
    print( '%s\t%s' % (word, count) )


Output of ShuffleSort


cat life0.txt | ./mapper.py | ./shuffleSort.py 
000004	OneMoreNeighbor
000005	OneMoreNeighbor
000006	OneMoreNeighbor
001004	OneMoreNeighbor
001004	OneMoreNeighbor
001005	OneMoreNeighbor
001005	alive
001006	OneMoreNeighbor
001006	OneMoreNeighbor
002004	OneMoreNeighbor
002004	OneMoreNeighbor
002004	OneMoreNeighbor
002005	OneMoreNeighbor
002005	OneMoreNeighbor
002005	alive
002006	OneMoreNeighbor
002006	OneMoreNeighbor
002006	OneMoreNeighbor
003004	OneMoreNeighbor
003004	OneMoreNeighbor
003005	OneMoreNeighbor
003005	alive
003006	OneMoreNeighbor
003006	OneMoreNeighbor
004004	OneMoreNeighbor
004005	OneMoreNeighbor
004006	OneMoreNeighbor
 


Reducer.py


#!/usr/bin/env python
# reducer.py
# D. Thiebaut
# Reducer stage for the game of life.
# The input typically looks like this:
# 
# 001005 alive
# 000004 OneMoreNeighbor
# 000005 OneMoreNeighbor
# 000006 OneMoreNeighbor
# 001004 OneMoreNeighbor
# 001006 OneMoreNeighbor
# 002004 OneMoreNeighbor
# 002005 OneMoreNeighbor
# 002006 OneMoreNeighbor 
#
# where the first word is a string of 6 0-padded digits, the first 3 
# representing the row of a cell, and the last 3 its column.  The
# second word indicates whether the cell is actually a live cell in 
# the current generation, or whether its status is unknown, but there
# is a cell in the previous generation that is alive and is its 
# neighbor.
#
# Since all the lines fed to the reducer are sorted, then lines 
# with the same keys, representing the same cell, are together.
# 
# 000004   OneMoreNeighbor
# 000005   OneMoreNeighbor
# 000006   OneMoreNeighbor
# 001004   OneMoreNeighbor
# 001004   OneMoreNeighbor
# 001005   alive
# 001005   OneMoreNeighbor
# 001006   OneMoreNeighbor
# 001006   OneMoreNeighbor
# 002004   OneMoreNeighbor
# 002004   OneMoreNeighbor
# 002004   OneMoreNeighbor
# 002005   alive
# 002005   OneMoreNeighbor
# 002005   OneMoreNeighbor
# 002006   OneMoreNeighbor
# 002006   OneMoreNeighbor
# 002006   OneMoreNeighbor
# 003004   OneMoreNeighbor
# 003004   OneMoreNeighbor
# 003005   alive
# 003005   OneMoreNeighbor
# 003006   OneMoreNeighbor
# 003006   OneMoreNeighbor
# 004004   OneMoreNeighbor
# 004005   OneMoreNeighbor
# 004006   OneMoreNeighbor
#
# It is then easy to divide the input lines into blocks with the same key,
# and count the number of neighbors, and know if the cell is alive or dead.
# If the cell should be alive in the next generation, then the reducer
# outputs its coordinates (row, column) as two numbers separated by a tab.
# For example:
#
# 2  4
# 2  5
# 2  6
# 
#

from __future__ import print_function
from operator import itemgetter

import sys

lastCell = None
isAlive = False
cell = None
noNeighbors = 0

# input comes from STDIN
for line in sys.stdin:
      # remove leading and trailing whitespace
      line = line.strip()
      
      # parse the input we got from mapper.py
      cell, status = line.split('\t', 1)
      
      if cell != lastCell:
         # decide of fate of last cell
         if isAlive and 2 <= noNeighbors <= 3:
            # continue living
            print( "%d\t%d" % ( int( lastCell )/1000, int( lastCell )%1000 ) )
         if not isAlive and noNeighbors == 3:
            # be born 
            print( "%d\t%d" % ( int( lastCell )/1000, int( lastCell )%1000 ) )

         noNeighbors = 0
         isAlive = False
 
      # parse status of new cell
      if status.find( "alive" ) != -1:
          isAlive = True
      else:
          noNeighbors += 1

      lastCell = cell


# do not forget to output the last cell if needed!
if isAlive and 2 <= noNeighbors <= 3:
   # continue living
   print( "%d\t%d" % ( int( lastCell )/1000, int( lastCell )%1000 ) )
if not isAlive and noNeighbors == 3:
   # be born
   print( "%d\t%d" % ( int( lastCell )/1000, int( lastCell )%1000 ) )


Output of Reducer


cat life0.txt | ./mapper.py | ./shuffleSort.py | ./reducer.py  
2	4
2	5
2	6