Difference between revisions of "CSC352 Bash Script to Run Hadoop WordCount"
(Created page with "--~~~~ ---- <source lang="bash"> #! /bin/bash # D. Thiebaut # A script to run the Hadoop wordcount example program on two # books stored in the /data/hadoop directory: 4300-8....") |
|||
Line 1: | Line 1: | ||
--[[User:Thiebaut|D. Thiebaut]] ([[User talk:Thiebaut|talk]]) 10:08, 14 November 2013 (EST) | --[[User:Thiebaut|D. Thiebaut]] ([[User talk:Thiebaut|talk]]) 10:08, 14 November 2013 (EST) | ||
---- | ---- | ||
+ | |||
+ | More information about this script and how to run the WordCount program can be found in [[Tutorial:_Creating_a_Hadoop_Cluster_on_Amazon_AWS| this tutorial]]. | ||
+ | <br /> | ||
<source lang="bash"> | <source lang="bash"> | ||
#! /bin/bash | #! /bin/bash | ||
Line 6: | Line 9: | ||
# A script to run the Hadoop wordcount example program on two | # A script to run the Hadoop wordcount example program on two | ||
# books stored in the /data/hadoop directory: 4300-8.txt and 12241.txt | # books stored in the /data/hadoop directory: 4300-8.txt and 12241.txt | ||
− | # | + | # This script assumes that a Hadoop cluster has been setup on AWS using |
+ | # Starcluster, and that an EBS volume is already attached on /data. | ||
+ | # the /data/hadoop directory is already loaded with a couple books. | ||
+ | |||
echo "cd /data/hadoop" | echo "cd /data/hadoop" | ||
cd /data/hadoop | cd /data/hadoop |
Latest revision as of 11:10, 14 November 2013
--D. Thiebaut (talk) 10:08, 14 November 2013 (EST)
More information about this script and how to run the WordCount program can be found in this tutorial.
#! /bin/bash
# D. Thiebaut
# A script to run the Hadoop wordcount example program on two
# books stored in the /data/hadoop directory: 4300-8.txt and 12241.txt
# This script assumes that a Hadoop cluster has been set up on AWS using
# Starcluster, that an EBS volume is already attached on /data, and that
# the /data/hadoop directory is already loaded with a couple of books.
# Print an error message on stderr and abort the script.
# Arguments: $* - message text
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Announce the command that is about to run and wait for a key press,
# so each step of the demo can be followed on screen.
# Arguments: $1 - the command line, as text, to show to the user
pause_before() {
  echo
  echo "About to run: $1"
  # -n 1 returns after a single key press; -r keeps backslashes literal.
  read -p "Press Enter to continue " -n 1 -r
  echo
}

# Work from the directory holding the local copies of the books. Stop here
# if it is missing: otherwise the HDFS clean-up below would still run and
# the uploads would then fail from the wrong directory.
echo "cd /data/hadoop"
cd /data/hadoop || die "cannot cd to /data/hadoop (is the volume mounted on /data?)"

# Remove left-overs from a previous run. These are best-effort: on a first
# run neither directory exists and hadoop reports an error, which is harmless.
echo "hadoop dfs -rmr books"
echo "hadoop dfs -rmr output1"
hadoop dfs -rmr books
hadoop dfs -rmr output1

pause_before "hadoop dfs -lsr"
hadoop dfs -lsr

pause_before "hadoop dfs -mkdir books"
hadoop dfs -mkdir books

# Upload both books, then list HDFS to show the result. Without the input
# files the job below is pointless, so a failed upload aborts the script.
pause_before "hadoop dfs -copyFromLocal 4300-8.txt books"
hadoop dfs -copyFromLocal 4300-8.txt books \
  || die "could not copy 4300-8.txt to HDFS"
hadoop dfs -copyFromLocal 12241.txt books \
  || die "could not copy 12241.txt to HDFS"
hadoop dfs -lsr

# Run the wordcount example over the books directory. Stop if the job
# fails, so the steps below never display missing or stale output.
pause_before "hadoop jar /usr/lib/hadoop-0.20/hadoop-examples.jar wordcount books output1"
hadoop jar /usr/lib/hadoop-0.20/hadoop-examples.jar wordcount books output1 \
  || die "wordcount job failed"

pause_before "hadoop dfs -lsr"
hadoop dfs -lsr

# Page through the job's output file.
pause_before "hadoop dfs -cat output1/part-r-00000"
hadoop dfs -cat output1/part-r-00000 | less