Difference between revisions of "CSC352 MapReduce/Hadoop Class Notes"

From dftwiki3
Jump to: navigation, search
Line 190: Line 190:
 
[[Hadoop WordCount.java | WordCount.java]]
 
[[Hadoop WordCount.java | WordCount.java]]
  
===The Map and Reduce Java Blocks===
 
 
<source lang="java">
 
  public static class MapClass extends MapReduceBase
 
    implements Mapper<LongWritable, Text, Text, IntWritable> {
 
 
    private final static IntWritable one = new IntWritable(1);
 
    private Text word = new Text();
 
 
    public void map(LongWritable key, Text value,
 
                    OutputCollector<Text, IntWritable> output,
 
                    Reporter reporter) throws IOException {
 
      String line = value.toString();
 
      StringTokenizer itr = new StringTokenizer(line);
 
      while (itr.hasMoreTokens()) {
 
        word.set(itr.nextToken());
 
        output.collect(word, one);
 
      }
 
    }
 
  }
 
 
  /**
 
  * A reducer class that just emits the sum of the input values.
 
  */
 
  public static class Reduce extends MapReduceBase
 
    implements Reducer<Text, IntWritable, Text, IntWritable> {
 
 
    public void reduce(Text key, Iterator<IntWritable> values,
 
                      OutputCollector<Text, IntWritable> output,
 
                      Reporter reporter) throws IOException {
 
      int sum = 0;
 
      while (values.hasNext()) {
 
        sum += values.next().get();
 
      }
 
      output.collect(key, new IntWritable(sum));
 
    }
 
  }
 
</source>
 
  
 
<br />
 
<br />

Revision as of 19:41, 5 April 2010


This section is only visible to computers located at Smith College