MRBench.java
上传用户:quxuerui
上传日期:2018-01-08
资源大小:41811k
文件大小:11k
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hadoop.mapred;
- import java.io.IOException;
- import java.io.PrintStream;
- import java.util.ArrayList;
- import java.util.Iterator;
- import java.util.Random;
- import org.apache.commons.logging.Log;
- import org.apache.commons.logging.LogFactory;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.UTF8;
- import org.apache.hadoop.io.WritableComparable;
- import org.apache.hadoop.io.Text;
- /**
- * Runs a job multiple times and takes average of all runs.
- */
- public class MRBench {
-
- private static final Log LOG = LogFactory.getLog(MRBench.class);
- private static Path BASE_DIR =
- new Path(System.getProperty("test.build.data","/benchmarks/MRBench"));
- private static Path INPUT_DIR = new Path(BASE_DIR, "mr_input");
- private static Path OUTPUT_DIR = new Path(BASE_DIR, "mr_output");
-
- public static enum Order {RANDOM, ASCENDING, DESCENDING};
-
- /**
- * Takes input format as text lines, runs some processing on it and
- * writes out data as text again.
- */
- public static class Map extends MapReduceBase
- implements Mapper<WritableComparable, Text, UTF8, UTF8> {
-
- public void map(WritableComparable key, Text value,
- OutputCollector<UTF8, UTF8> output,
- Reporter reporter) throws IOException
- {
- String line = value.toString();
- output.collect(new UTF8(process(line)), new UTF8(""));
- }
- public String process(String line) {
- return line;
- }
- }
- /**
- * Ignores the key and writes values to the output.
- */
- public static class Reduce extends MapReduceBase
- implements Reducer<UTF8, UTF8, UTF8, UTF8> {
-
- public void reduce(UTF8 key, Iterator<UTF8> values,
- OutputCollector<UTF8, UTF8> output, Reporter reporter) throws IOException
- {
- while(values.hasNext()) {
- output.collect(key, new UTF8(values.next().toString()));
- }
- }
- }
- /**
- * Generate a text file on the given filesystem with the given path name.
- * The text file will contain the given number of lines of generated data.
- * The generated data are string representations of numbers. Each line
- * is the same length, which is achieved by padding each number with
- * an appropriate number of leading '0' (zero) characters. The order of
- * generated data is one of ascending, descending, or random.
- */
- public static void generateTextFile(FileSystem fs, Path inputFile,
- long numLines, Order sortOrder) throws IOException
- {
- LOG.info("creating control file: "+numLines+" numLines, "+sortOrder+" sortOrder");
- PrintStream output = null;
- try {
- output = new PrintStream(fs.create(inputFile));
- int padding = String.valueOf(numLines).length();
- switch(sortOrder) {
- case RANDOM:
- for (long l = 0; l < numLines; l++) {
- output.println(pad((new Random()).nextLong(), padding));
- }
- break;
- case ASCENDING:
- for (long l = 0; l < numLines; l++) {
- output.println(pad(l, padding));
- }
- break;
- case DESCENDING:
- for (long l = numLines; l > 0; l--) {
- output.println(pad(l, padding));
- }
- break;
- }
- } finally {
- if (output != null)
- output.close();
- }
- LOG.info("created control file: " + inputFile);
- }
-
- /**
- * Convert the given number to a string and pad the number with
- * leading '0' (zero) characters so that the string is exactly
- * the given length.
- */
- private static String pad(long number, int length) {
- String str = String.valueOf(number);
- StringBuffer value = new StringBuffer();
- for (int i = str.length(); i < length; i++) {
- value.append("0");
- }
- value.append(str);
- return value.toString();
- }
-
- /**
- * Create the job configuration.
- */
- private static JobConf setupJob(int numMaps, int numReduces, String jarFile) {
- JobConf jobConf = new JobConf(MRBench.class);
- FileInputFormat.addInputPath(jobConf, INPUT_DIR);
-
- jobConf.setInputFormat(TextInputFormat.class);
- jobConf.setOutputFormat(TextOutputFormat.class);
-
- jobConf.setOutputValueClass(UTF8.class);
-
- jobConf.setMapOutputKeyClass(UTF8.class);
- jobConf.setMapOutputValueClass(UTF8.class);
-
- if (null != jarFile) {
- jobConf.setJar(jarFile);
- }
- jobConf.setMapperClass(Map.class);
- jobConf.setReducerClass(Reduce.class);
-
- jobConf.setNumMapTasks(numMaps);
- jobConf.setNumReduceTasks(numReduces);
-
- return jobConf;
- }
-
- /**
- * Runs a MapReduce task, given number of times. The input to each run
- * is the same file.
- */
- private static ArrayList<Long> runJobInSequence(JobConf masterJobConf, int numRuns) throws IOException {
- Path intrimData = null;
- Random rand = new Random();
- ArrayList<Long> execTimes = new ArrayList<Long>();
-
- for (int i = 0; i < numRuns; i++) {
- // create a new job conf every time, reusing same object does not work
- JobConf jobConf = new JobConf(masterJobConf);
- // reset the job jar because the copy constructor doesn't
- jobConf.setJar(masterJobConf.getJar());
- // give a new random name to output of the mapred tasks
- FileOutputFormat.setOutputPath(jobConf,
- new Path(OUTPUT_DIR, "output_" + rand.nextInt()));
- LOG.info("Running job " + i + ":" +
- " input=" + FileInputFormat.getInputPaths(jobConf)[0] +
- " output=" + FileOutputFormat.getOutputPath(jobConf));
-
- // run the mapred task now
- long curTime = System.currentTimeMillis();
- JobClient.runJob(jobConf);
- execTimes.add(new Long(System.currentTimeMillis() - curTime));
- }
- return execTimes;
- }
-
- /**
- * <pre>
- * Usage: mrbench
- * [-baseDir <base DFS path for output/input, default is /benchmarks/MRBench>]
- * [-jar <local path to job jar file containing Mapper and Reducer implementations, default is current jar file>]
- * [-numRuns <number of times to run the job, default is 1>]
- * [-maps <number of maps for each run, default is 2>]
- * [-reduces <number of reduces for each run, default is 1>]
- * [-inputLines <number of input lines to generate, default is 1>]
- * [-inputType <type of input to generate, one of ascending (default), descending, random>]
- * [-verbose]
- * </pre>
- */
- public static void main (String[] args) throws IOException {
- String version = "MRBenchmark.0.0.2";
- System.out.println(version);
- String usage =
- "Usage: mrbench " +
- "[-baseDir <base DFS path for output/input, default is /benchmarks/MRBench>] " +
- "[-jar <local path to job jar file containing Mapper and Reducer implementations, default is current jar file>] " +
- "[-numRuns <number of times to run the job, default is 1>] " +
- "[-maps <number of maps for each run, default is 2>] " +
- "[-reduces <number of reduces for each run, default is 1>] " +
- "[-inputLines <number of input lines to generate, default is 1>] " +
- "[-inputType <type of input to generate, one of ascending (default), descending, random>] " +
- "[-verbose]";
-
- String jarFile = null;
- int inputLines = 1;
- int numRuns = 1;
- int numMaps = 2;
- int numReduces = 1;
- boolean verbose = false;
- Order inputSortOrder = Order.ASCENDING;
- for (int i = 0; i < args.length; i++) { // parse command line
- if (args[i].equals("-jar")) {
- jarFile = args[++i];
- } else if (args[i].equals("-numRuns")) {
- numRuns = Integer.parseInt(args[++i]);
- } else if (args[i].equals("-baseDir")) {
- BASE_DIR = new Path(args[++i]);
- } else if (args[i].equals("-maps")) {
- numMaps = Integer.parseInt(args[++i]);
- } else if (args[i].equals("-reduces")) {
- numReduces = Integer.parseInt(args[++i]);
- } else if (args[i].equals("-inputLines")) {
- inputLines = Integer.parseInt(args[++i]);
- } else if (args[i].equals("-inputType")) {
- String s = args[++i];
- if (s.equalsIgnoreCase("ascending")) {
- inputSortOrder = Order.ASCENDING;
- } else if (s.equalsIgnoreCase("descending")) {
- inputSortOrder = Order.DESCENDING;
- } else if (s.equalsIgnoreCase("random")) {
- inputSortOrder = Order.RANDOM;
- } else {
- inputSortOrder = null;
- }
- } else if (args[i].equals("-verbose")) {
- verbose = true;
- } else {
- System.err.println(usage);
- System.exit(-1);
- }
- }
-
- if (numRuns < 1 || // verify args
- numMaps < 1 ||
- numReduces < 1 ||
- inputLines < 0 ||
- inputSortOrder == null)
- {
- System.err.println(usage);
- System.exit(-1);
- }
- JobConf jobConf = setupJob(numMaps, numReduces, jarFile);
- FileSystem fs = FileSystem.get(jobConf);
- Path inputFile = new Path(INPUT_DIR, "input_" + (new Random()).nextInt() + ".txt");
- generateTextFile(fs, inputFile, inputLines, inputSortOrder);
- // setup test output directory
- fs.mkdirs(BASE_DIR);
- ArrayList<Long> execTimes = new ArrayList<Long>();
- try {
- execTimes = runJobInSequence(jobConf, numRuns);
- } finally {
- // delete output -- should we really do this?
- fs.delete(BASE_DIR, true);
- }
-
- if (verbose) {
- // Print out a report
- System.out.println("Total MapReduce jobs executed: " + numRuns);
- System.out.println("Total lines of data per job: " + inputLines);
- System.out.println("Maps per job: " + numMaps);
- System.out.println("Reduces per job: " + numReduces);
- }
- int i = 0;
- long totalTime = 0;
- for (Long time : execTimes) {
- totalTime += time.longValue();
- if (verbose) {
- System.out.println("Total milliseconds for task: " + (++i) +
- " = " + time);
- }
- }
- long avgTime = totalTime / numRuns;
- System.out.println("DataLinestMapstReducestAvgTime (milliseconds)");
- System.out.println(inputLines + "tt" + numMaps + "t" +
- numReduces + "t" + avgTime);
- }
-
- }