TestMultipleOutputs.java
上传用户:quxuerui
上传日期:2018-01-08
资源大小:41811k
文件大小:8k
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hadoop.mapred.lib;
- import org.apache.hadoop.fs.FileStatus;
- import org.apache.hadoop.fs.FileSystem;
- import org.apache.hadoop.fs.Path;
- import org.apache.hadoop.io.LongWritable;
- import org.apache.hadoop.io.SequenceFile;
- import org.apache.hadoop.io.Text;
- import org.apache.hadoop.mapred.*;
- import java.io.BufferedReader;
- import java.io.DataOutputStream;
- import java.io.IOException;
- import java.io.InputStreamReader;
- import java.util.Iterator;
- public class TestMultipleOutputs extends HadoopTestCase {
- public TestMultipleOutputs() throws IOException {
- super(HadoopTestCase.LOCAL_MR, HadoopTestCase.LOCAL_FS, 1, 1);
- }
- public void testWithoutCounters() throws Exception {
- _testMultipleOutputs(false);
- }
- public void testWithCounters() throws Exception {
- _testMultipleOutputs(true);
- }
- private static final Path ROOT_DIR = new Path("testing/mo");
- private static final Path IN_DIR = new Path(ROOT_DIR, "input");
- private static final Path OUT_DIR = new Path(ROOT_DIR, "output");
- private Path getDir(Path dir) {
- // Hack for local FS that does not have the concept of a 'mounting point'
- if (isLocalFS()) {
- String localPathRoot = System.getProperty("test.build.data", "/tmp")
- .replace(' ', '+');
- dir = new Path(localPathRoot, dir);
- }
- return dir;
- }
- public void setUp() throws Exception {
- super.setUp();
- Path rootDir = getDir(ROOT_DIR);
- Path inDir = getDir(IN_DIR);
- JobConf conf = createJobConf();
- FileSystem fs = FileSystem.get(conf);
- fs.delete(rootDir, true);
- if (!fs.mkdirs(inDir)) {
- throw new IOException("Mkdirs failed to create " + inDir.toString());
- }
- }
- public void tearDown() throws Exception {
- Path rootDir = getDir(ROOT_DIR);
- JobConf conf = createJobConf();
- FileSystem fs = FileSystem.get(conf);
- fs.delete(rootDir, true);
- super.tearDown();
- }
- protected void _testMultipleOutputs(boolean withCounters) throws Exception {
- Path inDir = getDir(IN_DIR);
- Path outDir = getDir(OUT_DIR);
- JobConf conf = createJobConf();
- FileSystem fs = FileSystem.get(conf);
- DataOutputStream file = fs.create(new Path(inDir, "part-0"));
- file.writeBytes("anbnncndne");
- file.close();
- file = fs.create(new Path(inDir, "part-1"));
- file.writeBytes("anbnncndne");
- file.close();
- conf.setJobName("mo");
- conf.setInputFormat(TextInputFormat.class);
- conf.setOutputKeyClass(LongWritable.class);
- conf.setOutputValueClass(Text.class);
- conf.setMapOutputKeyClass(LongWritable.class);
- conf.setMapOutputValueClass(Text.class);
- conf.setOutputFormat(TextOutputFormat.class);
- conf.setOutputKeyClass(LongWritable.class);
- conf.setOutputValueClass(Text.class);
- MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
- LongWritable.class, Text.class);
- MultipleOutputs.addMultiNamedOutput(conf, "sequence",
- SequenceFileOutputFormat.class, LongWritable.class, Text.class);
- MultipleOutputs.setCountersEnabled(conf, withCounters);
- conf.setMapperClass(MOMap.class);
- conf.setReducerClass(MOReduce.class);
- FileInputFormat.setInputPaths(conf, inDir);
- FileOutputFormat.setOutputPath(conf, outDir);
- JobClient jc = new JobClient(conf);
- RunningJob job = jc.submitJob(conf);
- while (!job.isComplete()) {
- Thread.sleep(100);
- }
- // assert number of named output part files
- int namedOutputCount = 0;
- FileStatus[] statuses = fs.listStatus(outDir);
- for (FileStatus status : statuses) {
- if (status.getPath().getName().equals("text-m-00000") ||
- status.getPath().getName().equals("text-m-00001") ||
- status.getPath().getName().equals("text-r-00000") ||
- status.getPath().getName().equals("sequence_A-m-00000") ||
- status.getPath().getName().equals("sequence_A-m-00001") ||
- status.getPath().getName().equals("sequence_B-m-00000") ||
- status.getPath().getName().equals("sequence_B-m-00001") ||
- status.getPath().getName().equals("sequence_B-r-00000") ||
- status.getPath().getName().equals("sequence_C-r-00000")) {
- namedOutputCount++;
- }
- }
- assertEquals(9, namedOutputCount);
- // assert TextOutputFormat files correctness
- BufferedReader reader = new BufferedReader(
- new InputStreamReader(fs.open(
- new Path(FileOutputFormat.getOutputPath(conf), "text-r-00000"))));
- int count = 0;
- String line = reader.readLine();
- while (line != null) {
- assertTrue(line.endsWith("text"));
- line = reader.readLine();
- count++;
- }
- reader.close();
- assertFalse(count == 0);
- // assert SequenceOutputFormat files correctness
- SequenceFile.Reader seqReader =
- new SequenceFile.Reader(fs, new Path(FileOutputFormat.getOutputPath(conf),
- "sequence_B-r-00000"), conf);
- assertEquals(LongWritable.class, seqReader.getKeyClass());
- assertEquals(Text.class, seqReader.getValueClass());
- count = 0;
- LongWritable key = new LongWritable();
- Text value = new Text();
- while (seqReader.next(key, value)) {
- assertEquals("sequence", value.toString());
- count++;
- }
- seqReader.close();
- assertFalse(count == 0);
- Counters.Group counters =
- job.getCounters().getGroup(MultipleOutputs.class.getName());
- if (!withCounters) {
- assertEquals(0, counters.size());
- }
- else {
- assertEquals(4, counters.size());
- assertEquals(4, counters.getCounter("text"));
- assertEquals(2, counters.getCounter("sequence_A"));
- assertEquals(4, counters.getCounter("sequence_B"));
- assertEquals(2, counters.getCounter("sequence_C"));
- }
- }
- @SuppressWarnings({"unchecked"})
- public static class MOMap implements Mapper<LongWritable, Text, LongWritable,
- Text> {
- private MultipleOutputs mos;
- public void configure(JobConf conf) {
- mos = new MultipleOutputs(conf);
- }
- public void map(LongWritable key, Text value,
- OutputCollector<LongWritable, Text> output,
- Reporter reporter)
- throws IOException {
- if (!value.toString().equals("a")) {
- output.collect(key, value);
- } else {
- mos.getCollector("text", reporter).collect(key, new Text("text"));
- mos.getCollector("sequence", "A", reporter).collect(key,
- new Text("sequence"));
- mos.getCollector("sequence", "B", reporter).collect(key,
- new Text("sequence"));
- }
- }
- public void close() throws IOException {
- mos.close();
- }
- }
- @SuppressWarnings({"unchecked"})
- public static class MOReduce implements Reducer<LongWritable, Text,
- LongWritable, Text> {
- private MultipleOutputs mos;
- public void configure(JobConf conf) {
- mos = new MultipleOutputs(conf);
- }
- public void reduce(LongWritable key, Iterator<Text> values,
- OutputCollector<LongWritable, Text> output,
- Reporter reporter)
- throws IOException {
- while (values.hasNext()) {
- Text value = values.next();
- if (!value.toString().equals("b")) {
- output.collect(key, value);
- } else {
- mos.getCollector("text", reporter).collect(key, new Text("text"));
- mos.getCollector("sequence", "B", reporter).collect(key,
- new Text("sequence"));
- mos.getCollector("sequence", "C", reporter).collect(key,
- new Text("sequence"));
- }
- }
- }
- public void close() throws IOException {
- mos.close();
- }
- }
- }