BZip2Codec.java
上传用户:quxuerui
上传日期:2018-01-08
资源大小:41811k
文件大小:8k
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
- package org.apache.hadoop.io.compress;
- import java.io.BufferedInputStream;
- import java.io.IOException;
- import java.io.InputStream;
- import java.io.OutputStream;
- import org.apache.hadoop.io.compress.bzip2.BZip2DummyCompressor;
- import org.apache.hadoop.io.compress.bzip2.BZip2DummyDecompressor;
- import org.apache.hadoop.io.compress.bzip2.CBZip2InputStream;
- import org.apache.hadoop.io.compress.bzip2.CBZip2OutputStream;
- /**
- * This class provides CompressionOutputStream and CompressionInputStream for
- * compression and decompression. Currently we dont have an implementation of
- * the Compressor and Decompressor interfaces, so those methods of
- * CompressionCodec which have a Compressor or Decompressor type argument, throw
- * UnsupportedOperationException.
- */
- public class BZip2Codec implements
- org.apache.hadoop.io.compress.CompressionCodec {
- private static final String HEADER = "BZ";
- private static final int HEADER_LEN = HEADER.length();
- /**
- * Creates a new instance of BZip2Codec
- */
- public BZip2Codec() {
- }
- /**
- * Creates CompressionOutputStream for BZip2
- *
- * @param out
- * The output Stream
- * @return The BZip2 CompressionOutputStream
- * @throws java.io.IOException
- * Throws IO exception
- */
- public CompressionOutputStream createOutputStream(OutputStream out)
- throws IOException {
- return new BZip2CompressionOutputStream(out);
- }
- /**
- * This functionality is currently not supported.
- *
- * @throws java.lang.UnsupportedOperationException
- * Throws UnsupportedOperationException
- */
- public CompressionOutputStream createOutputStream(OutputStream out,
- Compressor compressor) throws IOException {
- return createOutputStream(out);
- }
- /**
- * This functionality is currently not supported.
- *
- * @throws java.lang.UnsupportedOperationException
- * Throws UnsupportedOperationException
- */
- public Class<? extends org.apache.hadoop.io.compress.Compressor> getCompressorType() {
- return BZip2DummyCompressor.class;
- }
- /**
- * This functionality is currently not supported.
- *
- * @throws java.lang.UnsupportedOperationException
- * Throws UnsupportedOperationException
- */
- public Compressor createCompressor() {
- return new BZip2DummyCompressor();
- }
- /**
- * Creates CompressionInputStream to be used to read off uncompressed data.
- *
- * @param in
- * The InputStream
- * @return Returns CompressionInputStream for BZip2
- * @throws java.io.IOException
- * Throws IOException
- */
- public CompressionInputStream createInputStream(InputStream in)
- throws IOException {
- return new BZip2CompressionInputStream(in);
- }
- /**
- * This functionality is currently not supported.
- *
- * @throws java.lang.UnsupportedOperationException
- * Throws UnsupportedOperationException
- */
- public CompressionInputStream createInputStream(InputStream in,
- Decompressor decompressor) throws IOException {
- return createInputStream(in);
- }
- /**
- * This functionality is currently not supported.
- *
- * @throws java.lang.UnsupportedOperationException
- * Throws UnsupportedOperationException
- */
- public Class<? extends org.apache.hadoop.io.compress.Decompressor> getDecompressorType() {
- return BZip2DummyDecompressor.class;
- }
- /**
- * This functionality is currently not supported.
- *
- * @throws java.lang.UnsupportedOperationException
- * Throws UnsupportedOperationException
- */
- public Decompressor createDecompressor() {
- return new BZip2DummyDecompressor();
- }
- /**
- * .bz2 is recognized as the default extension for compressed BZip2 files
- *
- * @return A String telling the default bzip2 file extension
- */
- public String getDefaultExtension() {
- return ".bz2";
- }
- private static class BZip2CompressionOutputStream extends CompressionOutputStream {
- // class data starts here//
- private CBZip2OutputStream output;
- private boolean needsReset;
- // class data ends here//
- public BZip2CompressionOutputStream(OutputStream out)
- throws IOException {
- super(out);
- needsReset = true;
- }
- private void writeStreamHeader() throws IOException {
- if (super.out != null) {
- // The compressed bzip2 stream should start with the
- // identifying characters BZ. Caller of CBZip2OutputStream
- // i.e. this class must write these characters.
- out.write(HEADER.getBytes());
- }
- }
- public void finish() throws IOException {
- this.output.finish();
- needsReset = true;
- }
- private void internalReset() throws IOException {
- if (needsReset) {
- needsReset = false;
- writeStreamHeader();
- this.output = new CBZip2OutputStream(out);
- }
- }
-
- public void resetState() throws IOException {
- // Cannot write to out at this point because out might not be ready
- // yet, as in SequenceFile.Writer implementation.
- needsReset = true;
- }
- public void write(int b) throws IOException {
- if (needsReset) {
- internalReset();
- }
- this.output.write(b);
- }
- public void write(byte[] b, int off, int len) throws IOException {
- if (needsReset) {
- internalReset();
- }
- this.output.write(b, off, len);
- }
- public void close() throws IOException {
- this.output.flush();
- this.output.close();
- needsReset = true;
- }
- }// end of class BZip2CompressionOutputStream
- private static class BZip2CompressionInputStream extends CompressionInputStream {
- // class data starts here//
- private CBZip2InputStream input;
- boolean needsReset;
- // class data ends here//
- public BZip2CompressionInputStream(InputStream in) throws IOException {
- super(in);
- needsReset = true;
- }
- private BufferedInputStream readStreamHeader() throws IOException {
- // We are flexible enough to allow the compressed stream not to
- // start with the header of BZ. So it works fine either we have
- // the header or not.
- BufferedInputStream bufferedIn = null;
- if (super.in != null) {
- bufferedIn = new BufferedInputStream(super.in);
- bufferedIn.mark(HEADER_LEN);
- byte[] headerBytes = new byte[HEADER_LEN];
- int actualRead = bufferedIn.read(headerBytes, 0, HEADER_LEN);
- if (actualRead != -1) {
- String header = new String(headerBytes);
- if (header.compareTo(HEADER) != 0) {
- bufferedIn.reset();
- }
- }
- }
- if (bufferedIn == null) {
- throw new IOException("Failed to read bzip2 stream.");
- }
- return bufferedIn;
- }// end of method
- public void close() throws IOException {
- if (!needsReset) {
- input.close();
- needsReset = true;
- }
- }
- public int read(byte[] b, int off, int len) throws IOException {
- if (needsReset) {
- internalReset();
- }
- return this.input.read(b, off, len);
- }
- private void internalReset() throws IOException {
- if (needsReset) {
- needsReset = false;
- BufferedInputStream bufferedIn = readStreamHeader();
- input = new CBZip2InputStream(bufferedIn);
- }
- }
-
- public void resetState() throws IOException {
- // Cannot read from bufferedIn at this point because bufferedIn might not be ready
- // yet, as in SequenceFile.Reader implementation.
- needsReset = true;
- }
- public int read() throws IOException {
- if (needsReset) {
- internalReset();
- }
- return this.input.read();
- }
- }// end of BZip2CompressionInputStream
- }