// StructureGenerator.java
// Uploaded by: quxuerui
// Upload date: 2018-01-08
// Resource size: 41811k
// File size: 10k
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.hadoop.fs.loadGenerator;
- import java.io.File;
- import java.io.FileNotFoundException;
- import java.io.PrintStream;
- import java.util.ArrayList;
- import java.util.List;
- import java.util.Random;
- import org.apache.hadoop.util.ToolRunner;
- /**
- * This program generates a random namespace structure with the following
- * constraints:
- * 1. The number of subdirectories is a random number in [minWidth, maxWidth].
- * 2. The maximum depth of each subdirectory is a random number
- * [2*maxDepth/3, maxDepth].
- * 3. Files are randomly placed in the empty directories. The size of each
- * file follows Gaussian distribution.
- * The generated namespace structure is described by two files in the output
- * directory. Each line of the first file
- * contains the full name of a leaf directory.
- * Each line of the second file contains
- * the full name of a file and its size, separated by a blank.
- *
- * The synopsis of the command is
- * java StructureGenerator
- -maxDepth <maxDepth> : maximum depth of the directory tree; default is 5.
- -minWidth <minWidth> : minimum number of subdirectories per directories; default is 1
- -maxWidth <maxWidth> : maximum number of subdirectories per directories; default is 5
- -numOfFiles <#OfFiles> : the total number of files; default is 10.
- -avgFileSize <avgFileSizeInBlocks>: average size of blocks; default is 1.
- -outDir <outDir>: output directory; default is the current directory.
- -seed <seed>: random number generator seed; default is the current time.
- */
- public class StructureGenerator {
- private int maxDepth = 5;
- private int minWidth = 1;
- private int maxWidth = 5;
- private int numOfFiles = 10;
- private double avgFileSize = 1;
- private File outDir = DEFAULT_STRUCTURE_DIRECTORY;
- final static private String USAGE = "java StructureGeneratorn" +
- "-maxDepth <maxDepth>n" +
- "-minWidth <minWidth>n" +
- "-maxWidth <maxWidth>n" +
- "-numOfFiles <#OfFiles>n" +
- "-avgFileSize <avgFileSizeInBlocks>n" +
- "-outDir <outDir>n" +
- "-seed <seed>";
-
- private Random r = null;
-
- /** Default directory for storing file/directory structure */
- final static File DEFAULT_STRUCTURE_DIRECTORY = new File(".");
- /** The name of the file for storing directory structure */
- final static String DIR_STRUCTURE_FILE_NAME = "dirStructure";
- /** The name of the file for storing file structure */
- final static String FILE_STRUCTURE_FILE_NAME = "fileStructure";
- /** The name prefix for the files created by this program */
- final static String FILE_NAME_PREFIX = "_file_";
-
- /**
- * The main function first parses the command line arguments,
- * then generates in-memory directory structure and outputs to a file,
- * last generates in-memory files and outputs them to a file.
- */
- public int run(String[] args) throws Exception {
- int exitCode = 0;
- exitCode = init(args);
- if (exitCode != 0) {
- return exitCode;
- }
- genDirStructure();
- output(new File(outDir, DIR_STRUCTURE_FILE_NAME));
- genFileStructure();
- outputFiles(new File(outDir, FILE_STRUCTURE_FILE_NAME));
- return exitCode;
- }
- /** Parse the command line arguments and initialize the data */
- private int init(String[] args) {
- try {
- for (int i = 0; i < args.length; i++) { // parse command line
- if (args[i].equals("-maxDepth")) {
- maxDepth = Integer.parseInt(args[++i]);
- if (maxDepth<1) {
- System.err.println("maxDepth must be positive: " + maxDepth);
- return -1;
- }
- } else if (args[i].equals("-minWidth")) {
- minWidth = Integer.parseInt(args[++i]);
- if (minWidth<0) {
- System.err.println("minWidth must be positive: " + minWidth);
- return -1;
- }
- } else if (args[i].equals("-maxWidth")) {
- maxWidth = Integer.parseInt(args[++i]);
- } else if (args[i].equals("-numOfFiles")) {
- numOfFiles = Integer.parseInt(args[++i]);
- if (numOfFiles<1) {
- System.err.println("NumOfFiles must be positive: " + numOfFiles);
- return -1;
- }
- } else if (args[i].equals("-avgFileSize")) {
- avgFileSize = Double.parseDouble(args[++i]);
- if (avgFileSize<=0) {
- System.err.println("AvgFileSize must be positive: " + avgFileSize);
- return -1;
- }
- } else if (args[i].equals("-outDir")) {
- outDir = new File(args[++i]);
- } else if (args[i].equals("-seed")) {
- r = new Random(Long.parseLong(args[++i]));
- } else {
- System.err.println(USAGE);
- ToolRunner.printGenericCommandUsage(System.err);
- return -1;
- }
- }
- } catch (NumberFormatException e) {
- System.err.println("Illegal parameter: " + e.getLocalizedMessage());
- System.err.println(USAGE);
- return -1;
- }
-
- if (maxWidth < minWidth) {
- System.err.println(
- "maxWidth must be bigger than minWidth: " + maxWidth);
- return -1;
- }
-
- if (r==null) {
- r = new Random();
- }
- return 0;
- }
-
- /** In memory representation of a directory */
- private static class INode {
- private String name;
- private List<INode> children = new ArrayList<INode>();
-
- /** Constructor */
- private INode(String name) {
- this.name = name;
- }
-
- /** Add a child (subdir/file) */
- private void addChild(INode child) {
- children.add(child);
- }
-
- /** Output the subtree rooted at the current node.
- * Only the leaves are printed.
- */
- private void output(PrintStream out, String prefix) {
- prefix = prefix==null?name:prefix+"/"+name;
- if (children.isEmpty()) {
- out.println(prefix);
- } else {
- for (INode child : children) {
- child.output(out, prefix);
- }
- }
- }
-
- /** Output the files in the subtree rooted at this node */
- protected void outputFiles(PrintStream out, String prefix) {
- prefix = prefix==null?name:prefix+"/"+name;
- for (INode child : children) {
- child.outputFiles(out, prefix);
- }
- }
-
- /** Add all the leaves in the subtree to the input list */
- private void getLeaves(List<INode> leaves) {
- if (children.isEmpty()) {
- leaves.add(this);
- } else {
- for (INode child : children) {
- child.getLeaves(leaves);
- }
- }
- }
- }
-
- /** In memory representation of a file */
- private static class FileINode extends INode {
- private double numOfBlocks;
- /** constructor */
- private FileINode(String name, double numOfBlocks) {
- super(name);
- this.numOfBlocks = numOfBlocks;
- }
-
- /** Output a file attribute */
- protected void outputFiles(PrintStream out, String prefix) {
- prefix = (prefix == null)?super.name: prefix + "/"+super.name;
- out.println(prefix + " " + numOfBlocks);
- }
- }
- private INode root;
-
- /** Generates a directory tree with a max depth of <code>maxDepth</code> */
- private void genDirStructure() {
- root = genDirStructure("", maxDepth);
- }
-
- /** Generate a directory tree rooted at <code>rootName</code>
- * The number of subtree is in the range of [minWidth, maxWidth].
- * The maximum depth of each subtree is in the range of
- * [2*maxDepth/3, maxDepth].
- */
- private INode genDirStructure(String rootName, int maxDepth) {
- INode root = new INode(rootName);
-
- if (maxDepth>0) {
- maxDepth--;
- int minDepth = maxDepth*2/3;
- // Figure out the number of subdirectories to generate
- int numOfSubDirs = minWidth + r.nextInt(maxWidth-minWidth+1);
- // Expand the tree
- for (int i=0; i<numOfSubDirs; i++) {
- int childDepth = (maxDepth == 0)?0:
- (r.nextInt(maxDepth-minDepth+1)+minDepth);
- INode child = genDirStructure("dir"+i, childDepth);
- root.addChild(child);
- }
- }
- return root;
- }
-
- /** Collects leaf nodes in the tree */
- private List<INode> getLeaves() {
- List<INode> leaveDirs = new ArrayList<INode>();
- root.getLeaves(leaveDirs);
- return leaveDirs;
- }
-
- /** Decides where to place all the files and its length.
- * It first collects all empty directories in the tree.
- * For each file, it randomly chooses an empty directory to place the file.
- * The file's length is generated using Gaussian distribution.
- */
- private void genFileStructure() {
- List<INode> leaves = getLeaves();
- int totalLeaves = leaves.size();
- for (int i=0; i<numOfFiles; i++) {
- int leaveNum = r.nextInt(totalLeaves);
- double fileSize;
- do {
- fileSize = r.nextGaussian()+avgFileSize;
- } while (fileSize<0);
- leaves.get(leaveNum).addChild(
- new FileINode(FILE_NAME_PREFIX+i, fileSize));
- }
- }
-
- /** Output directory structure to a file, each line of the file
- * contains the directory name. Only empty directory names are printed. */
- private void output(File outFile) throws FileNotFoundException {
- System.out.println("Printing to " + outFile.toString());
- PrintStream out = new PrintStream(outFile);
- root.output(out, null);
- out.close();
- }
-
- /** Output all files' attributes to a file, each line of the output file
- * contains a file name and its length. */
- private void outputFiles(File outFile) throws FileNotFoundException {
- System.out.println("Printing to " + outFile.toString());
- PrintStream out = new PrintStream(outFile);
- root.outputFiles(out, null);
- out.close();
- }
-
- /**
- * Main program
- * @param args Command line arguments
- * @throws Exception
- */
- public static void main(String[] args) throws Exception {
- StructureGenerator sg = new StructureGenerator();
- System.exit(sg.run(args));
- }
- }