com.datasalt.pangool.tuplemr
Class MapOnlyJobBuilder

java.lang.Object
  extended by com.datasalt.pangool.tuplemr.MapOnlyJobBuilder

public class MapOnlyJobBuilder
extends Object

The MapOnlyJobBuilder is a simple Pangool primitive that executes map-only Jobs. To use it, you must implement MapOnlyMapper.


Constructor Summary
MapOnlyJobBuilder(org.apache.hadoop.conf.Configuration conf)
           
MapOnlyJobBuilder(org.apache.hadoop.conf.Configuration conf, String jobName)
           
 
Method Summary
 MapOnlyJobBuilder addInput(org.apache.hadoop.fs.Path path, org.apache.hadoop.mapreduce.InputFormat inputFormat, MapOnlyMapper processor)
           
 MapOnlyJobBuilder addInput(org.apache.hadoop.fs.Path path, org.apache.hadoop.mapreduce.InputFormat inputFormat, MapOnlyMapper processor, Map<String,String> specificContext)
           
 void addNamedOutput(String namedOutput, org.apache.hadoop.mapreduce.OutputFormat outputFormat, Class keyClass, Class valueClass)
           
 void addNamedOutput(String namedOutput, org.apache.hadoop.mapreduce.OutputFormat outputFormat, Class keyClass, Class valueClass, Map<String,String> specificContext)
           
 void addNamedTupleOutput(String namedOutput, Schema outputSchema)
           
 void addTupleInput(org.apache.hadoop.fs.Path path, MapOnlyMapper tupleMapper)
          Adds an input file associated with a TupleFile.
 void addTupleInput(org.apache.hadoop.fs.Path path, Schema targetSchema, MapOnlyMapper tupleMapper)
          Adds an input file associated with a TupleFile.
 void cleanUpInstanceFiles()
          Run this method after running your Job so that instance files are properly cleaned up.
 org.apache.hadoop.mapreduce.Job createJob()
           
 void setDefaultNamedOutput(org.apache.hadoop.mapreduce.OutputFormat outputFormat, Class keyClass, Class valueClass)
          Sets the default named output specs.
 void setDefaultNamedOutput(org.apache.hadoop.mapreduce.OutputFormat outputFormat, Class keyClass, Class valueClass, Map<String,String> specificContext)
          Sets the default named output specs.
 void setDefaultNamedOutput(Schema outputSchema)
          Sets the default named output (Tuple format) specs.
 MapOnlyJobBuilder setJarByClass(Class<?> jarByClass)
           
 MapOnlyJobBuilder setMapper(MapOnlyMapper mapOnlyMapper)
          Deprecated. 
 MapOnlyJobBuilder setOutput(org.apache.hadoop.fs.Path outputPath, org.apache.hadoop.mapreduce.OutputFormat outputFormat, Class<?> outputKeyClass, Class<?> outputValueClass)
           
 MapOnlyJobBuilder setTupleOutput(org.apache.hadoop.fs.Path outputPath, Schema schema)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

MapOnlyJobBuilder

public MapOnlyJobBuilder(org.apache.hadoop.conf.Configuration conf)

MapOnlyJobBuilder

public MapOnlyJobBuilder(org.apache.hadoop.conf.Configuration conf,
                         String jobName)
Method Detail

setJarByClass

public MapOnlyJobBuilder setJarByClass(Class<?> jarByClass)

addTupleInput

public void addTupleInput(org.apache.hadoop.fs.Path path,
                          MapOnlyMapper tupleMapper)
Adds an input file associated with a TupleFile.


addTupleInput

public void addTupleInput(org.apache.hadoop.fs.Path path,
                          Schema targetSchema,
                          MapOnlyMapper tupleMapper)
Adds an input file associated with a TupleFile.

A "Target Schema" is given, which should be backwards-compatible with the Schema in the Tuple File (new nullable fields are allowed, and old fields may be left unused).


addInput

public MapOnlyJobBuilder addInput(org.apache.hadoop.fs.Path path,
                                  org.apache.hadoop.mapreduce.InputFormat inputFormat,
                                  MapOnlyMapper processor)

addInput

public MapOnlyJobBuilder addInput(org.apache.hadoop.fs.Path path,
                                  org.apache.hadoop.mapreduce.InputFormat inputFormat,
                                  MapOnlyMapper processor,
                                  Map<String,String> specificContext)

setDefaultNamedOutput

public void setDefaultNamedOutput(org.apache.hadoop.mapreduce.OutputFormat outputFormat,
                                  Class keyClass,
                                  Class valueClass)
                           throws TupleMRException
Sets the default named output specs. By using this method, one can use an arbitrary number of named outputs without defining them beforehand.

Throws:
TupleMRException

setDefaultNamedOutput

public void setDefaultNamedOutput(org.apache.hadoop.mapreduce.OutputFormat outputFormat,
                                  Class keyClass,
                                  Class valueClass,
                                  Map<String,String> specificContext)
                           throws TupleMRException
Sets the default named output specs. By using this method, one can use an arbitrary number of named outputs without defining them beforehand.

The specific (key, value) default context defined here will be applied to ALL named outputs.

Throws:
TupleMRException

setDefaultNamedOutput

public void setDefaultNamedOutput(Schema outputSchema)
                           throws TupleMRException
Sets the default named output (Tuple format) specs. By using this method, one can use an arbitrary number of named outputs without defining them beforehand.

Throws:
TupleMRException

addNamedOutput

public void addNamedOutput(String namedOutput,
                           org.apache.hadoop.mapreduce.OutputFormat outputFormat,
                           Class keyClass,
                           Class valueClass)
                    throws TupleMRException
Throws:
TupleMRException

addNamedOutput

public void addNamedOutput(String namedOutput,
                           org.apache.hadoop.mapreduce.OutputFormat outputFormat,
                           Class keyClass,
                           Class valueClass,
                           Map<String,String> specificContext)
                    throws TupleMRException
Throws:
TupleMRException

addNamedTupleOutput

public void addNamedTupleOutput(String namedOutput,
                                Schema outputSchema)
                         throws TupleMRException
Throws:
TupleMRException

setTupleOutput

public MapOnlyJobBuilder setTupleOutput(org.apache.hadoop.fs.Path outputPath,
                                        Schema schema)

setOutput

public MapOnlyJobBuilder setOutput(org.apache.hadoop.fs.Path outputPath,
                                   org.apache.hadoop.mapreduce.OutputFormat outputFormat,
                                   Class<?> outputKeyClass,
                                   Class<?> outputValueClass)

setMapper

@Deprecated
public MapOnlyJobBuilder setMapper(MapOnlyMapper mapOnlyMapper)
Deprecated. 

Deprecated. Use addInput(org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.InputFormat, com.datasalt.pangool.tuplemr.mapred.MapOnlyMapper) instead.


cleanUpInstanceFiles

public void cleanUpInstanceFiles()
                          throws IOException
Run this method after running your Job so that instance files are properly cleaned up.

Throws:
IOException

createJob

public org.apache.hadoop.mapreduce.Job createJob()
                                          throws IOException,
                                                 TupleMRException,
                                                 URISyntaxException
Throws:
IOException
TupleMRException
URISyntaxException


Copyright © 2014 Datasalt Systems S.L. All rights reserved.