How to Read and Write RCFile with the MapReduce API
This article walks through reading and writing RCFile files with the MapReduce API, using two complete, runnable examples.
RCFile is a record-columnar storage format developed by Facebook that combines high compression ratios with efficient reads. Within Hive, a Text table can usually be converted with a simple insert-select, but sometimes it is useful to read and write RCFile directly from a MapReduce job.
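For comparison, the pure-Hive route mentioned above is a minimal sketch like the following; the table and column names (src_text, dst_rcfile, id, name) are hypothetical placeholders, not from this article:

-- Hypothetical target table stored as RCFile.
CREATE TABLE dst_rcfile (id INT, name STRING) STORED AS RCFILE;
-- Hive rewrites the rows of the existing text table into the columnar layout.
INSERT OVERWRITE TABLE dst_rcfile SELECT id, name FROM src_text;

The rest of this article takes the MapReduce route instead.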
Reading an RCFile and producing a Text file with MapReduce
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hive.hcatalog.rcfile.RCFileMapReduceInputFormat;
import java.io.IOException;
public class RcFileReaderJob {
    static class RcFileMapper extends Mapper<Object, BytesRefArrayWritable, Text, NullWritable> {
        @Override
        protected void map(Object key, BytesRefArrayWritable value, Context context)
                throws IOException, InterruptedException {
            // Each input value holds the columns of one RCFile row;
            // reassemble them into a single tab-separated line.
            Text txt = new Text();
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < value.size(); i++) {
                BytesRefWritable v = value.get(i);
                txt.set(v.getData(), v.getStart(), v.getLength());
                sb.append(txt.toString());
                if (i < value.size() - 1) {
                    sb.append('\t');
                }
            }
            context.write(new Text(sb.toString()), NullWritable.get());
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            super.cleanup(context);
        }

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            super.setup(context);
        }
    }

    static class RcFileReduce extends Reducer<Text, NullWritable, Text, NullWritable> {
        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {
            // Emit each distinct line once; the default TextOutputFormat writes plain text.
            context.write(key, NullWritable.get());
        }
    }

    public static boolean runLoadMapReduce(Configuration conf, Path input, Path output)
            throws IOException, ClassNotFoundException, InterruptedException {
        Job job = Job.getInstance(conf);
        job.setJarByClass(RcFileReaderJob.class);
        job.setJobName("RcFileReaderJob");
        job.setNumReduceTasks(1);
        job.setMapperClass(RcFileMapper.class);
        job.setReducerClass(RcFileReduce.class);
        // RCFileMapReduceInputFormat (from hive-hcatalog-core) splits and decodes RCFile row groups.
        job.setInputFormatClass(RCFileMapReduceInputFormat.class);
        // MultipleInputs.addInputPath(job, input, RCFileInputFormat.class);
        RCFileMapReduceInputFormat.addInputPath(job, input);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        FileOutputFormat.setOutputPath(job, output);
        return job.waitForCompletion(true);
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        if (args.length != 2) {
            System.err.println("Usage: rcfile <in> <out>");
            System.exit(2);
        }
        RcFileReaderJob.runLoadMapReduce(conf, new Path(args[0]), new Path(args[1]));
    }
}
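Packaged into a jar (the jar name and paths below are hypothetical), the job runs like any other MapReduce program, provided hive-hcatalog-core and the Hive serde2 classes are on the task classpath:

hadoop jar rcfile-examples.jar RcFileReaderJob /user/hive/warehouse/t_rc /tmp/t_text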
Reading a Text file and producing an RCFile with MapReduce
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hive.hcatalog.rcfile.RCFileMapReduceOutputFormat;
import java.io.IOException;
public class RcFileWriterJob extends Configured implements Tool {
    public static class Map extends Mapper<Object, Text, NullWritable, BytesRefArrayWritable> {
        private int numCols;
        private BytesRefArrayWritable bytes;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            // The column count is published into the job conf by
            // RCFileMapReduceOutputFormat.setColumnNumber() in run() below.
            numCols = context.getConfiguration().getInt("hive.io.rcfile.column.number.conf", 0);
            bytes = new BytesRefArrayWritable(numCols);
        }

        @Override
        protected void map(Object key, Text line, Context context)
                throws IOException, InterruptedException {
            // Split each '|'-delimited text line and store every field as one RCFile column.
            bytes.clear();
            String[] cols = line.toString().split("\\|");
            for (int i = 0; i < numCols; i++) {
                byte[] fieldData = cols[i].getBytes("UTF-8");
                bytes.set(i, new BytesRefWritable(fieldData, 0, fieldData.length));
            }
            context.write(NullWritable.get(), bytes);
        }
    }

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 8) {
            System.err.println("Usage: " + getClass().getSimpleName()
                    + " -tableName <tableName> -numCols <numberOfColumns>"
                    + " -input <input path> -output <output path>");
            return 2;
        }
        String tableName = "";
        int numCols = 0;
        String input = "";
        String output = "";
        for (int i = 0; i < otherArgs.length - 1; i++) {
            if ("-tableName".equals(otherArgs[i])) {
                tableName = otherArgs[i + 1];
            } else if ("-numCols".equals(otherArgs[i])) {
                numCols = Integer.parseInt(otherArgs[i + 1]);
            } else if ("-input".equals(otherArgs[i])) {
                input = otherArgs[i + 1];
            } else if ("-output".equals(otherArgs[i])) {
                output = otherArgs[i + 1];
            }
        }

        Job job = Job.getInstance(conf, "RCFile loader: " + tableName);
        job.setJarByClass(RcFileWriterJob.class);
        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(BytesRefArrayWritable.class);
        job.setNumReduceTasks(0); // map-only: mappers write RCFile output directly
        FileInputFormat.addInputPath(job, new Path(input));
        job.setOutputFormatClass(RCFileMapReduceOutputFormat.class);
        RCFileMapReduceOutputFormat.setColumnNumber(job.getConfiguration(), numCols);
        RCFileMapReduceOutputFormat.setOutputPath(job, new Path(output));
        RCFileMapReduceOutputFormat.setCompressOutput(job, false);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new RcFileWriterJob(), args));
    }
}