HBase应用(3.2)HBase与MapReduce

假设Users表有100万条数据,使用MapReduce统计各性别的用户数量,并将结果写入users_stats表中

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
package info.aviraer.demo.bigdata.core.hbase.chapter3;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import info.aviraer.demo.bigdata.base.PropsKey;
import info.aviraer.demo.bigdata.util.PropertiesUtils;

/**
 * MapReduce job that uses HBase both as source and sink: scans the "users"
 * table, counts users per gender, and writes one row per gender into the
 * "users_stats" table (family "stats", qualifier "count").
 */
public class HBaseDataSource extends Configured implements Tool {

    /** Mapper: emits (gender, 1) for every row scanned from "users". */
    public static class Map extends TableMapper<Text, IntWritable> {

        private static final IntWritable ONE = new IntWritable(1);
        // Reused per-record to avoid allocating a Text for every input row.
        private final Text outKey = new Text();

        @Override
        protected void map(ImmutableBytesWritable key, Result value,
                Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            byte[] gender = value.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender"));
            if (gender == null) {
                // Original code did new String(null) here and threw NPE; rows
                // without an info:gender cell are simply skipped instead.
                return;
            }
            // Bytes.toString decodes with HBase's canonical UTF-8, rather than
            // the platform-default charset used by new String(byte[]).
            outKey.set(Bytes.toString(gender));
            context.write(outKey, ONE);
        }
    }

    /** Reducer: sums the 1s per gender and writes the total to "users_stats". */
    public static class Reduce extends TableReducer<Text, IntWritable, Text> {

        @Override
        protected void reduce(Text gender, Iterable<IntWritable> counts,
                Reducer<Text, IntWritable, Text, Mutation>.Context context)
                throws IOException, InterruptedException {
            // BUG FIX: the original tested arg1.iterator().hasNext() in a while
            // loop, creating a fresh iterator on every check — hasNext() was
            // always true and next() always returned the first value, so the
            // loop never terminated. A single for-each iterates exactly once.
            int count = 0;
            for (IntWritable c : counts) {
                count += c.get();
            }
            Put put = new Put(gender.copyBytes());
            // BUG FIX: qualifier was misspelled "cout" in the original.
            put.addColumn(Bytes.toBytes("stats"), Bytes.toBytes("count"),
                    Bytes.toBytes(String.valueOf(count)));
            context.write(gender, put);
        }
    }

    /**
     * Configures and submits the job.
     *
     * @param args unused command-line arguments
     * @return 0 on job success, 1 on failure
     * @throws Exception on configuration or submission errors
     */
    public int run(String[] args) throws Exception {
        Configuration config = HBaseConfiguration.create();
        config.set(PropsKey.HB_ZK_QUORUM_KEY, PropertiesUtils.get(PropsKey.HB_ZK_QUORUM_KEY));
        Job job = Job.getInstance(config);
        job.setJarByClass(HBaseDataSource.class);
        job.setJobName("HBaseUserCount");

        Scan scan = new Scan();
        // Full-table scan: fetch rows in batches and keep the one-off scan
        // from evicting hot data out of the region servers' block cache.
        scan.setCaching(500);
        scan.setCacheBlocks(false);
        TableMapReduceUtil.initTableMapperJob("users", scan, Map.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("users_stats", Reduce.class, job);
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // BUG FIX: the original discarded ToolRunner's return value, so a
        // failed job still exited with status 0. Propagate it as exit code.
        System.exit(ToolRunner.run(new HBaseDataSource(), args));
    }

}