-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhbase_rdd.py
27 lines (22 loc) · 1.15 KB
/
hbase_rdd.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from pyspark import SparkContext
from pyspark.sql.types import StructType, StructField, StringType, ArrayType
from pyspark.sql import SQLContext

# NOTE: this approach (newAPIHadoopRDD + string converters) is inefficient;
# this access pattern is not recommended. (Translated from the original
# Chinese comment.)

sc = SparkContext()

# HBase / ZooKeeper connection settings plus the scan definition:
# columns to fetch, row-key range [start, stop), and the target table.
conf = {
    "hbase.zookeeper.property.clientPort": "2181",
    "zookeeper.znode.parent": "/hbase-unsecure",
    "hbase.zookeeper.quorum": "localhost",
    "hbase.mapreduce.scan.columns": "booking:url(decode) booking:session_id",
    "hbase.mapreduce.scan.row.start": "e_2020-04-19",
    "hbase.mapreduce.scan.row.stop": "e_2020-04-20",
    "hbase.mapreduce.inputtable": "booking",
}

# Converters that turn the Hadoop key/value classes into plain strings
# on the Python side (shipped with the Spark examples jar).
keyConv = "org.apache.spark.examples.pythonconverters.ImmutableBytesWritableToStringConverter"
valueConv = "org.apache.spark.examples.pythonconverters.HBaseResultToStringConverter"

hbase_rdd = sc.newAPIHadoopRDD(
    "org.apache.hadoop.hbase.mapreduce.TableInputFormat",
    "org.apache.hadoop.hbase.io.ImmutableBytesWritable",
    "org.apache.hadoop.hbase.client.Result",
    keyConverter=keyConv,
    valueConverter=valueConv,
    conf=conf,
)

# Cache BEFORE the first action so count() materializes the scan into the
# cache; the original called count() first, which made cache() useless and
# forced a second full HBase scan for the collect.
hbase_rdd.cache()
count = hbase_rdd.count()

# The original collect()ed the ENTIRE table onto the driver only to print a
# single record and break — a potential driver OOM. take(1) fetches exactly
# one record instead.
output = hbase_rdd.take(1)
for (k, v) in output:
    print(k, v)