Hive 中的 JSON 解析问题 [英] JSON parsing issue in Hive
问题描述
我的样本数据看起来像...
{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"ABC":{"XYZ":"123.dfer","founder":"3.0","GHT":"Florida","fashion":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}
{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"GAP":{"GGG":"123.dfer","FFF":"3.0","DDD":"Florida","GOP":"fg45","cdc":"QQQ","ZZZ":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}
{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"BOX":{"FRG":"123.dfer","CXD":"3.0","FAX":"Florida","SXD":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}
我做了以下操作:
create table src (myjson string);
insert into src values
('{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"ABC":{"XYZ":"123.dfer","founder":"3.0","GHT":"Florida","fashion":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}')
,('{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"GAP":{"XVY":"123.dfer","FAH":"3.0","GHT":"Florida","fashion":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}')
,('{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"BOX":{"VOG":"123.dfer","FAH":"3.0","FAX":"Florida","fashion":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}')
;
问题是,当我执行 select get_json_object(myjson,'$.Rtype.MOD.Version[0].ABC.fashion') from src where get_json_object(myjson,'$.Rtype.MOD.Version[0].ABC') is not null 时,
某些字段得到的是 NULL。
该查询的计数值假设为 2345,
不加 where 条件时计数值也是 2345,这就是问题所在。
我观察到的现象是,这是因为它试图获取位于 $.Rtype.MOD.Version[0].GAP 下的数据。
hive> load data local inpath '/home/satish/s.json' into table sjson;
Loading data to table hivelearning.sjson
Table hivelearning.sjson stats: [numFiles=1, totalSize=216]
hive> select * from sjson;
{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"ABC":{"XYZ":"123.dfer","founder":"3.0","GHT":"Florida","fashion":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}
Time taken: 1.297 seconds, Fetched: 1 row(s)
hive> select get_json_object(data,'$.Rtype.MOD.Version[0].ABC.fashion') from sjson;
OK
fg45
Time taken: 0.084 seconds, Fetched: 1 row(s)
I am getting some issues while querying JSON data in Hive.
My sample data looks like this:
{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"ABC":{"XYZ":"123.dfer","founder":"3.0","GHT":"Florida","fashion":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}
{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"GAP":{"GGG":"123.dfer","FFF":"3.0","DDD":"Florida","GOP":"fg45","cdc":"QQQ","ZZZ":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}
{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"BOX":{"FRG":"123.dfer","CXD":"3.0","FAX":"Florida","SXD":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}
I have done the following:
create table src (myjson string);
insert into src values ('{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"ABC":{"XYZ":"123.dfer","founder":"3.0","GHT":"Florida","fashion":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}') ,('{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"GAP":{"XVY":"123.dfer","FAH":"3.0","GHT":"Florida","fashion":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}') ,('{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"BOX":{"VOG":"123.dfer","FAH":"3.0","FAX":"Florida","fashion":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}') ;
The issue arises when I run: select get_json_object(myjson,'$.Rtype.MOD.Version[0].ABC.fashion') from src where get_json_object(myjson,'$.Rtype.MOD.Version[0].ABC') is not null
I am getting NULLs for some of the fields.
The count value for this query is, say, 2345.
Without the where condition the count value is also 2345. This is the issue.
What I have observed is that this is because it is trying to fetch data that is under $.Rtype.MOD.Version[0].GAP.
hive> load data local inpath '/home/satish/s.json' into table sjson;
Loading data to table hivelearning.sjson
Table hivelearning.sjson stats: [numFiles=1, totalSize=216]
hive> select * from sjson;
{"Rtype":{"ver":"1","os":"ms","type":"ns","vehicle":"Mh-3412","MOD":{"Version":[{"ABC":{"XYZ":"123.dfer","founder":"3.0","GHT":"Florida","fashion":"fg45","cdc":"new","dof":"yes","ts":"2000-04-01T00:00:00.171Z"}}]}}}
Time taken: 1.297 seconds, Fetched: 1 row(s)
hive> select get_json_object(data,'$.Rtype.MOD.Version[0].ABC.fashion') from sjson;
OK
fg45
Time taken: 0.084 seconds, Fetched: 1 row(s)
这篇关于配置单元中的JSON解析问题的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!