clickhouse 1 node out of 3 not syncing - clickhouse

I have a 3-node clickhouse cluster, almost identical to the setup in https://github.com/tetafro/clickhouse-cluster.
I ran the CREATE TABLE statement on the 3 nodes, as follows:
-- Per-node storage table for the hits data set (ClickHouse).
-- The engine arguments are macro-expanded on each server: the first is the ZooKeeper path
-- of the table (built from {shard}), the second is this server's replica name ({replica}).
-- Rows are partitioned by calendar month of EventDate and sorted by
-- (CounterID, EventDate, intHash32(UserID)); the same hash is the sampling key.
CREATE TABLE replica_test.hits_shard
(
    `WatchID` UInt64,
    `JavaEnable` UInt8,
    `Title` String,
    `GoodEvent` Int16,
    `EventTime` DateTime,
    `EventDate` Date,
    `CounterID` UInt32,
    `ClientIP` UInt32,
    `ClientIP6` FixedString(16),
    `RegionID` UInt32,
    `UserID` UInt64,
    `CounterClass` Int8,
    `OS` UInt8,
    `UserAgent` UInt8,
    `URL` String,
    `Referer` String,
    `URLDomain` String,
    `RefererDomain` String,
    `Refresh` UInt8,
    `IsRobot` UInt8,
    `RefererCategories` Array(UInt16),
    `URLCategories` Array(UInt16),
    `URLRegions` Array(UInt32),
    `RefererRegions` Array(UInt32),
    `ResolutionWidth` UInt16,
    `ResolutionHeight` UInt16,
    `ResolutionDepth` UInt8,
    `FlashMajor` UInt8,
    `FlashMinor` UInt8,
    `FlashMinor2` String,
    `NetMajor` UInt8,
    `NetMinor` UInt8,
    `UserAgentMajor` UInt16,
    `UserAgentMinor` FixedString(2),
    `CookieEnable` UInt8,
    `JavascriptEnable` UInt8,
    `IsMobile` UInt8,
    `MobilePhone` UInt8,
    `MobilePhoneModel` String,
    `Params` String,
    `IPNetworkID` UInt32,
    `TraficSourceID` Int8,
    `SearchEngineID` UInt16,
    `SearchPhrase` String,
    `AdvEngineID` UInt8,
    `IsArtifical` UInt8,
    `WindowClientWidth` UInt16,
    `WindowClientHeight` UInt16,
    `ClientTimeZone` Int16,
    `ClientEventTime` DateTime,
    `SilverlightVersion1` UInt8,
    `SilverlightVersion2` UInt8,
    `SilverlightVersion3` UInt32,
    `SilverlightVersion4` UInt16,
    `PageCharset` String,
    `CodeVersion` UInt32,
    `IsLink` UInt8,
    `IsDownload` UInt8,
    `IsNotBounce` UInt8,
    `FUniqID` UInt64,
    `HID` UInt32,
    `IsOldCounter` UInt8,
    `IsEvent` UInt8,
    `IsParameter` UInt8,
    `DontCountHits` UInt8,
    `WithHash` UInt8,
    `HitColor` FixedString(1),
    `UTCEventTime` DateTime,
    `Age` UInt8,
    `Sex` UInt8,
    `Income` UInt8,
    `Interests` UInt16,
    `Robotness` UInt8,
    `GeneralInterests` Array(UInt16),
    `RemoteIP` UInt32,
    `RemoteIP6` FixedString(16),
    `WindowName` Int32,
    `OpenerName` Int32,
    `HistoryLength` Int16,
    `BrowserLanguage` FixedString(2),
    `BrowserCountry` FixedString(2),
    `SocialNetwork` String,
    `SocialAction` String,
    `HTTPError` UInt16,
    `SendTiming` Int32,
    `DNSTiming` Int32,
    `ConnectTiming` Int32,
    `ResponseStartTiming` Int32,
    `ResponseEndTiming` Int32,
    `FetchTiming` Int32,
    `RedirectTiming` Int32,
    `DOMInteractiveTiming` Int32,
    `DOMContentLoadedTiming` Int32,
    `DOMCompleteTiming` Int32,
    `LoadEventStartTiming` Int32,
    `LoadEventEndTiming` Int32,
    `NSToDOMContentLoadedTiming` Int32,
    `FirstPaintTiming` Int32,
    `RedirectCount` Int8,
    `SocialSourceNetworkID` UInt8,
    `SocialSourcePage` String,
    `ParamPrice` Int64,
    `ParamOrderID` String,
    `ParamCurrency` FixedString(3),
    `ParamCurrencyID` UInt16,
    `GoalsReached` Array(UInt32),
    `OpenstatServiceName` String,
    `OpenstatCampaignID` String,
    `OpenstatAdID` String,
    `OpenstatSourceID` String,
    `UTMSource` String,
    `UTMMedium` String,
    `UTMCampaign` String,
    `UTMContent` String,
    `UTMTerm` String,
    `FromTag` String,
    `HasGCLID` UInt8,
    `RefererHash` UInt64,
    `URLHash` UInt64,
    `CLID` UInt32,
    `YCLID` UInt64,
    `ShareService` String,
    `ShareURL` String,
    `ShareTitle` String,
    `ParsedParams` Nested(
        Key1 String,
        Key2 String,
        Key3 String,
        Key4 String,
        Key5 String,
        ValueDouble Float64
    ),
    `IslandID` FixedString(16),
    `RequestNum` UInt32,
    `RequestTry` UInt8
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tables/hits', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID);
-- Cluster-wide entry point over the per-node replica_test.hits_shard tables (ClickHouse).
-- The column list is inherited from hits_shard through `AS` instead of being repeated by
-- hand, so the two definitions cannot drift apart when a column is added or retyped.
-- Engine arguments, in order: cluster name 'hivestack', database replica_test, underlying
-- table 'hits_shard', and the sharding key rand().
CREATE TABLE replica_test.hits_distributed AS replica_test.hits_shard
ENGINE = Distributed('hivestack', replica_test, 'hits_shard', rand());
I have an out-of-sync issue with node-2 as follows:
If I insert data into node-2, the data is only seen by node-2:
insert ... into node2 => hits_shard, then select count(*) from node1 => hits_shard; the result is 0.
If I insert data into node-1 or node-3, it is seen by all 3 nodes.
SELECT * from system.replicas:
replica_test,hits_shard,ReplicatedMergeTree,1,1,0,0,0,0,/clickhouse/tables/02/tables/hits,clickhouse02,/clickhouse/tables/02/tables/hits/replicas/clickhouse02,-1,0,0,0,0,1969-12-31 19:00:00,1969-12-31 19:00:00,1969-12-31 19:00:00,1969-12-31 19:00:00,"","","",0,0,1969-12-31 19:00:00,0,1,1,"","",{'clickhouse02':1}
select * from system.replication_queue is empty
clickhouse version: clickhouse/clickhouse-server:22.4.5.9
zookeeper version: bitnami/zookeeper:3.8.0
I verified that all 3 nodes can talk to each other via clickhouse-client and nc 9000.
Would appreciate any tips on how to debug/fix this

Are you sure you have the same zookeeper clickhouse settings on all 3 nodes?
Do you insert into the Distributed table or directly into the ReplicatedMergeTree table?
In the first case, run these queries on node-2:
-- Inspect this node's system.distribution_queue table.
SELECT *
FROM system.distribution_queue
FORMAT Vertical;
-- Inspect this node's system.clusters table.
SELECT *
FROM system.clusters
FORMAT Vertical;
and compare the <remote_servers> section of the config on all 3 nodes.

Related

Non-string values showing as NULL in Hive

I'm new to Hive and creating my first table!
For some reason all non-string values are showing as NULL (including INT, BOOLEAN, etc.).
My data looks like this sample row:
58;"management";"married";"tertiary";"no";2143;"yes";"no";"unknown";5;"may";261;1;-1;0;"unknown";"no"
I used this to create the table:
-- External Hive table over the semicolon-delimited bank data set; the first line of each
-- file is skipped as a header.
-- Columns follow the field order of the sample row: it carries 17 fields, and the third
-- one ("married") previously had no column, which shifted every later value by one.
-- NOTE(review): the sample row holds quoted "yes"/"no" text in the BOOLEAN positions;
-- presumably those will not parse as booleans with the delimited format - confirm.
CREATE EXTERNAL TABLE bank_dataset (
    age TINYINT,
    job STRING,
    marital STRING,
    education STRING,
    default BOOLEAN,
    balance INT,
    housing BOOLEAN,
    loan BOOLEAN,
    contact STRING,
    day STRING,
    month STRING,
    duration INT,
    campaign INT,
    pdays INT,
    previous INT,
    poutcome STRING,
    y BOOLEAN
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\u003B'
STORED AS TEXTFILE
LOCATION '/user/marchenrisaad_gmail/Bank_Project'
TBLPROPERTIES ("skip.header.line.count"="1");
Thanks for the comments, it worked! But I have one issue: for every row I get all the data correctly, but then I get extra columns of null values. My code is below:
-- External Hive table over the bank data set, parsed with OpenCSVSerde using ';' as the
-- separator and '"' as the quote character; the first line of each file is skipped.
-- Columns follow the field order of the sample row: it carries 17 fields, and the third
-- one ("married") previously had no column, which shifted every later value by one.
-- NOTE(review): OpenCSVSerde is documented to expose every column as STRING regardless
-- of the declared type - confirm whether the declared types matter for your queries.
CREATE EXTERNAL TABLE bank_dataset (
    age TINYINT,
    job STRING,
    marital STRING,
    education STRING,
    default BOOLEAN,
    balance INT,
    housing BOOLEAN,
    loan BOOLEAN,
    contact STRING,
    day INT,
    month STRING,
    duration INT,
    campaign INT,
    pdays INT,
    previous INT,
    poutcome STRING,
    y BOOLEAN
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
    "separatorChar" = "\u003B",
    "quoteChar" = '"'
)
STORED AS TEXTFILE
LOCATION '/user/marchenrisaad_gmail/Bank_Project'
TBLPROPERTIES ("skip.header.line.count"="1");
Any suggestions?

DB::Exception: Received from localhost:8000. DB::Exception: Number of columns doesn't match

create table:-
-- Bank/IFSC table (ClickHouse).
-- event_date defaults to the insertion day; createdtimestr / updatedtimestr default to the
-- DateTime form of the integer createdtime / updatedtime columns.
-- The engine clause uses the explicit PARTITION BY / ORDER BY form in place of the
-- deprecated positional ReplacingMergeTree(event_date, id, 8192): monthly partitions on
-- event_date, sorting key id, index granularity 8192.
CREATE TABLE default.bankIfsc
(
    `event_date` Date DEFAULT toDate(now()),
    `id` Int32,
    `uid` Int32,
    `nid` Int32,
    `bank` String,
    `ifsc_code` String,
    `micr_code` String,
    `branch` String,
    `address` String,
    `contact` String,
    `city` String,
    `district` String,
    `state` String,
    `content` String,
    `feature_image` String,
    `var1` String,
    `var2` String,
    `var3` String,
    `var4` String,
    `var5` String,
    `createdtime` Int32,
    `createdtimestr` DateTime DEFAULT toDateTime(createdtime),
    `updatedtime` Int32,
    `updatedtimestr` DateTime DEFAULT toDateTime(updatedtime),
    `status` Int32
)
ENGINE = ReplacingMergeTree
PARTITION BY toYYYYMM(event_date)
ORDER BY id
SETTINGS index_granularity = 8192;
datastore in this table and table view:-
-- Source table for the copy into bankIfsc (ClickHouse, Log engine).
-- Same columns as bankIfsc except that it has no event_date column and declares no
-- DEFAULT expressions.
CREATE TABLE default.bankIfsc_bck
(
    `id` Int32,
    `uid` Int32,
    `nid` Int32,
    `bank` String,
    `ifsc_code` String,
    `micr_code` String,
    `branch` String,
    `address` String,
    `contact` String,
    `city` String,
    `district` String,
    `state` String,
    `content` String,
    `feature_image` String,
    `var1` String,
    `var2` String,
    `var3` String,
    `var4` String,
    `var5` String,
    `createdtime` Int32,
    `createdtimestr` DateTime,
    `updatedtime` Int32,
    `updatedtimestr` DateTime,
    `status` Int32
)
ENGINE = Log
insert data:-
-- Copies rows from bankIfsc_bck into bankIfsc, naming 24 target columns (event_date is
-- not in the target list).
-- NOTE: the parentheses wrapping the SELECT list turn the 24 expressions into one
-- Tuple-typed value, so this SELECT produces a single column against 24 target columns.
INSERT INTO bankIfsc ( id, uid, nid, bank, ifsc_code, micr_code, branch,
address, contact, city, district, state, content, feature_image, var1,
var2, var3, var4, var5, createdtime, createdtimestr, updatedtime,
updatedtimestr, status )
SELECT (id, uid, nid, bank, ifsc_code, micr_code, branch, address,
contact, city, district, state, content, feature_image, var1, var2, var3,
var4, var5, createdtime, createdtimestr, updatedtime, updatedtimestr,
status)
FROM bankIfsc_bck;
The error says the number of columns doesn't match.
Can anyone help with this error, please?
Parentheses ( ) around a list of expressions create a single value of the Tuple data type.
Two columns -- select 1,2
desc (select 1,2)
┌─name─┬─type──┬
│ 1 │ UInt8 │
│ 2 │ UInt8 │
└──────┴───────┴
1 column -- select (1,2)
desc (select (1,2))
┌─name────────┬─type────────────────┬
│ tuple(1, 2) │ Tuple(UInt8, UInt8) │
└─────────────┴─────────────────────┴
Remove the ( ) from your SELECT:
-- Select list without the wrapping parentheses: 24 separate columns.
SELECT
    id,
    uid,
    nid,
    bank,
    ifsc_code,
    micr_code,
    branch,
    address,
    contact,
    city,
    district,
    state,
    content,
    feature_image,
    var1,
    var2,
    var3,
    var4,
    var5,
    createdtime,
    createdtimestr,
    updatedtime,
    updatedtimestr,
    status
FROM bankIfsc_bck;

Hive insert query failing with error return code -101

I am trying to run a simple insert statement as below:
-- Dynamic-partition insert: call_date is named without a value, so each row's partition
-- comes from the data selected out of bwc_master.
INSERT INTO TABLE `bwc_test` PARTITION (call_date)
SELECT *
FROM `bwc_master`;
Then it fails with the below error:
INFO : Loading data to table dtc.bwc_test partition (call_date=null) from /apps/hive/warehouse/dtc.db/bwc_test/.hive-staging_hive_2018-11-13_19-10-37_084_8697431764330812894-1/-ext-10000
Error: Error while processing statement: FAILED: Execution Error, return code -101 from org.apache.hadoop.hive.ql.exec.MoveTask. HIVE_LOAD_DYNAMIC_PARTITIONS_THREAD_COUNT (state=08S01,code=-101)
Table definition for bwc_master:
-- Hive table bwc_master: partitioned by call_date, bucketed on customer_id into
-- 10 buckets, stored as ORC at the given HDFS location.
CREATE TABLE `bwc_master` (
    unique_id bigint,
    customer_id string,
    direction string,
    call_date_time timestamp,
    duration int,
    billed_duration int,
    retail_rate decimal(9,7),
    retail_cost decimal(19,7),
    billed_tier smallint,
    call_type tinyint,
    record_status tinyint,
    aggregate_id bigint,
    originating_ipaddress string,
    originating_number string,
    destination_number string,
    lrn string,
    ocn string,
    destination_rate_center string,
    destination_lata int,
    billed_prefix string,
    rate_id string,
    wholesale_rate decimal(9,7),
    wholesale_cost decimal(19,7),
    cnam_dipped boolean,
    billed_number_type tinyint,
    source_lata int,
    source_ocn string,
    location_id string,
    sippeer_id int,
    rate_attempts tinyint,
    source_state string,
    source_rc string,
    destination_country string,
    destination_state string,
    destination_ip string,
    carrier_id string,
    rated_date_time timestamp,
    partition_id smallint,
    encryption_rate decimal(9,7),
    encryption_cost decimal(19,7),
    trans_coding_rate decimal(9,7),
    trans_coding_cost decimal(19,7),
    file_name string,
    call_id string,
    from_tag string,
    to_tag string,
    unique_record_id string
)
PARTITIONED BY (`call_date` date)
CLUSTERED BY (customer_id) INTO 10 BUCKETS
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
STORED AS
    INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION 'hdfs://*****/apps/hive/warehouse/dtc.db/bwc_master'
Can someone help me debug this? I didn't find anything in the logs.
You are missing the "table" keyword before bwc_test:
-- Insert with the TABLE keyword present; call_date is a dynamic partition column.
INSERT INTO TABLE `bwc_test` PARTITION (call_date)
SELECT *
FROM `bwc_master`;

Query on Bucketized Table

I created a bucketed table as follows:
-- Recreate the test table from scratch: a transactional ORC table bucketed on storeid
-- into 2 buckets (Hive).
DROP TABLE IF EXISTS bi_st.st_usr_member_active_day_test;
CREATE TABLE `bi_st.st_usr_member_active_day_test` (
    `cal_dt_from` string,
    `cal_dt_to` string,
    `memberid` string,
    `vipcode` string,
    `vipleavel` string,
    `cityid` string,
    `cityname` string,
    `groupid` int,
    `groupname` string,
    `storeid` int,
    `storename` string,
    `sectionid` int,
    `sectionname` string,
    `promotionid` string,
    `promotionname` string,
    `moduleid` string,
    `modulename` string,
    `activeness_today` string,
    `new_vip_class` string
)
CLUSTERED BY (storeid) INTO 2 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS ORC
TBLPROPERTIES ('transactional'='true');
I then inserted some data into it and ran
select * from bi_st.st_usr_member_active_day_test where storeid = 193; which failed with an array-index-out-of-bounds error. Can anybody explain this? Thanks.

How do I INSERT OVERWRITE with a struct in HIVE?

I have a Hive table tweets stored as text that I am trying to write to another table tweetsORC that is ORC. Both have the same structure:
col_name data_type comment
racist boolean from deserializer
contributors string from deserializer
coordinates string from deserializer
created_at string from deserializer
entities struct<hashtags:array<string>,symbols:array<string>,urls:array<struct<display_url:string,expanded_url:string,indices:array<tinyint>,url:string>>,user_mentions:array<string>> from deserializer
favorite_count tinyint from deserializer
favorited boolean from deserializer
filter_level string from deserializer
geo string from deserializer
id bigint from deserializer
id_str string from deserializer
in_reply_to_screen_name string from deserializer
in_reply_to_status_id string from deserializer
in_reply_to_status_id_str string from deserializer
in_reply_to_user_id string from deserializer
in_reply_to_user_id_str string from deserializer
is_quote_status boolean from deserializer
lang string from deserializer
place string from deserializer
possibly_sensitive boolean from deserializer
retweet_count tinyint from deserializer
retweeted boolean from deserializer
source string from deserializer
text string from deserializer
timestamp_ms string from deserializer
truncated boolean from deserializer
user struct<contributors_enabled:boolean,created_at:string,default_profile:boolean,default_profile_image:boolean,description:string,favourites_count:tinyint,follow_request_sent:string,followers_count:tinyint,following:string,friends_count:tinyint,geo_enabled:boolean,id:bigint,id_str:string,is_translator:boolean,lang:string,listed_count:tinyint,location:string,name:string,notifications:string,profile_background_color:string,profile_background_image_url:string,profile_background_image_url_https:string,profile_background_tile:boolean,profile_image_url:string,profile_image_url_https:string,profile_link_color:string,profile_sidebar_border_color:string,profile_sidebar_fill_color:string,profile_text_color:string,profile_use_background_image:boolean,protected:boolean,screen_name:string,statuses_count:smallint,time_zone:string,url:string,utc_offset:string,verified:boolean> from deserializer
When I try to insert from tweets to tweetsORC I get:
INSERT OVERWRITE TABLE tweetsORC SELECT * FROM tweets;
FAILED: NoMatchingMethodException No matching method for class org.apache.hadoop.hive.ql.udf.UDFToString with (struct<hashtags:array<string>,symbols:array<string>,urls:array<struct<display_url:string,expanded_url:string,indices:array<tinyint>,url:string>>,user_mentions:array<string>>). Possible choices: _FUNC_(bigint) _FUNC_(binary) _FUNC_(boolean) _FUNC_(date) _FUNC_(decimal(38,18)) _FUNC_(double) _FUNC_(float) _FUNC_(int) _FUNC_(smallint) _FUNC_(string) _FUNC_(timestamp) _FUNC_(tinyint) _FUNC_(void)
The only help I have found on this kind of problem says to make a UDF use primitive types, but I am not using a UDF! Any help is much appreciated!
FYI: Hive version:
Hive 1.2.1000.2.4.2.0-258
Subversion git://u12-slave-5708dfcd-10/grid/0/jenkins/workspace/HDP-build-ubuntu12/bigtop/output/hive/hive-1.2.1000.2.4.2.0 -r 240760457150036e13035cbb82bcda0c65362f3a
EDIT: Create tables and sample data:
-- Text-backed Hive table of tweets, read through the OpenX JSON SerDe, followed by the
-- load of the sample file from the local filesystem (replacing any existing contents).
CREATE TABLE tweets (
    contributors string,
    coordinates string,
    created_at string,
    entities struct <
        hashtags: array <string>,
        symbols: array <string>,
        urls: array <struct <
            display_url: string,
            expanded_url: string,
            indices: array <tinyint>,
            url: string>>,
        user_mentions: array <string>>,
    favorite_count tinyint,
    favorited boolean,
    filter_level string,
    geo string,
    id bigint,
    id_str string,
    in_reply_to_screen_name string,
    in_reply_to_status_id string,
    in_reply_to_status_id_str string,
    in_reply_to_user_id string,
    in_reply_to_user_id_str string,
    is_quote_status boolean,
    lang string,
    place string,
    possibly_sensitive boolean,
    retweet_count tinyint,
    retweeted boolean,
    source string,
    text string,
    timestamp_ms string,
    truncated boolean,
    `user` struct <
        contributors_enabled: boolean,
        created_at: string,
        default_profile: boolean,
        default_profile_image: boolean,
        description: string,
        favourites_count: tinyint,
        follow_request_sent: string,
        followers_count: tinyint,
        `following`: string,
        friends_count: tinyint,
        geo_enabled: boolean,
        id: bigint,
        id_str: string,
        is_translator: boolean,
        lang: string,
        listed_count: tinyint,
        location: string,
        name: string,
        notifications: string,
        profile_background_color: string,
        profile_background_image_url: string,
        profile_background_image_url_https: string,
        profile_background_tile: boolean,
        profile_image_url: string,
        profile_image_url_https: string,
        profile_link_color: string,
        profile_sidebar_border_color: string,
        profile_sidebar_fill_color: string,
        profile_text_color: string,
        profile_use_background_image: boolean,
        protected: boolean,
        screen_name: string,
        statuses_count: smallint,
        time_zone: string,
        url: string,
        utc_offset: string,
        verified: boolean>
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH '/home/ed/Downloads/hive-json-master/1abbo.txt'
OVERWRITE INTO TABLE tweets;
-- ORC-backed Hive table of tweets, ZLIB-compressed.
-- Compared with `tweets` it has one extra leading column, `racist`; all other columns
-- match by name and type.
-- No ROW FORMAT SERDE clause is given: STORED AS ORC supplies the ORC SerDe together with
-- the ORC input/output formats. The previous explicit JsonSerDe overrode that and paired
-- ORC files with a JSON text SerDe.
CREATE TABLE tweetsORC (
    racist boolean,
    contributors string,
    coordinates string,
    created_at string,
    entities struct <
        hashtags: array <string>,
        symbols: array <string>,
        urls: array <struct <
            display_url: string,
            expanded_url: string,
            indices: array <tinyint>,
            url: string>>,
        user_mentions: array <string>>,
    favorite_count tinyint,
    favorited boolean,
    filter_level string,
    geo string,
    id bigint,
    id_str string,
    in_reply_to_screen_name string,
    in_reply_to_status_id string,
    in_reply_to_status_id_str string,
    in_reply_to_user_id string,
    in_reply_to_user_id_str string,
    is_quote_status boolean,
    lang string,
    place string,
    possibly_sensitive boolean,
    retweet_count tinyint,
    retweeted boolean,
    source string,
    text string,
    timestamp_ms string,
    truncated boolean,
    `user` struct <
        contributors_enabled: boolean,
        created_at: string,
        default_profile: boolean,
        default_profile_image: boolean,
        description: string,
        favourites_count: tinyint,
        follow_request_sent: string,
        followers_count: tinyint,
        `following`: string,
        friends_count: tinyint,
        geo_enabled: boolean,
        id: bigint,
        id_str: string,
        is_translator: boolean,
        lang: string,
        listed_count: tinyint,
        location: string,
        name: string,
        notifications: string,
        profile_background_color: string,
        profile_background_image_url: string,
        profile_background_image_url_https: string,
        profile_background_tile: boolean,
        profile_image_url: string,
        profile_image_url_https: string,
        profile_link_color: string,
        profile_sidebar_border_color: string,
        profile_sidebar_fill_color: string,
        profile_text_color: string,
        profile_use_background_image: boolean,
        protected: boolean,
        screen_name: string,
        statuses_count: smallint,
        time_zone: string,
        url: string,
        utc_offset: string,
        verified: boolean>
)
STORED AS ORC
TBLPROPERTIES ("orc.compress"="ZLIB");
data here.
Instead of using SELECT *, I list the fields by name, and the error goes away.
Data type mismatch: the data type you are inserting is inconsistent with the field type in the corresponding table. For example, if a field was declared as string when the table was created but the value you insert is actually a list (or struct) type, this error will be thrown.

Resources