clickhouse : information_schema.KEY_COLUMN_USAGE - clickhouse

In MySQL we have information_schema.KEY_COLUMN_USAGE. Where can we find the same information in ClickHouse?
In MySQL, executing select * from information_schema.KEY_COLUMN_USAGE returns this information. I want to know which query in ClickHouse gives the same result.

ClickHouse (CH) provides several ways to get metadata about a table.
Let's create a test table:
-- Demo table used by the metadata queries that follow: one Int32 column with
-- a compression codec, a CHECK constraint, and a MergeTree engine sorted by id.
CREATE TABLE test_001
(
    `id` Int32 CODEC(Delta, LZ4),
    CONSTRAINT id_should_be_positive CHECK id > 0
)
ENGINE = MergeTree()
PARTITION BY tuple()
ORDER BY id
and look at these ways:
-- Table-level metadata: engine, keys, storage paths, full CREATE statement.
-- system.tables covers every database on the server, so the lookup is pinned
-- to the current database; filtering on the name alone would also return
-- same-named tables that live in other databases.
-- SELECT * is deliberate here: the goal is to show every metadata column.
SELECT *
FROM system.tables
WHERE database = currentDatabase()
  AND name = 'test_001'
FORMAT Vertical
/*
Row 1:
──────
database: default
name: test_001
uuid: 00000000-0000-0000-0000-000000000000
engine: MergeTree
is_temporary: 0
data_paths: ['/var/lib/clickhouse/data/default/test_001/']
metadata_path: /var/lib/clickhouse/metadata/default/test_001.sql
metadata_modification_time: 2020-07-21 12:42:07
dependencies_database: []
dependencies_table: []
create_table_query: CREATE TABLE default.test_001 (`id` Int32 CODEC(Delta(4), LZ4), CONSTRAINT id_should_be_positive CHECK id > 0) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY id SETTINGS index_granularity = 8192
engine_full: MergeTree() PARTITION BY tuple() ORDER BY id SETTINGS index_granularity = 8192
partition_key: tuple()
sorting_key: id
primary_key: id
sampling_key:
storage_policy: default
total_rows: 0
total_bytes: 0
*/
-- Column-level metadata: type, defaults, codec, and whether the column takes
-- part in the partition / sorting / primary / sampling key.
-- system.columns covers every database on the server, so the lookup is pinned
-- to the current database; filtering on the table name alone would also
-- return columns of same-named tables in other databases.
-- SELECT * is deliberate here: the goal is to show every metadata column.
SELECT *
FROM system.columns
WHERE database = currentDatabase()
  AND table = 'test_001'
FORMAT Vertical
/*
Row 1:
──────
database: default
table: test_001
name: id
type: Int32
default_kind:
default_expression:
data_compressed_bytes: 0
data_uncompressed_bytes: 0
marks_bytes: 0
comment:
is_in_partition_key: 0
is_in_sorting_key: 1
is_in_primary_key: 1
is_in_sampling_key: 0
compression_codec: CODEC(Delta(4), LZ4)
*/
-- Per-column summary: name, type, default, comment, codec and TTL expression.
DESCRIBE TABLE test_001
/*
┌─name─┬─type──┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ Int32 │ │ │ │ Delta(4), LZ4 │ │
└──────┴───────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
*/
-- Prints the full CREATE statement, including the constraint and engine settings.
SHOW CREATE TABLE test_001
/*
┌─statement──────────────────────────────────────────┐
│ CREATE TABLE default.test_001
(
`id` Int32 CODEC(Delta(4), LZ4),
CONSTRAINT id_should_be_positive CHECK id > 0
)
ENGINE = MergeTree()
PARTITION BY tuple()
ORDER BY id
SETTINGS index_granularity = 8192 │
└─────────────────────────────────────────────────────┘
*/

Related

Is there a way to left join on column which is an Array

I'm wondering if it's possible to make a left join on Array column, I know there is arrayJoin but I'm not sure if it suits my case.
I did try a query like this:
-- Asker's attempt: attach the last week's per-policy profits to every row of
-- `foo` whose `policies` array contains that policy.
-- Fix applied: the comma after `foo.policies AS policies` used to sit inside
-- the trailing `--` comment, which left the select list without a separator.
SELECT
    foo.name AS name,          -- String
    foo.policies AS policies,  -- Array(String)
    groupArray(profits7d.profits) AS profits7D
FROM (
    ...
) AS foo
ANY LEFT JOIN (
    -- One row per policy carrying an array of (day, summed profit) tuples.
    SELECT
        groupArray((x, y)) AS profits,
        policy  -- String
    FROM (
        SELECT
            day AS x,
            SUM(profit) AS y,
            policy
        FROM profits
        WHERE x >= toDateTime(NOW() - INTERVAL '1 week')
        GROUP BY x, policy
    )
    GROUP BY policy
) AS profits7d
    -- This ON expression is the one reported to fail with
    -- INVALID_JOIN_ON_EXPRESSION; it is kept as-is because it is the subject
    -- of the question.
    ON has(foo.policies, profits7d.policy)
I expect this:
┌─name─┬─policies──────┬─profits7D────────────┐
│ test │ ['one','two'] │ [(200,'2023-01-01')] │
└──────┴───────────────┴──────────────────────┘
I get an INVALID_JOIN_ON_EXPRESSION error, so I'm asking what I need to change for this query to work; I can't find an expression that works in the ON clause.
Thanks in advance
Cheers
Without arrayJoin
-- Left side: a single row whose `polices` array lists the policies to match.
CREATE TABLE L
(
    polices Array(String),
    f String
)
ENGINE = Memory
AS SELECT ['a', 'b'], 'x';

-- Right side: one row per policy.
CREATE TABLE R
(
    policy String,
    f String
)
ENGINE = Memory
AS SELECT * FROM values(('a', 'y'), ('b', 'y1'));
-- Join without arrayJoin: compare array positions 1 through 4 against
-- R.policy one by one. Only the first four elements of `polices` are covered.
SELECT *
FROM L
ANY LEFT JOIN R
    ON (L.polices[1] = R.policy)
    OR (L.polices[2] = R.policy)
    OR (L.polices[3] = R.policy)
    OR (L.polices[4] = R.policy)
┌─polices───┬─f─┬─policy─┬─R.f─┐
│ ['a','b'] │ x │ a │ y │
└───────────┴───┴────────┴─────┘
With arrayJoin
-- Join via arrayJoin: unfold `polices` into one row per element, join each
-- element to R on equality, then fold the rows back per original array.
SELECT
L.polices,
-- any() keeps one joined (L.f, R columns) tuple per group; t.1 / t.2 / t.3
-- unpack that tuple into separate output columns.
(any((L.f, R.*)) AS t).1 AS f,
t.2 AS policy,
t.3 AS f1
FROM
(
SELECT
-- Produces one row per element of `polices`, exposed as _policy.
arrayJoin(L.polices) AS _policy,
polices,
f
FROM L
) AS L
ANY LEFT JOIN R ON _policy = R.policy
-- Collapse the unfolded rows back to a single row per `polices` value.
GROUP BY L.polices
┌─polices───┬─f─┬─policy─┬─f1─┐
│ ['a','b'] │ x │ a │ y │
└───────────┴───┴────────┴────┘

Query materialized view returned more data than local table

When I query a materialized view, I get more data than from the local table it is based on.
What's wrong with it?
Query mv search_uv_minute_level:
-- Daily totals read back from the materialized view by merging the stored
-- aggregate states. Fix applied: the leading SELECT keyword was missing.
-- NOTE(review): per the view definition, stime = toStartOfMinute(dt), i.e. a
-- minute bucket. The upper bound `<= 20210623150800` therefore admits the whole
-- 15:08 minute, whereas the same bound applied to the raw table's `dt` admits
-- only 15:08:00 itself. This is a likely cause of the larger numbers; confirm.
SELECT
    toYYYYMMDD(stime) AS dt,
    uniqExactMerge(imp_uv) AS imp_uv,
    uniqExactMerge(clk_uv) AS clk_uv,
    uniqExactMerge(imp_count) AS imp_count,
    sumMerge(clk_count) AS clk_count
FROM search_uv_minute_level
WHERE (toYYYYMMDD(stime) = 20210623)
  AND (toYYYYMMDDhhmmss(stime) >= 20210623150500)
  AND (toYYYYMMDDhhmmss(stime) <= 20210623150800)
GROUP BY toYYYYMMDD(stime)
┌───────dt─┬─imp_uv─┬─clk_uv─┬─imp_count─┬─clk_count─┐
│ 20210623 │ 114108 │ 66320 │ 179590 │ 110870 │
└──────────┴────────┴────────┴───────────┴───────────┘
Query local table search_action_dwd:
-- Same daily totals computed directly from the raw events table:
-- distinct uid for IMP and CLK rows, distinct `e` for IMP rows, and a count
-- of CLK rows, restricted to 2021-06-23 15:05:00 .. 15:08:00 (inclusive).
SELECT
    toYYYYMMDD(dt) AS t,
    uniqExact(IF(type = 'IMP', uid, NULL)) AS uv,
    uniqExact(IF(type = 'CLK', uid, NULL)) AS clk_uv,
    uniqExact(IF(type = 'IMP', e, NULL)) AS imp,
    SUM(IF(type = 'CLK', 1, 0)) AS clk
FROM search_action_dwd
WHERE toYYYYMMDD(dt) = 20210623
  AND toYYYYMMDDhhmmss(dt) >= 20210623150500
  AND toYYYYMMDDhhmmss(dt) <= 20210623150800
GROUP BY t
┌────────t─┬────uv─┬─clk_uv─┬────imp─┬───clk─┐
│ 20210623 │ 92755 │ 54265 │ 141838 │ 87337 │
└──────────┴───────┴────────┴────────┴───────┘
mv definition:
-- Materialized view that pre-aggregates search_action_dwd into one row of
-- aggregate states per minute (stime = start of the minute of dt).
CREATE MATERIALIZED VIEW qiso_bi.search_uv_minute_level
(
    `stime` DateTime('Asia/Shanghai'),
    `imp_uv` AggregateFunction(uniqExact, String),
    `clk_uv` AggregateFunction(uniqExact, String),
    `imp_count` AggregateFunction(uniqExact, String),
    `clk_count` AggregateFunction(sum, UInt8)
)
ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/{cluster}/{shard}/search_uv_minute_level', '{replica}')
PARTITION BY toYYYYMMDD(stime)
ORDER BY toYYYYMMDDhhmmss(stime)
SETTINGS index_granularity = 8192
AS
SELECT
    toStartOfMinute(dt) AS stime,
    uniqExactState(IF(type = 'IMP', uid, NULL)) AS imp_uv,
    uniqExactState(IF(type = 'CLK', uid, NULL)) AS clk_uv,
    uniqExactState(IF(type = 'IMP', e, NULL)) AS imp_count,
    sumState(IF(type = 'CLK', 1, 0)) AS clk_count
FROM search_action_dwd
WHERE toYYYYMMDD(dt) >= 20210623
GROUP BY toStartOfMinute(dt)

Why I can't apply clickhouse json functions for "FORMAT JSON" query results?

I made a simple query to test JSONExtractRaw function, but with no luck:
-- Asker's attempt, reported to fail with a syntax error: the FORMAT JSON
-- clause is placed inside the subquery and its output is then treated as a
-- value `j` for JSONExtractRaw.
-- NOTE(review): presumably FORMAT is only accepted at the end of the outermost
-- query — confirm against the ClickHouse SELECT grammar.
select JSONExtractRaw(j, 'data') from (select * from devices
format JSON) as j;
But it doesn't work because of a "Syntax error". Is that a bug, or is this expected?
The functions formatRow and formatRowNoNewline, which format SQL output, were added in v20.7.2.30:
-- Serialize every row to a JSONEachRow string with formatRow, then pull the
-- raw `data` field back out of that string with JSONExtractRaw.
SELECT JSONExtractRaw(j, 'data') AS json
FROM
(
    SELECT formatRow('JSONEachRow', *) AS j
    FROM
    (
        /* emulate the test dataset */
        SELECT 1 AS id, '[{"a": 8}]' AS data
        UNION ALL
        SELECT 2 AS id, '[{"a": 2}]' AS data
    )
)
┌─json───────────┐
│ "[{\"a\": 8}]" │
│ "[{\"a\": 2}]" │
└────────────────┘

Set array value at specific index in ClickHouse

Is it possible to update an array at specific index in existing row in ClickHouse db? Something like alter table mytable set arr[3]=8
-- Sample table with a Nested column; its sub-columns Person.Name and
-- Person.value are supplied as two arrays in the INSERT below.
CREATE TABLE xxx
(
    A Int64,
    Person Nested(Name String, value String)
)
ENGINE = MergeTree
ORDER BY A;

INSERT INTO xxx VALUES (1, ['a', 'b', 'c'], ['aaa', 'bbb', 'ccc'])
if array index = 3 then name = '1'
-- Rewrite Person.Name so that position 3 becomes '1'; every other position
-- keeps its current element. WHERE 1 applies the update to all rows.
ALTER TABLE xxx
    UPDATE "Person.Name" = arrayMap(
        pos -> if(pos = 3, '1', "Person.Name"[pos]),
        arrayEnumerate("Person.Name")
    )
    WHERE 1;

SELECT * FROM xxx;
┌─A─┬─Person.Name───┬─Person.value────────┐
│ 1 │ ['a','b','1'] │ ['aaa','bbb','ccc'] │
└───┴───────────────┴─────────────────────┘
if name = a then name = 1
-- Rewrite Person.Name so that every element equal to 'a' becomes '1'; all
-- other elements are kept. WHERE 1 applies the update to all rows.
ALTER TABLE xxx
    UPDATE "Person.Name" = arrayMap(
        elem -> if(elem = 'a', '1', elem),
        "Person.Name"
    )
    WHERE 1;

SELECT * FROM xxx;
┌─A─┬─Person.Name───┬─Person.value────────┐
│ 1 │ ['1','b','c'] │ ['aaa','bbb','ccc'] │
└───┴───────────────┴─────────────────────┘
if name = c then value = 333
-- Rewrite Person.value so that the entry whose sibling Person.Name element is
-- 'c' becomes '333'; all other entries are kept. The two arrays are walked in
-- parallel. WHERE 1 applies the update to all rows.
ALTER TABLE xxx
    UPDATE "Person.value" = arrayMap(
        (val, nm) -> if(nm = 'c', '333', val),
        "Person.value",
        "Person.Name"
    )
    WHERE 1;

SELECT * FROM xxx
┌─A─┬─Person.Name───┬─Person.value────────┐
│ 1 │ ['1','b','c'] │ ['aaa','bbb','333'] │
└───┴───────────────┴─────────────────────┘

T-SQL Like query performance + doing cross join

-- Step 1: collect zone postcodes (joined to their new-zone postcode `op`)
-- into the temp table #tmptst.
WITH tst
AS (
SELECT nz.sPostcode op
,zp.nZoneCountryID
,zp.sPostcode
,zp.nZoneID
,zp.nInjectionID
FROM dbo.tblZonePostcode zp
INNER JOIN tblNewZone nz ON nz.nPostcodeID = zp.NewZoneId
-- Keep only rows whose sPostcode has '%' as its first character.
WHERE CHARINDEX('%', zp.sPostcode) = 1
AND zp.sPostcode IS NOT NULL
-- Keep only rows whose sPostcodeRange is not numeric.
AND isnumeric(zp.sPostcodeRange) = 0
)
SELECT *
INTO #tmptst
FROM tst;
-- Step 2: one single-column index per lookup column used by the UPDATE below.
-- NOTE(review): the UPDATE filters on nZoneCountryID, nInjectionID and nZoneID
-- together; a single composite index over those three columns may serve it
-- better than four separate ones — verify with the execution plan.
CREATE INDEX idx_sPostcode12 ON #tmptst (sPostcode);
CREATE INDEX idx_x1 ON #tmptst (nZoneCountryID);
CREATE INDEX idx_x2 ON #tmptst (nInjectionID);
CREATE INDEX idx_x3 ON #tmptst (nZoneID);
-- Step 3: for every pending #tmprec row, store the `op` of a #tmptst row with
-- the same country / injection point / zone whose sPostcode is a prefix of
-- the row's sConsigneePostcodeFirst.
UPDATE #tmprec
SET calculatedPostCodeZone = (
-- NOTE(review): TOP 1 has no ORDER BY, so when several rows match, which one
-- is taken is not specified by this query — confirm that is acceptable.
SELECT TOP 1 zp.op
FROM #tmptst zp
WHERE zp.nZoneCountryID = CalculatedCountryId
AND zp.nInjectionID = CalculatedinjectionPointId
AND zp.nZoneID = CalculatedZoneId
-- Prefix test: sPostcode must occur at position 1 of sConsigneePostcodeFirst.
AND CHARINDEX(zp.sPostcode, sConsigneePostcodeFirst) = 1
)
-- Only rows that have a rate-chart group and have not been calculated yet.
WHERE CalculatedRateChartGroupID > 0
AND calculatedRatechartValues IS NULL
AND calculatedPostCodeZone IS NULL
AND CalculationStatus IS NULL
-- Skip the update entirely when #tmptst is empty.
AND 0 < (
SELECT count(1)
FROM #tmptst
);
In total, tst contains 85k records and #tmprec contains 255k records. The objective of the above query is to get the matching postcode from tblNewZone for records whose sConsigneePostcodeFirst starts with the zone postcode.
The issue is that the above query takes 7 minutes to execute. I have added indexes to the query, tried a LIKE query, and tried a full-text search index.
Please let me know if there is anything I can do to improve the query.

Resources