In MySQL we have information_schema.KEY_COLUMN_USAGE. Where can we find the same information in ClickHouse?
In MySQL, executing select * from information_schema.KEY_COLUMN_USAGE returns this information. I want to know which query in ClickHouse gives the same result.
CH provides several ways to get metadata about a table.
Let's create test-table:
-- Demo table: a single Int32 column with a Delta + LZ4 codec chain and a
-- named CHECK constraint, sorted by id.
CREATE TABLE test_001
(
    `id` Int32 CODEC(Delta, LZ4),
    CONSTRAINT id_should_be_positive CHECK id > 0
)
ENGINE = MergeTree()
PARTITION BY tuple()
ORDER BY id
and look at these ways:
-- Table-level metadata: engine, storage paths, the full CREATE statement and
-- the partition / sorting / primary / sampling key expressions.
-- system.tables lists tables from every database, so the lookup is pinned to
-- the current database; filtering on the name alone would also return a
-- same-named table that lives in another database.
SELECT *
FROM system.tables
WHERE database = currentDatabase()
  AND name = 'test_001'
FORMAT Vertical
/*
Row 1:
──────
database: default
name: test_001
uuid: 00000000-0000-0000-0000-000000000000
engine: MergeTree
is_temporary: 0
data_paths: ['/var/lib/clickhouse/data/default/test_001/']
metadata_path: /var/lib/clickhouse/metadata/default/test_001.sql
metadata_modification_time: 2020-07-21 12:42:07
dependencies_database: []
dependencies_table: []
create_table_query: CREATE TABLE default.test_001 (`id` Int32 CODEC(Delta(4), LZ4), CONSTRAINT id_should_be_positive CHECK id > 0) ENGINE = MergeTree() PARTITION BY tuple() ORDER BY id SETTINGS index_granularity = 8192
engine_full: MergeTree() PARTITION BY tuple() ORDER BY id SETTINGS index_granularity = 8192
partition_key: tuple()
sorting_key: id
primary_key: id
sampling_key:
storage_policy: default
total_rows: 0
total_bytes: 0
*/
-- Column-level metadata: type, default, compression codec, and flags showing
-- whether the column takes part in the partition / sorting / primary /
-- sampling key.
-- system.columns covers every database, so the lookup is pinned to the
-- current database; filtering on the table name alone would also return the
-- columns of a same-named table in another database.
SELECT *
FROM system.columns
WHERE database = currentDatabase()
  AND table = 'test_001'
FORMAT Vertical
/*
Row 1:
──────
database: default
table: test_001
name: id
type: Int32
default_kind:
default_expression:
data_compressed_bytes: 0
data_uncompressed_bytes: 0
marks_bytes: 0
comment:
is_in_partition_key: 0
is_in_sorting_key: 1
is_in_primary_key: 1
is_in_sampling_key: 0
compression_codec: CODEC(Delta(4), LZ4)
*/
-- One row per column: name, type, default, comment, codec and TTL expressions.
DESCRIBE TABLE test_001
/*
┌─name─┬─type──┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ Int32 │ │ │ │ Delta(4), LZ4 │ │
└──────┴───────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
*/
-- Full DDL of the table, including constraints, engine clauses and settings.
SHOW CREATE TABLE test_001
/*
┌─statement──────────────────────────────────────────┐
│ CREATE TABLE default.test_001
(
`id` Int32 CODEC(Delta(4), LZ4),
CONSTRAINT id_should_be_positive CHECK id > 0
)
ENGINE = MergeTree()
PARTITION BY tuple()
ORDER BY id
SETTINGS index_granularity = 8192 │
└─────────────────────────────────────────────────────┘
*/
Related
I'm wondering if it's possible to make a left join on Array column, I know there is arrayJoin but I'm not sure if it suits my case.
I did try a query like this:
-- Attempted query: for each row of foo, collect the last week's
-- (day, total profit) series of the policies listed in foo.policies.
-- NOTE: the ON clause is the part that fails. It is a has() call rather than
-- an equality between a left-side and a right-side key, and it is what the
-- reported INVALID_JOIN_ON_EXPRESSION error refers to.
SELECT
    foo.name AS name,          -- String
    foo.policies AS policies,  -- Array(String)
    groupArray(profits7d.profits) AS profits7D
FROM (
    ...
) AS foo
ANY LEFT JOIN (
    -- one row per policy: array of (day, total profit) tuples
    SELECT
        groupArray((x, y)) AS profits,
        policy  -- String
    FROM (
        -- total profit per day and policy over the last week
        SELECT
            day AS x,
            SUM(profit) AS y,
            policy
        FROM profits
        WHERE x >= toDateTime(NOW() - INTERVAL '1 week')
        GROUP BY x, policy
    )
    GROUP BY policy
) AS profits7d ON has(foo.policies, profits7d.policy)
I expect this:
┌─name─┬─policies──────┬─profits7D────────────┐
│ test │ ['one','two'] │ [('2023-01-01',200)] │
└──────┴───────────────┴──────────────────────┘
I get an INVALID_JOIN_ON_EXPRESSION error. What do I need to change to make this query work? I can't find an expression that is accepted in the ON clause.
Thanks in advance
Cheers
Without arrayJoin
-- Left side: a single row carrying an array of policy names.
CREATE TABLE L
(
    polices Array(String),
    f String
)
ENGINE = Memory AS
SELECT ['a', 'b'], 'x';

-- Right side: one row per policy.
CREATE TABLE R
(
    policy String,
    f String
)
ENGINE = Memory AS
SELECT *
FROM values (('a', 'y'), ('b', 'y1'));
-- Compare the first four array positions against R.policy one by one, as a
-- disjunction of plain equalities in the ON clause.
-- Only positions 1 to 4 are checked, so later elements of longer arrays are
-- never compared.
SELECT *
FROM L
ANY LEFT JOIN R
    ON L.polices[1] = R.policy
    OR L.polices[2] = R.policy
    OR L.polices[3] = R.policy
    OR L.polices[4] = R.policy
┌─polices───┬─f─┬─policy─┬─R.f─┐
│ ['a','b'] │ x │ a │ y │
└───────────┴───┴────────┴─────┘
With arrayJoin
-- Unfold the array into one row per element, join each element to R by plain
-- equality, then fold the rows back into a single row per original array.
SELECT
L.polices,
-- any() keeps one joined (L.f, R.policy, R.f) tuple per group; the inline
-- alias t names that tuple so its other elements can be read below
(any((L.f, R.*)) AS t).1 AS f,
t.2 AS policy,
t.3 AS f1
FROM
(
SELECT
-- arrayJoin emits a separate row for every element of polices
arrayJoin(L.polices) AS _policy,
polices,
f
FROM L
) AS L
ANY LEFT JOIN R ON _policy = R.policy
-- collapse the per-element rows back to one row per array
GROUP BY L.polices
┌─polices───┬─f─┬─policy─┬─f1─┐
│ ['a','b'] │ x │ a │ y │
└───────────┴───┴────────┴────┘
When I query a materialized view, I get more data than from the local table it is based on.
What's wrong with it?
Query mv search_uv_minute_level:
-- Finalize the aggregate states stored in search_uv_minute_level into one
-- row per day, restricted to 15:05:00-15:08:00 on 2021-06-23.
-- The leading SELECT keyword was missing from the statement.
SELECT
    toYYYYMMDD(stime) AS dt,
    uniqExactMerge(imp_uv) AS imp_uv,
    uniqExactMerge(clk_uv) AS clk_uv,
    uniqExactMerge(imp_count) AS imp_count,
    sumMerge(clk_count) AS clk_count
FROM search_uv_minute_level
WHERE (toYYYYMMDD(stime) = 20210623) AND (toYYYYMMDDhhmmss(stime) >= 20210623150500) AND (toYYYYMMDDhhmmss(stime) <= 20210623150800)
GROUP BY toYYYYMMDD(stime)
┌───────dt─┬─imp_uv─┬─clk_uv─┬─imp_count─┬─clk_count─┐
│ 20210623 │ 114108 │ 66320 │ 179590 │ 110870 │
└──────────┴────────┴────────┴───────────┴───────────┘
Query local table search_action_dwd:
-- Daily totals for 15:05:00-15:08:00 on 2021-06-23, computed directly from
-- the rows of search_action_dwd.
SELECT
    toYYYYMMDD(dt) AS t,
    uniqExact(IF(type = 'IMP', uid, NULL)) AS uv,      -- distinct uid values on IMP rows
    uniqExact(IF(type = 'CLK', uid, NULL)) AS clk_uv,  -- distinct uid values on CLK rows
    uniqExact(IF(type = 'IMP', e, NULL)) AS imp,       -- distinct e values on IMP rows
    SUM(IF(type = 'CLK', 1, 0)) AS clk                 -- number of CLK rows
FROM search_action_dwd
WHERE toYYYYMMDD(dt) = 20210623
  AND toYYYYMMDDhhmmss(dt) >= 20210623150500
  AND toYYYYMMDDhhmmss(dt) <= 20210623150800
GROUP BY t
┌────────t─┬────uv─┬─clk_uv─┬────imp─┬───clk─┐
│ 20210623 │ 92755 │ 54265 │ 141838 │ 87337 │
└──────────┴───────┴────────┴────────┴───────┘
mv definition:
-- Groups search_action_dwd rows by minute and stores aggregate states (not
-- final values); readers finalize them with the matching -Merge functions.
CREATE MATERIALIZED VIEW qiso_bi.search_uv_minute_level
(
    `stime` DateTime('Asia/Shanghai'),
    `imp_uv` AggregateFunction(uniqExact, String),
    `clk_uv` AggregateFunction(uniqExact, String),
    `imp_count` AggregateFunction(uniqExact, String),
    `clk_count` AggregateFunction(sum, UInt8)
)
ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/{cluster}/{shard}/search_uv_minute_level', '{replica}')
PARTITION BY toYYYYMMDD(stime)
ORDER BY toYYYYMMDDhhmmss(stime)
SETTINGS index_granularity = 8192
AS
SELECT
    toStartOfMinute(dt) AS stime,
    uniqExactState(IF(type = 'IMP', uid, NULL)) AS imp_uv,
    uniqExactState(IF(type = 'CLK', uid, NULL)) AS clk_uv,
    uniqExactState(IF(type = 'IMP', e, NULL)) AS imp_count,
    sumState(IF(type = 'CLK', 1, 0)) AS clk_count
FROM search_action_dwd
WHERE toYYYYMMDD(dt) >= 20210623
GROUP BY toStartOfMinute(dt)
I made a simple query to test JSONExtractRaw function, but with no luck:
-- Attempt: render the rows of devices as JSON inside a subquery, then extract
-- the 'data' field from that output.
-- NOTE(review): the FORMAT clause inside the subquery is presumably what the
-- parser rejects; FORMAT applies to the output of the whole query -- confirm.
select JSONExtractRaw(j, 'data') from (select * from devices
format JSON) as j;
But it fails with a "Syntax error". Is that a bug, or is this expected?
Version 20.7.2.30 added functions that format a row as text in a given output format: formatRow and formatRowNoNewline:
-- Serialize every row as a JSONEachRow string with formatRow, then read the
-- raw 'data' value back out of that string.
SELECT JSONExtractRaw(j, 'data') AS json
FROM
(
    SELECT formatRow('JSONEachRow', *) AS j
    FROM
    (
        /* emulate the test dataset */
        SELECT 1 AS id, '[{"a": 8}]' AS data
        UNION ALL
        SELECT 2 AS id, '[{"a": 2}]' AS data
    )
)
┌─json───────────┐
│ "[{\"a\": 8}]" │
│ "[{\"a\": 2}]" │
└────────────────┘
Is it possible to update an array at specific index in existing row in ClickHouse db? Something like alter table mytable set arr[3]=8
-- Sample table with a Nested column; the insert supplies Person.Name and
-- Person.value as two parallel arrays.
CREATE TABLE xxx
(
    A Int64,
    Person Nested
    (
        Name String,
        value String
    )
)
ENGINE = MergeTree
ORDER BY A;

INSERT INTO xxx VALUES (1, ['a','b','c'], ['aaa','bbb','ccc'])
if array index = 3 then name = '1'
-- Rebuild Person.Name position by position: position 3 becomes '1', every
-- other position keeps its current value. WHERE 1 applies it to all rows.
ALTER TABLE xxx
    UPDATE "Person.Name" = arrayMap(
        idx -> if(idx = 3, '1', "Person.Name"[idx]),
        arrayEnumerate("Person.Name")
    )
    WHERE 1;
select * from xxx;
┌─A─┬─Person.Name───┬─Person.value────────┐
│ 1 │ ['a','b','1'] │ ['aaa','bbb','ccc'] │
└───┴───────────────┴─────────────────────┘
if name = a then name = 1
-- Replace every element equal to 'a' with '1'; all other elements are kept.
-- WHERE 1 applies it to all rows.
ALTER TABLE xxx
    UPDATE "Person.Name" = arrayMap(
        elem -> if(elem = 'a', '1', elem),
        "Person.Name"
    )
    WHERE 1;
select * from xxx;
┌─A─┬─Person.Name───┬─Person.value────────┐
│ 1 │ ['1','b','c'] │ ['aaa','bbb','ccc'] │
└───┴───────────────┴─────────────────────┘
if name = c then value = 333
-- Walk both parallel arrays together: where the name is 'c', the value at
-- the same position becomes '333'; other values are kept.
ALTER TABLE xxx
    UPDATE "Person.value" = arrayMap(
        (val, nm) -> if(nm = 'c', '333', val),
        "Person.value",
        "Person.Name"
    )
    WHERE 1;
select * from xxx
┌─A─┬─Person.Name───┬─Person.value────────┐
│ 1 │ ['1','b','c'] │ ['aaa','bbb','333'] │
└───┴───────────────┴─────────────────────┘
-- Stage in #tmptst the tblZonePostcode rows whose sPostcode begins with '%'
-- and whose sPostcodeRange is not numeric, together with the joined
-- tblNewZone postcode (exposed as op).
SELECT
    nz.sPostcode AS op,
    zp.nZoneCountryID,
    zp.sPostcode,
    zp.nZoneID,
    zp.nInjectionID
INTO #tmptst
FROM dbo.tblZonePostcode AS zp
INNER JOIN tblNewZone AS nz
    ON nz.nPostcodeID = zp.NewZoneId
WHERE zp.sPostcode IS NOT NULL
    AND CHARINDEX('%', zp.sPostcode) = 1
    AND ISNUMERIC(zp.sPostcodeRange) = 0;
-- The lookup against #tmptst filters on nZoneCountryID, nInjectionID and
-- nZoneID together, so one composite index lets it seek straight to the
-- candidate rows; separate single-column indexes can each serve only one of
-- those predicates. sPostcode and op are INCLUDEd so the prefix test and the
-- returned value are read from the index without touching the base rows.
CREATE INDEX idx_tmptst_zone_lookup
    ON #tmptst (nZoneCountryID, nInjectionID, nZoneID)
    INCLUDE (sPostcode, op);
-- Fill calculatedPostCodeZone on #tmprec rows that have a rate chart group
-- but no calculated values, zone or status yet. The value comes from a
-- #tmptst row with the same country / injection point / zone whose sPostcode
-- occurs at position 1 of sConsigneePostcodeFirst.
UPDATE #tmprec
SET calculatedPostCodeZone = (
        -- NOTE(review): TOP 1 without ORDER BY returns an arbitrary row when
        -- several #tmptst rows match; add an ORDER BY if a specific one must
        -- win (TODO confirm the intended tie-break).
        SELECT TOP 1 zp.op
        FROM #tmptst zp
        WHERE zp.nZoneCountryID = CalculatedCountryId
            AND zp.nInjectionID = CalculatedinjectionPointId
            AND zp.nZoneID = CalculatedZoneId
            AND CHARINDEX(zp.sPostcode, sConsigneePostcodeFirst) = 1
    )
WHERE CalculatedRateChartGroupID > 0
    AND calculatedRatechartValues IS NULL
    AND calculatedPostCodeZone IS NULL
    AND CalculationStatus IS NULL
    -- Skip the update entirely when nothing was staged. EXISTS can stop at
    -- the first row instead of counting the whole temp table.
    AND EXISTS (
        SELECT 1
        FROM #tmptst
    );
In total, tst contains 85k records and #tmprec contains 255k records. The objective of the above query is to get the matching postcode from tblNewZone for each record whose sConsigneePostcodeFirst starts with the matching postcode.
The issue is that the above query takes 7 minutes to execute. I have added indexes, tried a LIKE-based query, and tried a full-text search index.
Please let me know if there is anything I can do to improve the query.