How to sum arrays element by element after group by in clickhouse - clickhouse

I am trying to sum an array column element by element after grouping by another column.
Having the table A below:
id units
1 [1,1,1]
2 [3,0,0]
1 [5,3,7]
3 [2,5,2]
2 [3,2,6]
I would like to query something like:
select id, sum(units) from A group by id
And get the following result:
id units
1 [6,4,8]
2 [6,2,6]
3 [2,5,2]
Where the units arrays in rows with the same id get added element by element.

Try this query:
-- Element-wise sum of the `units` arrays within each `id` group:
-- sumForEach applies sum() position by position across the grouped arrays.
SELECT
    id,
    sumForEach(units) AS units
FROM
(
    -- Stand-in for table A: unpack each (id, units) tuple into two columns.
    SELECT
        tupleElement(row_data, 1) AS id,
        tupleElement(row_data, 2) AS units
    FROM
    (
        SELECT arrayJoin([
            (1, [1, 1, 1]),
            (2, [3, 0, 0]),
            (1, [5, 3, 7]),
            (3, [2, 5, 2]),
            (2, [3, 2, 6])
        ]) AS row_data
    )
)
GROUP BY id
/* Result
┌─id─┬─units───┐
│ 1 │ [6,4,8] │
│ 2 │ [6,2,6] │
│ 3 │ [2,5,2] │
└────┴─────────┘
*/

Related

clickhouse sum arrays at same index [duplicate]

I am trying to sum an array column element by element after grouping by another column.
Having the table A below:
id units
1 [1,1,1]
2 [3,0,0]
1 [5,3,7]
3 [2,5,2]
2 [3,2,6]
I would like to query something like:
select id, sum(units) from A group by id
And get the following result:
id units
1 [6,4,8]
2 [6,2,6]
3 [2,5,2]
Where the units arrays in rows with the same id get added element by element.
Try this query:
-- Element-wise sum of the `units` arrays within each `id` group:
-- sumForEach applies sum() position by position across the grouped arrays.
SELECT
    id,
    sumForEach(units) AS units
FROM
(
    -- Stand-in for table A: unpack each (id, units) tuple into two columns.
    SELECT
        tupleElement(row_data, 1) AS id,
        tupleElement(row_data, 2) AS units
    FROM
    (
        SELECT arrayJoin([
            (1, [1, 1, 1]),
            (2, [3, 0, 0]),
            (1, [5, 3, 7]),
            (3, [2, 5, 2]),
            (2, [3, 2, 6])
        ]) AS row_data
    )
)
GROUP BY id
/* Result
┌─id─┬─units───┐
│ 1 │ [6,4,8] │
│ 2 │ [6,2,6] │
│ 3 │ [2,5,2] │
└────┴─────────┘
*/

how to use array join in Clickhouse

I'm trying to split 2 arrays using arrayJoin()
my table:
-- Test table for the question: two integer-array columns and one string column.
create table test_array(
col1 Array(INT),
col2 Array(INT),
col3 String
)
engine = TinyLog;
then i insert these values:
-- Sample rows. The explicit column list ties each value to a named column
-- instead of relying on the table's column order.
insert into test_array (col1, col2, col3) values ([1,2],[11,22],'Text');
insert into test_array (col1, col2, col3) values ([5,6],[55,66],'Text');
When I split the first array, col1, the result looks like this:
But what I need is to split both col1 and col2 and include them in the select.
I tried this query, but it didn't work:
-- Attempt from the question: two independent arrayJoin() calls in one select,
-- which pairs every element of col1 with every element of col2.
select arrayJoin(col1) ,arrayJoin(col2) ,col1 , col2 ,col3 from test_array;
How can I edit the query to remove the highlighted rows in the picture?
Thanks.
Consecutive arrayJoin calls produce the Cartesian product of the arrays; to avoid it, use the ARRAY JOIN clause:
-- Unfold col1 and col2 together: one output row per array position, with the
-- original arrays and col3 repeated next to each (c1, c2) element pair.
SELECT c1, c2, col1, col2, col3
FROM test_array
ARRAY JOIN col1 AS c1, col2 AS c2
/*
┌─c1─┬─c2─┬─col1──┬─col2────┬─col3─┐
│ 1 │ 11 │ [1,2] │ [11,22] │ Text │
│ 2 │ 22 │ [1,2] │ [11,22] │ Text │
│ 5 │ 55 │ [5,6] │ [55,66] │ Text │
│ 6 │ 66 │ [5,6] │ [55,66] │ Text │
└────┴────┴───────┴─────────┴──────┘
*/
One more way: zip the arrays into tuples with arrayZip() and expand them with untuple():
-- Zip the two arrays into (col1[i], col2[i]) tuples, emit one row per tuple,
-- then expand each tuple back into separate columns.
SELECT untuple(arrayJoin(arrayZip(col1, col2))), col3
FROM test_array
┌─_ut_1─┬─_ut_2─┬─col3─┐
│ 1 │ 11 │ Text │
│ 2 │ 22 │ Text │
│ 5 │ 55 │ Text │
│ 6 │ 66 │ Text │
└───────┴───────┴──────┘

Clickhouse SQL Query: Average in intervals

I have a table:
deviceId, valueDateTime, value, valueType
Where the valueType - temperature, pressure, etc.
I have several parameters for query: begin, end (period), and time interval (for example 20 minutes)
I want to get charts for the period for each deviceId and valueType with series of average values for each interval in the period.
EDIT:
Above is the final task. At the moment I am just experimenting with it on https://play.clickhouse.tech/?file=playground, where I am trying to solve a similar task: I want to calculate the average Age per time interval, grouped by the Title field. My problem is: how do I add the grouping by Title?
-- 2013-07-15 00:00:00 - begin
-- 2013-07-16 00:00:00 - end
-- 1200 - average in interval 20m
-- Attempt from the question: average Age per 1200-second slot between the
-- begin and end timestamps. The outer query merges two row sources on the
-- slot start time t.
SELECT t, avg(Age) as Age FROM (
-- Branch 1: one row per slot start (begin + x * 1200 seconds) with a null Age;
-- presumably there so that slots without any data still appear in the output.
SELECT
arrayJoin(
arrayMap(x -> addSeconds(toDateTime('2013-07-15 00:00:00'), x * 1200),
range(toUInt64(dateDiff('second', toDateTime('2013-07-15 00:00:00'), toDateTime('2013-07-16 00:00:00'))/1200)))
) as t,
null as Age
UNION ALL
-- Branch 2: real data. EventTime is floored to the start of its 1200-second
-- slot (counted from the begin timestamp) and Age is averaged per slot.
SELECT
(addSeconds(
toDateTime('2013-07-15 00:00:00'),
1200 * intDivOrZero(dateDiff('second', toDateTime('2013-07-15 00:00:00'), EventTime), 1200))
) as t,
avg(Age) as Age
FROM `hits_100m_obfuscated`
-- BETWEEN is inclusive on both ends, so a row at exactly the end timestamp is kept.
WHERE EventTime BETWEEN toDateTime('2013-07-15 00:00:00') AND toDateTime('2013-07-16 00:00:00')
GROUP BY t
)
GROUP BY t ORDER BY t;
EDITED 2
Correct answer from vladimir adapted to be used and tested on https://play.clickhouse.tech/?file=playground
-- One row per (Title, JavaEnable) holding the series of 20-minute average Age
-- values for the selected day, as an array of (bucket start, average) tuples.
SELECT
    Title, -- as deviceId
    JavaEnable, -- as valueType
    -- Sort inside the aggregate: groupArray does not guarantee element order,
    -- so the series is ordered by bucket start explicitly.
    arraySort(x -> x.1, groupArray((rounded_time, avg_value))) AS values
FROM
(
    WITH 60 * 20 AS bucket_seconds -- bucket width: 20 minutes
    SELECT
        Title,
        JavaEnable,
        -- Floor EventTime (as valueDateTime) to the start of its bucket.
        toDateTime(intDiv(toUInt32(EventTime), bucket_seconds) * bucket_seconds) AS rounded_time,
        avg(Age) AS avg_value -- Age as value
    FROM `hits_100m_obfuscated`
    -- Half-open range [begin, end): a row at exactly the end instant belongs
    -- to the next period and must not open an extra bucket here.
    WHERE EventTime >= toDateTime('2013-07-15 00:00:00')
      AND EventTime < toDateTime('2013-07-16 00:00:00')
    GROUP BY
        Title,
        JavaEnable,
        rounded_time
)
GROUP BY
    Title,
    JavaEnable
ORDER BY
    Title,
    JavaEnable
Try this query:
-- One row per (deviceId, valueType) holding the series of 20-minute average
-- values, as an array of (bucket start, average) tuples.
SELECT
    deviceId,
    valueType,
    -- Sort inside the aggregate: groupArray does not guarantee element order,
    -- so the series is ordered by bucket start explicitly.
    arraySort(x -> x.1, groupArray((rounded_time, avg_value))) AS values
FROM
(
    WITH 60 * 20 AS bucket_seconds -- bucket width: 20 minutes
    SELECT
        deviceId,
        valueType,
        -- Floor valueDateTime to the start of its bucket.
        toDateTime(intDiv(toUInt32(valueDateTime), bucket_seconds) * bucket_seconds) AS rounded_time,
        avg(value) AS avg_value
    FROM
    (
        /* emulate the test dataset */
        SELECT
            number % 4 AS deviceId,
            now() - (number * 60) AS valueDateTime,
            number % 10 AS value,
            if((number % 2) = 1, 'temp', 'pres') AS valueType
        FROM numbers(48)
    )
    /* WHERE valueDateTime >= begin AND valueDateTime < end */
    GROUP BY
        deviceId,
        valueType,
        rounded_time
)
GROUP BY
    deviceId,
    valueType
ORDER BY
    deviceId,
    valueType
/*
┌─deviceId─┬─valueType─┬─values────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ 0 │ pres │ [('2021-02-12 06:00:00',4),('2021-02-12 06:20:00',4),('2021-02-12 06:40:00',4),('2021-02-12 07:00:00',0)] │
│ 1 │ temp │ [('2021-02-12 06:00:00',5),('2021-02-12 06:20:00',5),('2021-02-12 06:40:00',5),('2021-02-12 07:00:00',1)] │
│ 2 │ pres │ [('2021-02-12 06:00:00',4),('2021-02-12 06:20:00',4),('2021-02-12 06:40:00',4)] │
│ 3 │ temp │ [('2021-02-12 06:00:00',5),('2021-02-12 06:20:00',5),('2021-02-12 06:40:00',5)] │
└──────────┴───────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
*/
I would recommend using Grafana to visualize ClickHouse reports (see the Grafana ClickHouse datasource).

How can I calculate each user's points per day as the sum of all points from the beginning up to that day in ClickHouse

I have this data in clickhouse:
The final point total of each user on a given day is sum(point) from the beginning up to that day.
E.g. the total of user 1 on 2020-07-02 is 800 and on 2020-07-03 is 200.
I need this result: the point total of each user per day:
-- Running point total per user and day: sum the points of each day, then
-- accumulate the daily sums in date order.
select uid, d, t
from
(
    select
        uid,
        -- Sort the (date, daily sum) pairs inside the aggregate: groupArray does
        -- not guarantee element order, and arrayCumSum is only meaningful on a
        -- date-ordered array. The dates are 'YYYY-MM-DD' strings, so string
        -- order equals chronological order.
        arraySort(x -> x.1, groupArray((date, spt))) as daily,
        arrayMap(x -> x.1, daily) as dg,
        arrayCumSum(arrayMap(x -> x.2, daily)) as gt
    from
    (
        select uid, date, sum(pt) as spt
        from
        (
            -- sample data
            select 1 as tid, '2020-07-01' as date, 1 as uid, 500 as pt
            union all
            select 1 as tid, '2020-07-02' as date, 1 as uid, 300 as pt
            union all
            select 1 as tid, '2020-07-03' as date, 1 as uid, -600 as pt
        )
        group by uid, date
    )
    group by uid
)
-- Unfold both arrays together: one row per (date, running total) pair.
array join dg as d, gt as t
order by uid, d
┌─uid─┬─d──────────┬───t─┐
│ 1 │ 2020-07-01 │ 500 │
│ 1 │ 2020-07-02 │ 800 │
│ 1 │ 2020-07-03 │ 200 │
└─────┴────────────┴─────┘

How to pivot subgroups?

We have created a flat table for Clickhouse and are trying to get records from this table to create a Materialized view. The logic is if e_id is null the record is 'TypeB', if e_id is not null then record is 'TypeA'. Both TypeA and TypeB records will have the same p_id and s_id. We want to create one record per p_id+s_id combination.
The query given below works well with filter (p_id =1 and s_id = 1) but without filters - the exception is "DB::Exception: Scalar subquery returned more than one row"
Is it possible to do this in ClickHouse?
Would it be possible to create Materialized View with such a query?
-- Attempt from the question: rows with a non-null e_id are aggregated by the
-- outer query; the names of rows with a null e_id come from a scalar subquery.
select p_id,s_id,
groupArray(e_id),
groupArray(name),
-- This subquery is not correlated with the outer row. It returns one row per
-- (p_id, s_id) group, so it is a single value only while the p_id/s_id filter
-- restricts it to one group.
(select groupArray(name)
from flat_table
where e_id is null and p_id =1 and s_id = 1
group by p_id,s_id) as typeB
from flat_table
where e_id is not null and p_id =1 and s_id = 1
group by p_id,s_id;
/*
This what the table looks like:
Flat_table
p_id s_id e_id name
1 1 1 Jake
1 1 2 Bob
1 1 null Barby
1 1 null Ella
This is expected result:
p_id s_id e_id typeA typeB
1 1 [1,2] [Jake,Bob] [Barby,Ella]
*/
Let's try this query:
-- Collect every (e_id, name) pair of a (p_id, s_id) group once, then split the
-- pairs by whether e_id is NULL: non-NULL pairs feed e_ids and typeA, NULL
-- pairs feed typeB. The outer select only drops the helper column eid_names.
SELECT
    p_id,
    s_id,
    e_ids,
    typeA,
    typeB
FROM
(
    SELECT
        p_id,
        s_id,
        groupArray((e_id, name)) AS eid_names,
        arrayMap(pair -> pair.1, arrayFilter(pair -> isNotNull(pair.1), eid_names)) AS e_ids,
        arrayMap(pair -> pair.2, arrayFilter(pair -> isNotNull(pair.1), eid_names)) AS typeA,
        arrayMap(pair -> pair.2, arrayFilter(pair -> isNull(pair.1), eid_names)) AS typeB
    FROM test.test_006
    GROUP BY
        p_id,
        s_id
)
/* Result
┌─p_id─┬─s_id─┬─e_ids─┬─typeA────────────┬─typeB──────────────┐
│ 2 │ 2 │ [1,2] │ ['Jake2','Bob2'] │ ['Barby2','Ella2'] │
│ 1 │ 1 │ [1,2] │ ['Jake','Bob'] │ ['Barby','Ella'] │
└──────┴──────┴───────┴──────────────────┴────────────────────┘
*/
/* Data preparation queries */
-- Test table: e_id is nullable because rows without an e_id are the 'TypeB' records.
CREATE TABLE test.test_006
(
    `p_id` Int32,
    `s_id` Int32,
    `e_id` Nullable(Int32),
    `name` String
)
ENGINE = Memory;

-- Two (p_id, s_id) groups, each with two rows that have an e_id and two that do not.
-- Each statement ends with ';' so the block can be run as one script, and the
-- column list ties every value to a named column.
INSERT INTO test.test_006 (p_id, s_id, e_id, name)
VALUES
    (1, 1, 1, 'Jake'), (1, 1, 2, 'Bob'), (1, 1, null, 'Barby'), (1, 1, null, 'Ella'),
    (2, 2, 1, 'Jake2'), (2, 2, 2, 'Bob2'), (2, 2, null, 'Barby2'), (2, 2, null, 'Ella2');

Resources