why not array join execute on mergetree node

why not array join execute on mergetree node - clickhouse

-- Counts, for every (from, to) pair, the distinct (uid, sid) combinations in which that pair occurs.
-- Each l is a 2-tuple produced in tx below:
--   l.1 = '<position>_$$_<event_id>', l.2 = '<next position>_$$_<next event_id>'.
select uniq(uid,sid) as value,l.1 as from ,l.2 as to
from (
select uid,sid,s_t
from (
-- Innermost level: one row per distinct_id, with that user's events folded into arrays.
select
distinct_id as uid,
-- cur: (unix timestamp, event_id as string) tuples, sorted by timestamp.
arraySort((x)->x.1,groupArray(tuple(toUnixTimestamp(ums_ts_),toString(event_id)))) as cur,
-- cur_diff: presumably the differences between neighbouring timestamps of cur (taken via the x.1 lambda).
arrayDifference((x)->x.1,cur) as cur_diff,
-- cur_split: positions of cur_diff whose value exceeds 1800 (in toUnixTimestamp units),
-- followed by length(cur)+1 so that the last slice is closed as well.
arrayPushBack(
arrayFilter(
(x,y)->y>1800,
arrayEnumerate(cur_diff),
cur_diff
),
length(cur)+1
) as cur_split,
-- t: cur cut into slices at the cur_split positions; every slice is trimmed to begin at its
-- first '1301' event and reduced to the event ids (x.2); empty results are dropped.
-- NOTE(review): for a slice without '1301', arrayFirstIndex yields 0 — confirm that
-- arraySlice(x, 0) gives the intended result for such slices.
arrayFilter((x)->length(x)>0,
arrayMap((x)->arrayMap((x)->x.2,arraySlice(x,arrayFirstIndex((y)->y.2='1301',x))),
arrayMap((x,y)->arraySlice(cur,
multiIf(y==1,1,cur_split[y-1]),multiIf(y==1,cur_split[y]-1,cur_split[y]-cur_split[y-1])),cur_split,arrayEnumerate(cur_split)))) as t,
-- tx: for every slice of t, an array of (from, to) tuples where
--   from = '<i>_$$_<event i>' and to = '<i+1>_$$_<event i+1>';
-- the last event of a slice is paired with '_$$__waste' (empty position + '_waste').
arrayMap((x)->arrayMap((y,z,q)->tuple(concat(toString(y),'_$$_',z),concat(multiIf(y==length(arrayEnumerate(x)),'',toString(y+1)),'_$$_',q)),arrayEnumerate(x),x,arrayPushBack(arrayPopFront(x),'_waste')),t) as tx
from event_data.event_wos_p15 where event_id in (1301,1310,1303,1305,1429) and event_date>='2020-03-01' and event_date <='2020-03-31' group by distinct_id
-- Unfold tx: one row per slice; sid is the slice's position inside tx.
) array join tx as s_t,arrayEnumerate(tx) as sid
-- Unfold each slice: one row per (from, to) tuple, then aggregate per pair.
) array join s_t as l group by from ,to
Checking the system.query_log table, I found that the array join was executed on the distributed node. Why is the array join not executed on the MergeTree node?
mergetree node query_log
type: QueryFinish
event_date: 2020-04-27
event_time: 2020-04-27 15:34:54
query_start_time: 2020-04-27 15:34:53
query_duration_ms: 628
read_rows: 4955184
read_bytes: 355066855
written_rows: 0
written_bytes: 0
result_rows: 76798
result_bytes: 4636864
memory_usage: 660752320
query: SELECT distinct_id AS uid, arrayMap(x -> arrayMap((y, z, q) -> (concat(toString(y), '_$$_', z), concat(multiIf(y = length(arrayEnumerate(x)), '', toString(y + 1)), '_$$_', q)), arrayEnumerate(x), x, arrayPushBack(arrayPopFront(x), '_waste')), arrayFilter(x -> (length(x) > 0), arrayMap(x -> arrayMap(x -> (x.2), arraySlice(x, arrayFirstIndex(y -> ((y.2) = '1301'), x))), arrayMap((x, y) -> arraySlice(arraySort(x -> (x.1), groupArray((toUnixTimestamp(ums_ts_), toString(event_id)))) AS cur, multiIf(y = 1, 1, (arrayPushBack(arrayFilter((x, y) -> (y > 1800), arrayEnumerate(arrayDifference(x -> (x.1), cur) AS cur_diff), cur_diff), length(cur) + 1) AS cur_split)[y - 1]), multiIf(y = 1, (cur_split[y]) - 1, (cur_split[y]) - (cur_split[y - 1]))), cur_split, arrayEnumerate(cur_split)))) AS t) AS tx
FROM event_data.event_wos_p15 WHERE (event_id IN (1301, 1310, 1303, 1305, 1429)) AND (event_date >= '2020-03-01') AND (event_date <= '2020-03-31') GROUP BY distinct_id

select
from (
select xxx,
from distributed_table
group by
)
Only the inner part of the query — the subquery that reads from distributed_table — is executed on the shards (on the MergeTree tables); everything outside the ( ) is executed on the initiator node.

Related

How to use ClickHouse partition value in SQL query?

I have a table with tuple partitions: (0, 0), (0, 1), (1, 0), (1, 1), (2, 0), (2, 1), (3, 0), ...
-- MergeTree table whose partition key is a two-element tuple:
--   1) intDiv(id, 1000000)
--   2) the result of the boolean condition on a, b and c
CREATE TABLE my_table (
    id Int32,
    a  Int32,
    b  Float32,
    c  Int32
)
ENGINE = MergeTree
PARTITION BY (
    intDiv(id, 1000000),
    a < 20000 AND b > 0.6 AND c >= 100
)
ORDER BY id;
I need only rows with partition (<any number>, 1) and I'm looking for a way to use partition value in a query like
SELECT *
FROM my_table
WHERE my_table.partition[2] == 1;
Does ClickHouse have such a feature?

Version 21.6 added the virtual columns _partition_id and _partition_value, which can help you:
-- Return every row (plus the virtual partition columns) whose partition
-- tuple has 1 as its second element.
SELECT
    *,
    _partition_id,
    _partition_value
FROM my_table
WHERE _partition_value.2 = 1

And what is the problem with
where (a < 20000 AND b > 0.6 AND c >= 100) = 1
???
insert into my_table select 1, 3000000, 0, 0 from numbers(100000000);
insert into my_table select 1, 0, 10, 200 from numbers(100);
SET send_logs_level = 'debug';
set force_index_by_date=1;
select sum(id) from my_table where (a < 20000 AND b > 0.6 AND c >= 100) = 1;
...Selected 1/7 parts by partition key...
┌─sum(id)─┐
│ 100 │
└─────────┘
1 rows in set. Elapsed: 0.002 sec.
However, (_partition_value.2) = 1 will be faster, because it does not need to read columns a, b and c for filtering.

Oracle comma separated to rows with grouping

I have the following requirement.
Do we have direct functions available in Oracle 12c to accomplish this?
create table t1(input_name varchar2(500),input_values varchar2(500));
insert into t1 values('a,b,c,d,','1,2,3,4');
insert into t1 values('e,f,g,','5,6,7');
insert into t1 values('a1,b1,c1,d1,','11,12,13,14');
insert into t1 values('d,c,b,a,','100,200,300,400');
commit;
select * from t1;
INPUT_NAME INPUT_VALUES
------------------------------ ----------------
a,b,c,d, 1,2,3,4
e,f,g, 5,6,7
a1,b1,c1,d1, 11,12,13,14
d,c,b,a, 100,200,300,400
output:
a b c d e f g a1 b1 c1 d1
1 2 3 4 5 6 7 11 12 13 14
400 300 200 100
Thanks,
Rahmat Ali

Yes... if you have a known set of input names. But you would be better reorganising your data so that you are not storing correlated pairs of comma-separated lists.
SQL Fiddle
Oracle 11g R2 Schema Setup:
create table t1(input_name,input_values) AS
SELECT 'a,b,c,d,','1,2,3,4' FROM DUAL UNION ALL
SELECT 'e,f,g,','5,6,7' FROM DUAL UNION ALL
SELECT 'a1,b1,c1,d1,','11,12,13,14' FROM DUAL UNION ALL
SELECT 'd,c,b,a,','100,200,300,400' FROM DUAL
/
CREATE TYPE pair IS OBJECT(
name VARCHAR2(20),
value VARCHAR2(20)
)
/
CREATE TYPE pair_table IS TABLE OF PAIR
/
Query 1:
-- Pivot the (name, value) pairs into one output column per known input name.
-- rn numbers the occurrences of each name, so GROUP BY rn yields one output row
-- per occurrence (first 'a' alongside first 'b', second 'a' alongside second 'b', ...).
SELECT MAX( CASE name WHEN 'a' THEN value END ) AS a,
MAX( CASE name WHEN 'b' THEN value END ) AS b,
MAX( CASE name WHEN 'c' THEN value END ) AS c,
MAX( CASE name WHEN 'd' THEN value END ) AS d,
MAX( CASE name WHEN 'e' THEN value END ) AS e,
MAX( CASE name WHEN 'f' THEN value END ) AS f,
MAX( CASE name WHEN 'g' THEN value END ) AS g,
MAX( CASE name WHEN 'a1' THEN value END ) AS a1,
MAX( CASE name WHEN 'b1' THEN value END ) AS b1,
MAX( CASE name WHEN 'c1' THEN value END ) AS c1,
MAX( CASE name WHEN 'd1' THEN value END ) AS d1
FROM (
SELECT v.name,
v.value,
-- NOTE(review): ORDER BY ROWNUM relies on the order in which the join produces rows — confirm if a deterministic occurrence order matters.
ROW_NUMBER() OVER ( PARTITION BY v.name ORDER BY ROWNUM ) AS rn
FROM t1 t
CROSS JOIN
-- For each t1 row, build a collection of pair objects by splitting both
-- comma-separated lists in step.
TABLE(
CAST(
MULTISET(
SELECT pair(
-- LEVEL selects the n-th token; the last argument (1) returns the first
-- capture group, i.e. the token without its trailing comma.
REGEXP_SUBSTR( t.input_name, '([^,]+)(,|$)', 1, LEVEL, NULL, 1 ),
REGEXP_SUBSTR( t.input_values, '([^,]+)(,|$)', 1, LEVEL, NULL, 1 )
)
FROM DUAL
-- Generate one row per token found in input_name.
CONNECT BY level <= REGEXP_COUNT( t.input_name, '([^,]+)(,|$)' )
) AS pair_table
)
) v
)
GROUP BY rn
Results:
| A | B | C | D | E | F | G | A1 | B1 | C1 | D1 |
|-----|-----|-----|-----|--------|--------|--------|--------|--------|--------|--------|
| 1 | 2 | 3 | 4 | 5 | 6 | 7 | 11 | 12 | 13 | 14 |
| 400 | 300 | 200 | 100 | (null) | (null) | (null) | (null) | (null) | (null) | (null) |
You can also use a PIVOT statement rather than multiple MAX( CASE ... END ) statements.
If you do not have a static set of input names then you will need to search for dynamic pivots.
Update:
Is there a way where I can avoid using types?
You can avoid creating types and just use a built-in VARRAY or collection like SYS.ODCIVARCHAR2LIST but then you will need two lists and it becomes complicated to correlate between the two.
-- Same pivot as Query 1, but without user-defined types: names and values are
-- split separately into SYS.ODCIVARCHAR2LIST collections and re-paired by
-- (source row, token position).
-- input_names: one row per token of t1.input_name, keyed by ROWID and token index.
WITH input_names ( rid, idx, name ) AS (
SELECT t.ROWID,
-- Token position within its source row; NOTE(review): relies on ROWNUM following the collection's element order — confirm.
ROW_NUMBER() OVER ( PARTITION BY t.ROWID ORDER BY ROWNUM ) AS rn,
v.COLUMN_VALUE
FROM t1 t
CROSS JOIN
TABLE(
CAST(
MULTISET(
-- LEVEL selects the n-th token; the last argument (1) returns the first capture group (token without the comma).
SELECT REGEXP_SUBSTR( t.input_name, '([^,]+)(,|$)', 1, LEVEL, NULL, 1 )
FROM DUAL
CONNECT BY level <= REGEXP_COUNT( t.input_name, '([^,]+)(,|$)' )
) AS SYS.ODCIVARCHAR2LIST
)
) v
),
-- input_values: one row per token of t1.input_values, keyed the same way.
input_values ( rid, idx, value ) AS (
SELECT t.ROWID,
ROW_NUMBER() OVER ( PARTITION BY t.ROWID ORDER BY ROWNUM ) AS rn,
v.COLUMN_VALUE
FROM t1 t
CROSS JOIN
TABLE(
CAST(
MULTISET(
SELECT REGEXP_SUBSTR( t.input_values, '([^,]+)(,|$)', 1, LEVEL, NULL, 1 )
FROM DUAL
CONNECT BY level <= REGEXP_COUNT( t.input_values, '([^,]+)(,|$)' )
) AS SYS.ODCIVARCHAR2LIST
)
) v
),
-- correlated: name/value pairs matched on source row and token index;
-- rn numbers the occurrences of each name.
correlated ( name, value, rn ) AS (
SELECT n.name,
v.value,
ROW_NUMBER() OVER ( PARTITION BY n.name
ORDER BY ROWNUM )
FROM input_names n
INNER JOIN
input_values v
ON ( n.rid = v.rid AND n.idx = v.idx )
)
-- Pivot: one output column per known name, one output row per occurrence number.
SELECT MAX( CASE name WHEN 'a' THEN value END ) AS a,
MAX( CASE name WHEN 'b' THEN value END ) AS b,
MAX( CASE name WHEN 'c' THEN value END ) AS c,
MAX( CASE name WHEN 'd' THEN value END ) AS d,
MAX( CASE name WHEN 'e' THEN value END ) AS e,
MAX( CASE name WHEN 'f' THEN value END ) AS f,
MAX( CASE name WHEN 'g' THEN value END ) AS g,
MAX( CASE name WHEN 'a1' THEN value END ) AS a1,
MAX( CASE name WHEN 'b1' THEN value END ) AS b1,
MAX( CASE name WHEN 'c1' THEN value END ) AS c1,
MAX( CASE name WHEN 'd1' THEN value END ) AS d1
FROM correlated
GROUP BY rn;

BUG: ORA-00913 Oracle Error - Too many values

The below code is in stored procedure, and they told me to convert it into nested loops and try running it.
insert into PRICEVIEW_RATE_PLAN_PROC (
SSR_CODE
,CORRIDOR_PLAN_ID
,CORRIDOR_PLAN_DESCRIPTION
,USAGE_TYPE
,PRODUCT
,JURISDICTION
,PROVIDER
,RATE_PERIOD
,FLAGFALL
,RATE
,RATEBAND
,NUMSECS
,BAND_RATE
,ACTIVE_DT
,INACTIVE_DT
)
select /*+ use_hash(rate_usage_overrides,corridor_plan_id_values,product_elements,descriptions,jurisdictions,rate_usage_bands_overrides) */
distinct decode(a.corridor_plan_id, 0, '''', (
select c.short_display
from corridor_plan_id_values c
where a.corridor_plan_id = c.corridor_plan_id
)) as SSR_CODE
,a.corridor_plan_id as CORRIDOR_PLAN_ID
,decode(a.corridor_plan_id, 0, '''', (
select d.display_value
from corridor_plan_id_values d
where a.corridor_plan_id = d.corridor_plan_id
)) as CORRIDOR_PLAN_DESCRIPTION
,decode(a.type_id_usg, 0, '''', (
select f.description_text
from usage_types e
,descriptions f
where a.type_id_usg = e.type_id_usg
and e.description_code = f.description_code
)) as USAGE_TYPE
,decode(a.element_id, 0, '''', (
select h.description_text
from product_elements g
,descriptions h
where a.element_id = g.element_id
and g.description_code = h.description_code
)) as PRODUCT
,decode(a.jurisdiction, 0, '''', (
select j.description_text
from jurisdictions i
,descriptions j
where a.jurisdiction = i.jurisdiction
and j.description_code = i.description_code
)) as JURISDICTION
,decode(a.provider_class, 0, '''', (
select k.display_value
from provider_class_values k
where a.provider_class = k.provider_class
)) as PROVIDER
,decode(a.rate_period, '' 0 '', '''', (
select l.display_value
from rate_period_values l
where a.rate_period = l.rate_period
)) as RATE_PERIOD
,(a.FIXED_CHARGE_AMT / 100) + (a.ADD_FIXED_AMT / 10000000) as FLAGFALL
,(a.ADD_UNIT_RATE / 10000000) * 60 as RATE
,b.RATEBAND as RATEBAND
,b.NUM_UNITS as NUMSECS
,(b.UNIT_RATE / 10000000) * 60 as BAND_RATE
,a.ACTIVE_DT as ACTIVE_DT
,a.INACTIVE_DT as INACTIVE_DT
from rate_usage_overrides a
,rate_usage_bands_overrides b
where a.seqnum = b.seqnum(+);
I converted the above code to nested loops; the converted statement is below. When I try to run it, I get the error "too many values". Can you tell me what exactly the problem is?
insert into PRICEVIEW_RATE_PLAN_PROC(
SSR_CODE,
CORRIDOR_PLAN_DESCRIPTION,
USAGE_TYPE,
PRODUCT,
JURISDICTION,
PROVIDER,
RATE_PERIOD,
FLAGFALL,
RATE,
RATEBAND,
NUMSECS,
BAND_RATE,
ACTIVE_DT,
INACTIVE_DT
) VALUES (
(select c.short_display AS SSR_CODE from rate_usage_overrides a,corridor_plan_id_values c where a.corridor_plan_id = c.corridor_plan_id),
(select d.display_value AS CORRIDOR_PLAN_DESCRIPTION from rate_usage_overrides a ,corridor_plan_id_values d where a.corridor_plan_id = d.corridor_plan_id),
(select f.description_text AS USAGE_TYPE from rate_usage_overrides a ,usage_types e, descriptions f where a.type_id_usg = e.type_id_usg and e.description_code = f.description_code ),
(select h.description_text AS PRODUCT from rate_usage_overrides a, product_elements g,descriptions h where a.element_id = g.element_id and g.description_code = h.description_code ),
(select j.description_text AS JURISDICTION from rate_usage_overrides a, jurisdictions i,descriptions j where a.jurisdiction = i.jurisdiction and j.description_code = i.description_code),
(select k.display_value AS PROVIDER from rate_usage_overrides a ,provider_class_values k where a.provider_class = k.provider_class),
(select l.display_value AS RATE_PERIOD from rate_usage_overrides a ,rate_period_values l where a.rate_period = l.rate_period),
(select (a.FIXED_CHARGE_AMT/100) + (a.ADD_FIXED_AMT/10000000) AS FLAGFALL from rate_usage_overrides a AS ACTIVE_DT),
(select (a.ADD_UNIT_RATE/10000000) * 60 AS RATE from rate_usage_overrides a),
(select b.RATEBAND AS RATEBAND from rate_usage_bands_overrides b),
(select b.NUM_UNITS AS NUMSECS from rate_usage_bands_overrides b),
(select (b.UNIT_RATE/10000000) * 60 AS BAND_RATE from rate_usage_bands_overrides b),
(select a.ACTIVE_DT,a.seqnum,b.seqnum AS ACTIVE_DT from rate_usage_overrides a, rate_usage_bands_overrides b where a.seqnum = b.seqnum(+)),
(select a.INACTIVE_DT,a.seqnum,b.seqnum AS INACTIVE_DT from rate_usage_overrides a, rate_usage_bands_overrides b where a.seqnum = b.seqnum(+))

Here is your mistake
(select a.ACTIVE_DT,a.seqnum,b.seqnum AS ACTIVE_DT from rate_usage_overrides a, rate_usage_bands_overrides b where a.seqnum = b.seqnum(+)),
(select a.INACTIVE_DT,a.seqnum,b.seqnum AS INACTIVE_DT from rate_usage_overrides a, rate_usage_bands_overrides b where a.seqnum = b.seqnum(+))
Both queries return three columns, but each one supplies the value for only a single column of the INSERT; that is why you are getting this error. And, by the way, this is not a bug.

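Run the individual queries one by one, starting from the first one (the one using c.corridor_plan_id), and check whether any of them returns more than one value; a subquery used as a single value in a VALUES list must return at most one row.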

Convert SQL Server CTE into Oracle CTE

Here is a SQL Server CTE that I am trying to convert to an Oracle CTE or a regular Oracle query:
;with cte as
(Select AC, M, Y, D, E, F, CD
from tblA
WHere
(Y = YEAR(GETDATE()) and M = Month(dateadd(month, -1, GETDATE())))
),
cte2 as
(Select A.AC,Max(A.Y)as Y, Max(A.M) as M, Max(A.CD) as CD
from tbl A
Inner join cte B on B.AC = A.AC
WHere A.CD is Not Null and B.CD is Null
Group by A.AC)
, cte3 as
(Select C.AC, C.Y, C.M, C.D, C.E, C.F, C.CD
from tblA C
Inner join cte2 D on C.AC = D.AC and C.Y= D.Y and C.M = D.M and
D.CD = C.CD
)
select * from cte
union
select * from cte3;

Assuming you didn't have the m and y columns reversed on purpose in your cte/cte3 select lists, I think you could rewrite your query as:
-- Single-pass rewrite: every tbla row carries, per ac, the maximum y / m / cd
-- over the rows that satisfy the CASE condition (analytic MAX ... OVER).
-- NOTE(review): the T-SQL version joins tblA back to its own filtered rows (cte)
-- and requires B.CD IS NULL; here the join targets tblb and the CASE tests
-- b.cd IS NOT NULL — confirm which table and which condition are intended.
with cte1 as (select a.ac,
a.m,
a.y,
a.d,
a.e,
a.f,
a.cd,
max(case when a.cd is not null and b.cd is not null then a.y end) over (partition by a.ac) max_y,
max(case when a.cd is not null and b.cd is not null then a.m end) over (partition by a.ac) max_m,
max(case when a.cd is not null and b.cd is not null then a.cd end) over (partition by a.ac) max_cd
from tbla a
left outer join tblb b on (a.ac = b.ac))
select ac,
m,
y,
d,
e,
f,
cd
from cte1
-- First branch: rows for the previous month, in the current year.
-- NOTE(review): to_char returns strings, so comparing with y / m relies on
-- implicit conversion if those columns are numeric — confirm the column types.
-- NOTE(review): in January the previous month belongs to the previous year while
-- y is still compared with the current year (the T-SQL original behaves the same) — confirm.
where (y = to_char(sysdate, 'yyyy')
and m = to_char(add_months(sysdate, -1), 'mm'))
-- Second branch: rows matching the per-ac maxima computed in cte1.
or (y = max_y
and m = max_m
and cd = max_cd);
You haven't provided any sample data, so I can't test, but it would be worth converting the date functions to their SQL Server equivalents and testing to make sure the data returned is the same.
This way, you're not querying the same table 3 times, which should improve the performance some.

Duplicating rows in an oracle query

I am to create a query which will be used for printing labels in our project and find it difficult since the count of the number of labels is based on a string. I have made a query that looks like this:
SELECT
wipdatavalue
, containername
, l
, q as qtybox
, d
, qtyperbox AS q
, productname
, dt
, dsn
, CASE
WHEN instr(wipdatavalue, '-') = 0
THEN
to_number(wipdatavalue)
ELSE
to_number(substr(wipdatavalue, 1, instr(wipdatavalue, '-') - 1))
END AS una
, CASE
WHEN instr(wipdatavalue, '-') = 0
THEN
to_number(wipdatavalue)
ELSE
to_number(substr(wipdatavalue, instr(wipdatavalue, '-') + 1))
END AS dulo
, ROW_NUMBER() OVER (ORDER BY containername) AS n
, count(*) over() m
FROM trprinting_ls
WHERE containername = 'TRALTESTU0A'
GROUP BY wipdatavalue, containername, l, q, d, qtyperbox, productname, dt, dsn
ORDER BY wipdatavalue
The query above produces the result below:
But actually, I have to display the first Item (Wipdatavalue 1-4) not only once but four times to look something like this:
I have tried another query that runs fine but when I try to load it in our project, it does not print the label. We found out that it is because of the WITH statement and we don't know why. The query is:
WITH DATA (WIPDATAVALUE, CONTAINERNAME, L, Q, D, QTYBOX, PRODUCTNAME, DT, una, dulo, m1, n)
AS (SELECT WIPDATAVALUE, CONTAINERNAME, L, Q, D, QTYBOX, PRODUCTNAME, DT, una, dulo,(dulo - una) + 1 AS m1,una n
FROM (SELECT WIPDATAVALUE, CONTAINERNAME, L, Q, D, QTYPERBOX AS QTYBOX, PRODUCTNAME, DT,
CASE
WHEN instr(wipdatavalue, '-') = 0
THEN
to_number(wipdatavalue)
ELSE
to_number(substr(wipdatavalue, 1, instr(wipdatavalue, '-') - 1))
END AS una,
CASE
WHEN instr(wipdatavalue, '-') = 0
THEN
to_number(wipdatavalue)
ELSE
to_number(substr(wipdatavalue, instr(wipdatavalue, '-') + 1))
END AS dulo
FROM trprinting_ls
WHERE containername = 'TRALTESTU0A'
)
UNION ALL
SELECT WIPDATAVALUE, CONTAINERNAME, L, Q, D, QTYBOX, PRODUCTNAME, DT, una, dulo, m1, n + 1
FROM DATA
WHERE n + 1 <= dulo)
SELECT WIPDATAVALUE, CONTAINERNAME, L, Q, D, QTYBOX, PRODUCTNAME, DT, una, dulo, n,
count(*) OVER () m
FROM DATA
ORDER BY n, wipdatavalue
Thanks guys for helping out.

Try this
select *
from your_data
start with instr(Wipdatavalue, '1') > 0
connect by level between regexp_substr(Wipdatavalue, '^\d+')
and regexp_substr(Wipdatavalue, '\d+$')
It's a simplified example
The regexp_substr can be replaced with substr and instr if you like (may also be faster)
Here is a sqlfiddle demo

Try this Query
select column_name, count(column_name)
from table
group by column_name
having count (column_name) > 1;

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

why not array join execute on mergetree node - clickhouse

select from ( select xxx, from distributed_table group by ) Only the inner part of the query — the subquery that reads from distributed_table — is executed on the shards (on the MergeTree tables); everything outside the ( ) is executed on the initiator node.

Related

How to use ClickHouse partition value in SQL query?

Oracle comma separated to rows with grouping

BUG: ORA-00913 Oracle Error - Too many values

Convert SQL Server CTE into Oracle CTE

Duplicating rows in an oracle query

Categories

Resources