Hive - Select unique rows based on some columns - hadoop

I am trying to group rows that have the same value across two columns and have the result ranked/sorted based on a third column.
The result should contain all the other columns.
For the table:
with sample as (
select 'A' as c1, 'B' as c2, '22:00' as c3, 'Da' as c4
union all
select 'A' as c1, 'B' as c2, '23:00' as c3, 'Db' as c4
union all
select 'A' as c1, 'B' as c2, '09:00' as c3, 'Dc' as c4
union all
select 'A' as c1, 'C' as c2, '22:00' as c3, 'Dd' as c4
union all
select 'B' as c1, 'C' as c2, '09:00' as c3, 'De' as c4
)
Grouping or filtering by column c1 and c2 ranked by time on c3, the output would be:
row_number() over (partition by c1, c2 order by c3) as rnk
| c1, c2, c3, c4, rnk|
-----------------------
| A | B |09:00| Dc| 1 |
| A | B |22:00| Da| 2 |
| A | B |23:00| Db| 3 |
| A | C |22:00| Dd| 1 |
| B | C |09:00| De| 1 |
All the other columns like c4, c5.. should be kept but don't have any effect on the group criteria or rank.
I believe a window function with partition on c1 and c2 and order by c3 could work, but I'm not sure if it's the best way in case of very large tables and the need to group by more columns.
The final output would be a UNIQUE row where rank is 1 (top). The columns should be exactly the same as the sample table (no rank).
Select * from tableX where rnk = 1 would do the work but would keep the column 'rnk'.
I want to avoid writing all the columns in the select, excluding the rnk.
| c1, c2, c3, c4 |
-------------------
| A | B |09:00| Dc|
| A | C |22:00| Dd|
| B | C |09:00| De|
*Edited, add final table

-- Return the first row (ordered by c3) of every c1/c2 group without listing the columns:
-- each source row is packed into a struct, ranked, filtered, and then unpacked with inline().
select inline(array(rec))
from (
    select
        struct(*) as rec,
        row_number() over (partition by c1, c2 order by c3) as row_pos
    from sample src
) ranked
where row_pos = 1
;
+------+------+-------+------+
| col1 | col2 | col3 | col4 |
+------+------+-------+------+
| A | B | 09:00 | Dc |
| A | C | 22:00 | Dd |
| B | C | 09:00 | De |
+------+------+-------+------+
P.s.
Please note that the column names were aliased, due to the use of struct

I think you just want row_number():
select t.*,
row_number() over (partition by c1, c2 order by c3) as rnk
from sample t;
The question seems to have changed since I answered it -- a rather rude thing to happen. If you want the top ranked column, then use a subquery:
-- Keep only the first row (by c3) of each c1/c2 combination.
-- The outer * still exposes the helper column rnk.
select ranked.*
from (
    select
        src.*,
        row_number() over (partition by src.c1, src.c2 order by src.c3) as rnk
    from sample src
) ranked
where ranked.rnk = 1;
This returns one row for each combination of c1/c2 in the data. If you want all rows in the event of ties, then use rank() instead of row_number().

Related

Is there any way to add one loop row in connect by oracle with nocycle?

Just like Oracle continues to follow a path beyond a cyclical loop when the cycle occurs at the top node (root node connected right back to root node), is there any way to do the same when the cycle occurs at an in-between node?
Like if i have some data like below
-- Sample edges c1 -> c2 for the CONNECT BY example.
create table t1 ( c1 varchar2(2), c2 varchar2(2));
insert into t1 values ('A', 'B');
insert into t1 values ('B', 'C');
-- C -> A points back at the root row, which closes the loop at the top node.
insert into t1 values ('C', 'A');
and execute below query:
select * from (
select distinct
connect_by_root c1 as c3,
c1,
c2
from t1
connect by nocycle c1 = prior c2
) where c3='A';
It will give me this results
c3 c1 c2
A A B
A B C
A C A
It gives me the root looped valued. But if i have data like below.
-- Sample edges c1 -> c2 where the loop closes on an in-between node rather than on the root.
create table t2 ( c1 varchar2(2), c2 varchar2(2));
insert into t2 values ('A', 'B');
insert into t2 values ('B', 'C');
-- C -> B points back at the middle node 'B', not at the root 'A'.
insert into t2 values ('C', 'B');
select * from (
select distinct
connect_by_root c1 as c3,
c1,
c2
from t2
connect by nocycle c1 = prior c2
) where c3='A';
this gives me
c3 c1 c2
A A B
A B C
But i need third row also that is A C B.
So wondering if this could be done?
You can use a recursive sub-query factoring clause:
-- Walk t2 starting from the rows where c1 = 'A', carrying the starting c1 along as c3.
WITH rsqfc ( c3, c1, c2 ) AS (
-- Anchor member: the starting rows; c3 records the root value.
SELECT c1, c1, c2
FROM t2
WHERE c1 = 'A'
UNION ALL
-- Recursive member: follow every edge whose c1 equals the previous row's c2.
SELECT r.c3, t.c1, t.c2
FROM t2 t
INNER JOIN rsqfc r
ON ( t.c1 = r.c2 )
)
-- Mark a row with is_cycle = 1 when its (c1, c2) pair already occurred on the path; 0 otherwise.
CYCLE c1, c2 SET is_cycle TO 1 DEFAULT 0
-- Return only the rows that were not flagged as a repeat.
SELECT c3, c1, c2
FROM rsqfc
WHERE is_cycle = 0;
Which, for your sample data:
create table t2 ( c1, c2 ) AS
SELECT 'A', 'B' FROM DUAL UNION ALL
SELECT 'B', 'C' FROM DUAL UNION ALL
SELECT 'C', 'B' FROM DUAL;
Outputs:
C3 | C1 | C2
:- | :- | :-
A | A | B
A | B | C
A | C | B
db<>fiddle here

How to get average of the row with minimum and maximum excluded in Oracle?

There are five columns.
How do I get the average of each row (not column)?
The average should be computed with the minimum and maximum excluded.
If there are duplicate maximum and/or minimum values, how do I exclude them all?
The result of my data should be like this.
Average_MIN_MAX_excluded
-------------------------
3.33333333
5.33333333
My data set is as below;
WITH DATAA AS
(SELECT 3 c1,5 c2,4 c3,3 c4 ,1 c5 FROM DUAL
UNION
SELECT 1 c1,3 c2,6 c3,9 c4 ,7 c5 FROM DUAL)
SELECT c1, c2, c3, c4, c5 FROM DATAA;
-- Row-wise average of c1..c5 after removing the largest and the smallest value.
-- NOTE(review): exactly one greatest and one least value are subtracted and the divisor
-- is fixed at 3, so a row whose minimum or maximum occurs more than once is not fully excluded.
select ((c1 + c2 + c3 + c4 + c5) -
greatest( c1, c2, c3, c4, c5 ) -
least( c1, c2, c3, c4, c5 ))/ 3
from DATAA
would be one way. Here's a liveSQL link
It is a good place to use LATERAL JOIN aka CROSS APPLY:
SELECT *
FROM t
CROSS APPLY (
SELECT AVG(c) AS Average_MIN_MAX_excluded
FROM (
SELECT c, ROW_NUMBER() OVER(ORDER BY c) rn
FROM (
SELECT c1 c FROM dual UNION ALL
SELECT c2 FROM dual UNION ALL
SELECT c3 FROM dual UNION ALL
SELECT c4 FROM dual UNION ALL
SELECT c5 FROM dual)
)
WHERE rn NOT IN (1,5)
) s;
This method allows to easily exclude 1,2,3 highest/lowest values if necessary.
db<>fiddle demo
I think you can use unpivot and analytical function with group by and average aggregate function as following:
WITH DATAA AS
(
SELECT 3 c1,5 c2,4 c3,3 c4 ,1 c5 FROM DUAL
UNION
-- case with same value at min for two columns
SELECT 1 c1,5 c2,4 c3,3 c4 ,1 c5 FROM DUAL
UNION
SELECT 1 c1,3 c2,6 c3,9 c4 ,7 c5 FROM DUAL)
-- your query starts from here
select rn, avg(val) from
(select rn, val,
max(val) over (partition by rn) maxval,
min(val) over (partition by rn) minval
from
(SELECT rownum rn, c1, c2, c3, c4, c5
FROM DATAA)
unpivot
(val for vals in (c1,c2,c3,c4,c5)))
where val not in (maxval, minval)
group by rn
See db<>fiddle demo.
Cheers!!

Oracle 8g - How can i create a dynamic table without pivot?

i need to make a dynamic table in Oracle 8g, but this version doesn't have the PIVOT property. I want to create a table like this.
Date | Code | count
12/04/2016 | a1 | 8
12/05/2016 | a2 | 10
10/06/2016 | a3 | 4
24/10/2016 | a2 | 6
Date | a1 | a2 | a3
12/04/2016 | 8 | |
12/05/2016 | | 10 |
10/06/2016 | | | 4
24/10/2016 | | 6 |
The number of codes is undefined. That is the reason why I can't create a static table.
Use a "plain" pivot query:
SELECT Date,
max( CASE code WHEN 'a1' THEN count END ) As a1,
max( CASE code WHEN 'a2' THEN count END ) As a2,
max( CASE code WHEN 'a3' THEN count END ) As a3
FROM table
GROUP BY Date
PIVOT clause is only a syntactic sugar to make it easier to express the above query
The below query using PIVOT clause is the same as the above one.
SELECT *
FROM (SELECT date, code, count FROM table )
PIVOT (
max( count ) FOR code IN ( 'a1' as a1, 'a2' as a2, 'a3' as a3 )
)

How to compute differences of values in one column

C1 |C2 |C3
a |b1 |1
a |b2 |2
a |b3 |3
a |b4 |4
b |b1 |5
b |b2 |6
b |b3 |10
b |b4 |11
Given the above table data, I want output like below:
C1 |DIFF
a |3
b |6
Here, DIFF is the difference of the C3 column value, where C2='b4' and C2='b1'.
What would be the logic to do it in Oracle?
Perhaps this... my_table is your input table.
-- For every c1, subtract the c3 value of its 'b1' row from the c3 value of its 'b4' row.
select hi.c1 as c1, hi.c3 - lo.c3 as diff
from my_table hi
inner join my_table lo
    on hi.c1 = lo.c1
where hi.c2 = 'b4'
  and lo.c2 = 'b1'
;

Oracle 10: Incomprehensible behaviour on INSERT into a view?

we have a strange problem here, we can't explain to ourselves.
We have a view in an Oracle DB Version 10.2.0.5.8. The view uses an INSTEAD OF trigger.
This is the code for the trigger:
-- INSTEAD OF trigger: redirects INSERT / UPDATE / DELETE issued against view V1_T1
-- to the base table T1, one row at a time.
CREATE OR REPLACE TRIGGER V1_T1_BIUD
INSTEAD OF INSERT OR UPDATE OR DELETE
ON V1_T1
FOR EACH ROW
DECLARE
    -- NOTE(review): AnyId is declared but never referenced in the body.
    AnyId NUMBER;
BEGIN
    IF INSERTING THEN
        -- Copy the incoming view row into the base table.
        INSERT INTO T1 (
            F1, F2, F3, F4, F5
        ) VALUES (
            :new.F1, :new.F2, :new.F3, :new.F4, :new.F5
        );
    ELSIF UPDATING THEN
        -- The target rows are located through the previous F1 value.
        UPDATE T1 SET F1 = :new.F1,
                      F2 = :new.F2,
                      F3 = :new.F3,
                      F4 = :new.F4,
                      F5 = :new.F5
        WHERE F1 = :old.F1;
    ELSIF DELETING THEN
        DELETE FROM T1
        WHERE F1 = :old.F1;
    END IF;
END;
/
This is an example INSERT statement:
INSERT INTO V_T1 (
F1, F2, F3, F4, F5
)
SELECT A.V, A.S, A.F, A.T, A.Z
FROM (
SELECT 'E' V, 'N' S, 'ABC' F, 'E' T, 'E' Z FROM DUAL UNION ALL
SELECT 'E', 'Y', 'QWE', 'O', 'E' FROM DUAL UNION ALL
SELECT 'I', 'Y', 'GHJ', 'I', 'I' FROM DUAL
) A
ORDER BY 1, 2, 3;
COMMIT;
Pay attention to the ORDER BY clause at the end of the select. The result of this INSERT statement is something like this:
F1 F2 F3 F4 F5
---------------
E N ABC I I
E Y QWE I I
I Y GHJ I I
As you can see, the 4th and 5th column are incorrectly filled with the values of the last datarow in all other datarows.
If we change the INSERT statement like this:
INSERT INTO V_T1 (
F1, F2, F3, F4, F5
)
SELECT A.V, A.S, A.F, A.T, A.Z
FROM (
SELECT 'E' V, 'N' S, 'ABC' F, 'E' T, 'E' Z FROM DUAL UNION ALL
SELECT 'E', 'Y', 'QWE', 'O', 'E' FROM DUAL UNION ALL
SELECT 'I', 'Y', 'GHJ', 'I', 'I' FROM DUAL
) A
ORDER BY 1, 2, 3, 4, 5;
COMMIT;
the result is this:
F1 F2 F3 F4 F5
---------------
E N ABC E E
E Y QWE O E
I Y GHJ I I
Again, pay attention to the ORDER BY clause, which now orders by all five columns instead of only the first three as in the first insert statement.
edit: If you omit the ORDER BY clause the result is also as expected (e. g. like in example 2).
Can someone explain this behaviour to me?
P. S. Concerning the comments:
I have not time to investigate or deliver any more infos on this topic today. I will create a complete example on our database and publish it here in the next few days. Thank you for your patience!
This does look like a bug, but I can't find an obvious match in the bug database (a few look possible, like 5842445, but are vague or don't quite line up). I can only make it happen with the trigger (so I assume your inserts being against T1 rather than V1_T1 are a transcription error); and only if F4 and F5 are CHAR not VARCHAR2:
create table t1 (f1 varchar2(2), f2 varchar2(2), f3 varchar2(3),
f4 char(2), f5 char(2));
create view v1_t1 as select * from t1;
... and the instead of trigger exactly as shown in the question.
The :NEW values inside the trigger are wrong, according to DBMS_OUTPUT, but how that's affected by the column data type is something only Oracle would be able to figure out I think.
It also still happens in 11.2.0.3 (Linux). Interestingly if I change the UNION ALL to just UNION I get slightly different results; in 10g the two columns end up null, in 11g they have x:
insert into v1_t1 (
F1, F2, F3, F4, F5
)
SELECT A.V, A.S, A.F, A.T, A.Z
FROM (
SELECT 'E' V, 'N' S, 'ABC' F, 'E' T, 'E' Z FROM DUAL UNION
SELECT 'E', 'Y', 'QWE', 'O', 'E' FROM DUAL UNION
SELECT 'I', 'Y', 'GHJ', 'I', 'I' FROM DUAL
) A
ORDER BY 1, 2, 3;
3 rows created.
select * from v1_t1;
F1 F2 F3 F4 F5
-- -- --- -- --
E N ABC x x
E Y QWE x x
I Y GHJ x x
... which is even stranger - looks like maybe a fix to some other bug has slightly affected this one.
So not really an answer; you'd need to raise a service request with Oracle, and I'm fairly sure they'd just tell you to remove the order by since it doesn't have any value, as you already know.
For Thilo; plan without any order by (11g):
----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------
| 0 | INSERT STATEMENT | | 3 | 51 | 9 (34)| 00:00:01 |
| 1 | LOAD TABLE CONVENTIONAL | V1_T1 | | | | |
| 2 | VIEW | | 3 | 51 | 9 (34)| 00:00:01 |
| 3 | SORT UNIQUE | | 3 | | 9 (78)| 00:00:01 |
| 4 | UNION-ALL | | | | | |
| 5 | FAST DUAL | | 1 | | 2 (0)| 00:00:01 |
| 6 | FAST DUAL | | 1 | | 2 (0)| 00:00:01 |
| 7 | FAST DUAL | | 1 | | 2 (0)| 00:00:01 |
----------------------------------------------------------------------------------
And plan with order by 1,2,3 or 1,2,3,4,5 - same plan hash value (11g):
----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
----------------------------------------------------------------------------------
| 0 | INSERT STATEMENT | | 3 | 51 | 10 (40)| 00:00:01 |
| 1 | LOAD TABLE CONVENTIONAL | V1_T1 | | | | |
| 2 | SORT ORDER BY | | 3 | 51 | 10 (40)| 00:00:01 |
| 3 | VIEW | | 3 | 51 | 9 (34)| 00:00:01 |
| 4 | SORT UNIQUE | | 3 | | 9 (78)| 00:00:01 |
| 5 | UNION-ALL | | | | | |
| 6 | FAST DUAL | | 1 | | 2 (0)| 00:00:01 |
| 7 | FAST DUAL | | 1 | | 2 (0)| 00:00:01 |
| 8 | FAST DUAL | | 1 | | 2 (0)| 00:00:01 |
----------------------------------------------------------------------------------
And I see the same sort of corruption selecting from other tables, but only if the results in the subquery are unioned before being ordered; though then I get nulls rather than x. (I briefly wondered if the x was coming from dual itself, but dummy is upper-case X, and this shows lower-case x).
Following @Annjawn's comment, changing the insert from V1_T1 to a direct insert in T1 works fine (i.e. correct values inserted), and curiously has the same plan hash even though it shows the table name instead of the view in the Name column. Works with either UNION or UNION ALL, too, and in both 10gR2 and 11gR2. Seems to be the trigger that's confused by the union, I guess.
Further to the datatype point... the view has to have char columns, the table does not necessarily, which isn't really a surprise since the trigger on the view seems to be the problem. If I set the table up with char columns but cast them to varchar2 in the view then I don't see the problem:
create table t1 (f1 varchar2(2), f2 varchar2(2), f3 varchar2(3),
f4 char(2), f5 char(2));
create view v1_t1 as select f1, f2, f3, cast(f4 as varchar(2)) f4,
cast(f5 as varchar(2)) f5
from t1;
But If I do it the other way around it does exhibit the problem:
create table t1 (f1 varchar2(2), f2 varchar2(2), f3 varchar2(3),
f4 varchar(2), f5 varchar(2));
create view v1_t1 as select f1, f2, f3, cast(f4 as char(2)) f4,
cast(f5 as char(2)) f5
from t1;
Unless I am missing something, this entire ordeal works fine and inserts the rows into table T1 as expected in both Oracle 10g and 11g.
create table t1 (f1 varchar2(10),
f2 varchar2(10),
f3 varchar2(10),
f4 varchar2(10),
f5 varchar2(10));
create or replace view v_t1 as select * from t1;
-- INSTEAD OF trigger: redirects INSERT / UPDATE / DELETE issued against view v_t1
-- to the base table t1, one row at a time.
CREATE OR REPLACE TRIGGER V1_T1_BIUD
INSTEAD OF INSERT OR UPDATE OR DELETE
ON v_t1
FOR EACH ROW
DECLARE
    -- NOTE(review): AnyId is declared but never referenced in the body.
    AnyId NUMBER;
BEGIN
    IF INSERTING THEN
        -- Copy the incoming view row into the base table.
        INSERT INTO t1 (
            F1, F2, F3, F4, F5
        ) VALUES (
            :new.F1, :new.F2, :new.F3, :new.F4, :new.F5
        );
    ELSIF UPDATING THEN
        -- The target rows are located through the previous F1 value.
        UPDATE t1 SET F1 = :new.F1,
                      F2 = :new.F2,
                      F3 = :new.F3,
                      F4 = :new.F4,
                      F5 = :new.F5
        WHERE F1 = :old.F1;
    ELSIF DELETING THEN
        DELETE FROM t1
        WHERE F1 = :old.F1;
    END IF;
END;
/
--With UNION ALL
INSERT INTO V_T1 (
F1, F2, F3, F4, F5
)
SELECT A.V, A.S, A.F, A.T, A.Z
FROM (
SELECT 'E' V, 'N' S, 'ABC' F, 'E' T, 'E' Z FROM DUAL UNION ALL
SELECT 'E', 'Y', 'QWE', 'O', 'E' FROM DUAL UNION ALL
SELECT 'I', 'Y', 'GHJ', 'I', 'I' FROM DUAL
) A
ORDER BY 1, 2, 3;
commit;
select * from t1;
F1 F2 F3 F4 F5
---------- ---------- ---------- ---------- ----------
E N ABC E E
E Y QWE O E
I Y GHJ I I
delete from t1;
commit;
--With UNION
INSERT INTO V_T1 (
F1, F2, F3, F4, F5
)
SELECT A.V, A.S, A.F, A.T, A.Z
FROM (
SELECT 'E' V, 'N' S, 'ABC' F, 'E' T, 'E' Z FROM DUAL UNION
SELECT 'E', 'Y', 'QWE', 'O', 'E' FROM DUAL UNION
SELECT 'I', 'Y', 'GHJ', 'I', 'I' FROM DUAL
) A
ORDER BY 1, 2, 3;
commit;
select * from t1;
F1 F2 F3 F4 F5
---------- ---------- ---------- ---------- ----------
E N ABC E E
E Y QWE O E
I Y GHJ I I
Sure enough, when I change F4 and F5 to char(10) from varchar2(10) (as noted by Alex Poole) I can re-create your issue exactly in both 10g and 11g.

Resources