ORACLE Recursive query - oracle

I'm trying to build a recursive query and I'm facing a problem.
please find below my dataset
-- Sample parent rows; ID is the value that table2.id_table1 points at.
WITH table1 ( ID, Code, Label ) as(
SELECT 123, 'C1', 'LABEL_1' from dual UNION ALL
SELECT 1, 'C2', 'LABEL_2' from dual UNION ALL
SELECT 30, 'C3', 'LABEL_3' from dual UNION ALL
SELECT 44, 'C4', 'LABEL_4' from dual UNION ALL
SELECT 5, 'C5', 'LABEL_5' from dual
),
-- Sample child rows; id_table1 holds the ID of the owning table1 row.
table2 ( ID, id_table1, code_child, label_child ) as (
SELECT 1, 123, 'C1_1','LABEL_1_1' from dual UNION ALL
SELECT 2, 123, 'C1_2','LABEL_1_2' from dual UNION ALL
SELECT 3, 123, 'C1_3','LABEL_1_3' from dual UNION ALL
SELECT 4, 123, 'C1_4','LABEL_1_4' from dual UNION ALL
SELECT 6, 30, 'C3_1','LABEL_3_1' from dual UNION ALL
SELECT 7, 30, 'C3_2','LABEL_3_2' from dual UNION ALL
SELECT 8, 30, 'C3_3','LABEL_3_3' from dual UNION ALL
SELECT 9, 30, 'C3_4','LABEL_3_4' from dual UNION ALL
SELECT 10, 5, 'C5_1','LABEL_5_1' from dual
),
-- Each table1 row paired with every one of its table2 children
-- (child columns are NULL for a parent without children).
hierarchy as (
Select
a.id, code, label, CODE_CHILD,id_table1
from table1 a
left join table2 b on b.id_table1 = a.ID
)
-- Anchor member: every hierarchy row, with its own id remembered as "base".
-- Recursive member: joins hierarchy rows whose id equals the previous level's id_table1.
-- The left join above already makes id_table1 equal to id on every matched row, so such a
-- row matches itself (and its siblings) again at each level and the recursion never
-- terminates; this is the cycle reported as ORA-32044.
,recursive (base, id, code, label, CODE_CHILD,id_table1) as (
SELECT
id as base,
id,
code,
label,
CODE_CHILD,
id_table1
FROM hierarchy
UNION ALL
SELECT
previous_level.base,
current_level.id,
current_level.code,
current_level.label,
current_level.CODE_CHILD,
current_level.id_table1
FROM recursive previous_level,
hierarchy current_level
WHERE 1=1
and current_level.id = previous_level.id_table1
)
SELECT * FROM recursive order by base;
And i'm getting this error :
32044. 00000 - "cycle detected while executing recursive WITH query"
*Cause: A recursive WITH clause query produced a cycle and was stopped
in order to avoid an infinite loop.
*Action: Rewrite the recursive WITH query to stop the recursion or use
the CYCLE clause.
Where i'm wrong ?
I need to merge these two tables into one.
here's what I'd like to get as a result.
id code label id_parent
1 C1 LABEL_1
2 C2 LABEL_2
3 C3 LABEL_3
4 C4 LABEL_4
5 C5 LABEL_5
6 C1_1 LABEL_1_1 1
7 C1_2 LABEL_1_2 1
8 C1_3 LABEL_1_3 1
9 C1_4 LABEL_1_4 1
10 C3_1 LABEL_3_1 3
11 C3_2 LABEL_3_2 3
12 C3_3 LABEL_3_3 3
13 C3_4 LABEL_3_4 3
14 C5_1 LABEL_5_1 5
Thank you

Not sure why you want a recursive query? It appears that you could just use UNION ALL and join the two tables:
-- Sample parents (table1) and children (table2); id_table1 names the parent of each child.
WITH table1 ( id, code, label ) AS (
    SELECT 1, 'C1', 'LABEL_1' FROM DUAL UNION ALL
    SELECT 2, 'C2', 'LABEL_2' FROM DUAL UNION ALL
    SELECT 3, 'C3', 'LABEL_3' FROM DUAL UNION ALL
    SELECT 4, 'C4', 'LABEL_4' FROM DUAL UNION ALL
    SELECT 5, 'C5', 'LABEL_5' FROM DUAL
),
table2 ( id, id_table1, code_child, label_child ) AS (
    SELECT 1,  1, 'C1_1', 'LABEL_1_1' FROM DUAL UNION ALL
    SELECT 2,  1, 'C1_2', 'LABEL_1_2' FROM DUAL UNION ALL
    SELECT 3,  1, 'C1_3', 'LABEL_1_3' FROM DUAL UNION ALL
    SELECT 4,  1, 'C1_4', 'LABEL_1_4' FROM DUAL UNION ALL
    SELECT 6,  3, 'C3_1', 'LABEL_3_1' FROM DUAL UNION ALL
    SELECT 7,  3, 'C3_2', 'LABEL_3_2' FROM DUAL UNION ALL
    SELECT 8,  3, 'C3_3', 'LABEL_3_3' FROM DUAL UNION ALL
    SELECT 9,  3, 'C3_4', 'LABEL_3_4' FROM DUAL UNION ALL
    SELECT 10, 5, 'C5_1', 'LABEL_5_1' FROM DUAL
),
-- Both tables stacked into one column layout: parents are tagged 1 and have no
-- parent id, children are tagged 2 and carry the id of their table1 row.
merged ( code, label, table_no, id_parent ) AS (
    SELECT code, label, 1, NULL
    FROM   table1
    UNION ALL
    SELECT code_child, label_child, 2, id_table1
    FROM   table2
)
-- A fresh id is numbered in the same order the rows are returned in:
-- all parents first, then all children, each group sorted by code.
SELECT   ROW_NUMBER() OVER ( ORDER BY table_no, code ) AS id,
         code,
         label,
         id_parent
FROM     merged
ORDER BY table_no, code;
Which outputs:
ID | CODE | LABEL | ID_PARENT
-: | :--- | :-------- | --------:
1 | C1 | LABEL_1 | null
2 | C2 | LABEL_2 | null
3 | C3 | LABEL_3 | null
4 | C4 | LABEL_4 | null
5 | C5 | LABEL_5 | null
6 | C1_1 | LABEL_1_1 | 1
7 | C1_2 | LABEL_1_2 | 1
8 | C1_3 | LABEL_1_3 | 1
9 | C1_4 | LABEL_1_4 | 1
10 | C3_1 | LABEL_3_1 | 3
11 | C3_2 | LABEL_3_2 | 3
12 | C3_3 | LABEL_3_3 | 3
13 | C3_4 | LABEL_3_4 | 3
14 | C5_1 | LABEL_5_1 | 5
db<>fiddle here

A recursive WITH clause query produced a cycle and was stopped in order to avoid an infinite loop.
This issue is coming due to bad data in the DB. There are some records which are causing circular relationship among them which is causing infinite loops.
For example: P is parent of C and C is again parent of P.
You can fetch the above output simple using UNION ALL and join of the tables.

Related

How to Choose a specific value from a table and to avoid duplicates?

I have two tables:
MainTable
id AccountNum status
1 11001 active
2 11002 active
3 11003 active
4 11004 active
AddTable
id date description
1 01.2020 ACCOUNT.SET
1 02.2020 ACCOUNT.CHANGE
1 03.2020 ACCOUNT.REMOVE
2 04.2020 ACCOUNT.SET
2 05.2020 ACCOUNT.CHANGE
3 08.2020 ACCOUNT.SET
4 05.2020 ACCOUNT.SET
4 09.2020 ACCOUNT.REMOVE
I need to get a such result:
EffectiveFrom is date when Account was set,
EffectiveTo is date when Account was removed
id AccountNum EffectiveFrom EffectiveTo
1 11001 01.2020 03.2020
2 11002 04.2020 null
3 11003 08.2020 null
4 11004 05.2020 09.2020
The problem is that after joining on AddTable I get the duplicates, but I need just one row on every Id and only dates where the description in ACCOUNT.SET,ACCOUNT.REMOVE.
Are you looking for left join?
-- AddTable is joined twice: once restricted to ACCOUNT.SET rows (EffectiveFrom)
-- and once restricted to ACCOUNT.REMOVE rows (EffectiveTo). The filters live in
-- the ON clauses, so accounts without such a row are kept with a NULL date.
SELECT
    acct.id         AS id,
    acct.AccountNum AS AccountNum,
    set_evt.date    AS EffectiveFrom,
    rem_evt.date    AS EffectiveTo
FROM MainTable acct
LEFT JOIN AddTable set_evt
    ON  set_evt.id = acct.id
    AND set_evt.description = 'ACCOUNT.SET'
LEFT JOIN AddTable rem_evt
    ON  rem_evt.id = acct.id
    AND rem_evt.description = 'ACCOUNT.REMOVE'
ORDER BY acct.AccountNum
Use a PIVOT and a LEFT OUTER JOIN:
-- Pivot AddTable so each id gets one row, with the latest ACCOUNT.SET date as
-- EffectiveFrom and the latest ACCOUNT.REMOVE date as EffectiveTo; any other
-- description is ignored by the pivot.
WITH account_dates AS (
    SELECT id,
           EffectiveFrom,
           EffectiveTo
    FROM   AddTable
    PIVOT ( MAX( dt ) FOR description IN (
                'ACCOUNT.SET'    AS EffectiveFrom,
                'ACCOUNT.REMOVE' AS EffectiveTo
            ) )
)
-- Outer join so accounts with no SET/REMOVE rows still appear, with NULL dates.
SELECT   acct.id,
         dates.EffectiveFrom,
         dates.EffectiveTo
FROM     MainTable acct
LEFT JOIN account_dates dates
       ON dates.id = acct.id
ORDER BY acct.id
So for your test data:
-- Four sample accounts.
CREATE TABLE MainTable ( id, AccountNum, status ) AS
              SELECT 1, 11001, 'active' FROM DUAL
    UNION ALL SELECT 2, 11002, 'active' FROM DUAL
    UNION ALL SELECT 3, 11003, 'active' FROM DUAL
    UNION ALL SELECT 4, 11004, 'active' FROM DUAL;

-- Account events; the date column is called dt here and holds real DATE values.
CREATE TABLE AddTable ( id, dt, description ) AS
              SELECT 1, DATE '2020-01-01', 'ACCOUNT.SET'    FROM DUAL
    UNION ALL SELECT 1, DATE '2020-01-02', 'ACCOUNT.CHANGE' FROM DUAL
    UNION ALL SELECT 1, DATE '2020-01-03', 'ACCOUNT.REMOVE' FROM DUAL
    UNION ALL SELECT 2, DATE '2020-01-04', 'ACCOUNT.SET'    FROM DUAL
    UNION ALL SELECT 2, DATE '2020-01-05', 'ACCOUNT.CHANGE' FROM DUAL
    UNION ALL SELECT 3, DATE '2020-01-08', 'ACCOUNT.SET'    FROM DUAL
    UNION ALL SELECT 4, DATE '2020-01-05', 'ACCOUNT.SET'    FROM DUAL
    UNION ALL SELECT 4, DATE '2020-01-09', 'ACCOUNT.REMOVE' FROM DUAL;
This outputs:
ID | EFFECTIVEFROM | EFFECTIVETO
-: | :------------ | :----------
1 | 01-JAN-20 | 03-JAN-20
2 | 04-JAN-20 | null
3 | 08-JAN-20 | null
4 | 05-JAN-20 | 09-JAN-20
db<>fiddle here

Count column comma delimited values oracle

Is it possible to count and also group by comma delimited values in the oracle database table? This is a table data example:
id | user | title |
1 | foo | a,b,c |
2 | bar | a,d |
3 | tee | b |
The expected result would be:
title | count
a | 2
b | 2
c | 1
d | 1
I wanted to use concat like this:
SELECT a.title FROM Account a WHERE concat(',', a.title, ',') LIKE 'a' OR concat(',', a.title, ',') LIKE 'b' ... GROUP BY a.title?
But I'm getting invalid number of arguments on concat. The title values are predefined, therefore I don't mind if I have to list all of them in the query. Any help is greatly appreciated.
This uses simple string functions and a recursive sub-query factoring and may be faster than using regular expressions and correlated joins:
Oracle Setup:
-- Sample accounts; title holds a comma-delimited list of values.
CREATE TABLE account ( id, "user", title ) AS
              SELECT 1, 'foo', 'a,b,c' FROM DUAL
    UNION ALL SELECT 2, 'bar', 'a,d'   FROM DUAL
    UNION ALL SELECT 3, 'tee', 'b'     FROM DUAL;
Query:
-- positions: one row per item of every title. start_pos is where the item begins,
-- end_pos is the position of the comma that ends it (0 for the last item).
WITH positions ( title, start_pos, end_pos ) AS (
    -- Anchor: the first item starts at character 1.
    SELECT acc.title,
           1,
           INSTR( acc.title, ',', 1 )
    FROM   account acc
    UNION ALL
    -- Step: the next item starts right after the previous comma; rows whose
    -- end_pos is 0 were the last item and are not extended further.
    SELECT prev.title,
           prev.end_pos + 1,
           INSTR( prev.title, ',', prev.end_pos + 1 )
    FROM   positions prev
    WHERE  prev.end_pos > 0
),
-- items: the text of each item, cut out of the title by its positions.
items ( item ) AS (
    SELECT CASE
               WHEN end_pos = 0
               THEN SUBSTR( title, start_pos )
               ELSE SUBSTR( title, start_pos, end_pos - start_pos )
           END
    FROM   positions
)
SELECT   item,
         COUNT(*)
FROM     items
GROUP BY item
ORDER BY item;
Output:
ITEM | COUNT(*)
:--- | -------:
a | 2
b | 2
c | 1
d | 1
db<>fiddle here
Split titles to rows and count them.
SQL> with test (id, title) as
2 (select 1, 'a,b,c' from dual union all
3 select 2, 'a,d' from dual union all
4 select 3, 'b' from dual
5 ),
6 temp as
7 (select regexp_substr(title, '[^,]', 1, column_value) val -- '[^,]' matches a single non-comma character, so this form only suits one-character titles
8 from test cross join table(cast(multiset(select level from dual
9 connect by level <= regexp_count(title, ',') + 1 -- one generated number per item: (number of commas + 1) per title
10 ) as sys.odcinumberlist))
11 )
12 select val as title,
13 count(*) -- how many rows (across all ids) contain this title
14 From temp
15 group by val
16 order by val;
TITLE COUNT(*)
-------------------- ----------
a 2
b 2
c 1
d 1
SQL>
If titles aren't that simple, then modify REGEXP_SUBSTR (add + sign) in line #7, e.g.
SQL> with test (id, title) as
2 (select 1, 'Robin Hood,Avatar,Star Wars Episode III' from dual union all
3 select 2, 'Mickey Mouse,Avatar' from dual union all
4 select 3, 'The Godfather' from dual
5 ),
6 temp as
7 (select regexp_substr(title, '[^,]+', 1, column_value) val -- '[^,]+' matches a whole run of non-comma characters, i.e. one complete title
8 from test cross join table(cast(multiset(select level from dual
9 connect by level <= regexp_count(title, ',') + 1 -- one generated number per item: (number of commas + 1) per title
10 ) as sys.odcinumberlist))
11 )
12 select val as title,
13 count(*) -- how many rows (across all ids) contain this title
14 From temp
15 group by val
16 order by val;
TITLE COUNT(*)
------------------------------ ----------
Avatar 2
Mickey Mouse 1
Robin Hood 1
Star Wars Episode III 1
The Godfather 1
SQL>

calculate the average time difference between each stage

How to calculate the average time difference between each stage.
The challenge with the actual data set is not every id will go through all stages.. some will skip stages and the date is not continuous for all Id's like below.
id date status
1 1/1/18 requirement
1 1/8/18 analysis
1 ? design
1 1/30/18 closed
2 2/1/18 requirement
2 2/18/18 closed
3 1/2/18 requirement
3 1/29/18 analysis
3 ? accepted
3 2/5/18 closed
?--we have missing dates as well
Expected output
id date status time_spent
1 1/1/18 requirement 0
1 1/8/18 analysis 7
1 ? design
1 1/30/18 closed 22
2 2/1/18 requirement 0
2 2/18/18 closed 17
3 1/2/18 requirement 0
3 1/29/18 analysis 27
3 ? accepted
3 2/5/18 closed 24
status avg(timespent)
requirement 0
analysis 17
design
closed 21
You can use windowing functions LAG (or LEAD) to get the data of the previous (or next) status for each id. That will let you compute the time elapsed in each stage. Then, compute the average time elapsed for each stage.
Here is an example of how to do that:
-- Sample input; a NULL dte marks a stage whose date is missing.
WITH input_data ( id, dte, status ) AS (
    SELECT 1, TO_DATE('1/1/18','MM/DD/YY'),  'requirement' FROM DUAL UNION ALL
    SELECT 1, TO_DATE('1/8/18','MM/DD/YY'),  'analysis'    FROM DUAL UNION ALL
    SELECT 1, NULL,                          'design'      FROM DUAL UNION ALL
    SELECT 1, TO_DATE('1/30/18','MM/DD/YY'), 'closed'      FROM DUAL UNION ALL
    SELECT 2, TO_DATE('2/1/18','MM/DD/YY'),  'requirement' FROM DUAL UNION ALL
    SELECT 2, TO_DATE('2/18/18','MM/DD/YY'), 'closed'      FROM DUAL UNION ALL
    SELECT 3, TO_DATE('1/2/18','MM/DD/YY'),  'requirement' FROM DUAL UNION ALL
    SELECT 3, TO_DATE('1/29/18','MM/DD/YY'), 'analysis'    FROM DUAL UNION ALL
    SELECT 3, NULL,                          'accepted'    FROM DUAL UNION ALL
    SELECT 3, TO_DATE('2/5/18','MM/DD/YY'),  'closed'      FROM DUAL
),
----- Solution begins here
-- elapsed = days between a stage's date and the previous dated stage of the same id.
-- The earliest dated stage of an id falls back to its own date (elapsed 0);
-- stages without a date get a NULL elapsed.
data_with_elapsed_days AS (
    SELECT src.id,
           src.dte,
           src.status,
           src.dte - NVL( LAG( src.dte ) IGNORE NULLS
                              OVER ( PARTITION BY src.id ORDER BY src.dte ),
                          src.dte ) elapsed
    FROM   input_data src
)
SELECT   status, AVG(elapsed)
FROM     data_with_elapsed_days
GROUP BY status
-- List the stages in workflow order rather than alphabetically.
ORDER BY CASE status
             WHEN 'requirement' THEN 1
             WHEN 'analysis'    THEN 2
             WHEN 'design'      THEN 3
             WHEN 'accepted'    THEN 4
             WHEN 'closed'      THEN 5
             ELSE 99
         END;
+-------------+-------------------------------------------+
| STATUS | AVG(ELAPSED) |
+-------------+-------------------------------------------+
| requirement | 0 |
| analysis | 17 |
| design | |
| accepted | |
| closed | 15.33333333333333333333333333333333333333 |
+-------------+-------------------------------------------+
As I said in my comment, that logic computes the elapsed days as the time to the given status from the prior status. Since, "requirement" has no prior status, this logic will always show zero days spent in requirements. It would probably be better to compute the time from the given status to the next status. For "closed", there would be no next status. You could just leave that blank or use SYSDATE as the data of the next status. Here is an example of that:
-- Sample input; a NULL dte marks a stage whose date is missing.
WITH input_data ( id, dte, status ) AS (
    SELECT 1, TO_DATE('1/1/18','MM/DD/YY'),  'requirement' FROM DUAL UNION ALL
    SELECT 1, TO_DATE('1/8/18','MM/DD/YY'),  'analysis'    FROM DUAL UNION ALL
    SELECT 1, NULL,                          'design'      FROM DUAL UNION ALL
    SELECT 1, TO_DATE('1/30/18','MM/DD/YY'), 'closed'      FROM DUAL UNION ALL
    SELECT 2, TO_DATE('2/1/18','MM/DD/YY'),  'requirement' FROM DUAL UNION ALL
    SELECT 2, TO_DATE('2/18/18','MM/DD/YY'), 'closed'      FROM DUAL UNION ALL
    SELECT 3, TO_DATE('1/2/18','MM/DD/YY'),  'requirement' FROM DUAL UNION ALL
    SELECT 3, TO_DATE('1/29/18','MM/DD/YY'), 'analysis'    FROM DUAL UNION ALL
    SELECT 3, NULL,                          'accepted'    FROM DUAL UNION ALL
    SELECT 3, TO_DATE('2/5/18','MM/DD/YY'),  'closed'      FROM DUAL
),
----- Solution begins here
-- elapsed = days from a stage's date to the next dated stage of the same id.
-- The last dated stage of an id is measured against today (TRUNC(SYSDATE)),
-- so its value changes from day to day; stages without a date get a NULL elapsed.
data_with_elapsed_days AS (
    SELECT src.id,
           src.dte,
           src.status,
           NVL( LEAD( src.dte ) IGNORE NULLS
                    OVER ( PARTITION BY src.id ORDER BY src.dte ),
                TRUNC(SYSDATE) ) - src.dte elapsed
    FROM   input_data src
)
SELECT   status, AVG(elapsed)
FROM     data_with_elapsed_days
GROUP BY status
-- List the stages in workflow order rather than alphabetically.
ORDER BY CASE status
             WHEN 'requirement' THEN 1
             WHEN 'analysis'    THEN 2
             WHEN 'design'      THEN 3
             WHEN 'accepted'    THEN 4
             WHEN 'closed'      THEN 5
             ELSE 99
         END;
+-------------+------------------------------------------+
| STATUS | AVG(ELAPSED) |
+-------------+------------------------------------------+
| requirement | 17 |
| analysis | 14.5 |
| design | |
| accepted | |
| closed | 361.666666666666666666666666666666666667 |
+-------------+------------------------------------------+
I agree with #MatthewMcPeak. Your requirements seem a bit odd: you spend zero days of requirement stage but spend an average of 21 days on closed? Fnord.
This solution treats the presented date as the start date of the stage and calculates the difference between it and the start_date of the next phase.
-- dt_diff: days from a stage's date (dd) to the next dated stage of the same id;
-- it is NULL when either date is missing. The outer query averages it per status.
SELECT   stage.status,
         AVG(stage.dt_diff) AS avg_ela
FROM     ( SELECT t.status,
                  LEAD( t.dd ) IGNORE NULLS
                      OVER ( PARTITION BY t.id ORDER BY t.dd ) - t.dd AS dt_diff
           FROM   your_table t
         ) stage
GROUP BY stage.status
/
If you wish to include all stages for each d and estimate the time spent in each (using linear interpolation) then you can create a sub-query with all the statuses and use a PARTITION OUTER JOIN to join them and then use LAG and LEAD to find the date range the status is in and interpolate between:
Oracle Setup:
-- Sample stage history: d identifies the item, dt is the stage date (NULL when
-- missing) and status is the stage name.
CREATE TABLE data ( d, dt, status ) AS
              SELECT 1, TO_DATE( '1/1/18', 'MM/DD/YY' ),  'requirement' FROM DUAL
    UNION ALL SELECT 1, TO_DATE( '1/8/18', 'MM/DD/YY' ),  'analysis'    FROM DUAL
    UNION ALL SELECT 1, NULL,                             'design'      FROM DUAL
    UNION ALL SELECT 1, TO_DATE( '1/30/18', 'MM/DD/YY' ), 'closed'      FROM DUAL
    UNION ALL SELECT 2, TO_DATE( '2/1/18', 'MM/DD/YY' ),  'requirement' FROM DUAL
    UNION ALL SELECT 2, TO_DATE( '2/18/18', 'MM/DD/YY' ), 'closed'      FROM DUAL
    UNION ALL SELECT 3, TO_DATE( '1/2/18', 'MM/DD/YY' ),  'requirement' FROM DUAL
    UNION ALL SELECT 3, TO_DATE( '1/29/18', 'MM/DD/YY' ), 'analysis'    FROM DUAL
    UNION ALL SELECT 3, NULL,                             'accepted'    FROM DUAL
    UNION ALL SELECT 3, TO_DATE( '2/5/18', 'MM/DD/YY' ),  'closed'      FROM DUAL;
Query:
-- statuses: the full list of stages, numbered in workflow order.
WITH statuses ( status, id ) AS (
SELECT 'requirement', 1 FROM DUAL UNION ALL
SELECT 'analysis', 2 FROM DUAL UNION ALL
SELECT 'design', 3 FROM DUAL UNION ALL
SELECT 'accepted', 4 FROM DUAL UNION ALL
SELECT 'closed', 5 FROM DUAL
),
-- ranges: one row per (d, status) pair, including statuses a given d never recorded,
-- together with the dated stages that bracket it.
ranges ( d, dt, status, id, recent_dt, recent_id, next_dt, next_id ) AS (
SELECT d.d,
d.dt,
s.status,
s.id,
-- recent_dt: this stage's own date, otherwise the closest earlier stage's date.
NVL(
d.dt,
LAG( d.dt, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
-- recent_id: the stage number that recent_dt was taken from.
NVL2(
d.dt,
s.id,
LAG( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
-- next_dt: date of the closest later dated stage; defaults to this row's own dt when there is none.
LEAD( d.dt, 1, d.dt )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id ),
-- next_id: stage number of that later dated stage; defaults to s.id + 1 so the divisor below is never 0.
LEAD( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1, s.id + 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
FROM data d
-- Partitioned outer join: the statuses list is outer-joined separately to each d's rows.
PARTITION BY ( d )
RIGHT OUTER JOIN statuses s
ON ( d.status = s.status )
)
-- Linear interpolation: the days between the bracketing dated stages are divided
-- evenly by the number of stage steps between them.
SELECT d,
dt,
status,
( next_dt - recent_dt ) / (next_id - recent_id ) AS estimated_duration
FROM ranges;
Output:
D | DT | STATUS | ESTIMATED_DURATION
-: | :-------- | :---------- | ---------------------------------------:
1 | 01-JAN-18 | requirement | 7
1 | 08-JAN-18 | analysis | 7.33333333333333333333333333333333333333
1 | null | design | 7.33333333333333333333333333333333333333
1 | null | accepted | 7.33333333333333333333333333333333333333
1 | 30-JAN-18 | closed | 0
2 | 01-FEB-18 | requirement | 4.25
2 | null | analysis | 4.25
2 | null | design | 4.25
2 | null | accepted | 4.25
2 | 18-FEB-18 | closed | 0
3 | 02-JAN-18 | requirement | 27
3 | 29-JAN-18 | analysis | 2.33333333333333333333333333333333333333
3 | null | design | 2.33333333333333333333333333333333333333
3 | null | accepted | 2.33333333333333333333333333333333333333
3 | 05-FEB-18 | closed | 0
Query 2:
Then of you can easily change that to take the average for each status:
-- statuses: the full list of stages, numbered in workflow order.
WITH statuses ( status, id ) AS (
SELECT 'requirement', 1 FROM DUAL UNION ALL
SELECT 'analysis', 2 FROM DUAL UNION ALL
SELECT 'design', 3 FROM DUAL UNION ALL
SELECT 'accepted', 4 FROM DUAL UNION ALL
SELECT 'closed', 5 FROM DUAL
),
-- ranges: one row per (d, status) pair, including statuses a given d never recorded,
-- together with the dated stages that bracket it.
ranges ( d, dt, status, id, recent_dt, recent_id, next_dt, next_id ) AS (
SELECT d.d,
d.dt,
s.status,
s.id,
-- recent_dt: this stage's own date, otherwise the closest earlier stage's date.
NVL(
d.dt,
LAG( d.dt, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
-- recent_id: the stage number that recent_dt was taken from.
NVL2(
d.dt,
s.id,
LAG( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
-- next_dt: date of the closest later dated stage; defaults to this row's own dt when there is none.
LEAD( d.dt, 1, d.dt )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id ),
-- next_id: stage number of that later dated stage; defaults to s.id + 1 so the divisor below is never 0.
LEAD( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1, s.id + 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
FROM data d
-- Partitioned outer join: the statuses list is outer-joined separately to each d's rows.
PARTITION BY ( d )
RIGHT OUTER JOIN statuses s
ON ( d.status = s.status )
)
-- Average the interpolated per-row duration over all d values for each status.
-- id is in the GROUP BY only so the result can be ordered by workflow position.
SELECT status,
AVG( ( next_dt - recent_dt ) / (next_id - recent_id ) ) AS estimated_duration
FROM ranges
GROUP BY status, id
ORDER BY id;
Results:
STATUS | ESTIMATED_DURATION
:---------- | ---------------------------------------:
requirement | 12.75
analysis | 4.63888888888888888888888888888888888889
design | 4.63888888888888888888888888888888888889
accepted | 4.63888888888888888888888888888888888889
closed | 0
db<>fiddle here

Subtracting a column based on another column

This table consists of transaction data, where each row is one transaction exchange. Main data to collect: average amount of years it takes users to switch from checks to credit cards when paying.
I'm trying to subtract the year a person first used a check with the year he first used a credit card to the same receiving bank account. Example data is attached below. This query is freaking complicated and I'm wondering if I should/can even do this, but this is what I got so far
-- Draft: one row per ID / bank account / transaction year / payment method.
-- Fixed so that it parses: there is no dangling comma before FROM, and the
-- ORDER BY names the same BankAcc# column as the SELECT and GROUP BY lists.
-- "table" stands in for the real table name.
SELECT
ID,
BankAcc#
FROM table
GROUP BY
ID,
BankAcc#,
TransYear,
Method
ORDER BY
ID,BankAcc#,TransYear ASC
Example table (sorry couldn't embed the photo since I'm new)
My idea was to group ID-Bank#-TransYear-Method into a single row, with the TransYear being the earliest year of that Method, by using ORDER BY ASC LIMIT 1. Problems I'm running into:
Oracle doesn't support LIMIT after the ORDER BY clause. I tried using OFFSET or FETCH but it didn't work for some reason after the ORDER BY clause. I have Oracle 12.9.0.71 so I theoretically should have that function, but nope. I also tried WHERE rownum=1 but it limits all my results to 1 instead of limiting the ID-Bank-Year-Method group to 1.
Even if I do manage to get rows of the correct data, I still don't know how I can subtract the year values of check against card. Issue is that the data is in the same column and I need to distinguish it with another column before subtracting the year values.
Any thoughts? Appreciate any help, especially since this is really complicated.
Two methods - one using MIN and CASE and the other using MIN and PIVOT
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample transactions: who paid into which bank account, in which year, and how.
-- The literal rows are stacked with UNION ALL: nothing needs de-duplicating, and
-- plain UNION would sort the rows and silently drop any repeated sample row.
CREATE TABLE table_name ( PersonId, BankAcc, TransactionNo, TransYear, method) as
select 1, 10, 1, 2011, 'check' from dual union all
select 1, 10, 5, 2012, 'card' from dual union all
select 2, 11, 7, 2012, 'check' from dual union all
select 2, 15, 10, 2012, 'check' from dual union all
select 2, 15, 11, 2014, 'card' from dual union all
select 2, 15, 14, 2016, 'card' from dual union all
select 2, 19, 15, 2009, 'check' from dual union all
select 2, 19, 16, 2015, 'card' from dual union all
select 3, 20, 25, 2017, 'check' from dual union all
select 3, 21, 34, 2015, 'check' from dual union all
select 3, 21, 51, 2017, 'card' from dual;
Query 1:
-- Per person and bank account: first card year minus first check year.
-- The difference is NULL when the account never saw one of the two methods.
SELECT   t.PersonID,
         t.BankAcc,
         MIN( CASE WHEN t.method = 'card'  THEN t.TransYear END )
       - MIN( CASE WHEN t.method = 'check' THEN t.TransYear END ) AS diff
FROM     table_name t
GROUP BY t.PersonID, t.BankAcc
ORDER BY t.PersonID, t.BankAcc
Results:
| PERSONID | BANKACC | DIFF |
|----------|---------|--------|
| 1 | 10 | 1 |
| 2 | 11 | (null) |
| 2 | 15 | 2 |
| 2 | 19 | 6 |
| 3 | 20 | (null) |
| 3 | 21 | 2 |
Query 2:
-- Only the columns the pivot needs: the grouping keys, the pivoted value and the pivot key.
WITH first_use_source AS (
    SELECT PersonId, BankAcc, TransYear, method
    FROM   table_name
)
-- chk / crd hold the earliest check / card year per person and bank account.
SELECT   p.PersonId,
         p.BankAcc,
         p.crd - p.chk AS diff
FROM     first_use_source
PIVOT    ( MIN( TransYear ) FOR method IN ( 'check' AS chk, 'card' AS crd ) ) p
ORDER BY p.PersonId, p.BankAcc
Results:
| PERSONID | BANKACC | DIFF |
|----------|---------|--------|
| 1 | 10 | 1 |
| 2 | 11 | (null) |
| 2 | 15 | 2 |
| 2 | 19 | 6 |
| 3 | 20 | (null) |
| 3 | 21 | 2 |
This query returns the result you posted; see if it is really OK.
SQL> with test (person_id, acc, trans, tyear, method) as
2 (select 1, 10, 1, 2011, 'check' from dual union
3 select 1, 10, 5, 2012, 'card' from dual union
4 select 2, 11, 7, 2012, 'check' from dual union
5 select 2, 15, 10, 2012, 'check' from dual union
6 select 2, 15, 11, 2014, 'card' from dual union
7 select 2, 15, 14, 2016, 'card' from dual union
8 select 2, 19, 15, 2009, 'check' from dual union
9 select 2, 19, 16, 2015, 'card' from dual union
10 select 3, 20, 25, 2017, 'check' from dual union
11 select 3, 21, 34, 2015, 'check' from dual union
12 select 3, 21, 51, 2017, 'card' from dual
13 ),
14 inter as
15 (select person_id, acc, trans, tyear, method,
16 first_value(tyear) over (partition by person_id, acc, method order by trans) fv -- fv = year of the lowest-numbered transaction for this person / account / method
17 from test
18 )
19 select person_id, acc, max(fv) - min(fv) diff -- gap between the methods' first years; never negative, so it does not show which method came first
20 from inter
21 group by person_id, acc
22 having count(distinct method) > 1 -- keep only accounts that were paid with more than one method
23 order by person_id, acc;
PERSON_ID ACC DIFF
---------- ---------- ----------
1 10 1
2 15 2
2 19 6
3 21 2
SQL>

Oracle: Combine two group by queries (which use aggregate function count()) by union or so to get a consolidated result

I have two tables. TABLE_A and TABLE_B.
Both tables maintain columns to save CREATION_USER. But this column has different name in respective tables.
My motive is to get a count of records each user has created in both tables.
That is, combining result of these two queries with few conditions. The user name should not get repeated and for user names who have created records in both tables, the count should be their sum.
-- Records created per user in TABLE_A.
SELECT   CREATION_USER_A AS "USER",
         COUNT(*)
FROM     TABLE_A
GROUP BY CREATION_USER_A;

-- Records created per user in TABLE_B.
SELECT   CREATION_USER_B AS "USER",
         COUNT(*)
FROM     TABLE_B
GROUP BY CREATION_USER_B;
For e.g.,
USER_A has created 2 records in TABLE_A,
USER_B has created 3 records in TABLE_B and
USER_C has created 4 records in TABLE_A and 3 records in TABLE_B.
So the output should look like this:
| USER | COUNT |
| USER_A | 2 |
| USER_B | 3 |
| USER_C | 7 |
I have written a query which does this but it performs really bad.
-- First branch: users present in TABLE_A. Their TABLE_A count is added to a
-- correlated scalar subquery that counts the same user's rows in TABLE_B.
SELECT A.CREATION_USER_A AS "USER",
(COUNT(A.CREATION_USER_A)+(SELECT COUNT(CREATION_USER_B) FROM TABLE_B WHERE CREATION_USER_B = A.CREATION_USER_A)) AS "COUNT"
FROM TABLE_A A
GROUP BY A.CREATION_USER_A
UNION
-- Second branch: users that appear only in TABLE_B.
-- NOTE(review): if CREATION_USER_A can be NULL, this NOT IN filters out every row - confirm nullability.
SELECT B.CREATION_USER_B,
COUNT(B.CREATION_USER_B)
FROM TABLE_B B
WHERE B.CREATION_USER_B NOT IN (SELECT CREATION_USER_A FROM TABLE_A)
GROUP BY B.CREATION_USER_B;
Please suggest a way to get this done.
You can simply build a set given by the union (keeping duplicates) of all the records in your tables, and then count the records grouping by creation user:
Bulding some sample data:
-- Sample data: USER_A has 2 rows and USER_C has 4 rows in table_a.
create table table_a(id, creation_user_a) as (
select 1, 'USER_A' from dual union all
select 1, 'USER_A' from dual union all
select 1, 'USER_C' from dual union all
select 1, 'USER_C' from dual union all
select 1, 'USER_C' from dual union all
select 1, 'USER_C' from dual
);
-- Sample data: USER_B has 3 rows and USER_C has 3 rows in table_b.
-- The statement is terminated with ";" like the one above, so both can be run as one script.
create table table_b(id, creation_user_b) as (
select 1, 'USER_B' from dual union all
select 1, 'USER_B' from dual union all
select 1, 'USER_B' from dual union all
select 1, 'USER_C' from dual union all
select 1, 'USER_C' from dual union all
select 1, 'USER_C' from dual
);
The query:
-- all_creators: the creator column of every record from table_a and table_b.
WITH all_creators ( creation_user ) AS (
    SELECT creation_user_a FROM table_a
    UNION ALL   /* UNION ALL keeps duplicates, so every record is counted */
    SELECT creation_user_b FROM table_b
)
SELECT   COUNT(*),
         creation_user
FROM     all_creators
GROUP BY creation_user
ORDER BY creation_user
The result:
2 USER_A
3 USER_B
7 USER_C
The explain plan:
---------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 12 | 96 | 8 (25)| 00:00:01 |
| 1 | SORT ORDER BY | | 12 | 96 | 8 (25)| 00:00:01 |
| 2 | HASH GROUP BY | | 12 | 96 | 8 (25)| 00:00:01 |
| 3 | VIEW | | 12 | 96 | 6 (0)| 00:00:01 |
| 4 | UNION-ALL | | | | | |
| 5 | TABLE ACCESS FULL| TABLE_A | 6 | 48 | 3 (0)| 00:00:01 |
| 6 | TABLE ACCESS FULL| TABLE_B | 6 | 48 | 3 (0)| 00:00:01 |
---------------------------------------------------------------------------------
An alternative (but more complicated, and possibly slower - you'd need to test both to check) solution to Aleksej's answer is to use a full outer join to join both grouped by queries, like so:
WITH table_a ( creation_user_a, val ) AS (
    SELECT 'USER_A', 10 FROM dual UNION ALL
    SELECT 'USER_A', 20 FROM dual UNION ALL
    SELECT 'USER_C', 30 FROM dual UNION ALL
    SELECT 'USER_C', 40 FROM dual UNION ALL
    SELECT 'USER_C', 50 FROM dual UNION ALL
    SELECT 'USER_C', 60 FROM dual
),
table_b ( creation_user_b, val ) AS (
    SELECT 'USER_B', 10 FROM dual UNION ALL
    SELECT 'USER_B', 20 FROM dual UNION ALL
    SELECT 'USER_B', 30 FROM dual UNION ALL
    SELECT 'USER_C', 40 FROM dual UNION ALL
    SELECT 'USER_C', 50 FROM dual UNION ALL
    SELECT 'USER_C', 60 FROM dual
)
-- end of mimicking your tables with data in them. See the SQL below:
-- Each table is counted per user first; the full outer join then keeps users
-- found in only one of the tables, and a missing count is treated as 0.
SELECT   COALESCE(cnt_by_a.creation_user_a, cnt_by_b.creation_user_b) "USER",
         COALESCE(cnt_by_a.cnt_a, 0) + COALESCE(cnt_by_b.cnt_b, 0) total_records
FROM     ( SELECT   creation_user_a,
                    COUNT(*) cnt_a
           FROM     table_a
           GROUP BY creation_user_a
         ) cnt_by_a
FULL OUTER JOIN
         ( SELECT   creation_user_b,
                    COUNT(*) cnt_b
           FROM     table_b
           GROUP BY creation_user_b
         ) cnt_by_b
      ON cnt_by_a.creation_user_a = cnt_by_b.creation_user_b
ORDER BY "USER";
USER TOTAL_RECORDS
------ -------------
USER_A 2
USER_B 3
USER_C 7
Thank you for helping me guys. I have found a simpler and more efficient solution. It works.
-- Per-user record count across TABLE_A and TABLE_B: each table is counted per
-- user, the two result sets are stacked, and the outer query adds them up.
-- The branches must be combined with UNION ALL. Plain UNION removes duplicate
-- rows, so a user with the same count in both tables (e.g. 3 and 3) would be
-- left with a single (user, 3) row and a total of 3 instead of 6.
SELECT CREATION_USER, SUM(TOTAL_COUNT) TOTAL_COUNT FROM
(SELECT /*+ PARALLEL */ A.CREATION_USER_A CREATION_USER,
COUNT(A.CREATION_USER_A) TOTAL_COUNT
FROM TABLE_A A
GROUP BY A.CREATION_USER_A
UNION ALL
SELECT /*+ PARALLEL */ B.CREATION_USER_B CREATION_USER,
COUNT(B.CREATION_USER_B) TOTAL_COUNT
FROM TABLE_B B
GROUP BY B.CREATION_USER_B)
GROUP BY CREATION_USER;

Resources