How to compare 2 columns and return the difference in oracle SQL - oracle

We have 2 columns in one table in oracle SQL as
Col1= "there is book on the table"
Col2= "there are flowers on the chair"
Now I need the result as differed data in the column3 as new column col3.
The col3 result should be
"are flowers chair".
How to achieve this in oracle SQL??

You can use:
-- For each table row, list the words of col2 that are NOT part of the longest
-- in-order sequence of words that col1 and col2 have in common.
--
-- words: recursively peels the first space-delimited word off each column value.
--   col  = the still-unparsed remainder (NULL once the last word is taken)
--   name = which source column the word came from ('COL1' / 'COL2')
--   id   = 1-based position of the word within its column value
WITH words ( rid, col, name, id, word ) AS (
-- Anchor member: first word of every unpivoted col1/col2 value.
SELECT rid,
CASE INSTR(col, ' ')
WHEN 0
THEN NULL
ELSE SUBSTR(col, INSTR(col, ' ') + 1)
END,
name,
1,
CASE INSTR(col, ' ')
WHEN 0
THEN col
ELSE SUBSTR(col, 1, INSTR(col, ' ') - 1)
END
FROM ( SELECT ROWID AS rid, col1, col2 FROM table_name )
UNPIVOT ( col FOR name IN (col1, col2) )
UNION ALL
-- Recursive member: take the next word from the remainder until nothing is left.
SELECT rid,
CASE INSTR(col, ' ')
WHEN 0
THEN NULL
ELSE SUBSTR(col, INSTR(col, ' ') + 1)
END,
name,
id + 1,
CASE INSTR(col, ' ')
WHEN 0
THEN col
ELSE SUBSTR(col, 1, INSTR(col, ' ') - 1)
END
FROM words
WHERE col IS NOT NULL
),
-- paired_words: every (col1 position, col2 position) pair holding the same word in the same row.
paired_words ( rid, id1, id2 ) AS (
SELECT c1.rid,
c1.id AS id1,
c2.id AS id2
FROM ( SELECT rid, id, word FROM words WHERE name = 'COL1' ) c1
INNER JOIN
( SELECT rid, id, word FROM words WHERE name = 'COL2' ) c2
ON (c1.rid = c2.rid AND c1.word = c2.word)
),
-- max_path: chain pairs whose positions increase in both columns and keep, per row,
-- the deepest chain; path is the comma-wrapped list of col2 positions on that chain.
max_path ( rid, path ) AS (
SELECT rid,
path
FROM (
SELECT rid,
SYS_CONNECT_BY_PATH(id2, ',') || ',' AS path,
ROW_NUMBER() OVER (PARTITION BY rid ORDER BY LEVEL DESC) AS rn
FROM paired_words
CONNECT BY PRIOR rid = rid
AND PRIOR id1 < id1
AND PRIOR id2 < id2
)
WHERE rn = 1
)
-- Final result: col2 words whose position does not appear in the chosen path,
-- re-joined in their original order.
SELECT LISTAGG(word, ' ') WITHIN GROUP (ORDER BY id) AS missing
FROM words w
WHERE NOT EXISTS (
SELECT 1
FROM max_path mp
WHERE w.rid = mp.rid
-- The leading/trailing commas in path make this an exact position match.
AND mp.path LIKE '%,' || w.id || ',%'
)
AND w.name = 'COL2'
GROUP BY rid;
Which, for the sample data:
-- Sample data: three col1/col2 sentence pairs (replaced words, one inserted word,
-- and repeated phrases) used to exercise the word-difference query.
CREATE TABLE table_name ( col1, col2 ) AS
SELECT 'there is book on the table', 'there are flowers on the chair' FROM DUAL UNION ALL
SELECT 'there is book on the table', 'there is a book on the table' FROM DUAL UNION ALL
SELECT 'there is book on the table', 'there is book there is book on the table on the table' FROM DUAL;
Outputs:
MISSING
are flowers chair
a
there is book on the table
db<>fiddle here

Here's one option (which follows what you asked). Read comments within code.
SQL> with test (id, col1, col2) as
2 (select 1, 'there is book on the table',
3 'there are flowers on the chair'
4 from dual
5 ),
6 -- split sentences into words (each in its own line)
7 sent1 as
8 (select id,
9 column_value cv,
10 regexp_substr(col1, '[^ ]+', 1, column_value) word
11 from test cross join
12 table(cast(multiset(select level from dual
13 connect by level <= regexp_count(col1, ' ') + 1
14 ) as sys.odcinumberlist))
15 ),
16 sent2 as
17 (select id,
18 column_value cv,
19 regexp_substr(col2, '[^ ]+', 1, column_value) word
20 from test cross join
21 table(cast(multiset(select level from dual
22 connect by level <= regexp_count(col2, ' ') + 1
23 ) as sys.odcinumberlist))
24 )
25 -- final result
26 select a.id,
27 listagg(b.word, ' ') within group (order by a.cv) result
28 from sent2 b join sent1 a on a.id = b.id and a.cv = b.cv and a.word <> b.word
29 group by a.id;
ID RESULT
---------- ------------------------------
1 are flowers chair
SQL>

Related

ORACLE - How to use LAG to display strings from all previous rows into current row

I have data like below:
group
seq
activity
A
1
scan
A
2
visit
A
3
pay
B
1
drink
B
2
rest
I expect to have 1 new column "hist" like below:
group
seq
activity
hist
A
1
scan
NULL
A
2
visit
scan
A
3
pay
scan, visit
B
1
drink
NULL
B
2
rest
drink
I was trying to solve with LAG function, but LAG only returns one row from previous instead of multiple.
Truly appreciate any help!
Use a correlated sub-query:
-- For every row, collect the activities of the earlier rows (lower seq)
-- of the same "GROUP" into one comma-separated string.
SELECT t.*,
       (
         SELECT LISTAGG(prev.activity, ',') WITHIN GROUP (ORDER BY prev.seq)
         FROM   table_name prev
         WHERE  prev.seq < t.seq
         AND    prev."GROUP" = t."GROUP"
       ) AS hist
FROM   table_name t
Or a hierarchical query:
-- Walk each "GROUP" from seq = 1 through consecutive seq values and accumulate
-- the activities of the rows visited before the current one.
SELECT t.*,
-- The path is built from PRIOR activity, so every path begins with two separators
-- (the root row has no prior activity); SUBSTR(..., 3) strips them.
SUBSTR(SYS_CONNECT_BY_PATH(PRIOR activity, ','), 3) AS hist
FROM table_name t
START WITH seq = 1
CONNECT BY
PRIOR seq + 1 = seq
AND PRIOR "GROUP" = "GROUP"
Or a recursive sub-query factoring clause:
-- Build the history recursively: start at seq = 1 of every "GROUP" and, for each
-- following seq, append the previous row's activity to the previous row's history.
WITH rsqfc ("GROUP", seq, activity, hist) AS (
-- Anchor member: first row of each group has no history.
SELECT "GROUP", seq, activity, NULL
FROM table_name
WHERE seq = 1
UNION ALL
-- Recursive member: hist carries a leading comma, removed in the final SELECT.
SELECT t."GROUP", t.seq, t.activity, r.hist || ',' || r.activity
FROM rsqfc r
INNER JOIN table_name t
ON (r."GROUP" = t."GROUP" AND r.seq + 1 = t.seq)
)
SEARCH DEPTH FIRST BY "GROUP" SET order_rn
SELECT "GROUP", seq, activity, SUBSTR(hist, 2) AS hist
FROM rsqfc
-- order_rn is populated by the SEARCH clause; sorting on it makes the
-- group-by-group, seq-by-seq output order explicit instead of incidental.
ORDER BY order_rn
Which, for the sample data:
-- Sample data from the question: two groups (A with three steps, B with two).
-- "GROUP" is quoted in the column list and must be quoted the same way in the queries that use it.
CREATE TABLE table_name ("GROUP", seq, activity) AS
SELECT 'A', 1, 'scan' FROM DUAL UNION ALL
SELECT 'A', 2, 'visit' FROM DUAL UNION ALL
SELECT 'A', 3, 'pay' FROM DUAL UNION ALL
SELECT 'B', 1, 'drink' FROM DUAL UNION ALL
SELECT 'B', 2, 'rest' FROM DUAL;
All output:
GROUP
SEQ
ACTIVITY
HIST
A
1
scan
null
A
2
visit
scan
A
3
pay
scan,visit
B
1
drink
null
B
2
rest
drink
db<>fiddle here
To aggregate strings in Oracle we use the LISTAGG function.
In general, you need a windowing_clause to specify a sliding window for analytic function to calculate running total.
But unfortunately LISTAGG doesn't support it.
To simulate this behaviour you may use model_clause of the select statement. Below is an example with explanation.
/*Running concatenation of the previous rows' activities per group,
computed with the MODEL clause.*/
select
group_
, activity
, seq
, hist
from t
model
/*Where to restart calculation*/
partition by (group_)
/*Add consecutive numbers to reference the "previous" row per group.
May use the "seq" column instead if its values are consecutive*/
dimension by (
row_number() over(
partition by group_
order by seq asc
) as rn
)
measures (
/*Other columns to return*/
activity
, cast(null as varchar2(1000)) as hist
, seq
)
rules update (
/*Apply this rule sequentially, in ascending rn order*/
hist[any] order by rn asc =
/*Previous concatenated result*/
hist[cv()-1]
/*Plus a comma for the third row and the following rows
(only when a row exists two positions back)*/
|| presentv(activity[cv()-2], ',', '') /**/
/*Plus the previous row's value*/
|| activity[cv()-1]
)
GROUP_ | ACTIVITY | SEQ | HIST
:----- | :------- | --: | :---------
A | scan | 1 | null
A | visit | 2 | scan
A | pay | 3 | scan,visit
B | drink | 1 | null
B | rest | 2 | drink
db<>fiddle here
Few more variants (without subqueries):
-- Two sub-query-free ways to list the activities that precede each row in its "GROUP".
-- hist1: aggregate the whole group, keep the first (row_number - 1) "item," repetitions,
--        then strip the trailing comma that the repetition pattern leaves behind.
-- hist2: wrap the group's activities in XML and string-join the <B> elements whose
--        position is below the current row number.
SELECT--+ NO_XML_QUERY_REWRITE
t.*,
rtrim(
regexp_substr(
listagg(activity, ',')
within group(order by SEQ)
over(partition by "GROUP")
,'^([^,]+,){'||(row_number()over(partition by "GROUP" order by seq)-1)||'}'
)
,','
)
AS hist1
,xmlcast(
xmlquery(
'string-join($X/A/B[position()<$Y]/text(),",")'
passing
xmlelement("A", xmlagg(xmlelement("B", activity)) over(partition by "GROUP")) as x
,row_number()over(partition by "GROUP" order by seq) as y
returning content
)
as varchar2(1000)
) hist2
FROM table_name t;
DBFIddle: https://dbfiddle.uk/?rdbms=oracle_21&fiddle=9b477a2089d3beac62579d2b7103377a
Full test case with output:
-- Self-contained test case: the sample rows are supplied inline through the WITH clause.
with table_name ("GROUP", seq, activity) AS (
SELECT 'A', 1, 'scan' FROM DUAL UNION ALL
SELECT 'A', 2, 'visit' FROM DUAL UNION ALL
SELECT 'A', 3, 'pay' FROM DUAL UNION ALL
SELECT 'B', 1, 'drink' FROM DUAL UNION ALL
SELECT 'B', 2, 'rest' FROM DUAL
)
SELECT--+ NO_XML_QUERY_REWRITE
t.*,
-- hist1: the pattern keeps the first (row_number - 1) "item," repetitions of the
-- aggregated list, so the value retains its trailing comma.
regexp_substr(
listagg(activity, ',')
within group(order by SEQ)
over(partition by "GROUP")
,'^([^,]+,){'||(row_number()over(partition by "GROUP" order by seq)-1)||'}'
)
AS hist1
-- hist2: string-join of the <B> elements positioned before the current row number.
,xmlcast(
xmlquery(
'string-join($X/A/B[position()<$Y]/text(),",")'
passing
xmlelement("A", xmlagg(xmlelement("B", activity)) over(partition by "GROUP")) as x
,row_number()over(partition by "GROUP" order by seq) as y
returning content
)
as varchar2(1000)
) hist2
FROM table_name t;
GROUP SEQ ACTIV HIST1 HIST2
------ ---------- ----- ------------------------------ ------------------------------
A 1 scan
A 2 visit scan, scan
A 3 pay scan,visit, scan,visit
B 1 drink
B 2 rest drink, drink

Remove comma separated string from another comma separated string in oracle

Column1 =A,B,C,D,E,F
Column2 =C,D,A,F,C,B (It can have duplicates)
I need to remove column2 values from column1 and get the missing value.
Desired output
(Column1)-(Column2) = E
Split columns' contents into rows, use MINUS set operator. Sample data in lines #1 - 3; query begins at line #4.
SQL> with test (col1, col2) as
2 (select 'A,B,C,D,E,F', 'C,D,A,F,C,B' from dual
3 )
4 select regexp_substr(col1, '[^,]+', 1, level) val
5 from test
6 connect by level <= regexp_count(col1, ',') + 1
7 minus
8 select regexp_substr(col2, '[^,]+', 1, level) val
9 from test
10 connect by level <= regexp_count(col2, ',') + 1
11 /
VAL
--------------------------------------------
E
SQL>
If you're comparing columns in a multi-row table, the above approach won't work OK as it'll retrieve duplicates and will be slow. In that case, rewrite it to
SQL> with test (id, col1, col2) as
2 (select 1, 'A,B,C,D,E,F', 'C,D,A,F,C,B' from dual union all
3 select 2, 'A,B,C,D,E,F', 'A,B,B,B' from dual
4 )
5 select id, listagg(val, ',') within group (order by val) missing_letters
6 from
7 (
8 select id,
9 regexp_substr(col1, '[^,]+', 1, column_value) val
10 from test cross join
11 table(cast(multiset(select level from dual
12 connect by level <= regexp_count(col1, ',') + 1
13 ) as sys.odcinumberlist))
14 minus
15 select id,
16 regexp_substr(col2, '[^,]+', 1, column_value) val
17 from test cross join
18 table(cast(multiset(select level from dual
19 connect by level <= regexp_count(col2, ',') + 1
20 ) as sys.odcinumberlist))
21 )
22 group by id;
ID MISSING_LETTERS
---------- --------------------
1 E
2 C,D,E,F
SQL>
You may use translate function with additional cleanup logic to remove all remaining commas. This will work only for single character replacement (one character between commas), but doesn't require to split string into tokens and uses simple string functions.
-- Single-character list difference using TRANSLATE instead of tokenising.
WITH lists (col1, col2) AS (
  SELECT 'A,B,C,D,E,F', 'C,D,A,F,C,B' FROM dual
)
SELECT
  TRIM(BOTH ',' FROM                                -- 3. drop leading/trailing commas
    REGEXP_REPLACE(
      TRANSLATE(col1, REPLACE(col2, ','), ' '),     -- 1. blank out / remove every character listed in col2
      '[, ]+',                                      -- 2. collapse runs of commas and spaces ...
      ','                                           --    ... into a single comma
    )
  ) AS res
FROM lists
| RES |
| :-- |
| E |
db<>fiddle here
You do not need to split the string.
If your delimited values do not have any characters with special meaning in regular expressions then you can double-up the delimiters in col1 and then convert col2 to a regular expression and replace matches with an empty string and then remove the excess delimiters:
-- Remove from col1 every item that also occurs in col2, without splitting either list.
-- Only valid when the items contain no regular-expression metacharacters.
SELECT col1,
col2,
-- 4. strip the delimiters left at both ends
TRIM(
BOTH ',' FROM
-- 3. collapse the doubled delimiters back to single ones
REPLACE(
-- 2. delete every ",item," whose item is one of col2's alternatives
REGEXP_REPLACE(
-- 1. double the delimiters and wrap the list so each item is enclosed by its own commas
',' || REPLACE(col1, ',', ',,') || ',',
',(' || REPLACE(col2, ',', '|') || '),'
),
',,',
','
)
) AS missing
FROM table_name;
Which, for the sample data:
-- Sample data: the question's lists plus a row with multi-character items
-- and a trailing delimiter in col1.
CREATE TABLE table_name ( col1, col2 ) AS
SELECT 'A,B,C,D,E,F', 'C,D,A,F,C,B' FROM DUAL UNION ALL
SELECT 'A,AB,BA,B,', 'A,B' FROM DUAL;
Outputs:
COL1
COL2
MISSING
A,B,C,D,E,F
C,D,A,F,C,B
E
A,AB,BA,B,
A,B
AB,BA
If you do have characters with special meaning then you can do a similar replacement using a recursive sub-query:
-- Same list difference done with plain string functions, so items may contain
-- characters that are special in regular expressions.
WITH replacements ( col1, col2 ) AS (
-- Anchor member: double and wrap col1's delimiters so every item is enclosed by its
-- own commas; terminate col2 with a comma so each item can be cut off at INSTR.
SELECT ',' || REPLACE( col1, ',', ',,') || ',',
col2 || ','
FROM table_name
UNION ALL
-- Recursive member: delete ",<first col2 item>," from col1, then drop that item from col2.
SELECT REPLACE(col1, ',' || SUBSTR(col2, 1, INSTR(col2, ','))),
SUBSTR(col2, INSTR(col2, ',') + 1)
FROM replacements
WHERE col2 IS NOT NULL
)
-- Keep only the rows where col2 has been fully consumed, then tidy up the delimiters.
SELECT TRIM(BOTH ',' FROM REPLACE(col1, ',,', ',')) AS missing
FROM replacements
WHERE col2 IS NULL
Which outputs:
MISSING
AB,BA
E
Note: both of these queries only require a single table scan.
db<>fiddle here
Using ora:tokenize you could do something like this (including a few test cases in the with clause; you should remove it, and use your actual table and column names in the main query):
-- List, per input row, the col1 tokens that do not occur in col2.
with
inputs (col1, col2) as (
select 'A,B,C,D,E,F', 'C,D,A,F,C,B' from dual union all
select 'D,,F' , 'F,A' from dual union all
select 'A,B,E,F' , 'E' from dual union all
select 'ABC' , 'A,B,ABC' from dual
)
-- END OF TEST DATA; QUERY BEGINS **BELOW THIS LINE**
select i.col1, i.col2, l.diff
from inputs i cross join lateral
-- ord records each token's position in col1 so that diff is assembled in a
-- deterministic order (the original col1 order) rather than an arbitrary one.
( select listagg(x.token, ',') within group (order by x.ord) as diff
from xmltable('ora:tokenize(.,",")' passing i.col1 || ','
columns ord for ordinality,
token varchar2(10) path '.') x
-- Wrapping both sides in commas makes the LIKE an exact whole-token match.
where not ',' || i.col2 || ',' like '%,' || x.token || ',%' ) l
;
COL1 COL2 DIFF
----------- ----------- --------------------
A,B,C,D,E,F C,D,A,F,C,B E
D,,F F,A D
A,B,E,F E A,B,F
ABC A,B,ABC

Count column comma delimited values oracle

Is it possible to count and also group by comma delimited values in the oracle database table? This is a table data example:
id | user | title |
1 | foo | a,b,c |
2 | bar | a,d |
3 | tee | b |
The expected result would be:
title | count
a | 2
b | 2
c | 1
d | 1
I wanted to use concat like this:
SELECT a.title FROM Account a WHERE concat(',', a.title, ',') LIKE 'a' OR concat(',', a.title, ',') LIKE 'b' ... GROUP BY a.title?
But I'm getting invalid number of arguments on concat. The title values are predefined, therefore I don't mind if I have to list all of them in the query. Any help is greatly appreciated.
This uses simple string functions and a recursive sub-query factoring and may be faster than using regular expressions and correlated joins:
Oracle Setup:
-- Sample data from the question; title holds a comma-separated list of values.
CREATE TABLE account ( id, "user", title ) AS
SELECT 1, 'foo', 'a,b,c' FROM DUAL UNION ALL
SELECT 2, 'bar', 'a,d' FROM DUAL UNION ALL
SELECT 3, 'tee', 'b' FROM DUAL;
Query:
-- Split every comma-separated title list into its items by stepping from one
-- comma to the next, then count how many times each item occurs.
WITH spans ( title, from_pos, to_pos ) AS (
  -- First item: starts at position 1 and ends at the first comma (0 when there is none).
  SELECT title, 1, INSTR( title, ',', 1 )
  FROM   account
  UNION ALL
  -- Next item: starts right after the previous comma.
  SELECT title, to_pos + 1, INSTR( title, ',', to_pos + 1 )
  FROM   spans
  WHERE  to_pos > 0
),
items ( item ) AS (
  SELECT CASE
           WHEN to_pos = 0 THEN SUBSTR( title, from_pos )                    -- last item: take the rest
           ELSE                 SUBSTR( title, from_pos, to_pos - from_pos ) -- item between two delimiters
         END
  FROM   spans
)
SELECT   item,
         COUNT(*)
FROM     items
GROUP BY item
ORDER BY item;
Output:
ITEM | COUNT(*)
:--- | -------:
a | 2
b | 2
c | 1
d | 1
db<>fiddle here
Split titles to rows and count them.
SQL> with test (id, title) as
2 (select 1, 'a,b,c' from dual union all
3 select 2, 'a,d' from dual union all
4 select 3, 'b' from dual
5 ),
6 temp as
7 (select regexp_substr(title, '[^,]', 1, column_value) val
8 from test cross join table(cast(multiset(select level from dual
9 connect by level <= regexp_count(title, ',') + 1
10 ) as sys.odcinumberlist))
11 )
12 select val as title,
13 count(*)
14 From temp
15 group by val
16 order by val;
TITLE COUNT(*)
-------------------- ----------
a 2
b 2
c 1
d 1
SQL>
If titles aren't that simple, then modify REGEXP_SUBSTR (add + sign) in line #7, e.g.
SQL> with test (id, title) as
2 (select 1, 'Robin Hood,Avatar,Star Wars Episode III' from dual union all
3 select 2, 'Mickey Mouse,Avatar' from dual union all
4 select 3, 'The Godfather' from dual
5 ),
6 temp as
7 (select regexp_substr(title, '[^,]+', 1, column_value) val
8 from test cross join table(cast(multiset(select level from dual
9 connect by level <= regexp_count(title, ',') + 1
10 ) as sys.odcinumberlist))
11 )
12 select val as title,
13 count(*)
14 From temp
15 group by val
16 order by val;
TITLE COUNT(*)
------------------------------ ----------
Avatar 2
Mickey Mouse 1
Robin Hood 1
Star Wars Episode III 1
The Godfather 1
SQL>

How to get count by using UNION operator

i'm trying to get total count by using UNION operator but it gives wrong count.
select count(*) as companyRatings from (
select count(*) hrs from (
select distinct hrs from companyA
)
union
select count(*) financehrs from (
select distinct finance_hrs from companyB
)
union
select count(*) hrids from (
select regexp_substr(hr_id,'[^/]+',1,3) hrid from companyZ
)
union
select count(*) cities from (
select regexp_substr(city,'[^/]+',1,3) city from companyY
)
);
The individual queries work fine, but the total count does not match.
individual results here: 12 19 3 6
present total count: 31
Actual total count:40.
so there is any alternate solution without UNION operator?
To add values you'd use +. UNION is to add data sets.
-- Add the four per-table counts together as scalar subqueries (UNION would merge rows, not add values).
-- NOTE(review): COUNT(DISTINCT col) and COUNT(expr) ignore NULLs, whereas COUNT(*) over a
-- subquery counts every row it returns; confirm that NULL hrs/finance_hrs values and rows
-- without a third '/'-separated segment are meant to be excluded from the total.
select
(select count(distinct hrs) from companyA)
+
(select count(distinct finance_hrs) from companyB)
+
(select count(regexp_substr(hr_id,'[^/]+',1,3)) from companyZ)
+
(select count(regexp_substr(city,'[^/]+',1,3)) from companyY)
as total
from dual;
But I agree with juergen d; you should not have separate tables per company in the first place.
Edit. Updated query using Sum
-- Stack the four per-table counts as rows (UNION ALL keeps equal counts apart)
-- and add them up.
SELECT SUM(part.cnt) AS companyRatings
FROM (
  SELECT COUNT(*) AS cnt
  FROM   (SELECT DISTINCT hrs FROM companyA)
  UNION ALL
  SELECT COUNT(*) AS cnt
  FROM   (SELECT DISTINCT finance_hrs FROM companyB)
  UNION ALL
  SELECT COUNT(*) AS cnt
  FROM   (SELECT REGEXP_SUBSTR(hr_id, '[^/]+', 1, 3) hrid FROM companyZ)
  UNION ALL
  SELECT COUNT(*) AS cnt
  FROM   (SELECT REGEXP_SUBSTR(city, '[^/]+', 1, 3) city FROM companyY)
) part
Previous answer:
Try this
-- Compute each table's count in its own scalar subquery and add the four
-- results in a single row selected from DUAL.
SELECT (SELECT COUNT(*) FROM (SELECT DISTINCT hrs FROM companyA))
     + (SELECT COUNT(*) FROM (SELECT DISTINCT finance_hrs FROM companyB))
     + (SELECT COUNT(*) FROM (SELECT REGEXP_SUBSTR(hr_id, '[^/]+', 1, 3) hrid FROM companyZ))
     + (SELECT COUNT(*) FROM (SELECT REGEXP_SUBSTR(city, '[^/]+', 1, 3) city FROM companyY))
       AS total_count
FROM   dual

Concatenate columns with distinct values in oracle

Gurus,
My table structure looks like the following:
TankListUS TankListCanada TankListIndia
---------- -------------- -------------
T111||T222 T444||T222 T555
Now when I run the query:
select (TL.TankListUS || '||' || TL.TankListCanada || '||' || TL.TankListIndia)as "OverallSummary" from TankListTL
I get output as
T111||T222||T444||T222||T555
But I don't need duplicate of tanks. I need my output as:
T111||T222||T444||T555
Is this possible?
I have not tested this but try the following :-
-- Per source row: concatenate the three tank lists, split the result on '||',
-- drop repeated tanks and re-join the remaining ones in sorted order.
-- NOTE(review): the FROM clause originally read "TankListTL" while the columns are
-- qualified with TL; the alias is added so they resolve. Confirm the real table name.
with test1 as
(select ROWID as rid,
(TL.TankListUS || '||' || TL.TankListCanada || '||' || TL.TankListIndia) as str
from TankListTL TL),
test2 as
-- DISTINCT removes duplicate tanks within a row. The row generator is correlated to
-- the current row and sized by the number of tokens, so each row is split on its own.
(select distinct t.rid,
regexp_substr(t.str, '[^|]+', 1, n.column_value) as split
from test1 t
cross join table(cast(multiset(select level from dual
connect by level <= regexp_count(t.str, '[^|]+')
) as sys.odcinumberlist)) n)
select listagg(split,'||') within group(order by split) as "OverallSummary"
from test2
group by rid
Do note that listagg was introduced in Oracle 11gR2. Following are some of the string aggregation techniques :-
http://www.oracle-base.com/articles/misc/string-aggregation-techniques.php#listagg
Source:
TANKLISTUS TANKLISTCANADA TANKLISTINDIA
T111||T222 T444||T222 T555
T111||T222 T444||T111 T555
T111||T666 T444||T222 T555
Code:
-- Build a duplicate-free, sorted tank list per source row using fixed-position parsing.
-- src: inline sample rows.
WITH src AS
(
SELECT 'T111||T222' TankListUS, 'T444||T222' TankListCanada, 'T555' TankListIndia FROM DUAL UNION ALL
SELECT 'T111||T222' TankListUS, 'T444||T111' TankListCanada, 'T555' TankListIndia FROM DUAL UNION ALL
SELECT 'T111||T666' TankListUS, 'T444||T222' TankListCanada, 'T555' TankListIndia FROM DUAL
)
-- step1: cut the US and Canada lists at fixed offsets (characters 1-4 and 7 onwards),
-- i.e. it relies on exactly two 4-character tanks separated by '||'; India is taken whole.
-- r_id tags each source row.
, step1 AS
(
SELECT SUBSTR(TankListUS, 1, 4) AS us1
, SUBSTR(TankListUS, 7) AS us2
, SUBSTR(TankListCanada, 1, 4) AS ca1
, SUBSTR(TankListCanada, 7) AS ca2
, TankListIndia AS in1
, ROWNUM AS r_id
FROM src
)
-- step2: one row per (tank, source row); UNION removes the duplicates.
, step2 AS
(
SELECT us1 AS r_value, r_id FROM step1 UNION
SELECT us2, r_id FROM step1 UNION
SELECT ca1, r_id FROM step1 UNION
SELECT ca2, r_id FROM step1 UNION
SELECT in1, r_id FROM step1
)
-- step3: for each tank, look ahead at up to four following tanks of the same source row
-- (five values is the maximum step2 can produce per row).
,step3 AS
(
SELECT r_value
, LEAD(r_value, 1) OVER (PARTITION BY r_id ORDER BY r_value) AS lead1
, LEAD(r_value, 2) OVER (PARTITION BY r_id ORDER BY r_value) AS lead2
, LEAD(r_value, 3) OVER (PARTITION BY r_id ORDER BY r_value) AS lead3
, LEAD(r_value, 4) OVER (PARTITION BY r_id ORDER BY r_value) AS lead4
, ROW_NUMBER() OVER (PARTITION BY r_id ORDER BY r_value) AS r_num
FROM step2
)
-- step4: starting from the first tank of each row, append every existing look-ahead
-- value with a '||' separator; NVL2 contributes nothing when the look-ahead is NULL.
,step4 AS
(
SELECT r_value
|| NVL2(lead1, '||' || lead1, lead1)
|| NVL2(lead2, '||' || lead2, lead2)
|| NVL2(lead3, '||' || lead3, lead3)
|| NVL2(lead4, '||' || lead4, lead4) AS the_result
FROM step3
WHERE r_num = 1
)
-- Final result: DISTINCT collapses source rows that yield the same list.
SELECT DISTINCT the_result AS the_result_with_DISTINCT
FROM step4
Result:
T111||T222||T444||T555||T666
T111||T222||T444||T555
-- Split a '||'-delimited string into tokens, de-duplicate them and glue them back together.
-- t1: the sample input string.
with t1 as (select 'T111||T222||T444||T222||T555' col from dual),
-- t2: one row per token; RN comes from a row generator sized as
-- (number of '|' characters / 2) + 1, i.e. the number of '||' delimiters plus one.
t2 as (SELECT rownum,REGEXP_SUBSTR (col, '[^||]+', 1, RN) SPLIT
from t1
cross join
(SELECT ROWNUM RN
FROM (SELECT (length(REGEXP_REPLACE (col, '[^||]+'))/2)+1 MAX_VALUE
from t1)
connect by level <= max_value)),
-- t3: distinct tokens only.
t3 as (select distinct split as dis_col from t2)
select
-- Append '||' to every token, concatenate via XMLAGG, then trim the trailing pipes.
-- NOTE(review): XMLAGG has no ORDER BY here, so the order of tokens in d_col is not fixed.
rtrim (xmlagg (xmlelement (e, dis_col || '||')).extract ('//text()'), '||') d_col
from
t3;

Resources