How can I split by a character without ignoring nulls? - oracle

I'm trying to use regexp_substr to split a delimited string. I'm running into an issue when delimited fields are null: regexp_substr ignores the nulls and moves on to the next occurrence of the delimiter. Is there a way to do this with regexp_substr? If not, what alternative do you use?
-- Demonstrates the problem: '[^#]+' only matches runs of one or more non-'#'
-- characters, so the empty field between the two '#' is never an occurrence.
--Expecting hello, gets hello
select regexp_substr('hello##world', '[^#]+', 1, 1)
from dual;
--Expecting null, gets world (the empty field is skipped, so occurrence 2 is the third field)
select regexp_substr('hello##world', '[^#]+', 1, 2)
from dual;
--Expecting world, gets null (there is no third non-empty run)
select regexp_substr('hello##world', '[^#]+', 1, 3)
from dual;
EDIT: tried this, but it works only with | which isn't an option

Answering based on Matbailie's input in above comment
-- Null-preserving variant: each match is "(field text)(delimiter or end of string)".
-- The trailing ",NULL,1" arguments ask regexp_substr for sub-expression 1 only,
-- i.e. the field text without its delimiter.
--Expecting hello, gets hello
select regexp_substr('hello##world', '(.*?)(#|$)', 1, 1,NULL,1)
from dual
union all
--Expecting null, gets null
select regexp_substr('hello##world', '(.*?)(#|$)', 1, 2,NULL,1)
from dual
union all
--Expecting world, gets world
select regexp_substr('hello##world', '(.*?)(#|$)', 1, 3,NULL,1)
from dual;

You do not need regular expressions. It can be done with simple (and faster) string functions in a recursive sub-query:
-- Split a '#'-delimited string into one row per field using only INSTR/SUBSTR.
-- Empty fields are kept and come back as NULL.
WITH data (value) AS (
  SELECT 'hello##world' FROM DUAL
),
-- bounds: one row per field. start_pos is the field's first character and
-- end_pos is the position of the delimiter that ends it (0 for the last field).
bounds (value, start_pos, end_pos) AS (
  SELECT value,
         1,
         INSTR(value, '#', 1)
  FROM   data
UNION ALL
  SELECT value,
         end_pos + 1,
         INSTR(value, '#', end_pos + 1)
  FROM   bounds
  WHERE  end_pos > 0   -- stop recursing once the last field has been produced
)
SEARCH DEPTH FIRST BY value SET order_id
SELECT CASE end_pos
         WHEN 0
         THEN SUBSTR(value, start_pos)
         ELSE SUBSTR(value, start_pos, end_pos - start_pos)
       END AS item
FROM   bounds
-- The SEARCH clause only populates order_id; rows are returned in field order
-- only when the outer query actually sorts by it.
ORDER BY order_id;
Which outputs:
ITEM
hello
null
world
Or, if you want the data in columns (rather than rows):
-- Split a '#'-delimited string into three columns using only INSTR/SUBSTR.
-- The inline view locates the first two delimiters; the outer query cuts the
-- string around them.
WITH data (value) AS (
  SELECT 'hello##world' FROM DUAL
)
SELECT SUBSTR(b.value, 1, b.first_hash - 1) AS item1,
       SUBSTR(b.value, b.first_hash + 1, b.second_hash - b.first_hash - 1) AS item2,
       SUBSTR(b.value, b.second_hash + 1) AS item3
FROM (
  SELECT d.value,
         INSTR(d.value, '#', 1, 1) AS first_hash,
         INSTR(d.value, '#', 1, 2) AS second_hash
  FROM   data d
) b
Which outputs:
ITEM1
ITEM2
ITEM3
hello
null
world
If you did want to use (slower) regular expressions then:
-- Regular-expression version: one output row per '#'-delimited field.
WITH data (value) AS (
SELECT 'hello##world' FROM DUAL
)
SELECT item
FROM data d
CROSS JOIN LATERAL(
-- Sub-expression 1 of the LEVEL-th match is the field text without its delimiter.
SELECT REGEXP_SUBSTR( d.value, '(.*?)(#|$)', 1, LEVEL, NULL, 1) AS item
FROM DUAL
-- NOTE(review): the strict '<' suggests REGEXP_COUNT reports one more match than
-- there are fields (presumably a final empty match at end of string) -- confirm.
CONNECT BY LEVEL < REGEXP_COUNT( d.value, '(.*?)(#|$)')
)
or, for columns:
-- Regular-expression version returning the first three fields as columns.
-- The pattern is carried alongside the data so it is written only once.
WITH data (value) AS (
  SELECT 'hello##world' FROM DUAL
),
params (value, field_pattern) AS (
  SELECT value,
         '(.*?)(#|$)'
  FROM   data
)
SELECT REGEXP_SUBSTR(p.value, p.field_pattern, 1, 1, NULL, 1) AS item1,
       REGEXP_SUBSTR(p.value, p.field_pattern, 1, 2, NULL, 1) AS item2,
       REGEXP_SUBSTR(p.value, p.field_pattern, 1, 3, NULL, 1) AS item3
FROM   params p
(Which both have the same output as above)
db<>fiddle here

Related

Custom aggregate function to collapse vertices to SDO_GEOMETRY

I have multi-part polyline vertices stored as individual rows in an Oracle 18c table.
ASSET_ID PART_NUM VERTEX_NUM X Y M
---------- ---------- ---------- ---------- ---------- ----------
001 1 1 0 5 0
001 1 2 10 10 11.18
001 1 3 30 0 33.54
001 2 1 50 10 33.54
001 2 2 60 10 43.54
DDL db<>fiddle
CTE db<>fiddle
I want to convert the vertices to a multi-part SDO_GEOMETRY polyline (collapsed into a single row).
I've tried a few different ways of doing that (i.e. listagg and PL/SQL block). Additionally, as a learning exercise, I would also like to explore creating a custom aggregate function as a solution.
It might look like this:
select
asset_id,
sdo_geometry(partition by id, part num, vertex order, x, y, m, gtype, srid) as sdo_geom
from
vertices
group by
asset_id
Output:
ASSET_ID: 001
SDO_GEOM: SDO_GEOMETRY(3306, 26917, NULL, MDSYS.SDO_ELEM_INFO_ARRAY(1, 2, 1, 10, 2, 1), MDSYS.SDO_ORDINATE_ARRAY(0, 5, 0, 10, 10, 11.18, 30, 0, 33.54, 50, 10, 33.54, 60, 10, 43.54))
--SDO_GEOMETRY docs: https://docs.oracle.com/en/database/oracle/oracle-database/19/spatl/spatial-datatypes-metadata.html
--Info about multi-part lines: https://community.oracle.com/tech/apps-infra/discussion/4497547/sdo-geometry-output-how-to-know-if-geometry-is-multi-part
Is there a way to create a custom aggregate function to do that?
Create a type to store the point:
-- Object type carrying a single vertex: its X, Y and M values.
CREATE TYPE PointLRS AS OBJECT(
X NUMBER,
Y NUMBER,
M NUMBER
);
Then create a user-defined aggregation type:
-- Aggregation context for the Line3DAgg user-defined aggregate.
-- It accumulates a flat ordinate array and exposes the four ODCIAggregate
-- routines the aggregate function is bound to.
CREATE TYPE Line3DAggType AS OBJECT(
-- Ordinates collected so far.
ordinates SDO_ORDINATE_ARRAY,
-- Creates the context for a new aggregation.
STATIC FUNCTION ODCIAggregateInitialize(
ctx IN OUT Line3DAggType
) RETURN NUMBER,
-- Consumes one input point.
MEMBER FUNCTION ODCIAggregateIterate(
self IN OUT Line3DAggType,
point IN PointLRS
) RETURN NUMBER,
-- Produces the aggregate result in returnValue.
MEMBER FUNCTION ODCIAggregateTerminate(
self IN OUT Line3DAggType,
returnValue OUT SDO_GEOMETRY,
flags IN NUMBER
) RETURN NUMBER,
-- Folds another context (ctx) into this one.
MEMBER FUNCTION ODCIAggregateMerge(
self IN OUT Line3DAggType,
ctx IN OUT Line3DAggType
) RETURN NUMBER
);
/
CREATE OR REPLACE TYPE BODY Line3DAggType
IS
  -- Start every aggregation with an empty ordinate array.
  STATIC FUNCTION ODCIAggregateInitialize(
    ctx IN OUT Line3DAggType
  ) RETURN NUMBER
  IS
  BEGIN
    ctx := Line3DAggType( SDO_ORDINATE_ARRAY() );
    RETURN ODCIConst.SUCCESS;
  END;

  -- Append one point as three consecutive ordinates (X, Y, M).
  -- A NULL point, or a point with any NULL attribute, is ignored.
  MEMBER FUNCTION ODCIAggregateIterate(
    self  IN OUT Line3DAggType,
    point IN     PointLRS
  ) RETURN NUMBER
  IS
    n_before PLS_INTEGER;
  BEGIN
    IF point IS NULL
       OR point.X IS NULL
       OR point.Y IS NULL
       OR point.M IS NULL
    THEN
      RETURN ODCIConst.SUCCESS;
    END IF;

    n_before := self.ordinates.COUNT;
    self.ordinates.EXTEND(3);
    self.ordinates(n_before + 1) := point.X;
    self.ordinates(n_before + 2) := point.Y;
    self.ordinates(n_before + 3) := point.M;
    RETURN ODCIConst.SUCCESS;
  END;

  -- Wrap the collected ordinates in a single-element geometry (gtype 3302,
  -- element info 1,2,1); return NULL when no ordinates were collected.
  MEMBER FUNCTION ODCIAggregateTerminate(
    self        IN OUT Line3DAggType,
    returnValue OUT    SDO_GEOMETRY,
    flags       IN     NUMBER
  ) RETURN NUMBER
  IS
  BEGIN
    IF self.ordinates.COUNT = 0 THEN
      returnValue := NULL;
    ELSE
      returnValue := SDO_GEOMETRY(
                       3302,
                       NULL,
                       NULL,
                       SDO_ELEM_INFO_ARRAY(1,2,1),
                       self.ordinates
                     );
    END IF;
    RETURN ODCIConst.SUCCESS;
  END;

  -- Append every ordinate of the other context to this one, in order.
  MEMBER FUNCTION ODCIAggregateMerge(
    self IN OUT Line3DAggType,
    ctx  IN OUT Line3DAggType
  ) RETURN NUMBER
  IS
    n_before PLS_INTEGER;
    n_added  PLS_INTEGER;
  BEGIN
    n_added := ctx.ordinates.COUNT;
    IF n_added > 0 THEN
      n_before := self.ordinates.COUNT;
      self.ordinates.EXTEND(n_added);
      FOR k IN 1 .. n_added LOOP
        self.ordinates(n_before + k) := ctx.ordinates(k);
      END LOOP;
    END IF;
    RETURN ODCIConst.SUCCESS;
  END;
END;
/
Then define a custom aggregation function:
-- User-defined aggregate: takes PointLRS values and returns an SDO_GEOMETRY,
-- delegating the work to the ODCIAggregate routines of Line3DAggType.
CREATE FUNCTION Line3DAgg( point PointLRS )
RETURN SDO_GEOMETRY
PARALLEL_ENABLE AGGREGATE USING Line3DAggType;
/
Then you can aggregate the points for each part into a line and then concatenate the lines:
-- Two-level aggregation: the inner query builds one line per (asset_id, part_num)
-- with Line3DAgg; the outer query concatenates those lines per asset.
-- NOTE(review): neither level has an ORDER BY, so the order in which vertices and
-- parts reach the aggregates is not specified by this query -- confirm the result order.
SELECT asset_id,
SDO_AGGR_LRS_CONCAT(SDOAGGRTYPE(part, 0.005)) AS geom
FROM (
SELECT asset_id,
part_num,
Line3DAgg(PointLRS(x, y, m)) AS part
FROM vertices
GROUP BY asset_id, part_num
)
GROUP BY asset_id
db<>fiddle here
This builds the individual linestrings.
-- Builds one linestring geometry per (asset_id, part_num) from inline sample vertices.
with cte as (
select 001 as asset_id, 1 as part_num,1 as vertex_num,0 as x,5 as y, 0 as m from dual union all
select 001 as asset_id, 1 as part_num,2 as vertex_num,10 as x,10 as y,11.18 as m from dual union all
select 001 as asset_id, 1 as part_num,3 as vertex_num,30 as x,0 as y, 33.54 as m from dual union all
select 001 as asset_id, 2 as part_num,1 as vertex_num,50 as x,10 as y,33.54 as m from dual union all
select 001 as asset_id, 2 as part_num,2 as vertex_num,60 as x,10 as y,43.54 as m from dual
)
SELECT asset_id,
part_num,
-- NOTE(review): the result listing that follows this query in the page shows
-- gtype 3002, not the 3302 used here -- confirm which value was actually run.
mdsys.sdo_geometry(
3302,
null,
null,
mdsys.sdo_elem_info_array(1,2,1),
-- Each vertex is crossed with rin = 1..3 to emit x, y, m as three separate
-- rows, ordered by vertex then rin, and the rows are cast into the ordinate array.
CAST(MULTISET( select case when r.rin = 1 then x
when r.rin = 2 then y
when r.rin = 3 then m
end
from cte b,
(select level rin from dual connect by level < 4) r
where b.asset_id = a.asset_id
and b.part_num = a.part_num
order by b.vertex_num, r.rin
) as mdsys.sdo_ordinate_array
)
) as geom
from cte a
group by asset_id, part_num
order by part_num;
Note how the X, Y and M ordinates are "serialised" into an array (of type mdsys.sdo_ordinate_array) using the MULTISET operator.
Result is:
ASSET_ID PART_NUM GEOM
---------- ---------- ----
1 1 SDO_GEOMETRY(3002, NULL, NULL, SDO_ELEM_INFO_ARRAY(1, 2, 1), SDO_ORDINATE_ARRAY(0, 5, 0, 10, 10, 11.18, 30, 0, 33.54))
1 2 SDO_GEOMETRY(3002, NULL, NULL, SDO_ELEM_INFO_ARRAY(1, 2, 1), SDO_ORDINATE_ARRAY(50, 10, 33.54, 60, 10, 43.54))
Creating a multilinestring involves aggregating the linestrings using the asset_id attribute.
-- Builds one linestring per (asset_id, part_num) in the inline view f, then
-- unions the linestrings of each asset into a single geometry.
with cte as (
select 001 as asset_id, 1 as part_num,1 as vertex_num,0 as x,5 as y, 0 as m from dual union all
select 001 as asset_id, 1 as part_num,2 as vertex_num,10 as x,10 as y,11.18 as m from dual union all
select 001 as asset_id, 1 as part_num,3 as vertex_num,30 as x,0 as y, 33.54 as m from dual union all
select 001 as asset_id, 2 as part_num,1 as vertex_num,50 as x,10 as y,33.54 as m from dual union all
select 001 as asset_id, 2 as part_num,2 as vertex_num,60 as x,10 as y,43.54 as m from dual
)
SELECT asset_id,
SDO_AGGR_UNION(SDOAGGRTYPE(geom,0.005)) as mGeom
FROM (SELECT asset_id,
part_num,
-- NOTE(review): the result listing that follows this query in the page shows
-- gtype 3006 -- confirm how that relates to the 3302 used here.
mdsys.sdo_geometry(
3302,
null,
null,
mdsys.sdo_elem_info_array(1,2,1),
-- Each vertex is crossed with rin = 1..3 to emit x, y, m as three separate
-- rows, ordered by vertex then rin, and the rows are cast into the ordinate array.
CAST(MULTISET( select case when r.rin = 1 then x
when r.rin = 2 then y
when r.rin = 3 then m
end
from cte b,
(select level rin from dual connect by level < 4) r
where b.asset_id = a.asset_id
and b.part_num = a.part_num
order by b.vertex_num, r.rin
) as mdsys.sdo_ordinate_array
)
) as geom
from cte a
group by asset_id, part_num
-- NOTE(review): this ORDER BY sits inside an inline view that feeds an aggregate;
-- it presumably does not control the order of the union -- confirm.
order by part_num
) f
GROUP BY asset_id;
Result:
ASSET_ID MGEOM
---------- -----
1 SDO_GEOMETRY(3006, NULL, NULL, SDO_ELEM_INFO_ARRAY(1, 2, 1, 10, 2, 1), SDO_ORDINATE_ARRAY(0, 5, 0, 10, 10, 11.18, 30, 0, 33.54, 50, 10, 33.54, 60, 10, 43.54))
See also my article [Building linestrings from GPX GPS data]: https://www.spdba.com.au/loading-and-processing-gpx-1-1-files-using-oracle-xmldb-2/
You can concatenate the vertices into a multi-line string of parts and then generate the SDO_GEOMETRY from that string:
-- Builds a MULTILINESTRING text per asset and passes it to the SDO_GEOMETRY constructor.
-- Inner query: one "(x y m,x y m,...)" string per part, vertices ordered by vertex_num.
-- Outer query: the part strings joined in part_num order.
-- NOTE(review): each vertex is written with three numbers (x y m); verify that the
-- text constructor accepts that form on your Oracle version.
SELECT asset_id,
SDO_GEOMETRY(
'MULTILINESTRING (' || LISTAGG(part, ',') WITHIN GROUP (ORDER BY part_num) || ')'
) AS geom
FROM (
SELECT asset_id,
part_num,
'(' || LISTAGG(x || ' ' || y || ' ' || m, ',') WITHIN GROUP (ORDER BY vertex_num) || ')'
AS part
FROM vertices
GROUP BY asset_id, part_num
)
GROUP BY asset_id
db<>fiddle here

split into rows and columns Oracle

I am creating a function that returns a table type object based on splitting a delimited string; the query is the following:
-- Two-stage split of 'A,B,C:D,E,F:'.
-- COLUMNA: one row per ':'-separated group, numbered with ROWNUM as COL_ID.
WITH COLUMNA AS (
SELECT ROWNUM COL_ID, REGEXP_SUBSTR ('A,B,C:D,E,F:','[^:]+',1,LEVEL) COL FROM DUAL
CONNECT BY REGEXP_SUBSTR ('A,B,C:D,E,F:','[^:]+',1,LEVEL) IS NOT NULL
ORDER BY COL_ID
)
-- Each group is wrapped in commas so every item sits between the LVL-th and
-- (LVL+1)-th comma; SUBSTR cuts the item out from between those two positions.
SELECT * FROM (SELECT COL_ID, ROWNUM FIL_ID, SUBSTR(COL, INSTR(COL, ',', 1, LVL) + 1, INSTR(COL, ',', 1, LVL + 1) - INSTR(COL, ',', 1, LVL) - 1) NAME
FROM
( SELECT ',' || COL || ',' AS COL, COL_ID FROM COLUMNA ),
-- Row generator for item positions; the 100 caps the number of items per group.
( SELECT LEVEL AS LVL FROM DUAL CONNECT BY LEVEL <= 100 )
-- Keep only as many positions as the group has items (comma count minus one).
WHERE LVL <= LENGTH(COL) - LENGTH(REPLACE(COL, ',')) - 1
ORDER BY COL_ID, NAME
) FILA
The result is as follows:
COL_ID FIL_ID NAME
1 1 A
1 2 B
1 3 C
2 4 D
2 5 E
2 6 F
And I Need To Get The Following Result
COL_ID VAL1 VAL2 VAL3 VALN
1 A B C X
2 D E F Y
I hope your valuable help!!!
You need to have a fixed number of columns in your object:
-- Row type returned by split_values: a group number plus a fixed set of five values.
CREATE TYPE values_obj AS OBJECT(
COL_id INTEGER,
VAL1 VARCHAR2(10),
VAL2 VARCHAR2(10),
VAL3 VARCHAR2(10),
VAL4 VARCHAR2(10),
VAL5 VARCHAR2(10)
)
/
-- Collection type so the rows can be queried through TABLE().
CREATE TYPE values_tab AS TABLE OF values_obj
/
-- Splits in_list into ':'-separated groups and returns one values_obj per group,
-- holding the group number (LEVEL) and up to five ','-separated items.
CREATE OR REPLACE FUNCTION split_values(
  in_list VARCHAR2
) RETURN values_tab
IS
  -- One match of this pattern is one group: five optional comma-separated
  -- captures, any remaining text, then ':' or end of string.
  c_group_pattern CONSTANT VARCHAR2(100) :=
    '([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*).*?(:|$)';
  result_rows values_tab;
BEGIN
  SELECT values_obj(
           LEVEL,
           REGEXP_SUBSTR( in_list, c_group_pattern, 1, LEVEL, NULL, 1 ),
           REGEXP_SUBSTR( in_list, c_group_pattern, 1, LEVEL, NULL, 2 ),
           REGEXP_SUBSTR( in_list, c_group_pattern, 1, LEVEL, NULL, 3 ),
           REGEXP_SUBSTR( in_list, c_group_pattern, 1, LEVEL, NULL, 4 ),
           REGEXP_SUBSTR( in_list, c_group_pattern, 1, LEVEL, NULL, 5 )
         )
  BULK COLLECT INTO result_rows
  FROM   DUAL
  CONNECT BY LEVEL < REGEXP_COUNT( in_list, c_group_pattern );

  RETURN result_rows;
END;
/
Then you can do:
-- Example call: each ':'-separated group becomes a row, each item a column.
SELECT col_id,
       val1,
       val2,
       val3,
       val4,
       val5
FROM   TABLE( split_values( 'A,B,C:D,E,F,G:H,I,J,K,L,M::N' ) );
Which outputs:
COL_ID VAL1 VAL2 VAL3 VAL4 VAL5
------ ---- ---- ---- ---- ----
1 A B C - -
2 D E F G -
3 H I J K L
4 - - - - -
5 N - - - -

Split column in Oracle

I have a column in an Oracle Database which has something like this data
column1
/opt/log/data/abcd.efghi.jklmn.aaa.txt
/opt/log/data/abbbcd.efccghi.jkdsdflmn.abab.txt
/opt/log/data/nmvcnmcd.efjhjghi.jkvslmn.abcbc.txt
/opt/log/data/hjsdhj.hjfdhdf.hdfhjd.aghag.txt
/opt/log/data/dfhjfdhj.yureyer.qwtyq.hjahjh.txt
I want to split the data in such a way that
**firstdot seconddot thirdnfourthdot**
abcd efghi jklmn.aaa
abbbcd efccghi jkdsdflmn.abab
nmvcnmcd efjhjghi jkvslmn.abcbc
hjsdhj hjfdhdf hdfhjd.aghag
dfhjfdhj yureyer qwtyq.hjahjh
I can get the seconddot value by
select substr(column1,instr(column1,'.',1+1,instr(column1,'.',1,2)-instr(column1,'.',1,1)-1) as secondot
but I could not get the rest. Can you guys help.
Thanks a lot
Without regexp, you need to repeat the same logic for every substring you need, each time picking the starting position and the length based on the position of the "terminator" of that substring.
/* input data */
with yourTable(column1) as (
select '/opt/log/data/abcd.efghi.jklmn.aaa.txt' from dual union all
select '/opt/log/data/abbbcd.efccghi.jkdsdflmn.abab.txt' from dual union all
select '/opt/log/data/nmvcnmcd.efjhjghi.jkvslmn.abcbc.txt' from dual union all
select '/opt/log/data/hjsdhj.hjfdhdf.hdfhjd.aghag.txt' from dual union all
select '/opt/log/data/dfhjfdhj.yureyer.qwtyq.hjahjh.txt' from dual
)
/* query */
-- firstDot:          text between the last '/' and the 1st '.'
-- secondDot:         text between the 1st and 2nd '.'
-- thirdAndFourthDot: text between the 2nd and 4th '.'
-- instr(column1, '/', -1) searches backwards from the end, i.e. finds the last '/'.
select substr(column1, instr(column1, '/', -1) +1, instr(column1, '.') - instr(column1, '/', -1)-1) firstDot,
substr(column1, instr(column1, '.') +1, instr(column1, '.', 1, 2) - instr(column1, '.') -1) secondDot,
substr(column1, instr(column1, '.', 1, 2) +1, instr(column1, '.', 1, 4) - instr(column1, '.', 1, 2) -1) thirdAndFourthDot
from yourTable
gives:
FIRSTDOT SECONDDOT THIRDANDFOURTHD
--------------- --------------- ---------------
abcd efghi jklmn.aaa
abbbcd efccghi jkdsdflmn.abab
nmvcnmcd efjhjghi jkvslmn.abcbc
hjsdhj hjfdhdf hdfhjd.aghag
dfhjfdhj yureyer qwtyq.hjahjh
In a more readable way:
-- Same split, with the delimiter positions computed once in an inline view.
-- This snippet selects from yourTable, which it does not define itself.
select substr(column1, lastSlashPos +1, firstDotPos - lastSlashPos -1) as firstDot,
substr(column1, firstDotPos +1, secondDotPos - firstDotPos -1) as secondDot,
substr(column1, secondDotPos +1, fourthDotPos - secondDotPos -1) as thirdAndFourthDot
from (
select instr(column1, '/', -1) as lastSlashPos,
instr(column1, '.') as firstDotPos,
instr(column1, '.', 1, 2) as secondDotPos,
-- thirdDotPos is computed but not used by the outer query.
instr(column1, '.', 1, 3) as thirdDotPos,
instr(column1, '.', 1, 4) as fourthDotPos,
column1
from yourTable
)
-- instr(..., '/', -1) searches backwards from the end, so this returns everything after the last '/'.
select substr('/opt/log/data/abcd.efghi.jklmn.aaa.txt',instr('/opt/log/data/abcd.efghi.jklmn.aaa.txt','/',-1) + 1) from dual;
This will give you text after last /
Then you need to apply instr for .:
-- Split the file name (the text after the last '/') on its dots:
--   firstDot          - text before the 1st dot
--   secondDot         - text between the 1st and 2nd dots
--   thirdAndFourthDot - text between the 2nd and 4th dots (the extension is dropped)
-- The n-th dot must be requested with INSTR's 4th argument (occurrence); the 3rd
-- argument is only the start position, so instr(text, '.', 2) still finds the first
-- dot. SUBSTR's 3rd argument is a length, hence the subtraction of the two positions.
select
  substr(text, 1, instr(text, '.', 1, 1) - 1) as firstDot,
  substr(text,
         instr(text, '.', 1, 1) + 1,
         instr(text, '.', 1, 2) - instr(text, '.', 1, 1) - 1) as secondDot,
  substr(text,
         instr(text, '.', 1, 2) + 1,
         instr(text, '.', 1, 4) - instr(text, '.', 1, 2) - 1) as thirdAndFourthDot
from (
  select substr('/opt/log/data/abcd.efghi.jklmn.aaa.txt',instr('/opt/log/data/abcd.efghi.jklmn.aaa.txt','/',-1) + 1) text from dual
);

Parsing strings regular expression for Oracle

-- Splits a '|'-delimited string into rows.
-- Each match is a run of non-'|' characters plus the character that follows it
-- (or end of string); rtrim then strips the trailing '|'.
select rtrim(regexp_substr (str, '[^|]*(.|$)', 1, level), '|') ASPLIT
from
(select 'str 1|str 2|str 3' as str from dual)
-- Removing every non-'|' run leaves only the delimiters, so this is delimiter count + 1.
connect by level <= length (regexp_replace (str, '[^|]+')) + 1
str 1 str 2 str 3
How to alter the parser separator ', ' ?
'str 1, str 2, str 3'
You can just change the delimiter in the pattern:
-- Same split with ',' as the delimiter. Only the character class and the rtrim
-- argument change; the '|' inside (.|$) is regex alternation, not the delimiter.
select rtrim(regexp_substr (str, '[^,]*(.|$)', 1, level), ',') ASPLIT
from
(select 'str 1, str 2, str 3' as str from dual)
-- Removing every non-',' run leaves only the delimiters, so this is delimiter count + 1.
connect by level <= length (regexp_replace (str, '[^,]+')) + 1;
Note that you do not want to change the one in the grouping, (.|$); in that context it's an OR operator not a literal character.
It's simpler to use the same pattern in the substring as you do in the replace (but note Gary_W's warning about this losing empty values with this pattern):
-- Split on ',' and trim the spaces that follow each comma.
-- The row count is the delimiter count + 1. '[^,]+' skips empty elements.
with src as (
  select 'str 1, str 2, str 3' as str from dual
)
select trim(regexp_substr(src.str, '[^,]+', 1, level)) ASPLIT
from   src
connect by level <= length(regexp_replace(src.str, '[^,]+')) + 1;
ASPLIT
-------------------
str 1
str 2
str 3
But since you have spaces after the commas, you need to eliminate those; the simplest way is to get rid of leading and trailing spaces with trim. This also shows a variation on the connect by limit but either works (again, note the warning about this pattern):
-- Split on ',' and trim each element; rows are generated for as long as a
-- further non-empty element exists. '[^,]+' skips empty elements.
with src as (
  select 'str 1, str 2, str 3' as str from dual
)
select trim(regexp_substr(src.str, '[^,]+', 1, level)) ASPLIT
from   src
connect by regexp_substr(src.str, '[^,]+', 1, level) is not null;
ASPLIT
-------------------
str 1
str 2
str 3
I must point out that using the regex of the format '[^,]+' to parse a string will give invalid results if there is a NULL element in the list and the position of the element in the list is important. Consider this where the 2nd element is NULL. The results make it seem the 2nd element is 'str 3' where really the 2nd element is NULL.
SQL> select trim(regexp_substr (str, '[^,]+', 1, level)) ASPLIT
from (select 'str 1,, str 3' as str from dual)
connect by level <= length (regexp_replace (str, '[^,]+')) + 1;
ASPLIT
-------------
str 1
str 3
Here's another way that handles the NULL list element:
SQL> select trim(regexp_substr (str, '(.*?)(,|$)', 1, level, NULL, 1)) ASPLIT
from (select 'str 1,, str 3' as str from dual)
connect by level <= regexp_count(str, ',') + 1;
ASPLIT
-------------
str 1

str 3
SQL>
See this post for more info too: Split comma separated values to columns in Oracle

Creating XOR across several IN clauses within the WHERE clause

I am trying to create an exclusive or statement within an in clause. For example
WHERE ACCOUNT IN (1,2,3) XOR ACCOUNT IN (3,4) XOR ACCOUNT IN (5,6)
The only reference materials I can find do not facilitate using an IN clause. TIA.
Edit - Clarification :
DDL:
-- Sample table for the question: one row per transaction, with the amount held
-- in either AMOUNT_1 or AMOUNT_2 (row 15 populates both).
CREATE TABLE EXAMPLE
(
CONTRACT VARCHAR2(1),
ID_NUMBER NUMBER,
ACCOUNT NUMBER,
AMOUNT_1 NUMBER,
AMOUNT_2 NUMBER
);
-- Sample rows for contracts A to D.
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('A', 1, 100, 5, NULL);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('A', 2, 101, NULL, 5);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('A', 3, 200, 2, NULL);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('B', 4, 100, 7, NULL);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('B', 5, 100, 3, NULL);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('B', 6, 101, NULL, 10);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('B', 7, 200, 2, NULL);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('C', 8, 200, 10, NULL);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('C', 9, 200, 5, NULL);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('C', 10, 201, NULL, 15);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('C', 11, 300, 6, NULL);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('C', 12, 301, NULL, 6);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('D', 13, 100, NULL, -5);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('D', 14, 100, NULL, 5);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('D', 15, 300, 7, 3);
INSERT INTO EXAMPLE (CONTRACT, ID_NUMBER, ACCOUNT, AMOUNT_1, AMOUNT_2)
VALUES ('D', 16, 200, NULL, 4);
My query:
-- Finds CONTRACT/ACCOUNT groupings whose AMOUNT_1 total equals their AMOUNT_2 total.
SELECT * FROM
(
SELECT
A.CONTRACT,
COUNT(NVL(ID_NUMBER,1)) AS ID_NUMBER_COUNT,
LISTAGG(ID_NUMBER, ', ') WITHIN GROUP(ORDER BY CONTRACT) AS ID_NUMBERS,
SUM(NVL(AMOUNT_1,0)) AS AMOUNT_1_SUM,
SUM(NVL(AMOUNT_2,0)) AS AMOUNT_2_SUM
FROM EXAMPLE A
WHERE 1=1
-- Drop rows whose two amounts are already equal (NULL treated as 0).
AND NOT (NVL(AMOUNT_1,0) = NVL(AMOUNT_2,0))
-- CUBE yields every grouping combination: (CONTRACT, ACCOUNT), each alone, and the grand total.
GROUP BY CUBE(CONTRACT,ACCOUNT)
) A
WHERE 1=1
AND NVL(A.AMOUNT_1_SUM,0) = NVL(A.AMOUNT_2_SUM,0)
-- Excludes the CUBE rows in which CONTRACT was rolled up.
AND CONTRACT IS NOT NULL
The CUBE function may seem like overkill for this example, but my actual table has several more descriptor columns that necessitates searching across the combinations.
If you run the query on the above table, without any IN clause to limit the accounts, you will not receive the true population of records that are offsets (should clarify that they only sum to zero if they are in the same column, other wise an offset will occur across both columns where the aggregated amounts are equal).
The true population of records that I am aiming to capture is:
-On contract A, ID Numbers 1 and 2
-On contract B, ID Number 4,5, and 6
-On contract C, all ID Numbers
-On contract D, all ID Numbers
The query as it stands currently can capture all ID numbers across contracts C and D, however there are records in contracts A and B that will not come back as a valid result unless the accounts are limited.
-Limiting account to IN (100,101) will yield the ID numbers from A and B that I aim to capture. The caveat is that there are ~20 combinations of accounts in my full population that must be searched.
-There will never be an offset that occurs between two different contracts. I handle this in the query on the full population by using GROUPING_ID, then just excluding anywhere the Contract field is blank.
-As a last resort, I can use a UNION statement, but would like to do without using one.
-The only other thing I can currently think to do is to define the sets of accounts somewhere before I run the query, then just run a FOR loop for each set.
Thank you!
The equivalent of A XOR B is ( A AND NOT B ) OR ( B AND NOT A ) which would make your query something like this:
WHERE ( ACCOUNT IN (1,2,3) AND ACCOUNT NOT IN (3,4,5,6) )
OR ( ACCOUNT IN (3,4) AND ACCOUNT NOT IN (1,2,3,5,6) )
OR ( ACCOUNT IN (5,6) AND ACCOUNT NOT IN (1,2,3,3,4) )
However, the question does not really make sense as ACCOUNT cannot have multiple values so (apart from 3 which appears in multiple sets) you appear to be testing for the equivalent of A XOR NOT A which will always be true (when ACCOUNT <> 3).
Given this, the logic above will simplify to:
WHERE ACCOUNT IN (1,2,4,5,6)
Edit - Following the clarification of the question:
Oracle Setup:
I renamed the Amount_1 and Amount_2 columns to Credit and Debit
-- Sample data created in one statement, with the amount columns named CREDIT and DEBIT.
-- Rows 17 and 18 add a contract E.
CREATE TABLE EXAMPLE( CONTRACT, ID_NUMBER, ACCOUNT, CREDIT, DEBIT ) AS
SELECT 'A', 1, 100, 5, NULL FROM DUAL UNION ALL
SELECT 'A', 2, 101, NULL, 5 FROM DUAL UNION ALL
SELECT 'A', 3, 200, 2, NULL FROM DUAL UNION ALL
SELECT 'B', 4, 100, 7, NULL FROM DUAL UNION ALL
SELECT 'B', 5, 100, 3, NULL FROM DUAL UNION ALL
SELECT 'B', 6, 101, NULL, 10 FROM DUAL UNION ALL
SELECT 'B', 7, 200, 2, NULL FROM DUAL UNION ALL
SELECT 'C', 8, 200, 10, NULL FROM DUAL UNION ALL
SELECT 'C', 9, 200, 5, NULL FROM DUAL UNION ALL
SELECT 'C', 10, 201, NULL, 15 FROM DUAL UNION ALL
SELECT 'C', 11, 300, 6, NULL FROM DUAL UNION ALL
SELECT 'C', 12, 301, NULL, 6 FROM DUAL UNION ALL
SELECT 'D', 13, 100, NULL, -5 FROM DUAL UNION ALL
SELECT 'D', 14, 100, NULL, 5 FROM DUAL UNION ALL
SELECT 'D', 15, 300, 7, 3 FROM DUAL UNION ALL
SELECT 'D', 16, 200, NULL, 4 FROM DUAL UNION ALL
SELECT 'E', 17, 100, 3, NULL FROM DUAL UNION ALL
SELECT 'E', 18, 200, NULL, 4 FROM DUAL;
-- One transaction as passed to getMaxZeroSum: its id, its account and a single VALUE.
CREATE OR REPLACE TYPE TransactionObj AS OBJECT(
ID_NUMBER INT,
ACCOUNT INT,
VALUE INT
);
/
-- Collection of transactions; both the input and the result type of getMaxZeroSum.
CREATE OR REPLACE TYPE TransactionTable AS TABLE OF TransactionObj;
/
-- Returns the largest subset of Transactions whose VALUEs sum to zero.
-- Every non-empty subset is tried by treating the loop counter as a bitmask
-- (bit pos-1 set means element pos is in the subset). When several subsets tie
-- on size, the first one found is kept. A NULL or empty input, or no zero-sum
-- subset at all, gives an empty collection.
CREATE OR REPLACE FUNCTION getMaxZeroSum(
  Transactions TransactionTable
) RETURN TransactionTable
AS
  best_subset  TransactionTable := TransactionTable();
  best_mask    INT := 0;
  best_size    INT := 0;
  subset_size  INT;
  subset_total INT;
  out_idx      INT := 0;
BEGIN
  IF Transactions IS NULL OR Transactions IS EMPTY THEN
    RETURN best_subset;
  END IF;

  -- Pass 1: find the mask of the largest zero-sum subset.
  FOR mask IN 1 .. POWER( 2, Transactions.COUNT ) - 1 LOOP
    subset_size  := 0;
    subset_total := 0;
    FOR pos IN 1 .. Transactions.COUNT LOOP
      IF BITAND( mask, POWER( 2, pos - 1 ) ) > 0 THEN
        subset_total := subset_total + Transactions(pos).VALUE;
        subset_size  := subset_size + 1;
      END IF;
    END LOOP;

    IF subset_total = 0 AND subset_size > best_size THEN
      best_size := subset_size;
      best_mask := mask;
    END IF;
  END LOOP;

  IF best_size = 0 THEN
    RETURN best_subset;
  END IF;

  -- Pass 2: copy the members of the winning subset, in input order.
  best_subset.EXTEND( best_size );
  FOR pos IN 1 .. Transactions.COUNT LOOP
    IF BITAND( best_mask, POWER( 2, pos - 1 ) ) > 0 THEN
      out_idx := out_idx + 1;
      best_subset(out_idx) := Transactions(pos);
    END IF;
  END LOOP;

  RETURN best_subset;
END;
/
Query:
-- For each contract, lists the ids and accounts of the transactions returned by getMaxZeroSum.
SELECT zs.Contract,
LISTAGG( t.ID_NUMBER, ',' ) WITHIN GROUP ( ORDER BY ID_NUMBER ) AS ids,
LISTAGG( t.ACCOUNT, ',' ) WITHIN GROUP ( ORDER BY ID_NUMBER ) AS accounts
FROM (
SELECT CONTRACT,
-- Collect each contract's transactions (value = credit - debit, NULL as 0)
-- into a TransactionTable and pass it to the function.
getMaxZeroSum( CAST( COLLECT( TransactionObj( ID_NUMBER, ACCOUNT, NVL( CREDIT, 0 ) - NVL( DEBIT, 0 ) ) ) AS TransactionTable ) ) AS Transactions
FROM EXAMPLE
-- Rows whose credit equals their debit are left out.
WHERE NVL( CREDIT, 0 ) <> NVL( DEBIT, 0 )
GROUP BY CONTRACT
) zs,
-- (+) makes this an outer join to the collection, so a contract with an empty
-- result still produces a row (contract E shows NULLs in the output).
TABLE( zs.Transactions ) (+) t
GROUP BY Contract;
Output:
CONTRACT IDS ACCOUNTS
-------- -------------- --------------------
A 1,2 100,101
B 4,5,6 100,100,101
C 8,9,10,11,12 200,200,201,300,301
D 13,14,15,16 100,100,300,200
E NULL NULL
The getMaxZeroSum function could almost certainly be improved to consider the transactions in order of least number of items excluded through to all-but-two excluded and then to return as soon as it finds a zero sum (however, I went for having an easy to write function as a demonstration of how it could be done over a performant one). But however you write it I can't see a way that isn't O(n(2^n)) where n is the number of transactions for a given contract.

Resources