Parsing strings regular expression for Oracle - oracle

select rtrim(regexp_substr (str, '[^|]*(.|$)', 1, level), '|') ASPLIT
from
(select 'str 1|str 2|str 3' as str from dual)
connect by level <= length (regexp_replace (str, '[^|]+')) + 1
str 1 str 2 str 3
How to alter the parser separator ', ' ?
'str 1, str 2, str 3'

You can just change the delimiter in the pattern:
select rtrim(regexp_substr (str, '[^,]*(.|$)', 1, level), ',') ASPLIT
from
(select 'str 1, str 2, str 3' as str from dual)
connect by level <= length (regexp_replace (str, '[^,]+')) + 1;
Note that you do not want to change the one in the grouping, (.|$); in that context it's an OR operator not a literal character.
It's simpler to use the same pattern in the substring as you do in the replace (but note Gary_W's warning about this losing empty values with this pattern):
select trim(regexp_substr (str, '[^,]+', 1, level)) ASPLIT
from (select 'str 1, str 2, str 3' as str from dual)
connect by level <= length (regexp_replace (str, '[^,]+')) + 1;
ASPLIT
-------------------
str 1
str 2
str 3
But since you have spaces after the commas, you need to eliminate those; the simplest way is to get rid of leading and trailing spaces with trim. This also shows a variation on the connect by limit but either works (again, note the warning about this pattern):
select trim(regexp_substr (str, '[^,]+', 1, level)) ASPLIT
from (select 'str 1, str 2, str 3' as str from dual)
connect by regexp_substr (str, '[^,]+', 1, level) is not null;
ASPLIT
-------------------
str 1
str 2
str 3

I must point out that using the regex of the format '[^,]+' to parse a string will give invalid results if there is a NULL element in the list and the position of the element in the list is important. Consider this where the 2nd element is NULL. The results make it seem the 2nd element is 'str 3' where really the 2nd element is NULL.
SQL> select trim(regexp_substr (str, '[^,]+', 1, level)) ASPLIT
from (select 'str 1,, str 3' as str from dual)
connect by level <= length (regexp_replace (str, '[^,]+')) + 1;
ASPLIT
-------------
str 1
str 3
Here's another way that handles the NULL list element:
SQL> select trim(regexp_substr (str, '(.*?)(,|$)', 1, level, NULL, 1)) ASPLIT
from (select 'str 1,, str 3' as str from dual)
connect by level <= regexp_count(str, ',') + 1;
ASPLIT
-------------
str 1
str 3
SQL>
See this post for more info too: Split comma separated values to columns in Oracle

Related

How can I split by a character without ignoring nulls?

I'm trying to use regexp_subst to split a delimited string. I'm running into an issue when delimited fields are null. The regexp_substr ignores the nulls and moves to the next occurrence of the delimiter. Is there a way to do this with regexp_substr? If not, what alternative do you use?
--Expecting hello, gets hello
select regexp_substr('hello##world', '[^#]+', 1, 1)
from dual;
--Expecting null, gets world
select regexp_substr('hello##world', '[^#]+', 1, 2)
from dual;
--Expecting world, gets null
select regexp_substr('hello##world', '[^#]+', 1, 3)
from dual;
EDIT: tried this, but it works only with | which isn't an option
Answering based on Matbailie's input in above comment
select regexp_substr('hello##world', '(.*?)(#|$)', 1, 1,NULL,1)
from dual
union all
--Expecting null, gets null
select regexp_substr('hello##world', '(.*?)(#|$)', 1, 2,NULL,1)
from dual
union all
--Expecting world, gets world
select regexp_substr('hello##world', '(.*?)(#|$)', 1, 3,NULL,1)
from dual;
You do not need regular expressions. It can be done with simple (and faster) string functions in a recursive sub-query:
WITH data (value) AS (
SELECT 'hello##world' FROM DUAL
),
bounds (value, start_pos, end_pos) AS (
SELECT value,
1,
INSTR(value, '#', 1)
FROM data
UNION ALL
SELECT value,
end_pos + 1,
INSTR(value, '#', end_pos + 1)
FROM bounds
WHERE end_pos > 0
)
SEARCH DEPTH FIRST BY value SET order_id
SELECT CASE end_pos
WHEN 0
THEN SUBSTR(value, start_pos)
ELSE SUBSTR(value, start_pos, end_pos - start_pos)
END AS item
FROM bounds;
Which outputs:
ITEM
hello
null
world
Or, if you want the data in columns (rather than rows):
WITH data (value) AS (
SELECT 'hello##world' FROM DUAL
),
bounds (value, pos1, pos2) AS (
SELECT value,
INSTR(value, '#', 1, 1),
INSTR(value, '#', 1, 2)
FROM data
)
SELECT SUBSTR(value, 1, pos1 - 1) AS item1,
SUBSTR(value, pos1 + 1, pos2 - pos1 - 1) AS item2,
SUBSTR(value, pos2 + 1) AS item3
FROM bounds
Which outputs:
ITEM1
ITEM2
ITEM3
hello
null
world
If you did want to use (slower) regular expressions then:
WITH data (value) AS (
SELECT 'hello##world' FROM DUAL
)
SELECT item
FROM data d
CROSS JOIN LATERAL(
SELECT REGEXP_SUBSTR( d.value, '(.*?)(#|$)', 1, LEVEL, NULL, 1) AS item
FROM DUAL
CONNECT BY LEVEL < REGEXP_COUNT( d.value, '(.*?)(#|$)')
)
or, for columns:
WITH data (value) AS (
SELECT 'hello##world' FROM DUAL
)
SELECT REGEXP_SUBSTR(value, '(.*?)(#|$)', 1, 1, NULL, 1) AS item1,
REGEXP_SUBSTR(value, '(.*?)(#|$)', 1, 2, NULL, 1) AS item2,
REGEXP_SUBSTR(value, '(.*?)(#|$)', 1, 3, NULL, 1) AS item3
FROM data
(Which both have the same output as above)
db<>fiddle here

split into rows and columns Oracle

I am creating a function that returns a table type object based on the split of the chain, the query is the following:
WITH COLUMNA AS (
SELECT ROWNUM COL_ID, REGEXP_SUBSTR ('A,B,C:D,E,F:','[^:]+',1,LEVEL) COL FROM DUAL
CONNECT BY REGEXP_SUBSTR ('A,B,C:D,E,F:','[^:]+',1,LEVEL) IS NOT NULL
ORDER BY COL_ID
)
SELECT * FROM (SELECT COL_ID, ROWNUM FIL_ID, SUBSTR(COL, INSTR(COL, ',', 1, LVL) + 1, INSTR(COL, ',', 1, LVL + 1) - INSTR(COL, ',', 1, LVL) - 1) NAME
FROM
( SELECT ',' || COL || ',' AS COL, COL_ID FROM COLUMNA ),
( SELECT LEVEL AS LVL FROM DUAL CONNECT BY LEVEL <= 100 )
WHERE LVL <= LENGTH(COL) - LENGTH(REPLACE(COL, ',')) - 1
ORDER BY COL_ID, NAME
) FILA
The result is as follows:
COL_ID FIL_ID NAME
1 1 A
1 2 B
1 3 C
2 4 D
2 5 E
2 6 F
And I Need To Get The Following Result
COL_ID VAL1 VAL2 VAL3 VALN
1 A B C X
2 D E F Y
I hope your valuable help!!!
You need to have a fixed number of columns in your object:
CREATE TYPE values_obj AS OBJECT(
COL_id INTEGER,
VAL1 VARCHAR2(10),
VAL2 VARCHAR2(10),
VAL3 VARCHAR2(10),
VAL4 VARCHAR2(10),
VAL5 VARCHAR2(10)
)
/
CREATE TYPE values_tab AS TABLE OF values_obj
/
CREATE OR REPLACE FUNCTION split_values(
in_list VARCHAR2
) RETURN values_tab
IS
vals VALUES_TAB;
BEGIN
SELECT values_obj(
LEVEL,
REGEXP_SUBSTR( in_list, '([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*).*?(:|$)', 1, LEVEL, NULL, 1 ),
REGEXP_SUBSTR( in_list, '([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*).*?(:|$)', 1, LEVEL, NULL, 2 ),
REGEXP_SUBSTR( in_list, '([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*).*?(:|$)', 1, LEVEL, NULL, 3 ),
REGEXP_SUBSTR( in_list, '([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*).*?(:|$)', 1, LEVEL, NULL, 4 ),
REGEXP_SUBSTR( in_list, '([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*).*?(:|$)', 1, LEVEL, NULL, 5 )
)
BULK COLLECT INTO vals
FROM DUAL
CONNECT BY LEVEL < REGEXP_COUNT( in_list, '([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*),?([^:,]*).*?(:|$)' );
RETURN vals;
END;
/
Then you can do:
SELECT *
FROM TABLE( split_values( 'A,B,C:D,E,F,G:H,I,J,K,L,M::N' ) );
Which outputs:
COL_ID VAL1 VAL2 VAL3 VAL4 VAL5
------ ---- ---- ---- ---- ----
1 A B C - -
2 D E F G -
3 H I J K L
4 - - - - -
5 N - - - -

Split column in Oracle

I have a column in an Oracle Database which has something like this data
column1
/opt/log/data/abcd.efghi.jklmn.aaa.txt
/opt/log/data/abbbcd.efccghi.jkdsdflmn.abab.txt
/opt/log/data/nmvcnmcd.efjhjghi.jkvslmn.abcbc.txt
/opt/log/data/hjsdhj.hjfdhdf.hdfhjd.aghag.txt
/opt/log/data/dfhjfdhj.yureyer.qwtyq.hjahjh.txt
I want to split the data in such a way that
**firstdot seconddot thirdnfourthdot**
abcd efghi jklmn.aaa
abbbcd efccghi jkdsdflmn.abab
nmvcnmcd efjhjghi jkvslmn.abcbc
hjsdhj hjfdhdf hdfhjd.aghag
dfhjfdhj yureyer qwtyq.hjahjh
I can get the seconddot value by
select substr(column1,instr(column1,'.',1+1,instr(column1,'.',1,2)-instr(column1,'.',1,1)-1) as secondot
but I could not get the rest. Can you guys help.
Thanks a lot
Without regexp, you need to reply the same logic for every substring you need, every timi picking the initial position and the leght, based on the position of the "terminator" of that substring.
/* input data */
with yourTable(column1) as (
select '/opt/log/data/abcd.efghi.jklmn.aaa.txt' from dual union all
select '/opt/log/data/abbbcd.efccghi.jkdsdflmn.abab.txt' from dual union all
select '/opt/log/data/nmvcnmcd.efjhjghi.jkvslmn.abcbc.txt' from dual union all
select '/opt/log/data/hjsdhj.hjfdhdf.hdfhjd.aghag.txt' from dual union all
select '/opt/log/data/dfhjfdhj.yureyer.qwtyq.hjahjh.txt' from dual
)
/* query */
select substr(column1, instr(column1, '/', -1) +1, instr(column1, '.') - instr(column1, '/', -1)-1) firstDot,
substr(column1, instr(column1, '.') +1, instr(column1, '.', 1, 2) - instr(column1, '.') -1) secondDot,
substr(column1, instr(column1, '.', 1, 2) +1, instr(column1, '.', 1, 4) - instr(column1, '.', 1, 2) -1) thirdAndFourthDot
from yourTable
gives:
FIRSTDOT SECONDDOT THIRDANDFOURTHD
--------------- --------------- ---------------
abcd efghi jklmn.aaa
abbbcd efccghi jkdsdflmn.abab
nmvcnmcd efjhjghi jkvslmn.abcbc
hjsdhj hjfdhdf hdfhjd.aghag
dfhjfdhj yureyer qwtyq.hjahjh
In a more readable way:
select substr(column1, lastSlashPos +1, firstDotPos - lastSlashPos -1) as firstDot,
substr(column1, firstDotPos +1, secondDotPos - firstDotPos -1) as secondDot,
substr(column1, secondDotPos +1, fourthDotPos - secondDotPos -1) as thirdAndFourthDot
from (
select instr(column1, '/', -1) as lastSlashPos,
instr(column1, '.') as firstDotPos,
instr(column1, '.', 1, 2) as secondDotPos,
instr(column1, '.', 1, 3) as thirdDotPos,
instr(column1, '.', 1, 4) as fourthDotPos,
column1
from yourTable
)
select substr('/opt/log/data/abcd.efghi.jklmn.aaa.txt',instr('/opt/log/data/abcd.efghi.jklmn.aaa.txt','/',-1) + 1) from dual;
This will give you text after last /
Then you need to apply instr for .:
select
substr(text, 1, instr(text,'.', 1) - 1),
substr(text, instr(text,'.', 1) + 1, instr(text,'.', 2) - 1),
substr(text, instr(text,'.', 2) + 1)
from (
select substr('/opt/log/data/abcd.efghi.jklmn.aaa.txt',instr('/opt/log/data/abcd.efghi.jklmn.aaa.txt','/',-1) + 1) text from dual
);

Split the string into variables

After executing below select , How to assign it into variables.
If input string is "x/y/z" , I have to store "x" into variable say A ,"y" into variable B and z into variable "C".
Suppose if string is "x/z" then I have to store "x" into variable say A but z into variable "c".
in all other cases suppose if input string is only "x" or "x/y/z/z" then nothing can be stored
SELECT REGEXP_SUBSTR(<<Input String>>, '[^/]+', 1, LEVEL)
FROM DUAL
CONNECT BY REGEXP_SUBSTR((<<Input String>>, '[^/]+', 1, LEVEL)
IS NOT NULL;
Instead of using SQL, I would just use PL/SQL, since there seems no need to introduce an unnecessary context switch:
declare
v_a varchar2(10);
v_b varchar2(10);
v_c varchar2(10);
v_string varchar2(33);
procedure split_string (p_string in varchar2,
p_a out varchar2,
p_b out varchar2,
p_c out varchar2)
is
begin
if regexp_count(p_string, '/') = 2 then
p_a := regexp_substr(p_string, '[^/]+', 1, 1);
p_b := regexp_substr(p_string, '[^/]+', 1, 2);
p_c := regexp_substr(p_string, '[^/]+', 1, 3);
elsif regexp_count(p_string, '/') = 1 then
p_a := regexp_substr(p_string, '[^/]+', 1, 1);
p_c := regexp_substr(p_string, '[^/]+', 1, 2);
end if;
end;
begin
v_string := 'x/y/z';
split_string(v_string, v_a, v_b, v_c);
dbms_output.put_line('v_string = "'||v_string||'", v_a = "'||v_a||'", v_b = "'||v_b||'", v_c = "'||v_c||'"');
v_string := 'x/y';
split_string(v_string, v_a, v_b, v_c);
dbms_output.put_line('v_string = "'||v_string||'", v_a = "'||v_a||'", v_b = "'||v_b||'", v_c = "'||v_c||'"');
v_string := 'x/y/z/1';
split_string(v_string, v_a, v_b, v_c);
dbms_output.put_line('v_string = "'||v_string||'", v_a = "'||v_a||'", v_b = "'||v_b||'", v_c = "'||v_c||'"');
v_string := 'x';
split_string(v_string, v_a, v_b, v_c);
dbms_output.put_line('v_string = "'||v_string||'", v_a = "'||v_a||'", v_b = "'||v_b||'", v_c = "'||v_c||'"');
end;
/
v_string = "x/y/z", v_a = "x", v_b = "y", v_c = "z"
v_string = "x/y", v_a = "x", v_b = "", v_c = "y"
v_string = "x/y/z/1", v_a = "", v_b = "", v_c = ""
v_string = "x", v_a = "", v_b = "", v_c = ""
If you absolutely must use SQL, there is no need to use the connect by - you can just separate the results into 3 columns to match the 3 variables you want to input the results into:
with strings as (select 'x/y/z' str from dual union all
select 'x/y' str from dual union all
select 'x/y/z/1' str from dual union all
select 'x' str from dual)
select str,
case when regexp_count(str, '/') in (1, 2) then regexp_substr(str, '[^/]+', 1, 1) end v_a,
case when regexp_count(str, '/') = 2 then regexp_substr(str, '[^/]+', 1, 2) end v_b,
case when regexp_count(str, '/') = 2 then regexp_substr(str, '[^/]+', 1, 3)
when regexp_count(str, '/') = 1 then regexp_substr(str, '[^/]+', 1, 2)
end v_c
from strings;
STR V_A V_B V_C
------- --------------------- --------------------- ---------------------
x/y/z x y z
x/y x y
x/y/z/1
x
Be careful, regex_substr using the format '[^/]+' to parse the string elements do not handle null list elements. Here's a way to extract a specific element from a list that handles nulls, which can be put into a function for reuse (this gets the first element where the separator is a slash):
REGEXP_SUBSTR(string_in, '(.*?)(/|$)', 1, 1, NULL, 1);
See the link above, but call like this simple example where the list elements are extracted in order by a function called get_list_element and assigned to variables. Perhaps you can apply this logic to your needs:
SQL> declare
2 a varchar2(1);
3 b varchar2(1);
4 c varchar2(1);
5 begin
6 select get_list_element('x/y/z', 1, '/'),
7 get_list_element('x/y/z', 2, '/'),
8 get_list_element('x/y/z', 3, '/')
9 into a, b, c
10 from dual;
11
12 dbms_output.put_line('a: ' || a);
13 dbms_output.put_line('b: ' || b);
14 dbms_output.put_line('c: ' || c);
15 end;
16 /
a: x
b: y
c: z
PL/SQL procedure successfully completed.
SQL>
Or fix your original try at turning your elements into rows with this:
SQL> with tbl(str) as (
2 select 'x/y/z' from dual
3 )
4 select regexp_substr(str, '(.*?)(/|$)', 1, level, null, 1) element
5 from tbl
6 connect by level <= regexp_count(str, '/')+1;
ELEME
-----
x
y
z
SQL>

Invalid number error - [Error Code: 1722, SQL State: 42000] ORA-01722: invalid number

The 1st query from the below 2 queries is giving me [Error Code: 1722, SQL State: 42000] ORA-01722: invalid number error.
But when I limit the no of records as in the 2nd query then it is running fine.
Other than limiting the rows in the 2nd query, both the queries are identical.
SELECT b.first_name,
b.last_name,
b.device_derived,
b.ios_version_group,
b.add_date,
FIRST_VALUE (b.add_date)
OVER (PARTITION BY b.first_name, b.last_name, b.ios_version_group)
AS first_date,
LAST_VALUE (b.add_date)
OVER (PARTITION BY b.first_name, b.last_name, b.ios_version_group)
AS last_date
FROM (SELECT a.first_name,
a.last_name,
a.os_version,
a.device_type,
a.device,
a.add_date,
a.device_derived,
CASE
WHEN ( ( UPPER (a.device_derived) = 'IPHONE'
OR UPPER (a.device_derived) = 'IPAD')
AND TO_NUMBER (SUBSTR (a.os_version, 1, 1)) > 4)
THEN
'iOS ' || SUBSTR (a.os_version, 1, 1)
ELSE
'Others'
END
AS ios_version_group
FROM (SELECT first_name,
last_name,
os_version,
device_type,
device,
add_date,
CASE
WHEN UPPER (device_type) = 'ANDROID'
THEN
'Android'
WHEN UPPER (device_type) = 'BB'
OR UPPER (device_type) = 'BLACKBERRY'
THEN
'Blackberry'
WHEN UPPER (device_type) = 'IOS'
AND ( SUBSTR (UPPER (device), 1, 6) = 'IPHONE'
OR SUBSTR (UPPER (device), 1, 4) = 'IPOD')
THEN
'iPhone'
WHEN UPPER (device_type) = 'IOS'
AND (SUBSTR (UPPER (device), 1, 4) = 'IPAD')
THEN
'iPad'
END
AS device_derived
FROM vw_mobile_devices_all) a) b;
SELECT b.first_name,
b.last_name,
b.device_derived,
b.ios_version_group,
b.add_date,
FIRST_VALUE (b.add_date)
OVER (PARTITION BY b.first_name, b.last_name, b.ios_version_group)
AS first_date,
LAST_VALUE (b.add_date)
OVER (PARTITION BY b.first_name, b.last_name, b.ios_version_group)
AS last_date
FROM (SELECT a.first_name,
a.last_name,
a.os_version,
a.device_type,
a.device,
a.add_date,
a.device_derived,
CASE
WHEN ( ( UPPER (a.device_derived) = 'IPHONE'
OR UPPER (a.device_derived) = 'IPAD')
AND TO_NUMBER (SUBSTR (a.os_version, 1, 1)) > 4)
THEN
'iOS ' || SUBSTR (a.os_version, 1, 1)
ELSE
'Others'
END
AS ios_version_group
FROM (SELECT first_name,
last_name,
os_version,
device_type,
device,
add_date,
CASE
WHEN UPPER (device_type) = 'ANDROID'
THEN
'Android'
WHEN UPPER (device_type) = 'BB'
OR UPPER (device_type) = 'BLACKBERRY'
THEN
'Blackberry'
WHEN UPPER (device_type) = 'IOS'
AND ( SUBSTR (UPPER (device), 1, 6) = 'IPHONE'
OR SUBSTR (UPPER (device), 1, 4) = 'IPOD')
THEN
'iPhone'
WHEN UPPER (device_type) = 'IOS'
AND (SUBSTR (UPPER (device), 1, 4) = 'IPAD')
THEN
'iPad'
END
AS device_derived
FROM vw_mobile_devices_all) a) b
WHERE ROWNUM <= 100;
Can somebody tell me why I am getting this error. Is there an efficient way to write this query?
You have TO_NUMBER (SUBSTR (a.os_version, 1, 1) in your queries, so presumably you're hitting data that doesn't have a number at the start of the os_version, when you request more than 100 rows.
You need to check your data.
This error happens when you try to convert a non-numeric value with TO_NUMBER.
In the second query the first 100 rows seem not to result into a.os_version to a non-numeric value.
Try a simple select vw_mobile_devices_all to find the non-numeric os_version. Figure out how you can work around the problem. Maybe you can query the os_version differently.

Resources