Managing dates when they're overlapping - oracle

How to write a procedure so that when adding a new insert, rows are added appropriately?
Let's say i have a table:
-- One row per (code, type) validity interval; end_date is an inclusive bound.
CREATE TABLE test_table (
    code       VARCHAR2(10) NOT NULL,
    type       VARCHAR2(50) NOT NULL,
    start_date DATE         NOT NULL,
    end_date   DATE         NOT NULL,  -- inclusive upper bound of the interval
    parameter  NUMBER
);
1. First test case:
In table we have:
insert into test_table values ('CODE', 'a', to_date('01.01.2021', 'DD,MM,YYYY'), to_date('10.01.2021', 'DD,MM,YYYY'), 1);
[2021-01-01 - 2021-01-10] type = "a" parameter = 1
and when we want to insert:
insert into test_table values ('CODE', 'a', to_date('11.01.2021', 'DD,MM,YYYY'), to_date('20.01.2021', 'DD,MM,YYYY'), 1)
[2021-01-11 - 2021-01-20] type = "a" parameter = 1
*Result should be:
2021-01-01 - 2021-01-20 type = "a" parameter = 1*
2. Second test case:
In table we have:
insert into test_table values ('CODE', 'a', to_date('01.01.2021', 'DD,MM,YYYY'), to_date('10.01.2021', 'DD,MM,YYYY'), 1)
[2021-01-01 - 2021-01-10] type = "a" parameter = 1
and when we want to insert:
insert into test_table values ('CODE', 'a', to_date('06.01.2021', 'DD,MM,YYYY'), to_date('20.01.2021', 'DD,MM,YYYY'), 2)
[2021-01-06 - 2021-01-20] type = "a" parameter = 2
*in result we should have:
[2021-01-01 - 2021-01-05] type = "a" parameter = 1
[2021-01-06 - 2021-01-20] type = "a" parameter = 2*
3. Third test case:
In table we have:
insert into test_table values ('CODE', 'a', to_date('01.01.2021', 'DD,MM,YYYY'), to_date('20.01.2021', 'DD,MM,YYYY'), 1)
[2021-01-01 - 2021-01-20] type = "a" parameter = 1
and when we want to insert:
insert into test_table values ('CODE', 'a', to_date('06.01.2021', 'DD,MM,YYYY'), to_date('15.01.2021', 'DD,MM,YYYY'), 2)
[2021-01-06 - 2021-01-15] type = "a" parameter = 2
*in result we should have:
[2021-01-01 - 2021-01-05] type = "a" parameter = 1
[2021-01-06 - 2021-01-15] type = "a" parameter = 2
[2021-01-16 - 2021-01-20] type = "a" parameter = 1*

When you insert a new date range that is completely contained in the middle of an existing date range then you need to: INSERT of the new range; UPDATE the existing range to the portion of that range before the new range; and INSERT a new range for the portion of the existing range after the new range. So you need a total of 3 changes.
Similarly, when you insert a new date range that completely contains an existing range then you need to: INSERT the new range; and DELETE the existing range (or do a single UPDATE statement).
You can use a single MERGE statement for all of these actions:
-- One MERGE handles every overlap case. The source (src) emits one row per
-- required change, tagged with a status code: 0 = insert, 1 = update,
-- 2 = delete. Status-0 rows carry a NULL rid so they never match dst and
-- fall through to the WHEN NOT MATCHED branch.
MERGE INTO test_table dst
USING (
WITH new_data (code, type, start_date, end_date, parameter) AS (
-- The incoming range; in a real procedure these would be bind variables.
SELECT 'CODE2', 'a', DATE '2021-01-01', DATE '2021-01-20', 2 FROM DUAL
)
-- Branch 1: the new range itself is always inserted as-is.
SELECT NULL AS rid,
n.*,
0 AS status -- Insert
FROM new_data n
UNION ALL
-- Existing rows overlapping before
-- Branch 2: trim such a row's end_date back to the day before the new
-- range starts (may produce an empty range; the DELETE clause handles that).
SELECT t.ROWID,
t.code,
t.type,
t.start_date,
n.start_date - INTERVAL '1' DAY,
t.parameter,
1 -- Update overlap before
FROM test_table t
INNER JOIN new_data n
ON ( t.start_date <= n.start_date
AND t.end_date >= n.start_date)
UNION ALL
-- Branch 3: existing rows overlapping at/after the new range's start.
-- Depending on how the two ranges nest this is a delete (existing range
-- fully covered), an insert of the trailing remainder (new range sits in
-- the middle of the existing one), or an update moving the row forward.
SELECT t.ROWID,
t.code,
t.type,
n.end_date + INTERVAL '1' DAY,
t.end_date,
t.parameter,
CASE
WHEN n.start_date <= t.end_date AND t.end_date <= n.end_date
THEN 2 -- Delete
WHEN t.start_date < n.start_date AND n.end_date < t.end_date
THEN 0 -- Insert overlap afterwards
ELSE 1 -- Update overlap afterwards
END
FROM test_table t
INNER JOIN new_data n
ON ( t.start_date <= n.end_date
AND t.end_date >= n.start_date)
WHERE NOT (t.start_date <= n.start_date AND t.end_date <= n.end_date)
) src
-- Match on ROWID; status 0 rows (rid NULL) deliberately never match.
ON (src.rid = dst.ROWID AND status > 0)
WHEN MATCHED THEN
UPDATE
SET code = src.code,
start_date = src.start_date,
end_date = src.end_date
-- Remove rows explicitly flagged for deletion, plus any row whose
-- trimmed range became empty (start after end).
DELETE
WHERE status = 2
OR src.start_date > src.end_date
WHEN NOT MATCHED THEN
INSERT (code, type, start_date, end_date, parameter)
VALUES (src.code, src.type, src.start_date, src.end_date, src.parameter);
db<>fiddle here

Related

Issues in inserting comma separated strings to table

I have following 3 parameters from stored procedure
P_Param1 = 12
P_Paramj2= 'val:15,val:16'
P_param3 = 'Name:check values,Name:bv,Name:cv'
I have a table and need to insert above details and final table looks like below
proID CatID CatName
12 15 check values
12 15 bv
12 15 cv
12 16 check values
12 16 bv
12 16 cv
I have written a query to split P_param3 as below and getting splitted values but stuck in generating loops to make a table like above.
-- Asker's attempt: split the 'Name:...' delimited string into rows.
-- regexp_substr(..., 1, level, NULL, 2) returns capture group 2 (the text
-- after each 'Name:' prefix); CONNECT BY LEVEL produces one row per match,
-- bounded by the number of 'Name:' occurrences.
SELECT
regexp_substr('Name:check values,Name:bv,Name:cv', '(Name:)?(.*?)(,Name:|$)', 1, level, NULL,
2) AS "CatName"
FROM
dual
CONNECT BY
level <= regexp_count('Name:check values,Name:bv,Name:cv', 'Name:');
Split the catIds into rows and split the catNames into rows and then CROSS JOIN them and insert.
You can do it with simple (fast) string functions using:
-- Splits p_catids ('val:15,val:16') and p_catnames ('Name:a,Name:b,...')
-- into rows using only INSTR/SUBSTR (fast, no regex) via recursive CTEs,
-- then CROSS JOINs the two lists so every catid is paired with every
-- catname, inserting the result in one set-based statement.
CREATE PROCEDURE insertCats(
p_proid IN table_name.proid%TYPE,
p_catids IN VARCHAR2,
p_catnames IN VARCHAR2
)
IS
-- Fixed prefixes preceding every value in the two delimited lists.
c_catid_prefix CONSTANT VARCHAR2(10) := 'val:';
c_catid_length CONSTANT PLS_INTEGER := LENGTH(c_catid_prefix);
c_catname_prefix CONSTANT VARCHAR2(10) := 'Name:';
c_catname_length CONSTANT PLS_INTEGER := LENGTH(c_catname_prefix);
BEGIN
INSERT INTO table_name (proid, catid, catname)
-- catid_bounds: walks p_catids recursively; spos/epos bracket each value.
-- epos = 0 marks the final element (no trailing comma found).
WITH catid_bounds (catids, spos, epos) AS (
SELECT p_catids,
1 + c_catid_length,
INSTR(p_catids, ',', 1 + c_catid_length)
FROM DUAL
UNION ALL
SELECT catids,
epos + 1 + c_catid_length,
INSTR(catids, ',', epos + 1 + c_catid_length)
FROM catid_bounds
WHERE epos > 0
),
-- catids: extract each id substring from its bounds.
catids (catid) AS (
SELECT CASE epos
WHEN 0
THEN SUBSTR(catids, spos)
ELSE SUBSTR(catids, spos, epos - spos)
END
FROM catid_bounds
),
-- catname_bounds: same walk over p_catnames with the 'Name:' prefix.
catname_bounds (catnames, spos, epos) AS (
SELECT p_catnames,
1 + c_catname_length,
INSTR(p_catnames, ',', 1 + c_catname_length)
FROM DUAL
UNION ALL
SELECT catnames,
epos + 1 + c_catname_length,
INSTR(catnames, ',', epos + 1 + c_catname_length)
FROM catname_bounds
WHERE epos > 0
),
-- catnames: extract each name substring from its bounds.
catnames (catname) AS (
SELECT CASE epos
WHEN 0
THEN SUBSTR(catnames, spos)
ELSE SUBSTR(catnames, spos, epos - spos)
END
FROM catname_bounds
)
-- Every id paired with every name, as the question's expected output shows.
SELECT p_proid,
i.catid,
n.catname
FROM catids i CROSS JOIN catnames n;
END;
/
db<>fiddle here

How to compare two sets of rows in Oracle?

So, the problem is that i have two results (eg. number):
RES1:
10
11
RES2:
10
13
I need to compare those like if RES1 in RES2 and RES2 in RES1.
I would like to have result like:
RES3:
11
13
How do i do that?
I tried
RES1 MINUS RES2
UNION
RES2 MINUS RES1
but this approach is very slow, because my table contains millions of rows...
Why not use one of the supplied packages: DBMS_COMPARISON.
The package allows you to compare and synchronize tables. Its only requirement is that the tables have an index.
1) create diff datasets
-- Two overlapping snapshots of ALL_OBJECTS with deliberately perturbed
-- CREATED dates, so the comparison will find missing rows on each side
-- as well as rows that exist in both but differ.
CREATE TABLE to_compare2 AS (
    SELECT object_name, subobject_name, object_id, data_object_id, object_type,
           CASE WHEN MOD(object_id, 18) = 0 THEN created + 1 ELSE created END AS created
    FROM all_objects
    WHERE MOD(object_id, 6) = 0
);

CREATE TABLE to_compare1 AS (
    SELECT object_name, subobject_name, object_id, data_object_id, object_type,
           CASE WHEN MOD(object_id, 12) = 0 THEN created + 1 ELSE created END AS created
    FROM all_objects
    WHERE MOD(object_id, 3) = 0
);
2) create indexes.
-- DBMS_COMPARISON requires an index on the compared tables; a unique
-- index on the join key lets rows be matched one-to-one.
CREATE UNIQUE INDEX to_compare1_idx on to_compare1(object_id);
CREATE UNIQUE INDEX to_compare2_idx on to_compare2(object_id);
3) Prepare comparison context
BEGIN
    -- Register the comparison context. NOTE: schema and object names are
    -- matched case-sensitively against the data dictionary; tables created
    -- without quotes are stored in UPPERCASE, so uppercase names must be
    -- passed here (lowercase 'to_compare1' would not be found).
    DBMS_COMPARISON.create_comparison (
        comparison_name    => 'MY_COMPARISION',  -- (sic) name reused by the later steps
        schema_name        => user,              -- USER already returns uppercase
        object_name        => 'TO_COMPARE1',     -- local table
        dblink_name        => NULL,              -- both tables live in this database
        remote_schema_name => NULL,
        remote_object_name => 'TO_COMPARE2');    -- "remote" side of the comparison
END;
/
4) Execute comparison and check results.
-- Run the comparison. COMPARE returns FALSE when differences were found;
-- scan_info.scan_id identifies this run in the *_COMPARISON_ROW_DIF views.
DECLARE
v_scan_info DBMS_COMPARISON.comparison_type;
v_result BOOLEAN;
BEGIN
v_result := DBMS_COMPARISON.compare (
comparison_name => 'MY_COMPARISION',
scan_info => v_scan_info,
perform_row_dif => TRUE -- record per-row differences for later querying
);
IF NOT v_result THEN
DBMS_OUTPUT.put_line('Differences. scan_id=' || v_scan_info.scan_id);
ELSE
DBMS_OUTPUT.put_line('No differences.');
END IF;
END;
/
4) Results
-- Per-row differences recorded by the compare run above; interpret the
-- local_rowid / remote_rowid columns as described just below.
SELECT *
FROM user_comparison_row_dif
WHERE comparison_name = 'MY_COMPARISION';
if local_rowid is not null and remote_rowid is null -> the record exists only in table_1
if local_rowid is null and remote_rowid is not null -> the record exists only in table_2
if local_rowid is not null and remote_rowid is not null -> the record exists in both tables but has different values
solution 1:
try UNION ALL instead of UNION.
why UNION ALL is better than UNION you can read here: What is the difference between UNION and UNION ALL?
solution 2:
you can try to use full outer join
-- Symmetric difference via FULL OUTER JOIN: a row survives the WHERE
-- clause only when it has no partner on the other side.
SELECT COALESCE(a.id, b.id)
FROM a
FULL OUTER JOIN b
    ON a.id = b.id
WHERE a.id IS NULL
   OR b.id IS NULL
Example: http://www.sqlfiddle.com/#!4/88f81/3
Are those Values unique in RES1 or RES2? Then you could try counting:
-- Symmetric difference by counting: a value occurring exactly once across
-- the concatenated inputs exists in only one of them. Valid only when col
-- is unique within each of RES1 and RES2.
SELECT col
FROM (
    SELECT col FROM RES1
    UNION ALL
    SELECT col FROM RES2
) u
GROUP BY col
HAVING COUNT(*) = 1
If it is not unique, you'd have to add a distinct on both sides of the union, which makes it a lot slower

subtract dates in workflow table with type and datestamps

I have a table with 3 columns:
resid type date
The table is used to track steps in a workflow and a specific resid can exist multiple with different type id(numbers) and datestamps.
I want to calculate the time elapsed between two type shifts - i.e., between types 1 and 17 - for a specific resid.
I have tried with a sql-plus syntax like this
and also tried to use aliases:
Any suggestions?
-- Asker's attempt, kept verbatim: this is NOT valid SQL. The tuple before
-- each IN needs parentheses, "and where" cannot appear mid-statement, and
-- both "date" operands come from the same row, so the difference would be
-- zero anyway. See the corrected approaches below.
select resid, date - date
from tablename
where resid, date in
(select resid, date from tablename
where type='1')
and
where resid, date in
(select resid, date from tablename
where type='17')
and tablename.resid=tablename.resid
Your attempted query is missing parentheses around the column list before the in - so it should be where (resid, date) in - but also has and where which isn't valid, and probably other issues. Mostly it doesn't do what you want, not least because both date values are coming from the same row (for type 1) so subtracting them will always give zero.
You could use conditional aggregation:
-- Conditional aggregation: one result row per resid, holding the elapsed
-- time between its first type-1 stamp and its first type-17 stamp.
SELECT resid,
       MIN(CASE WHEN type_id = 17 THEN date_stamp END)
         - MIN(CASE WHEN type_id = 1 THEN date_stamp END) AS diff
FROM tablename
WHERE type_id IN (1, 17) -- optional pre-filter
  AND resid = :some_value
GROUP BY resid;
The case gives either null or the date stamp for each matching row; the aggregation then gives you a single value from those (favouring not-null ones).
If only one of the type IDs exists then the difference will be null.
You might want to change the min() for 17 to max() if there may be multiples - depends what you really need.
Quick demo:
-- Quick demo using an inline fixture. resid 3 has no type-17 row, so its
-- diff comes out NULL (one MIN operand is NULL) -- see the output below.
with tablename(resid, type_id, date_stamp) as (
select 1, 1, sysdate - 10 from dual
union all select 1, 17, sysdate - 7 from dual
union all select 2, 1, sysdate - 5 from dual
union all select 2, 17, sysdate - 3 from dual
union all select 3, 1, sysdate - 10 from dual
)
select resid,
min(case when type_id = 17 then date_stamp end)
- min(case when type_id = 1 then date_stamp end) as diff
from tablename
where type_id in (1, 17) -- optional
--and resid = 2
group by resid;
RESID DIFF
---------- ----------
1 3
2 2
3
-- Self-join alternative: pair each type '1' row with the matching type '17'
-- row for the same resid. NOTE: multiplies rows if either type occurs more
-- than once per resid. ("date" and "type" are quoted because they are
-- reserved/keyword-like names.)
SELECT a.resid, a."type" type1, a."date" date1, b."type" type17, b."date" date17, b."date" - a."date" AS date_diff
FROM tablename a JOIN tablename b ON a.resid = b.resid AND b."type" = '17'
WHERE a."type" = '1' AND a.resid = :resid
Please do not use oracle reserved words as column names.
When (resid, type) is unique you can do:
-- Scalar-subquery variant: safe only when (resid, "type") is unique --
-- otherwise a subquery returns more than one row and the query errors out.
SELECT :resid resid,
(select "date" FROM tablename WHERE resid = :resid AND "type" = '17') -
(select "date" FROM tablename WHERE resid = :resid AND "type" = '1') date_diff
FROM DUAL

Creating test load for self-referencing table

I have to do some SQL Server 2008 R2 performance testing and it would be very convenient to do it using only SSMS and SQL Server, without additional application support.
One of the tests I have to do is querying a self-referencing table (tree-like structure) with unknown content. So, for a start I would have to load something like 100K - 1M randomly parent-child-related rows into this table.
-- Self-referencing tree table: each row optionally points at its parent;
-- a NULL ParentID marks a root node.
CREATE TABLE Test2 (
    ID       int IDENTITY(1, 1) NOT NULL PRIMARY KEY CLUSTERED,
    ParentID int NULL REFERENCES Test2 (ID)
)
I am currently trying with SSMS and this script to load 10K rows into the table:
-- Seed the tree with a single root, then insert one row at a time, picking
-- each new row's parent at random. (RBAR -- deliberately naive baseline.)
-- NOTE: T-SQL local variables are prefixed with '@'; the original text's
-- '#n' is not valid syntax.
SET NOCOUNT ON

-- Root row: NULL ParentID marks the root.
INSERT INTO Test2 (ParentID)
VALUES (NULL)

DECLARE @n int = 0  -- rows inserted so far

WHILE (1 = 1)
BEGIN
    --PRINT @n
    -- Random parent via ORDER BY NEWID(): forces a full sort of the
    -- whole table on every iteration, which is why this is slow.
    INSERT INTO Test2 (ParentID)
    SELECT TOP 1 ID FROM Test2 ORDER BY NEWID()

    SET @n = @n + 1
    IF (@n >= 9999)
        BREAK
END

SET NOCOUNT OFF
My problem is that it runs something like 2m 45s on my laptop. You can imagine how long it would take to load 100K or even 1M records this way.
I would like to have a faster way to load this random tree-like structure into database table using TSQL?
EDIT:
After Mitch Wheat's suggestion, I replaced
SELECT TOP 1 ID FROM Test2 ORDER BY NEWID()
with
SELECT TOP 1 ID FROM Test2
WHERE ID >= RAND(CHECKSUM(NEWID())) * (SELECT MAX(ID) FROM Test2)
Regarding random row selection, results really look uniformly distributed. Execution time falls from 160s to 5s (!) -> this enables me to insert 100K records in ~60s. However, inserting 1M records using my RBAR script is still very slow and I'm still searching for possible set-based expression to fill my table. If it exists.
Now, after ~10mins of filling random data I have 1M rows. It is slow but acceptable.
However, to copy this data to another table using batch insert it takes <10s.
SELECT *
INTO Test3
FROM Test2
So, I believe some form of batch insert could speed up the process.
You are not really measuring the INSERT performance with your posted code.
Picking a single random row using an ORDER BY clause like this:
SELECT TOP 1 * FROM table ORDER BY NEWID()
or even
SELECT TOP 1 * FROM table ORDER BY CHECKSUM(NEWID())
performs a table scan (because the random value associated with each row obviously needs to be calculated before the rows can be ordered), which can be slow for large tables. Using an indexed integer column (such as that commonly used for a primary key), and using:
SELECT TOP 1 * FROM table
WHERE rowid >= RAND(CHECKSUM(NEWID())) * (SELECT MAX(rowid) FROM table)
works in constant time, provided the rowid column is indexed. Note: this assumes that rowid is uniformly distributed in the range 0..MAX(rowid). If your dataset has some other distribution, your results will be skewed (i.e. some rows will be picked more often than others).
I ended up using my original approach with some tweaks:
disabling reference constraint before insert and re-enabling afterwards
using batch inserts as Mitch Wheat suggested
This is the schema:
-- Recreate the table with the self-referencing FK declared as a separate,
-- named constraint so the load script can disable and re-enable it.
DROP TABLE Test2
GO

CREATE TABLE Test2 (
    ID       int IDENTITY(1, 1) NOT NULL PRIMARY KEY CLUSTERED,
    ParentID int NULL /*REFERENCES Test2 (ID)*/  -- FK added below instead
)
GO

ALTER TABLE Test2
    ADD CONSTRAINT FK_SelfRef
    FOREIGN KEY (ParentID) REFERENCES Test2 (ID)
GO
And the script:
-- Cold-cache setup so timings are comparable across runs.
CHECKPOINT;
DBCC DROPCLEANBUFFERS;
SET NOCOUNT ON

-- Disable the self-referencing FK for the duration of the bulk load.
ALTER TABLE Test2 NOCHECK CONSTRAINT FK_SelfRef

-- Root row.
INSERT INTO Test2 (ParentID)
VALUES (NULL)

DECLARE @n int = 1  -- current row count (T-SQL variables use '@', not '#')

WHILE (1 = 1)
BEGIN
    -- Batch insert: every existing row becomes a parent once, roughly
    -- doubling the table each pass -- hence it can overshoot the target.
    INSERT INTO Test2 (ParentID)
    SELECT ID FROM Test2 ORDER BY NEWID()

    SELECT @n = COUNT(*) FROM Test2
    IF (@n >= 999999)
        BREAK
END

-- Re-enable and re-validate the FK; WITH CHECK verifies existing rows so
-- the constraint remains trusted by the optimizer.
ALTER TABLE dbo.Test2 WITH CHECK CHECK CONSTRAINT FK_SelfRef
SET NOCOUNT OFF
This executes in 10 secs, and I can't do it this fast with any other method.
NOTE: It inserts more records than needed. But the method can be arranged to insert the exact number of records by limiting the number of inserts in the last pass.
When parent is assigned randomly from the previously inserted rows, there is no control over the tree height (number of levels) and the way levels populated, which may not be desired in some scenarios.
It may be more convenient to populate tree with a data level by level.
Auxiliary table valued function is taken to generate numbers sequence using Itzik's cross joined CTE method (see e.g. here about it)
-- Number-sequence TVF using Itzik Ben-Gan's cross-joined CTE method:
-- stacked cross joins generate up to 2^32 rows with no table I/O, and
-- ROW_NUMBER() turns them into the sequence 1..@cnt.
-- NOTE: T-SQL parameters are prefixed with '@'; '#cnt' is invalid syntax.
create function ftItziksCJCTE
(
    @cnt int  -- how many sequence numbers to return
)
returns table as
return
(
    WITH
    E00(N) AS (SELECT 1 UNION ALL SELECT 1),  -- 2 rows
    E02(N) AS (SELECT 1 FROM E00 a, E00 b),   -- 4 rows
    E04(N) AS (SELECT 1 FROM E02 a, E02 b),   -- 16 rows
    E08(N) AS (SELECT 1 FROM E04 a, E04 b),   -- 256 rows
    E16(N) AS (SELECT 1 FROM E08 a, E08 b),   -- 65,536 rows
    E32(N) AS (SELECT 1 FROM E16 a, E16 b),   -- ~4.3e9 rows (lazily evaluated)
    E(N) AS (SELECT ROW_NUMBER() OVER (ORDER BY N) FROM E32)
    select N from E where N <= @cnt
)
Simple table to control elements distribution in the tree:
-- Per-level element distribution: each parent on level LevelNo gets
-- between MinElements and MaxElements children.
create table #TreeLevels
(
LevelNo int identity(1, 1) not NULL,
MinElements int not NULL,
MaxElements int not NULL,
primary key clustered (LevelNo)
)
Sample distribution:
-- Explicit column lists: LevelNo is IDENTITY, so only min/max are supplied.
insert into #TreeLevels (MinElements, MaxElements) values (7, 10)
insert into #TreeLevels (MinElements, MaxElements) values (70, 100)
insert into #TreeLevels (MinElements, MaxElements) values (700, 1000)
Will give us something like 7 to 10 elements with ParentID = NULL, each of them having something like 70 to 100 elements, etc. With total number of elements 343000 to 1000000
Or other distribution:
-- Explicit column lists: LevelNo is IDENTITY, so only min/max are supplied.
insert into #TreeLevels (MinElements, MaxElements) values (1, 1)
insert into #TreeLevels (MinElements, MaxElements) values (9, 15)
insert into #TreeLevels (MinElements, MaxElements) values (10, 12)
insert into #TreeLevels (MinElements, MaxElements) values (9, 15)
insert into #TreeLevels (MinElements, MaxElements) values (10, 12)
insert into #TreeLevels (MinElements, MaxElements) values (9, 15)
insert into #TreeLevels (MinElements, MaxElements) values (10, 12)
Meaning there will be single root element with something between 9 and 15 child elements, each of them having something like 10 to 12 elements, etc.
Then tree can be populated level by level:
-- Populate the tree breadth-first, one level per pass. Two scratch tables
-- ping-pong the IDs inserted at the previous level so each pass only has
-- to scan the newest parents.
-- NOTE: T-SQL local variables use the '@' prefix ('#' is for temp tables);
-- likewise the system function is @@ROWCOUNT.
declare @levelNo int, @eMin int, @eMax int

create table #Inserted (ID int not NULL, primary key nonclustered (ID))
create table #Inserted2 (ID int not NULL, primary key nonclustered (ID))

set @levelNo = 1
while 1 = 1
begin
    -- Fetch this level's element range; no row means we are past the last level.
    select @eMin = MinElements, @eMax = MaxElements from #TreeLevels where LevelNo = @levelNo
    if @@ROWCOUNT = 0
        break

    if @levelNo = 1
    begin
        -- Root level: insert a random number of NULL-parent rows and
        -- capture their IDs for the next pass.
        insert into TestTree (ParentID)
        output inserted.ID into #Inserted (ID)
        select NULL from ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0))
    end
    else
    begin
        if exists (select 1 from #Inserted)
        begin
            -- Parents are in #Inserted; children's IDs go into #Inserted2.
            insert into TestTree (ParentID)
            output inserted.ID into #Inserted2 (ID)
            select
                I.ID
            from
                #Inserted I
                cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
            truncate table #Inserted
        end
        else
        begin
            -- Roles swapped: parents in #Inserted2, children into #Inserted.
            insert into TestTree (ParentID)
            output inserted.ID into #Inserted (ID)
            select
                I.ID
            from
                #Inserted2 I
                cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
            truncate table #Inserted2
        end
    end

    set @levelNo = @levelNo + 1
end
However, there is no control on the exact number of elements the tree will contain and leaf nodes are on the last level only. It would be good to have additional parameter controlling level population (percent of nodes on the same level which will have children).
-- Extended distribution table: PopulatedPct controls what fraction of a
-- level's nodes will get children on the NEXT level (NULL means 100%).
create table #TreeLevels
(
LevelNo int identity(1, 1) not NULL,
MinElements int not NULL,
MaxElements int not NULL,
PopulatedPct float NULL,
primary key clustered (LevelNo)
)
Sample distribution:
-- Explicit column lists: LevelNo is IDENTITY; NULL PopulatedPct = 100%.
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (1, 1, NULL)
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (9, 15, NULL)
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (10, 12, NULL)
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (9, 15, 80)
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (10, 12, 65)
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (9, 15, 35)
insert into #TreeLevels (MinElements, MaxElements, PopulatedPct) values (10, 12, NULL)
NULL for a PopulatedPct percent is treated as 100%. PopulatedPct controls next level population and should be taken from previous level during cycle. Also it has no meaning for the last row in the #TreeLevels hence.
Now we can cycle trough levels taking PopulatedPct into account.
-- Same breadth-first population as before, but honoring PopulatedPct:
-- only that percentage of the previous level's nodes (picked at random)
-- become parents. NULL PopulatedPct means every node gets children.
-- NOTE: T-SQL local variables use the '@' prefix ('#' is for temp tables);
-- likewise the system function is @@ROWCOUNT.
declare @levelNo int, @eMin int, @eMax int

create table #Inserted (ID int not NULL, primary key nonclustered (ID))
create table #Inserted2 (ID int not NULL, primary key nonclustered (ID))

set @levelNo = 1
while 1 = 1
begin
    select @eMin = MinElements, @eMax = MaxElements from #TreeLevels where LevelNo = @levelNo
    if @@ROWCOUNT = 0
        break

    if @levelNo = 1
    begin
        -- Root level: random number of NULL-parent rows.
        insert into TestTree (ParentID)
        output inserted.ID into #Inserted (ID)
        select NULL from ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0))
    end
    else
    begin
        declare @pct float
        -- PopulatedPct belongs to the PREVIOUS level: it decides how many
        -- of that level's nodes become parents of this level.
        select @pct = PopulatedPct from #TreeLevels where LevelNo = @levelNo - 1
        if exists (select 1 from #Inserted)
        begin
            if (@pct is NULL)
                -- NULL percentage = 100%: all previous-level nodes get children.
                insert into TestTree (ParentID)
                output inserted.ID into #Inserted2 (ID)
                select
                    I.ID
                from
                    #Inserted I
                    cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
            else
                -- Pick a random @pct percent of the previous level as parents.
                insert into TestTree (ParentID)
                output inserted.ID into #Inserted2 (ID)
                select
                    I.ID
                from
                    (select top (@pct) PERCENT ID from #Inserted order by rand(checksum(newid()))) I
                    cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
            truncate table #Inserted
        end
        else
        begin
            -- Roles swapped: parents in #Inserted2, children into #Inserted.
            if (@pct is NULL)
                insert into TestTree (ParentID)
                output inserted.ID into #Inserted (ID)
                select
                    I.ID
                from
                    #Inserted2 I
                    cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
            else
                insert into TestTree (ParentID)
                output inserted.ID into #Inserted (ID)
                select
                    I.ID
                from
                    (select top (@pct) PERCENT ID from #Inserted2 order by rand(checksum(newid()))) I
                    cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
            truncate table #Inserted2
        end
    end

    set @levelNo = @levelNo + 1
end
Still there is no control over the exact number of elements, but better control over the tree shape is gained.

select values from table and create oracle objects

i want to insert values to oracle Object type by selecting values from other table
And the tables and insert statement looks like this.
-- Object type with three string attributes; Oracle generates a default
-- constructor taking the attributes in declaration order.
CREATE TYPE Test_obj AS OBJECT (
    attr1 VARCHAR2(20),
    attr2 VARCHAR2(20),
    attr3 VARCHAR2(25)
);
/
-- Table with an object-typed column plus two numeric bookkeeping columns.
CREATE TABLE resultrow_obj (
    resultrow     Test_obj,
    resulttableid NUMBER(20, 0),
    rownumber     NUMBER(20, 0)
);
/
-- Asker's attempt, kept verbatim: this is NOT valid SQL -- a bare
-- multi-column subquery cannot be used as the argument list of an object
-- constructor. The accepted fix (below) wraps the constructor call in a
-- scalar subquery instead.
INSERT INTO resultrow_obj VALUES (
Test_obj (select col1,col2,col3 from Table2 where rownum<=1),
1,123 );
/
You've got it nearly right:
SQL> INSERT INTO resultrow_obj
2 VALUES((SELECT Test_obj('A', 'B', 'C')
3 FROM dual WHERE rownum <= 1),
4 1, 123);
1 row inserted

Resources