SQL insert slow on 1 million rows - performance

With TOP 100000 (100k), this query finishes in about 3 seconds.
With TOP 1000000 (1 million), it takes about 2 minutes:
SELECT TOP 1000000
    db_id = IDENTITY(int, 1, 1), *
INTO dbo.tablename
FROM dbname.dbo.tablename;
Actual execution plan is always:
Clustered Index Scan (4% cost)
Top
Top
Compute Scalar
Insert (96% cost)
SELECT INTO
The source table has 1.3 million rows and an int primary key on the first column.
Can I speed it up somehow? I'm using SQL Server 2008 R2.
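One variation worth testing (my suggestion, not something from the original thread): generate the sequential number with ROW_NUMBER() instead of the IDENTITY() function. It is only a sketch; whether it actually helps depends on where the time goes on your hardware.
-- Hypothetical alternative: ROW_NUMBER() instead of IDENTITY().
-- ORDER BY (SELECT NULL) means "any order"; order by the PK column
-- instead if the numbering must follow it.
SELECT TOP (1000000)
    db_id = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)),
    t.*
INTO dbo.tablename
FROM dbname.dbo.tablename AS t;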

The results showed that 100,000 records takes 159 ms, and 1,000,000 records takes 1,435 ms: RAID 1 OS, RAID 1 Data, RAID 1 Log, RAID 1 TempDb, all on separate drives. Our dev environment.
The results showed that 100,000 records takes 113 ms, and 1,000,000 records takes 996 ms: my laptop with a single SSD (Samsung 840 250GB). SSDs rock!!!
The results showed that 100,000 records takes 188 ms, and 1,000,000 records takes 1,880 ms: RAID 1 OS, RAID 10 Data, RAID 10 Log, RAID 1 TempDb, all on separate drives under a production load.

Here is a complete script that shows that the 1 million takes less than ten times as long as 100,000. Your situation is likely slightly different, but this shows that the fundamentals are not the issue.
The results show that 100,000 records takes 146 ms, and 1,000,000 records takes 1,315 ms.
These results are from my desktop. If someone else could run the script and post their results, that would be very useful.
Rob
USE master;
GO
-- Drop database SourceDB
IF EXISTS (SELECT * FROM sys.databases WHERE name = 'SourceDB') ALTER DATABASE SourceDB SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
IF EXISTS (SELECT * FROM sys.databases WHERE name = 'SourceDB') DROP DATABASE SourceDB;
GO
-- Create database SourceDB
CREATE DATABASE SourceDB;
ALTER DATABASE SourceDB SET RECOVERY SIMPLE;
GO
USE SourceDB;
GO
-- Create table SourceDB.dbo.SourceTable
CREATE TABLE dbo.SourceTable (
ColID int PRIMARY KEY
);
GO
-- Populate table SourceDB.dbo.SourceTable
DECLARE @i int = 0;
WHILE @i < 1300000
BEGIN
    SET @i += 1;
    INSERT INTO dbo.SourceTable (ColID) VALUES (@i);
END;
GO
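-- (Added sketch, not part of the original script: the WHILE loop above is
-- the slowest part of the setup. A set-based population such as the
-- following would be much faster; it is left commented out so the script
-- behaves exactly as before.)
-- INSERT INTO dbo.SourceTable (ColID)
-- SELECT TOP (1300000) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
-- FROM sys.all_objects AS a CROSS JOIN sys.all_objects AS b;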
-- Drop database Test1
IF EXISTS (SELECT * FROM sys.databases WHERE name = 'Test1') ALTER DATABASE Test1 SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
IF EXISTS (SELECT * FROM sys.databases WHERE name = 'Test1') DROP DATABASE Test1;
GO
-- Create database Test1
CREATE DATABASE Test1;
ALTER DATABASE Test1 SET RECOVERY SIMPLE;
ALTER DATABASE Test1 MODIFY FILE (NAME = Test1, SIZE = 3000MB, MAXSIZE = 8TB);
ALTER DATABASE Test1 MODIFY FILE (NAME = Test1_log, SIZE = 3000MB, MAXSIZE = 2TB);
GO
USE Test1;
GO
IF EXISTS (SELECT * FROM sys.tables WHERE [OBJECT_ID] = OBJECT_ID('dbo.DestinationTable1')) DROP TABLE dbo.DestinationTable1;
IF EXISTS (SELECT * FROM sys.tables WHERE [OBJECT_ID] = OBJECT_ID('dbo.DestinationTable2')) DROP TABLE dbo.DestinationTable2;
GO
DECLARE @n int = 100000;
DECLARE @t1 datetime2 = SYSDATETIME();
SELECT TOP (@n) db_id = IDENTITY(int, 1, 1), *
INTO dbo.DestinationTable1
FROM SourceDB.dbo.SourceTable;
SELECT DATEDIFF(ms, @t1, SYSDATETIME()) AS ElapsedMs;
GO
DECLARE @n int = 1000000;
DECLARE @t1 datetime2 = SYSDATETIME();
SELECT TOP (@n) db_id = IDENTITY(int, 1, 1), *
INTO dbo.DestinationTable2
FROM SourceDB.dbo.SourceTable;
SELECT DATEDIFF(ms, @t1, SYSDATETIME()) AS ElapsedMs;
GO
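If you run the script yourself, SET STATISTICS TIME gives a CPU-versus-elapsed breakdown that helps separate a CPU-bound run from an I/O-bound one. A minimal sketch (the dbo.DestinationTable3 name is made up for the extra run):
SET STATISTICS TIME ON;
SELECT TOP (1000000) db_id = IDENTITY(int, 1, 1), *
INTO dbo.DestinationTable3
FROM SourceDB.dbo.SourceTable;
SET STATISTICS TIME OFF;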

Related

Slow performance of temp table variables

I've always operated under the assumption that for small to medium data sets I would use table variables, and once the data sets get large (relatively speaking, and I know this is somewhat subjective) I would move to #temp tables.
I have some code that returns 159 rows very quickly - 0.00 ms. Seeing 159 rows, I assumed a table variable would be fine. However, when I insert into the table variable, the code spins for 10 minutes. When I switch to a #temp table, boom - back to 0.00 seconds.
Can someone tell me why this occurs, or what I can do to figure it out?
The environment is SQL Server 2017 Enterprise with plenty of memory and disk space.
DECLARE @Year INT = 2017;
DECLARE @Month INT = NULL;
DECLARE @TempTableVariable TABLE
(
    Test BIGINT
);
DROP TABLE IF EXISTS tempdb.dbo.#TempTableTest;
CREATE TABLE #TempTableTest (Test BIGINT);
--INSERT INTO @TempTableVariable
INSERT INTO #TempTableTest
SELECT
    idfHumanCase
FROM dbo.tlbHumanCase
WHERE YEAR(COALESCE(datOnSetDate, datFinalDiagnosisDate, datTentativeDiagnosisDate, datEnteredDate)) = @Year
AND (MONTH(COALESCE(datOnSetDate, datFinalDiagnosisDate, datTentativeDiagnosisDate, datEnteredDate)) = @Month OR @Month IS NULL)
AND LegacyCaseID IN
(
    SELECT LegacyCaseID
    FROM dbo.tlbHumanCase
    WHERE YEAR(COALESCE(datOnSetDate, datFinalDiagnosisDate, datTentativeDiagnosisDate, datEnteredDate)) = @Year
    AND (MONTH(COALESCE(datOnSetDate, datFinalDiagnosisDate, datTentativeDiagnosisDate, datEnteredDate)) = @Month OR @Month IS NULL)
    GROUP BY LegacyCaseID HAVING COUNT(*) > 1
);
SELECT * FROM #TempTableTest;
--SELECT * FROM @TempTableVariable;
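No answer was posted in this thread, but one detail worth knowing (my note): an INSERT that targets a table variable forces the whole plan to run serially, while the same INSERT into a #temp table lets the SELECT side use parallelism; table variables also carry no statistics. A sketch of an experiment to test the parallelism theory: force the fast #temp version serial and see whether it slows down too.
-- Hypothetical experiment (not from the thread): if the #temp insert
-- also becomes slow at MAXDOP 1, parallelism explains the difference.
INSERT INTO #TempTableTest
SELECT idfHumanCase
FROM dbo.tlbHumanCase
WHERE YEAR(COALESCE(datOnSetDate, datFinalDiagnosisDate, datTentativeDiagnosisDate, datEnteredDate)) = @Year
AND (MONTH(COALESCE(datOnSetDate, datFinalDiagnosisDate, datTentativeDiagnosisDate, datEnteredDate)) = @Month OR @Month IS NULL)
AND LegacyCaseID IN
(
    SELECT LegacyCaseID
    FROM dbo.tlbHumanCase
    WHERE YEAR(COALESCE(datOnSetDate, datFinalDiagnosisDate, datTentativeDiagnosisDate, datEnteredDate)) = @Year
    AND (MONTH(COALESCE(datOnSetDate, datFinalDiagnosisDate, datTentativeDiagnosisDate, datEnteredDate)) = @Month OR @Month IS NULL)
    GROUP BY LegacyCaseID HAVING COUNT(*) > 1
)
OPTION (MAXDOP 1);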

ORA-01652: Unable to extend temp space

I have a procedure, and one particular query in it generates about 50GB of temp space, causing the exception below after a few executions:
SQL state [72000]; error code [1652]; ORA-01652: unable to extend temp segment by 128 in tablespace TEMP
The DBAs are pointing to the query below in the stored procedure as the one that needs to be rewritten. The tables used in the query are each smaller than 0.1 GB, yet the query generates 50GB of temp space!
SELECT tab1.ORDID ID1, tab2.ORDID ID2
FROM (
SELECT
OT.ORDID,
CONNECT_BY_ROOT OT.UNIQ_ORIG_KEY ORIG_UID
FROM order_tab OT, status_tab ST
WHERE OT.otype IN ('A','B')
AND OT.order_uid IS NULL
AND OT.BATCH_ID = ST.BATCH_ID
AND ST.CT_DATE = :A1
AND ST.BSTATUS = 1
CONNECT BY PRIOR OT.UNIQ_KEY = OT.UNIQ_ORIG_KEY
) tab1 , order_tab tab2
WHERE tab2.ORD_VERID = 1
AND tab1.ORIG_UID = tab2.UNIQ_KEY
ORDER BY ID1;
Could someone please help rewrite the query efficiently so that temp space utilization is reduced? The database is Oracle 12c.
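No rewrite was posted in this thread, but one structural observation (my note): the CONNECT BY has no START WITH clause, so Oracle treats every row of the filtered set as a root and materializes the same subtrees over and over; combined with the final ORDER BY, that intermediate blow-up is a plausible source of the 50GB of temp. A sketch of the shape a fix could take, with a hypothetical root condition that must be verified against the real data model:
SELECT tab1.ORDID ID1, tab2.ORDID ID2
FROM (
    SELECT OT.ORDID,
           CONNECT_BY_ROOT OT.UNIQ_ORIG_KEY AS ORIG_UID
    FROM order_tab OT, status_tab ST
    WHERE OT.otype IN ('A','B')
      AND OT.order_uid IS NULL
      AND OT.BATCH_ID = ST.BATCH_ID
      AND ST.CT_DATE = :A1
      AND ST.BSTATUS = 1
    -- Hypothetical root condition: replace with whatever actually
    -- marks a root row in this schema.
    START WITH OT.UNIQ_ORIG_KEY = OT.UNIQ_KEY
    CONNECT BY PRIOR OT.UNIQ_KEY = OT.UNIQ_ORIG_KEY
) tab1, order_tab tab2
WHERE tab2.ORD_VERID = 1
  AND tab1.ORIG_UID = tab2.UNIQ_KEY
ORDER BY ID1;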

Oracle - Insert x amount of rows with random data

I am currently doing some testing and am in need of a large amount of data (around 1 million rows).
I am using the following table:
CREATE TABLE OrderTable(
    OrderID INTEGER NOT NULL,
    StaffID INTEGER,
    TotalOrderValue DECIMAL(8,2),
    CustomerID INTEGER);
ALTER TABLE OrderTable ADD CONSTRAINT OrderID_PK PRIMARY KEY (OrderID);
CREATE SEQUENCE seq_OrderTable
MINVALUE 1
START WITH 1
INCREMENT BY 1
CACHE 10000;
and want to randomly insert 1,000,000 rows into it with the following rules:
OrderID needs to be sequential (1, 2, 3, etc.)
StaffID needs to be a random number between 1 and 1000
CustomerID needs to be a random number between 1 and 10000
TotalOrderValue needs to be a random decimal value between 0.00 and 9999.99
Is this even possible to do? I know I could generate each of these values with an update statement, but I am not sure how to generate a million rows in one go.
Thanks for any help on this matter.
This is how I would randomly generate the number in an update:
UPDATE StaffTable SET DepartmentID = DBMS_RANDOM.value(low => 1, high => 5);
For testing purposes I created the table and populated it in one shot, with this query:
CREATE TABLE OrderTable(OrderID, StaffID, CustomerID, TotalOrderValue)
as (select level, ceil(dbms_random.value(0, 1000)),
ceil(dbms_random.value(0,10000)),
round(dbms_random.value(0,10000),2)
from dual
connect by level <= 1000000)
/
A few notes: in Oracle it is better to use NUMBER as the data type; NUMBER(8,2) is the equivalent of DECIMAL(8,2). For populating this kind of table, it is much more efficient to use the "hierarchical query without PRIOR" trick (the connect by level <= ... trick) to generate the order IDs.
If your table is created already, an INSERT INTO OrderTable with the same subquery as in my code should work just as well (see the sketch below). You may be better off adding the PK constraint only after you create the data, though, so as not to slow things down.
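A minimal sketch of that INSERT variant, using the same subquery (assuming the OrderTable from the question already exists):
INSERT INTO OrderTable (OrderID, StaffID, CustomerID, TotalOrderValue)
SELECT level,
       ceil(dbms_random.value(0, 1000)),
       ceil(dbms_random.value(0, 10000)),
       round(dbms_random.value(0, 10000), 2)
FROM dual
CONNECT BY level <= 1000000;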
A small sample from the table created (total time to create the table on my cheap laptop - 1,000,000 rows - was 7.6 seconds):
SQL> select * from OrderTable where orderid between 500020 and 500030;
   ORDERID    STAFFID CUSTOMERID TOTALORDERVALUE
---------- ---------- ---------- ---------------
    500020        666        879         6068.63
    500021        189       6444         1323.82
    500022        533       2609         1847.21
    500023        409        895          207.88
    500024         80       2125         1314.13
    500025        247       3772         5081.62
    500026        922       9523         1160.38
    500027        818       5197         5009.02
    500028        393       6870         5067.81
    500029        358       4063          858.44
    500030        316       8134         3479.47

Update or Insert on a Table based on a column value

I have two tables BASE and DAILY as shown below:
BASE
Cust ID  IP address
1        10.5.5.5
2        10.5.5.50
3        10.5.5.6
DAILY
Cust ID  IP address
1        10.5.5.5
2        10.5.5.70
4        10.5.5.67
The table DAILY is refreshed every 24 hours. For every Cust ID in BASE I have to check whether the IP address has changed in DAILY; if it has, update the row in BASE.
All the new entries in DAILY have to be inserted into BASE.
I have tried this with one cursor that compares and updates, and another cursor for the insertion, but it takes a lot of time.
What is the best possible way to do this?
You could also use MERGE depending on your database system.
SQL Server syntax would be
MERGE INTO BASE B
USING DAILY D
ON D.CustId = B.CustId
WHEN NOT MATCHED THEN
INSERT (CustId, Ip) VALUES (D.CustId, D.Ip)
WHEN MATCHED AND D.Ip <> B.Ip THEN
UPDATE SET B.Ip = D.Ip;
The Oracle syntax is much the same.
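A sketch of the Oracle form, assuming the same CustId/Ip column names (Oracle expresses the extra condition as a WHERE on the matched branch rather than an AND):
MERGE INTO BASE B
USING DAILY D
ON (D.CustId = B.CustId)
WHEN MATCHED THEN
    UPDATE SET B.Ip = D.Ip
    WHERE D.Ip <> B.Ip
WHEN NOT MATCHED THEN
    INSERT (CustId, Ip) VALUES (D.CustId, D.Ip);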
If you just want to update your BASE table, use an UPDATE over all its rows.
UPDATE `BASE`
SET `IP address` = (SELECT `IP address`
FROM DAILY
WHERE DAILY.`Cust ID` = `BASE`.`Cust ID`);
Then use this INSERT INTO query to insert the new values that do not yet exist in your BASE table.
INSERT INTO `BASE`
SELECT `Cust ID`, `IP address`
FROM DAILY
WHERE DAILY.`Cust ID` NOT IN (SELECT `Cust ID` FROM BASE);
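One caveat worth adding (my note, not part of the original answer): as written, the UPDATE sets `IP address` to NULL for any BASE customer that has no matching row in DAILY. A guarded variant:
UPDATE `BASE`
SET `IP address` = (SELECT `IP address`
                    FROM DAILY
                    WHERE DAILY.`Cust ID` = `BASE`.`Cust ID`)
WHERE EXISTS (SELECT 1
              FROM DAILY
              WHERE DAILY.`Cust ID` = `BASE`.`Cust ID`);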
SQL>
declare
begin
  for i in (select * from daily where ip_add not in (select ip_add from base))
  loop
    update base set ip_add = i.ip_add where custid = i.custid;
  end loop;
end;
/
PL/SQL procedure successfully completed.
SQL> select * from base;

    CUSTID IP_ADD
---------- ----------
         1 10.5.5.5
         2 10.5.5.20   -- updated value where ip_add differed in daily
         3 10.5.5.6
SQL> select * from daily;

    CUSTID IP_ADD
---------- ----------
         1 10.5.5.5
         2 10.5.5.20
         4 10.5.5.62
SQL>

How do I get the number of inserts/updates occurring in an Oracle database?

How do I get the total number of inserts/updates that have occurred in an Oracle database over a period of time?
Assuming that you've configured AWR to retain data for all SQL statements (the default is to only retain the top 30 by CPU, elapsed time, etc. if the STATISTICS_LEVEL is 'TYPICAL' and the top 100 if the STATISTICS_LEVEL is 'ALL') via something like
BEGIN
  dbms_workload_repository.modify_snapshot_settings (
    topnsql => 'MAXIMUM'
  );
END;
and assuming that SQL statements don't age out of the cache before a snapshot captures them, you can use the AWR tables for some of this.
You can gather the number of times an INSERT statement was executed and the number of times an UPDATE statement was executed:
SELECT sum( stat.executions_delta ) insert_executions
FROM dba_hist_sqlstat stat
JOIN dba_hist_sqltext txt ON (stat.sql_id = txt.sql_id )
JOIN dba_hist_snapshot snap ON (stat.snap_id = snap.snap_id)
WHERE snap.begin_interval_time BETWEEN <<start time>> AND <<end time>>
AND txt.command_type = 2;
SELECT sum( stat.executions_delta ) update_executions
FROM dba_hist_sqlstat stat
JOIN dba_hist_sqltext txt ON (stat.sql_id = txt.sql_id )
JOIN dba_hist_snapshot snap ON (stat.snap_id = snap.snap_id)
WHERE snap.begin_interval_time BETWEEN <<start time>> AND <<end time>>
AND txt.command_type = 6;
Note that these queries include both statements that your application issues and statements that Oracle issues in the background. You could add additional criteria if you want to filter out certain SQL statements.
Similarly, you could get the total number of distinct INSERT and UPDATE statements:
SELECT count( distinct stat.sql_id ) distinct_insert_stmts
FROM dba_hist_sqlstat stat
JOIN dba_hist_sqltext txt ON (stat.sql_id = txt.sql_id )
JOIN dba_hist_snapshot snap ON (stat.snap_id = snap.snap_id)
WHERE snap.begin_interval_time BETWEEN <<start time>> AND <<end time>>
AND txt.command_type = 2;
SELECT count( distinct stat.sql_id ) distinct_update_stmts
FROM dba_hist_sqlstat stat
JOIN dba_hist_sqltext txt ON (stat.sql_id = txt.sql_id )
JOIN dba_hist_snapshot snap ON (stat.snap_id = snap.snap_id)
WHERE snap.begin_interval_time BETWEEN <<start time>> AND <<end time>>
AND txt.command_type = 6;
Oracle does not, however, track the number of rows that were inserted or updated in a given interval. So you won't be able to get that information from AWR. The closest you could get would be to try to leverage the monitoring Oracle does to determine if statistics are stale. Assuming MONITORING is enabled for each table (it is by default in 11g and I believe it is by default in 10g), i.e.
ALTER TABLE table_name
MONITORING;
Oracle will periodically flush the approximate number of rows that are inserted, updated, and deleted for each table to the SYS.DBA_TAB_MODIFICATIONS view. But this only shows the activity since statistics were last gathered on a table, not the activity in a particular interval. You could, however, write a process that periodically captures this data to a table of your own and report off that (a sketch follows the query below).
If you instruct Oracle to flush the monitoring information from memory to disk (otherwise there is a lag of up to several hours)
BEGIN
  dbms_stats.flush_database_monitoring_info;
END;
you can get an approximate count of the number of rows that have changed in each table since statistics were last gathered
SELECT table_owner,
       table_name,
       inserts,
       updates,
       deletes
FROM sys.dba_tab_modifications;
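A minimal sketch of that capture process (my illustration; the my_tab_mod_history table name is hypothetical):
-- One-time setup: a history table shaped like DBA_TAB_MODIFICATIONS
-- plus a capture timestamp.
CREATE TABLE my_tab_mod_history AS
SELECT SYSDATE AS snapshot_time, m.*
FROM sys.dba_tab_modifications m
WHERE 1 = 0;

-- Run periodically (e.g. from a DBMS_SCHEDULER job): flush the
-- in-memory counters, then append the current view contents.
BEGIN
  dbms_stats.flush_database_monitoring_info;
  INSERT INTO my_tab_mod_history
  SELECT SYSDATE, m.* FROM sys.dba_tab_modifications m;
  COMMIT;
END;
/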
