I'm currently trying to optimize a SQL SELECT statement. I would like to know whether there is a simpler way to write this statement that also improves its performance.
I would also like to know whether I can improve its performance through indexing, partitioning, clustering, tuning the data buffer cache, using the in-memory column store, etc.
-- Parts supplied from the nation 'FRANCE' whose summed stock value
-- (supply cost * available quantity) exceeds 0.0005 of the summed stock
-- value of all such parts, most valuable part first.
SELECT
    ps_partkey,
    SUM(ps_supplycost * ps_availqty) AS value
FROM partsupp
INNER JOIN supplier
    ON s_suppkey = ps_suppkey
INNER JOIN nation
    ON n_nationkey = s_nationkey
WHERE n_name = 'FRANCE'
GROUP BY ps_partkey
HAVING SUM(ps_supplycost * ps_availqty) > (
    -- Threshold: the same joins and filter, aggregated over every part.
    SELECT SUM(ps_supplycost * ps_availqty) * 0.0005
    FROM partsupp
    INNER JOIN supplier
        ON s_suppkey = ps_suppkey
    INNER JOIN nation
        ON n_nationkey = s_nationkey
    WHERE n_name = 'FRANCE'
)
ORDER BY value DESC;
Could you try the following alternative query and compare its performance?
-- Single-pass alternative: aggregate once per part, then use a window over
-- the grouped rows to obtain the grand total, so the three-table join is
-- evaluated only once.
SELECT
    ps_partkey,
    value
FROM (
    SELECT
        ps_partkey,
        SUM(ps_supplycost * ps_availqty) AS value,
        -- Window over the per-part sums: the same grand total on every row.
        SUM(SUM(ps_supplycost * ps_availqty)) OVER () AS totalvalue
    FROM partsupp
    INNER JOIN supplier
        ON ps_suppkey = s_suppkey
    INNER JOIN nation
        ON s_nationkey = n_nationkey
    WHERE n_name = 'FRANCE'
    -- Grouping yields one row per part, so no DISTINCT is needed afterwards.
    GROUP BY ps_partkey
) part_totals
WHERE value > totalvalue * 0.0005
-- Ordering restored so the result matches the query this one replaces.
ORDER BY value DESC;
Note: It is recommended to use standard ANSI joins
Related
I am trying to download a huge table (billions of records) from an Oracle DB.
The database can hold a session for only a few hours (no idea why).
So my idea is to split the table into many pieces and download them using dynamic SQL.
Split query:
-- Collapse the extents of :owner.:object_name into one block range per
-- (data object, subobject, data file, batch). Each output row gives the
-- first and last block id of a range plus the number of blocks it covers.
SELECT
    data_object_id,
    file_id,
    relative_fno,
    file_batch,
    subobject_name,
    MIN(start_block_id) AS start_block_id,
    MAX(end_block_id) AS end_block_id,
    SUM(blocks) AS blocks
FROM (
    SELECT
        o.data_object_id,
        -- Projected here so the outer query can select and group by it.
        o.subobject_name,
        e.file_id,
        e.relative_fno,
        e.block_id AS start_block_id,
        e.block_id + e.blocks - 1 AS end_block_id,
        e.blocks,
        -- Running block count within the file divided by
        -- (total blocks in the file / number of batches). The number of
        -- batches is the literal 1, so every extent falls into batch 1;
        -- a divisor of N would number the batches 1..N.
        CEIL(
            SUM(e.blocks) OVER (
                PARTITION BY o.data_object_id, e.file_id
                ORDER BY e.block_id ASC
            )
            / (SUM(e.blocks) OVER (PARTITION BY o.data_object_id, e.file_id) / 1)
        ) AS file_batch
    FROM dba_objects o
    INNER JOIN dba_extents e
        ON e.owner = o.owner
        AND e.segment_name = o.object_name
        -- Match each extent to the object row of the same (sub)partition;
        -- for a non-partitioned object both names are NULL.
        AND (
            e.partition_name = o.subobject_name
            OR (e.partition_name IS NULL AND o.subobject_name IS NULL)
        )
    WHERE o.owner = :owner
        AND o.object_name = :object_name
        -- Repeated on the extent side so the filter is applied to both views.
        AND e.owner = :owner
        AND e.segment_name = :object_name
)
GROUP BY
    data_object_id,
    subobject_name,
    file_id,
    relative_fno,
    file_batch
ORDER BY
    data_object_id,
    file_id,
    relative_fno,
    file_batch;
It splits an ordinary table correctly, but it doesn't work with partitioned or subpartitioned tables (the download returns more or fewer rows than there are in the DB).
The query I used for the download:
-- Extract one chunk of the table by rowid range and tag each row with the
-- chunk label '63_17'.
-- NOTE(review): COLUMN_NAMES and OWNER.OBJECT_NAME look like placeholders
-- for the real column list and table - confirm before running.
SELECT /*+ NO_INDEX(t) */
    COLUMN_NAMES,
    '63_17' AS data_chunk_id
FROM OWNER.OBJECT_NAME t
WHERE t.rowid BETWEEN dbms_rowid.rowid_create(1, 846313, 63, 3057792, 0)
                  AND dbms_rowid.rowid_create(1, 846313, 63, 4056447, 32767);
Check out DBMS_PARALLEL_EXECUTE. Even if you don't use it "completely", ie, to run the extract, you can use the DBMS_PARALLEL_EXECUTE.CREATE_CHUNKS_BY_ROWID routine to generate a list of rowid ranges that you can then use to run your rowid range queries that you've already written.
This SQL can be executed on Oracle, but not on ClickHouse:
-- Every PART / PARTSUPP pair whose supply cost equals the lowest supply
-- cost recorded for that part. The scalar subquery is correlated: its
-- P_PARTKEY comes from the outer PART row.
SELECT *
FROM PART
INNER JOIN PARTSUPP
    ON PS_PARTKEY = P_PARTKEY
WHERE PS_SUPPLYCOST = (
    SELECT MIN(PS_SUPPLYCOST)
    FROM PARTSUPP
    WHERE PS_PARTKEY = P_PARTKEY
)
Exception:
Missing columns: 'P_PARTKEY' while processing query: 'SELECT min(PS_SUPPLYCOST)...
Any help will be appreciated.
Thank you.
Correlated subquery SQL (Oracle):
-- First 100 rows, ordered by account balance (descending), nation, supplier
-- and part, of region-'ASIA' suppliers offering a size-25 part whose type
-- ends in 'COPPER' at the lowest supply cost any 'ASIA' supplier charges
-- for that part.
SELECT *
FROM (
    SELECT
        S_ACCTBAL,
        S_NAME,
        N_NAME,
        P_PARTKEY,
        P_MFGR,
        S_ADDRESS,
        S_PHONE,
        S_COMMENT
    FROM PART
    INNER JOIN PARTSUPP
        ON PS_PARTKEY = P_PARTKEY
    INNER JOIN SUPPLIER
        ON S_SUPPKEY = PS_SUPPKEY
    INNER JOIN NATION
        ON N_NATIONKEY = S_NATIONKEY
    INNER JOIN REGION
        ON R_REGIONKEY = N_REGIONKEY
    WHERE P_SIZE = 25
        AND P_TYPE LIKE '%COPPER'
        AND R_NAME = 'ASIA'
        AND PS_SUPPLYCOST = (
            -- Correlated on the outer PART row through P_PARTKEY; every
            -- other column here resolves to the subquery's own tables.
            SELECT MIN(PS_SUPPLYCOST)
            FROM PARTSUPP
            INNER JOIN SUPPLIER
                ON S_SUPPKEY = PS_SUPPKEY
            INNER JOIN NATION
                ON N_NATIONKEY = S_NATIONKEY
            INNER JOIN REGION
                ON R_REGIONKEY = N_REGIONKEY
            WHERE PS_PARTKEY = P_PARTKEY
                AND R_NAME = 'ASIA'
        )
    -- Sort inside the inline view so ROWNUM is assigned in sorted order.
    ORDER BY
        S_ACCTBAL DESC,
        N_NAME,
        S_NAME,
        P_PARTKEY
)
WHERE ROWNUM <= 100;
For ClickHouse:
-- Rewrite of the minimum-cost-supplier query without a correlated subquery:
-- the per-part minimum supply cost is computed once in the derived table
-- "pps" and joined back on part key and cost.
SELECT
*
from
(
SELECT
s.S_ACCTBAL AS S_ACCTBAL,
s.S_NAME AS S_NAME,
n.N_NAME AS N_NAME,
p.P_PARTKEY AS P_PARTKEY,
p.P_MFGR AS P_MFGR,
s.S_ADDRESS AS S_ADDRESS,
s.S_PHONE AS S_PHONE,
s.S_COMMENT AS S_COMMENT
FROM
PART AS p,
PARTSUPP AS ps,
SUPPLIER AS s,
NATION AS n,
REGION AS r,
-- pps: lowest PS_SUPPLYCOST per P_PARTKEY among suppliers whose region is 'ASIA'.
(
SELECT
P_PARTKEY,
MIN(PS_SUPPLYCOST) AS PS_SUPPLYCOST
FROM
PARTSUPP,
PART,
SUPPLIER,
NATION,
REGION
WHERE
P_PARTKEY = PS_PARTKEY
AND S_SUPPKEY = PS_SUPPKEY
AND S_NATIONKEY = N_NATIONKEY
AND N_REGIONKEY = R_REGIONKEY
AND R_NAME = 'ASIA'
GROUP BY
P_PARTKEY) pps
WHERE
-- These two predicates replace the correlated "PS_SUPPLYCOST = (SELECT MIN ...)" test.
p.P_PARTKEY = pps.P_PARTKEY
AND ps.PS_SUPPLYCOST = pps.PS_SUPPLYCOST
AND p.P_PARTKEY = ps.PS_PARTKEY
AND s.S_SUPPKEY = ps.PS_SUPPKEY
AND p.P_SIZE = 25
AND p.P_TYPE LIKE '%COPPER'
AND s.S_NATIONKEY = n.N_NATIONKEY
AND n.N_REGIONKEY = r.R_REGIONKEY
AND r.R_NAME = 'ASIA')
-- Sort and keep the first 100 rows.
ORDER BY
S_ACCTBAL DESC,
N_NAME,
S_NAME,
P_PARTKEY
LIMIT 100;
In the SQL below I run the search across the tables and then return the total row count alongside the paginated result.
The total is needed in order to paginate.
This query uses SQL Server.
SQL Server:
-- Materialise the boats linked to @usuarioId once, so the same row set
-- feeds both the total count and the requested page.
-- Inputs: @usuarioId, @pageSize, @pageNumber (page numbers start at 1).
SELECT
    bu.Id AS BarcoUsuarioId,
    bu.Barco_Id AS BarcoId,
    bu.Usuario_Id AS UsuarioId,
    barco.Nome AS NomeBarco
INTO #tmpBarcoUsuario
FROM BARCO_USUARIO AS bu
INNER JOIN BARCO AS barco
    ON barco.Id = bu.Barco_Id
WHERE bu.Usuario_Id = @usuarioId;

-- Total number of matching rows, independent of the page being returned.
DECLARE @totalEmbarcacoes AS int = (SELECT COUNT(*) FROM #tmpBarcoUsuario);

SELECT
    BarcoUsuarioId,
    BarcoId,
    UsuarioId,
    NomeBarco,
    @totalEmbarcacoes AS TotalEmbarcacoes
FROM #tmpBarcoUsuario
ORDER BY BarcoId
-- Skip the rows of all previous pages, then take one page.
OFFSET @pageSize * (@pageNumber - 1) ROWS
FETCH NEXT @pageSize ROWS ONLY;
However, when using the Oracle database, I am having some difficulty.
As can be seen in the SQL below, I have already written the pagination query for Oracle:
Oracle:
-- One page of the boats linked to :usuarioId, highest BarcoId first.
-- Inputs: :usuarioId, :PageSize, :PageNumber (page numbers start at 1).
SELECT *
FROM (
    -- ROWNUM is assigned after the inner ORDER BY, so rnum follows the sort.
    SELECT ROWNUM rnum, b.*
    FROM (
        SELECT
            barcoUser.ID AS BarcoUsuarioId,
            barcoUser.BARCO_ID AS BarcoId,
            barcoUser.USARIO_ID AS UsuarioId,
            barco.NOME AS NomeBarco
        FROM BARCO_USUARIO barcoUser
        INNER JOIN EMBARCACAO barco
            ON barco.ID = barcoUser.BARCO_ID
        WHERE barcoUser.USARIO_ID = :usuarioId
        ORDER BY BarcoId DESC
    ) b
)
-- Exclusive lower bound: an inclusive BETWEEN would return the last row of
-- the previous page again on every page after the first.
WHERE rnum > :PageSize * (:PageNumber - 1)
    AND rnum <= :PageSize * :PageNumber;
But how would I return the total row count together with the paginated result, as was done in SQL Server?
You can use the analytic count(*) function
-- One page of the boats linked to :usuarioId, highest BarcoId first, with
-- the total number of matching rows repeated on every row as TotalCount.
-- Inputs: :usuarioId, :PageSize, :PageNumber (page numbers start at 1).
SELECT *
FROM (
    -- ROWNUM is assigned after the inner ORDER BY, so rnum follows the sort.
    SELECT ROWNUM rnum, b.*
    FROM (
        SELECT
            barcoUser.ID AS BarcoUsuarioId,
            barcoUser.BARCO_ID AS BarcoId,
            barcoUser.USARIO_ID AS UsuarioId,
            barco.NOME AS NomeBarco,
            -- Analytic count over the whole filtered set, computed before
            -- the page filter below is applied.
            COUNT(*) OVER () AS TotalCount
        FROM BARCO_USUARIO barcoUser
        INNER JOIN EMBARCACAO barco
            ON barco.ID = barcoUser.BARCO_ID
        WHERE barcoUser.USARIO_ID = :usuarioId
        ORDER BY BarcoId DESC
    ) b
)
-- Exclusive lower bound: an inclusive BETWEEN would return the last row of
-- the previous page again on every page after the first.
WHERE rnum > :PageSize * (:PageNumber - 1)
    AND rnum <= :PageSize * :PageNumber
Note that Oracle has supported the OFFSET FETCH syntax since 12c if you want to minimize the changes between database engines. The rownum approach, however, may be faster than the OFFSET FETCH approach.
I want to add a row number to my Oracle query below, but it gives me the error
ORA-30484: missing window specification for this function
Here is my query:
-- Lease listing for bookings of type 50 under property '1' / location '1309'.
-- NOTE: ROW_NUMBER () on the next line has no OVER (...) clause; this is the
-- statement that raises ORA-30484 (missing window specification).
SELECT ROW_NUMBER () AS sr_no, pn.lease_num, hz.party_name,
flt.location_code, flt.office flat_no, NULL action, la.no_of_days,
NULL remarks, flt.location_id flat_id, pn.lease_id
FROM xxcus.xxacl_pn_leases_all la,
pn_leases_all pn,
-- bld, flr and flt are three copies of the same inline view: rows of
-- pn_locations_all whose active date range contains SYSDATE. They are
-- chained below through parent_location_id (flt -> flr -> bld).
-- NOTE(review): the aliases suggest building / floor / flat - confirm.
(SELECT *
FROM pn_locations_all flat
WHERE SYSDATE BETWEEN flat.active_start_date AND flat.active_end_date) bld,
(SELECT *
FROM pn_locations_all flat
WHERE SYSDATE BETWEEN flat.active_start_date AND flat.active_end_date) flr,
(SELECT *
FROM pn_locations_all flat
WHERE SYSDATE BETWEEN flat.active_start_date AND flat.active_end_date) flt,
pn_properties_all prop,
hz_parties hz,
apps.hz_cust_accounts sc1
-- Join conditions.
WHERE la.lease_id = pn.lease_id
AND pn.location_id = flt.location_id
AND flt.parent_location_id = flr.location_id
AND flr.parent_location_id = bld.location_id
AND bld.property_id = prop.property_id
AND pn.customer_id = sc1.cust_account_id
AND sc1.party_id = hz.party_id
-- Filters.
AND la.type_of_booking = 50
AND prop.property_id = '1'
AND bld.location_id = '1309'
Kindly help me understand what is wrong.
I am using Oracle.
See the documentation for ROW_NUMBER. You need to write something like:
-- Numbering restarts at 1 for each type_of_booking, ordered by lease_id.
SELECT
    ROW_NUMBER() OVER (
        PARTITION BY la.type_of_booking
        ORDER BY la.lease_id
    )
FROM xxcus.xxacl_pn_leases_all la
or
-- A single numbering over the whole result, ordered by lease_id.
SELECT
    ROW_NUMBER() OVER (
        ORDER BY la.lease_id
    )
FROM xxcus.xxacl_pn_leases_all la
-- For each audit row "a": keep it only if it is the earliest-updated row
-- (TOP 1 by update_date) among rows with the same incident and status whose
-- tracking code equals the tracking code of that incident's most recently
-- updated row having status 103 or 1035.
SELECT a.*
FROM Incident_Audit_log AS a
WHERE a.incident_audit_log_id IN (
    SELECT TOP 1 b.incident_audit_log_id
    FROM Incident_Audit_log AS b
    WHERE b.incident_id = a.incident_id
        AND b.status_did = a.status_did
        AND b.tracking_code_did = (
            -- Tracking code of the row carrying the latest qualifying update.
            SELECT c.tracking_code_did
            FROM Incident_Audit_log AS c
            WHERE c.update_date = (
                    -- Latest update among status 103 / 1035 rows of the incident.
                    SELECT MAX(d.update_date)
                    FROM Incident_Audit_log AS d
                    WHERE d.Status_did IN (103, 1035)
                        AND d.incident_id = b.incident_id
                )
                AND c.incident_id = b.incident_id
        )
    ORDER BY b.update_date ASC
)
I am not sure what you want to achieve, but I guess that you want to extract the row with the newest update among those with status_did equal to 103 or 1035.
In that case this should work:
-- Rank the status 103 / 1035 rows from newest to oldest update and return
-- only the newest one (rn = 1), together with its rank column.
WITH ranked AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY ial.update_date DESC) AS rn,
        ial.*
    FROM Incident_Audit_log AS ial
    WHERE ial.status_did IN (103, 1035)
)
SELECT *
FROM ranked
WHERE rn = 1
If not, please provide more info.