So in Postgres I created a function that creates a table vehicletracks_tracks and fills it with information calculated from the original table. The original table includes the following attributes: id bigint NOT NULL DEFAULT, session character varying(32), client_id character varying(32) NOT NULL, vehicle_type smallint NOT NULL, geolocation geography(Geometry,4326) NOT NULL and "timestamp" timestamp without time zone NOT NULL.
In the function I calculate tracks and the borders between them by observing the time and velocity between two consecutive rows of the original table. After creating the tracks I calculate the distance and duration of every track.
The function takes around 6 minutes and 30 seconds, after which the output table is created and filled with rows. The original table has around 5 million rows. Looking at the procedure, is there any possibility to reduce its execution time?
This is my function:
CREATE OR REPLACE FUNCTION vehicletracks()
RETURNS void AS
$BODY$
DECLARE
    vrstica1 RECORD;
    vrstica2 RECORD;
    track_number INT;
BEGIN
    SET TIME ZONE 'UTC';
    DROP TABLE IF EXISTS vehicletracks_tracks;
    CREATE TABLE vehicletracks_tracks (
        session character varying(32),
        client_id character varying(32) NOT NULL,
        vehicle_type smallint NOT NULL,
        track_number SERIAL PRIMARY KEY,
        track_distance DOUBLE PRECISION,
        track_duration INTERVAL,
        track_start TIMESTAMP WITH TIME ZONE,
        track_stop TIMESTAMP WITH TIME ZONE
    );
    DROP VIEW IF EXISTS group_by_client_session_vehicle;
    CREATE VIEW group_by_client_session_vehicle AS
        SELECT DISTINCT client_id, session, vehicle_type
        FROM roaduserspositions
        GROUP BY client_id, session, vehicle_type
        ORDER BY session, client_id, vehicle_type;
    FOR vrstica1 IN (SELECT * FROM group_by_client_session_vehicle)
    LOOP
        DROP VIEW IF EXISTS velocity_difference_view;
        DROP TABLE IF EXISTS difference_table;
        CREATE TABLE difference_table AS
            SELECT id, session, client_id, vehicle_type, geolocation, timestamp, time_difference, distance_difference
            FROM (SELECT t.*,
                         t.timestamp - LAG(t.timestamp) OVER (ORDER BY t.session, t.client_id, t.vehicle_type, t.timestamp ASC) AS time_difference,
                         ST_Distance(geolocation, LAG(geolocation) OVER (ORDER BY t.session, t.client_id, t.vehicle_type, t.timestamp ASC)) AS distance_difference
                  FROM (SELECT *
                        FROM timonwww_roaduserspositions
                        WHERE client_id = vrstica1.client_id AND session = vrstica1.session AND vehicle_type = vrstica1.vehicle_type) AS t) AS tab
            ORDER BY session, client_id, vehicle_type, timestamp;
        CREATE VIEW velocity_difference_view AS (
            SELECT *, distance_difference / EXTRACT(EPOCH FROM time_difference) AS velocity
            FROM difference_table
            WHERE EXTRACT(EPOCH FROM time_difference) > 0
            UNION
            SELECT *, 0 AS velocity
            FROM difference_table
            WHERE EXTRACT(EPOCH FROM time_difference) = 0
            ORDER BY session, client_id, vehicle_type, timestamp);
        DROP TABLE IF EXISTS new_time_difference;
        CREATE TABLE new_time_difference (
            track_id INTEGER,
            LIKE velocity_difference_view INCLUDING ALL);
        track_number := 1;
        INSERT INTO new_time_difference(track_id, id, session, client_id, vehicle_type, geolocation, timestamp, time_difference)
            SELECT
                CASE
                    WHEN EXTRACT(EPOCH FROM time_difference) < 300 AND velocity < 121 THEN track_number
                    ELSE track_number + 1
                END AS track_id, id, session, client_id, vehicle_type, geolocation, timestamp, time_difference
            FROM velocity_difference_view OFFSET 1;
        track_number := 1;
        FOR vrstica2 IN (SELECT * FROM velocity_difference_view OFFSET 1)
        LOOP
            IF EXTRACT(EPOCH FROM vrstica2.time_difference) < 300 AND vrstica2.velocity < 121 THEN
                INSERT INTO new_time_difference VALUES(track_number, vrstica2.id, vrstica2.session, vrstica2.client_id, vrstica2.vehicle_type, vrstica2.geolocation, vrstica2.timestamp, vrstica2.time_difference);
            ELSE
                track_number := track_number + 1;
            END IF;
        END LOOP;
        DROP TABLE IF EXISTS geolocation_difference_table;
        CREATE TABLE geolocation_difference_table AS
            SELECT *
            FROM (SELECT *, ST_Distance(geolocation, LAG(geolocation) OVER (ORDER BY timestamp ASC)) AS geolocation_difference
                  FROM new_time_difference
                  WHERE track_id IN (SELECT * FROM generate_series((SELECT MIN(new_time_difference.track_id) FROM new_time_difference), (SELECT MAX(track_id) FROM new_time_difference)) num)) AS tab;
        DROP TABLE IF EXISTS track_distance_table;
        CREATE TABLE track_distance_table AS
            SELECT track_id, SUM(geolocation_difference) AS track_distance, SUM(time_difference) AS track_duration, MIN(timestamp) AS track_start, MAX(timestamp) AS track_stop
            FROM geolocation_difference_table
            GROUP BY track_id
            ORDER BY track_id ASC;
        INSERT INTO vehicletracks_tracks(session, client_id, vehicle_type, track_distance, track_duration, track_start, track_stop)
            SELECT vrstica1.session, vrstica1.client_id, vrstica1.vehicle_type, track_distance, track_duration, track_start, track_stop
            FROM (SELECT * FROM track_distance_table WHERE track_distance > 0 AND track_distance IS NOT NULL) AS t;
    END LOOP;
    DROP TABLE IF EXISTS vehicletracks_binding;
    CREATE TABLE vehicletracks_binding AS
        SELECT id, track_id
        FROM geolocation_difference_table
        ORDER BY track_id ASC;
    DROP VIEW IF EXISTS group_by_client_session_vehicle;
    DROP VIEW IF EXISTS velocity_difference_view;
    DROP TABLE IF EXISTS difference_table;
    DROP TABLE IF EXISTS new_time_difference;
    DROP TABLE IF EXISTS geolocation_difference_table;
    DROP TABLE IF EXISTS track_distance_table;
END;
$BODY$
LANGUAGE plpgsql VOLATILE
COST 100;
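For comparison, the per-group loop can in principle collapse into a single set-based statement using window functions partitioned by (client_id, session, vehicle_type), which avoids the repeated temp-table and view churn inside the loop. A minimal sketch, assuming the same roaduserspositions source and the same 300-second / 121 thresholds (untested against the real data):
CREATE TABLE vehicletracks_tracks AS
WITH diffs AS (
    SELECT client_id, session, vehicle_type, "timestamp",
           "timestamp" - LAG("timestamp") OVER w AS time_difference,
           ST_Distance(geolocation, LAG(geolocation) OVER w) AS distance_difference
    FROM roaduserspositions
    WINDOW w AS (PARTITION BY client_id, session, vehicle_type ORDER BY "timestamp")
), breaks AS (
    -- a row starts a new track when it is the first of its group,
    -- the gap is too long, or the implied velocity is too high
    SELECT *,
           CASE WHEN time_difference IS NULL
                  OR EXTRACT(EPOCH FROM time_difference) >= 300
                  OR distance_difference / NULLIF(EXTRACT(EPOCH FROM time_difference), 0) >= 121
                THEN 1 ELSE 0 END AS is_new_track
    FROM diffs
), numbered AS (
    -- running count of breaks numbers the tracks within each group
    SELECT *,
           SUM(is_new_track) OVER (PARTITION BY client_id, session, vehicle_type
                                   ORDER BY "timestamp") AS track_no
    FROM breaks
)
SELECT session, client_id, vehicle_type,
       SUM(CASE WHEN is_new_track = 1 THEN 0 ELSE distance_difference END) AS track_distance,
       SUM(CASE WHEN is_new_track = 1 THEN INTERVAL '0' ELSE time_difference END) AS track_duration,
       MIN("timestamp") AS track_start,
       MAX("timestamp") AS track_stop
FROM numbered
GROUP BY session, client_id, vehicle_type, track_no
HAVING SUM(CASE WHEN is_new_track = 1 THEN 0 ELSE distance_difference END) > 0;
One sequential scan that handles all groups at once is usually where most of the 6½ minutes can be recovered.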
I'm new to working with databases. Describing the problem below:
Problem: Having 'X' tables with 'Y' partitions each. I want a way of finding which tables have partitions, which partitions are almost full, and which partitions created before D-30 need to be deleted, as the application is getting errors due to this issue.
Please share guidance/queries on how this problem can be resolved.
Artifacts: Tried several queries as SYSDBA to list all the partitions, but multiple dependencies popped up. Still trying to figure out a concrete way to make a scheduler job.
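As a rough starting point for the "almost full" part, allocated space per partition can be read from the data dictionary (a sketch; this shows allocated segment space, not row counts, and you may need DBA_SEGMENTS instead of USER_SEGMENTS depending on privileges):
SELECT segment_name AS table_name,
       partition_name,
       ROUND(bytes / 1024 / 1024) AS size_mb
FROM user_segments
WHERE segment_type = 'TABLE PARTITION'
ORDER BY bytes DESC;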
Step 1 is to create your PARTITIONS. In my test CASE I am creating 1 PARTITION, t1 = weekly, but I'm leaving the syntax for the other partitions:
t0 = daily, t2 = monthly, t3 = quarterly and t4 = yearly.
Then I will populate PARTITION t1 with sample data. I'm using 1 date for each day, but during your testing add as many dates as you like per day. I would suggest data that emulates your application.
In addition, I am using a GLOBAL index for table t1 and LOCAL indexes for the rest.
When a partition from a global index is dropped, the entire index MUST be rebuilt or it will be invalid.
Based on your application you need to make your own determination whether to use global or local indexes!!
Note: I am creating all the PARTITIONS on DATE columns, but you can also use TIMESTAMP columns if need be.
/* weekly PARTITION */
CREATE TABLE t1 (
seq_num NUMBER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
dt DATE)
PARTITION BY RANGE (dt)
INTERVAL ( NUMTODSINTERVAL (7, 'DAY') ) (
PARTITION OLD_DATA VALUES LESS THAN (DATE '2022-04-01')
);
/
INSERT into t1 (dt)
with dt (dt, interv) as (
select date '2022-04-01', numtodsinterval(1,'DAY') from dual
union all
select dt.dt + interv, interv from dt
where dt.dt + interv < date '2022-08-31')
select dt from dt;
/
create index t1_global_ix on T1 (dt);
/
/* daily PARTITION */
CREATE TABLE t0 (
seq_num NUMBER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
dt DATE)
PARTITION BY RANGE (dt)
INTERVAL ( NUMTODSINTERVAL (1, 'DAY') ) (
PARTITION OLD_DATA VALUES LESS THAN (DATE '2022-01-01')
);
/
INSERT into t0 (dt)
with dt (dt, interv) as (
select date '2022-01-01', numtodsinterval(1,'DAY') from dual
union all
select dt.dt + interv, interv from dt
where dt.dt + interv < date '2022-12-31')
select dt from dt;
/
create index t0_local_ix on T0 (dt) LOCAL;
/
/* MONTHLY PARTITION */
CREATE TABLE t2 (
seq_num NUMBER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
dt DATE)
PARTITION BY RANGE (dt)
INTERVAL ( NUMTODSINTERVAL (1, 'MONTH') ) (
PARTITION OLD_DATA VALUES LESS THAN (DATE '2022-01-01')
);
/
INSERT into t2 (dt)
with dt (dt, interv) as (
select date '2021-01-01', numtodsinterval(1,'DAY') from dual
union all
select dt.dt + interv, interv from dt
where dt.dt + interv < date '2022-12-31')
select dt from dt;
/
create index t2_local_ix on T2 (dt) LOCAL;
/
/* QUARTERLY PARTITION */
CREATE TABLE t3 (
seq_num NUMBER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
dt DATE
)
PARTITION BY RANGE (dt)
INTERVAL
(NUMTOYMINTERVAL(3,'MONTH'))
(
PARTITION OLD_DATA values LESS THAN (TO_DATE('2021-01-01','YYYY-MM-DD'))
);
/
INSERT into t3 (dt)
with dt (dt, interv) as (
select date '2021-01-01', numtodsinterval(1,'DAY') from dual
union all
select dt.dt + interv, interv from dt
where dt.dt + interv < date '2022-12-31')
select dt from dt;
/
create index t3_local_ix on T3 (dt) LOCAL;
/
/* yearly PARTITION */
CREATE TABLE t4 (
seq_num NUMBER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
dt DATE
)
PARTITION BY RANGE (dt)
INTERVAL
(NUMTOYMINTERVAL(1,'YEAR'))
(
PARTITION OLD_DATA values LESS THAN (TO_DATE('2020-01-01','YYYY-MM-DD'))
);
/
INSERT into t4 (dt)
with dt (dt, interv) as (
select date '2021-01-01', numtodsinterval(90,'DAY') from dual
union all
select dt.dt + interv, interv from dt
where dt.dt + interv < date '2023-12-31')
select dt from dt;
/
create index t4_local_ix on T4 (dt) LOCAL;
/
Every time a new partition is created, Oracle generates a system name, which is quite cryptic.
Here is a list of the PARTITION names Oracle generated when I loaded the data above:
SELECT PARTITION_NAME
FROM USER_TAB_PARTITIONS
WHERE TABLE_NAME = 'T1'
PARTITION_NAME
OLD_DATA
SYS_P458773
SYS_P458774
SYS_P458775
SYS_P458776
SYS_P458777
SYS_P458778
SYS_P458779
SYS_P458780
SYS_P458781
SYS_P458782
SYS_P458783
SYS_P458784
SYS_P458785
SYS_P458786
SYS_P458787
SYS_P458788
SYS_P458789
SYS_P458790
SYS_P458791
SYS_P458792
SYS_P458793
SYS_P458794
Although PARTITION management will work with system-GENERATED PARTITION names, I use the procedure below to rename them to something more meaningful.
Let's create and run the procedure and take a look at the names again. As you can see, since we are working with weekly partitions the name is P_ for partition, YYYY for the 4-digit year the PARTITION is in, W for week of the year, and ## for the week number within the year.
I would suggest using the scheduler to run this process at least once a day. You can run it as many times as you want, as it will not cause any harm.
CREATE OR REPLACE PROCEDURE MaintainPartitions IS
EXPRESSION_IS_OF_WRONG_TYPE EXCEPTION;
PRAGMA EXCEPTION_INIT(EXPRESSION_IS_OF_WRONG_TYPE, -6550);
CURSOR PartTables IS
SELECT TABLE_NAME, INTERVAL
FROM USER_PART_TABLES
WHERE PARTITIONING_TYPE = 'RANGE'
ORDER BY TABLE_NAME;
CURSOR TabParts(aTableName VARCHAR2) IS
SELECT PARTITION_NAME, HIGH_VALUE
FROM USER_TAB_PARTITIONS
WHERE regexp_like(partition_name,'^SYS_P[[:digit:]]{1,10}') AND
TABLE_NAME = aTableName AND
table_name not like 'BIN$%'
and interval is not null
ORDER BY PARTITION_POSITION;
ym INTERVAL YEAR TO MONTH;
ds INTERVAL DAY TO SECOND;
newPartName VARCHAR2(30);
PERIOD TIMESTAMP;
BEGIN
FOR aTab IN PartTables LOOP
BEGIN
EXECUTE IMMEDIATE 'BEGIN :ret := '||aTab.INTERVAL||'; END;' USING OUT ds;
ym := NULL;
EXCEPTION
WHEN EXPRESSION_IS_OF_WRONG_TYPE THEN
EXECUTE IMMEDIATE 'BEGIN :ret := '||aTab.INTERVAL||'; END;' USING OUT ym;
ds := NULL;
END;
FOR aPart IN TabParts(aTab.TABLE_NAME) LOOP
EXECUTE IMMEDIATE 'BEGIN :ret := '||aPart.HIGH_VALUE||'; END;' USING OUT PERIOD;
IF ds IS NOT NULL THEN
IF ds >= INTERVAL '7' DAY THEN
-- Weekly partition
EXECUTE IMMEDIATE 'BEGIN :ret := TO_CHAR('||aPart.HIGH_VALUE||' - :int, :fmt); END;' USING OUT newPartName, INTERVAL '1' DAY, '"P_"IYYY"W"IW';
ELSE
-- Daily partition
EXECUTE IMMEDIATE 'BEGIN :ret := TO_CHAR('||aPart.HIGH_VALUE||' - :int, :fmt); END;' USING OUT newPartName, INTERVAL '1' DAY, '"P_"YYYYMMDD';
END IF;
ELSE
IF ym = INTERVAL '3' MONTH THEN
-- Quarterly partition
EXECUTE IMMEDIATE 'BEGIN :ret := TO_CHAR('||aPart.HIGH_VALUE||' - :int, :fmt); END;' USING OUT newPartName, INTERVAL '1' DAY, '"P_"YYYY"Q"Q';
ELSE
-- Monthly partition
EXECUTE IMMEDIATE 'BEGIN :ret := TO_CHAR('||aPart.HIGH_VALUE||' - :int, :fmt); END;' USING OUT newPartName, INTERVAL '1' DAY, '"P_"YYYYMM';
END IF;
END IF;
IF newPartName <> aPart.PARTITION_NAME THEN
EXECUTE IMMEDIATE 'ALTER TABLE '||aTab.TABLE_NAME||' RENAME PARTITION '||aPart.PARTITION_NAME||' TO '||newPartName;
END IF;
END LOOP;
END LOOP;
END MaintainPartitions;
/
EXEC MaintainPartitions
SELECT PARTITION_NAME
FROM USER_TAB_PARTITIONS
WHERE TABLE_NAME = 'T1'
PARTITION_NAME
OLD_DATA
P_2022W14
P_2022W15
P_2022W16
P_2022W17
P_2022W18
P_2022W19
P_2022W20
P_2022W21
P_2022W22
P_2022W23
P_2022W24
P_2022W25
P_2022W26
P_2022W27
P_2022W28
P_2022W29
P_2022W30
P_2022W31
P_2022W32
P_2022W33
P_2022W34
SELECT COUNT(*) FROM USER_TAB_PARTITIONS
COUNT(*)
31
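Since the suggestion above is to run MaintainPartitions at least once a day, here is a minimal DBMS_SCHEDULER sketch (the job name and run time are illustrative; the drop block shown further down can be wrapped the same way with job_type => 'PLSQL_BLOCK'):
BEGIN
  DBMS_SCHEDULER.CREATE_JOB (
    job_name        => 'MAINTAIN_PARTITIONS_JOB',  -- illustrative name
    job_type        => 'STORED_PROCEDURE',
    job_action      => 'MAINTAINPARTITIONS',
    start_date      => SYSTIMESTAMP,
    repeat_interval => 'FREQ=DAILY; BYHOUR=1',     -- daily at 01:00
    enabled         => TRUE);
END;
/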
Next step is setting up your RETENTION table. There should be an entry for each interval-range PARTITION.
The RETENTION value is for you to decide. In my example, I chose 30 days for table T1. This means that when the high value for a PARTITION is more than 30 days in the past, it's eligible to be dropped. So choose wisely when setting up these values.
Note: I listed the names of other tables to show how each table has its own value.
CREATE TABLE PARTITION_RETENTION (
seq_num NUMBER GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
TABLE_NAME VARCHAR2(30),
RETENTION INTERVAL DAY(3) TO SECOND(0),
CONSTRAINT
partition_retention_pk primary key (table_name),
CONSTRAINT CHK_NON_ZERO_DAYS CHECK (
RETENTION > INTERVAL '0' DAY
),
CONSTRAINT CHK_WHOLE_DAYS CHECK (
EXTRACT(HOUR FROM RETENTION) = 0
AND EXTRACT(MINUTE FROM RETENTION) = 0
AND EXTRACT(SECOND FROM RETENTION) = 0
)
);
insert into PARTITION_RETENTION (TABLE_NAME, RETENTION)
select 'T0', interval '10' day from dual union all
select 'T1', interval '30' day from dual union all
select 'T2', interval '15' day from dual union all
select 'T3', interval '30' day from dual union all
select 'T4', 15 * interval '1' day from dual union all
select 'T5', 5 * interval '1 00:00:00' day to second from dual;
Below are 3 procedures that need to be created.
The ddl procedure is a wrapper that shows you what is being processed and how long it takes.
The rebuild_index procedure is obvious: it rebuilds any invalid indexes. As I mentioned above, if you are using a global index and a PARTITION is dropped, then the index needs to be rebuilt. I hardcoded parallel 4 in this example, but if you have plenty of CPU power you may want to increase the number to fit your needs.
In addition, there are other ways indexes can be marked unusable, so you may want to consider scheduling that task.
Lastly, there is the anonymous block, which actually drops the PARTITIONS whose retention PERIOD has passed. This needs to be scheduled once a day!!
If you look carefully at the anonymous block, the last step is a call to the rebuild_index procedure. So if an index is unusable it will be rebuilt.
Now let's run the process and see what happens.
CREATE OR REPLACE PROCEDURE ddl(p_cmd varchar2)
authid current_user
is
t1 pls_integer;
BEGIN
t1 := dbms_utility.get_time;
dbms_output.put_line(p_cmd);
execute immediate p_cmd;
dbms_output.put_line((dbms_utility.get_time - t1)/100 || ' seconds');
END;
/
CREATE OR REPLACE PROCEDURE rebuild_index
authid current_user
is
BEGIN
for i in (
select index_owner, index_name, partition_name, 'partition' ddl_type
from all_ind_partitions
where status = 'UNUSABLE'
union all
select owner, index_name, null, null
from all_indexes
where status = 'UNUSABLE'
)
loop
if i.ddl_type is null then
ddl('alter index '||i.index_owner||'.'||i.index_name||' rebuild parallel 4 online');
else
ddl('alter index '||i.index_owner||'.'||i.index_name||' modify '||i.ddl_type||' '||i.partition_name||' rebuild parallel 4 online');
end if;
end loop;
END;
/
DECLARE
CANNOT_DROP_LAST_PARTITION EXCEPTION;
PRAGMA EXCEPTION_INIT(CANNOT_DROP_LAST_PARTITION, -14758);
CANNOT_DROP_ONLY_ONE_PARTITION EXCEPTION;
PRAGMA EXCEPTION_INIT(CANNOT_DROP_ONLY_ONE_PARTITION, -14083);
ts TIMESTAMP;
CURSOR TablePartitions IS
SELECT TABLE_NAME, PARTITION_NAME, p.HIGH_VALUE, t.INTERVAL, RETENTION, DATA_TYPE
FROM USER_PART_TABLES t
JOIN USER_TAB_PARTITIONS p USING (TABLE_NAME)
JOIN USER_PART_KEY_COLUMNS pk ON pk.NAME = TABLE_NAME
JOIN USER_TAB_COLS tc USING (TABLE_NAME, COLUMN_NAME)
JOIN PARTITION_RETENTION r USING (TABLE_NAME)
WHERE pk.object_type = 'TABLE' AND
t.partitioning_type = 'RANGE' AND
REGEXP_LIKE (tc.data_type, '^DATE$|^TIMESTAMP.*');
BEGIN
FOR aPart IN TablePartitions LOOP
EXECUTE IMMEDIATE 'BEGIN :ret := '||aPart.HIGH_VALUE||'; END;' USING OUT ts;
IF ts < SYSTIMESTAMP - aPart.RETENTION THEN
BEGIN
ddl('alter table '||aPart.TABLE_NAME||' drop partition '||aPart.partition_name);
EXCEPTION
WHEN CANNOT_DROP_ONLY_ONE_PARTITION THEN
DBMS_OUTPUT.PUT_LINE('Cant drop the only partition '||aPart.PARTITION_NAME ||' from table '||aPart.TABLE_NAME);
ddl('ALTER TABLE '||aPart.TABLE_NAME||' TRUNCATE PARTITION '||aPart.PARTITION_NAME);
WHEN CANNOT_DROP_LAST_PARTITION THEN
BEGIN
DBMS_OUTPUT.PUT_LINE('Drop last partition '||aPart.PARTITION_NAME ||' from table '||aPart.TABLE_NAME);
EXECUTE IMMEDIATE 'ALTER TABLE '||aPart.TABLE_NAME||' SET INTERVAL ()';
ddl('alter table '||aPart.TABLE_NAME||' drop partition '||aPart.partition_name);
EXECUTE IMMEDIATE 'ALTER TABLE '||aPart.TABLE_NAME||' SET INTERVAL( '||aPart.INTERVAL||' )';
EXCEPTION
WHEN CANNOT_DROP_ONLY_ONE_PARTITION THEN
-- Depending on the order the "last" partition can be also the "only" partition at the same time
EXECUTE IMMEDIATE 'ALTER TABLE '||aPart.TABLE_NAME||' SET INTERVAL( '||aPart.INTERVAL||' )';
DBMS_OUTPUT.PUT_LINE('Cant drop the only partition '||aPart.PARTITION_NAME ||' from table '||aPart.TABLE_NAME);
ddl('ALTER TABLE '||aPart.TABLE_NAME||' TRUNCATE PARTITION '||aPart.PARTITION_NAME);
END;
END;
END IF;
END LOOP;
rebuild_index();
END;
/
alter table T1 drop partition OLD_DATA
.02 seconds
alter table T1 drop partition P_2022W14
.01 seconds
alter table T1 drop partition P_2022W15
.02 seconds
alter table T1 drop partition P_2022W16
.01 seconds
alter table T1 drop partition P_2022W17
.02 seconds
alter table T1 drop partition P_2022W18
.01 seconds
alter table T1 drop partition P_2022W19
.02 seconds
alter table T1 drop partition P_2022W20
.01 seconds
alter table T1 drop partition P_2022W21
.01 seconds
alter table T1 drop partition P_2022W22
.02 seconds
alter table T1 drop partition P_2022W23
.01 seconds
alter table T1 drop partition P_2022W24
.01 seconds
alter table T1 drop partition P_2022W25
.01 seconds
alter table T1 drop partition P_2022W26
.01 seconds
alter table T1 drop partition P_2022W27
.02 seconds
alter index SQL_WUKYPRGVPTOUVLCAEKUDCRCQI.T1_GLOBAL_IX rebuild parallel 4 online
.1 seconds
…
…
…
alter index SQL_WUKYPRGVPTOUVLCAEKUDCRCQI.T1_GLOBAL_IX rebuild parallel 4 online
.1 seconds
SELECT count(*) from USER_TAB_PARTITIONS
Where
table_name not like 'BIN$%'
8
SELECT PARTITION_NAME
FROM USER_TAB_PARTITIONS
WHERE TABLE_NAME = 'T1'
AND
table_name not like 'BIN$%'
P_2022W28
P_2022W29
P_2022W30
P_2022W31
P_2022W32
P_2022W33
P_2022W34
P_2022W35
I have a query which pulls values in a column in a defined date range (the last 10 years). If data in that column is missing for any specific date, the row simply does not exist in the table.
Is there a way that I can return NULLs if there is no row? Basically, I still want to return all dates in the range, but if a row does not exist, then I would just return a NULL for that specific date.
The below will give you a listing of all dates between two dates.
DECLARE @MinDate DATE = '20110913',
        @MaxDate DATE = '20210913';
SELECT TOP (DATEDIFF(DAY, @MinDate, @MaxDate) + 1)
Date = DATEADD(DAY, ROW_NUMBER() OVER(ORDER BY a.object_id) - 1,
@MinDate)
FROM sys.all_objects a
CROSS JOIN sys.all_objects b;
Use this table to join to your table by date and any dates that are missing will show as null.
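For instance, a minimal sketch of that join (YourTable and SomeValue are placeholders for your actual table and column):
DECLARE @MinDate DATE = '20110913',
        @MaxDate DATE = '20210913';
-- Materialize the generated dates into a temp table
SELECT TOP (DATEDIFF(DAY, @MinDate, @MaxDate) + 1)
       Date = DATEADD(DAY, ROW_NUMBER() OVER (ORDER BY a.object_id) - 1, @MinDate)
INTO #Dates
FROM sys.all_objects a
CROSS JOIN sys.all_objects b;
-- Every date comes back; dates with no matching row show NULL
SELECT d.Date, t.SomeValue
FROM #Dates d
LEFT JOIN YourTable t ON t.[Date] = d.Date
ORDER BY d.Date;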
You just need to LEFT JOIN. In the example below the user is displayed even if they don't have an order. LEFT JOIN means keep the data on the left even if it doesn't have a match in the right table; if you used an INNER JOIN then it would only return rows that had data in both tables, and hence would remove the NULL OrderId rows.
I notice you mention date ranges (e.g. a dates table) too, which could be the second part, so this might help if you are going down that route: https://www.brentozar.com/archive/2020/08/date-tables-are-great-for-users-but-not-so-great-for-performance/
See the full example below, which you can run; take note of the LEFT JOIN.
BEGIN TRANSACTION
BEGIN TRY
CREATE TABLE #Users (
Id INT NOT NULL IDENTITY(1,1) PRIMARY KEY,
[Name] NVARCHAR(50) NOT NULL
)
INSERT INTO #Users ([Name] )
VALUES('Andrew'),
('Bob'),
('Dave'),
('Philip')
SELECT * FROM #Users
CREATE TABLE #Orders (
Id INT NOT NULL IDENTITY(1,1) PRIMARY KEY,
UserId INT NOT NULL,
Cost DECIMAL(18,2) NOT NULL,
CONSTRAINT FK_Orders_Users FOREIGN KEY (UserId) REFERENCES #Users (Id)
)
INSERT INTO #Orders (UserId, Cost)
VALUES(1, 2.99),
(1, 5.99),
(4, 8.99)
SELECT * FROM #Orders
SELECT U.Id AS UserId, U.[Name], O.Id AS OrderId FROM #Users AS U
LEFT JOIN #Orders AS O ON O.UserId = U.Id
ROLLBACK TRANSACTION
END TRY
BEGIN CATCH
PRINT 'Rolling back changes, there was an error!!'
ROLLBACK TRANSACTION
DECLARE @Msg NVARCHAR(MAX)
SELECT @Msg = ERROR_MESSAGE()
RAISERROR('Error Occurred: %s', 20, 101, @Msg) WITH LOG
END CATCH
I want to delete from an existing table the entries that correspond to a specific time range, if they exist in this table, then calculate them and insert them into it.
If they do not exist, just calculate them and insert them into the table.
How can this be done?
Could anyone give an example of this?
The table structure should be the following:
create table test_table
(
[Date] float
,[UserName] nvarchar(max)
,[SessionType] int
,[Duration] float
,[MessageCount] int
)
If you have a column that stores the date and time, then you can just delete the records based on that.
Suppose I have a column called CreateDate on my table. Then I can delete all records created between 10:00 am and 11:00 am today just by running:
DELETE FROM MyTable WHERE
CreateDate BETWEEN '2018-01-12 10:00:00.000' AND '2018-01-12 11:00:00.000'
Now insert the values again using a normal INSERT statement.
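For example (MyTable and the column names are placeholders):
INSERT INTO MyTable (CreateDate, SomeValue)
SELECT CreateDate, SomeValue
FROM MyRecalculatedData;  -- wherever your recalculated rows come from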
You can do it in steps like this.
First, store the set of records in the time range in a temp table:
SELECT * INTO tempTable FROM YourTable WHERE CONVERT(FLOAT, [Date]) BETWEEN 43100.3603763503 AND 43110.3603763503
Then delete the records from the table
DELETE FROM YourTable WHERE CONVERT(FLOAT, [Date]) BETWEEN 43100.3603763503 AND 43110.3603763503
Then do the calculations as per your requirement with the data available in tempTable and insert the data into your table
INSERT INTO YourTable
SELECT * FROM tempTable
Then drop the tempTable
DROP TABLE tempTable
I am updating my solution to this; thank you all for the help.
create table mytable
(
[Date] float
,[UserName] nvarchar(max)
,[SessionType] int
,[Duration] float
,[MessageCount] int
)
IF EXISTS (SELECT * FROM mytable WHERE [Date] >= 43082 AND [Date] < 43111)
BEGIN
DELETE FROM mytable WHERE [Date] >= 43082 AND [Date]< 43111
;WITH Data AS (
/*do the calculation of data*/
)
INSERT INTO mytable select * from Data
END
ELSE
BEGIN
;WITH Data AS (
/*do the calculation of data*/
)
INSERT INTO mytable select * from Data
END
I have a SQL Server database in which I store millions of records. Now I have to delete a big number of records continuously, so what I do is run thousands of queries like this within the same execution:
delete TWEET
from TWEET
where REQUEST_ID >= x and TWEET_ID = y and ID < z
The single query is immediate, but putting them all together is extremely slow.
What would you suggest to me?
You can use this: it deletes 100,000 rows per turn, which works faster than deleting all the rows at the same time.
DECLARE @RC INT = 1
WHILE @RC > 0
BEGIN
    delete TOP(100000)
    from TWEET
    where REQUEST_ID >= x and TWEET_ID = y and ID < z
    SET @RC = @@ROWCOUNT
END
Using a JOINed table is probably your best bet (see below). Another option is to use Service Broker: you send a delete request into SB and some asynchronous process deletes it when it has the CPU to do so.
------------------------------------------------------------------------
-- Create a table to hold the delete requests:
CREATE TABLE DeleteTweets (
ID INT IDENTITY(1,1) PRIMARY KEY
, REQUEST_ID INT
, TWEET_ID INT
)
GO
------------------------------------------------------------------------
-- Create an index to keep the deletions fast:
CREATE INDEX IX_DeleteTweets ON dbo.DeleteTweets (REQUEST_ID, TWEET_ID)
GO
------------------------------------------------------------------------
-- However you choose to put your delete requests into that table (this might
-- be part of your front-end application or some batch process):
INSERT INTO dbo.DeleteTweets
( REQUEST_ID, TWEET_ID )
VALUES ( 0 /*REQUEST_ID*/, 0 /*TWEET_ID*/)
, ( 0 /*REQUEST_ID*/, 0 /*TWEET_ID*/)
, ( 0 /*REQUEST_ID*/, 0 /*TWEET_ID*/)
, ( 0 /*REQUEST_ID*/, 0 /*TWEET_ID*/)
, ( 0 /*REQUEST_ID*/, 0 /*TWEET_ID*/)
GO
------------------------------------------------------------------------
-- Delete from the main table via JOIN:
DELETE t
FROM TWEET t
INNER JOIN dbo.DeleteTweets dt
ON t.REQUEST_ID = dt.REQUEST_ID
AND t.TWEET_ID = dt.TWEET_ID
GO
------------------------------------------------------------------------
-- Once they're done, empty the table so you can re-fill with new deletion requests:
TRUNCATE TABLE dbo.DeleteTweets
GO
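------------------------------------------------------------------------
-- And for the Service Broker option mentioned above, a rough sketch of the
-- moving parts (all object names are illustrative; in practice the RECEIVE
-- side would run from an activation procedure or a scheduled job):
CREATE MESSAGE TYPE DeleteTweetMsg VALIDATION = WELL_FORMED_XML;
CREATE CONTRACT DeleteTweetContract (DeleteTweetMsg SENT BY INITIATOR);
CREATE QUEUE DeleteTweetQueue;
CREATE SERVICE DeleteTweetService ON QUEUE DeleteTweetQueue (DeleteTweetContract);
GO
-- Producer: enqueue a delete request instead of running the DELETE inline
DECLARE @h UNIQUEIDENTIFIER;
BEGIN DIALOG CONVERSATION @h
    FROM SERVICE DeleteTweetService
    TO SERVICE 'DeleteTweetService'
    ON CONTRACT DeleteTweetContract
    WITH ENCRYPTION = OFF;
SEND ON CONVERSATION @h
    MESSAGE TYPE DeleteTweetMsg (N'<del request_id="1" tweet_id="2" id_below="3" />');
GO
-- Consumer: drain the queue asynchronously and do the actual delete
DECLARE @msg XML, @hc UNIQUEIDENTIFIER;
WAITFOR (RECEIVE TOP (1)
             @hc  = conversation_handle,
             @msg = CAST(message_body AS XML)
         FROM DeleteTweetQueue), TIMEOUT 5000;
IF @msg IS NOT NULL
BEGIN
    DELETE FROM TWEET
    WHERE REQUEST_ID >= @msg.value('(/del/@request_id)[1]', 'int')
      AND TWEET_ID    = @msg.value('(/del/@tweet_id)[1]', 'int')
      AND ID          < @msg.value('(/del/@id_below)[1]', 'int');
    END CONVERSATION @hc;
END
GO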
--IN QUERY WINDOW 1
DROP TABLE TWEETDEL
GO
DROP TABLE TWEET
GO
CREATE TABLE TWEET
(ID int NOT NULL
,REQUEST_ID AS ID*2
,TWEET_ID AS ID*10
)
GO
INSERT TWEET WITH (TABLOCKX) (ID )
SELECT TOP 20000000 id
FROM
(SELECT ROW_NUMBER() OVER (ORDER BY a.id) AS id
FROM sys.sysobjects AS a, sys.syscolumns AS b, sys.syscolumns AS c) x
GO
ALTER TABLE TWEET ADD PRIMARY KEY (ID)
GO
SELECT IDENTITY(INT,1,1) AS TWEETDELPK, *
INTO TWEETDEL
FROM TWEET
WHERE REQUEST_ID%14=0
GO
ALTER TABLE TWEETDEL ADD PRIMARY KEY (TWEETDELPK)
GO
ALTER TABLE TWEETDEL
ADD CONSTRAINT fk101
FOREIGN KEY (ID) REFERENCES TWEET(ID) ON DELETE CASCADE
GO
-- IN THE SAME WINDOW
SET NOCOUNT OFF
DECLARE @ID INT
CR:
SELECT @ID = CNT FROM ##
DELETE TOP (@ID) t
FROM TWEET AS t WITH (PAGLOCK)
WHERE EXISTS
(SELECT 1 FROM TWEETDEL AS x WHERE t.ID = x.ID)
IF @@ROWCOUNT > 0 GOTO CR
-- IN ANOTHER WINDOW
CREATE TABLE ## (CNT INT)
INSERT ## SELECT 1000
select COUNT(1) from tweetdel
select COUNT(1) from tweet
-- CHECK FOR LOCKING - IF LOCKS ARE STAYING OPEN TOO LONG, RAISE OR LOWER CHUNKSIZE ACCORDINGLY
UPDATE ## SET CNT =5000
SELECT resource_type, resource_associated_entity_id,
request_status, request_mode,request_session_id,
resource_description
FROM sys.dm_tran_locks
WHERE resource_database_id = DB_ID()
AND request_mode in ('x')
I am looking to create a SQL Server trigger that moves a record from one table to an identical replica table if the record matches a specific condition.
Questions: do I need to specify each column, or can I use a wildcard?
Can I use something like:
SET @RecID = (SELECT [RecoID] FROM Inserted)
IF NULLIF(#RecID, '') IS NOT NULL
(then insert....)
THANKS!
There's a lot of stuff you "CAN" do in a trigger, but that doesn't mean you should. I'd urge you to avoid setting scalar variables within a trigger at all costs. Even if you're 100% sure your table will never have more than 1 row inserted per transaction, because that's how the app is designed... you'll be in for a very rude awakening when you find out that not all transactions come through the application.
Below is a quick demonstration of both types of triggers...
USE tempdb;
GO
IF OBJECT_ID('tempdb.dbo.PrimaryTable', 'U') IS NOT NULL
DROP TABLE dbo.PrimaryTable;
GO
IF OBJECT_ID('tempdb.dbo.TriggerScalarLog', 'U') IS NOT NULL
DROP TABLE dbo.TriggerScalarLog;
GO
IF OBJECT_ID('tempdb.dbo.TriggerMultiRowLog', 'U') IS NOT NULL
DROP TABLE dbo.TriggerMultiRowLog;
GO
CREATE TABLE dbo.PrimaryTable (
Pt_ID INT NOT NULL IDENTITY (1,1) PRIMARY KEY CLUSTERED,
Col_1 INT NULL,
Col_2 DATE NOT NULL
CONSTRAINT df_Col2 DEFAULT (GETDATE())
);
GO
CREATE TABLE dbo.TriggerScalarLog (
Pt_ID INT,
Col1_Old INT,
Col1_New INT,
Col2_Old DATE,
Col2_New DATE
);
GO
CREATE TABLE dbo.TriggerMultiRowLog (
Pt_ID INT,
Col1_Old INT,
Col1_New INT,
Col2_Old DATE,
Col2_New DATE
);
GO
--=======================================================
CREATE TRIGGER dbo.PrimaryCrudScalar ON dbo.PrimaryTable
AFTER INSERT, UPDATE, DELETE
AS
SET NOCOUNT ON;
DECLARE
#Pt_ID INT,
#Col1_Old INT,
#Col1_New INT,
#Col2_Old DATE,
#Col2_New DATE;
SELECT
#Pt_ID = ISNULL(i.Pt_ID, d.Pt_ID),
#Col1_Old = d.Col_1,
#Col1_New = i.Col_1,
#Col2_Old = d.Col_2,
#Col2_New = i.Col_2
FROM
Inserted i
FULL JOIN Deleted d
ON i.Pt_ID = d.Pt_ID;
INSERT dbo.TriggerScalarLog (Pt_ID, Col1_Old, Col1_New, Col2_Old, Col2_New)
VALUES (#Pt_ID, #Col1_Old, #Col1_New, #Col2_Old, #Col2_New);
GO -- DROP TRIGGER dbo.PrimaryCrudScalar;
CREATE TRIGGER PrimaryCrudMultiRow ON dbo.PrimaryTable
AFTER INSERT, UPDATE, DELETE
AS
SET NOCOUNT ON;
INSERT dbo.TriggerMultiRowLog (Pt_ID, Col1_Old, Col1_New, Col2_Old, Col2_New)
SELECT
ISNULL(i.Pt_ID, d.Pt_ID),
d.Col_1,
i.Col_1,
d.Col_2,
i.Col_2
FROM
Inserted i
FULL JOIN Deleted d
ON i.Pt_ID = d.Pt_ID;
GO -- DROP TRIGGER dbo.PrimaryCrudMultiRow;
--=======================================================
--=======================================================
-- --insert test...
INSERT dbo.PrimaryTable (Col_1)
SELECT TOP 100
o.object_id
FROM
sys.objects o;
SELECT 'INSERT Scalar results';
SELECT * FROM dbo.TriggerScalarLog tsl;
SELECT 'INSERT Multi-Row results';
SELECT * FROM dbo.TriggerMultiRowLog tmrl;
UPDATE pt SET
pt.Col_1 = pt.Col_1 + rv.RandomVal,
pt.Col_2 = DATEADD(DAY, rv.RandomVal, pt.Col_2)
FROM
dbo.PrimaryTable pt
CROSS APPLY ( VALUES (ABS(CHECKSUM(NEWID())) % 10000 + 1) ) rv (RandomVal);
SELECT 'UPDATE Scalar results';
SELECT * FROM dbo.TriggerScalarLog tsl;
SELECT 'UPDATE Multi-Row results';
SELECT * FROM dbo.TriggerMultiRowLog tmrl;
DELETE pt
FROM
dbo.PrimaryTable pt;
SELECT 'DELETE Scalar results';
SELECT * FROM dbo.TriggerScalarLog tsl;
SELECT 'DELETE Multi-Row results';
SELECT * FROM dbo.TriggerMultiRowLog tmrl;
You could, but I'd recommend against it. If your source table changed things would start failing.
Also, in your example if you were to ever have more than one row inserted at a time you would get thrown an error (or have unpredictable results). I'd recommend a more set based approach:
INSERT table2 ( user_id ,
user_name ,
RecoID
)
SELECT user_id ,
user_name ,
RecoID
FROM inserted i
LEFT JOIN table2 t ON i.RecoID = t.RecoID
WHERE t.RecoID IS NULL;
EDIT:
If you want to stop the insert happening on your original table then you'll need to do something along the lines of:
CREATE TRIGGER trigger_name
ON table_orig
INSTEAD OF INSERT
AS
BEGIN
-- make sure we aren't triggering ourselves from another trigger
IF TRIGGER_NESTLEVEL() > 1
return;
-- insert into the table_copy if the inserted row is already in table_orig (not null)
INSERT table_copy ( user_id ,
user_name ,
RecoID
)
SELECT user_id ,
user_name ,
RecoID
FROM inserted i
LEFT JOIN table_orig c ON i.RecoID = c.RecoID
WHERE c.RecoID IS NOT NULL;
-- insert into table_orig if the inserted row is not already in table_orig (null)
INSERT table_orig ( user_id ,
user_name ,
RecoID
)
SELECT user_id ,
user_name ,
RecoID
FROM inserted i
LEFT JOIN table_orig c ON i.RecoID = c.RecoID
WHERE c.RecoID IS NULL;
END;
The INSTEAD OF trigger will stop the insert if you don't want it to actually happen, so you'll need to do the insert yourself (the second insert statement).
Please note I changed some nulls to not nulls and the table we are left joining to in some cases.