SQL Server : change strings in multiple rows - sql-server

I have about 2 million rows in my table with a column for dates... this column is of type VARCHAR, and it contains a wrong date format, we have this format dd/mm/YYYY and it must be YYYY-mm-dd...
How can I change it? Remember it is a Varchar column, not a Datetime column (and it can't be datetime field because we manipulate it with our queries)
Thank you in advance.
UPDATE
As I said, I have MILLIONS of rows, so it's not possible to convert the date field one row at a time, as you suggested in your responses.... I need some kind of "automatic" mode to transfer every row to its new format and datatype column.

If your end goal is just cleanup to end up in a datetime column, you only need to add a new column, then transfer the data:
-- Back-fill the new DATETIME column from the legacy text column.
-- Style 103 makes CONVERT read the text as dd/mm/yyyy.
-- No WHERE clause on purpose: every row is converted in one set-based pass.
UPDATE tbl
SET new_datetime_col = CONVERT(datetime, dateinvarchar, 103);
Query:
-- Rewrite the text column in place: dd/mm/yyyy -> yyyy-mm-dd.
-- No WHERE clause on purpose: the whole table is converted in one statement.
UPDATE tbl
SET dateinvarchar = CONVERT(
        char(10),                                 -- keep only the leading yyyy-mm-dd of the style-121 text
        CONVERT(datetime, dateinvarchar, 103),    -- parse the stored text as dd/mm/yyyy
        121
    );

SELECT * FROM tbl;
Sample table:
-- Sample table holding dates as dd/mm/yyyy text.
CREATE TABLE tbl (
    dateinvarchar varchar(10)
);

INSERT INTO tbl (dateinvarchar)
VALUES
    ('01/02/2012'),
    ('02/02/2012'),
    ('03/02/2012'),
    ('14/12/2012');
Sample Result:
| DATEINVARCHAR |
-----------------
| 2012-02-01 |
| 2012-02-02 |
| 2012-02-03 |
| 2012-12-14 |

Try this
-- Demo: convert dd/mm/yyyy text to yyyy-mm-dd text.
-- Table variables take the @ prefix; "#t" cannot be DECLAREd.
DECLARE @t TABLE (dt varchar(25));

INSERT INTO @t (dt)
VALUES
    ('01/01/2012'),
    ('02/01/2012'),
    ('03/01/2012');

-- Style 103 parses dd/mm/yyyy; style 120 renders yyyy-mm-dd hh:mi:ss and
-- varchar(10) keeps only the date part.
SELECT CONVERT(varchar(10), CONVERT(datetime, dt, 103), 120) AS dt_new
FROM @t;

-- Same conversion applied to the real table in one set-based pass,
-- writing into a new table.
-- NOTE(review): the original omitted the type of id; int is assumed here --
-- match it to oldT.id.
CREATE TABLE newT (
    id int,
    dt varchar(10)
);

INSERT INTO newT (id, dt)
SELECT
    id,
    CONVERT(varchar(10), CONVERT(datetime, dt, 103), 120) AS dt_new
FROM oldT;

Related

Listing number sequence for financial periods

In SQL 2016, I need to create a list using financial periods but only have the from/to available - it's formatted similar to dates but are 0mmyyyy, so the first 3 numbers are the month/period and the last 4 digits the year.
e.g. period_from is '0102017' and period_to '0032018', but trying to bring back a list that includes the ones in between as well?
0102017,
0112017,
0122017,
0012018,
0022018
Also, the first three characters can go to 012 or 013, so need to be able to easily alter the code for other databases.
I am not entirely sure what you are wanting to use this list for, but you can get all your period values with the help of a tally table and some common table expressions.
-- Test data: one row per range, periods encoded as 0mmyyyy text.
DECLARE @p TABLE (PeriodFrom nvarchar(10), PeriodTo nvarchar(10));

INSERT INTO @p (PeriodFrom, PeriodTo)
VALUES
    ('0102017', '0032018'),
    ('0052018', '0112018');

-- Additional periods to include (e.g. a 13th period). Date them 31st December
-- so they sort after period 012 of the same year.
DECLARE @e TABLE (
    ExtraPeriodDate date,
    ExtraPeriodText nvarchar(10)
);

INSERT INTO @e (ExtraPeriodDate, ExtraPeriodText)
VALUES ('20171231', '0132017');

-- m: earliest start and latest end over ALL rows of @p, as first-of-month
-- dates (yyyymm text + '01'). Months between separate ranges are therefore
-- generated too.
WITH m AS (
    SELECT
        CAST(MIN(RIGHT(PeriodFrom, 4) + SUBSTRING(PeriodFrom, 2, 2)) + '01' AS date) AS MinPeriod,
        CAST(MAX(RIGHT(PeriodTo, 4) + SUBSTRING(PeriodTo, 2, 2)) + '01' AS date) AS MaxPeriod
    FROM @p
),
-- t: ten rows; cross-joined five times below to give up to 100,000 tally rows.
t (t) AS (
    SELECT 1
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS v (n)
),
-- d: one first-of-month date per month from MinPeriod through MaxPeriod.
d (d) AS (
    SELECT TOP (SELECT DATEDIFF(month, MinPeriod, MaxPeriod) + 1 FROM m)
        DATEADD(month, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1, m.MinPeriod)
    FROM m
    CROSS JOIN t AS t1
    CROSS JOIN t AS t2
    CROSS JOIN t AS t3
    CROSS JOIN t AS t4
    CROSS JOIN t AS t5
),
-- p: convert each month back to the 0mmyyyy text format and append the
-- extra periods listed in @e.
p AS (
    SELECT
        d.d AS PeriodDate,
        '0' + RIGHT('00' + CAST(MONTH(d.d) AS nvarchar(2)), 2)
            + CAST(YEAR(d.d) AS nvarchar(4)) AS PeriodText
    FROM d
    UNION ALL
    SELECT
        ExtraPeriodDate,
        ExtraPeriodText
    FROM @e
)
SELECT PeriodText
FROM p
ORDER BY PeriodDate;
Output:
+------------+
| PeriodText |
+------------+
| 0102017 |
| 0112017 |
| 0122017 |
| 0132017 |
| 0012018 |
| 0022018 |
| 0032018 |
| 0042018 |
| 0052018 |
| 0062018 |
| 0072018 |
| 0082018 |
| 0092018 |
| 0102018 |
| 0112018 |
+------------+
If this isn't what you require exactly it should put you on the right path to generating these values either as the result of a function or concatenated together into a list as per your comment by using for xml on the result by changing the final select statement to:
-- Replacement for the final SELECT of the period script: collapse the ordered
-- period list (CTE p) into a single comma-separated string.
SELECT STUFF(
           (
               SELECT ', ' + PeriodText
               FROM p
               ORDER BY PeriodDate
               FOR XML PATH('')
           ),
           1, 2, ''        -- remove the leading ', '
       ) AS PeriodTexts;
Which outputs:
+---------------------------------------------------------------------------------------------------------------------------------------+
| PeriodTexts |
+---------------------------------------------------------------------------------------------------------------------------------------+
| 0102017, 0112017, 0122017, 0132017, 0012018, 0022018, 0032018, 0042018, 0052018, 0062018, 0072018, 0082018, 0092018, 0102018, 0112018 |
+---------------------------------------------------------------------------------------------------------------------------------------+
This is going to be a little complicated. To start, I have a user defined table value function that outputs a calendar table based on a start and end date. You'll want to create that first...
-- dbo.udf_calendar: returns one row per day starting at @datestart.
--   [day]  : 1-based day counter
--   [date] : @datestart + ([day] - 1) days
-- The row count is DATEDIFF(DAY, @datestart, @dateend), so the last row is the
-- day BEFORE @dateend (the end date itself is not returned).
-- NOTE(review): confirm the exclusive end is intended before relying on it.
CREATE FUNCTION dbo.udf_calendar (@datestart smalldatetime, @dateend smalldatetime)
RETURNS @calendar TABLE (
    [day]  int,
    [date] smalldatetime
)
AS
BEGIN
    DECLARE @rows int = DATEDIFF(DAY, @datestart, @dateend);
    DECLARE @i int = 1;

    WHILE (@i <= @rows)
    BEGIN
        -- Fill the counter and its date together; no follow-up UPDATE needed.
        INSERT INTO @calendar ([day], [date])
        VALUES (@i, DATEADD(DAY, @i - 1, @datestart));

        SET @i = @i + 1;
    END

    RETURN;
END
Then, the following will give you the output that I THINK you are looking for. I've commented to try and explain how I got there, but it still might be a bit difficult to follow...
-- Create a temp table example with your period from and to.
IF OBJECT_ID('tempdb..#example') IS NOT NULL
    DROP TABLE #example;

SELECT
    '0102017' AS periodfrom,
    '0032018' AS periodto
INTO #example;

/*
Join the calendar to the temp table where the calendar date lies between the
first day of the "from" month and the first day of the "to" month.
The period text (0mmyyyy) is rebuilt as yyyymmdd before conversion: that
format converts to smalldatetime the same way under every DATEFORMAT /
language setting, unlike 'mm-01-yyyy' or 'yyyy-mm-dd'.
datefill2 (yyyy + 0mm) exists only so the rows can be ordered chronologically.
Periods above 012 are not calendar months and are not handled here.
*/
SELECT DISTINCT
    b.periodfrom,
    b.periodto,
    RIGHT('00' + CAST(DATEPART(MONTH, a.[date]) AS varchar(50)), 3)
        + CAST(DATEPART(YEAR, a.[date]) AS varchar(50)) AS datefill,
    CAST(DATEPART(YEAR, a.[date]) AS varchar(50))
        + RIGHT('00' + CAST(DATEPART(MONTH, a.[date]) AS varchar(50)), 3) AS datefill2
FROM dbo.udf_calendar('20150101', '20181231') AS a
INNER JOIN #example AS b
    ON a.[date] BETWEEN
           CAST(SUBSTRING(b.periodfrom, 4, 4) + SUBSTRING(b.periodfrom, 2, 2) + '01' AS smalldatetime)
       AND CAST(SUBSTRING(b.periodto, 4, 4) + SUBSTRING(b.periodto, 2, 2) + '01' AS smalldatetime)
ORDER BY datefill2;

Selecting from Type 2 Dimension based Report Start and End Date

I have a large Type 2 Dimension table and causing performance issues in the select queries...I want to limit the Dimensions based on the report Start and End Dates ...But I am struggling to get the right query for that...Here is an example of what I am looking for...
-- Sample Type 2 dimension: one row per version of customer C1, each valid
-- from StartDate to EndDate; EndDate is NULL on the last version.
DECLARE @DimCustomers TABLE (
    CKey      int,
    ID        nvarchar(20),
    Customer  nvarchar(50),
    StartDate datetime,
    EndDate   datetime
);

INSERT INTO @DimCustomers (CKey, ID, Customer, StartDate, EndDate)
VALUES
    (100, 'C1', 'Customer1',    '2010-01-01', '2010-12-31'),
    (101, 'C1', 'xCustomer1',   '2011-01-01', '2011-12-31'),
    (102, 'C1', 'xxCustomer1',  '2012-01-01', '2012-12-31'),
    (103, 'C1', 'xxxCustomer1', '2013-01-01', NULL);

-- Report window supplied by the caller.
DECLARE @ReportStartDate datetime = '2010-05-01',
        @ReportEndDate   datetime = '2011-03-01';

SELECT *
FROM @DimCustomers;
The expectation is that when someone runs a report between '2010-02-01' and '2011-02-01', I get an output for CKey 100 and 101.
For a report between '2011-02-01' and current date -> 101, 102 and 103
For a report between '2015-02-01' and current date -> 103
I hope that explains what I am looking for...how should my WHERE clause look like on the #DimCustomers?
Thanks
Note: I do not want to join with the Fact Table to start with...
You have NULL value in EndDate. You will have to deal with it.
For example:
-- Return every dimension row whose validity range OVERLAPS the report window.
-- Two ranges overlap when each one starts on or before the other one ends.
-- A NULL EndDate means the row is still current, so it never ends before the
-- report starts.
SELECT *
FROM @DimCustomers
WHERE StartDate <= @ReportEndDate
  AND (EndDate IS NULL OR EndDate >= @ReportStartDate);
-- Three-case range filter: keeps a row when the report start is inside the
-- row's range, the report end is inside it, or the row's range lies strictly
-- inside the report window. A NULL EndDate is treated as "now".
SELECT *
FROM @DimCustomers
WHERE
    -- @ReportStartDate falls inside the row's range
    (@ReportStartDate >= StartDate AND @ReportStartDate <= ISNULL(EndDate, GETDATE()))
    OR
    -- @ReportEndDate falls inside the row's range
    (@ReportEndDate <= ISNULL(EndDate, GETDATE()) AND @ReportEndDate >= StartDate)
    OR
    -- the row's range falls inside the parameter range
    (@ReportStartDate < StartDate AND @ReportEndDate > ISNULL(EndDate, GETDATE()));

Checking next row in table is incremented by 1 minute in datetime column

I need to check a lot of data in a table to make sure my feed has not skipped anything.
Basically the table has the following columns
ID Datetime Price
The data in the DateTime column is incremented by 1 minute in each successive row. I need to check the row after the current one to see whether it is 1 minute later than the row being queried.
The query will probably need some sort of loop, then grab a copy of the next row and compare it to the datetime row of the current to make sure it is incremented by 1 minute.
I created a test-table to match your description, and inserted 100 rows with 1 minute between each row like this:
-- Test table: an identity key, a timestamp and a price.
CREATE TABLE [Test] (
    [Id]    int IDENTITY(1,1),
    [Date]  datetime,
    [Price] int
);

-- Recursive CTE: starts at the current time and steps back one minute per
-- level until it has produced the value 99 minutes in the past (100 rows).
WITH [Tally] ([Date]) AS (
    SELECT GETDATE()
    UNION ALL
    SELECT DATEADD(minute, -1, [Date])
    FROM [Tally]
    WHERE [Date] > DATEADD(minute, -99, GETDATE())
)
INSERT INTO [Test] ([Date], [Price])
SELECT
    [Date],
    123
FROM [Tally];
Then I deleted a record in the middle to simulate a missing minute:
-- Remove one row to simulate a minute the feed skipped.
DELETE FROM [Test]
WHERE [Id] = 50;
Now we can use this query to find missing records:
-- Self-join: for each row a, look for a row b whose [Date] plus one minute
-- equals a.[Date]. Rows with no such partner are returned.
SELECT
    a.*,
    CASE
        WHEN b.[Id] IS NULL THEN 'Next record is Missing!'
        ELSE CAST(b.[Id] AS varchar)
    END AS NextId
FROM [Test] AS a
LEFT JOIN [Test] AS b
    ON a.[Date] = DATEADD(minute, 1, b.[Date])
WHERE b.[Id] IS NULL;   -- keep only the rows without a partner
The result will look like this:
Id Date Price NextId
----------- ----------------------- ----------- ------------------------------
49 2013-05-11 22:42:56.440 123 Next record is Missing!
100 2013-05-11 21:51:56.440 123 Next record is Missing!
(2 row(s) affected)
The key to the solution is to join the table with itself, using DATEADD to look for the record that should exist one minute away. The last record of the table will of course report that the next row is missing, since it hasn't been inserted yet.
Borrowing TheQ's sample data you can use
-- Gaps-and-islands: (minutes since a fixed origin) minus (rank by date) is
-- constant within a run of consecutive minutes, so grouping on that
-- difference yields one row per unbroken run.
SELECT
    MIN(islands.[Date]) AS StartIsland,
    MAX(islands.[Date]) AS EndIsland
FROM (
    SELECT
        [Date],
        DATEDIFF(MINUTE, '20000101', [Date])
            - DENSE_RANK() OVER (ORDER BY [Date]) AS G
    FROM Test
) AS islands
GROUP BY islands.G

TSQL Performance issues using DATEADD in where clause

I have a query using the DATEADD method which takes a lot of time.
I'll try to simplify what we do.
We are monitoring temperatures and every 5 minutes we store the highest temp and lowest temp in
table A
Date | Time | MaxTemp | MinTemp
2011-09-18 | 12:05:00 | 38.15 | 38.099
2011-09-18 | 12:10:00 | 38.20 | 38.10
2011-09-18 | 12:15:00 | 38.22 | 38.17
2011-09-18 | 12:20:00 | 38.21 | 38.20
...
2011-09-19 | 11:50:00 | 38.17 | 38.10
2011-09-19 | 11:55:00 | 38.32 | 38.27
2011-09-19 | 12:00:00 | 38.30 | 38.20
Date/Time columns are of type date/time (and not datetime)
In another table (Table B) we store some data for the entire day, where a day is from NOON (12PM) to noon (not midnight to midnight).
So table B columns include:
Date (date only no time)
ShiftManager
MaxTemp (this is the max temp for the entire 24 hours starting at that date noon till next day noon)
MinTemp
I get table B with all the data and just need to update the MaxTemp and MinTemp using table A
For example:For 09/18/2011 I need the maximum temp reading that was between 09/18/2011 12PM and 09/19/2011 12PM.
In the TableA sample we have above, the returned result would be 38.32 as it is the MAX(MaxTemp) for the desired period.
The SQL I'm using:
-- For each TableB day, take the highest reading from that day after noon up
-- to and including noon of the following day.
-- No WHERE clause on the UPDATE: every TableB row is recalculated.
UPDATE TableB
SET MaxTemp = (
    SELECT MAX(a.HighTemp)
    FROM TableA AS a
    WHERE (a.Date = TableB.Date AND a.Time > '12:00:00')
       OR (a.Date = DATEADD(dd, 1, TableB.Date) AND a.Time <= '12:00:00')
)
And it takes a lot of time (if I remove the DATEADD method it is quick).
Here is a simplified sample that shows the data I have and the expected result:
-- Readings every 5 minutes (TableA) and one summary row per day (TableB).
DECLARE @TableA TABLE ([Date] DATE, [Time] TIME(0), HighTemp DECIMAL(6,2));
DECLARE @TableB TABLE ([Date] DATE, MaxTemp DECIMAL(6,2));

INSERT INTO @TableA ([Date], [Time], HighTemp)
VALUES
    ('2011-09-18', '12:05:00', 38.15),
    ('2011-09-18', '12:10:00', 38.20),
    ('2011-09-18', '12:15:00', 38.22),
    ('2011-09-19', '11:50:00', 38.17),
    ('2011-09-19', '11:55:00', 38.32),
    ('2011-09-19', '12:00:00', 38.31),
    ('2011-09-19', '12:05:00', 38.33),
    ('2011-09-19', '12:10:00', 38.40),
    ('2011-09-19', '12:15:00', 38.12),
    ('2011-09-20', '11:50:00', 38.27),
    ('2011-09-20', '11:55:00', 38.42),
    ('2011-09-20', '12:00:00', 38.16);

INSERT INTO @TableB ([Date], MaxTemp)
VALUES
    ('2011-09-18', 0),
    ('2011-09-19', 0);

-- This is how I get the data, now I just need to update the max temp for each day.
-- The CTE gives the table variable a name (and column aliases) that the
-- correlated subquery can reference; updating the CTE updates @TableB.
WITH TableB (d, maxt) AS (
    SELECT [Date], MaxTemp
    FROM @TableB
)
UPDATE TableB
SET maxt = (
    SELECT MAX(HighTemp)
    FROM @TableA
    WHERE ([Date] = TableB.d AND [Time] > '12:00:00')                   -- after noon on the day itself
       OR ([Date] = DATEADD(dd, 1, TableB.d) AND [Time] <= '12:00:00')  -- up to noon on the next day
);

SELECT *
FROM @TableB;
Hope I was able to explain myself. Any ideas how I can do it differently? Thx!
Functions on column usually kill performance. So can OR.
However, I assume you want AND not OR because it is a range.
So, applying some logic and having just one calculation
-- NOTE: the author marks this answer as wrong in the edit below and keeps it
-- only as a "what not to do" example; do not use it as-is.
update TableB
set MaxTemp =
(
select MAX(HighTemp) from TableA
where
-- Intent, per the explanation below: shift each reading back 12 hours (0.5 day)
-- so a noon-to-noon day lines up with a calendar date. Relies on shorthand
-- date arithmetic (Date + Time - 0.5).
(Date + Time - 0.5 = TableB.Date)
)
(Date + Time - 0.5) will change noon to noon to be midnight to midnight (0.5 = 12 hours). More importantly, you can make this a computed column and index it
More correctly, Date + Time - 0.5 is DATEADD(hour, -12, Date+Time) assuming Date and Time are real dates/times and not varchar...
Edit: this answer is wrong but I'll leave it up as "what not to do"
See this for more:
Bad Habits to Kick : Using shorthand with date/time operations
This would probably be a lot easier if you used a single SMALLDATETIME column instead of separating this data into DATE/TIME columns. Also I'm assuming you are using SQL Server 2008 and not a previous version where you're storing DATE/TIME data as strings. Please specify the version of SQL Server and the actual data types being used.
-- Sample readings: separate DATE and TIME columns plus max/min temperatures.
DECLARE @d TABLE ([Date] DATE, [Time] TIME(0), MaxTemp DECIMAL(6,3), MinTemp DECIMAL(6,3));

INSERT INTO @d ([Date], [Time], MaxTemp, MinTemp)
VALUES
    ('2011-09-18', '12:05:00', 38.15, 38.099),
    ('2011-09-18', '12:10:00', 38.20, 38.10),
    ('2011-09-18', '12:15:00', 38.22, 38.17),
    ('2011-09-18', '12:20:00', 38.21, 38.20),
    ('2011-09-19', '11:50:00', 38.17, 38.10),
    ('2011-09-19', '12:55:00', 38.32, 38.27),
    ('2011-09-19', '12:00:00', 38.30, 38.20);

SELECT '-- before update';
SELECT * FROM @d;

-- d : every reading with dtr = (date + time) shifted back 12 hours, so a
--     noon-to-noon day falls on a single calendar date.
-- d2: the maximum temperature per shifted calendar date.
;WITH d (d, t, dtr, maxt) AS
(
    SELECT
        [Date],
        [Time],
        DATEADD(HOUR, -12,
            CONVERT(SMALLDATETIME,
                CONVERT(CHAR(8), [Date], 112) + ' ' + CONVERT(CHAR(8), [Time], 108))),
        MaxTemp
    FROM @d
),
d2 (dtr, maxt) AS
(
    SELECT CONVERT([Date], dtr), MAX(maxt)
    FROM d
    GROUP BY CONVERT([Date], dtr)
)
-- Write each day's maximum back to every reading that belongs to that day.
UPDATE d
SET maxt = d2.maxt
FROM d
INNER JOIN d2
    ON d.dtr >= d2.dtr
   AND d.dtr < DATEADD(DAY, 1, d2.dtr);

SELECT '-- after update';
SELECT * FROM @d;
Results:
-- before update
2011-09-18 12:05:00 38.150 38.099
2011-09-18 12:10:00 38.200 38.100
2011-09-18 12:15:00 38.220 38.170
2011-09-18 12:20:00 38.210 38.200
2011-09-19 11:50:00 38.170 38.100
2011-09-19 12:55:00 38.320 38.270
2011-09-19 12:00:00 38.300 38.200
-- after update
2011-09-18 12:05:00 38.220 38.099
2011-09-18 12:10:00 38.220 38.100
2011-09-18 12:15:00 38.220 38.170
2011-09-18 12:20:00 38.220 38.200
2011-09-19 11:50:00 38.220 38.100
2011-09-19 12:55:00 38.320 38.270
2011-09-19 12:00:00 38.320 38.200
Presumably you want to update the MinTemp as well, and that would just be:
-- Same approach as the max-only update, extended to MinTemp.
-- d : every reading with dtr = (date + time) shifted back 12 hours.
-- d2: maximum AND minimum temperature per shifted calendar date.
;WITH d (d, t, dtr, maxt, mint) AS
(
    SELECT
        [Date],
        [Time],
        DATEADD(HOUR, -12,
            CONVERT(SMALLDATETIME,
                CONVERT(CHAR(8), [Date], 112) + ' ' + CONVERT(CHAR(8), [Time], 108))),
        MaxTemp,
        MinTemp          -- mint must come from MinTemp, not MaxTemp
    FROM @d
),
d2 (dtr, maxt, mint) AS
(
    SELECT CONVERT([Date], dtr), MAX(maxt), MIN(mint)
    FROM d
    GROUP BY CONVERT([Date], dtr)
)
UPDATE d
SET maxt = d2.maxt,
    mint = d2.mint       -- the day's minimum, not its maximum
FROM d
INNER JOIN d2
    ON d.dtr >= d2.dtr
   AND d.dtr < DATEADD(DAY, 1, d2.dtr);
Now, this is not really better than your existing query, because it's still going to be using scans to figure out aggregates and all the rows that need to be updating. I'm not saying you should be updating the table at all, because this information can always be derived at query time, but if it is something you really want to do, I would combine the advice in these answers and consider revising the schema. For example, if the schema were:
USE [tempdb];
GO

-- One SMALLDATETIME column in place of separate date/time columns.
-- RoundedDate is the calendar date of the reading shifted back 12 hours; it is
-- persisted so that it can be indexed.
CREATE TABLE dbo.d
(
    [Date]      SMALLDATETIME,
    MaxTemp     DECIMAL(6,3),
    MinTemp     DECIMAL(6,3),
    RoundedDate AS (CONVERT(DATE, DATEADD(HOUR, -12, [Date]))) PERSISTED
);

CREATE INDEX rd ON dbo.d (RoundedDate);

INSERT INTO dbo.d ([Date], MaxTemp, MinTemp)
VALUES
    ('2011-09-18 12:05:00', 38.15, 38.099),
    ('2011-09-18 12:10:00', 38.20, 38.10),
    ('2011-09-18 12:15:00', 38.22, 38.17),
    ('2011-09-18 12:20:00', 38.21, 38.20),
    ('2011-09-19 11:50:00', 38.17, 38.10),
    ('2011-09-19 12:55:00', 38.32, 38.27),
    ('2011-09-19 12:00:00', 38.30, 38.20);
Then your update is this simple, and the plan is much nicer:
-- Set every row's MaxTemp to the highest MaxTemp recorded for its RoundedDate.
-- The per-day maximum is computed once in the derived table g.
UPDATE d
SET MaxTemp = g.MaxTemp
FROM dbo.d AS d
INNER JOIN
(
    SELECT
        RoundedDate,
        MAX(MaxTemp) AS MaxTemp
    FROM dbo.d
    GROUP BY RoundedDate
) AS g
    ON g.RoundedDate = d.RoundedDate;
Finally, one of the reasons your existing query is probably taking so long is that you are updating all of time, every time. Is data from last week changing? Probably not. So why not limit the WHERE clause to recent data only? I see no need to go recalculate anything earlier than yesterday unless you are constantly receiving revised estimates of how warm it was last Tuesday at noon. So why are there no WHERE clauses on your current query, to limit the date range where it is attempting to do this work? Do you really want to update the WHOLE table, EVERY time? This is probably something you should only be doing once a day, sometime in the afternoon, to update yesterday. So whether it takes 2 seconds or 2.5 seconds shouldn't really matter.
You may need to use -12 instead of 12, depending on whether the date is the start date or the end date of the noon-to-noon interval.
-- Set each tableA day's MaxTemp to the highest TableB reading assigned to it.
-- UPDATE accepts neither an aggregate in SET nor a GROUP BY, so the per-day
-- maximum is computed in a derived table and joined back.
-- DATE and TIME values cannot be added directly; both are cast to DATETIME
-- first.
-- The +12 hour shift labels a noon-to-noon day by the date on which it ENDS;
-- use -12 to label it by the date on which it starts.
UPDATE tableAx
SET MaxTemp = agg.MaxTemp
FROM tableA AS tableAx
INNER JOIN
(
    SELECT
        CAST(DATEADD(hh, 12, CAST(b.[Date] AS datetime) + CAST(b.[Time] AS datetime)) AS date) AS ShiftDate,
        MAX(b.HighTemp) AS MaxTemp
    FROM TableB AS b
    GROUP BY CAST(DATEADD(hh, 12, CAST(b.[Date] AS datetime) + CAST(b.[Time] AS datetime)) AS date)
) AS agg
    ON agg.ShiftDate = tableAx.[Date];
Because of the 12 hour offset, I am not sure how much you would gain by putting TableB Date plus Time in a DateTime field directly. You cannot get away from the DATEADD, and the output from a function is not indexed even if the parameters going into the function are indexed. What you might be able to do is create a computed column that = date + time +/- 12h and index that column.
Like the recommendation from Arron to only update those without values.
-- Same as the full update, restricted to tableA rows that have no MaxTemp yet.
-- The per-day maximum is computed in a derived table because UPDATE accepts
-- neither an aggregate in SET nor a GROUP BY; DATE and TIME are cast to
-- DATETIME before being added.
-- The +12 hour shift labels a noon-to-noon day by the date on which it ENDS;
-- use -12 to label it by the date on which it starts.
UPDATE tableAx
SET MaxTemp = agg.MaxTemp
FROM tableA AS tableAx
INNER JOIN
(
    SELECT
        CAST(DATEADD(hh, 12, CAST(b.[Date] AS datetime) + CAST(b.[Time] AS datetime)) AS date) AS ShiftDate,
        MAX(b.HighTemp) AS MaxTemp
    FROM TableB AS b
    GROUP BY CAST(DATEADD(hh, 12, CAST(b.[Date] AS datetime) + CAST(b.[Time] AS datetime)) AS date)
) AS agg
    ON agg.ShiftDate = tableAx.[Date]
WHERE tableAx.MaxTemp IS NULL;
or an insert of new dates
-- Insert one row per shifted day that is present in TableB but not yet in
-- tableA, carrying that day's highest reading.
-- DATE and TIME are cast to DATETIME before being added; the +12 hour shift
-- labels a noon-to-noon day by the date on which it ENDS (use -12 to label it
-- by its start date).
INSERT INTO tableA ([Date], MaxTemp)
SELECT
    agg.ShiftDate,
    agg.MaxTemp
FROM
(
    SELECT
        CAST(DATEADD(hh, 12, CAST(b.[Date] AS datetime) + CAST(b.[Time] AS datetime)) AS date) AS ShiftDate,
        MAX(b.HighTemp) AS MaxTemp
    FROM TableB AS b
    GROUP BY CAST(DATEADD(hh, 12, CAST(b.[Date] AS datetime) + CAST(b.[Time] AS datetime)) AS date)
) AS agg
-- Anti-join: keep only the days tableA does not have yet.
WHERE NOT EXISTS
(
    SELECT 1
    FROM tableA AS a
    WHERE a.[Date] = agg.ShiftDate
);

updating only date part from datetime in sql server 2000

I have data in the table like the following.
col1 col2 col3
--------------------------------------------------------
6/5/2010 18:05:00 6/2/2010 10:05:00 Null
6/8/2010 15:05:00 6/3/2010 10:45:00 6/5/2010 11:05:00
6/3/2010 15:05:00 Null 6/7/2010 12:05:00
6/1/2010 15:05:00 6/3/2010 10:45:00 6/1/2010 14:05:00
My requirement is to update the date part of these columns to a single date without disturbing the time. For example, I want to set the date to 6/1/2010 wherever the field is not null. Please let me know the query for updating the table data.
thanks & regards,
murali
I think this should work for you.
-- Demo: replace the date part of a DATETIME column and keep the time part.
-- Written with one-row INSERTs and DECLARE + SET so that it also runs on
-- SQL Server 2000.
CREATE TABLE #t
(
    col1 datetime
);

INSERT INTO #t (col1) VALUES ('2010-06-01 10:00:00');
INSERT INTO #t (col1) VALUES ('2010-06-06 11:00:00');
INSERT INTO #t (col1) VALUES ('2010-05-24 12:40:00');
INSERT INTO #t (col1) VALUES ('2010-05-07 13:00:00');
INSERT INTO #t (col1) VALUES (NULL);

-- Local variables take the @ prefix.
DECLARE @newDate datetime;
SET @newDate = '2010-07-01';

-- DATEDIFF(day, col1, @newDate) is the whole-day distance to the target date;
-- adding that many days moves the date and leaves the time of day unchanged.
UPDATE #t
SET col1 = DATEADD(day, DATEDIFF(day, col1, @newDate), col1)
WHERE col1 IS NOT NULL;

SELECT * FROM #t;

DROP TABLE #t;
You may prefer to do it in a SELECT statement, so that you don't need to run the UPDATE statement each time new data is added to the table.

Resources