Multiple rows to single row in Azure Data Warehouse

Since Azure SQL Data Warehouse does not support FOR XML or SELECT variable assignment, is there any other way to convert multiple rows into a single row, other than using a CURSOR?

I didn't find any direct method; however, the code below works for me.
DECLARE @intColumnCount INT,
        @intProcessCount INT,
        @varColList VARCHAR(MAX)

SET @varColList = ''

IF OBJECT_ID('tempdb.dbo.#tempColumnNames') IS NOT NULL
BEGIN
    DROP TABLE #tempColumnNames;
END

CREATE TABLE #tempColumnNames
(
    intId INT,
    varColumnNames VARCHAR(256)
)

INSERT INTO #tempColumnNames
SELECT ROW_NUMBER() OVER (ORDER BY NAME),
       NAME
FROM sys.tables

SET @intProcessCount = 1
SET @intColumnCount = (SELECT COUNT(*) FROM #tempColumnNames)

WHILE (@intProcessCount <= @intColumnCount)
BEGIN
    SET @varColList = @varColList + ', '
        + (SELECT varColumnNames
           FROM #tempColumnNames
           WHERE intId = @intProcessCount)

    SET @intProcessCount += 1
END

SELECT STUFF(@varColList, 1, 2, '')
Hope this helps someone.


Searching for multiple patterns in a string in T-SQL

In T-SQL, my dilemma is that I have to parse a potentially long string (up to 500 characters) for any of over 230 possible values and remove them from the string for reporting purposes. These values are a column in another table; they're all upper case and 4 characters long, with the exception of two that are 5 characters long.
Examples of these values are:
USFRI
PROME
AZCH
TXJS
NYDS
XVIV ...
Example of string before:
"Offered to XVIV and USFRI as back ups. No response as of yet."
Example of string after:
"Offered to and as back ups. No response as of yet."
Pretty sure it will have to be a UDF, but I'm unable to come up with anything other than stripping ALL the upper-case characters out of the string with PATINDEX, which is not the objective.
This is unavoidably kludgy, but one way is to split your string into rows; once you have a set of words, the rest is easy: simply re-aggregate while ignoring the matching values*:
with t as (
    select 'Offered to XVIV and USFRI as back ups. No response as of yet.' s
    union select 'Another row AZCH and TXJS words.'
), v as (
    select * from (values('USFRI'),('PROME'),('AZCH'),('TXJS'),('NYDS'),('XVIV'))v(v)
)
select t.s OriginalString, s.Removed
from t
cross apply (
    select String_Agg(j.[value], ' ') within group(order by Convert(tinyint, j.[key])) Removed
    from OpenJson(Concat('["', replace(s, ' ', '","'), '"]')) j
    where not exists (select * from v where v.v = j.[value])
)s;
* Requires a fully-supported version of SQL Server.
Build a function that cleans one sentence, then call that function from your query, something like this: SELECT Col1, dbo.fn_ReplaceValue(Col1) AS cleanValue, * FROM MySentencesTable. Your fn_ReplaceValue will be something like the code below; you could also create the table variable outside the function and pass it as a parameter to speed up the process, but this way it is all self-contained.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION fn_ReplaceValue(@sentence VARCHAR(500))
RETURNS VARCHAR(500)
AS
BEGIN
    DECLARE @ResultVar VARCHAR(500)
    DECLARE @allValues TABLE (rowID INT, sValues VARCHAR(15))
    DECLARE @id INT = 0
    DECLARE @ReplaceVal VARCHAR(10)
    DECLARE @numberOfValues INT = (SELECT COUNT(*) FROM MyValuesTable)

    -- Populate table variable with all values
    INSERT @allValues
    SELECT ROW_NUMBER() OVER(ORDER BY MyValuesCol) AS rowID, MyValuesCol
    FROM MyValuesTable

    SET @ResultVar = @sentence

    -- @id is incremented before use, so loop while @id < @numberOfValues
    WHILE (@id < @numberOfValues)
    BEGIN
        SET @id = @id + 1
        SET @ReplaceVal = (SELECT sValues FROM @allValues WHERE rowID = @id)
        SET @ResultVar = REPLACE(@ResultVar, @ReplaceVal, SPACE(0))
    END

    RETURN @ResultVar
END
GO
I suggest creating a table (either temporary or permanent) and loading these 230 string values into it. Then use it in the following delete:
DELETE
FROM yourTable
WHERE col IN (SELECT col FROM tempTable);
If you just want to view your data sans these values, then use:
SELECT *
FROM yourTable
WHERE col NOT IN (SELECT col FROM tempTable);
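For completeness, a sketch of creating and loading that lookup table; the table and column names here are placeholders, and the MyValuesTable source name is borrowed from the earlier answer:
-- Hypothetical lookup table holding the ~230 values
CREATE TABLE tempTable (col VARCHAR(5) NOT NULL);

INSERT INTO tempTable (col)
SELECT MyValuesCol
FROM MyValuesTable;  -- or load the values from wherever they live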

How to pass string with single quotations to in()

I am working on a query that I need to modify so that a string is passed to in(). The view table is being used by some other view table and ultimately by a stored procedure. The string values must be in ' '.
select region, county, name
from vw_main
where state = 'MD'
and building_id in ('101', '102') -- pass the string into in()
The values for the building_id will be entered at the stored procedure level upon its execution.
Please check the scripts below, which should give you the answer.
Way 1: Split the CSV value using XML and use the result directly in the WHERE condition
DECLARE @StrBuildingIDs VARCHAR(1000)
SET @StrBuildingIDs = '101,102'

SELECT
    vm.region,
    vm.county,
    vm.name
FROM vw_main vm
WHERE vm.state = 'MD'
AND vm.building_id IN
(
    SELECT
        l.value('.', 'VARCHAR(20)') AS Building_Id
    FROM
    (
        SELECT CAST('<a>' + REPLACE(@StrBuildingIDs, ',', '</a><a>') + '</a>' AS XML) AS BuildIDXML
    ) x
    CROSS APPLY x.BuildIDXML.nodes('a') Split(l)
)
Way 2: Split the CSV value using XML, load it into a table variable, and use that in the WHERE condition
DECLARE @StrBuildingIDs VARCHAR(1000)
SET @StrBuildingIDs = '101,102'

DECLARE @TblBuildingID TABLE(BuildingId INT)

INSERT INTO @TblBuildingID(BuildingId)
SELECT
    l.value('.', 'VARCHAR(20)') AS Building_Id
FROM
(
    SELECT CAST('<a>' + REPLACE(@StrBuildingIDs, ',', '</a><a>') + '</a>' AS XML) AS BuildIDXML
) x
CROSS APPLY x.BuildIDXML.nodes('a') Split(l)

SELECT
    vm.region,
    vm.county,
    vm.name
FROM vw_main AS vm
WHERE vm.state = 'MD'
AND vm.building_id IN
(
    SELECT BuildingId
    FROM @TblBuildingID
)
Way 3: Split the CSV value using XML, load it into a table variable, and use that in an INNER JOIN
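The code for Way 3 was not included in the original answer; as a minimal sketch, it could reuse the same XML split and the @TblBuildingID table variable from Way 2, then filter via a join instead of IN:
DECLARE @StrBuildingIDs VARCHAR(1000)
SET @StrBuildingIDs = '101,102'

DECLARE @TblBuildingID TABLE(BuildingId INT)

-- Same XML-based split as in Way 2
INSERT INTO @TblBuildingID(BuildingId)
SELECT
    l.value('.', 'VARCHAR(20)') AS Building_Id
FROM
(
    SELECT CAST('<a>' + REPLACE(@StrBuildingIDs, ',', '</a><a>') + '</a>' AS XML) AS BuildIDXML
) x
CROSS APPLY x.BuildIDXML.nodes('a') Split(l)

-- Filter via INNER JOIN instead of IN
SELECT
    vm.region,
    vm.county,
    vm.name
FROM vw_main AS vm
INNER JOIN @TblBuildingID b
    ON b.BuildingId = vm.building_id
WHERE vm.state = 'MD'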
Assuming the input string is not end-user input (that is, it is derived or pulled from another table or some other controlled source), you can do this:
DECLARE @instr nvarchar(max) = N'''a'',''b'',''c'''

declare @stmt nvarchar(4000) = N'
select region, county, name
from vw_main
where state = ''MD''
and building_id in ({instr})'

set @stmt = replace(@stmt, N'{instr}', @instr)

exec sp_executesql @stmt = @stmt;
If the input is from an end-user, this is safer:
declare @t table (a int, b char)
insert into @t (a, b) values (1, 'A'), (2, 'B')

declare @str varchar(50) = 'A,B'

select t.* from @t t
join (select * from string_split(@str, ',')) s(b)
    on t.b = s.b
You may like it better anyway, since there's no dynamic SQL involved. However, you must be running SQL Server 2016 or higher.
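Applied to the original query, that approach might look like the following sketch (it assumes building_id compares cleanly against the split string values):
DECLARE @BuildingIds varchar(100) = '101,102';

SELECT vm.region, vm.county, vm.name
FROM vw_main vm
JOIN STRING_SPLIT(@BuildingIds, ',') s
    ON s.value = vm.building_id
WHERE vm.state = 'MD';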

Assigning DISTINCT value to a variable in SQL

I have a dataset with one of the columns as Transaction_Date in which date varies from the year 2005 to 2018.
I need to assign the distinct years to a variable; later on, I will be using the same variable in a SQL PIVOT.
-- variable declaration
DECLARE @PCOL VARCHAR(20);

-- assigning values to the variable
SELECT @PCOL += (QUOTENAME(X.TD) + ',')
FROM
(
    SELECT DISTINCT YEAR(TRANSACTION_DATE) AS TD
    FROM TRANSACTION_INFO
) AS X;

-- Check the result
PRINT @PCOL
It is not giving me the expected output. Please suggest.
You need the FOR XML PATH() clause:
SELECT @PCOL = STUFF((SELECT DISTINCT ', ' + QUOTENAME(CAST(YEAR(TRANSACTION_DATE) AS VARCHAR(255)))
                      FROM TRANSACTION_INFO
                      FOR XML PATH('')
                     ), 1, 2, ''
                    )
You're getting NULL because you didn't pre-set your variable to an empty string before you used the += operator.
Since NULL + 'some value' = NULL, your variable never gets changed from NULL to something else.
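As a tiny illustration of that behavior (the variable name here is just for the example):
DECLARE @s VARCHAR(20);              -- @s is NULL here
SELECT @s = @s + 'abc';              -- NULL + 'abc' = NULL, so @s stays NULL
SELECT @s = ISNULL(@s, '') + 'abc';  -- initialize (or wrap in ISNULL) and the value sticks
PRINT @s;                            -- prints: abc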
It looks like this is what you're attempting to do...
IF OBJECT_ID('tempdb..#TRANSACTION_INFO', 'U') IS NOT NULL
BEGIN DROP TABLE #TRANSACTION_INFO; END;

CREATE TABLE #TRANSACTION_INFO (
    TRANSACTION_DATE DATE
);

INSERT #TRANSACTION_INFO (TRANSACTION_DATE) VALUES
    ('20130101'),('20130101'),('20140101'),
    ('20140102'),('20150102'),('20150102'),
    ('20160103'),('20160103'),('20170104'),
    ('20170104'),('20180105'),('20180105');

--================================================

DECLARE @POL VARCHAR(200) = '';

SELECT
    @POL = CONCAT(@POL, ',', x.TD)
FROM (
    SELECT DISTINCT
        TD = YEAR(ti.TRANSACTION_DATE)
    FROM #TRANSACTION_INFO ti
) x;

SET @POL = STUFF(@POL, 1, 1, '');

PRINT(@POL);
Result:
2013,2014,2015,2016,2017,2018

How to get result table from two tables without common field between tables?

I have two tables as shown below.
Consider that there is no such relationship between the tables.
So I need my result table should have like this as shown below.
How may I achieve this?
"AcctRef", in your DebitNoteTable, is a "repeating column".
That's a big, huge, no-no :)
Your debit table should look something like this:
Debit No AcctRef
-------- -------
DN1 CMP1
DN1 CMP3
DN1 CMP6
...
Then you could do a simple "join" ;)
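For illustration, a minimal sketch of that join against the normalized layout above (the table and column names are guesses based on the question):
-- Hypothetical normalized tables: AccrualNote(AccrualNo, Price), DebitNote(DebitNo, AcctRef)
SELECT a.AccrualNo,
       a.Price,
       d.DebitNo
FROM AccrualNote a
JOIN DebitNote d
    ON d.AcctRef = a.AccrualNo;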
Here's a good overview of normalization. I definitely encourage you to revisit your schema, if at all possible:
http://databases.about.com/od/specificproducts/a/normalization.htm
If you can't, then I guess "Plan B" would be write a program that:
reads all the prices and stores them in a table
reads all the AcctRef's for all the debits
Parses each AcctRef into individual accruals
prints the price and each debit for each accrual
This effectively means reading all the data in the entire database and then reorganizing it manually. If you're going to do that, why bother with a database in the first place?
PS:
It's unfortunate that you used a bitmap for your tables. Text would have been much nicer ;)
Try this:
Since the table is not normalized, you have to follow the steps below. Keeping comma-separated values in a column is a bad idea; for any kind of operation on the table you have to split them.
Step 1: Create the following function to split the AccRef column
create FUNCTION [dbo].[SDF_SplitString]
(
    @sString varchar(2048),
    @cDelimiter char(1)
)
RETURNS @tParts TABLE ( part varchar(2048) )
AS
BEGIN
    if @sString is null return

    declare @iStart int,
            @iPos int

    if substring( @sString, 1, 1 ) = @cDelimiter
    begin
        set @iStart = 2
        insert into @tParts
        values( null )
    end
    else
        set @iStart = 1

    while 1 = 1
    begin
        set @iPos = charindex( @cDelimiter, @sString, @iStart )
        if @iPos = 0
            set @iPos = len( @sString ) + 1
        if @iPos - @iStart > 0
            insert into @tParts
            values ( substring( @sString, @iStart, @iPos - @iStart ) )
        else
            insert into @tParts
            values( null )
        set @iStart = @iPos + 1
        if @iStart > len( @sString )
            break
    end

    RETURN
END
Step 2: Use this query to get the result:
with cte as (
    select *
    from DebitNote
    cross apply dbo.SDF_SplitString(AccRef, ',')
)
select part,
       price,
       [Debit No] = STUFF((SELECT ', ' + DebitNo
                           FROM cte b
                           WHERE b.part = a.part
                           FOR XML PATH('')), 1, 2, '')
FROM cte a
join AccrualNote on part = AccrualNo
GROUP BY part, price
SQL Fiddle Demo
I guess it should be something like the following (using CHARINDEX rather than IN, since IN cannot test membership in a comma-separated column):
SELECT a.Accrual_No, a.Price, d.Debit_no
FROM AccrualNote AS a, DebitNote AS d
WHERE CHARINDEX(a.Accrual_No, d.AccRef) > 0
Next you need to create a loop for the records you get using this query (just try it using phpMyAdmin).
(Sure, paulsm4 is absolutely right, but maybe you can't change the table structure.)
CREATE FUNCTION [dbo].[F_Get_DebitNo] ( @AccrualNo VARCHAR(20) )
RETURNS VARCHAR(100)
AS
BEGIN
    DECLARE @DebitNo VARCHAR(100)
    SET @DebitNo = ''

    SELECT @DebitNo = @DebitNo + ',' + DebitNo
    FROM DebitNote
    WHERE CHARINDEX(@AccrualNo, AccRef) > 0

    SELECT @DebitNo = RIGHT(@DebitNo, LEN(@DebitNo) - 1)

    RETURN @DebitNo
END
The SQL works well; I've tested it already. You just need to create a scalar-valued function as shown above, then use it like this:
SELECT AccrualNo,
       Price,
       dbo.F_Get_DebitNo(AccrualNo)
FROM AccrualNote

User defined function replacing WHERE col IN(...)

I have created a user-defined function to gain performance with queries containing 'WHERE col IN (...)', such as this one:
SELECT myCol1, myCol2
FROM myTable
WHERE myCol3 IN (100, 200, 300, ..., 4900, 5000);
The queries are generated from a web application and are in some cases much more complex.
The function definition looks like this:
CREATE FUNCTION [dbo].[udf_CSVtoIntTable]
(
    @CSV VARCHAR(MAX),
    @Delimiter CHAR(1) = ','
)
RETURNS
@Result TABLE
(
    [Value] INT
)
AS
BEGIN
    DECLARE @CurrStartPos SMALLINT;
    SET @CurrStartPos = 1;
    DECLARE @CurrEndPos SMALLINT;
    SET @CurrEndPos = 1;
    DECLARE @TotalLength SMALLINT;

    -- Remove space, tab, linefeed, carriage return
    SET @CSV = REPLACE(@CSV, ' ', '');
    SET @CSV = REPLACE(@CSV, CHAR(9), '');
    SET @CSV = REPLACE(@CSV, CHAR(10), '');
    SET @CSV = REPLACE(@CSV, CHAR(13), '');

    -- Add extra delimiter if needed
    IF NOT RIGHT(@CSV, 1) = @Delimiter
        SET @CSV = @CSV + @Delimiter;

    -- Get total string length
    SET @TotalLength = LEN(@CSV);

    WHILE @CurrStartPos < @TotalLength
    BEGIN
        SET @CurrEndPos = CHARINDEX(@Delimiter, @CSV, @CurrStartPos);

        INSERT INTO @Result
        VALUES (CAST(SUBSTRING(@CSV, @CurrStartPos, @CurrEndPos - @CurrStartPos) AS INT));

        SET @CurrStartPos = @CurrEndPos + 1;
    END

    RETURN
END
The function is intended to be used like this (or as an INNER JOIN):
SELECT myCol1, myCol2
FROM myTable
WHERE myCol3 IN (
    SELECT [Value]
    FROM dbo.udf_CSVtoIntTable('100, 200, 300, ..., 4900, 5000', ',')
);
Does anyone have any optimization ideas for my function, or other ways to improve performance in my case?
Are there any drawbacks that I have missed?
I am using MS SQL Server 2005 Std and .NET 2.0 framework.
I'm not sure of the performance increase, but I would use it as an inner join and get away from the inner select statement.
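A minimal sketch of that INNER JOIN form, reusing the udf_CSVtoIntTable function and the column names from the question:
SELECT t.myCol1, t.myCol2
FROM myTable t
INNER JOIN dbo.udf_CSVtoIntTable('100, 200, 300, 4900, 5000', ',') f
    ON f.[Value] = t.myCol3;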
Using a UDF in a WHERE clause or (worse) a subquery is asking for trouble. The optimizer sometimes gets it right, but often gets it wrong and evaluates the function once for every row in your query, which you don't want.
If your parameters are static (they appear to be) and you can issue a multistatement batch, I'd load the results of your UDF into a table variable, then use a join against the table variable to do your filtering. This should work more reliably.
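A minimal sketch of that suggestion, again reusing the udf_CSVtoIntTable function and column names from the question:
DECLARE @Ids TABLE ([Value] INT);

-- Materialize the UDF output once, so it is not re-evaluated per row
INSERT INTO @Ids ([Value])
SELECT [Value]
FROM dbo.udf_CSVtoIntTable('100, 200, 300, 4900, 5000', ',');

SELECT t.myCol1, t.myCol2
FROM myTable t
INNER JOIN @Ids i
    ON i.[Value] = t.myCol3;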
That loop will kill performance!
Create a table like this:
CREATE TABLE Numbers
(
Number int not null primary key
)
Fill it with rows containing the values 1 to 8000 or so, and use this function:
CREATE FUNCTION [dbo].[FN_ListAllToNumberTable]
(
    @SplitOn char(1)      --REQUIRED, the character to split the @List string on
    ,@List varchar(8000)  --REQUIRED, the list to split apart
)
RETURNS
@ParsedList table
(
    RowNumber int
    ,ListValue varchar(500)
)
AS
BEGIN
/*
DESCRIPTION: Takes the given @List string and splits it apart based on the given @SplitOn character.
A table is returned, one row per split item, with columns named "RowNumber" and "ListValue".
This function works for fixed or variable length items.
Empty and null items will be included in the result set.

PARAMETERS:
@List varchar(8000)  --REQUIRED, the list to split apart
@SplitOn char(1)     --OPTIONAL, the character to split the @List string on, defaults to a comma ","

RETURN VALUES:
a table, one row per item in the list, with a column named "ListValue"

TEST WITH:
----------
SELECT * FROM dbo.FN_ListAllToNumberTable(',', '1,12,123,1234,54321,6,A,*,|||,,,,B')

DECLARE @InputList varchar(200)
SET @InputList = '17;184;75;495'

SELECT
    'well formed list', LEFT(@InputList, 40) AS InputList, h.Name
FROM Employee h
INNER JOIN dbo.FN_ListAllToNumberTable(';', @InputList) dt ON h.EmployeeID = dt.ListValue
WHERE dt.ListValue IS NOT NULL

SET @InputList = '17;;;184;75;495;;;'

SELECT
    'poorly formed list join', LEFT(@InputList, 40) AS InputList, h.Name
FROM Employee h
INNER JOIN dbo.FN_ListAllToNumberTable(';', @InputList) dt ON h.EmployeeID = dt.ListValue

SELECT
    'poorly formed list', LEFT(@InputList, 40) AS InputList, ListValue
FROM dbo.FN_ListAllToNumberTable(';', @InputList)
**/

    /*this will return empty rows, and row numbers*/
    INSERT INTO @ParsedList
        (RowNumber, ListValue)
    SELECT
        ROW_NUMBER() OVER(ORDER BY number) AS RowNumber
        ,LTRIM(RTRIM(SUBSTRING(ListValue, number + 1, CHARINDEX(@SplitOn, ListValue, number + 1) - number - 1))) AS ListValue
    FROM (
             SELECT @SplitOn + @List + @SplitOn AS ListValue
         ) AS InnerQuery
        INNER JOIN Numbers n ON n.Number < LEN(InnerQuery.ListValue)
    WHERE SUBSTRING(ListValue, number, 1) = @SplitOn

    RETURN
END /*Function FN_ListAllToNumberTable*/
I have other versions that do not return empty or null rows, ones that return just the item and not the row number, etc. Look in the header comment to see how to use this as part of a JOIN, which is much faster than in a where clause.
The CLR solution did not give me good performance, so I will use a recursive query. Here is the definition of the function I will use (mostly based on Erland Sommarskog's examples):
CREATE FUNCTION [dbo].[priudf_CSVtoIntTable]
(
    @CSV VARCHAR(MAX),
    @Delimiter CHAR(1) = ','
)
RETURNS
@Result TABLE
(
    [Value] INT
)
AS
BEGIN
    -- Remove space, tab, linefeed, carriage return
    SET @CSV = REPLACE(@CSV, ' ', '');
    SET @CSV = REPLACE(@CSV, CHAR(9), '');
    SET @CSV = REPLACE(@CSV, CHAR(10), '');
    SET @CSV = REPLACE(@CSV, CHAR(13), '');

    WITH csvtbl(start, stop) AS
    (
        SELECT start = CONVERT(BIGINT, 1),
               stop = CHARINDEX(@Delimiter, @CSV + @Delimiter)
        UNION ALL
        SELECT start = stop + 1,
               stop = CHARINDEX(@Delimiter, @CSV + @Delimiter, stop + 1)
        FROM csvtbl
        WHERE stop > 0
    )
    INSERT INTO @Result
    SELECT CAST(SUBSTRING(@CSV, start, CASE WHEN stop > 0 THEN stop - start ELSE 0 END) AS INT) AS [Value]
    FROM csvtbl
    WHERE stop > 0
    OPTION (MAXRECURSION 1000)

    RETURN
END
Thanks for the input. I have to admit I did not do enough research before I started. I found that Erland Sommarskog has written a lot about this problem on his web page; after your responses and after reading his page, I decided to try a CLR function to solve this.
I tried a recursive query; it gave good performance, but I will try the CLR function anyway.
