loop through values and update after each one completes - sql-server

I have the following code that I need to run for 350 locations. It takes an hour to do 5 locations, so I run 5 at a time by using where location_code in ('0001', '0002', '0003', '0005', '0006'). I would like to create a temp table with 2 columns, one location_id and the other completed, loop through each value in the location_id column, update the completed column with a date and time stamp as each location finishes, and commit after each. This way I can just let it run, and if I need to kill it I can see the last completed location_id and know where to restart the process from. Or better yet, have it check for a value in the completed column and, if one exists, go to the next.
--Collecting all records containing remnant cost. You will need to specify the location number(s). In the example below we're using locations 0001 through 0006.
select sku_id, ib.location_id, price_status_id, inventory_status_id, sum(transaction_units) as units, sum(transaction_cost) as cost,
sum(transaction_valuation_retail) as val_retail, sum(transaction_selling_retail) as sell_retail
into #remnant_cost
from ib_inventory ib
inner join location l on l.location_id = ib.location_id
where location_code in ('0001', '0002', '0003', '0005', '0006')
group by sku_id, ib.location_id, price_status_id, inventory_status_id
having sum(transaction_units) = 0
and sum(transaction_cost) <> 0
--Verify the total remnant cost.
select location_id, sum(units) as units, sum(cost) as cost, sum(val_retail) as val_retail, sum(sell_retail) as sell_retail
from #remnant_cost
group by location_id
select *
from #remnant_cost
----------------------------------------------------Run above this line first and gather results--------------------------------
--inserting into a temp table the cost negation using transaction_type_code 500 (Actual shrink) before inserting into ib_inventory
--corrected query adding transaction date as column heading (Marshall)
select
sku_id, location_id, price_status_id, convert(smalldatetime,convert(varchar(50),getdate(),101)) as transaction_date, 500 as transaction_type_code, inventory_status_id, NULL as other_location_id,
NULL as transaction_reason_id, 999999 as document_number, 0 as transaction_units, cost * -1 as transaction_cost, 0 as transaction_valuation_retail,
0 as transaction_selling_retail,NULL as price_change_type, NULL as units_affected
into #rem_fix
from #remnant_cost
--Validating to make sure cost will have the exact opposite to negate.
select location_id, sum(transaction_units) as units, sum(transaction_cost) as cost, sum(transaction_valuation_retail) as val_retail,
sum(transaction_selling_retail) as sell_retail
from #rem_fix
group by location_id
BEGIN TRAN
EXEC inventory_update_$sp 'SELECT sku_id,location_id,price_status_id,transaction_date,transaction_type_code,inventory_status_id,other_location_id,
transaction_reason_id,document_number,transaction_units,transaction_cost,transaction_valuation_retail,transaction_selling_retail,price_change_type,
units_affected FROM #rem_fix'
COMMIT

Making some assumptions about your schema:
-- A working table to track progress that will stick around.
create table dbo.Location_Codes
( Location_Code VarChar(4), Started DateTime NULL, Completed DateTime NULL );
Then break up the work this way:
if not exists ( select 42 from dbo.Location_Codes where Completed is NULL )
begin
-- All of the locations have been processed (or this is the first time through).
delete from dbo.Location_Codes;
-- Get all of the location codes.
insert into dbo.Location_Codes
select Location_Code, NULL, NULL
from Location;
end
-- Temporary table to make things easier.
declare @Pass_Location_Codes as Table ( Location_Code VarChar(4) );
-- Loop until all locations have been processed.
while exists ( select 42 from dbo.Location_Codes where Completed is NULL )
begin
-- Get the next five locations for which processing has not completed.
delete from @Pass_Location_Codes;
insert into @Pass_Location_Codes
select top 5 Location_Code
from dbo.Location_Codes
where Completed is NULL
order by Location_Code;
-- Record the start date/time.
update dbo.Location_Codes
set Started = GetDate()
where Location_Code in ( select Location_Code from @Pass_Location_Codes );
-- Run the big query.
select ...
where Location_Code in ( select Location_Code from @Pass_Location_Codes )
...
-- Record the completion date/time.
update dbo.Location_Codes
set Completed = GetDate()
where Location_Code in ( select Location_Code from @Pass_Location_Codes );
end
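With this in place, a killed run is easy to inspect and resume. A quick progress check might look like this (a sketch against the tracking table above):
select Location_Code, Started, Completed
from dbo.Location_Codes
order by Completed desc, Started desc;
On the next run, the "if not exists" guard at the top leaves completed rows alone, so processing resumes at the first Location_Code where Completed is still NULL.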

Related

Query using a statement within a VARCHAR2 column

Is there a way for a select statement to include in the WHERE clause a statement that is contained within the table? For example, the following table:
CREATE TABLE test_tab(
date_column DATE,
frequency NUMBER,
test_statement VARCHAR2(255)
)
/
If
MOD(SYSDATE - DATE, frequency) = 0
were contained within the column test_statement, is there a way to select rows where this is true? The test_statement will vary and not be the same throughout the table. I am able to do this in PL/SQL but looking to do this without the use of PL/SQL.
This kind of dynamic SQL-in-SQL can be created with DBMS_XMLGEN.getXML, although the query looks a bit odd, so you might want to consider a different design.
First, I created a sample table and row using your DDL. I'm not sure exactly what you're trying to do with the conditions, so I simplified them into two rows with simpler conditions. The first row matches the first condition, and neither row matches the second condition.
--Create sample table and row that matches the condition.
CREATE TABLE test_tab(
date_column DATE,
frequency NUMBER,
test_statement VARCHAR2(255)
)
/
insert into test_tab values(sysdate, 1, 'frequency = 1');
insert into test_tab values(sysdate, 2, '1=2');
commit;
Here's the full query. It returns only the first row, since only the first row matches its condition.
--Find rows where ROWID is in a list of ROWIDs that match the condition.
select *
from test_tab
where rowid in
(
--Convert XMLType to relational data.
select the_rowid
from
(
--Convert CLOB to XMLType.
select xmltype(xml_results) xml_results
from
(
--Create a single XML file with the ROWIDs that match the condition.
select dbms_xmlgen.getxml('
select rowid
from test_tab where '||test_statement) xml_results
from test_tab
)
where xml_results is not null
)
cross join
xmltable
(
'/ROWSET/ROW'
passing xml_results
columns
the_rowid varchar2(128) path 'ROWID'
)
);
This calls for dynamic SQL, so yes, it is PL/SQL that handles it; I don't think the SQL layer is capable of doing it.
I don't know what you tried so far, so - just an idea: a function that returns ref cursor might help, e.g.
SQL> create table test (date_column date, frequency number, test_statement varchar2(255));
Table created.
SQL> insert into test values (trunc(sysdate), 2, 'deptno = 30');
1 row created.
SQL> create or replace function f_test return sys_refcursor
2 is
3 l_str varchar2(200);
4 l_rc sys_refcursor;
5 begin
6 select test_statement
7 into l_str
8 from test
9 where date_column = trunc(sysdate);
10
11 open l_rc for 'select deptno, ename from emp where ' || l_str;
12 return l_rc;
13 end;
14 /
Function created.
Testing:
SQL> select f_test from dual;
F_TEST
--------------------
CURSOR STATEMENT : 1
CURSOR STATEMENT : 1
DEPTNO ENAME
---------- ----------
30 ALLEN
30 WARD
30 MARTIN
30 BLAKE
30 TURNER
30 JAMES
6 rows selected.
SQL>
A good thing about it is that you could save the whole statements into that table and run any of them using the same function.
You can try this (note that the column is date_column; date on its own is a reserved word):
select * from test_tab where mod(sysdate - date_column, frequency) = 0;

Merge data between two tables using split compare

I have two tables, and I need to compare the data and update one table's records. Please let me know how this can be done. This is the scenario:
Proj1 Table
This is the first table where data needs to be synchronized
ID Text reqId
1 R1|R2 12
2 R2|R3 12
3 R3|R5|R2 12
Proj2 Table
This is the table where data updates are taking place
ID Text Active reqId
3 R1 1 12
4 R3 1 12
5 R4 1 12
I need to take each record from Proj1 and use a split function; then, for each value in the split, compare the Text field between both tables. The result should be similar to below. We are syncing the data in Proj2 to match Proj1.
ID Text Active reqId
3 R1 1 12 (Ignore as it exists in both tables)
4 R3 1 12 (Ignore as it exists in both tables)
5 R4 0 12 (Update to inactive as it does not exist in the Proj1 table but exists in Proj2)
6 R2 1 12 (Insert as it does not exist in Proj2 table, insert only once)
7 R5 1 12 (Insert as it does not exist in Proj2 table, insert only once)
If you are using SQL Server 2008 or later, you could use a MERGE statement, like this:
/*
CREATE TABLE Proj1 (
ID INT PRIMARY KEY,
Text VARCHAR(100) NOT NULL,
reqId INT NOT NULL
)
INSERT INTO Proj1 VALUES (1,'R1|R2',12)
INSERT INTO Proj1 VALUES (2,'R2|R3',12)
INSERT INTO Proj1 VALUES (3,'R3|R5|R2',12)
CREATE TABLE Proj2 (
ID INT IDENTITY PRIMARY KEY,
Text VARCHAR(100) NOT NULL,
Active BIT NOT NULL,
reqId INT NOT NULL
)
SET IDENTITY_INSERT Proj2 ON
INSERT INTO Proj2 (ID, Text, Active, reqId) VALUES (3,'R1',1,12)
INSERT INTO Proj2 (ID, Text, Active, reqId) VALUES (4,'R3',1,12)
INSERT INTO Proj2 (ID, Text, Active, reqId) VALUES (5,'R4',1,12)
SET IDENTITY_INSERT Proj2 OFF
*/
GO
CREATE FUNCTION dbo.Split(@String VARCHAR(1000),@Separator CHAR(1))
RETURNS @Result TABLE (
Position INT IDENTITY PRIMARY KEY,
Value VARCHAR(1000)
) AS
BEGIN
DECLARE @Pos INT, @Prev INT
SET @Prev=0
WHILE 1=1 BEGIN
SET @Pos=CHARINDEX(@Separator,@String,@Prev+1)
IF @Pos=0 BREAK
INSERT INTO @Result (Value) VALUES (SUBSTRING(@String,@Prev+1,@Pos-@Prev-1))
SET @Prev=@Pos
END
INSERT INTO @Result (Value) VALUES (SUBSTRING(@String,@Prev+1,LEN(@String)))
RETURN
END
GO
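Before wiring it into the MERGE, a quick sanity check of the helper (expected output shown as a comment):
SELECT Position, Value FROM dbo.Split('R3|R5|R2','|')
-- Position  Value
-- 1         R3
-- 2         R5
-- 3         R2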
MERGE INTO dbo.Proj2 p2
USING (
SELECT DISTINCT reqId, Value FROM dbo.Proj1 p1
CROSS APPLY dbo.Split(Text,'|') s
) x
ON p2.Text=x.Value AND p2.reqId=x.reqId
WHEN NOT MATCHED THEN INSERT VALUES (Value,1,reqid)
WHEN NOT MATCHED BY SOURCE THEN UPDATE SET Active=0
WHEN MATCHED AND Active=0 THEN UPDATE SET Active=1;
SELECT * FROM dbo.Proj2
Later edit: I added the third WHEN clause in the MERGE statement, to handle the case when the row is already present, but without the Active flag (although this case does not appear in the sample data).
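As a side note, if you are on SQL Server 2016 or later, the built-in STRING_SPLIT could stand in for the custom Split function here, since this MERGE never uses the Position column. The source query would become something like:
SELECT DISTINCT p1.reqId, s.value
FROM dbo.Proj1 p1
CROSS APPLY STRING_SPLIT(p1.Text,'|') s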
You can also handle this without a MERGE statement, like this:
INSERT INTO Proj2
SELECT Value,1,reqid
FROM (
SELECT DISTINCT reqId, Value FROM dbo.Proj1 p1
CROSS APPLY dbo.Split(Text,'|') s
) x
WHERE NOT EXISTS (
SELECT *
FROM Proj2 p2
WHERE p2.Text=x.Value AND p2.reqId=x.reqId
)
UPDATE Proj2 SET Active=0
FROM Proj2 p2
WHERE NOT EXISTS (
SELECT *
FROM (
SELECT DISTINCT reqId, Value FROM dbo.Proj1 p1
CROSS APPLY dbo.Split(Text,'|') s
) x
WHERE p2.Text=x.Value AND p2.reqId=x.reqId
)
UPDATE Proj2 SET Active=1
FROM Proj2 p2
INNER JOIN (
SELECT DISTINCT reqId, Value FROM dbo.Proj1 p1
CROSS APPLY dbo.Split(Text,'|') s
) x ON p2.Text=x.Value AND p2.reqId=x.reqId
WHERE p2.Active=0
(I used the Split function mentioned in the other answer)

Check for Overlapping date on Insert/Update

I have a table which holds a list of dates and more data for a person. The table should never have any undeleted overlapping rows (Dates overlapping).
Is there a way I can put a check constraint on the table, to ensure that when I update or insert a row, that there's no overlapping details?
Below is a cut down version of my table. It has a deleted flag, and start/end dates. A 'Null' end date means it's ongoing.
I then provide some legal, and some not-so-legal inserts (and why they're legal and illegal).
DECLARE @Test TABLE
(
Id INT NOT NULL IDENTITY(1,1),
PersonID INT NOT NULL,
StartDate DATE NOT NULL,
EndDate DATE NULL,
Deleted BIT NOT NULL
)
INSERT INTO @Test
(PersonId, StartDate, EndDate, Deleted)
SELECT 1, '01-JAN-2015', '15-JAN-2015', 0 UNION ALL -- Valid
SELECT 1, '16-JAN-2015', '20-JAN-2015', 1 UNION ALL -- Valid and deleted
SELECT 1, '18-JAN-2015', NULL, 0 UNION ALL -- Valid
SELECT 2, '01-JAN-2015', NULL, 0 UNION ALL -- Valid.. never ending row.
SELECT 2, '18-JAN-2015', '30-JAN-2015', 0 UNION ALL -- Invalid! Overlaps above record.
SELECT 2, '20-JAN-2015', '30-JAN-2015', 1 UNION ALL -- Valid, as it's deleted (Still overlaps, though)
SELECT 3, '01-JAN-2015', '10-JAN-2015', 0 UNION ALL -- Valid
SELECT 3, '10-JAN-2015', NULL, 0 -- Invalid, as it overlaps the last and first days
SELECT * FROM @Test
I need to make sure that the table doesn't allow overlapping dates for the same person, for undeleted rows.
For the date range check, I will use the "(StartA <= EndB) and (EndA >= StartB)" formula, but I'm unsure how to check this with a constraint, and across multiple rows.
I may need to do it with a trigger, comparing the inserted values to the existing rows and somehow cancelling if I find matches?
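For reference, that formula extended to treat a NULL EndDate as ongoing might look like this (a sketch; the sentinel end date '9999-12-31' is just an illustration). Run against the sample @Test table above, it lists the overlapping undeleted pairs:
SELECT a.Id AS IdA, b.Id AS IdB
FROM @Test a
JOIN @Test b
ON a.PersonID = b.PersonID
AND a.Id < b.Id
AND a.Deleted = 0
AND b.Deleted = 0
AND a.StartDate <= ISNULL(b.EndDate, '9999-12-31')
AND ISNULL(a.EndDate, '9999-12-31') >= b.StartDate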
You cannot use a CHECK constraint for this without adding additional columns.
You will have to create a trigger to check that inserted date ranges are non-overlapping. Something like this:
CREATE TRIGGER [dbo].[DateRangeTrigger]
ON [dbo].Test AFTER INSERT, UPDATE
AS
BEGIN
DECLARE @MaxDate DATE = '2999/12/31'
IF EXISTS (SELECT t.StartDate, t.EndDate FROM Test t
Join inserted i
On i.PersonID = t.PersonID
AND i.id <> t.Id
AND(
(i.StartDate > t.StartDate AND i.StartDate < ISNULL(t.EndDate,@MaxDate))
OR (ISNULL(i.EndDate,@MaxDate) < ISNULL(t.EndDate,@MaxDate) AND ISNULL(i.EndDate,@MaxDate) > t.StartDate)
OR (i.StartDate < t.StartDate AND ISNULL(i.EndDate,@MaxDate) > ISNULL(t.EndDate,@MaxDate))
)
WHERE t.Deleted = 0 AND i.Deleted = 0
)
BEGIN
RAISERROR ('Inserted date was within invalid range', 16, 1)
IF (@@TRANCOUNT>0)
ROLLBACK
END
END
You can refer to one of these threads for more information
Enforcing unique date range fields in SQL Server 2008
Unique date range fields in SQL Server 2008
Here's a trigger-based approach:
CREATE TRIGGER [dbo].[trigPersonnel_PreventOverlaps]
ON [dbo].[Personnel]
AFTER INSERT, UPDATE
AS
BEGIN
IF EXISTS(
SELECT * FROM dbo.Personnel p
INNER JOIN inserted i ON i.PersonID = p.PersonID
AND i.Id != p.Id AND i.Deleted = 0
AND (
(p.StartDate <= i.StartDate
AND (i.StartDate <= p.EndDate OR p.EndDate IS NULL))
OR (p.StartDate <= i.EndDate
AND (i.EndDate <= p.EndDate OR p.EndDate IS NULL))
)
WHERE p.Deleted = 0
)
--RAISERROR if you want
ROLLBACK
END
Note - it will roll back the whole transaction, so you'll need to perform inserts individually to ensure good ones don't get thrown out.
If you need something to comb through a bulk insert and pick out the bad ones, you'll need something more complex.
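One way to comb through a batch without anything fancy is to insert one row at a time and let TRY/CATCH absorb the trigger's rollback. A sketch, where #NewRows is a hypothetical staging table holding the candidate rows plus an Id key:
DECLARE @Id INT;
WHILE EXISTS (SELECT 1 FROM #NewRows)
BEGIN
    SELECT TOP 1 @Id = Id FROM #NewRows ORDER BY Id;
    BEGIN TRY
        -- the trigger rolls back and raises an error if this row overlaps
        INSERT INTO dbo.Personnel (PersonID, StartDate, EndDate, Deleted)
        SELECT PersonID, StartDate, EndDate, Deleted
        FROM #NewRows WHERE Id = @Id;
    END TRY
    BEGIN CATCH
        PRINT 'Skipped overlapping row ' + CAST(@Id AS VARCHAR(10));
    END CATCH;
    DELETE FROM #NewRows WHERE Id = @Id;
END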

SQL query performance using CTE vs WHILE loop in very large table

I'm trying to figure out the best performing query given the scenario below.
The following table is very large, over 5 million rows. Every day a few loads occur, although exactly how many varies. Each load is identified by load_id (part of the clustered index), and load_datetimestamp is the timestamp for that load. Each load inserts about 30,000 rows.
CREATE TABLE [VeryLargeTable](
[load_id] [int] NOT NULL,
[acct_cd] [varchar](20) NOT NULL,
[acct_num] [varchar](255) NULL,
[prod_id] [varchar](50) NOT NULL,
[domestic_foreign_cd] [varchar](3) NOT NULL,
[vendor_prod_id] [varchar](15) NULL,
[prod_name] [varchar](100) NULL,
[currency_cd] [varchar](3) NULL,
[total_Qty] [int] NULL,
[mkt_price] [float] NULL,
[load_datetimestamp] [datetime] NULL,
CONSTRAINT [pk_VeryLargeTable] PRIMARY KEY CLUSTERED (
[load_id] ASC,
[acct_cd] ASC,
[prod_id] ASC,
[domestic_foreign_cd] ASC )
)
On any given evening, I would like to get all the rows from today's FIRST load. The query must be as performant as possible given the above DDL. Obviously, you don't want to start out with something like "WHERE datediff(day,load_datetimestamp,getDate())=0" on the entire table.
I wrote these 2 queries. Is either better than the other? I know that both return the same result. Can you suggest an even better one than these 2?
Query 1
With
T1 as (select
load_id,
load_datetimestamp
from dbo.VeryLargeTable
group by
load_id,load_datetimestamp),
T2 as (select
load_id,
load_datetimestamp
from T1
where
datediff(day,load_datetimestamp,getDate())=0),
T3 as (select min(load_id) as loadID from T2)
select * from dbo.VeryLargeTable
where load_id = (select loadID from T3)
Query 2
declare @found tinyint;
declare @loadID int;
declare @dateTimeStamp datetime;
-- Get max value of load id
select @loadID = max(load_id) from [dbo].[VeryLargeTable];
-- Keep looping until previous day is found or minimum load_id is reached
set @found = 0;
WHILE (@found=0)
BEGIN
select @dateTimeStamp = load_datetimestamp from [dbo].[VeryLargeTable] where load_id=@loadID;
if (@loadID=0) SET @found=1
else
BEGIN
if (DATEPART(day, @dateTimeStamp) = DATEPART(day, GetDate())) SET @loadID = @loadID - 1;
else SET @found=1;
END
END
SELECT * from [dbo].[VeryLargeTable] where load_id=(@loadID + 1);
1) As mentioned in the comments, don't use an expression like datediff(day,load_datetimestamp,getDate())=0. This expression can't use an index on the load_datetimestamp column.
Use load_datetimestamp >= CAST(CAST(GETDATE() AS date) AS datetime).
2) Create an index on column load_datetimestamp.
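For example (the index name is illustrative):
CREATE INDEX IX_VeryLargeTable_load_datetimestamp
ON dbo.VeryLargeTable (load_datetimestamp);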
3) Find the first row that was loaded today and take load_id from it:
SELECT TOP(1) load_id
FROM dbo.VeryLargeTable
WHERE load_datetimestamp >= CAST(CAST(GETDATE() AS date) AS datetime)
ORDER BY load_datetimestamp
This query should be an instant seek of the index on load_datetimestamp. Check the execution plan.
4) Create an index on load_id column.
5) Use the found load_id to return all rows for that load:
WITH
CTE_FirstLoad
AS
(
SELECT TOP(1) load_id
FROM dbo.VeryLargeTable
WHERE load_datetimestamp >= CAST(CAST(GETDATE() AS date) AS datetime)
ORDER BY load_datetimestamp
)
SELECT *
FROM dbo.VeryLargeTable
WHERE load_id IN (SELECT load_id FROM CTE_FirstLoad)
;
This should use the index on load_id.
I've edited the answer and put IN instead of = in the last WHERE. With IN the query will work even if there were no loads for today at all. With = most likely there would be an error.
Edit
Now we know that you can't create new indexes on this table. And we know that the number of loads per day is small (around 5).
With this limitation it may well be that your second query with the explicit loop would be the fastest. In any case, you have to try all suggestions on the real system and measure their performance.
Can we assume that each load starts and ends during the same day? In other words, for each load_id the date of the load_datetimestamp is the same?
Then you can try a few tweaks to your loop. They may not change the performance much, or at all, but you have to measure.
instead of:
select @loadID = max(load_id) from [dbo].[VeryLargeTable];
try:
SET @loadID =
(select TOP(1) load_id from [dbo].[VeryLargeTable] ORDER BY load_id DESC);
in the same fashion inside the loop (I doubt that the current line actually works, because you have many rows for the same load_id) instead of:
select @dateTimeStamp = load_datetimestamp from [dbo].[VeryLargeTable] where load_id=@loadID;
try:
SET @dateTimeStamp =
(select TOP(1) load_datetimestamp from [dbo].[VeryLargeTable] where load_id=@loadID);
If you are allowed to create another table
I assume that load IDs increase with time, i.e. load with ID=N has load_datetimestamp less than load with ID=N+1.
Create a separate table Loads with a single column, load_id, and a unique clustered primary key on load_id.
This table would contain one row for the last load_id for each day. It would be much smaller than the main table.
The purpose of this table is to quickly find the "first load_id for today" instead of scanning the large table.
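A minimal shape for it might be (a sketch):
CREATE TABLE dbo.Loads (
load_id INT NOT NULL,
CONSTRAINT pk_Loads PRIMARY KEY CLUSTERED (load_id)
);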
Every evening when you run your report this small Loads table would contain rows for loads from previous days, but not from today.
Can you say that, at the time when you run your report in the evening, all loading has finished for the day? If yes:
Get the last load from previous days from the small table:
SET @PrevLoadID =
(SELECT TOP(1) load_id FROM Loads ORDER BY load_id DESC)
Get the first load for today from the large table:
SET @TodayFirstLoadID =
(SELECT TOP(1) load_id
FROM VeryLargeTable
WHERE VeryLargeTable.load_id > @PrevLoadID
ORDER BY load_id ASC)
Get the last load for today from the large table:
SET @TodayLastLoadID =
(SELECT TOP(1) load_id
FROM VeryLargeTable
ORDER BY load_id DESC)
INSERT the @TodayLastLoadID into Loads.
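In code, that last step is a one-liner:
INSERT INTO Loads (load_id) VALUES (@TodayLastLoadID);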
Use @TodayFirstLoadID to run your main report:
SELECT *
FROM VeryLargeTable
WHERE load_id = @TodayFirstLoadID
You don't need 3 CTEs here. You can replace the first 2 CTEs by simply selecting all distinct combinations of Load ID and Load DateTimeStamp which satisfy the filter condition. Then, you can get rid of the 3rd CTE by moving that check directly to the subquery in your final select. The resulting query would look like this:
;With
T2 as
(select distinct
load_id,
load_datetimestamp
from dbo.VeryLargeTable
where datediff(day,load_datetimestamp,getDate())=0)
select * from dbo.VeryLargeTable
where load_id = (select min(load_ID) from T2)
You need to check it against your execution plan, but if I were you I would use a temp table instead of using 3 table expressions and joining them with the original table.
Also, as you mentioned earlier, it's better not to use functions in your WHERE clause. Instead you can use date BETWEEN start AND end.
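A sketch of that temp-table variant, with the datediff call swapped for a sargable date range (the bounds are computed once from GETDATE()):
SELECT DISTINCT load_id
INTO #today_loads
FROM dbo.VeryLargeTable
WHERE load_datetimestamp >= CAST(GETDATE() AS date)
AND load_datetimestamp < DATEADD(day, 1, CAST(GETDATE() AS date));

SELECT *
FROM dbo.VeryLargeTable
WHERE load_id = (SELECT MIN(load_id) FROM #today_loads);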

T-SQL Grouping Sets of Information

I have a problem which my limited SQL knowledge is keeping me from understanding.
First the problem:
I have a database which I need to run a report on; it contains configurations of a user's entitlements. The report needs to show a distinct list of these configurations and a count against each one.
So a line in my DB looks like this:
USER_ID SALE_ITEM_ID SALE_ITEM_NAME PRODUCT_NAME CURRENT_LINK_NUM PRICE_SHEET_ID
37715 547 CultFREE CultPlus 0 561
The above line is one row of a user's configuration; for every user ID there can be 1-5 of these lines. So a configuration is multiple rows of data sharing a common User ID, with variable attributes.
I need to get a distinct list of these configurations across the whole table, leaving just one configuration set wherever more than one user has that configuration, and a count of instances of that configuration.
Hope this is clear?
Any ideas?!?!
I have tried various GROUP BYs and UNIONs, and also the GROUPING SETS function, to no avail.
I will be very grateful if anyone can give me some pointers!
Ouch that hurt ...
Ok so problem:
a row represents a configurable line
users may be linked to more than 1 row of configuration
configuration rows when grouped together form a configuration set
we want to figure out all of the distinct configuration sets
we want to know what users are using them.
Solution (it's a bit messy but the idea is there; copy and paste into SQL Server Management Studio) ...
-- ok so i imported the data to a table named SampleData ...
-- 1. import the data
-- 2. add a new column
-- 3. select all the values of the config in to the new column (Configuration_id)
--UPDATE [dbo].[SampleData]
--SET [Configuration_ID] = SALE_ITEM_ID + SALE_ITEM_NAME + [PRODUCT_NAME] + [CURRENT_LINK_NUM] + [PRICE_SHEET_ID] + [Configuration_ID]
-- 4. i then selected just the distinct values of those and found 6 distinct Configuration_id's
--SELECT DISTINCT [Configuration_ID] FROM [dbo].[SampleData]
-- 5. to make them a bit easier to read and work with i gave them int values instead
-- for me it was easy to do this manually but you might wanna do some trickery here to autonumber them or something
-- basic idea is to run the step 4 statement but select into a new table then add a new primary key column and set identity spec on it
-- that will generate u a bunch of incremental numbers for your config id's so u can then do something like ...
--UPDATE [dbo].[SampleData] sd
--SET Configuration_ID = (SELECT ID FROM TempConfigTable WHERE Config_ID = sd.Configuration_ID)
-- at this point you have all your existing rows with a unique ident for the values combined in each row.
-- so for example in my dataset i have several rows where only the user_id has changed but all look like this ...
--SALE_ITEM_ID SALE_ITEM_NAME PRODUCT_NAME CURRENT_LINK_NUM PRICE_SHEET_ID Configuration_ID
--54101 TravelFREE TravelPlus 0 56101 1
-- now you have a config id you can start to work on building sets up ...
-- each user is now matched with 1 or more config id
-- 6. we use a CTE (common table expression) to link the possibles (keeps the join small) ...
--WITH Temp (ConfigID)
--AS
--(
-- SELECT DISTINCT SD.Configuration_Id --SD2.Configuration_Id, SD3.Configuration_Id, SD4.Configuration_Id, SD5.Configuration_Id,
-- FROM [dbo].[SampleData] SD
--)
-- this extracts all the possible combinations using the CTE
-- on the basis of what you told me, max rows per user is 6, in the result set i have i only have 5 distinct configs
-- meaning i gain nothing by doing a 6th join.
-- cross joins basically give you every combination of unique values from the 2 tables but we joined back on the same table
-- so its every possible combination of Temp + Temp (ConfigID + ConfigID) ... per cross join so with 5 joins its every combination of
-- Temp + Temp + Temp + Temp + Temp .. good job temp only has 1 column with 5 values in it
-- 7. uncomment both this and the CTE above ... need to use them together
--SELECT DISTINCT T.ConfigID C1, T2.ConfigID C2, T3.ConfigID C3, T4.ConfigID C4, T5.ConfigID C5
--INTO [SETS]
--FROM Temp T
--CROSS JOIN Temp T2
--CROSS JOIN Temp T3
--CROSS JOIN Temp T4
--CROSS JOIN Temp T5
-- notice the INTO clause ... this dumps me out a new [SETS] table in my db
-- if i go add a primary key to this and set its ident spec i now have unique set id's
-- for each row in the table.
--SELECT *
--FROM [dbo].[SETS]
-- now here's where it gets interesting ... row 1 defines a set as being config id 1 and nothing else
-- row 2 defines set 2 as being config 1 and config 2 and nothing else ... and so on ...
-- the problem here of course is that 1,2,1,1,1 is technically the same set as 1,1,1,2,1 from our point of view
-- ok lets assign a set to each userid ...
-- 8. first we pull the distinct id's out ...
--SELECT DISTINCT USER_ID usr, null SetID
--INTO UserSets
--FROM SampleData
-- now we need to do bit a of operating on these that's a bit much for a single update or select so ...
-- 9. process findings in a loop
DECLARE @currentUser int
DECLARE @set int
-- while there's a userid not linked to a set
WHILE EXISTS(SELECT TOP 1 usr FROM UserSets WHERE SetId IS NULL)
BEGIN
SET @currentUser = (SELECT TOP 1 usr FROM UserSets WHERE SetId IS NULL)
-- figure out a set to link it to
SET @set = (
SELECT TOP 1 ID
FROM [SETS]
-- shouldn't really do this ... basically need to refactor in to a table variable then compare to that
-- that way the table lookup on ur main data is only 1 per User_id
WHERE C1 IN (SELECT DISTINCT Configuration_id FROM SampleData WHERE USER_ID = @currentUser)
AND C2 IN (SELECT DISTINCT Configuration_id FROM SampleData WHERE USER_ID = @currentUser)
AND C3 IN (SELECT DISTINCT Configuration_id FROM SampleData WHERE USER_ID = @currentUser)
AND C4 IN (SELECT DISTINCT Configuration_id FROM SampleData WHERE USER_ID = @currentUser)
AND C5 IN (SELECT DISTINCT Configuration_id FROM SampleData WHERE USER_ID = @currentUser)
)
-- hopefully that worked
IF(@set IS NOT NULL)
BEGIN
-- tell the usersets table
UPDATE UserSets SET SetId = @set WHERE usr = @currentUser
SET @set = null
END
ELSE -- something went wrong ... set to 0 to prevent endless loop but any userid linked to set 0 is a problem u need to look at
UPDATE UserSets SET SetId = 0 WHERE usr = @currentUser
-- and round we go again ... until we are done
END
SELECT
USER_ID,
SALE_ITEM_ID, ETC...,
COUNT(*) AS WhateverYouWantToNameCount
FROM TableName
GROUP BY USER_ID, SALE_ITEM_ID, ETC...
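As a footnote: on SQL Server 2017 or later, the whole problem can be attacked more directly by collapsing each user's rows into one canonical string and counting users per string. A sketch against the SampleData table from the walkthrough above:
WITH UserConfig AS (
SELECT USER_ID,
STRING_AGG(CONCAT(SALE_ITEM_ID,'|',SALE_ITEM_NAME,'|',PRODUCT_NAME,'|',CURRENT_LINK_NUM,'|',PRICE_SHEET_ID), ';')
WITHIN GROUP (ORDER BY SALE_ITEM_ID) AS ConfigSet
FROM SampleData
GROUP BY USER_ID
)
SELECT ConfigSet, COUNT(*) AS NumUsers
FROM UserConfig
GROUP BY ConfigSet;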
