I have multiple CSV files in different folders. I want to do a dynamic bulk insert in SQL Server that loads all of them into a single table.
I did it for a single CSV file. Can someone help me out?
Here's something to get you started. You can read up on xp_dirtree and cursors to see how they work. If your files are spread across different parent folders, or different drives, you'll need an additional cursor to go get them...
---------------------------------------------------------------------------------------------------------------
--Set some variables
---------------------------------------------------------------------------------------------------------------
DECLARE @fileLocation VARCHAR(128) = '\\server\e$\data\' --location of files (parent folder); keep the trailing backslash
DECLARE @sql NVARCHAR(4000)  --dynamic SQL buffer for the per-file BULK INSERT
DECLARE @fileName VARCHAR(128) --current file name fetched from the cursor
---------------------------------------------------------------------------------------------------------------
--Get a list of all the file names in the directory
---------------------------------------------------------------------------------------------------------------
IF OBJECT_ID('tempdb..#FileNames') IS NOT NULL DROP TABLE #FileNames
CREATE TABLE #FileNames (
     id int IDENTITY(1,1)
    ,subdirectory nvarchar(512) --file or folder name returned by xp_dirtree
    ,depth int
    ,isfile bit)                --1 = file, 0 = folder
INSERT #FileNames (subdirectory,depth,isfile)
EXEC xp_dirtree @fileLocation, 1, 1
--Here's all the files and folders. Note the isfile column.
SELECT * FROM #FileNames
---------------------------------------------------------------------------------------------------------------
--Create a cursor to fetch the file names
---------------------------------------------------------------------------------------------------------------
--NOTE: the list column is named subdirectory (the original selected a nonexistent "name" column).
DECLARE c CURSOR LOCAL FAST_FORWARD FOR
    SELECT subdirectory FROM #FileNames WHERE isfile = 1
OPEN c
FETCH NEXT FROM c INTO @fileName
---------------------------------------------------------------------------------------------------------------
--For each file, bulk insert to the proper view, update the proper table, update the log, etc...
---------------------------------------------------------------------------------------------------------------
WHILE @@FETCH_STATUS = 0
BEGIN
    --do your bulk insert work here (e.g. build @sql from @fileLocation + @fileName and EXEC it)
    FETCH NEXT FROM c INTO @fileName
END
--Release the cursor (missing in the original)
CLOSE c
DEALLOCATE c
Related
I have an MS Access database that we're converting to a SQL Server backend. This database has an Attachments table with a few simple columns:
PK, FK to MainTable.RecordID, Description, filename
Attachments are stored in a fileshare. VBA code uses a hardcoded filepath and ShellExecute to save attachments to a directory, under a RecordID subfolder.
We're moving to store attachments in SQL Server using filestream.
I need to move these attachments from fileshare, to SQL Server, while maintaining RecordID integrity. SQL Server tables and columns are already set up.
These attachments vary in extensions (.msg, .doc, .xlsx, .pdf)
I've been looking into "OPENROWSET" but every example I've seen uses only one file.
I've been looking into SSMA but can't find what I'm looking for.
Any references/reference articles or code resources I can use/repurpose would be greatly appreciated.
Sounds like you want to write an SQL stored procedure that will find all files in a given file path, iterate over those files, and insert the file into a table.
This article will help in general: https://www.mssqltips.com/sqlservertip/5432/stored-procedure-to-import-files-into-a-sql-server-filestream-enabled-table/
This article is about xp_dirtree: https://www.sqlservercentral.com/blogs/how-to-use-xp_dirtree-to-list-all-files-in-a-folder
Here's sample code to read the file system from SQL. THIS IS UNTESTED CODE, you'll need to modify to your needs but it gives you some idea of how to do the loops and read in files.
--You will need xp_cmdshell enabled on the SQL Server instance if it is not already.
USE master
GO
--Enable xp_cmdshell (requires sysadmin; a security-sensitive setting -- enable deliberately).
EXEC sp_configure 'show advanced options',1
RECONFIGURE WITH OVERRIDE
EXEC sp_configure 'xp_cmdshell',1
RECONFIGURE WITH OVERRIDE
GO
--Root pickup folder containing one subfolder per RecordID; keep the trailing backslash.
DECLARE @PickupDirectory nvarchar(512) = '\\folder_containing_files_or_folders\';
PRINT 'Parsing directory to identify folders to process.';
--Immediate children (files and folders) of the pickup directory.
DECLARE @DirTree TABLE (
      id int IDENTITY(1,1)
    , subdirectory nvarchar(512)
    , depth int
    , isfile bit
);
--Working tables the original (untested) sample referenced but never created.
IF OBJECT_ID('tempdb..#processedFolders') IS NOT NULL DROP TABLE #processedFolders;
CREATE TABLE #processedFolders (folder_name nvarchar(512));
IF OBJECT_ID('tempdb..#filesToProcess') IS NOT NULL DROP TABLE #filesToProcess;
CREATE TABLE #filesToProcess (
      id int IDENTITY(1,1)
    , subdirectory nvarchar(512)
    , depth int
    , isfile bit
);
--Enumerate the pickup directory (second argument is depth, third = include files).
INSERT @DirTree
EXEC master.sys.xp_dirtree @PickupDirectory,1,1;
--Loop-control variables.
DECLARE @folderCount int;
DECLARE @folderName nvarchar(max);
DECLARE @folderPath nvarchar(max);
DECLARE @i int = 0;
DECLARE @fileCount int;
DECLARE @fileName NVARCHAR(max);
DECLARE @filePath varchar(max);
DECLARE @j int = 0;
DECLARE @RecordID nvarchar(50);
DECLARE @SQLText NVARCHAR(max);
SET @folderCount = (SELECT COUNT(*) FROM @DirTree WHERE isfile = 0);
WHILE ( @i < @folderCount )
BEGIN
    --Get the next folder that has not been processed yet (deterministic order by id).
    SET @folderName = (
        SELECT TOP 1 dt.subdirectory
        FROM @DirTree AS dt
        LEFT OUTER JOIN #processedFolders AS pf
            ON pf.folder_name = dt.subdirectory
        WHERE dt.isfile = 0
          AND pf.folder_name IS NULL
        ORDER BY dt.id
    );
    --Get the RecordID from the folder name.
    SET @RecordID = @folderName; --Edit this to parse the RecordID from your folder structure.
    --Concat root path and subfolder to enumerate its files.
    SET @folderPath = @PickupDirectory + @folderName + '\';
    --Start each folder with a clean file list and a reset inner counter
    --(the original never reset @j, so only the first folder's files were processed).
    TRUNCATE TABLE #filesToProcess;
    SET @j = 0;
    --Enumerate this subdirectory to get the files to process.
    INSERT #filesToProcess
    EXEC master.sys.xp_dirtree @folderPath,1,1;
    --Count of files to loop through.
    SET @fileCount = (SELECT COUNT(*) FROM #filesToProcess WHERE isfile = 1);
    WHILE (@j < @fileCount)
    BEGIN
        --Next file (deterministic: lowest id first).
        SET @fileName = (SELECT TOP 1 subdirectory FROM #filesToProcess WHERE isfile = 1 ORDER BY id);
        --Concat the whole file path.
        SET @filePath = @folderPath + @fileName;
        --NOTE(review): OPENROWSET(BULK ...) cannot take a parameter, so the path must be
        --concatenated; this assumes folder/file names contain no single quotes -- verify.
        SET @SQLText = '
            INSERT INTO [table_name] (RecordID, [filename], [filestreamCol])
            SELECT
                  ''' + @RecordID + '''
                , ''' + @fileName + '''
                , BulkColumn
            FROM OPENROWSET(BULK ''' + @filePath + ''', SINGLE_BLOB) AS tb';
        EXEC sp_executesql @SQLText;
        --Remove the processed file so the next TOP 1 picks a new one.
        DELETE FROM #filesToProcess
        WHERE subdirectory = @fileName;
        SET @j = @j + 1;
    END
    --Mark this folder as done.
    INSERT INTO #processedFolders (folder_name)
    SELECT @folderName;
    PRINT 'Folder complete: ' + @folderName;
    SET @i = @i + 1;
END
I think you want to parse just a root directory with the xp_dirtree command above. That will display all the subdirectories which should contain the "RecordID". Read the RecordID into a variable, then parse each of those subdirectories to get the actual files. If you want more detailed code, you'll have to show some examples of the directory structure and the destination table.
I need a stored procedure that will do a bulk insert for multiple .csv files into tables in SQL Server. The .csv files all sit in a folder. The .csv files are tab delimited. I am able to iterate through the folder and create a record in a table named FileNames with each file in it. I get an error when it gets to the BULK INSERT code.
It's an incorrect syntax error near '-'.
BULK INSERT FILE01-K FROM C:\Temp\CSV_FILES\FILE01-K.csv')
My procedure:
DECLARE @filename varchar(255),
        @path     varchar(255),
        @sql      varchar(8000),
        @cmd      varchar(1000)

--Get the list of files to process
SET @path = 'C:\Temp\CSV_FILES\'
SET @cmd = 'dir ' + @path + '*.csv /b'

--Clear the FileNames staging table (original mixed "FileName" and "FileNames"; unified here)
DELETE FROM FileNames
INSERT INTO FileNames (FileName)
EXEC Master..xp_cmdShell @cmd
UPDATE FileNames SET FilePath = @path WHERE FilePath IS NULL
DELETE FROM FileNames WHERE FileName IS NULL

--Cursor loop over the discovered files
DECLARE c1 CURSOR FOR
    SELECT FilePath, FileName FROM FileNames WHERE FileName LIKE '%.csv%' ORDER BY FileName DESC
OPEN c1
FETCH NEXT FROM c1 INTO @path, @filename
WHILE @@FETCH_STATUS = 0
BEGIN
    --A table name containing '-' (e.g. FILE01-K) must be bracket-quoted, and the file
    --path must be a quoted string literal -- both were missing and caused the
    --"incorrect syntax near '-'" error. The stray trailing ') is removed as well.
    --The files are tab delimited, hence FIELDTERMINATOR = '\t'.
    SET @sql = 'BULK INSERT ' + QUOTENAME(REPLACE(@filename, '.csv', ''))
             + ' FROM ''' + @path + @filename + ''''
             + ' WITH (FIELDTERMINATOR = ''\t'', ROWTERMINATOR = ''\n'')'
    PRINT @sql
    EXEC (@sql)
    FETCH NEXT FROM c1 INTO @path, @filename
END
CLOSE c1
DEALLOCATE c1
I've searched and found some examples, including up here that I've used to try and build mine.
I have a query that extracts PDF files into a directory. But for some reason all the files come out empty (0 KB).
Column PDF stored as image datatype. I know its obsolete but I am not the owner of the database.
Would you suggest any workaround?
--Use a GLOBAL temp table (##): BCP opens its own connection to SQL Server, and a
--local #temp table is private to this session, so BCP could not see it.
IF OBJECT_ID('tempdb..##tblPolicyForms') IS NOT NULL DROP TABLE ##tblPolicyForms
CREATE TABLE ##tblPolicyForms
(
    FormID INT NOT NULL
    ,PDF varbinary(max) NULL
    ,PDF_FIlename VARCHAR(max)
)
INSERT INTO ##tblPolicyForms (FormID, PDF, PDF_FIlename)
SELECT FormID,
       PDF,
       PDF_FileName
FROM [dbo].[tblPolicyForms]
WHERE FormID IN (19,20,21,22)

DECLARE @FormID varchar(4);
DECLARE @FileName VARCHAR(200);

DECLARE FormID_cursor CURSOR FOR
    SELECT FormID
          ,PDF_FIlename
    FROM ##tblPolicyForms

OPEN FormID_cursor
FETCH NEXT FROM FormID_cursor
INTO @FormID, @FileName
WHILE @@FETCH_STATUS = 0
BEGIN
    --Filter to the current row: without the WHERE, every file received every row.
    --NOTE(review): -N writes native format (bcp prepends a row header); the output is
    --meant for re-import with bcp, not necessarily a directly openable PDF -- verify.
    DECLARE @cmd VARCHAR(8000) = 'BCP "SELECT PDF FROM ##tblPolicyForms WHERE FormID = ' + @FormID + '" queryout "C:\Users\oserdyuk\Documents\ExportDir\' + @FileName + '" -T -N'
    EXEC master.dbo.xp_cmdshell @cmd;
    FETCH NEXT FROM FormID_cursor
    INTO @FormID, @FileName
END
CLOSE FormID_cursor
DEALLOCATE FormID_cursor
--Clean up the global temp table so concurrent runs do not collide on the name.
DROP TABLE ##tblPolicyForms
The thing is BCP is opening another connection to SQL Server, and regular temp tables like your #tblPolicyForms are private to the connection. The BCP is probably generating an error which you don't get to see.
Can you change your temp table to a global one? This way the other connection (BCP) will be able to access it. You need to use two pound signs like this:
--Global temp table: the ## prefix makes it visible to OTHER connections,
--including the separate connection that BCP opens.
CREATE TABLE ##tblPolicyForms
(
FormID INT NOT NULL
,PDF varbinary(max) NULL        --image data migrated to varbinary(max)
,PDF_FIlename VARCHAR(max)      --(sic) column name kept as-is to match existing references
)
You also need to change every reference from #tblPolicyForms to ##tblPolicyForms (single to double pounds). Be aware that if the procedure is executed in parallel by different sessions you will receive an error indicating that ##tblPolicyForms already exists.
Another thing, you are exporting every row into every file, so every file ends up the same size as you have seen. You need a WHERE clause in your BCP like this:
--Per-row BCP export: the WHERE clause limits the output to the current FormID.
DECLARE @cmd VARCHAR(8000) = 'BCP "SELECT PDF FROM ##tblPolicyForms WHERE FormID = ''' + @FormID + ''' " queryout "C:\Users\oserdyuk\Documents\ExportDir\'+@FileName+'" -T -N'
But even then things might not work properly because bcp will prepend a 8 byte header to your row. Since bcp is an export/import tool it will create a file it can later import back into your database.
Any ideas/suggestions appreciated....
I've been asked to come up with a simple way to import new data we receive from an outside vendor (text files). We get several text files and each needs to be imported into its own table. Some tables have to have the current/existing data moved into a table called TABLENAME_Previous (to work with various existing reports), then have the current table emptied out and the new data imported into it. Also, any data now in the "previous" table has to be appended to an archive table.
Here's an example:
customer.txt comes in from vendor....
First we move the contents of customers_previous to customers_arch
Next we move the contents of customers to customers_previous
Finally we import the new customers.txt file into the table customers
Has anyone ever written a SQL routine to do this, or knows where to find one, that wouldn't be too painful to modify?
Thanks
you may try something like this:
To copy your previous data to Archive
--Append the "previous" snapshot to the archive (customers_previous is cleared in the next step)
Insert into customers_arch select * from customers_previous
To Copy your Customer Data to Previous:
--Replace the "previous" snapshot with the current live data
truncate table customers_previous;
insert into customers_previous select * from customers
Then to Load you text file use Bulk Insert to load your customer table after clearing it.
-- Empty the live table, then load the new vendor file into it.
TRUNCATE TABLE customers;

BULK INSERT customers
FROM 'd:\yourfolder\customers.txt'
WITH (
    FIELDTERMINATOR = ',',
    ROWTERMINATOR   = '\n'
);
UPDATE:
Ok, Brian, to answer your other question, How to run it for multiple files saved in your WeeklyTable.
Suppose your WeeklyTable is like this:
DECLARE @WeeklyTable TABLE (ID int IDENTITY(1,1), [FileName] varchar(50))
INSERT INTO @WeeklyTable VALUES
('Customers'),('Orders'),('Order_Details')

DECLARE @Template varchar(max)
--NOTE: no GO inside the template. GO is a client-tool batch separator, not a
--T-SQL statement, so EXEC() would fail on it. Semicolons separate the statements.
SET @Template = '
-- Start of [[FILENAME]] --------------------
INSERT INTO [FILENAME]_arch SELECT * FROM [FILENAME]_previous;
TRUNCATE TABLE [FILENAME]_previous;
INSERT INTO [FILENAME]_previous SELECT * FROM [FILENAME];
TRUNCATE TABLE [FILENAME];
BULK INSERT [FILENAME]
FROM ''d:\yourfolder\[FILENAME].txt''
WITH
(
FIELDTERMINATOR ='','',
ROWTERMINATOR =''\n''
);
'

DECLARE @s varchar(max)
DECLARE @FileName varchar(50)
DECLARE @ID int = 0

--Walk the table in ID order; the WHILE tests @@ROWCOUNT of the preceding SELECT,
--so the loop ends when no row with a higher ID is found.
SELECT TOP 1 @ID = ID, @FileName = [FileName] FROM @WeeklyTable WHERE ID > @ID ORDER BY ID
WHILE @@ROWCOUNT > 0
BEGIN
    SET @s = REPLACE(@Template, '[FILENAME]', @FileName)
    PRINT @s
    -- EXEC(@s) -- Uncomment to EXEC the script.
    SELECT TOP 1 @ID = ID, @FileName = [FileName] FROM @WeeklyTable WHERE ID > @ID ORDER BY ID
END
I want to read xml files with sql server. I show below how I do it.
--Load one XML file into a table variable via OPENROWSET ... SINGLE_BLOB.
DECLARE @testxml TABLE (IntCol int, XmlCol xml);
INSERT INTO @testxml (XmlCol)
SELECT x.BulkColumn  --explicit column (OPENROWSET SINGLE_BLOB returns exactly one: BulkColumn)
FROM OPENROWSET(
    BULK 'C:\XMLs\32056963_0001515351.xml',
    SINGLE_BLOB) AS x;
SELECT IntCol, XmlCol FROM @testxml
All is ok. But I need to read many files inside a folder, so I'm using:
--Arguments: directory path, depth, include-files flag (1 = list files, adds the isfile column)
EXEC master.sys.xp_dirtree 'C:\XMLs\',0,1;
But how can I doing a dynamic bulk insert in order to insert all xml files in the folder to #testxml?
I don't know if there is some way to do a bulk insert of all the files at once. I would suggest to execute your import query for each file, using dynamic queries. But in order to be able to fetch the data from the main query, you should insert the data in a temporary table, because the scope of the table variable will be limited to the dynamic query.
-- Get the file names
-- Get the file names
CREATE TABLE #files (
    subdirectory NVARCHAR(255),
    depth INT,
    [file] BIT          -- "file" is a reserved keyword in T-SQL and must be bracket-quoted
)

INSERT INTO #files
EXEC master.sys.xp_dirtree 'C:\XMLs\',0,1;

-- Iterate through the XML files
DECLARE @filesCursor CURSOR;
SET @filesCursor = CURSOR FOR
    SELECT subdirectory
    FROM #files
    WHERE [file] = 1 AND LEN(subdirectory) > 4 AND LOWER(RIGHT(subdirectory,4)) = '.xml'

DECLARE @fileName NVARCHAR(255), @query NVARCHAR(MAX);
FETCH NEXT FROM @filesCursor INTO @fileName;

-- Temp table to collect the data: unlike a table variable, it is visible
-- inside the dynamic SQL executed by sp_executesql.
CREATE TABLE #testxml (IntCol int, XmlCol xml);

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Build and execute the import query for each file.
    -- NOTE(review): OPENROWSET(BULK ...) cannot take a parameter, so the path is
    -- concatenated; assumes file names contain no single quotes -- verify.
    SET @query = 'INSERT INTO #testxml(XmlCol) SELECT * FROM OPENROWSET(BULK ''C:\XMLs\' + @fileName + ''',SINGLE_BLOB) AS x';
    EXECUTE sp_executesql @query;
    FETCH NEXT FROM @filesCursor INTO @fileName;
END

-- Closing and deallocating cursor
CLOSE @filesCursor;
DEALLOCATE @filesCursor;

-- Copy from the temp table into the table variable.
-- (If a table variable is not required, read directly from #testxml instead.)
DECLARE @testxml TABLE (IntCol int, XmlCol xml);
INSERT INTO @testxml (IntCol, XmlCol)
SELECT IntCol, XmlCol FROM #testxml;

-- Deleting temp tables, as they won't be used anymore
DROP TABLE #testxml;
DROP TABLE #files;