I would like to ignore duplicate keys when inserting data into Postgres.
My table has a unique constraint on 3 columns. The code I post below works with the primary key but not with a unique constraint. Does anyone have suggestions?
thanks
E
@compiles(Insert)
def _prefix_insert_with_ignore(insert_srt, compiler, **kw):
    """Compile an INSERT so rows that collide with the primary key are skipped.

    Args:
        insert_srt: The ``Insert`` construct being compiled.
        compiler: The SQL compiler handed in by the ``@compiles`` hook.
        **kw: Extra compiler keyword arguments, forwarded unchanged.

    Returns:
        The compiled SQL string: ``INSERT OR IGNORE ...`` when the connection
        string starts with ``sqlite``, otherwise the plain INSERT followed by
        an ``ON CONFLICT ... DO NOTHING`` clause.
    """
    conn_str = Connection().conn_str()
    if conn_str.startswith("sqlite"):
        return compiler.visit_insert(insert_srt.prefix_with('OR IGNORE'), **kw)

    # Any other connection is treated as PostgreSQL.
    insert = compiler.visit_insert(insert_srt, **kw)
    pk_names = [c.name for c in insert_srt.table.primary_key]
    if pk_names:
        ondup = f"ON CONFLICT ({','.join(pk_names)}) DO NOTHING"
    else:
        # An empty target list "ON CONFLICT ()" is invalid SQL; PostgreSQL
        # accepts DO NOTHING without a conflict target.
        ondup = "ON CONFLICT DO NOTHING"
    return ' '.join((insert, ondup))
OK, this works well for me.
Thanks very much, @snakechrmerb.
@compiles(Insert)
def _prefix_insert_with_ignore(insert_srt, compiler, **kw):
    """Compile an INSERT so rows that violate a unique constraint are skipped.

    Args:
        insert_srt: The ``Insert`` construct being compiled.
        compiler: The SQL compiler handed in by the ``@compiles`` hook.
        **kw: Extra compiler keyword arguments, forwarded unchanged.

    Returns:
        The compiled SQL string: ``INSERT OR IGNORE ...`` when the connection
        string starts with ``sqlite``, otherwise the plain INSERT followed by
        an ``ON CONFLICT ... DO NOTHING`` clause.
    """
    conn_str = Connection().conn_str()
    if conn_str.startswith("sqlite"):
        return compiler.visit_insert(insert_srt.prefix_with('OR IGNORE'), **kw)

    # Any other connection is treated as PostgreSQL.
    insert = compiler.visit_insert(insert_srt, **kw)

    # Use the first UniqueConstraint found on the table. If the table declares
    # several, which one is picked depends on the iteration order of
    # ``table.constraints``.
    unique = next(
        (x for x in insert_srt.table.constraints
         if isinstance(x, sa.UniqueConstraint)),
        None,
    )
    if unique is None:
        # No unique constraint declared: a bare ``next()`` would raise
        # StopIteration here. PostgreSQL accepts DO NOTHING without a target.
        ondup = 'ON CONFLICT DO NOTHING'
    else:
        s = ", ".join(col.name for col in unique.columns)
        ondup = f'ON CONFLICT ({s}) DO NOTHING'
    return ' '.join((insert, ondup))
Related
I am trying to upload a binary.zip to SQL Server as the content of a varbinary column.
Target Table:
CREATE TABLE myTable ( zipFile varbinary(MAX) );
My NIFI Flow is very simple:
-> GetFile:
filter:binary.zip
-> UpdateAttribute:<br>
sql.args.1.type = -3 # as varbinary according to JDBC types enumeration
sql.args.1.value = ??? # I don't know what to put here! (I've tried everything!)
sql.args.1.format= ??? # Is it required? I tried 'hex'
-> PutSQL:<br>
SQLstatement= INSERT INTO myTable (zip_file) VALUES (?);
What should I put in sql.args.1.value?
I think it should be the flowfile payload, but would that work as part of the INSERT in PutSQL? So far it has not!
Thanks!
SOLUTION UPDATE:
Based on https://issues.apache.org/jira/browse/NIFI-8052
(Consider I'm sending some data as attribute parameter)
import java.nio.charset.StandardCharsets
import org.apache.nifi.controller.ControllerService
import groovy.sql.Sql
// Insert the content of the incoming flow file into a binary column.
// The pool name, table name and column name are read from flow file attributes.
def flowFile = session.get()
// Guard first: session.get() returns null when nothing is queued, and every
// statement below dereferences flowFile.
if (!flowFile) return

def lookup = context.controllerServiceLookup
def dbServiceName = flowFile.getAttribute('DatabaseConnectionPoolName')
def tableName = flowFile.getAttribute('table_name')
def fieldName = flowFile.getAttribute('field_name')
// The closure must start on the same line as .find, otherwise Groovy parses
// it as a separate statement.
def dbcpServiceId = lookup.getControllerServiceIdentifiers(ControllerService).find { cs ->
    lookup.getControllerServiceName(cs) == dbServiceName
}
def conn = lookup.getControllerService(dbcpServiceId)?.getConnection()
try {
    def sql = new Sql(conn)
    flowFile.read { rawIn ->
        // The stream is bound as the single JDBC parameter.
        def parms = [rawIn]
        // NOTE(review): tableName and fieldName are concatenated into the SQL
        // text because identifiers cannot be bound as parameters. They come
        // from flow file attributes, so validate them upstream.
        sql.executeInsert "INSERT INTO " + tableName + " (date, "+ fieldName + ") VALUES (CAST( GETDATE() AS Date ) , ?) ", parms
    }
} finally {
    // Return the connection to the pool even when the insert fails.
    conn?.close()
}
session.transfer(flowFile, REL_SUCCESS)
session.commit()
Maybe there is a NiFi-native way to insert a blob; however, you could use ExecuteGroovyScript instead of UpdateAttribute and PutSQL.
Add an SQL.mydb parameter at the processor level and link it to the required DBCP pool.
Use the following script body:
// Take the next flow file; stop when the queue is empty.
def flowFile = session.get()
if (!flowFile) {
    return
}

def insertSql = "INSERT INTO myTable (zip_file) VALUES (:p_zip_file)"
// Cast the flow file content to the BLOB SQL type and bind it by name.
def namedParams = [p_zip_file: SQL.mydb.BLOB(flowFile.read())]
// Committed automatically on flow file success.
SQL.mydb.executeInsert(namedParams, insertSql)

// Transfer to success without changes.
REL_SUCCESS << flowFile
Inside the script, SQL.mydb is a reference to a groovy.sql.Sql object.
I have created a table below in SQL using the following:
-- Validation table. RuleId is an IDENTITY(1,1) column (seed 1, increment 1);
-- the remaining four columns are required (NOT NULL).
CREATE TABLE [dbo].[Validation](
[RuleId] [int] IDENTITY(1,1) NOT NULL, -- identity column
[AppId] [varchar](255) NOT NULL,
[Date] [date] NOT NULL,
[RuleName] [varchar](255) NOT NULL,
[Value] [nvarchar](4000) NOT NULL
)
NOTE the identity key (RuleId)
When inserting values into the table as below in SQL it works:
Note: the primary key is not inserted, because the identity column fills it in automatically and increments it.
INSERT INTO dbo.Validation VALUES ('TestApp','2020-05-15','MemoryUsageAnomaly','2300MB')
However, when I create a temp view on Databricks and run the same query through PySpark as below:
%python
# Connection settings -- replace each <...> placeholder with a real value.
driver = "<Driver>"
url = "jdbc:sqlserver:<URL>"
database = "<db>"
table = "dbo.Validation"
user = "<user>"
password = "<pass>"

# Import the data: expose the remote table as a DataFrame over JDBC.
remote_table = (
    spark.read.format("jdbc")
    .option("driver", driver)
    .option("url", url)
    .option("database", database)
    .option("dbtable", table)
    .option("user", user)
    .option("password", password)
    .load()
)
remote_table.createOrReplaceTempView("YOUR_TEMP_VIEW_NAMES")

# Insert through the temp view. The statement supplies four values.
spark.sql("INSERT INTO YOUR_TEMP_VIEW_NAMES VALUES ('TestApp','2020-05-15','MemoryUsageAnomaly','2300MB')")
I get the error below:
AnalysisException: 'unknown requires that the data to be inserted have the same number of columns as the target table: target table has 5 column(s) but the inserted data has 4 column(s), including 0 partition column(s) having constant value(s).;'
Why does it work on SQL but not when passing the query through databricks? How can I insert through pyspark without getting this error?
The most straightforward solution here is to use JDBC from a Scala cell, e.g.:
%scala
import java.util.Properties
import java.sql.DriverManager
// Credentials are read from the Databricks secret scope "kv".
val jdbcUsername = dbutils.secrets.get(scope = "kv", key = "sqluser")
val jdbcPassword = dbutils.secrets.get(scope = "kv", key = "sqlpassword")
val driverClass = "com.microsoft.sqlserver.jdbc.SQLServerDriver"
// Create the JDBC URL without passing in the user and password parameters.
val jdbcUrl = s"jdbc:sqlserver://xxxx.database.windows.net:1433;database=AdventureWorks;encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.database.windows.net;loginTimeout=30;"
// Create a Properties() object to hold the parameters.
val connectionProperties = new Properties()
connectionProperties.put("user", s"${jdbcUsername}")
connectionProperties.put("password", s"${jdbcPassword}")
connectionProperties.setProperty("Driver", driverClass)
val sql = "INSERT INTO dbo.Validation VALUES ('TestApp','2020-05-15','MemoryUsageAnomaly','2300MB')"
val connection = DriverManager.getConnection(jdbcUrl, jdbcUsername, jdbcPassword)
try {
  val stmt = connection.createStatement()
  try {
    stmt.execute(sql)
  } finally {
    // Release the statement even when execute throws.
    stmt.close()
  }
} finally {
  // Always release the connection, including on failure.
  connection.close()
}
You could use pyodbc too, but the SQL Server ODBC drivers aren't installed by default, whereas the JDBC drivers are.
A Spark solution would be to create a view in SQL Server and insert against that, e.g.:
-- View over Validation that selects the four non-identity columns only
-- (RuleId is left out).
create view Validation2 as
select AppId,Date,RuleName,Value
from Validation
then
# Read the SQL Server view over JDBC and register it as a temp view with the
# same name, then insert through it.
tableName = "Validation2"
df = spark.read.jdbc(
    url=jdbcUrl,
    table=tableName,
    properties=connectionProperties,
)
df.createOrReplaceTempView(tableName)
sqlContext.sql(
    "INSERT INTO Validation2 VALUES ('TestApp','2020-05-15','MemoryUsageAnomaly','2300MB')"
)
If you want to encapsulate the Scala and call it from another language (like Python), you can use a scala package cell.
eg
%scala
package example
import java.util.Properties
import java.sql.DriverManager
/** Small JDBC helper intended to be called from other notebook languages. */
object JDBCFacade
{
  /** Opens a connection, executes one SQL statement and closes everything.
    *
    * @param url      JDBC connection URL
    * @param sql      statement text passed to `Statement.execute`
    * @param userName database user
    * @param password database password
    */
  def runStatement(url : String, sql : String, userName : String, password: String): Unit =
  {
    val connection = DriverManager.getConnection(url, userName, password)
    try
    {
      // Created inside the try so a failure here still closes the connection.
      val stmt = connection.createStatement()
      try
      {
        stmt.execute(sql)
      }
      finally
      {
        stmt.close()
      }
    }
    finally
    {
      connection.close()
    }
  }
}
and then you can call it like this:
# Credentials are read from the Databricks secret scope "kv".
jdbcUsername = dbutils.secrets.get(scope="kv", key="sqluser")
jdbcPassword = dbutils.secrets.get(scope="kv", key="sqlpassword")
jdbcUrl = "jdbc:sqlserver://xxxx.database.windows.net:1433;database=AdventureWorks;encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.database.windows.net;loginTimeout=30;"
sql = "select 1 a into #foo from sys.objects"

# Reach the Scala object through the JVM gateway and run the statement.
sc._jvm.example.JDBCFacade.runStatement(
    jdbcUrl,
    sql,
    jdbcUsername,
    jdbcPassword,
)
I tried to create an ExternalCatalog to use with the Apache Flink Table API. I created it and added it to the Flink table environment (following the official documentation). For some reason, the only external table present in the catalog is not found during the scan. What did I miss in the code below?
// Catalog name is built from fileNumber, which is defined outside this snippet.
val catalogName = s"externalCatalog$fileNumber"
// Build the in-memory catalog and register it under catalogName.
val ec: ExternalCatalog = getExternalCatalog(catalogName, 1, tableEnv)
tableEnv.registerExternalCatalog(catalogName, ec)
// NOTE(review): the scan path holds only the table name; catalogName is not part of it.
val s1: Table = tableEnv.scan("S_EXT")
/** Builds an in-memory external catalog that contains the single table "S_EXT".
  *
  * @param catalogName name given to the in-memory catalog
  * @param fileNumber  not used by this method
  * @param tableEnv    not used by this method
  * @return the populated catalog
  */
def getExternalCatalog(catalogName: String, fileNumber: Int, tableEnv: BatchTableEnvironment): ExternalCatalog = {
  val catalog = new InMemoryExternalCatalog(catalogName)
  // Describe the table backed by file "S" and register it as "S_EXT".
  val tableS = getExternalCatalogTable("S")
  catalog.createTable("S_EXT", tableS, ignoreIfExists = false)
  catalog
}
/** Describes a CSV file as an external catalog table source with two string
  * columns, "X" and "Y".
  *
  * @param fileName name passed to getFilePath to locate the file
  * @return the external catalog table, built as a table source
  */
private def getExternalCatalogTable(fileName: String): ExternalCatalogTable = {
  // Connector: a file-system path.
  // NOTE(review): fileNumber is not a parameter of this method; it has to
  // come from the enclosing scope.
  val fileSystem = new FileSystem()
  fileSystem.path(getFilePath(fileNumber, fileName))

  // Format: comma-delimited CSV with two STRING fields.
  val csvFormat = new Csv()
  csvFormat.field("X", Types.STRING)
  csvFormat.field("Y", Types.STRING)
  csvFormat.fieldDelimiter(",")

  // Statistics: row count is set to 0.
  val tableStats = new Statistics()
  tableStats.rowCount(0)

  // Metadata: left at its defaults.
  val tableMetadata = new Metadata()

  ExternalCatalogTable.builder(fileSystem)
    .withFormat(csvFormat)
    .withStatistics(tableStats)
    .withMetadata(tableMetadata)
    .asTableSource()
}
The example above is part of this test file in git.
This is probably a namespace issue. Tables in external catalogs are identified by a list of names of the catalog, (potentially schemas,) and finally the table name.
In your example, the following should work:
val s1: Table = tableEnv.scan("externalCatalog1", "S_EXT")
You can have a look at the ExternalCatalogTest to see how external catalogs can be used.
I have encrypted existing data using SQL Server 2016 Always Encrypted. One of the columns is NULLABLE, but when inserting from the screen it accepts neither NULL nor an empty string. How can I make it work when the encrypted column is not mandatory on the screen?
The error that you are seeing is incorrect, please ensure that you are passing the parameter correctly. Here is some sample code on how to do so.
Schema:
-- Table with a single nullable, deterministically encrypted column.
CREATE TABLE [dbo].[SO](
[ssn] [nvarchar](9) COLLATE Latin1_General_BIN2 ENCRYPTED WITH (COLUMN_ENCRYPTION_KEY = [CEK_Auto1], ENCRYPTION_TYPE = Deterministic, ALGORITHM = 'AEAD_AES_256_CBC_HMAC_SHA_256') NULL
)
GO
-- Inserts one row, taking the value from the @ssn parameter.
CREATE PROCEDURE dbo.insertSP @ssn nvarchar(9)
AS
INSERT INTO [dbo].[SO] ([SSN]) VALUES (@SSN);
GO
C# code:
// Build a connection string with column encryption enabled.
SqlConnectionStringBuilder strbldr = new SqlConnectionStringBuilder();
strbldr.DataSource = ".";
strbldr.InitialCatalog = @"exptdb";
strbldr.IntegratedSecurity = true;
strbldr.ColumnEncryptionSetting = SqlConnectionColumnEncryptionSetting.Enabled;
string ssn = "";
using (var conn = new SqlConnection(strbldr.ConnectionString))
using (var command = conn.CreateCommand()) {
    command.CommandType = CommandType.StoredProcedure;
    command.CommandText = @"dbo.insertSP";
    // The value is passed as a typed nvarchar(9) input parameter named @ssn,
    // matching the stored procedure's parameter.
    SqlParameter paramSSN = command.CreateParameter();
    paramSSN.ParameterName = "@ssn";
    paramSSN.SqlDbType = SqlDbType.NVarChar;
    paramSSN.Direction = ParameterDirection.Input;
    paramSSN.Value = ssn;
    paramSSN.Size = 9;
    command.Parameters.Add(paramSSN);
    conn.Open();
    command.ExecuteNonQuery();
}
Note, that in the scenario mentioned above, if
string ssn = "";
then the query succeeds, however if
string ssn = null;
you should see a failure on execution
Additional information: Procedure or function 'insertSP' expects
parameter '@ssn', which was not supplied.
This failure will occur even when ssn column is plaintext
You can insert null value in the encrypted column as follows, since null values are not encrypted:
// Insert a literal NULL into the encrypted column with a plain text command.
using (var conn = new SqlConnection(strbldr.ConnectionString))
using (var command = conn.CreateCommand()) {
    command.CommandText = @"INSERT INTO [dbo].[SO] ([SSN]) VALUES (null)";
    conn.Open();
    command.ExecuteNonQuery();
}
I'm struggling to create tables that belong to a schema in a SQL Server database, and ensuring that primary/foreign keys work correctly.
I'm looking for some examples of code to illustrate how this is done
The ingredients needed for this are __table_args__ and the use of the schema prefix on the ForeignKey
from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy.orm import declarative_base, relationship, sessionmaker

Base = declarative_base()


class Table1(Base):
    """Parent table, created inside the ``my_schema`` schema."""

    __tablename__ = 'table1'
    # The schema is set per table through __table_args__.
    __table_args__ = {"schema": 'my_schema'}

    id = Column(Integer, primary_key=True)
    col1 = Column(String(150))
    col2 = Column(String(100))
    # Paired with Table2.premise so both sides describe the same foreign key.
    reviews = relationship("Table2", back_populates="premise", cascade="delete")


class Table2(Base):
    """Child table in ``my_schema`` that references ``Table1.id``."""

    __tablename__ = 'table2'
    __table_args__ = {"schema": 'my_schema'}

    id = Column(Integer, primary_key=True)
    col2 = Column(String(100))
    # The foreign key target must carry the schema prefix: schema.table.column.
    # ``key`` is declared once; an earlier plain Integer declaration of the
    # same name would simply be overwritten by this one.
    key = Column(Integer, ForeignKey("my_schema.table1.id"), index=True)
    premise = relationship("Table1", back_populates="reviews")


# ``engine`` is expected to be created elsewhere before this point.
Base.metadata.create_all(bind=engine)

DBSession = sessionmaker(bind=engine)
session = DBSession()