I'm trying to generate Craig interpolants using the C API but I get incorrect results.
However, when I dump the same problem to a file via Z3_write_interpolation_problem and call iZ3 I get the expected interpolant.
I attach code that reproduces the problem.
I'm using Z3 4.1.
#include<stdio.h>
#include<stdlib.h>
#include<assert.h>
#include<stdarg.h>
#include<memory.h>
#include<setjmp.h>
#include<iz3.h>
Z3_ast mk_var(Z3_context ctx, const char * name, Z3_sort ty)
{
    Z3_symbol s = Z3_mk_string_symbol(ctx, name);
    return Z3_mk_const(ctx, s, ty);
}

Z3_ast mk_int_var(Z3_context ctx, const char * name)
{
    Z3_sort ty = Z3_mk_int_sort(ctx);
    return mk_var(ctx, name, ty);
}
void interpolation_1(){
    // Create an interpolation-enabled context
    Z3_config cfg = Z3_mk_config();
    Z3_context ctx = Z3_mk_interpolation_context(cfg);
    // Build the formulae
    Z3_ast x0, x1, x2;
    x0 = mk_int_var(ctx, "x0");
    x1 = mk_int_var(ctx, "x1");
    x2 = mk_int_var(ctx, "x2");
    Z3_ast zero = Z3_mk_numeral(ctx, "0", Z3_mk_int_sort(ctx));
    Z3_ast two = Z3_mk_numeral(ctx, "2", Z3_mk_int_sort(ctx));
    Z3_ast ten = Z3_mk_numeral(ctx, "10", Z3_mk_int_sort(ctx));
    Z3_ast c2_operands[2] = { x0, two };
    Z3_ast c1 = Z3_mk_eq(ctx, x0, zero);
    Z3_ast c2 = Z3_mk_eq(ctx, x1, Z3_mk_add(ctx, 2, c2_operands));
    Z3_ast c3_operands[2] = { x1, two };
    Z3_ast c3 = Z3_mk_eq(ctx, x2, Z3_mk_add(ctx, 2, c3_operands));
    Z3_ast c4 = Z3_mk_gt(ctx, x2, ten);
    Z3_ast A_operands[3] = { c1, c2, c3 };
    Z3_ast AB[2] = { Z3_mk_and(ctx, 3, A_operands), c4 };
    // Generate the interpolant
    Z3_push(ctx);
    Z3_ast interps[1];
    Z3_lbool status = Z3_interpolate(ctx, 2, AB, NULL, NULL, interps);
    assert(status == Z3_L_FALSE && "A and B should be unsat");
    printf("Interpolant: %s\n", Z3_ast_to_string(ctx, interps[0]));
    // Dump the interpolation problem to an SMT file;
    // run "iz3 tmp.smt" to compare
    Z3_write_interpolation_problem(ctx, 2, AB, NULL, "tmp.smt");
    Z3_pop(ctx, 1);
}
int main() {
    interpolation_1();
}
I generate an executable using the command:
g++ -fopenmp -o interpolation interpolation.c -I/home/jorge/Systems/z3/include -I/home/jorge/Systems/z3/iz3/include -L/home/jorge/Systems/z3/lib -L/home/jorge/Systems/z3/iz3/lib -L/home/jorge/Systems/libfoci-1.1 -lz3 -liz3 -lfoci
Note that the constraints are basically:
A = (x0 = 0 and x1 = x0 + 2 and x2 = x1 + 2),
and B = (x2 > 10),
which are clearly unsat. Moreover, it's also easy to see that the only common variable is x2. Thus, any valid interpolant can only include x2.
If I run the executable ./interpolation I obtain the nonsense interpolant:
(and (>= (+ x0 (* -1 x1)) -2) (>= (+ x1 (* -1 x3)) -2) (<= x0 0))
However, if I run "iz3 tmp.smt" (where tmp.smt is the file generated using Z3_write_interpolation_problem) I obtain a valid interpolant:
unsat interpolant: (<= x2 10)
Is this a bug, or am I missing some important precondition when I call Z3_interpolate?
P.S. I could not find any example using iZ3 with the C API.
Cheers,
Jorge
iZ3 was not built against version 4+, and the enumeration types and other features in the headers have changed between versions. You cannot yet use iZ3 against the latest
versions of Z3. We hope to address this soon, most likely by placing the iZ3 stack alongside
the rest of the Z3 sources, but in the meantime use the previous release that iZ3 was built for.
I trained a TensorFlow Lite object-detection model with a YOLO architecture in Python. Now I am trying to do the inference in C, but I cannot make it work after the interpreter is invoked.
The main.c file looks something like this:
main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "tensorflow/lite/c/c_api.h"
#include "tensorflow/lite/c/c_api_experimental.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/c/builtin_op_data.h"
int inputImageHeight = 416;
int inputImageWidth = 416;
int channel = 3;
int imageSize = inputImageHeight * inputImageWidth * channel;
float imageDataBuffer[imageSize]; // Earlier steps store an image here, resized to the model's input size (416x416) and normalized to [0, 1].

// Load the model.
TfLiteModel* model = TfLiteModelCreateFromFile(modelFilePath);

// Create the interpreter.
TfLiteInterpreter* interpreter = TfLiteInterpreterCreate(model, NULL);

// Allocate tensors.
if (TfLiteInterpreterAllocateTensors(interpreter) != kTfLiteOk)
{
    disposeTfLiteObjects(model, interpreter);
    return -1;
}

int inputDims[4] = {1, inputImageHeight, inputImageWidth, channel};
if (TfLiteInterpreterResizeInputTensor(interpreter, 0, inputDims, 4) != kTfLiteOk)
{
    disposeTfLiteObjects(model, interpreter);
    return -1;
}

if (TfLiteInterpreterAllocateTensors(interpreter) != kTfLiteOk)
{
    disposeTfLiteObjects(model, interpreter);
    return -1;
}

// Invoke the TensorFlow interpreter given the input and the model.
// The input tensor.
TfLiteTensor* inputTensor = TfLiteInterpreterGetInputTensor(interpreter, 0);

// Copy image data into the input tensor.
if (TfLiteTensorCopyFromBuffer(inputTensor, imageDataBuffer, imageSize * sizeof(float)) != kTfLiteOk)
{
    disposeTfLiteObjects(model, interpreter);
    return -1;
}

// Invoke the interpreter.
if (TfLiteInterpreterInvoke(interpreter) != kTfLiteOk)
{
    disposeTfLiteObjects(model, interpreter);
    return -1;
}
Until this point everything seems to work fine. From here, I am failing to extract the output data.
From Python I know that the output is a couple of tensors with dimension [1,13,13,18] and [1,26,26,18]. They represent a detection following the YOLO format: [x, y, W, H, detection_prob, class_prob] (only 1 class to be detected) with 3 anchors (6*3=18) over a 13x13 grid. Two tensors due to two anchor ratios.
In Python, fetching the output data results in a series of vectors to be post-processed to get the positive detections, followed by non-max suppression.
In C, however, I am not sure how to manage this step. So far I have assigned the output tensor like this:
// Extract the output tensor data.
const TfLiteTensor* outputTensor = TfLiteInterpreterGetOutputTensor(interpreter, 0);
but I don't know how to properly extract the data.
Via gdb I could see that, at least, the first 4 dimensions coincide with [1,13,13,18]:
(gdb) print outputTensor->dims[0]
$10 = {size = 4, data = 0x555555785e04}
(gdb) print outputTensor->dims[1]
$11 = {size = 1, data = 0x555555785e08}
(gdb) print outputTensor->dims[2]
$12 = {size = 13, data = 0x555555785e0c}
(gdb) print outputTensor->dims[3]
$13 = {size = 13, data = 0x555555785e10}
(gdb) print outputTensor->dims[4]
$14 = {size = 18, data = 0x555555785e14}
I couldn't find the equivalent to [1,26,26,18] in the remaining dimensions though.
If I print the output tensor data:
(gdb) print outputTensor->data
$3 = {i32 = 0x7ffff561c8c0, i64 = 0x7ffff561c8c0, f = 0x7ffff561c8c0, f16 = 0x7ffff561c8c0,
f64 = 0x7ffff561c8c0, raw = 0x7ffff561c8c0 "", raw_const = 0x7ffff561c8c0 "",
uint8 = 0x7ffff561c8c0 "", b = 0x7ffff561c8c0, i16 = 0x7ffff561c8c0, c64 = 0x7ffff561c8c0,
c128 = 0x7ffff561c8c0, int8 = 0x7ffff561c8c0 "", data = 0x7ffff561c8c0}
First position of .f field:
(gdb) print outputTensor->data.f[0]
$5 = -nan(0x400000)
Anyone knows how to get the output detections? The goal is to get the "raw" detections and post-process them afterwards. Although a "one-shoot" approach could work too.
From what you said, you have 2 outputs, and the first one is correct with shape [1, 13, 13, 18].
So you need to fetch the second one:
const TfLiteTensor* output_tensor_2 =
    TfLiteInterpreterGetOutputTensor(interpreter, 1);
// Copy the data into your own buffer; adjust the element type as needed.
TfLiteTensorCopyToBuffer(output_tensor_2, output.data(),
                         output.size() * sizeof(float));
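For completeness, here is a minimal C sketch of reading an output back, using only documented C API calls (TfLiteInterpreterGetOutputTensor, TfLiteTensorDim, TfLiteTensorCopyToBuffer). The helper name readOutput and the error handling are my own additions, not part of the API:

#include <stdlib.h>
#include "tensorflow/lite/c/c_api.h"

// Hypothetical helper: copies the idx-th output of an already-invoked
// interpreter into a freshly malloc'd float buffer and reports its shape.
static float* readOutput(const TfLiteInterpreter* interpreter, int idx,
                         int* rows, int* cols, int* depth)
{
    const TfLiteTensor* out = TfLiteInterpreterGetOutputTensor(interpreter, idx);
    *rows  = TfLiteTensorDim(out, 1);   // 13 or 26
    *cols  = TfLiteTensorDim(out, 2);   // 13 or 26
    *depth = TfLiteTensorDim(out, 3);   // 18 = 3 anchors * 6 values
    size_t count = (size_t)(*rows) * (*cols) * (*depth);
    float* data = malloc(count * sizeof(float));
    if (data == NULL ||
        TfLiteTensorCopyToBuffer(out, data, count * sizeof(float)) != kTfLiteOk) {
        free(data);
        return NULL;
    }
    return data;
}

Assuming the usual NHWC layout, the detection for grid cell (r, c) and anchor a then starts at data[(r * cols + c) * depth + a * 6], with the six fields in the [x, y, w, h, detection_prob, class_prob] order described in the question.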
I'm making a Mode 7/perspective projection project in Godot. When I run it, it produces the expected effect, displaying a 2D image as if it were a 3D plane.
Code:
func _ready():
    map.load("res://map2.png")
    perspective.load("res://map2.png")
    for px in 1:
        self.texture = display
        for y in map.get_height():
            _y = (y + py - 1) / z
            for x in map.get_width():
                _x = (x + px) / z
                map.lock()
                pix = map.get_pixel(_x, _y)
                map.unlock()
                perspective.lock()
                perspective.set_pixel(x, y, pix)
                perspective.unlock()
        display.create_from_image(perspective)
        z += 1
Image:
However, I have a problem. The code is in the _ready function, inside a for loop. I want it to run every frame, but when I increase the number of repeats from one to two, the entire image turns red. I don't know what's causing this. One guess was that I wasn't locking and unlocking the images properly, but that is most likely not the case. Another guess was that the x and y variables were not resetting each time, but that was also working fine. I don't think the loop itself is the problem, but I have no idea what's wrong.
I struggled to make your code run. I halfway gave up and implemented the logic from my prior answer using lock bits instead. This is the code:
extends Sprite

export(Transform) var matrix:Transform

var sampler:Image
var buffer:Image
var size:Vector2
var center:Vector2

func _ready():
    sampler = texture.get_data()
    var err = sampler.decompress()
    if err != OK:
        push_error("Failed to decompress texture")
        return

    size = Vector2(texture.get_width(), texture.get_height())
    center = size * 0.5
    buffer = Image.new()
    buffer.create(int(size.x), int(size.y), false, Image.FORMAT_RGBA8)

func _process(_delta):
    #matrix = matrix.rotated(Vector3.RIGHT, 0.01)
    sampler.lock()
    buffer.lock()
    # iterate rows (y) over the height, columns (x) over the width
    for y in size.y:
        for x in size.x:
            var uv:Vector3 = matrix * Vector3(x - center.x, y - center.y, 1.0)
            if uv.z <= 0.0:
                buffer.set_pixel(x, y, Color.transparent)
                continue

            var _x = (uv.x / uv.z) + center.x
            var _y = (uv.y / uv.z) + center.y
            if _x < 0.0 or _x >= size.x or _y < 0.0 or _y >= size.y:
                buffer.set_pixel(x, y, Color.transparent)
                continue

            #buffer.set_pixel(x, y, Color(_x / size.x, y / size.y, 0.0))
            buffer.set_pixel(x, y, sampler.get_pixel(_x, _y))

    buffer.unlock()
    sampler.unlock()
    var display = ImageTexture.new()
    display.create_from_image(buffer, 0)
    self.texture = display
As you can see, I'm exporting a Transform to be available in the editor. That is a proper 3D Transform. There is a commented-out line in _process that does a rotation; try it out.
The sampler Image is a copy of the Texture of the Sprite (the copy is made in _ready), and the buffer Image is where what is to be displayed is constructed.
The code creates an ImageTexture from buffer and replaces the current texture with it each frame (in _process). I'm setting flags to 0, because FLAG_REPEAT plus FLAG_FILTER blurred the border into the opposite side of the Sprite.
The Vector2 size holds the size of the texture, and the Vector2 center holds the coordinates of its center.
As I said at the start, this is the logic from my prior answer. This line:
vec3 uv = matrix * vec3(UV - 0.5, 1.0);
Is equivalent to (except I'm not scaling the coordinates to the range from 0 to 1):
var uv:Vector3 = matrix * Vector3(x - center.x, y - center.y, 1.0)
Then I had this line:
if (uv.z < 0.0) discard;
Which turned out like this:
if uv.z <= 0.0:
    buffer.set_pixel(x, y, Color.transparent)
    continue
I'm setting transparent because I do not recreate the buffer, nor clear it beforehand.
Finally this line:
COLOR = texture(TEXTURE, (uv.xy / uv.z) + 0.5);
Turned out like this:
var _x = (uv.x / uv.z) + center.x
var _y = (uv.y / uv.z) + center.y
if _x < 0.0 or _x >= size.x or _y < 0.0 or _y >= size.y:
    buffer.set_pixel(x, y, Color.transparent)
    continue
buffer.set_pixel(x, y, sampler.get_pixel(_x, _y))
As for the result, here is the Godot icon "rotating in 3D" (not really, but that is the idea):
Please disregard visual artifact due to GIF encoding.
I'm not sure if you want to stay with the logic of my prior answer. However, I believe this one should not be too hard to modify for your needs.
Addendum
I used a Transform because there is no convenient Matrix type available. However, the Transform uses a Matrix internally. See also Transformation matrix.
The Mode 7 formula according to Wikipedia works with a 2 by 2 Matrix, which is simpler than what I have here. However, you are going to need the product of a Matrix and a Vector anyway. You cannot compute the components independently.
This is the formula according to Wikipedia:
r' = M*(r - r_0) + r_0
That is:
var rp = mult(M, r - r_0) + r_0
Where mult would look like this:
func mult(matrix, vector:Vector2) -> Vector2:
    var x = vector.x * matrix.a + vector.y * matrix.b
    var y = vector.x * matrix.c + vector.y * matrix.d
    return Vector2(x, y)
However, as I said, there is no convenient matrix type. If we export a, b, c, and d, we have:
var rp = mult(a, b, c, d, r - r_0) + r_0
And mult looks like this:
func mult(a:float, b:float, c:float, d:float, vector:Vector2) -> Vector2:
    var x = vector.x * a + vector.y * b
    var y = vector.x * c + vector.y * d
    return Vector2(x, y)
We can easily modify the code to do that. First, export a, b, c, and d as I said:
export(float) var a:float
export(float) var b:float
export(float) var c:float
export(float) var d:float
And this is _process modified:
func _process(_delta):
    sampler.lock()
    buffer.lock()
    # iterate rows (y) over the height, columns (x) over the width
    for y in size.y:
        for x in size.x:
            var rp = mult(a, b, c, d, Vector2(x, y) - center) + center
            if rp.x < 0.0 or rp.x >= size.x or rp.y < 0.0 or rp.y >= size.y:
                buffer.set_pixel(x, y, Color.transparent)
                continue

            buffer.set_pixel(x, y, sampler.get_pixel(rp.x, rp.y))

    buffer.unlock()
    sampler.unlock()
    var display = ImageTexture.new()
    display.create_from_image(buffer, 6)
    self.texture = display
Of course, mult is the one I showed above. I'm assuming here that r_0 is what I called center.
I'm not sure how to interpret a, b, c and d, so here is a = 1, b = 2, c = 3 and d = 4:
When I remove the tests that compute the minimum and maximum from the loop, the execution time is actually longer than with the tests. How is that possible?
Edit:
After running more tests, it seems the runtime is not constant, i.e. the same code
can run in 9 sec or 13 sec... So it was just a repeatable coincidence. Repeatable until you do enough tests, that is...
Some details:
execution time with the min/max test: 9 sec
execution time without the min/max test: 13 sec
CFLAGS=-Wall -O2 -fPIC -g
gcc 4.4.3, 32-bit
The section to remove is now indicated in the code.
A guess: bad cache interaction?
void FillFullValues(void)
{
    int i, j, k;
    double X, Y, Z;
    double p, q, r, p1, q1, r1;
    double Ls, as, bs;
    unsigned long t1, t2;
    t1 = GET_TICK_COUNT();
    MinLs = Minas = Minbs = 1000000.0;
    MaxLs = Maxas = Maxbs = 0.0;
    for (i = 0; i < 256; i++)
    {
        for (j = 0; j < 256; j++)
        {
            for (k = 0; k < 256; k++)
            {
                X = 0.4124*CielabValues[i] + 0.3576*CielabValues[j] + 0.1805*CielabValues[k];
                Y = 0.2126*CielabValues[i] + 0.7152*CielabValues[j] + 0.0722*CielabValues[k];
                Z = 0.0193*CielabValues[i] + 0.1192*CielabValues[j] + 0.9505*CielabValues[k];
                p = X * InvXn;
                q = Y;
                r = Z * InvZn;
                if (q > 0.008856)
                {
                    Ls = 116*pow(q,third) - 16;
                }
                else
                {
                    Ls = 903.3*q;
                }
                if (q <= 0.008856)
                {
                    q1 = 7.787*q + seiz;
                }
                else
                {
                    q1 = pow(q,third);
                }
                if (p <= 0.008856)
                {
                    p1 = 7.787*p + seiz;
                }
                else
                {
                    p1 = pow(p,third);
                }
                if (r <= 0.008856)
                {
                    r1 = 7.787*r + seiz;
                }
                else
                {
                    r1 = pow(r,third);
                }
                as = 500*(p1 - q1);
                bs = 200*(q1 - r1);
                //
                // cast to char to reduce the array size
                //
                FullValuesLs[i][j][k] = (char)(Ls);
                FullValuesas[i][j][k] = (char)(as);
                FullValuesbs[i][j][k] = (char)(bs);
                //// Remove this and get slower code
                if (MaxLs < Ls)
                    MaxLs = Ls;
                if ((fabs(Ls) < MinLs) && (fabs(Ls) > 0))  /* fabs, not abs, for doubles */
                    MinLs = Ls;
                if (Maxas < as)
                    Maxas = as;
                if ((fabs(as) < Minas) && (fabs(as) > 0))
                    Minas = as;
                if (Maxbs < bs)
                    Maxbs = bs;
                if ((fabs(bs) < Minbs) && (fabs(bs) > 0))
                    Minbs = bs;
                //// End of Remove
            }
        }
    }
    TRACE(_T("LMax = %f LMin = %f\n"), (MaxLs), (MinLs));
    TRACE(_T("aMax = %f aMin = %f\n"), (Maxas), (Minas));
    TRACE(_T("bMax = %f bMin = %f\n"), (Maxbs), (Minbs));
    t2 = GET_TICK_COUNT();
    TRACE(_T("WhiteBalance init : %lu ms\n"), t2 - t1);
}
I think the compiler is trying to unroll the inner loop because removing the min/max tests removes a dependency between iterations. But somehow this doesn't help in your case, maybe because the loop is too big and uses too many registers to be unrolled.
Try turning off unrolling and post the results again.
If that is the case, I would suggest you submit a performance issue to gcc.
PS: I think you can merge the if (q>0.008856) and if (q<=0.008856) tests into a single if/else, as sketched below.
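For instance, a sketch of the merged test, reusing the variables from the question (the cube root is then computed at most once for q):

/* Merged version of the two q tests: one branch computes both
   Ls and q1, so pow() is called at most once per value of q. */
if (q > 0.008856)
{
    q1 = pow(q, third);
    Ls = 116*q1 - 16;   /* same as 116*pow(q,third)-16, reusing q1 */
}
else
{
    q1 = 7.787*q + seiz;
    Ls = 903.3*q;
}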
Maybe it's the cache, maybe unrolling problems; there is only one way to answer this: look at the generated code (e.g. by using the -S option). Maybe you can post it, or spot the difference yourself when comparing the two versions.
EDIT: As you have now clarified that it was just the measurement, I can only recommend (or better, command ;-)) that when you want runtime numbers, ALWAYS put the run in a loop and average. It's best to do this outside your program (in a shell script), so your cache is not already filled with the right data.
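For instance, a minimal sketch of such a script (GNU time is assumed; ./myprog is a placeholder for your executable):

#!/bin/sh
# Run the program N times, record each wall-clock time, then average.
N=10
: > times.txt
for i in $(seq 1 $N); do
    /usr/bin/time -f "%e" ./myprog 2>> times.txt
done
awk '{ sum += $1 } END { print "mean:", sum / NR, "s" }' times.txt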
Hey guys, can anyone help me with this?
if size(cost,1) == 2
    A = (4*Pdt*cost(1,3)*cost(2,3)) + 2*(cost(1,2)*cost(2,3)) + (cost(1,3)*cost(2,2));
    B = 2*(cost(2,3) + cost(1,3));
    lambda = num2str(A ./ B);
    set(handles.answer1_staticText, 'String', lambda);
    P1 = (lambda - cost(1,2))./(2*cost(1,3));
    P2 = (lambda - cost(2,2))./(2*cost(2,3));
    PT = mat2str(P1 + P2);
    set(handles.answer2_staticText, 'String', PT);
    guidata(hObject, handles);
end
With the code above, the answer comes out like this:
[11.75 11.25 11.25 11.75 10.75 11.5 12.75 12.75 13]
My question: I want to display my answers in the static text box like this:
P1 = (%answer for P1)
P2 = (%answer for P2)
P TOTAL = (%answer for PT)
Can anyone help me with the coding?
You have converted lambda to a string (using num2str), and thus the calculations of P1 etc. will produce unexpected results.
It's better to convert to a string only in the display step, so these accidents won't happen.
Try this:
if size(cost,1) == 2
    A = (4*Pdt*cost(1,3)*cost(2,3)) + 2*(cost(1,2)*cost(2,3)) + (cost(1,3)*cost(2,2));
    B = 2*(cost(2,3) + cost(1,3));
    lambda = A ./ B;
    set(handles.answer1_staticText, 'String', num2str(lambda));
    P1 = (lambda - cost(1,2))./(2*cost(1,3));
    P2 = (lambda - cost(2,2))./(2*cost(2,3));
    PT = P1 + P2;
    set(handles.answer2_staticText, 'String', num2str(PT));
    guidata(hObject, handles);
end
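For the "P1 = ..." layout you asked for, one option is to build the strings only at display time. A sketch, assuming a multi-line static text box (which accepts a cell array of strings):

% Build one line per quantity; num2str is applied only for display.
lines = {sprintf('P1 = %s', num2str(P1)); ...
         sprintf('P2 = %s', num2str(P2)); ...
         sprintf('P TOTAL = %s', num2str(PT))};
set(handles.answer2_staticText, 'String', lines);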
// Arg0 - Map, Arg1 - X, Arg2 - Y, Arg3 - Direction, Arg4 - MaxDistance
var xx, yy, dist, x1, y1, dir, maxdist, res, map;
map = argument0
x1 = argument1
y1 = argument2
dir = argument3
maxdist = argument4
dist = 0
do {
    dist += 1
    xx = x1 + round(lengthdir_x(dist, dir))
    yy = y1 + round(lengthdir_y(dist, dir))
} until (block_isSolid(map_get_block(map, xx, yy)) or dist > maxdist)
if !block_isSolid(map_get_block(map, xx, yy)) {
    return false
} else {
    res = ds_list_create()
    ds_list_add(res, xx)
    ds_list_add(res, yy)
    return res
}
There's the function. lengthdir_x/y is essentially cos/sin(dir)*dist.
Don't yell at me for putting the C tag on there. The languages are very very similar, to the point where I could almost copy this straight in.
Right, formalities done:
The current algorithm will sometimes step diagonally (where both x and y change by one in either sign), but I don't want it to do this. For example:
Current (where x marks the ray's path):
xoooo
oxooo
ooxoo
oooxo
oooox
Wanted:
xxooo
oxxoo
ooxxo
oooxx
oooox
Make sense?
Please help.
delta is a float: the x-distance of a virtual second "ray" (it should be around 1.0f to 2.0f; just experiment).
Delta should not be less than the size of a single pixel in the map.
do {
    dist += 1
    xx = x1 + round(lengthdir_x(dist, dir))
    yy = y1 + round(lengthdir_y(dist, dir))
} until (block_isSolid(map_get_block(map, xx, yy)) or block_isSolid(map_get_block(map, xx + delta, yy)) or dist > maxdist)
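If the delta trick still lets the occasional diagonal through, another option is a traversal that advances exactly one cell per iteration, choosing the axis whose boundary the ray crosses first (the classic Amanatides & Woo grid walk). Since the question carries the C tag, here is a minimal C sketch; Map, map_get_block, and block_isSolid are assumed ports of the functions above, dir is in radians, maxdist is interpreted as a maximum number of cell steps, and the ray is assumed to start at the centre of cell (x1, y1):

#include <math.h>
#include <stdbool.h>

// Casts a ray from cell (x1, y1) in direction dir, never stepping
// diagonally: each iteration moves exactly one cell along x OR y,
// whichever axis boundary the ray crosses first.
bool cast_ray_4connected(Map *map, int x1, int y1, double dir,
                         int maxdist, int *hitx, int *hity)
{
    double dx = cos(dir), dy = sin(dir);
    int x = x1, y = y1;
    int stepX = (dx >= 0) ? 1 : -1;
    int stepY = (dy >= 0) ? 1 : -1;
    // Ray distance between successive x (resp. y) cell boundaries.
    double tDeltaX = (dx != 0) ? fabs(1.0 / dx) : INFINITY;
    double tDeltaY = (dy != 0) ? fabs(1.0 / dy) : INFINITY;
    // Ray distance to the first boundary (starting from the cell centre).
    double tMaxX = 0.5 * tDeltaX;
    double tMaxY = 0.5 * tDeltaY;

    for (int steps = 0; steps < maxdist; ++steps) {
        if (tMaxX < tMaxY) { x += stepX; tMaxX += tDeltaX; }  // cross an x boundary
        else               { y += stepY; tMaxY += tDeltaY; }  // cross a y boundary
        if (block_isSolid(map_get_block(map, x, y))) {
            *hitx = x;
            *hity = y;
            return true;
        }
    }
    return false;  // nothing solid within maxdist steps
}

Because only one of x and y changes per iteration, the visited cells form exactly the 4-connected staircase pattern you drew under "Wanted".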