LLVM Pass to insert an external function call to LLVM bitcode - c

I am writing an LLVM pass to instrument a C source program. I want to insert a function call before each branch instruction which calls an external function like this :
void print(int x){
printf("x = %d\n", x);
return;
}
I want to link this external function to C source code using llvm-link tool and then instrument the code using opt tool.
Ths pass that I have implemented is like this:
#include "llvm/Pass.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/IRBuilder.h"
#include <vector>
using namespace llvm;
namespace{
struct ir_instrumentation : public ModulePass{
static char ID;
Function *monitor;
ir_instrumentation() : ModulePass(ID) {}
virtual bool runOnModule(Module &M)
{
std::vector<Type *> args;
args.push_back(Type::getInt32Ty(M.getContext()));
ArrayRef<Type*> argsRef(args);
FunctionType *FT = FunctionType::get(Type::getVoidTy(M.getContext()), args, false);
Constant* myFunc = M.getOrInsertFunction("print", FT, NULL);
minitor = cast<Function>(myFunc);
for(Module::iterator F = M.begin(), E = M.end(); F!= E; ++F)
{
for(Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
{
for(BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI)
{
if(isa<BranchInst>(&(*BI)) )
{
errs() << "found a branch instruction!\n";
ArrayRef< Value* > arguments(ConstantInt::get(Type::getInt32Ty(M.getContext()), 5, true));
Instruction *newInst = CallInst::Create(monitor, arguments, "");
BB->getInstList().insert(BI, newInst);
errs() << "Inserted the function!\n";
}
}
}
}
return true;
}
};
char ir_instrumentation::ID = 0;
static RegisterPass<ir_instrumentation> X("ir-instrumentation", "LLVM IR Instrumentation Pass");
}
LLVM is configured and built fine with this pass but when I use opt, I get this error :
opt: /llvm/lib/IR/Type.cpp:281:
llvm::FunctionType::FunctionType(llvm::Type*, llvm::ArrayRefllvm::Type*, bool):
Assertion `isValidReturnType(Result) && "invalid return type for function"' failed.
I think the problem is something like mismatching between the function type that I have declared and the external function (like the context).
LLVM version: LLVM version 7.0.0svn
Until now I have not solved the problem.
Thanks

I could finally solve this problem and successfully instrument LLVM bitcode. After a lot of trouble with function getOrInsertFunction, I found out it is not really necessary to use this method in my case. I just simply changed my pass to this:
#include "llvm/Pass.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/IRBuilder.h"
#include <vector>
using namespace llvm;
namespace{
struct ir_instrumentation : public ModulePass{
static char ID;
Function *monitor;
ir_instrumentation() : ModulePass(ID) {}
virtual bool runOnModule(Module &M)
{
errs() << "====----- Entered Module " << M.getName() << ".\n";
int counter = 0;
for(Module::iterator F = M.begin(), E = M.end(); F!= E; ++F)
{
errs() << "Function name: " << F->getName() << ".\n";
if(F->getName() == "print"){
monitor = cast<Function>(F);
continue;
}
for(Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
{
for(BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI)
{
if(isa<BranchInst>(&(*BI)) )
{
errs() << "found a brach instruction!\n";
ArrayRef< Value* > arguments(ConstantInt::get(Type::getInt32Ty(M.getContext()), counter, true));
counter++;
Instruction *newInst = CallInst::Create(monitor, arguments, "");
BB->getInstList().insert(BI, newInst);
errs() << "Inserted the function!\n";
}
}
}
}
return true;
}
};
char ir_instrumentation::ID = 0;
static RegisterPass<ir_instrumentation> X("ir-instrumentation", "LLVM IR Instrumentation Pass");
}
As I know the name of the external function, I can simply find it through iterating over all functions of module and then use it in the desired way.
Obviously the problem was caused by calling module->getOrInsertFunction and the function type. My experience says that this method is more useful when you want to insert a new function and declare the prototype of your own function. Using it to get an existing function is challenging (e.g. setting the right prototype, ...)
Thanks

Related

Excel Add-in: Assignment in for loop causes segmentation fault but line-by-line assignments work. Why?

I just tested a toy Excel add-in project, cross building the XLL with mingw32 tool chains.
Here is my code:
//testXLL.c
#include "windows.h"
#include "xlcall.h"
#define MEMORYSIZE 65535000
char vMemBlock[MEMORYSIZE];
int vOffsetMemBlock =0;
LPSTR GetTempMemory(int cBytes){
LPSTR lpMemory;
if(vOffsetMemBlock + cBytes > MEMORYSIZE)
return 0;
else{
lpMemory = (LPSTR) &vMemBlock + vOffsetMemBlock;
vOffsetMemBlock += cBytes;
if(vOffsetMemBlock & 1) vOffsetMemBlock++;
return lpMemory;
}
}
LPXLOPER TempStr(LPSTR lpstr){
LPXLOPER lpx;
int chars;
lpx = (LPXLOPER)GetTempMemory(sizeof(XLOPER));
if(!lpx) return 0;
chars = lstrlen(lpstr);
if(chars>255) chars=255;
lpx->val.str=(char*)GetTempMemory((sizeof(char)*chars+1));
if(!lpx->val.str) return 0;
strncpy(lpx->val.str, lpstr,chars);
lpx->val.str[0]=(BYTE) chars;
//lpx->val.str[chars]='\0';
lpx->xltype = xltypeStr;
return lpx;
}
#ifdef __cplusplus
extern "C" {
#endif
__declspec(dllexport) double __stdcall myadd2(double a1,double a2){
return a1+a2;
}
static char functionTable[11][255] =
{" myadd2", // procedure
" BBB", // type_text
" add", // function_text
" add1,add2", // argument_text
" 1", // macro_type
" category", // category
" ", // shortcut_text
" some help topic", // help_topic
" Adds toy", // function_help
" 1st.", // argument_help1
" 2nd" // argument_help2
};
__declspec(dllexport) int __stdcall xlAutoOpen(){
LPXLOPER pxDLL;
Excel4(xlGetName,pxDLL,0);
XLOPER xlRegArgs[11];
for(int i = 0; i < 11; i++){
xlRegArgs[i] = *TempStr(functionTable[i]);
}
Excel4(xlfRegister, 0, 12,
pxDLL,
&xlRegArgs[0], &xlRegArgs[1], &xlRegArgs[2],
&xlRegArgs[3], &xlRegArgs[4], &xlRegArgs[5],
&xlRegArgs[6], &xlRegArgs[7], &xlRegArgs[8],
&xlRegArgs[9], &xlRegArgs[10]);
return 1;
}
__declspec(dllexport) LPXLOPER __stdcall xlAddInManagerInfo(LPXLOPER xlAction) {
static XLOPER xlReturn, xlLongName, xlTemp;
xlTemp.xltype = xltypeInt;
xlTemp.val.w = xltypeInt;
Excel4(xlCoerce, &xlReturn, 2, xlAction, &xlTemp);
if(1 == xlReturn.val.w) {
xlLongName = *TempStr(" xll-name");
} else {
xlLongName.xltype = xltypeErr;
xlLongName.val.err = xlerrValue;
}
return &xlLongName;
}
#ifdef __cplusplus
}
#endif
I built this testXLL.c file in Ubuntu:
>i686-w64-mingw32-gcc -shared -Wl,--kill-at testXLL.c -o win.xll -L. -lxlcall32
This generates the "win.xll" successfully but, when loading this win.xll, Excel crashes.
In Windows 10, I tried to use gdb to debug it, but I can't catch break point in the xll file – it got disabled automatically when loading. But I can see in the gdb output, it is a segmentation fault when Excel crashes.
XLOPER xlRegArgs[11];
for(int i = 0; i < 11; i++){
xlRegArgs[i] = *TempStr(functionTable[i]);
}
What's weird is that, if I substitute the above for loop with the following line-by-line assignments in the xlAutoOpen function, the compiled XLL file works fine in Excel:
XLOPER xlRegArgs[11];
xlRegArgs[0] = *TempStr(functionTable[0]);
xlRegArgs[1] = *TempStr(functionTable[1]);
xlRegArgs[2] = *TempStr(functionTable[2]);
xlRegArgs[3] = *TempStr(functionTable[3]);
xlRegArgs[4] = *TempStr(functionTable[4]);
xlRegArgs[5] = *TempStr(functionTable[5]);
xlRegArgs[6] = *TempStr(functionTable[6]);
xlRegArgs[7] = *TempStr(functionTable[7]);
xlRegArgs[8] = *TempStr(functionTable[8]);
xlRegArgs[9] = *TempStr(functionTable[9]);
xlRegArgs[10] = *TempStr(functionTable[10]);
Please enlighten me. What's the difference between these two assignment approaches?
Although I don't (yet) have a full explanation for this behaviour, I'm posting this as a possible 'workaround', which I have used in a very similar case I encountered in one of my projects.
The issue appears to be some form of 'stack corruption' caused by the use of the function-local variable (i) used as the loop index; converting this to a global/static variable will likely fix the issue. The following code snippet is a suggested fix (I have changed the name of the index variable to avoid possible name clashes elsewhere in the code):
///...
static int regloop; // Used as the loop index, below...
__declspec(dllexport) int __stdcall xlAutoOpen(){
LPXLOPER pxDLL;
Excel4(xlGetName,pxDLL,0);
XLOPER xlRegArgs[11];
for(regloop = 0; regloop < 11; regloop++){
xlRegArgs[regloop] = *TempStr(functionTable[regloop]);
}
Here's the section of code from my aforementioned project (but note this is C++/MFC) that exhibits the same sort of behaviour – but only in x86 builds (x64 builds work without issue):
static int plin; // NOTA BENE:- We use this in the two functions below, as the use of
// a local 'plin' loop index is prone to induce stack corruption (?),
// especially in MSVC 2017 (MFC 14) builds for x86.
void BasicApp::OnUpdatePICmd(uint32_t nID, void *pUI)
{
//! for (int plin = 0; plin < Plugin_Number; ++plin) { // Can cause problems - vide supra
for (plin = 0; plin < Plugin_Number; ++plin) {
BOOL mEbl = FALSE; int mChk = -1;
if ((Plugin_UDCfnc[plin] != nullptr) && Plugin_UDCfnc[plin](nID, &mEbl, &mChk)) {
CommandEnable(pUI, mEbl ? true : false);
if (mChk >= 0) CmdUISetCheck(pUI, mChk);
return;
}
}
CommandEnable(pUI, false);
return;
}
(The Plugin_UDCfnc is a static array member of the BasicApp class.)
I have, in the years since the above code was written, had occasional 'fleeting insights' into why this is happening but, as of now, I can't offer a more robust fix. I shall revisit the issue and update this post if I should stumble upon a resolution. In the meantime, others are welcome to take this as a 'clue' and post their own explanations/solutions.

swift3.0 Cannot convert value of type '[UnsafeMutablePointer<Int8>]' to expected argument type 'UnsafeMutablePointer<Int8>?'

This code worked well in Swift2.3 and now I am converting it to Swift3. So I am getting this error. Anyone has idea, how to fix this?
var cmdLnConf: OpaquePointer?
fileprivate var cArgs: [UnsafeMutablePointer<Int8>]
public init?(args: (String,String)...) {
// Create [UnsafeMutablePointer<Int8>].
cArgs = args.flatMap { (name, value) -> [UnsafeMutablePointer<Int8>] in
//strdup move the strings to the heap and return a UnsageMutablePointer<Int8>
return [strdup(name),strdup(value)]
}
cmdLnConf = cmd_ln_parse_r(nil, ps_args(), CInt(cArgs.count), &cArgs, STrue)
if cmdLnConf == nil {
return nil
}
}
enter image description here
based on our discussion it seems that parameter in your C function should be char *p[]
I made a small test
//
// f.h
// test001
//
#ifndef f_h
#define f_h
#include <stdio.h>
void f(char *p[], int len);
#endif /* f_h */
I defined the function with some basic functionality
//
// f.c
// test001
#include "f.h"
void f(char *p[], int len) {
for(int i = 0; i<len; i++) {
printf("%s\n", p[i]);
};
};
with the required bridging header
//
// Use this file to import your target's public headers that you would like to expose to Swift.
//
#include "f.h"
and swift 'command line' application
//
// main.swift
// test001
//
import Darwin
var s0 = strdup("alfa")
var s1 = strdup("beta")
var s2 = strdup("gama")
var s3 = strdup("delta")
var arr = [s0,s1,s2,s3]
let ac = Int32(arr.count)
arr.withUnsafeMutableBytes { (p) -> () in
let pp = p.baseAddress?.assumingMemoryBound(to: UnsafeMutablePointer<Int8>?.self)
f(pp, ac)
}
it finally prints
alfa
beta
gama
delta
Program ended with exit code: 0
based on the result, your have to use
let count = CInt(cArgs.count)
cArgs.withUnsafeMutableBytes { (p) -> () in
let pp = p.baseAddress?.assumingMemoryBound(to: UnsafeMutablePointer<Int8>?.self)
cmdLnConf = cmd_ln_parse_r(nil, ps_args(), count, pp, STrue)
}
WARNING!!!
don't call cArgs.count inside the closure, where the pointer is defined!

Unable to access a Private Array

I'm having difficulty accessing an array of strings. It is declared as a private array and filled in the constructor for the class. I have a Get function defined. The problem is when I call this function at compile time I get an error that I cannot access private member declared in class. I'm just getting back into coding just for the yuks, as such I'm at a stage pre-pointers and pre-vectors so I'm trying to avoid situations that would force their use.
Words.h
#pragma once
#include <string>
#include <iostream>
#include <array>
class Words {
Words();
public:
std::string GetNewWord(int);
private:
std::string WordList[23] = {};
};
Words.cpp - The Array is completely filled but shortened here
#include "Words.h"
Words::Words(){
WordList[0] = "omega";
WordList[1] = "minors";
WordList[2] = "stigma";
WordList[3] = "glamor";
WordList[4] = "savior";
WordList[5] = "disarm";
WordList[6] = "isogram";
.
.
.
;
}
std::string Words::GetNewWord(int choice)
{
return WordList[choice];
}
main.cpp - contains an infinite loop so i could quickly test if the array was populated
#include <iostream>
#include <string>
#include "Words.h"
Words word;
int main() {
do {
std::cout << "choice: ";
int choice;
std::cin >> choice;
std::cout << "\n" << word.GetNewWord(choice) << "\n";
} while (true);
return 0;
}
The constructor is private, as all members of a class are by default. Simply move it to the public section.

printf with reference arguments

Imagine having a label that after created updates x amount of times / sec. The text of the label is given as a format-specifier text (ala printf), and any arguments for the format-specifier is updated on redraw, because the arguments for the format specifier is pointers to their respective values.
Does any variant of sprintf work like this?
The code would work something like this:
/* client */
createLabel("Value is %f", &myFloatValue);
I haven't quite figured out a way to do this yet, does anyone have any ideas? I guess one could parse the format text, retrieve the pointers (and types), and store them as some object in a list, where you later could reprint the text and maybe delegate the formatting to the objects themselves, passing them only a textbuffer.. hmmm
Btw, the interface is C, but the host is C++.
Okay i got a "working" prototype, but it's written mainly in assembler. Anyway it demonstrates the supposed use of the api. Can anyone see a portable way to do this / have a better idea for the implementation?
It's pretty large so i'm posting it on pastebin:
http://pastebin.com/H8ZpWb4u
So your createLabel interface would store the format string, along with the addresses of the variables you're wanting to display within the string. Then just use standard old sprintf to reformat the text. Just be careful with those pointers to the data, and make sure you invalidate them when necessary.
I'm not really sure what the problem is. What else are you looking for? sprintf is capable of doing what you want, but you're going to have to track the format string and variable addresses yourself.
Okay i suddenly got an idea .. stringstream + templated polymorphism. I ended up writing the thing in C++ in 5 mins, and at the very least it's a huge improvement.
#include <string>
#include <iostream>
#include <vector>
#include <sstream>
class CBaseValue
{
public:
virtual void toString(std::stringstream & buf) = 0;
};
template< typename T >
class CValue : public CBaseValue
{
typedef T type;
typedef T * ptr_type;
type * val;
public:
CValue(void * val)
{
this->val = reinterpret_cast<ptr_type>(val);
}
CValue(type * val) : val(val) {}
virtual void toString(std::stringstream & buf) {
buf << *val;
}
};
class CLabel
{
std::stringstream ss;
std::vector<CBaseValue *> valueList;
std::string format;
public:
CLabel() {};
void reset() {
format.clear();
ss.str("");
for(unsigned i = 0; i < valueList.size(); i++) {
delete valueList[i];
}
valueList.clear();
}
void setFormat(const char * fmt, ...) {
reset();
format = fmt;
va_list args;
va_start(args, fmt);
for(unsigned i = 0; i < format.size(); ++i) {
if(format[i] == '%') {
++i;
switch(fmt[i])
{
case 'd':
valueList.push_back(new CValue<unsigned int>( va_arg(args, void *) ));
break;
case 'f':
valueList.push_back(new CValue<float>( va_arg(args, void *) ));
break;
}
}
}
va_end(args);
}
std::string get() {
ss.str("");
unsigned count(0);
for(unsigned i = 0; i < format.size(); i++) {
if(format[i] == '%') {
i++; // ignore type specifiers, already polymorphically solved
valueList[count++]->toString(ss);
} else {
ss << format[i];
}
}
return ss.str();
}
~CLabel() {
reset();
}
};
int main() {
int test = 2;
float val = 3.14f;
CLabel myLabel;
myLabel.setFormat("Stringstream test, float: %f, and an int: %d \n", &val, &test);
std::cout << myLabel.get();
test = 3;
std::cout << myLabel.get();
system("pause");
}
You could do something relatively simple with std::bind or boost::bind. I'll leave it as an exercise on how to massage a C interface on top of this.
#include <functional>
int main() {
int test = 2;
float val = 3.14f;
std::function<int()> label = std::bind(
printf,
"Stringstream test, float: %f, and an int: %d \n",
std::ref(val),
std::ref(test));
label();
test = 3;
label();
}

shared library how to link to a symbol?

I have:
car.cc
#include "car.h"
#include <iostream>
using namespace std;
extern "C" Car* create_object()
{
return new Car;
}
Car::Car() {
this->maxGear = 2;
this->currentGear = 1;
this->speed = 0;
}
void Car::shift(int gear) {
if (gear < 1 || gear > maxGear) {
return;
}
currentGear = gear;
}
void Car::brake() {
speed -= (5 * this->getCurrentGear());
std::cout<<"THE SPEED IS:" <<speed<<std::endl;
}
extern "C" void destroy_object( Car* object )
{
delete object;
}
car.h
#ifndef VEHICLES_CAR_H
#define VEHICLES_CAR_H
// A very simple car class
class Car {
public:
Car();
void shift(int gear);
void accelerate();
void brake();
private:
int maxGear;
int currentGear;
int speed;
};
#endif /* VEHICLES_CAR_H */
test.cc
#include "/home/car.h"
#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
using namespace std;
int main()
{
/* on Linux, use "./myclass.so" */
void* handle = dlopen("/usr/lib/libCarTest.so", RTLD_LAZY);
int (*result)(int);
if (!handle)
{
}
/*dlsym(handle,"accelerate");
cout<<"IN HERE: "<<endl;
dlsym(handle,"brake");
dlclose(handle);*/
Car* (*create)();
void (*destroy)(Car*);
dlerror();
create = (Car* (*)())dlsym(handle, "create_object");
destroy = (void (*)(Car*))dlsym(handle, "destroy_object");
Car* carr = (Car*)create();
carr->brake();
destroy( carr );
dlclose(handle);
/*
Car carr;
carr.brake();
* compilation g++ test.cpp -o tst /path/libcar.so
*/
return 0;
}
After creating libMyLib.so and install it in /usr/lib i've tried to compile test.cc using: g++ test.cc -o tst -ldl. WHY do i need to include -lMyLib? is there a way to compile the code without libMyLib.so? Secondly why dlsym(handle,"brake") is not working? If i change dlsym (Car* (*).... with dlsym(handle,"brake") i get nothing. why?
Appreciate
WHY do i need to include -lMyLib?
Because you need to link to the Car::brake method.
Secondly why dlsym(handle,"brake") is not working?
Because there is no brake symbol. The method Car::brake has a complicated mangled (implementation-defined) name. You can see this in the output of nm -D.
AFAIK, you can solve it by
making all the methods of Car virtual (they will be called through a pointer, so no linking will be needed)
doing it the old C way, ie. export a free function brake() that would call the Car::brake method from the .so
making all the public methods of Car inline and defining them in the header.
emulating the virtual table approach (as we do it in C)
Combining the last two approaches:
class Car {
public:
void brake() { brake_impl(this); }
private:
void (*brake_impl)(Car*);
void do_brake(); // this would be the actual implementation
Car() : brake_impl([] (Car* c){ c->do_brake(); }) { ... }
};
Of course you could split the implementation and the interface so it's not such a mess.

Resources