Thursday, July 29, 2010

Just In Time Compiler for Managed Platform- Part 4: Basic Conditional Branch

Today I'll generate code that can handle conditional branch. This one is really gian step- since once we have conditional branching enabled we are ready to execute almost all instructions. This is relatively complex to handle than earlier things.

First let us define the java class with a condition:

public class Math
{
public static int add(int x, int y)
{
int r;
r= x+y;
return r;
}

public static int SimpleCall()
{
return 17;
}
public static int SimpleCall2()
{
return add(17, 29);
}

public static int SimpleCall4(int p)
{
if(p>100) return 100;
return add(p, 29);
}

public static int SimpleCall5(int p)
{
return SimpleCall4(101);
}

public static int SimpleCall3()
{
return SimpleCall4(17);
}

public static int Main()
{
return SimpleCall3();
}
}


And the generated byte code:

public class Math extends java.lang.Object{
public Math();
Signature: ()V
Code:
0: aload_0
1: invokespecial #1; //Method java/lang/Object."init":()V
4: return

public static int add(int, int);
Signature: (II)I
Code:
0: iload_0
1: iload_1
2: iadd
3: istore_2
4: iload_2
5: ireturn

public static int SimpleCall();
Signature: ()I
Code:
0: bipush 17
2: ireturn

public static int SimpleCall2();
Signature: ()I
Code:
0: bipush 17
2: bipush 29
4: invokestatic #2; //Method add:(II)I
7: ireturn

public static int SimpleCall4(int);
Signature: (I)I
Code:
0: iload_0
1: bipush 100
3: if_icmple 9
6: bipush 100
8: ireturn
9: iload_0
10: bipush 29
12: invokestatic #2; //Method add:(II)I
15: ireturn

public static int SimpleCall5(int);
Signature: (I)I
Code:
0: bipush 101
2: invokestatic #3; //Method SimpleCall4:(I)I
5: ireturn

public static int SimpleCall3();
Signature: ()I
Code:
0: bipush 17
2: invokestatic #3; //Method SimpleCall4:(I)I
5: ireturn

public static int Main();
Signature: ()I
Code:
0: invokestatic #4; //Method SimpleCall3:()I
3: ireturn

}

The SimpleCall4 method has a conditional branch instruction if_icmple. Note that we deal with static methiods so far- so dont care the init method yet.

Let us first define the helper method for the instruction to generate machine code:

void IfIcmple(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
u1 c[] = {
//if((pRE->stack[pRE->stackTop-2].intValue > pRE->stack[pRE->stackTop-1].intValue))

0x8B, 0x45, 0x08, // mov eax,dword ptr [pRE]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x8B, 0x02, // mov eax,dword ptr [edx]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x8B, 0x52, 0x04, // mov edx,dword ptr [edx+4]
0x8B, 0x75, 0x08, // mov esi,dword ptr [pRE]
0x8B, 0x36, // mov esi,dword ptr [esi]
0x8B, 0x44, 0xC8, 0xF0, // mov eax,dword ptr [eax+ecx*8-10h]
0x3B, 0x44, 0xD6, 0xF8, // cmp eax,dword ptr [esi+edx*8-8]
0x76, 0x00, 0x90, 0x90, 0x90, // JBE nop nop nop


//pRE->stackTop -= 2;
0x8B, 0x45, 0x08, // mov eax,dword ptr [pRE]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x83, 0xE9, 0x02, // sub ecx,2
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x89, 0x4A, 0x04, // mov dword ptr [edx+4],ecx
};

memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);

LinkedListNode *pNode = new LinkedListNode(&code[ip-20], NULL);

LOG(_T("PC = 0x%X Jmp Inst Offset = 0x%X Inst = 0x%X\n"), targetpc, (int)(&code[ip-20])-(int)(code), code[ip-20]);

JmpTarget *pJmpTarget = NULL;
if(!pJmpTargetMap->Lookup((void *) targetpc, (void *&) pJmpTarget))
{
pJmpTarget = new JmpTarget();
pJmpTarget->pTargetList = pNode;
pJmpTarget->pc = targetpc;
pJmpTargetMap->SetAt((void *)targetpc, pJmpTarget);
}
else
{
pNode->pNext = pJmpTarget->pTargetList;
}

pJmpTarget->pTargetList = pNode;
}

The instruction if_icmple checks value on top of stack and brances if first argument is greater than second argument. We do same in the machine code. Since we can not calculate address of target instruction without first generating code that is behind the target- we keep the target address empty and fix all jump address after we generate machine code all the instructions. To do this we maintain a map of jmp instruction locations and also a map of native vs managed code locations.

Since one instruction can be target of multiple jmp instructions we keep a linked list of jmp instructions we need to fix for a specific managed target instruction-

struct LinkedListNode
{
void *pData;
LinkedListNode *pNext;
LinkedListNode(void *pData, LinkedListNode *pNext)
{
this->pData = pData;
this->pNext = pNext;
}

LinkedListNode()
{
this->pData = NULL;
this->pNext = NULL;
}
};

Here we need to fix out ireturn helper- since after return from method we must not execute any instruction we must return but before that we must fix the native stack (epilog) and then return. Since we add epilog at the end of each native function we just jmp to that location for the cleanup if there is multiple retutn from managed code. To track thios we insert an unconditional jmp instruction to the epilog-

// ireturn instruction takes the value from stack top and push
// to stack[0] position.
void IReturn(u1* code, int& ip, CMapPtrToPtr *pJmpTargetMap)
{
u1 c[] = {
//pRE->stack[0].intValue=pRE->stack[pRE->stackTop-1].intValue;
0x8B, 0x45, 0x08, // mov eax,dword ptr [pRE]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x8B, 0x02, // mov eax,dword ptr [edx]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x8B, 0x12, // mov edx,dword ptr [edx]
0x8B, 0x44, 0xC8, 0xF8, // mov eax,dword ptr [eax+ecx*8-8]
0x89, 0x02, // mov dword ptr [edx],eax
};

memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);

u1 c1[] = {
0xE9, 0x00, 0x00, 0x00, 0x00 //JMP , nop ,nop
};

memcpy(&code[ip], c1, sizeof(c1));
ip+=sizeof(c1);

LinkedListNode *pNode = new LinkedListNode(&code[ip-5], NULL);

LOG(_T("PC = RETURN Jmp Inst Offset = 0x%X Inst = 0x%X\n"), (int)(&code[ip-5])-(int)(code), code[ip-5]);

JmpTarget *pJmpTarget = NULL;
if(!pJmpTargetMap->Lookup((void *) 0, (void *&) pJmpTarget))
{
pJmpTarget = new JmpTarget();
pJmpTarget->pTargetList = pNode;
pJmpTarget->pc = 0;
pJmpTargetMap->SetAt((void *)0, pJmpTarget);
}
else
{
pNode->pNext = pJmpTarget->pTargetList;
}

pJmpTarget->pTargetList = pNode;

}


Lets now see how we fix the address-

void FixJmpLocations(u1 *codes, CMapPtrToPtr *pJmpTargetMap, CMapPtrToPtr *pManagedtoNativeMap, u1* retAddress)
{
LOG(_T("Fixing Jmp Target\n"));

//Iterate through the entire map,
for (POSITION pos = pJmpTargetMap->GetStartPosition(); pos != NULL;)
{
JmpTarget *pJmpTarget;
int pc;
pJmpTargetMap->GetNextAssoc(pos, (void *&)pc, (void *&)pJmpTarget);

ASSERT(pc == pJmpTarget->pc);
int target;

if(0 == pJmpTarget->pc)
{
target = (int)retAddress;
}
else
{
target = (int) pManagedtoNativeMap->GetValueAt((void *&)pJmpTarget->pc);
}

LinkedListNode *pTargetList = pJmpTarget->pTargetList;

do{
int offset=0;
if(0xE9 == codes[(int)((int)pTargetList->pData-(int)codes)])
{
offset = target - (int)pTargetList->pData-5; //1 for inst 4 for 4 byte offset = -5
memcpy(&codes[(int)((int)pTargetList->pData-(int)codes)+1], &offset, sizeof(offset));
}
else
{
offset = target - (int)pTargetList->pData - 2; //1 for inst 1 for 1 byte offset = -2
codes[(int)((int)pTargetList->pData-(int)codes)+1]=offset;
}

LOG(_T("Fixed 0x%X with Native Address Offset 0x%X\n"), (int)pTargetList->pData - (int)codes, offset);

pTargetList = pTargetList->pNext;
}while(NULL != pTargetList);
}
}

Please note that the map pManagedtoNativeMap is polulated the Compile function like this in the giant for loop for each instruction-

pManagedtoNativeMap->SetAt((void *)pc, &codes[ip]);

Thats it. We are now ready to test the vodes we generate-

int main()
{
Context *pRE = new Context();;
pRE->stack = new Variable[STACK_SIZE];
pRE->stackTop = 0;
memset(pRE->stack, 0, sizeof(Variable)*STACK_SIZE);

pRE->pVMEnv = new VMEnvironment();
pRE->pVMEnv->pClassHeap = new ClassHeap();
pRE->pVMEnv->pObjectHeap = new ObjectHeap();

pRE->pVMEnv->ppHelperMethods = HelperMethods;

ClassHeap* pClsHeap = pRE->pVMEnv->pClassHeap;

JavaClass jc;
pClsHeap->LoadClass("Math", &jc);
JavaClass *pVirtualClass =&jc, *pClass1 = &jc;

int mindex=pClass1->GetMethodIndex(_T("Main"),_T("()I"),pVirtualClass);

method_info_ex *pMethod = &pVirtualClass->methods[mindex];

MethodLink *pMethodLink = new MethodLink();
pMethodLink->pClass = pVirtualClass;
pMethodLink->pMethod = pMethod;

((void (*)(MethodLink *pMethodLink, Context *pRE))pRE->pVMEnv->ppHelperMethods[CALL_METHOD_HELPER_INDEX])(pMethodLink, pRE);

LOG(_T("Ret = %d"), pRE->stack[0].intValue);

return 0;
}

Thats it. Our generated native code can handle branching!

Monday, July 26, 2010

Just In Time Compiler for Managed Platform- Part 3: Call a method

Today I'll try to extend the simple JIT compiler to the point where we can call a method from another method.

First lets create a simple java class:

public class Math
{
public static int add(int x, int y)
{
int r;
r= x+y;
return r;
}

public static int SimpleCall2()
{
return add(17, 29);
}
}

Here we call add method from SimpleCall2 method. Our JIT compiler will supply mechanism to handle this. When we compile using java compiler we get following class and method byte code:

public class Math extends java.lang.Object{
public Math();
Signature: ()V
Code:
0: aload_0
1: invokespecial #1;
4: return

public static int add(int, int);
Signature: (II)I
Code:
0: iload_0
1: iload_1
2: iadd
3: istore_2
4: iload_2
5: ireturn

public static int SimpleCall2();
Signature: ()I
Code:
0: bipush 17
2: bipush 29
4: invokestatic #2; //Method add:(II)I
7: ireturn
}


[Note: I do not describe the Java Virtual Machine basics here again- You may look at my article for basic understanding: Home Made Java Virtual Machine]

First we need to extend our Context structure to hold some more values. We should add members at the bottom- otherwise the code we generated so far will be invalid.

struct VMEnvironment
{
ObjectHeap *pObjectHeap;
ClassHeap *pClassHeap;
void **ppHelperMethods;
};

struct Context
{
Variable *stack;
int stackTop;
JavaClass *pClass;
Context *pCallerContext;
VMEnvironment *pVMEnv;
};


Also we want to keep track of native codes we generate. So we need another structure.

struct MethodLink
{
JavaClass *pClass;
method_info_ex *pMethod;
void *pNativeBlock;
};


Now we need a helper class to keep track of the methods we work with. We use a simple string to pointer map. The key string is generated from classname, method name and method desc. So key looks like "Math::add(II)I".

MethodLink* GetMethod(JavaClass *pClass, method_info_ex *pMethod, u4 pc)
{
static CMapStringToPtr methodsMap;

u2 mi=getu2(&pMethod->pCode_attr->code[pc+1]);
char *pConstPool = (char *)pClass->constant_pool[mi];

u2 classIndex = getu2(&pConstPool[1]);
u2 nameAndTypeIndex = getu2(&pConstPool[3]);

//get class at pool index
pConstPool = (char *)pClass->constant_pool[classIndex];

ASSERT(pConstPool[0] == CONSTANT_Class);

u2 ni=getu2(&pConstPool[1]);

CString strClassName;
pClass->GetStringFromConstPool(ni, strClassName);

ClassHeap *pClassHeap = new ClassHeap();

JavaClass *pClassCallee=pClassHeap->GetClass(strClassName);

pConstPool = (char *)pClassCallee->constant_pool[nameAndTypeIndex];
ASSERT(pConstPool[0] == CONSTANT_NameAndType);

u2 name_index = getu2(&pConstPool[1]);
u2 descriptor_index = getu2(&pConstPool[3]);

CString strMethodName, strMethodDesc;
pClassCallee->GetStringFromConstPool(name_index, strMethodName);
pClassCallee->GetStringFromConstPool(descriptor_index, strMethodDesc);

JavaClass *pVirtualClass=pClassCallee;
int nIndex=pClassCallee->GetMethodIndex(strMethodName, strMethodDesc, pVirtualClass);

method_info_ex *pCalleeMethod = &pClassCallee->methods[nIndex];

/*
if( ACC_SUPER & pCalleeMethod->access_flags)
{
pCalleeMethod = pClassCallee->GetSuperClass();
}
*/

CString sign(strClassName+"::"+strMethodName+strMethodDesc);
MethodLink *pLink=NULL;
if(!methodsMap.Lookup(sign, (void *&)pLink))
{
pLink = new MethodLink();
pLink->pClass = pClassCallee;
pLink->pMethod = pCalleeMethod;
pLink->pNativeBlock = NULL;
methodsMap.SetAt(sign, pLink);
}

return pLink;
}


To call a method we do not generate the statck preparation code using machine code for now to keep the things simple. We'll do that after we finish all type of code generation. So from native code we call back to a C++ method that again calls into generated codes-

void CallMethod(MethodLink *pMethodLink, Context *pRE)
{
LOG(_T("CallMethod\n"));

int codeBlockSize = pMethodLink->pMethod->pCode_attr->code_length*2; //todo guess better

int (*NativeBlock)(Context *)=(int (*)(Context *)) VirtualAlloc(NULL, codeBlockSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
u1* codes = (u1*) NativeBlock;

int ip =0;

JavaClass *pClass = pMethodLink->pClass;

if(NULL == pMethodLink->pNativeBlock)
{
Compile(pMethodLink->pClass, pMethodLink->pMethod, codes, ip);
pMethodLink->pNativeBlock = codes;
}

CString strName, strDesc;
pMethodLink->pClass->GetStringFromConstPool(pMethodLink->pMethod->name_index, strName);
pMethodLink->pClass->GetStringFromConstPool(pMethodLink->pMethod->descriptor_index, strDesc);

int params=GetMethodParametersStackCount(strDesc)+1;

//invokestatic: we are only dealing with static methods so far

int nDiscardStack =params;
if(pMethodLink->pMethod->access_flags & ACC_NATIVE)
{
}
else
{
nDiscardStack+= pMethodLink->pMethod->pCode_attr->max_locals;
}

pRE->stackTop+=(nDiscardStack-1);
LOG(_T("Invoking method %s%s, \n"), strName, strDesc);

(*NativeBlock)(pRE);

//if returns then get on stack
if(strDesc.Find(_T(")V")) < 0)
{
nDiscardStack--;
if(strDesc.Find(_T(")J")) < 0)
{
}
else
{
nDiscardStack--;
}
}

pRE->stackTop-=nDiscardStack;
LOG(_T("~CallMethod\n"));
}


OK, thats the callbacks we need for now. Now we generate the actual machine code that will use the MethodLink* value to call back to the CallMethod function. To do this we use a function pointer list and store it in the context environment-

#define CALL_METHOD_HELPER_INDEX 0

void* HelperMethods[] = {
CallMethod,
};

Let us now define the InvokeStatic helper method.

void InvokeStatic(JavaClass *pClass, method_info_ex *pMethod, u4 pc, u1* codes, int &ip)
{
MethodLink* pLink = GetMethod(pClass, pMethod, pc);
EmitCallMethod(codes, ip, pLink);
}

void EmitCallMethod(u1* code, int &ip, void* pLinkAddress)
{
//((void (*)(MethodLink *pMethodLink))pRE->pVMEnv->ppHelperMethods[CALL_METHOD_HELPER_INDEX])(pLinkAddress, pRE);
u1 c[] = {
0x8B, 0x45, 0x08, // mov eax,dword ptr [pRE]
0x50, // push eax
0x68, 0x00, 0x00, 0x00, 0x00, // push pLinkAddress
0x8B, 0x4D, 0x08, // mov ecx,dword ptr [pRE]
0x8B, 0x51, 0x10, // mov edx,dword ptr [ecx+10h]
0x8B, 0x42, 0x08, // mov eax,dword ptr [edx+8]
0x8B, 0x08, // mov ecx,dword ptr [eax]
0xFF, 0xD1, // call ecx
0x83, 0xC4, 0x08, // add esp,8
};

memcpy(c+5, &pLinkAddress, 4);
memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);
}


To compile the methods we define a function that generates machine code for java byte codes. This function does not handle branch instructions right now. To handle branch we probably need two pass- since we would not know the exact address during first pass. So, here is a large while loop to do basic things:

u4 Compile(JavaClass *pClass, method_info_ex *pMethod, u1 *codes, int &ip)
{
if(pMethod->access_flags & ACC_NATIVE)
{
return 1;
}

Prolog(codes, ip);

u4 pc=0;
u1 *bc=pMethod->pCode_attr->code;

i4 error=0;

CString strMethod;
pClass->GetStringFromConstPool(pMethod->name_index, strMethod);

i4 index=0;
while(pMethod->pCode_attr->code_length>pc)
{
LOG(_T("Opcode = %s\n"),OpcodeDesc[(u1)bc[pc]]);

switch(bc[pc])
{
case nop:
pc++;
break;

case bipush:// 16 /*(0x10)*/
BiPush(codes, ip, (u1)bc[pc+1]);
pc+=2;
break;

case iload_0: //26 Load int from local variable 0
ILoad_0(codes, ip);
pc++;
break;

case iload_1: //27 Load int from local variable 1
ILoad_1(codes, ip);
pc++;
break;
case iload_2: //28 Load int from local variable 2
ILoad_2(codes, ip);
pc++;
break;
case iload_3: //29 Load int from local variable 3
ILoad_3(codes, ip);
pc++;
break;

case istore_2: // 61 /*(0x3d) */
IStore_2(codes, ip);
pc++;
break;

case iadd: //96
IAdd(codes, ip);
pc++;
break;

case invokestatic:// 184
InvokeStatic(pClass, pMethod, pc, codes, ip);
pc+=3;
break;
case ireturn: //172 (0xac)
IReturn(codes, ip);
pc++;
break;

default:
error=1;
break;
}

if(error) break;
}

Return0(codes, ip);
Epilog(codes, ip);

return error;
}


OK, we are now ready to test out code:

int main()
{
Context *pRE = new Context();;
pRE->stack = new Variable[STACK_SIZE];
pRE->stackTop = 0;
memset(pRE->stack, 0, sizeof(Variable)*STACK_SIZE);

pRE->pVMEnv = new VMEnvironment();
pRE->pVMEnv->pClassHeap = new ClassHeap();
pRE->pVMEnv->pObjectHeap = new ObjectHeap();

pRE->pVMEnv->ppHelperMethods = HelperMethods;

ClassHeap* pClsHeap = pRE->pVMEnv->pClassHeap;

JavaClass jc;
pClsHeap->LoadClass("Math", &jc);
JavaClass *pVirtualClass =&jc, *pClass1 = &jc;

int mindex=pClass1->GetMethodIndex(_T("SimpleCall2"),_T("()I"),pVirtualClass);

method_info_ex *pMethod = &pVirtualClass->methods[mindex];

MethodLink *pMethodLink = new MethodLink();
pMethodLink->pClass = pVirtualClass;
pMethodLink->pMethod = pMethod;

((void (*)(MethodLink *pMethodLink, Context *pRE))pRE->pVMEnv->ppHelperMethods[CALL_METHOD_HELPER_INDEX])(pMethodLink, pRE);
LOG(_T("Return Value = %d"), pRE->stack[0].intValue);

return 0;
}

Do you see value 46 on the stack as return value? Cool!

Thursday, July 15, 2010

Just In Time Compiler for Managed Platform- Part 2: Generate native method

Today we'll design a small block of code that is equivallent to a corresponding java method.

Since the generated native executable code will be used only by our VM we are free to define out own structure and calling convention for it. We generate one native function for each Java method. Each generated native function will have only parameter that is required for operation- a pointer to a structure RuntimeEnvironment:

union Variable
{
u1 charValue;
u2 shortValue;
u4 intValue;
f4 floatValue;
u4* ptrValue;
Object object;
};

struct RuntimeEnvironment
{
Variable *stack;
int stackTop;
//We'll add more as we require later
};

The return type will be int:

int ExecuteMethod(RuntimeEnvironment *pRE);

To generate the final method we need a lot of helper function. We define those as:
void HelperFunction(u1* code, int& ip);
These functions will take a code block and insert code in the block and fix the code pointer (ip).

First let us define the Prolog and Epilog and return 0 helper functions for this function prototype:

void Prolog(u1* code, int& ip)
{
u1 c[]= {
0x55,// push ebp
0x8B, 0xEC,// mov ebp,esp
0x81, 0xEC, 0xC0, 0x00, 0x00, 0x00,// sub esp,0C0h
0x53,// push ebx
0x56,// push esi
0x57, // push edi
};

memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);
}

void Epilog(u1* code, int& ip)
{
u1 c[]= {
0x5F,// pop edi
0x5E,// pop esi
0x5B,// pop ebx
0x8B, 0xE5,// mov esp,ebp
0x5D,// pop ebp
0xC3,// ret
};

memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);
}

void Return0(u1* code, int& ip)
{
//33 C0 xor eax,eax
code[ip++]=0x33;
code[ip++]=0xC0;
}

Now, we want to generate machine code for the following simple function-

public static int SimpleCall()
{
return 17;
}

Here is the generated java byte code:
Signature: ()I
Code:
0: bipush 17
2: ireturn

Here we start to generate helper function for Java Virtual Machine instruction.

For bipush [value] we need to push the [value] on the VM stack:

void BiPush(u1* code, int& ip, short value)
{
// C++ equivallent
// pRE->stack[pRE->stackTop++].shortValue = value;

u1 c[] = {
0x8B, 0x45, 0x08, // mov eax,dword ptr [pRE]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x8B, 0x02, // mov eax,dword ptr [edx]
0xBA, 0x00, 0x00, 0x00, 0x00, // mov edx,value
0x66, 0x89, 0x14, 0xC8, // mov word ptr [eax+ecx*8],dx
0x8B, 0x45, 0x08, // mov eax,dword ptr [pRE]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x83, 0xC1, 0x01, // add ecx,1
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x89, 0x4A, 0x04, // mov dword ptr [edx+4],ecx
};

//We need to encode value and set it to the 00 00 00 00 position
u1 encVal[4];
EncodeByte4((int)value, encVal);
memcpy(c + 12, encVal, 4);
memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);
}

Thats it for the simple java function. We can now test this:

int main() 
{
int codeBlockSize = 4096;
int (*SimpleCall)(RuntimeEnvironment *)=(int (*)(RuntimeEnvironment *)) VirtualAlloc(NULL, codeBlockSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
u1* codes = (u1*) SimpleCall;
int ip=0;
memset(codes, 0, codeBlockSize);

Prolog(codes, ip);
BiPush(codes, ip, 17);
Return0(codes, ip);
Epilog(codes, ip);

//No lets test if it is really pushing value 17 on the VM stack
RuntimeEnvironment *pRE = new RuntimeEnvironment();;
pRE->stack = new Variable[20];
memset(pRE->stack, 0, sizeof(Variable)*20);
int retVal = (*SimpleCall)(pRE);
printf("pRE->stack[0].intValue = %d", pRE->stack[0].intValue);

return 0;
}

Thats cool- we have generated our first native function that actually does some byte code execution.

Saturday, July 10, 2010

Just In Time Compiler for Managed Platform- Part 1: Code Generation

First we need to generate executable code block.

So let us write some code in C++.
int add(int x, int y) 
{
int r;
r= x+y;
return r;
}

int main()
{
int r1 = add(13, 23);
printf("Returned value = %d", r1);
}

Great! It returns the right value. OK, but that is very basic. We want to generate the function from data in a simple buffer-

First we need a machine equivallent code for the function above:

unsigned char addcode[] = {  
0x55, //push ebp
0x8B, 0xEC, //mov ebp,esp
0x81, 0xEC, 0xC0, 0x00, 0x00, 0x00, //sub esp,0C0h
0x53, //push ebx
0x56, //push esi
0x57, //push edi

//r=x+y;
0x8B, 0x45, 0x08, //mov eax,dword ptr [x]
0x03, 0x45, 0x0C, //add eax,dword ptr [y]
0x89, 0x45, 0xF8, //mov dword ptr [r],eax

//return r;
0x8B, 0x45, 0xF8, //mov eax,dword ptr [r]

0x5F, //pop edi
0x5E, //pop esi
0x5B, //pop ebx
0x8B, 0xE5, //mov esp,ebp
0x5D, //pop ebp

0xC3 //ret
};

OK, this code is generated by Visual Studio compiler. We use it to generate our own code block in memory.

To get a memory block we can use to generate executable code we use the following Windows API:

LPVOID WINAPI VirtualAlloc(
__in_opt LPVOID lpAddress,
__in SIZE_T dwSize,
__in DWORD flAllocationType,
__in DWORD flProtect
);

First we allocate a 4096 byte executable code block and put it in a function pointer:

int (*addfn)(int, int) = (int (*)(int, int)) VirtualAlloc(NULL, 4096,  MEM_COMMIT, PAGE_EXECUTE_READWRITE);

Then we copy our executable code to this memory block:

memcpy(addfn, addcode, sizeof(addcode)); 

Now the majic - we call the function and print the return value:

int r1 = (*addfn)(13,23); 
printf("Returned value = %d", r1);

Thats easy- right?

Now we release the memory since we are gentle citizen-

VirtualFree(addfn, NULL, MEM_RELEASE);

Thats it. here is the full code:
#include [windows.h][stdio.h]...

unsigned char addcode[] = {
0x55, //push ebp
0x8B, 0xEC, //mov ebp,esp
0x81, 0xEC, 0xC0, 0x00, 0x00, 0x00, //sub esp,0C0h
0x53, //push ebx
0x56, //push esi
0x57, //push edi

//r=x+y;
0x8B, 0x45, 0x08, //mov eax,dword ptr [x]
0x03, 0x45, 0x0C, //add eax,dword ptr [y]
0x89, 0x45, 0xF8, //mov dword ptr [r],eax

//return r;
0x8B, 0x45, 0xF8, //mov eax,dword ptr [r]

0x5F, //pop edi
0x5E, //pop esi
0x5B, //pop ebx
0x8B, 0xE5, //mov esp,ebp
0x5D, //pop ebp

0xC3 //ret
};

int main()
{
int (*addfn)(int, int) = (int (*)(int, int)) VirtualAlloc(NULL, 4096, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
memcpy(addfn, addcode, sizeof(addcode));

int r1 = (*addfn)(13,23);
printf("Returned value = %d", r1);

VirtualFree(addfn, NULL, MEM_RELEASE);

return 0;
}

Thats all for now. We can now generate code in memory and execute it. First step to design a JIT compiler.