Saturday, August 7, 2010

Just In Time Compiler for Managed Platform- Part 7: Native Methods

Execution of native methods are easiest of all methods :D. Since we dont need to generate anything for them - they are already native.

We can not accept any native method for execution- Those should understand the stack. For native methods we fix the stack top to point to actual data- since there is nothing we need to generate- we dont need the clever stack pointer-

The signature of each native method must match the following type signature:

typedef Variable (*NativeMethod)(Context* pContext);


Lets define our helper for native method. And here also we do not deal with machine instruction since there is nothing to parse- (well except the return type of the method)-
u4 ExecuteNativeMethod(Context* pContext, CString strClassName, CString strMethod, CString strDesc)
{
JavaClass *pClass = pContext->pClass;
LOG(_T("Execute NativeMethod %s.%s%s \n"),strClassName , strMethod, strDesc);
CString strSignature= GetNativeMethodSignature(strClassName, strMethod, strDesc);
NativeMethod nativeMethod=pContext->pVMEnv->pNativeMethodProvider->GetNativeMethod(strSignature);

if(nativeMethod == NULL)
{
ASSERT(FALSE);
return -1;
}
else
{
Variable retVal = nativeMethod(pContext);

//if returns then get on stack
if(strDesc.Find(_T(")V")) < 0)
{
if(strDesc.Find(_T(")J")) < 0 && (strDesc.Find(_T(")D")) < 0))
{
//todo validate
pContext->stack[0]=retVal;
}
else
{
pContext->stack[0].intValue=0;
pContext->stack[1]=retVal;
}
}
}
return 0;
}



OK, lets now define a simple native method that adds two numbers:

Variable Add(Context* pContext)
{
Variable returnVal;
//The stack top is right in native methods-
returnVal.intValue
= pContext->stack[pContext->stackTop].intValue + pContext->stack[pContext->stackTop-1].intValue;
return returnVal;
}


Thats all for native methods for now. We'll add some native methods to dynamically load native method-

Thursday, August 5, 2010

Just In Time Compiler for Managed Platform- Part 6: Put Field and Get Field

Now we have our object on the heap. We need mechanism to putfield and getfield. The operation is very simple indeed.

First we need a helper to get a field index (variable) in the object memory in the heap:


//And also we put the method names in HelperMethods structure


int GetFieldIndex(JavaClass *pTargetClass, Context *pContext, u2 index)
{
CONSTANT_Fieldref_info *pFieldInfo
= (CONSTANT_Fieldref_info *)pContext->pClass->constant_pool[index];

ASSERT(pFieldInfo->tag == CONSTANT_Fieldref);

u2 classIndex = getu2(&((u1 *)pFieldInfo)[1]);
u2 nameAndTypeIndex = getu2(&((u1 *)pFieldInfo)[3]);

CONSTANT_NameAndType_info *pNameTypeInfo
= (CONSTANT_NameAndType_info *) pContext->pClass->constant_pool[nameAndTypeIndex];

ASSERT(pNameTypeInfo->tag == CONSTANT_NameAndType);

u2 nameIndex = getu2(&((u1 *)pNameTypeInfo)[1]);
u2 descIndex = getu2(&((u1 *)pNameTypeInfo)[3]);
CString strFieldName, strFieldDesc;

if(!pContext->pClass->GetStringFromConstPool(nameIndex, strFieldName))
{
ASSERT(FALSE);
return -1;
}

if(!pContext->pClass->GetStringFromConstPool(descIndex, strFieldDesc))
{
ASSERT(FALSE);
return -2;
}

int superClassSize = 0;

JavaClass *pCurClass = pTargetClass;

int fieldIndex = -1;
while(true)
{
fieldIndex = pCurClass->GetFieldIndex(strFieldName, strFieldDesc);
pCurClass = pTargetClass->GetSuperClass();
if(fieldIndex >= 0)
{
fieldIndex += pCurClass ? pCurClass->GetObjectFieldCount() : 0;
break;
}
else
{
if(!pCurClass)
{
break;
}
}
}

ASSERT(fieldIndex>=0);
return fieldIndex;
}


Now we define the method to execute the two instructions. We do not generate code foe this operaions and callback from the generated code since the operation is static and no parsing or instruction execution required-


#define PUT_FIELD_HELPER_INDEX 4
#define GET_FIELD_HELPER_INDEX 5

void PutField(Context *pContext, u2 index)
{
Variable obj=pContext->stack[pContext->stackTop-2];
Variable value=pContext->stack[pContext->stackTop-1];
Variable *pVarList = pContext->pVMEnv->pObjectHeap->GetObjectPointer(obj.object);

JavaClass *pTargetClass = (JavaClass *)pVarList[0].ptrValue;
ASSERT(pTargetClass && pTargetClass->magic == 0xCAFEBABE);

int fieldIndex = GetFieldIndex(pTargetClass, pContext, index);

pVarList[fieldIndex+1]=value;

pContext->stackTop-=2;
}

void GetField(Context *pContext, u2 index)
{
Variable obj=pContext->stack[pContext->stackTop-1];
Variable *pVarList=pContext->pVMEnv->pObjectHeap->GetObjectPointer(obj.object);

JavaClass *pTargetClass = (JavaClass *)pVarList[0].ptrValue;
ASSERT(pTargetClass && pTargetClass->magic == 0xCAFEBABE);

int fieldIndex = GetFieldIndex(pTargetClass, pContext, index);

pContext->stack[pContext->stackTop-1]=pVarList[fieldIndex+1];
}


Now we generate instruction for them. Here is how we do it for putfield:

void EmitCallPutField(u1* code, int &ip, u4 index)
{
u1 c[]={
//((void (*)(Context *pContext, u2 index))pContext->pVMEnv->ppHelperMethods[PUT_FIELD_HELPER_INDEX])(pContext, 0x1234);
0x8B, 0xF4, // mov esi,esp
0x68, 0x00, 0x00, 0x00, 0x00, // push index
0x8B, 0x45, 0x08, // mov eax,dword ptr [pContext]
0x50, // push eax
0x8B, 0x4D, 0x08, // mov ecx,dword ptr [pContext]
0x8B, 0x51, 0x10, // mov edx,dword ptr [ecx+10h]
0x8B, 0x42, 0x08, // mov eax,dword ptr [edx+8]
0x8B, 0x48, 0x10, // mov ecx,dword ptr [eax+10h]
0xFF, 0xD1, // call ecx
0x83, 0xC4, 0x08, // add esp,8
};

memcpy(c+3, &index, sizeof(index));
memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);
}

void ExecutePutField(u1* code, int& ip, u2 index)
{
EmitCallPutField(code, ip, index);
}

For getfield we just need to change the method pointer (add 4)

mov ecx,dword ptr [eax+14h] 

So we can assign value to a class field and get that value when we need it. Next we need array handling. Comes next day.

Wednesday, August 4, 2010

Just In Time Compiler for Managed Platform- Part 5: Creating new object on heap

Lets create object on heap today.

Since we have the object creation code in JavaClass it is very easy to create an object on the heap. We just call the CreateObject method of the current class that is already pushed on the stack by previous instructions:

int CreateNewObject(Context *pContext, u2 index)
{
if(!pContext->pClass->CreateObject(index, pContext->pVMEnv->pObjectHeap, pContext->stack[pContext->stackTop].object))
return -1;
pContext->stackTop++;
return 0;
}


Now let us create the helper methods for the new instruction:

void EmitExecuteNew(u1* code, int &ip, u4 index)
{
u1 c[] = {
//((int (*)(Context *pContext, u2 index))pContext->pVMEnv->ppHelperMethods[EXECUTE_NEW_HELPER_INDEX])(pContext, index);
0x8B, 0xF4, // mov esi,esp
0x68, 0x00, 0x00, 0x00, 0x00, // push index
0x8B, 0x45, 0x08, // mov eax,dword ptr [pContext]
0x50, // push eax
0x8B, 0x4D, 0x08, // mov ecx,dword ptr [pContext]
0x8B, 0x51, 0x10, // mov edx,dword ptr [ecx+10h]
0x8B, 0x42, 0x08, // mov eax,dword ptr [edx+8]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0xFF, 0xD1, // call ecx
0x83, 0xC4, 0x08, // add esp,8
};

memcpy(c+3, &index, sizeof(index));
memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);
}

void ExecuteNew(u1* code, int& ip, u2 index)
{
EmitExecuteNew(code, ip, index);
}


With the call mechanism that wer built to call method is used to callback the object creation method here.

And from the Compile method we just add a call for the new instruction:

case _new:// 187(0xbb) -'new' is a keyword in C++ so we use '_new' :)
ExecuteNew(codes, ip, getu2(&bc[pc+1]));
pc+=3;
break;

We actually have all the basic mechanism built. We just need to add things like native method support, garbase collector and helpers for remaining instruction. Those will be managed mostly in C++ since there is nothing to parse for them the optimizing C++ compiler does all the good thing for us.

Monday, August 2, 2010

Just In Time Compiler for Managed Platform- : Why stack is implemented wrong?

Well, its not really wrong- its just efficient-

The stack top is is always topvalue + 1. This is not right behaviour for stack usually- but what we do in our opetation is decrement the value first. It makes the offset zero which is efficient in terms of machine cycles.

But why do that in the first place?

This is because if we do not decrement the stack first before execution of the instruction it becomes complex in case of jmp (branch) instructions. Since we must decrement the stack no matter what we do this first.

Now if we use a negative offset it'll take some extra cycles to decode the instruction with offset than witout the offset.

This post is just to make sure we remember the stack top value points to invalid data. We must decrement it by one to get the top value.

Sunday, August 1, 2010

Just In Time Compiler for Managed Platform- Part 4A: Conditional Branch Correction

The Jxx instructions I used for branching was not working right- It treated values unsigned. The "IA-32 Intel Architecture Software Developers Manual- Vol 2" describes this (page- 3-355): The terms "less" and "greater" are used for comparisons of signed integers and the terms "above" and "below" are used for unsigned integers. So, for signed comparison, we use JL, JG, JLE and JGE instead of JB, GA, JBE and JAE.

With that change all branching instruction seems working now. We need two helper method for all of them.

First one is comparison with zero:

void IfXX(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap, u1 XX)
{
u1 c[] = {
//pContext->stackTop--;
0x8B,0x45,0x08, // mov eax,dword ptr [pContext]
0x8B,0x48,0x04, // mov ecx,dword ptr [eax+4]
0x83,0xE9,0x01, // sub ecx,1
0x8B,0x55,0x08, // mov edx,dword ptr [pContext]
0x89,0x4A,0x04, // mov dword ptr [edx+4],ecx

//if(pContext->stack[pContext->stackTop].intValue [XXoperator] 0)
0x8B, 0x45, 0x08, // mov eax,dword ptr [pContext]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pContext]
0x8B, 0x02, // mov eax,dword ptr [edx]
0x83, 0x3C, 0xC8, 0x00, // cmp dword ptr [eax+ecx*8],0
0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, // JXX
};

memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);

code[ip-5] = XX;
CreateJmpLink(&code[ip-5], targetpc, pJmpTargetMap);
}

void Ifle(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfXX(code, ip, targetpc, pJmpTargetMap, JLE);
}

void Ifeq(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfXX(code, ip, targetpc, pJmpTargetMap, JE);
}

void Ifne(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfXX(code, ip, targetpc, pJmpTargetMap, JNE);
}

void Iflt(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfXX(code, ip, targetpc, pJmpTargetMap, JL);
}

void Ifge(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfXX(code, ip, targetpc, pJmpTargetMap, JGE);
}

void Ifgt(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfXX(code, ip, targetpc, pJmpTargetMap, JG);
}



Second one is comparison of any two numbers:

void IfICmpXX(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap, u1 XX)
{
u1 c[]={
//pContext->stackTop -= 2;
0x8B, 0x45, 0x08, // mov eax,dword ptr [pContext]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x83, 0xE9, 0x02, // sub ecx,2
0x8B, 0x55, 0x08, // mov edx,dword ptr [pContext]
0x89, 0x4A, 0x04, // mov dword ptr [edx+4],ecx

//if(!(pContext->stack[pContext->stackTop -2+2].intValue [XXOperator] pContext->stack[pContext->stackTop-1+2].intValue))
0x8B, 0x45, 0x08, // mov eax,dword ptr [pContext]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pContext]
0x8B, 0x02, // mov eax,dword ptr [edx]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pContext]
0x8B, 0x52, 0x04, // mov edx,dword ptr [edx+4]
0x8B, 0x75, 0x08, // mov esi,dword ptr [pContext]
0x8B, 0x36, // mov esi,dword ptr [esi]
0x8B, 0x04, 0xC8, // mov eax,dword ptr [eax+ecx*8]
0x3B, 0x44, 0xD6, 0x08, // cmp eax,dword ptr [esi+edx*8+8]
0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, // JXX
};

memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);

code[ip-5] = XX;

CreateJmpLink(&code[ip-5], targetpc, pJmpTargetMap);
}

void IfIcmple(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfICmpXX(code, ip, targetpc, pJmpTargetMap, JLE);
}

void IfIcmpne(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfICmpXX(code, ip, targetpc, pJmpTargetMap, JNE);
}

void IfIcmpge(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfICmpXX(code, ip, targetpc, pJmpTargetMap, JGE);
}

void IfIcmplt(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfICmpXX(code, ip, targetpc, pJmpTargetMap, JL);
}

void IfIcmpgt(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfICmpXX(code, ip, targetpc, pJmpTargetMap, JG);
}

void IfIcmpeq(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
IfICmpXX(code, ip, targetpc, pJmpTargetMap, JE);
}


Thats it. We have all the branching instructions working correctly now.

Thursday, July 29, 2010

Just In Time Compiler for Managed Platform- Part 4: Basic Conditional Branch

Today I'll generate code that can handle conditional branch. This one is really gian step- since once we have conditional branching enabled we are ready to execute almost all instructions. This is relatively complex to handle than earlier things.

First let us define the java class with a condition:

public class Math
{
public static int add(int x, int y)
{
int r;
r= x+y;
return r;
}

public static int SimpleCall()
{
return 17;
}
public static int SimpleCall2()
{
return add(17, 29);
}

public static int SimpleCall4(int p)
{
if(p>100) return 100;
return add(p, 29);
}

public static int SimpleCall5(int p)
{
return SimpleCall4(101);
}

public static int SimpleCall3()
{
return SimpleCall4(17);
}

public static int Main()
{
return SimpleCall3();
}
}


And the generated byte code:

public class Math extends java.lang.Object{
public Math();
Signature: ()V
Code:
0: aload_0
1: invokespecial #1; //Method java/lang/Object."init":()V
4: return

public static int add(int, int);
Signature: (II)I
Code:
0: iload_0
1: iload_1
2: iadd
3: istore_2
4: iload_2
5: ireturn

public static int SimpleCall();
Signature: ()I
Code:
0: bipush 17
2: ireturn

public static int SimpleCall2();
Signature: ()I
Code:
0: bipush 17
2: bipush 29
4: invokestatic #2; //Method add:(II)I
7: ireturn

public static int SimpleCall4(int);
Signature: (I)I
Code:
0: iload_0
1: bipush 100
3: if_icmple 9
6: bipush 100
8: ireturn
9: iload_0
10: bipush 29
12: invokestatic #2; //Method add:(II)I
15: ireturn

public static int SimpleCall5(int);
Signature: (I)I
Code:
0: bipush 101
2: invokestatic #3; //Method SimpleCall4:(I)I
5: ireturn

public static int SimpleCall3();
Signature: ()I
Code:
0: bipush 17
2: invokestatic #3; //Method SimpleCall4:(I)I
5: ireturn

public static int Main();
Signature: ()I
Code:
0: invokestatic #4; //Method SimpleCall3:()I
3: ireturn

}

The SimpleCall4 method has a conditional branch instruction if_icmple. Note that we deal with static methiods so far- so dont care the init method yet.

Let us first define the helper method for the instruction to generate machine code:

void IfIcmple(u1* code, int& ip, int targetpc, CMapPtrToPtr *pJmpTargetMap)
{
u1 c[] = {
//if((pRE->stack[pRE->stackTop-2].intValue > pRE->stack[pRE->stackTop-1].intValue))

0x8B, 0x45, 0x08, // mov eax,dword ptr [pRE]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x8B, 0x02, // mov eax,dword ptr [edx]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x8B, 0x52, 0x04, // mov edx,dword ptr [edx+4]
0x8B, 0x75, 0x08, // mov esi,dword ptr [pRE]
0x8B, 0x36, // mov esi,dword ptr [esi]
0x8B, 0x44, 0xC8, 0xF0, // mov eax,dword ptr [eax+ecx*8-10h]
0x3B, 0x44, 0xD6, 0xF8, // cmp eax,dword ptr [esi+edx*8-8]
0x76, 0x00, 0x90, 0x90, 0x90, // JBE nop nop nop


//pRE->stackTop -= 2;
0x8B, 0x45, 0x08, // mov eax,dword ptr [pRE]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x83, 0xE9, 0x02, // sub ecx,2
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x89, 0x4A, 0x04, // mov dword ptr [edx+4],ecx
};

memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);

LinkedListNode *pNode = new LinkedListNode(&code[ip-20], NULL);

LOG(_T("PC = 0x%X Jmp Inst Offset = 0x%X Inst = 0x%X\n"), targetpc, (int)(&code[ip-20])-(int)(code), code[ip-20]);

JmpTarget *pJmpTarget = NULL;
if(!pJmpTargetMap->Lookup((void *) targetpc, (void *&) pJmpTarget))
{
pJmpTarget = new JmpTarget();
pJmpTarget->pTargetList = pNode;
pJmpTarget->pc = targetpc;
pJmpTargetMap->SetAt((void *)targetpc, pJmpTarget);
}
else
{
pNode->pNext = pJmpTarget->pTargetList;
}

pJmpTarget->pTargetList = pNode;
}

The instruction if_icmple checks value on top of stack and brances if first argument is greater than second argument. We do same in the machine code. Since we can not calculate address of target instruction without first generating code that is behind the target- we keep the target address empty and fix all jump address after we generate machine code all the instructions. To do this we maintain a map of jmp instruction locations and also a map of native vs managed code locations.

Since one instruction can be target of multiple jmp instructions we keep a linked list of jmp instructions we need to fix for a specific managed target instruction-

struct LinkedListNode
{
void *pData;
LinkedListNode *pNext;
LinkedListNode(void *pData, LinkedListNode *pNext)
{
this->pData = pData;
this->pNext = pNext;
}

LinkedListNode()
{
this->pData = NULL;
this->pNext = NULL;
}
};

Here we need to fix out ireturn helper- since after return from method we must not execute any instruction we must return but before that we must fix the native stack (epilog) and then return. Since we add epilog at the end of each native function we just jmp to that location for the cleanup if there is multiple retutn from managed code. To track thios we insert an unconditional jmp instruction to the epilog-

// ireturn instruction takes the value from stack top and push
// to stack[0] position.
void IReturn(u1* code, int& ip, CMapPtrToPtr *pJmpTargetMap)
{
u1 c[] = {
//pRE->stack[0].intValue=pRE->stack[pRE->stackTop-1].intValue;
0x8B, 0x45, 0x08, // mov eax,dword ptr [pRE]
0x8B, 0x48, 0x04, // mov ecx,dword ptr [eax+4]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x8B, 0x02, // mov eax,dword ptr [edx]
0x8B, 0x55, 0x08, // mov edx,dword ptr [pRE]
0x8B, 0x12, // mov edx,dword ptr [edx]
0x8B, 0x44, 0xC8, 0xF8, // mov eax,dword ptr [eax+ecx*8-8]
0x89, 0x02, // mov dword ptr [edx],eax
};

memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);

u1 c1[] = {
0xE9, 0x00, 0x00, 0x00, 0x00 //JMP , nop ,nop
};

memcpy(&code[ip], c1, sizeof(c1));
ip+=sizeof(c1);

LinkedListNode *pNode = new LinkedListNode(&code[ip-5], NULL);

LOG(_T("PC = RETURN Jmp Inst Offset = 0x%X Inst = 0x%X\n"), (int)(&code[ip-5])-(int)(code), code[ip-5]);

JmpTarget *pJmpTarget = NULL;
if(!pJmpTargetMap->Lookup((void *) 0, (void *&) pJmpTarget))
{
pJmpTarget = new JmpTarget();
pJmpTarget->pTargetList = pNode;
pJmpTarget->pc = 0;
pJmpTargetMap->SetAt((void *)0, pJmpTarget);
}
else
{
pNode->pNext = pJmpTarget->pTargetList;
}

pJmpTarget->pTargetList = pNode;

}


Lets now see how we fix the address-

void FixJmpLocations(u1 *codes, CMapPtrToPtr *pJmpTargetMap, CMapPtrToPtr *pManagedtoNativeMap, u1* retAddress)
{
LOG(_T("Fixing Jmp Target\n"));

//Iterate through the entire map,
for (POSITION pos = pJmpTargetMap->GetStartPosition(); pos != NULL;)
{
JmpTarget *pJmpTarget;
int pc;
pJmpTargetMap->GetNextAssoc(pos, (void *&)pc, (void *&)pJmpTarget);

ASSERT(pc == pJmpTarget->pc);
int target;

if(0 == pJmpTarget->pc)
{
target = (int)retAddress;
}
else
{
target = (int) pManagedtoNativeMap->GetValueAt((void *&)pJmpTarget->pc);
}

LinkedListNode *pTargetList = pJmpTarget->pTargetList;

do{
int offset=0;
if(0xE9 == codes[(int)((int)pTargetList->pData-(int)codes)])
{
offset = target - (int)pTargetList->pData-5; //1 for inst 4 for 4 byte offset = -5
memcpy(&codes[(int)((int)pTargetList->pData-(int)codes)+1], &offset, sizeof(offset));
}
else
{
offset = target - (int)pTargetList->pData - 2; //1 for inst 1 for 1 byte offset = -2
codes[(int)((int)pTargetList->pData-(int)codes)+1]=offset;
}

LOG(_T("Fixed 0x%X with Native Address Offset 0x%X\n"), (int)pTargetList->pData - (int)codes, offset);

pTargetList = pTargetList->pNext;
}while(NULL != pTargetList);
}
}

Please note that the map pManagedtoNativeMap is polulated the Compile function like this in the giant for loop for each instruction-

pManagedtoNativeMap->SetAt((void *)pc, &codes[ip]);

Thats it. We are now ready to test the vodes we generate-

int main()
{
Context *pRE = new Context();;
pRE->stack = new Variable[STACK_SIZE];
pRE->stackTop = 0;
memset(pRE->stack, 0, sizeof(Variable)*STACK_SIZE);

pRE->pVMEnv = new VMEnvironment();
pRE->pVMEnv->pClassHeap = new ClassHeap();
pRE->pVMEnv->pObjectHeap = new ObjectHeap();

pRE->pVMEnv->ppHelperMethods = HelperMethods;

ClassHeap* pClsHeap = pRE->pVMEnv->pClassHeap;

JavaClass jc;
pClsHeap->LoadClass("Math", &jc);
JavaClass *pVirtualClass =&jc, *pClass1 = &jc;

int mindex=pClass1->GetMethodIndex(_T("Main"),_T("()I"),pVirtualClass);

method_info_ex *pMethod = &pVirtualClass->methods[mindex];

MethodLink *pMethodLink = new MethodLink();
pMethodLink->pClass = pVirtualClass;
pMethodLink->pMethod = pMethod;

((void (*)(MethodLink *pMethodLink, Context *pRE))pRE->pVMEnv->ppHelperMethods[CALL_METHOD_HELPER_INDEX])(pMethodLink, pRE);

LOG(_T("Ret = %d"), pRE->stack[0].intValue);

return 0;
}

Thats it. Our generated native code can handle branching!

Monday, July 26, 2010

Just In Time Compiler for Managed Platform- Part 3: Call a method

Today I'll try to extend the simple JIT compiler to the point where we can call a method from another method.

First lets create a simple java class:

public class Math
{
public static int add(int x, int y)
{
int r;
r= x+y;
return r;
}

public static int SimpleCall2()
{
return add(17, 29);
}
}

Here we call add method from SimpleCall2 method. Our JIT compiler will supply mechanism to handle this. When we compile using java compiler we get following class and method byte code:

public class Math extends java.lang.Object{
public Math();
Signature: ()V
Code:
0: aload_0
1: invokespecial #1;
4: return

public static int add(int, int);
Signature: (II)I
Code:
0: iload_0
1: iload_1
2: iadd
3: istore_2
4: iload_2
5: ireturn

public static int SimpleCall2();
Signature: ()I
Code:
0: bipush 17
2: bipush 29
4: invokestatic #2; //Method add:(II)I
7: ireturn
}


[Note: I do not describe the Java Virtual Machine basics here again- You may look at my article for basic understanding: Home Made Java Virtual Machine]

First we need to extend our Context structure to hold some more values. We should add members at the bottom- otherwise the code we generated so far will be invalid.

struct VMEnvironment
{
ObjectHeap *pObjectHeap;
ClassHeap *pClassHeap;
void **ppHelperMethods;
};

struct Context
{
Variable *stack;
int stackTop;
JavaClass *pClass;
Context *pCallerContext;
VMEnvironment *pVMEnv;
};


Also we want to keep track of native codes we generate. So we need another structure.

struct MethodLink
{
JavaClass *pClass;
method_info_ex *pMethod;
void *pNativeBlock;
};


Now we need a helper class to keep track of the methods we work with. We use a simple string to pointer map. The key string is generated from classname, method name and method desc. So key looks like "Math::add(II)I".

MethodLink* GetMethod(JavaClass *pClass, method_info_ex *pMethod, u4 pc)
{
static CMapStringToPtr methodsMap;

u2 mi=getu2(&pMethod->pCode_attr->code[pc+1]);
char *pConstPool = (char *)pClass->constant_pool[mi];

u2 classIndex = getu2(&pConstPool[1]);
u2 nameAndTypeIndex = getu2(&pConstPool[3]);

//get class at pool index
pConstPool = (char *)pClass->constant_pool[classIndex];

ASSERT(pConstPool[0] == CONSTANT_Class);

u2 ni=getu2(&pConstPool[1]);

CString strClassName;
pClass->GetStringFromConstPool(ni, strClassName);

ClassHeap *pClassHeap = new ClassHeap();

JavaClass *pClassCallee=pClassHeap->GetClass(strClassName);

pConstPool = (char *)pClassCallee->constant_pool[nameAndTypeIndex];
ASSERT(pConstPool[0] == CONSTANT_NameAndType);

u2 name_index = getu2(&pConstPool[1]);
u2 descriptor_index = getu2(&pConstPool[3]);

CString strMethodName, strMethodDesc;
pClassCallee->GetStringFromConstPool(name_index, strMethodName);
pClassCallee->GetStringFromConstPool(descriptor_index, strMethodDesc);

JavaClass *pVirtualClass=pClassCallee;
int nIndex=pClassCallee->GetMethodIndex(strMethodName, strMethodDesc, pVirtualClass);

method_info_ex *pCalleeMethod = &pClassCallee->methods[nIndex];

/*
if( ACC_SUPER & pCalleeMethod->access_flags)
{
pCalleeMethod = pClassCallee->GetSuperClass();
}
*/

CString sign(strClassName+"::"+strMethodName+strMethodDesc);
MethodLink *pLink=NULL;
if(!methodsMap.Lookup(sign, (void *&)pLink))
{
pLink = new MethodLink();
pLink->pClass = pClassCallee;
pLink->pMethod = pCalleeMethod;
pLink->pNativeBlock = NULL;
methodsMap.SetAt(sign, pLink);
}

return pLink;
}


To call a method we do not generate the statck preparation code using machine code for now to keep the things simple. We'll do that after we finish all type of code generation. So from native code we call back to a C++ method that again calls into generated codes-

void CallMethod(MethodLink *pMethodLink, Context *pRE)
{
LOG(_T("CallMethod\n"));

int codeBlockSize = pMethodLink->pMethod->pCode_attr->code_length*2; //todo guess better

int (*NativeBlock)(Context *)=(int (*)(Context *)) VirtualAlloc(NULL, codeBlockSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
u1* codes = (u1*) NativeBlock;

int ip =0;

JavaClass *pClass = pMethodLink->pClass;

if(NULL == pMethodLink->pNativeBlock)
{
Compile(pMethodLink->pClass, pMethodLink->pMethod, codes, ip);
pMethodLink->pNativeBlock = codes;
}

CString strName, strDesc;
pMethodLink->pClass->GetStringFromConstPool(pMethodLink->pMethod->name_index, strName);
pMethodLink->pClass->GetStringFromConstPool(pMethodLink->pMethod->descriptor_index, strDesc);

int params=GetMethodParametersStackCount(strDesc)+1;

//invokestatic: we are only dealing with static methods so far

int nDiscardStack =params;
if(pMethodLink->pMethod->access_flags & ACC_NATIVE)
{
}
else
{
nDiscardStack+= pMethodLink->pMethod->pCode_attr->max_locals;
}

pRE->stackTop+=(nDiscardStack-1);
LOG(_T("Invoking method %s%s, \n"), strName, strDesc);

(*NativeBlock)(pRE);

//if returns then get on stack
if(strDesc.Find(_T(")V")) < 0)
{
nDiscardStack--;
if(strDesc.Find(_T(")J")) < 0)
{
}
else
{
nDiscardStack--;
}
}

pRE->stackTop-=nDiscardStack;
LOG(_T("~CallMethod\n"));
}


OK, thats the callbacks we need for now. Now we generate the actual machine code that will use the MethodLink* value to call back to the CallMethod function. To do this we use a function pointer list and store it in the context environment-

#define CALL_METHOD_HELPER_INDEX 0

void* HelperMethods[] = {
CallMethod,
};

Let us now define the InvokeStatic helper method.

void InvokeStatic(JavaClass *pClass, method_info_ex *pMethod, u4 pc, u1* codes, int &ip)
{
MethodLink* pLink = GetMethod(pClass, pMethod, pc);
EmitCallMethod(codes, ip, pLink);
}

void EmitCallMethod(u1* code, int &ip, void* pLinkAddress)
{
//((void (*)(MethodLink *pMethodLink))pRE->pVMEnv->ppHelperMethods[CALL_METHOD_HELPER_INDEX])(pLinkAddress, pRE);
u1 c[] = {
0x8B, 0x45, 0x08, // mov eax,dword ptr [pRE]
0x50, // push eax
0x68, 0x00, 0x00, 0x00, 0x00, // push pLinkAddress
0x8B, 0x4D, 0x08, // mov ecx,dword ptr [pRE]
0x8B, 0x51, 0x10, // mov edx,dword ptr [ecx+10h]
0x8B, 0x42, 0x08, // mov eax,dword ptr [edx+8]
0x8B, 0x08, // mov ecx,dword ptr [eax]
0xFF, 0xD1, // call ecx
0x83, 0xC4, 0x08, // add esp,8
};

memcpy(c+5, &pLinkAddress, 4);
memcpy(&code[ip], c, sizeof(c));
ip+=sizeof(c);
}


To compile the methods we define a function that generates machine code for java byte codes. This function does not handle branch instructions right now. To handle branch we probably need two pass- since we would not know the exact address during first pass. So, here is a large while loop to do basic things:

u4 Compile(JavaClass *pClass, method_info_ex *pMethod, u1 *codes, int &ip)
{
if(pMethod->access_flags & ACC_NATIVE)
{
return 1;
}

Prolog(codes, ip);

u4 pc=0;
u1 *bc=pMethod->pCode_attr->code;

i4 error=0;

CString strMethod;
pClass->GetStringFromConstPool(pMethod->name_index, strMethod);

i4 index=0;
while(pMethod->pCode_attr->code_length>pc)
{
LOG(_T("Opcode = %s\n"),OpcodeDesc[(u1)bc[pc]]);

switch(bc[pc])
{
case nop:
pc++;
break;

case bipush:// 16 /*(0x10)*/
BiPush(codes, ip, (u1)bc[pc+1]);
pc+=2;
break;

case iload_0: //26 Load int from local variable 0
ILoad_0(codes, ip);
pc++;
break;

case iload_1: //27 Load int from local variable 1
ILoad_1(codes, ip);
pc++;
break;
case iload_2: //28 Load int from local variable 2
ILoad_2(codes, ip);
pc++;
break;
case iload_3: //29 Load int from local variable 3
ILoad_3(codes, ip);
pc++;
break;

case istore_2: // 61 /*(0x3d) */
IStore_2(codes, ip);
pc++;
break;

case iadd: //96
IAdd(codes, ip);
pc++;
break;

case invokestatic:// 184
InvokeStatic(pClass, pMethod, pc, codes, ip);
pc+=3;
break;
case ireturn: //172 (0xac)
IReturn(codes, ip);
pc++;
break;

default:
error=1;
break;
}

if(error) break;
}

Return0(codes, ip);
Epilog(codes, ip);

return error;
}


OK, we are now ready to test out code:

int main()
{
Context *pRE = new Context();;
pRE->stack = new Variable[STACK_SIZE];
pRE->stackTop = 0;
memset(pRE->stack, 0, sizeof(Variable)*STACK_SIZE);

pRE->pVMEnv = new VMEnvironment();
pRE->pVMEnv->pClassHeap = new ClassHeap();
pRE->pVMEnv->pObjectHeap = new ObjectHeap();

pRE->pVMEnv->ppHelperMethods = HelperMethods;

ClassHeap* pClsHeap = pRE->pVMEnv->pClassHeap;

JavaClass jc;
pClsHeap->LoadClass("Math", &jc);
JavaClass *pVirtualClass =&jc, *pClass1 = &jc;

int mindex=pClass1->GetMethodIndex(_T("SimpleCall2"),_T("()I"),pVirtualClass);

method_info_ex *pMethod = &pVirtualClass->methods[mindex];

MethodLink *pMethodLink = new MethodLink();
pMethodLink->pClass = pVirtualClass;
pMethodLink->pMethod = pMethod;

((void (*)(MethodLink *pMethodLink, Context *pRE))pRE->pVMEnv->ppHelperMethods[CALL_METHOD_HELPER_INDEX])(pMethodLink, pRE);
LOG(_T("Return Value = %d"), pRE->stack[0].intValue);

return 0;
}

Do you see value 46 on the stack as return value? Cool!