Here is an explanation of my CPU recompiler.
It's very buggy right now, but I learned a lot from it.
TAKE YOUR TIME READING IT, IT'S PRETTY COMPLICATED!!!
This will be very complicated to understand (I normally don't teach),
but the pros will understand.
(Remember, this is only an explanation I wrote in five minutes, so it could have lots of mistakes.)
I based my CPU core on gcemu's.
(Guess it will be open source then.)
/-----------------------------------------------
This is how gcemu interpreted the add opcode:
/-----------------------------------------------
// Pointers to the guest registers touched by the instruction being interpreted.
// They are declared outside the function and referenced by name from the
// _asm block in ppc_int_add().
unsigned int * pra; // set to &gpr[rA] (first source operand)
unsigned int * prb; // set to &gpr[rB] (second source operand)
unsigned int * prd; // set to &gpr[rD] (destination)
// Interpreter handler for the guest "add rD,rA,rB" instruction:
// computes gpr[rD] = gpr[rA] + gpr[rB].
// Reads `opcode` and `gpr`, which are declared outside this snippet.
void ppc_int_add()
{
unsigned int rD, rA, rB;
// Decode the three register numbers; each field is 5 bits wide (mask 0x1f).
rD = ((opcode)>>21)&0x1f; // bits 25..21 of the opcode word (bit 0 = LSB)
rA = ((opcode)>>16)&0x1f; // bits 20..16
rB = ((opcode)>>11)&0x1f; // bits 15..11
// Resolve the register numbers to addresses inside the gpr array.
prd = &gpr[rD];
pra = &gpr[rA];
prb = &gpr[rB];
// edx is reused as the pointer register for each operand in turn, so the
// order of these instructions matters; eax carries the running result.
_asm
{
mov edx, dword ptr pra // edx = address of gpr[rA]
mov eax, [edx] // eax = gpr[rA]
mov edx, dword ptr prb // edx = address of gpr[rB]
add eax, [edx] // eax = gpr[rA] + gpr[rB]
mov edx, dword ptr prd // edx = address of gpr[rD]
mov [edx], eax // gpr[rD] = eax
};
}
/------------------------------------------------
That was just for some background info on how emulators interpret the opcode.
The structure is (add rD,rA,rB), meaning rD = rA + rB.
/------------------------------------------------
LEVEL 1 RECOMPILER
Well, now let's say that we have to generate asm for this opcode:
add 1,2,3
That would be (this is just an explanation, it's not accurate):
mov eax, gpr2 //move gpr2 into eax
mov ebx, gpr3 //move gpr3 into ebx
add eax, ebx //add ebx to eax
mov gpr1,eax //move eax into gpr1
/------------------------------------------------
/-------------------------------------------------
LEVEL 2 RECOMPILER
Well, now let's say that we have to generate asm for these opcodes:
add 1,2,3
add 3,2,1
That would be (this is just an explanation, it's not accurate):
mov eax, gpr2 //move gpr2 into eax
mov ebx, gpr3 //move gpr3 into ebx
add eax, ebx //add ebx to eax
mov gpr1,eax //move eax into gpr1
add eax, gpr2 //add gpr2 to eax (eax still holds gpr1, so eax = gpr1 + gpr2)
mov gpr3,eax //and move into gpr3
/-------------------------------------------------
whereas a level 1 recompiler would generate
mov eax, gpr2 //move gpr2 into eax
mov ebx, gpr3 //move gpr3 into ebx
add eax, ebx //add ebx to eax
mov gpr1,eax //move eax into gpr1
mov eax, gpr2 //move gpr2 into eax
mov ebx, gpr1 //move gpr1 into ebx
add eax, ebx //add ebx to eax
mov gpr3,eax //move eax into gpr3
/-------------------------------------------------
It saves a lot of cycles for other stuff like gfx and audio emulation.
HERE IS THE BIG ONE: LEVEL 3, as I call it.
Well, now let's say that we have to generate asm for these opcodes:
add 1,2,3
add 3,2,1
add 1,2,3
add 3,2,1
add 1,2,3
add 3,2,1
add 1,2,3
add 3,2,1
add 1,2,3
add 3,2,1
Notice that add just sums rA and rB, and addition is commutative, so you can add either one to the other: rA + rB and rB + rA give the same result.
The level three recompiler has a bit of AI because it recognizes patterns. For example, gpr2 is used in all of the opcodes above.
Let's say we generate
mov eax, gpr2 //move gpr2 into eax
mov ebx, gpr3 //move gpr3 into ebx
add eax, ebx //add ebx to eax
mov gpr1,eax //move eax into gpr1
gpr2 is in eax. (eax has the fastest add transfers.)
This is not good, because we overwrite eax (gpr2) for every opcode emulated and then have to load gpr2 again.
So level three scans the opcode block to see which x86 register would be the best place to keep each guest register.
In this case it would be (as you can see below, there are other factors taken into account in the algorithm):
mov eax, gpr3 //move gpr3 into eax
mov ebx, gpr2 //move gpr2 into ebx
add eax, ebx //add ebx to eax (now eax contains gpr1 not gpr3)
add eax, ebx //add ebx to eax (now eax contains gpr3 not gpr1)
add eax, ebx //add ebx to eax (now eax contains gpr1 not gpr3)
add eax, ebx //add ebx to eax (now eax contains gpr3 not gpr1)
add eax, ebx //add ebx to eax (now eax contains gpr1 not gpr3)
add eax, ebx //add ebx to eax (now eax contains gpr3 not gpr1)
add eax, ebx //add ebx to eax (now eax contains gpr1 not gpr3)
add eax, ebx //add ebx to eax (now eax contains gpr3 not gpr1)
add eax, ebx //add ebx to eax (now eax contains gpr1 not gpr3)
mov gpr1,eax //move eax into gpr1
add eax, ebx //add ebx to eax (now eax contains gpr3 not gpr1)
mov gpr3,eax //move eax into gpr3
/--------------------------------------------------------------------------------------------------
This takes 14 opcodes, most of which are eax adds.
Compare that with the 40 opcodes it would take with level 1 and you see speed, lots of speed.
EDIT: Sorry, this was supposed to be a thread for shizzy (I'll move it to a new thread if you want).