%TOC% The tests performed here are downloaded from the file [[http://www.itee.uq.edu.au/~cristina/dcc/distribution/test.zip][test.zip]], part of the dcc distribution. It should be remembered that these tests were therefore chosen to suit the dcc decompiler. ---+++ Strlen The original C source for this program is as follows:
main()
{ char *s = "test";
	 strlen(s);
}
strlen(char *s)
{ int n = 0;
	 while (*s++)
		  n++;
	 return (n);
}
This disassembled as follows:

					 proc_1  PROC  NEAR
000 000305 55						PUSH			  bp
001 000306 8BEC					 MOV				bp, sp
002 000308 56						PUSH			  si
003 000309 33F6					 XOR				si, si

005 00030E 8B5E04			L1:  MOV				bx, [bp+4]
006 000311 FF4604				  INC	word ptr [bp+4]
007 000314 803F00				  CMP	byte ptr [bx], 0
008 000317 75F4					 JNE				L2
009 000319 8BC6					 MOV				ax, si
011 00031D 5E						POP				si
012 00031E 5D						POP				bp
013 00031F C3						RET

014 00030D 46				 L2:  INC				si
015									 JMP				L1					  ;Synthetic inst
					 proc_1  ENDP

					 main  PROC  NEAR
000 0002FA 56						PUSH			  si
001 0002FB BE9401				  MOV				si, 194h
002 0002FE 56						PUSH			  si
003 0002FF E80300				  CALL  near ptr proc_1
004 000302 59						POP				cx
005 000303 5E						POP				si
006 000304 C3						RET

					 main  ENDP
The decompiled output:

/*
 * Input file	: test\strlen.exe
 * File type	 : EXE
 */

#include "dcc.h"


void proc_1 (int arg0)
/* Takes 2 bytes of parameters.
 * High-level language prologue code.
 * C calling convention.
 */
{
int loc1;

	 loc1 = 0;
	 arg0 = (arg0 + 1);

	 while ((*arg0 != 0)) {
		  loc1 = (loc1 + 1);
		  arg0 = (arg0 + 1);
	 }	/* end of while */
}

void main ()
/* Takes no parameters.
 */
{
int loc1;

	 loc1 = 404;
	 proc_1 (loc1);
}
Here, arg0 is typed as an int, when in fact it is a char*. The program would actually compile, if one cast was added. This is the price of not having type inference; the program does not compile (the semantics are ambiguous). Unfortunately, there is an increment of =arg0= outside the loop, which makes the program logic incorrect. dcc would have recovered the return type of =strlen= (=proc1=), if the return value was used. ---+++ Fibo The original C source code is: int main() { int i, numtimes, number; unsigned value, fib(); printf("Input number of iterations: "); scanf ("%d", &numtimes); for (i = 1; i <= numtimes; i++) { printf ("Input number: "); scanf ("%d", &number); value = fib(number); printf("fibonacci(%d) = %u\n", number, value); } exit(0); } unsigned fib(x) /* compute fibonacci number recursively */ int x; { if (x > 2) return (fib(x - 1) + fib(x - 2)); else return (1); } The disassembly for the fib function is proc_1 PROC NEAR 000 00035B 55 PUSH bp 001 00035C 8BEC MOV bp, sp 002 00035E 56 PUSH si 003 00035F 8B7604 MOV si, [bp+4] 004 000362 83FE02 CMP si, 2 005 000365 7E1C JLE L1 006 000367 8BC6 MOV ax, si 007 000369 48 DEC ax 008 00036A 50 PUSH ax 009 00036B E8EDFF CALL near ptr proc_1 010 00036E 59 POP cx 011 00036F 50 PUSH ax 012 000370 8BC6 MOV ax, si 013 000372 05FEFF ADD ax, 0FFFEh 014 000375 50 PUSH ax 015 000376 E8E2FF CALL near ptr proc_1 016 000379 59 POP cx 017 00037A 8BD0 MOV dx, ax 018 00037C 58 POP ax 019 00037D 03C2 ADD ax, dx 021 000388 5E L2: POP si 022 000389 5D POP bp 023 00038A C3 RET 024 000383 B80100 L1: MOV ax, 1 025 000386 EB00 JMP L2 proc_1 ENDP Note that a branch before L2 has been eliminated, and the addresses are a little out of order. The decompiled output is as follows: /* * Input file : fibos.exe * File type : COM */ #include "dcc.h" int proc_1 (int arg0) /* Takes 2 bytes of parameters. * High-level language prologue code. * C calling convention. */ { int loc1; int loc2; /* ax */ loc1 = arg0; if (loc1 > 2) { loc2 = (proc_1 ((loc1 - 1)) + proc_1 ((loc1 + 0xFFFE))); } else { loc2 = 1; } return (loc2); } void main () /* Takes no parameters. * High-level language prologue code. */ { int loc1; int loc2; int loc3; int loc4; printf ("Input number of iterations: "); scanf ("%d", &loc1); loc3 = 1; while ((loc3 <= loc1)) { printf ("Input number: "); scanf ("%d", &loc2); loc4 = proc_1 (loc2); printf ("fibonacci(%d) = %u\n", loc2, loc4); loc3 = (loc3 + 1); } exit (0); } dcc has recovered the parameters (both formal and actual), and the return type. Library function names have been recovered. In fact, this output compiles, and runs correctly (or it would on a system with 16 bit integers; replacing the 0xFFFE with -2 corrects the problem). ---+++ Matrix Multiplication The original source is: #define n 5 #define m 4 static void multMatrix (int a[n][m], int b[m][n], int c[n][n]) { int i,j,k; for (i=0; i The disassembly for the matrixMult procedure only is rather large: proc_1 PROC NEAR 000 0002FA 55 PUSH bp 001 0002FB 8BEC MOV bp, sp 002 0002FD 83EC02 SUB sp, 2 003 000300 56 PUSH si 004 000301 57 PUSH di 005 000302 33F6 XOR si, si 007 000378 83FE05 L1: CMP si, 5 008 00037B 7C89 JL L2 009 00037D 5F POP di 010 00037E 5E POP si 011 00037F 8BE5 MOV sp, bp 012 000381 5D POP bp 013 000382 C3 RET 014 000306 33FF L2: XOR di, di 016 000372 83FF04 L3: CMP di, 4 017 000375 7C93 JL L4 018 000377 46 INC si 019 JMP L1 ;Synthetic inst 020 00030A C746FE0000 L4: MOV word ptr [bp-2], 0 022 00036B 837EFE04 L5: CMP word ptr [bp-2], 4 023 00036F 7CA0 JL L6 024 000371 47 INC di 025 JMP L3 ;Synthetic inst 026 000311 8BDE L6: MOV bx, si 027 000313 D1E3 SHL bx, 1 028 000315 D1E3 SHL bx, 1 029 000317 D1E3 SHL bx, 1 030 000319 035E04 ADD bx, [bp+4] 031 00031C 8B46FE MOV ax, [bp-2] 032 00031F D1E0 SHL ax, 1 033 000321 03D8 ADD bx, ax 034 000323 8B07 MOV ax, [bx] 035 000325 50 PUSH ax 036 000326 8B46FE MOV ax, [bp-2] 037 000329 BA0A00 MOV dx, 0Ah 038 00032C F7E2 MUL dx 039 00032E 8BD8 MOV bx, ax 040 000330 035E06 ADD bx, [bp+6] 041 000333 8BC7 MOV ax, di 042 000335 D1E0 SHL ax, 1 043 000337 03D8 ADD bx, ax 044 000339 58 POP ax 045 00033A F727 MUL word ptr [bx] 046 00033C 50 PUSH ax 047 00033D 8BC6 MOV ax, si 048 00033F BA0A00 MOV dx, 0Ah 049 000342 F7E2 MUL dx 050 000344 8BD8 MOV bx, ax 051 000346 035E08 ADD bx, [bp+8] 052 000349 8BC7 MOV ax, di 053 00034B D1E0 SHL ax, 1 054 00034D 03D8 ADD bx, ax 055 00034F 58 POP ax 056 000350 0307 ADD ax, [bx] 057 000352 50 PUSH ax 058 000353 8BC6 MOV ax, si 059 000355 BA0A00 MOV dx, 0Ah 060 000358 F7E2 MUL dx 061 00035A 8BD8 MOV bx, ax 062 00035C 035E08 ADD bx, [bp+8] 063 00035F 8BC7 MOV ax, di 064 000361 D1E0 SHL ax, 1 065 000363 03D8 ADD bx, ax 066 000365 58 POP ax 067 000366 8907 MOV [bx], ax 068 000368 FF46FE INC word ptr [bp-2] 069 JMP L5 ;Synthetic inst The decompiled output for the multMatrix procedure only: /* * Input file : test\matrixmu.exe * File type : EXE */ #include "dcc.h" void proc_1 (int arg0, int arg1, int arg2) /* Takes 6 bytes of parameters. * High-level language prologue code. * C calling convention. */ { int loc1; int loc2; int loc3; loc2 = 0; while ((loc2 < 5)) { loc3 = 0; while ((loc3 < 4)) { loc1 = 0; while ((loc1 < 4)) { *((((loc2 * 10) + arg2) + (loc3 << 1))) = ((*((((loc2 << 3) + arg0) + (loc1 << 1))) * *((((loc1 * 10) + arg1) + (loc3 << 1)))) + *((((loc2 * 10) + arg2) + (loc3 << 1)))); loc1 = (loc1 + 1); } loc3 = (loc3 + 1); } loc2 = (loc2 + 1); } } As Cristina states in her thesis, this example shows that forward substitution is able to find array expressions. The conversion to array format was not done, although an algorithm to achieve this was suggested. Again, the above will not compile, because the size of the items pointed to by the indirection operators (stars) is not given. It is desirable that expressions such as the above are converted to array format, because the assumption in the above code (that sizeof(int) == 2) is unlikely to be true for modern compilers. The for loops are here converted to functionally equivalent while loops. The number of formal parameters is correctly found. ---+++ Dhampstone benchmark The only source file contained in the above test.zip file was =dhamp.c= , another benchmark program. The main function is as follows: #define NUMTEST 6 main() { char buf1[TINY], buf2[TINY]; int i = 0; unsigned fib (); long square, sq (); double dmath, sroot (), dply (); printf("Start...%c\n\n",BELL); while (i < NUMTEST) { switch (i) { case (0): /* Character test */ results.cresult = stest (buf1,buf2); printf ("\ncresult = %d\n",results.cresult); break; case (1): /* Integer test */ results.iresult = intest (); printf ("\niresult = %d\n",results.iresult); break; case (2): /* Unsigned test */ results.uresult = fib (FIB); printf ("\nuresult = %u\n",results.uresult); break; case (3): /* Long test */ ... default: break; } ++i; } /* End while i */ printf ("\n\n...End%c",BELL); } The decompiled output for main is as follows: void main () /* Takes no parameters. * High-level language prologue code. * Contains instructions not normally used by compilers. * Contains coprocessor instructions. */ { int loc1; int loc2; ... int loc11; int loc12; /* ax */ int loc13; /* bx */ loc11 = 0; printf ("Start...%c\n\n", 7); while ((loc11 < 6)) { loc12 = loc11; if (loc12 <= 5) { loc13 = (loc12 << 1); var06278 = proc_1 (&loc2, &loc1, , ); printf ("\ncresult = %d\n", var06278); } loc11 = (loc11 + 1); } printf ("\n\n...End%c", 7); } Only the first branch of the switch statement is decompiled. This is a result of ignoring the indirect branch, falling in to the code for the first case, and not finding any control paths to the other cases. In her thesis, Cristina suggests several idioms for detecting switch statements, but none have been implemented in dcc. The variable loc12 could have been eliminated, merely replaced by loc11. CategoryDecompilation