Exe-2-C DOS/286 Decompiler Tests

This is the beta version of an experimental decompiler. The tests are from test.zip in the dcc distribution.

Strlen

The original C source for this program is as follows:

main()
{ char *s = "test";
    strlen(s);
}
strlen(char *s)
{ int n = 0;
    while (*s++)
        n++;
    return (n);
}
This was disassembled as follows (amongst much other code):
proc_10         proc    near
                push    SI                      
                mov     SI,194h                 
                push    SI                      
                call    near ptr proc_11        
                pop     CX                      
                pop     SI                      
                retn                            
proc_10         endp

proc_11         proc    near
                push    BP                      
                mov     BP,SP                   
                push    SI                      
                xor     SI,SI                   
                jmp     short loc_12            
loc_11:         ; N-Ref=1
                inc     SI                      
loc_12:         ; N-Ref=1
                mov     BX,Word Ptr [BP+4]      
                inc     Word Ptr [BP+4]         
                cmp     Byte Ptr [BX],0         
                jne     loc_11                  ; Jump if not equal ( != )
                mov     AX,SI                   
                jmp     short loc_13            
loc_13:         ; N-Ref=1
                pop     SI                      
                pop     BP                      
                retn                            
proc_11         endp

The output was as follows:

/****************************************************************************/
                near proc_10()
/****************************************************************************/
{
register char *reg1 ;

    push(0x194);
    proc_11();
    cx = pop();
}
/****************************************************************************/
                near proc_11(int   arg0)
/****************************************************************************/
{
register char *reg1 ;

    reg1 = 0;
    while(bx = arg0, ++arg0, *bx != 0)   
        ++reg1;
    ax = reg1;
}
The decompiler determined that proc_11 takes an int argument (actually a char*), but the call in proc_10 does not pass the actual argument (0x194, the pointer to the string); the value appears only in a separate push() statement. It has guessed incorrectly that reg1 in proc_11 is a char* (it is actually the int counter n). It may have been able to do better if main made use of the return value.

There is nothing to indicate the size of *bx (in fact, 8 bits), so this would never compile. The while loop does look good, though.

Fibo

The original C source code is:
int main()
{ int i, numtimes, number;
  unsigned value, fib();

    printf("Input number of iterations: ");
    scanf ("%d", &numtimes);
    for (i = 1; i <= numtimes; i++)
    {
        printf ("Input number: ");
        scanf ("%d", &number);
        value = fib(number);
        printf("fibonacci(%d) = %u\n", number, value);
    }
    exit(0);
}

unsigned fib(x)                 /* compute fibonacci number recursively */
int x;
{
    if (x > 2)
        return (fib(x - 1) + fib(x - 2));
    else
        return (1);
}

The disassembly for the fib function is

proc_11         proc    near
                push    BP                      
                mov     BP,SP                   
                push    SI                      
                mov     SI,Word Ptr [BP+4]      
                cmp     SI,+2                   
                jle     loc_13                  ; Jump if not greater ( <= )
                mov     AX,SI                   
                dec     AX                      
                push    AX                      
                call    near ptr proc_11        
                pop     CX                      
                push    AX                      
                mov     AX,SI                   
                add     AX,0FFFEh               
                push    AX                      
                call    near ptr proc_11        
                pop     CX                      
                mov     DX,AX                   
                pop     AX                      
                add     AX,DX                   
                jmp     short loc_14            

                dw      5EBh
loc_13:         ; N-Ref=1
                mov     AX,1                    
                jmp     short loc_14            
loc_14:         ; N-Ref=2
                pop     SI                      
                pop     BP                      
                retn                            
proc_11         endp
The disassembler correctly did not attempt to disassemble the unreachable code before loc_13; it is emitted as the data word dw 5EBh instead.

The decompiled output is as follows. Again, main is proc_10; proc_11 is fib:

/****************************************************************************/
                near proc_10()
/****************************************************************************/
{
register char *reg1 ;
register char *reg2 ;
char  *loc0;
char  *loc1;

        push(0x194);
        proc_41();
        cx = pop();
        ax = &loc0;
        push(ax);
        push(0x1B1)
        proc_55();
        cx = pop();
        cx = pop();
        DELETE: reg1 = 1;
        si = 1;  /*PCH : RM_Table_init*/
        while(reg1 <= loc0)   {
                push(0x1B4);
                proc_41();
                cx = pop();
                ax = &loc1;
                push(ax);
                push(0x1C3);
                proc_55();
                cx = pop();
                cx = pop();
                push(loc1);
                proc_11();
                cx = pop();
                push(ax);
                push(loc1);
                push(0x1C6);
                proc_41();
                sp = sp + 6;
                ++reg1;
        }
        ax = 0;
        push(ax);
        proc_13();
        cx = pop();
}

/****************************************************************************/
                near proc_11(int   arg0)
/****************************************************************************/
{
register char *reg1 ;

        reg1 = arg0;
        if(reg1 > 2)   {
                ax = reg1;
                --ax;
                push(ax);
                proc_11();
                cx = pop();
                push(ax);
                ax = reg1;
                ax = ax +  - 2;
                push(ax);
                proc_11();
                cx = pop();
                dx = ax;
                ax = pop();
                ax = ax + dx;
        }
        else   {
                DELETE: ax = 1;
                ax = 1;  /*PCH : RM_Table_init*/
                return;
        }
}
Here the "instruction by instruction" nature of the decompilation is evident. Forward substitution (which requires data flow analysis to ensure that it is safe) would merge the individual instruction results into more complex, but more readable, expressions. The condition codes (status flags) have been successfully removed.

No attempt has been made to determine the return value of function fib (here proc_11), and no attempt has been made to recognise the library functions printf and scanf. In proc_10, the decompiler seems to forget whether register SI is represented by variable si or variable reg1: the initialisation reg1 = 1 is marked DELETE and replaced by si = 1, yet the loop still tests and increments reg1.

CategoryDecompilation

Revision: r1.3 - 13 Feb 2003 - 21:49 - MikeVanEmmerik
Transform > DeCompilationGeneralApproach? > DecompilerE2cTest
Copyright © 1999-2020 by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding TWiki? Send feedback