/* x86dis : command line disassembler using the libdisasm library */
/*
       x86dis [-a offset|--addr=offset]
              [-r offset len|--range=offset len]
              [-e offset|--entry=offset]
              [-s name|--syntax=name]
              [-d name|--desc=name]
              [-f file|--file=file]
              [-o file|--out=file]
              [-l file|--log=file]
              [-p num|--pagesize=num]
              [-L] [-N]
              [-h|-?|--help]
              [-v|--version]
*/

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif


#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <libdis.h>


/* project URL printed by the version banner */
#define LIBDISASM_URL 		"http://bastard.sourceforge.net"
/* initial value of info.pagesize, the chunk size used when buffering a
 * stream; the -p option overrides it */
#define STDIN_PAGE_SIZE 	524288	/* 512 K */


/* Kinds of disassembly request: a single instruction (req_addr), a byte
 * range (req_range), or forward from an entry point (req_entry).  The
 * numeric values are compared by add_request() to decide precedence
 * between requests that share an offset. */
enum dis_req_type { req_addr = 1, req_range, req_entry };

/* One queued disassembly request.  Requests form a singly linked list
 * headed by dis_requests; add_request() inserts them by ascending offset. */
static struct DIS_REQ {
	/* offset in the input at which to disassemble */
	unsigned long offset;
	/* number of bytes; only range requests are given a non-zero value */
	unsigned int length;
	/* what kind of disassembly to perform */
	enum dis_req_type type;
	/* next request in the list, or NULL at the end */
	struct DIS_REQ *next;
} *dis_requests = NULL;

/* Program-wide settings filled in while parsing the command line.  The
 * three streams start as NULL here and are pointed at stdin/stdout/stderr
 * at the top of main(). */
static struct DIS_INFO {
	/* file streams */
	FILE *in, *out, *err;
	/* size of input file */
	unsigned long size;
	/* flag for --entry option */
	int entry;
	/* pagesize for buffering STDIN */
	unsigned long pagesize;
	/* output syntax */
	enum x86_asm_format syntax;
} info = { NULL, NULL, NULL, 0, 0, STDIN_PAGE_SIZE, raw_syntax };


/* ------------------------------------------------------- REPORTER */
/* this is x86dis.c code that needs to be ripped off and used in x86dis.c */
void x86dis_reporter( enum x86_report_codes code, void *arg, void *junk ) {
	char * str;

	/* here we could examine the error and do something useful;
	 * instead we just print that an error occurred */
	switch ( code ) {
		case report_disasm_bounds:
			str = "Attempt to disassemble RVA beyond end of buffer";
			break;
		case report_insn_bounds:
			str = "Instruction at RVA extends beyond buffer";
			break;
		case report_invalid_insn:
			str = "Invalid opcode at RVA";
			break;
		case report_unknown:
		default:	/* make GCC shut up */
			str = "Unknown Error";
			break;
	}

	fprintf(info.err, "X86DIS ERROR \'%s:\' 0x%08X\n", str, (int)arg);
}

/* ---------------------------------------------------------- PRINTER */
/* Disassembly callback: formats one instruction in the selected syntax
 * (info.syntax) and writes it as a line to the output stream (info.out),
 * so that the -o option is honoured.
 *   insn : instruction to print
 *   arg  : unused
 * For the att and intel syntaxes the line is prefixed with the instruction
 * address and a fixed-width column of up to 10 hex bytes. */
void x86dis_manual_print( x86_insn_t *insn, void *arg ) {
	char line[4096];
	int i;

	(void) arg;

	/* nothing to print if the formatter produced no text */
	if ( x86_format_insn(insn, line, sizeof(line), info.syntax) <= 0 ) {
		return;
	}

	if ( info.syntax == att_syntax || info.syntax == intel_syntax ) {
		/* print an address and hex bytes, since libdisasm doesn't */
		fprintf( info.out, "%08lX", insn->addr );
		for ( i = 0; i < 10; i++ ) {
			if ( i < insn->size ) {
				fprintf( info.out, " %02X", insn->bytes[i] );
			} else {
				/* pad so the mnemonic column lines up */
				fprintf( info.out, "   " );
			}
		}
		fprintf( info.out, "\t" );
	}

	fprintf( info.out, "%s\n", line );
}


/* -------------------------------------------------------- RESOLVER */
/* RESOLVER List support */
/* Sorted, singly linked list of the RVAs that have been seen so far.
 * rva_list_head is a pure sentinel: its own rva field is never compared,
 * so RVA 0 can be recorded like any other address. */
struct RVALIST {
	unsigned long rva;
	struct RVALIST *next;
} rva_list_head = {0};

/* Record 'rva' in the list, keeping it in ascending order.
 * Returns 1 if the rva was new and has been added, 0 if it was already
 * present.  An allocation failure also returns 0, so the caller simply
 * treats the address as not worth following. */
static int rva_list_add( unsigned long rva ) {
	struct RVALIST *prev, *rl_new;

	/* stop at the last node whose successor is not below rva */
	for ( prev = &rva_list_head; 
	      prev->next && prev->next->rva < rva; 
	      prev = prev->next ) {
		/* just walking */
	}

	if ( prev->next && prev->next->rva == rva ) {
		return(0);	/* already seen this rva */
	}

	/* we use exit() to free this, btw */
	rl_new = calloc( 1, sizeof(*rl_new) );
	if (! rl_new ) {
		return(0);
	}
	rl_new->rva = rva;
	rl_new->next = prev->next;
	prev->next = rl_new;
	return(1);
}

/* In the resolver, we keep a list of RVAs we have seen and weed these out.
 * Needless to say, this is a simple example with poor performance. */

/* Resolver callback passed to x86_disasm_forward().
 *   op   : operand to resolve
 *   insn : instruction the operand belongs to
 *   arg  : unused
 * Returns the value computed for the operand, or -1 when rva_list_add()
 * reports nothing new for insn->addr or the operand is of a kind that is
 * not resolved here (no register or stack resolving is attempted). */
long x86dis_resolver( x86_op_t *op, x86_insn_t *insn, void *arg ) {
	/* rva_list_add() returning 0 means: do not follow this one */
	if ( rva_list_add(insn->addr) == 0 ) {
		return( -1 );
	}

	/* relative operands are displacements from the end of the insn */
	if ( op->type == op_relative_near ) {
		return( insn->addr + insn->size + op->data.sbyte );
	}

	if ( op->type == op_relative_far ) {
		if ( op->datatype == op_word ) {
			return( insn->addr + insn->size + op->data.sword );
		}
		if ( op->datatype == op_dword ) {
			return( insn->addr + insn->size + op->data.sdword );
		}
		/* any other operand size is left unresolved */
		return( -1 );
	}

	/* absolute addresses and offsets are used as they are */
	if ( op->type == op_absolute || op->type == op_offset ) {
		return( op->data.sdword );
	}

	return( -1 );
}


/* -------------------------------------------------------- DISASM REQUESTS */
/* Link 'req' into the request list directly after 'curr'; a NULL 'curr'
 * means "before everything", i.e. req becomes the new head of
 * dis_requests.  Always returns 1. */
static int insert_request_after(struct DIS_REQ *req, struct DIS_REQ *curr ) {
	/* the pointer that must end up referring to req */
	struct DIS_REQ **link = curr ? &curr->next : &dis_requests;

	req->next = *link;
	*link = req;
	return(1);
}

/* Queue a disassembly request.
 *   type   : kind of request
 *   offset : offset in the input at which to disassemble
 *   len    : byte count (callers pass 0 for non-range requests)
 * The list is kept sorted by ascending offset; requests sharing an offset
 * are ordered by ascending type value, and equal (offset, type) pairs keep
 * their order of arrival.  A req_entry request also sets info.entry.
 * Returns 1 on success, 0 if the request could not be allocated. */
static int add_request( enum dis_req_type type, unsigned long offset, 
		        unsigned int len ){
	struct DIS_REQ *request, *curr, *prev = NULL;

	request = calloc( 1, sizeof(*request) );
	if (! request ) {
		return(0);
	}

	request->type = type;
	request->offset = offset;
	request->length = len;

	/* only flag entry mode once the request really exists */
	if ( type == req_entry ) {
		info.entry = 1;
	}

	/* find the first queued request that must come after the new one;
	 * 'prev' trails one node behind and is NULL while at the head */
	for ( curr = dis_requests; curr; prev = curr, curr = curr->next ) {
		if ( curr->offset > offset ) {
			break;
		}
		/* follow precedence of request types */
		if ( curr->offset == offset && curr->type > type ) {
			break;
		}
	}

	/* a NULL prev (empty list or new smallest) inserts at the head */
	insert_request_after( request, prev );

	return(1);
}

/* Perform one disassembly request on a buffer.
 *   type    : kind of request
 *   buf     : bytes to disassemble
 *   buf_len : number of valid bytes in buf
 *   buf_rva : address passed to libdisasm as the address of buf[0]
 *   offset  : offset within buf at which to start
 *   len     : byte count for a range request ('len' is optional, i.e.
 *             only used for a range param)
 * Address and range requests that start outside the buffer are reported
 * on info.err and skipped; a range is trimmed so it ends inside the
 * buffer.  Always returns 1. */
static int do_request( enum dis_req_type type, unsigned char *buf, 
		       unsigned int buf_len, unsigned long buf_rva, 
		       unsigned long offset, unsigned int len ) {
	x86_insn_t insn;

	switch (type) {
		case req_addr:
#ifdef DEBUG
			fprintf(info.err, "X86DIS: Disassemble address %lX\n", 
					offset );
#endif
			/* offset == buf_len is already one past the end */
			if ( offset >= buf_len ) {
				fprintf(info.err, 
					"X86DIS: address %lX out of bounds\n", 
					offset );
				break;
			}
			if ( x86_disasm(buf, buf_len, buf_rva, offset, &insn) ){
				x86dis_manual_print( &insn, NULL );
			}
			break;

		case req_range:
#ifdef DEBUG
			fprintf(info.err, 
					"X86DIS: Disassemble %d bytes at %lX\n",
					len, offset );
#endif
			if ( offset >= buf_len ) {
				fprintf(info.err, 
					"X86DIS: address %lX out of bounds\n", 
					offset );
				break;
			}
			/* x86_disasm_range() is not told the buffer size, so
			 * the range has to be trimmed here, measured from
			 * 'offset' and not from the start of the buffer */
			if ( len > buf_len - offset ) {
				len = (unsigned int) (buf_len - offset);
			}
			x86_disasm_range( buf, buf_rva, offset, len, 
				  x86dis_manual_print, NULL );
			break;

		case req_entry:
#ifdef DEBUG
			fprintf(info.err, 
				"X86DIS: Disassembly forward from %lX\n",
					offset );
#endif
			x86_disasm_forward( buf, buf_len, buf_rva, offset, 
				    x86dis_manual_print, NULL,
				    x86dis_resolver, NULL );
			break;

		default:
			/* unknown request type: nothing to do */
			break;
	}

	return( 1 );
}


/* -------------------------------------------------------- DISASM ACTIONS */
static int act_on_mmap( struct DIS_REQ *list, unsigned char *image, int len, 
		int base ){
	unsigned char *buf;
	struct DIS_REQ *req;

	/* cycle through requests, performing each on image */
	for ( req = list; req; req = req->next ) {
		buf = image;
		do_request( req->type, buf, len, base, req->offset, 
			    req->length ); 
	}
	return(1);
}

/* Map the input file (info.in) read-only into memory, perform all queued
 * requests on it, then unmap it.
 * Returns the result of act_on_mmap(), or 0 if the file could not be
 * examined or mapped (the reason is written to info.err).
 * NOTE(review): the file size is passed on as an int, so inputs larger
 * than INT_MAX bytes are not handled correctly. */
static int act_on_mmap_file( void ){
	unsigned char *image;
	struct stat sb;
	int fd = fileno(info.in);
	int rv;

	if ( fd < 0 || fstat(fd, &sb) != 0 ) {
		fprintf( info.err, "Unable to stat input file: %s\n",
				strerror(errno) );
		return(0);
	}

	/* mmap() rejects a zero length, so say what is really wrong */
	if ( sb.st_size <= 0 ) {
		fprintf( info.err, "Input file is empty\n" );
		return(0);
	}

	/* create image from file */
	image = mmap( NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0 );
	/* mmap() signals failure with MAP_FAILED, not with NULL or -1 */
	if ( image == MAP_FAILED ) {
		fprintf( info.err, "Unable to map file into memory: %s\n",
				strerror(errno) );
		return(0);
	}

	rv = act_on_mmap(dis_requests, image, sb.st_size, 0);

	munmap( image, sb.st_size );
	return( rv );
}

/* Read stream 'f' to its end into a heap buffer, growing the buffer in
 * steps of info.pagesize bytes.
 *   f     : stream to read
 *   image : receives the buffer; the caller owns it and may free() it.
 *           Set to NULL if not even the first chunk could be allocated.
 * Returns the number of bytes read.  Reading stops at end of file, on a
 * read error, or when the buffer cannot be grown any further (reported on
 * info.err); whatever was read up to that point is returned. */
static int mmap_stream( FILE *f, unsigned char **image ){
	size_t page = info.pagesize, cap = page, used = 0, got;
	unsigned char *buf, *grown;

	/* create image from stream */
	buf = malloc( cap );
	if (! buf ) {
		fprintf( info.err, 
			 "X86DIS: out of memory while buffering input\n" );
		*image = NULL;
		return( 0 );
	}

	for (;;) {
		/* there is always room for one more full page at 'used' */
		got = fread( buf + used, 1, page, f );
		used += got;
		if ( got < page || got == 0 ) {
			/* short read: end of file or a read error */
			break;
		}

		/* keep the old buffer if growing fails */
		grown = realloc( buf, cap + page );
		if (! grown ) {
			fprintf( info.err, 
			    "X86DIS: out of memory while buffering input\n" );
			break;
		}
		buf = grown;
		cap += page;
	}

	*image = buf;
	return( (int) used );
}

/* Buffer the whole input stream in memory with mmap_stream(), then run
 * every queued request against that buffer.  Returns what act_on_mmap()
 * returns. */
static int act_on_mmap_stream( void ){
	unsigned char *image;
	int len;

	len = mmap_stream( info.in, &image );

	/* the buffer starts at input offset 0 */
	return( act_on_mmap(dis_requests, image, len, 0) );
}

/* Perform the queued requests on a non-seekable input stream (info.in).
 *
 * With an entry request pending the whole stream is buffered first.
 * Otherwise the requests (sorted by offset) are served in one forward
 * pass: bytes up to a request's offset are discarded, then just the bytes
 * the request needs are read and disassembled.  If a request needs "the
 * rest of the input" (a range of length 0) or overlaps the next request,
 * the remainder of the stream is buffered and all remaining requests are
 * served from that buffer.
 *
 * Returns 1, or 0 if memory for a request could not be allocated. */
static int act_on_stream( void ){
	struct DIS_REQ *req;
	unsigned long pos = 0;		/* input bytes consumed so far */
	unsigned char *bytes, skip[128];
	size_t want, got;
	int size;

	if ( info.entry ) {
		/* we need to have the whole stream in memory to do a -e */
		return( act_on_mmap_stream() );
	}

	for ( req = dis_requests; req; req = req->next ) {
		/* advance the stream until we reach request offset */
		while ( req->offset > pos ) {
			want = req->offset - pos;
			if ( want > sizeof(skip) ) {
				want = sizeof(skip);
			}
			got = fread( skip, 1, want, info.in );
			/* count only what was really consumed */
			pos += got;
			if ( got < want ) {
				break;	/* end of input or read error */
			}
		}

		if ( req->offset > pos ) {
			/* input ended first; requests are sorted, so every
			 * remaining one is out of reach as well */
			fprintf( info.err, 
				 "X86DIS: address %lX out of bounds\n", 
				 req->offset );
			break;
		}

		if ( req->type == req_range && ! req->length ) {
			/* read to end of file ... via mmap ;) */
			size = mmap_stream( info.in, &bytes );
			act_on_mmap( req, bytes, size, (int) pos );
			free( bytes );
			break;
		}

		if ( req->type == req_addr ) {
			want = x86_max_insn_size();
		} else {
			want = req->length;
		}

		if ( req->next && req->next->offset <= pos + want ) {
			/* crap ... overlapping requests
			 * mmap the thing and continue on from here */
			size = mmap_stream( info.in, &bytes );
			act_on_mmap( req, bytes, size, (int) pos );
			free( bytes );
			break;
		}

		/* this calloc/free will need to be optimized
		 * if users do a lot of ops on STDIN ... hopefully
		 * they won't ;) */
		bytes = calloc( want ? want : 1, 1 );
		if (! bytes ) {
			fprintf( info.err, "X86DIS: out of memory\n" );
			return(0);
		}
		got = fread( bytes, 1, want, info.in );

		/* the buffer begins exactly at the request, so the offset
		 * into it is 0 and its address is the stream position */
		do_request( req->type, bytes, (unsigned int) got, pos, 0, 
			    req->length ); 

		/* these bytes are gone from the stream now */
		pos += got;
		free( bytes );
	}
	
	return(1);
}


/* -------------------------------------------------------- DISASM OPTIONS */
static enum x86_asm_format get_syntax_from_string( char *name ) {
	char *s, *d, lname[16] = {0};
	int i;
	
	for ( s = name, d = lname, i = 0; *s; s++, d++, i++ ) {
		
		if ( *s < 0x61 ) {
			*d = *s + 0x20;
		} else {
			*d = *s;
		}
		if ( *d < 0x61 || *d > 0x7A || i >= 15 ) { 
			/* bad input */
			fprintf( info.err, "Invalid syntax name: %s\n", name );
			return(0);
		}
	}

	if (! strcmp(lname, "att") ) {
		return( att_syntax );
	} else if (! strcmp(lname, "intel") ) {
		return( intel_syntax );
	} else if (! strcmp(lname, "raw") ) {
		return( raw_syntax );
	} else if (! strcmp(lname, "native") ) {
		return( native_syntax );
	} else if (! strcmp(lname, "xml") ) {
		return( xml_syntax );
	} else {
		fprintf( info.err, "Invalid syntax name: %s\n", lname );
	}

	return(unknown_syntax);
}

static int do_opt_s( char *name ) {
	enum x86_asm_format fmt;

	fmt = get_syntax_from_string( name );

	if ( fmt != unknown_syntax ) {
		info.syntax = fmt;
	} else {
		return(0);
	}

	return(1);
}

/* -d name : write a header line describing the columns of the named
 * syntax to the output stream (info.out).
 * Returns 1 on success, 0 if the syntax name is not recognised. */
static int do_opt_d( char *name ) {
	/* start out empty so that a defined string is printed even if the
	 * library writes nothing into the buffer */
	char buf[2048] = "";
	enum x86_asm_format fmt;

	fmt = get_syntax_from_string( name );
	if ( fmt == unknown_syntax ) {
		return(0);
	}

	if ( fmt == intel_syntax || fmt == att_syntax ) {
		/* we supply these, they are not in libdisasm syntax */
		fprintf(info.out,  "ADDRESS BYTES\t" );
	}

	/* same limit as before: two bytes short of the buffer size */
	x86_format_header( buf, sizeof(buf) - 2, fmt);
	fprintf(info.out, "%s\n", buf);

	return(1);
}

/* -f file : take input from 'name' instead of stdin.
 * Any previously selected input file is closed first.  Returns 1 when the
 * file was opened; on failure writes the reason to info.err, falls back
 * to stdin and returns 0. */
static int do_opt_f( char *name ) {
	struct stat sb;
	FILE *f;

	if ( info.in && info.in != stdin ){
		fclose(info.in);
	}
	/* until the new file is open, stdin is the input */
	info.in = stdin;

	if ( stat(name, &sb) ) {
		fprintf( info.err, "Unable to stat %s: %s\n",
				name, strerror(errno) );
		return(0);
	}

	/* fopen() reports failure with NULL */
	f = fopen(name, "r");
	if (! f ) {
		fprintf( info.err, "Unable to open file %s: %s\n",
				name, strerror(errno) );
		return(0);
	}

	info.in = f;
	return(1);
}

/* -o file : send output to 'name' instead of stdout.
 * Any previously selected output file is closed first.  Returns 1 when
 * the file was opened; on failure writes the reason to info.err, falls
 * back to stdout and returns 0. */
static int do_opt_o( char *name ) {
	FILE *f;

	if ( info.out && info.out != stdout ){
		fclose(info.out);
	}
	/* until the new file is open, stdout is the output */
	info.out = stdout;

	/* fopen() reports failure with NULL */
	f = fopen(name, "w+");
	if (! f ) {
		fprintf( info.err, "Unable to open file %s: %s\n",
				name, strerror(errno) );
		return(0);
	}

	info.out = f;
	return(1);
}

/* -l file : send error messages to 'name' instead of stderr.
 * Any previously selected log file is closed first.  Returns 1 when the
 * file was opened; on failure falls back to stderr, reports the reason
 * there and returns 0. */
static int do_opt_l( char *name ) {
	FILE *f;

	if ( info.err && info.err != stderr ){
		fclose(info.err);
	}
	/* make sure there is a usable error stream for the message below */
	info.err = stderr;

	/* fopen() reports failure with NULL */
	f = fopen(name, "w+");
	if (! f ) {
		fprintf( info.err, "Unable to open file %s: %s\n",
				name, strerror(errno) );
		return(0);
	}

	info.err = f;
	return(1);
}

/* Print the version banner to stdout.
 *   name : program name to show (callers pass argv[0])
 * PACKAGE_VERSION is expected to be the string literal that Autoconf
 * writes into config.h, hence the %s conversion. */
static void do_version(char *name) {
	printf("%s %s Distributed with libdisasm from %s\n", 
			name, PACKAGE_VERSION, LIBDISASM_URL);
}
/* Print the usage screen to stdout.
 *   name : program name to show in the usage line (callers pass argv[0]) */
static void do_help(char *name) {
	printf( "Usage: %s -aresfoldpLNhv\n"
		"Disassembles arbitrary bytes in a file or stream to x86 "
		"instructions.\n"
		"Options:\n"
		"\t-a offset     : disassemble instruction at offset\n"
		"\t-r offset len : disassemble range of bytes\n"
		"\t-e offset     : disassemble forward from offset\n"
		"\t-s name       : set output syntax "
				   "(intel, att, native, xml, raw)\n"
		"\t-f file       : take input from file\n"
		"\t-o file       : write output to file\n"
		"\t-l file       : write errors to file\n"
		"\t-d name       : display syntax description as header\n"
		"\t-p num        : memory map page size (default 512K)\n"
		"\t-L            : legacy (16-bit) mode\n"
		"\t-N            : no NULLs (ignore sequences of > 4 NULLs)\n"
		"\t-v            : display version information\n"
		"\t-h            : display this help screen\n"
		"\n"
		"The 'offset' and 'len' params must be entered in strtoul(3) "
		"format;"
		" any number or\n"
		"combination of -a, -r, and -e options may be used.\n"
		" Examples:  \n"
		"      x86dis -e 0 -s intel < bootsect.img\n"
		/* -d takes a syntax name of its own */
		"      x86dis -d raw -s raw -f a.out -e `readelf -h a.out | "
		           "grep Entry | \n"
		/* %% is a literal percent sign for printf */
		"           awk '{ printf( \"0x%%x\", strtonum($4) - "
		           "0x8048000 ) }'`\n"
		"      echo '55 89 e5 83 EC 08' | "
		           "perl -ane 'foreach(@F){print pack(\"C\",hex);}'|\n"
		"           x86dis -e 0 -s att\n",
	      name );
}

/* Handle one long option, argv[num], which starts with "--".
 *
 * --help and --version take no parameter.  Every other option takes a
 * value, written either as "--name=value" or as "--name value"; --range
 * takes its length as one further argument ("--range=offset len").
 *
 * Returns the number of additional argv entries consumed beyond
 * argv[num] (so the caller can skip them), or -1 on error: unknown
 * option, or a missing parameter. */
static int do_longarg( int argc, char **argv, int num ) {
	char *opt = &argv[num][2];
	char *eq, *arg1, *arg2;
	char name[16];
	size_t name_len;
	int n = num;
	unsigned long  off;
	unsigned int len;
	
	/* these take no parameters -- easy :) */
	if (! strcmp("help", opt) ) {
		do_help( argv[0] );
		return(0);
	} else if (! strcmp("version", opt) ) {
		do_version( argv[0] );
		return(0);
	}

	/* split "name=value": the name is everything before the '=' */
	eq = strchr( opt, '=' );
	name_len = eq ? (size_t) (eq - opt) : strlen( opt );
	if ( name_len == 0 || name_len >= sizeof(name) ) {
		fprintf( info.err, "Unknown option: %s\n", argv[num] );
		return(-1);
	}
	memcpy( name, opt, name_len );
	name[name_len] = '\0';

	if ( eq && eq[1] ) {
		/* value is attached: --name=value */
		arg1 = eq + 1;
	} else {
		/* value is the next argument: --name value */
		n++;
		if ( n >= argc ) {
			fprintf( info.err, "Missing parameter for %s\n", 
				 argv[num] );
			return(-1);
		}
		arg1 = argv[n];
	}

	if (! strcmp("addr", name) ) {
		/* --addr=offset */
		off = strtoul( arg1, NULL, 0 );
		add_request( req_addr, off, 0 );
	} else if (! strcmp("pagesize", name) ) {
		/* --pagesize=num */
		off = strtoul( arg1, NULL, 0 );
		if (off) {
			info.pagesize = off;
		}
	} else if (! strcmp("range", name) ) {
		/* --range=offset len */
		n++;
		if ( n < argc ) {
			arg2 = argv[n];
		} else {
			fprintf( info.err, "Missing range length param\n" );
			return(-1);
		}
		off = strtoul( arg1, NULL, 0 );
		len = (unsigned int) strtoul( arg2, NULL, 0 );
		add_request( req_range, off, len );
	} else if (! strcmp("entry", name) ) {
		/* --entry=offset */
		off = strtoul( arg1, NULL, 0 );
		add_request( req_entry, off, 0 );
	} else if (! strcmp("syntax", name) ) {
		/* --syntax=name */
		do_opt_s( arg1 );
	} else if (! strcmp("desc", name) ) {
		/* --desc=name */
		do_opt_d( arg1 );
	} else if (! strcmp("file", name) ) {
		/* --file=file */
		do_opt_f( arg1 );
	} else if (! strcmp("out", name) ) {
		/* --out=file */
		do_opt_o( arg1 );
	} else if (! strcmp("log", name) ) {
		/* --log=file */
		do_opt_l( arg1 );
	} else {
		/* same treatment as an unknown short option */
		fprintf( info.err, "Unknown option: %s\n", argv[num] );
		return(-1);
	}

	return(n - num);
}

/* Program entry point: parse the command line, queue the disassembly
 * requests, then run them against the input file or stream.
 *
 * Each argument is dispatched on the character after a leading '-' (or
 * on its first character when there is no '-').  Returns 0 on success or
 * when only help/version was asked for; prints the help screen and
 * returns -1 when the command line is empty or malformed. */
int main( int argc, char **argv ) {
	enum x86_options options = opt_none;
	unsigned long off;
	unsigned int len;
	int x, rv, error;
	char c;

	/* initialize default file streams */
	info.in = stdin;
	info.out = stdout;
	info.err = stderr;

	/* no arguments at all: nothing to do but show the usage */
	error = ( argc < 2 ) ? 1 : 0;

	/* process arguments */
	for ( x = 1; x < argc && ! error; x++ ) {
		c = ( argv[x][0] == '-' ) ? argv[x][1] : argv[x][0];

		switch ( c ) {
		case '-':
			/* handle long arg; rv is the number of extra
			 * arguments it used up */
			rv = do_longarg( argc, argv, x );
			if ( rv < 0 ) {
				error = 1;
			}
			x += rv;
			break;

		case 'a':
			/* -a offset : disasm single insn */
			if ( ++x >= argc ) {
				error = 1;
				break;
			}
			off = strtoul( argv[x], NULL, 0 );
			add_request( req_addr, off, 0 );
			break;

		case 'r':
			/* -r offset len : disasm range */
			x += 2;
			if ( x >= argc ) {
				error = 1;
				break;
			}
			off = strtoul( argv[x - 1], NULL, 0 );
			len = (unsigned int) strtoul( argv[x], NULL, 0 );
			add_request( req_range, off, len );
			break;

		case 'e':
			/* -e offset : disasm forward from offset */
			if ( ++x >= argc ) {
				error = 1;
				break;
			}
			off = strtoul( argv[x], NULL, 0 );
			add_request( req_entry, off, 0 );
			break;

		case 's':
			/* -s name : set output syntax */
			if ( ++x >= argc ) {
				error = 1;
				break;
			}
			do_opt_s( argv[x] );
			break;

		case 'd':
			/* -d name : show syntax description */
			if ( ++x >= argc ) {
				error = 1;
				break;
			}
			do_opt_d( argv[x] );
			break;

		case 'f':
			/* -f file : read input from file */
			if ( ++x >= argc ) {
				error = 1;
				break;
			}
			do_opt_f( argv[x] );
			break;

		case 'o':
			/* -o file : write output to file */
			if ( ++x >= argc ) {
				error = 1;
				break;
			}
			do_opt_o( argv[x] );
			break;

		case 'l':
			/* -l file : write log/errors to file */
			if ( ++x >= argc ) {
				error = 1;
				break;
			}
			do_opt_l( argv[x] );
			break;

		case 'p':
			/* -p num : set pagesize; a value of 0 is ignored */
			if ( ++x >= argc ) {
				error = 1;
				break;
			}
			off = strtoul( argv[x], NULL, 0 );
			if ( off ) {
				info.pagesize = off;
			}
			break;

		case 'L':
			/* -L : use legacy 16-bit mode */
			options = options | opt_16_bit;
			break;

		case 'N':
			/* -N : use IGNORE NULLS mode */
			options = options | opt_ignore_nulls;
			break;

		case 'v':
			/* -v : version info */
			do_version( argv[0] );
			break;

		case 'h':
		case '?':
			/* -h : help */
			do_help( argv[0] );
			break;

		default:
			error = 1;
		}
	}

	if ( error ) {
		do_help( argv[0] );
		/* perform any cleanup */
		return(-1);
	}

	if (! dis_requests ) {
		/* -h or -v only */
		return(0);
	}

	/* initialize libdisasm */
	x86_init( options, x86dis_reporter, NULL );

	/* OK, do disassembly requests: a named file is mapped, stdin is
	 * read as a stream */
	if ( info.in == stdin ) {
		act_on_stream();
	} else {
		act_on_mmap_file();
	}

	/* shut down disassembler */
	x86_cleanup();

	return(0);
}




/* syntax highlighted by Code2HTML, v. 0.9.1 */