/*
 * samefile - a program to find identical files
 *
 * This source looks beautiful with tabstop every four characters
 * In vi type :set ts=4
 */

/*
 * The @ enclosed @ comments are splint (formerly lclint) comments.
 * They reduce the noise generated by `strict' checking.
 */

/*@-compdef@*/
/*@-branchstate@*/
/*@-temptrans@*/
/*@-fullinitblock@*/
/*@-compdestroy@*/
/*@-ptrarith@*/

/*
 * Copyright by Jens Schweikhardt (samefile@schweikhardt.net)
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"  /* must be first because we need to test HAVE_LSTAT */

/*@ignore@*/
#include <sys/types.h>
#include <sys/stat.h>		/* stat, lstat */
#if !HAVE_LSTAT
#  define lstat stat
#endif
/*@end@*/

#include <stdio.h>			/* NULL */
#include <errno.h>			/* ENOENT */
#include <string.h>			/* memcmp */
#include <stdlib.h>			/* malloc */
#include <time.h>			/* clock */

#if HAVE_FCNTL_H
#  include <fcntl.h>		/* O_RDONLY */
#endif

#if HAVE_UNISTD_H
#  include <unistd.h>
#endif

#if HAVE_MMAP
#  include <sys/mman.h>
#endif

#ifndef EXIT_SUCCESS
#  define EXIT_SUCCESS 0
#endif
#ifndef EXIT_FAILURE
#  define EXIT_FAILURE 1
#endif

/* In case the POSIX S_ISREG is missing, use this alternate macro */
#ifndef S_ISREG
#  define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
#endif

#include "error.h"
#include "fgetline.h"
#include "options.h"

/******************************************************************************
 * CONSTANT DEFINITIONS
 ******************************************************************************/

#define plural(n)			(n), (n) == 1 ? "" : "s"

/******************************************************************************
 * TYPE DEFINITIONS
 ******************************************************************************/

typedef off_t file_size;			/* type of st_size in struct stat */

typedef struct file_list_elm {		/* file list element */
	char					*name;	/* pointer to file name */
	struct file_list_elm	*next;	/* pointer to next element */
	dev_t					 dev;	/* device of inode */
	ino_t					 ino;	/* inode number */
} file_list_elm;

typedef struct tnode {				/* binary tree node */
	file_size				 size;	/* size field */
	long					 count;	/* number of names in list */
	file_list_elm			*list;	/* points to list of names with same size */
	struct tnode			*left;	/* points to node with lesser size */
	struct tnode			*right;	/* points to node with greater size */
} tnode;

/******************************************************************************
 * STATIC VARIABLES
 ******************************************************************************/

/* Option related globals */
/*@observer@*/
static char *sep = "\t";		 /* default separator for filenames in output */
static off_t minsize = 0;			   /* skip all files with size <= minsize */

/* Encoding of flags: */
#define VERBOSE				1
#define IGNORE_ERRORS		2
#define DONT_BE_SMART		4
#define ALL_INODES			8
#define DONT_ALPHASORT	   16
static int flags = 0;								  /* where flags are kept */

/* Encoding of links: */
#define DO_NOT_OUTPUT		0			/* ...equal files that are hard links */
#define DO_NOT_CHECK		1		  /* don't check if equal files are links */
#define REPORT				2		  /* I want links mentioned in the output */
static int links = DO_NOT_OUTPUT;			 /* what to do if links are found */

/* Internal file scope varables. */
static file_size most_often = 0;			   /* size most often encountered */
static long longest_list = 0;				/* # of elms in longest file list */
static long files_in_groups = 0;					   /* files in all groups */
static unsigned long btotal = 0;				  /* bytes in identical files */
static long n_tree_node_alloc = 0;				/* calls to tree_node_alloc() */
static long n_list_elm_alloc = 0;				 /* calls to list_elm_alloc() */
static long malloc_free = 0;		  /* incremented for malloc, dec for free */
static long groups = 0;			  /* of different sizes = calls to comp_group */
static char *Errfile;					 /* filename for which error occurred */
static int Errno;										   /* a copy of errno */
static clock_t clock_at_start;						  /* to record start time */
static char *eq;								 /* pointer to equality table */
static size_t buffer_size;                             /* for file comparison */
static int eol = '\n';                               /* End of Line character */
#if !HAVE_MMAP
static char *c1, *c2;                          /* Pointers to compare buffers */
#endif

/******************************************************************************
 * FUNCTION PROTOTYPES
 ******************************************************************************/

	static /*@only@*/ tnode *
addtree (/*@null@*/ /*@returned@*/ tnode *p, struct stat *s, char *f)
	/*@globals  most_often,longest_list,malloc_free,stderr,fileSystem,internalState,n_tree_node_alloc,n_list_elm_alloc,links,sep,flags@*/
	/*@modifies most_often,longest_list,malloc_free,stderr,fileSystem,p->left,p->right,p->list,p->count,*f@*/;

	static void
comp_group (file_size size, long files, char **filelist)
	/*@globals  stderr,fileSystem,btotal,malloc_free,internalState,flags,Errfile,Errno,sep@*/
	/*@modifies stderr,fileSystem,btotal,malloc_free,internalState@*/;

	static int
compare (char *f1, char* f2, struct stat *s1, struct stat *s2)
	/*@globals  fileSystem,internalState,Errno,Errfile@*/
	/*@modifies fileSystem,internalState,Errno,Errfile,s1,s2@*/;

	static void
free_list (file_list_elm *first)
	/*@globals  malloc_free@*/
	/*@modifies malloc_free,first@*/;

	int
main (int argc, char **argv)
	/*@globals  internalState,fileSystem,stderr,stdout,flags,minsize,links,sep,malloc_free,groups,files_in_groups,longest_list,most_often,n_tree_node_alloc,n_list_elm_alloc,Errno,Errfile,btotal@*/
	/*@modifies internalState,fileSystem,stderr,stdout@*/;

#if HAVE_MMAP && __LCLINT
/*@ignore@*/
	extern /*@only@*/ caddr_t
mmap (/*@null@*/ caddr_t addr, size_t len, int prot,
	int flags, int fd, off_t offset)
	/*@modifies addr@*/;

	extern int
munmap (/*@only@*/ caddr_t addr, size_t len)
	/*@modifies addr@*/;
/*@end@*/
#endif

	static void /*@exits@*/
myexit (int status) attribute(noreturn)
	/*@globals btotal,malloc_free,flags,stderr,fileSystem,internalState@*/
	/*@modifies stderr,fileSystem@*/;

	static void
parse_options (int argc, char **argv)
	/*@globals  opt_err,stdout,stderr,fileSystem,flags,minsize,links,sep,btotal,malloc_free,internalState@*/
	/*@modifies opt_err,stdout,stderr,fileSystem,flags,minsize,links,sep@*/;

	static void
process_input (void)
	/*@globals  minsize,flags,groups,files_in_groups,longest_list,most_often,n_tree_node_alloc,n_list_elm_alloc,malloc_free,links,sep,btotal,Errfile,Errno,fileSystem,internalState@*/
	/*@modifies malloc_free,fileSystem@*/;

	static int
scompare (const void *a, const void *b)
	/*@*/;

	static /*@out@*/ /*@only@*/ tnode *
tree_node_alloc (void)
	/*@globals  malloc_free,n_tree_node_alloc,stderr,fileSystem,internalState@*/
	/*@modifies malloc_free,n_tree_node_alloc,stderr,fileSystem@*/;

	static /*@out@*/ /*@only@*/ file_list_elm *
list_elm_alloc (void)
	/*@globals  malloc_free,n_list_elm_alloc,stderr,fileSystem,internalState@*/
	/*@modifies malloc_free,n_list_elm_alloc,stderr,fileSystem@*/;

	static void
treeprint (/*@only@*/ tnode *p)
	/*@globals  fileSystem,stdout,groups,files_in_groups,malloc_free,btotal,flags,Errfile,Errno,sep,flags,internalState@*/
	/*@modifies fileSystem,stdout,groups,files_in_groups,malloc_free,p@*/;

	static void
usage (void)
	/*@globals  stderr,fileSystem,internalState@*/
	/*@modifies stderr,fileSystem@*/;

/******************************************************************************
 * FUNCTIONS
 ******************************************************************************/

	int
main (int argc, char **argv)
/*
 * The ball starts rolling here.
 *
 * Returns EXIT_FAILURE if one of these events happen:
 * - less than two files left to process.
 * - usage error
 * - memory exhausted
 * In any other case EXIT_SUCCESS.
 */
{
	clock_at_start = clock ();
	set_progname (argv[0], "samefile");
#if HAVE_MMAP
#if HAVE_GETPAGESIZE
    buffer_size = (size_t) getpagesize ();
#else
    buffer_size = 32768;
#endif
#else                           /* no mmap */
    buffer_size = 32768;
    c1 = xmalloc (buffer_size);
    c2 = xmalloc (buffer_size);
#endif
	parse_options (argc, argv);
	process_input ();
	myexit (EXIT_SUCCESS);
	/*@notreached@*/
	return EXIT_SUCCESS;
}

/*============================================================================*/

	static void
myexit (int status)
/*
 * All memory returned?
 * This function is used instead of exit().
 */
{
#if !HAVE_MMAP
    xfree (c1);
    xfree (c2);
#endif
	if (malloc_free != 0) { /* did I forget to free some memory? */
		err_msg ("malloc - free = %ld", malloc_free);
	}
	if (flags & VERBOSE) {
		/*
		 * the btotal value is only meaningful if neither
		 * of the -i and -x switches was supplied.
		 */
		if (!(flags & (ALL_INODES|DONT_BE_SMART))) {
			cfprintf (stderr,
			"You have a total of %lu byte%s in identical files.\n",
			plural (btotal));
		}
		cfprintf (stderr, "Execution time: %.2fs\n",
		(clock () - clock_at_start)/(double)CLOCKS_PER_SEC);
	}
	exit (status);
}

/*============================================================================*/

	static void
process_input (void)
/*
 * Add each input filename with filesize to the binary tree,
 * then call treeprint. Actual file comparison is done when
 * treeprint calls comp_group().
 */
{
	tnode *root;
	char *filename;	/* pointer to filename being processed */
	struct stat s;	/* where inode info goes, reference passed to addtree() */
	unsigned long input_filenames = 0; /* number of filenames read from stdin */
	unsigned long tree_filenames = 0; /* number of filenames inserted in tree */

	root = NULL;
	while ((filename = fgetline (stdin, eol)) != NULL) {
		++input_filenames;
		/* Weed out everything that
		 * - can't be stat()ed or
		 * - is not a regular file or
		 * - is <= minsize
		 */
		if (lstat (filename, &s) == 0 && S_ISREG (s.st_mode) &&
		s.st_size >  minsize) {
			++tree_filenames;
			root = addtree (root, &s, filename);
		} else {
			xfree (filename);
		}
	}
	if (flags & VERBOSE) {
		cfprintf (stderr, "%ld input filename%s.\n",
		plural ((long)input_filenames));
		cfprintf (stderr,
		"%ld regular file%s left with size > %ld byte%s.\n",
		plural ((long)tree_filenames), plural ((long)minsize));
	}
	if (tree_filenames == 0) {
		err_quit ("no files left");
	}
	check (root != NULL);
	if (tree_filenames == 1) {
		/*
		 * Normally, tnode *root and root->list are freed in treeprint.
		 * Because treeprint is not called at all if there is only one
		 * filename, we need to free them here.
		 */
		xfree (root->list->name);
		xfree (root->list);
		xfree (root);
		err_quit ("only one file left");
	}
	eq = xmalloc ((longest_list - 1) * (longest_list - 1));
	treeprint (root);
	xfree (eq);
	if (flags & VERBOSE) {
		cfprintf (stderr,
		"%ld file%s left after removing sizes that appear only once.\n",
		plural ((long)files_in_groups));
		cfprintf (stderr,
		"%ld group%s of files with same size.\n", plural ((long)groups));
		if (longest_list > 1) {
			cfprintf (stderr,
			"Largest group of one size is %ld files with size %ld.\n",
			(long)longest_list, (long)most_often);
		}
		cfprintf (stderr, "Memory consumption:\n");
		cfprintf (stderr, "Largest equality table used %ld byte%s.\n",
		plural ((long)(longest_list * (longest_list - 1) / 2)));
		cfprintf (stderr,
		"Binary tree built with %ld node%s of size %ld = %ld bytes.\n",
		plural ((long)n_tree_node_alloc), (long) sizeof *root,
		(long)(n_tree_node_alloc * sizeof *root));
		cfprintf (stderr,
		"Allocated %ld node%s of size %ld = %ld bytes for file lists.\n",
		plural ((long)n_list_elm_alloc), (long) sizeof *root->list,
		(long)(n_list_elm_alloc * sizeof *root->list));
	}
	if (files_in_groups == 0) {
		myexit (EXIT_SUCCESS); /* nothing to do */
	}
}
/*============================================================================*/

	static tnode *
addtree (tnode *p, struct stat *s, char *fname)
/*
 * Insert a size/filename pair into the binary tree.
 * If size is new, make new node, else add the filename to
 * the list of filenames in the corresponding node.
 * Also check if file is on the same device and has same inode.
 * If so, don't add to the tree but print a line if -v is set.
 *
 * p is a pointer to a treenode under which the filename fname
 * should be inserted. p == NULL means make a new node.
 * Return value is the pointer to the new node or p.
 * If p != NULL, addtree calls itself recursively until
 * it finds a node with same size or reaches a leaf.
 */
{
	if (p == NULL) { /* make and initialize new node */
		p = tree_node_alloc ();
		p->size = s->st_size;
		p->count = 1;
		p->left = p->right = NULL;
		p->list = list_elm_alloc ();
		p->list->name = fname;
		p->list->next = NULL;
		p->list->dev = s->st_dev;
		p->list->ino = s->st_ino;
		if (longest_list < 1) {
			longest_list = 1;
			most_often = p->size;
		}
	} else if (s->st_size == p->size) {
		/* same size; check if hard linked to a file we already know about */
		file_list_elm *e; /* pointer to a file list element */

		for (e = p->list; e != NULL; e = e->next) { /* scan through the list */
			/* dev/inode already in list? */
			if (s->st_dev == e->dev && s->st_ino == e->ino) {
				/* yes: want a report? */
				if (links == REPORT) {
					cfprintf (stdout, "%ld%s%s%s%s%s[%ld]\n", (long)s->st_size,
					sep, e->name, sep, fname, sep, (long)s->st_nlink);
				}
				/*
				 * if we want all filenames (even those for identical inodes)
				 * then we do not return right away
				 */
				if (!(flags & ALL_INODES)) {
					xfree (fname);
					return p;
				}
			}
		}
		/* insert this file at the beginning of the list */
		e = list_elm_alloc ();
		e->next = p->list;
		e->name = fname;
		e->dev = s->st_dev;
		e->ino = s->st_ino;
		p->list = e;
		++(p->count);
		if (p->count > longest_list) {
			longest_list = p->count;
			most_often = p->size;
		}
	} else if (s->st_size < p->size) {
		p->left = addtree (p->left, s, fname);
	} else {
		p->right = addtree (p->right, s, fname);
	}
	/*@i7@*/ return p;
}
/*============================================================================*/

	static void
treeprint (tnode *p)
/*
 * Recursively process the binary tree in reverse order,
 * that is, process node with largest size first.
 */
{
	if (p != NULL) {
		treeprint (p->right);
		if (p->count > 1) {
			long i;
			file_list_elm *l = p->list;
			/*
			 * Build the filelist[] array from the file name list.
			 * Freed at the end of this block.
			 */
			char **filelist = xmalloc (p->count * sizeof filelist[0]);
			++groups;
			for (i = 0; i < p->count; ++i) {
				++files_in_groups;
				filelist[i] = l->name;
				l = l->next;
			}
			check (l == NULL);
			if (!(flags & DONT_ALPHASORT)) {
/*@i@*/			qsort (filelist, (size_t)p->count, sizeof filelist[0], scompare);
			}
/*@i@*/		comp_group (p->size, p->count, filelist);
			xfree (filelist);
		}
		free_list (p->list); /* list was printed or skipped and can be freed */
		treeprint (p->left);
		xfree (p); /* node itself has been processed, can be freed now */
	}
}
/*============================================================================*/

	static int
scompare (const void *a, const void *b)
/*
 * Compare function for qsort. Receives pointers to pointers to strings.
 */
{
	return strcmp (*(const char * const *)a, *(const char * const *)b);
}
/*============================================================================*/

	static void
free_list (file_list_elm *first)
/*
 * Free a file list.
 */
{
	file_list_elm *current, *tmp;

	for (current = first; current != NULL; current = tmp) {
		tmp = current->next;
		xfree (current->name);
		xfree (current);
	}
}
/*============================================================================*/

	static tnode *
tree_node_alloc (void)
/*
 * Tree node allocation.
 * Memory allocated in this function is freed when
 * the tree is output using the treeprint() function.
 */
{
	tnode *t;

	t = xmalloc (sizeof *t);
	++n_tree_node_alloc;
	return t;
}
/*============================================================================*/

	static file_list_elm *
list_elm_alloc (void)
/*
 * File list element allocation.
 * Memory allocated in this function is freed when
 * the tree is output using the treeprint() function.
 */
{
	file_list_elm *f;

	f = xmalloc (sizeof *f);
	++n_list_elm_alloc;
	return f;
}
/*============================================================================*/

	static void
parse_options (int argc, char **argv)
/*
 * Parse command line options.
 */
{
	int c;

	opt_err = 0;
	while ((c = get_opt (argc, argv, "0ag:ilqrs:Vvx")) != EOF) {
		switch (c) {
		case '0':
			eol = '\0';
		break;
		case 'a':
			flags |= DONT_ALPHASORT;
		break;
		case 'g':
			if (sscanf (opt_arg, "%ld", (long *)&minsize) != 1) {
				err_msg ("warning: can't convert -g %s, using -g 0 instead",
				opt_arg);
				minsize = 0;
			}
		/*@switchbreak@*/ break;
		case 'i':
			flags |= ALL_INODES;
		/*@switchbreak@*/ break;
		case 'l':
			links = DO_NOT_CHECK;
		/*@switchbreak@*/ break;
		case 'q':
			flags |= IGNORE_ERRORS;
		/*@switchbreak@*/ break;
		case 'r':
			links = REPORT;
		/*@switchbreak@*/ break;
		case 's':
			sep = opt_arg;
		/*@switchbreak@*/ break;
		case 'V':
			(void) puts ("$Id: samefile.c,v 2.12 2005/08/07 17:20:16 schweikh Exp $");
			myexit (EXIT_SUCCESS);
		case 'v':
			flags |= VERBOSE;
		/*@switchbreak@*/ break;
		case 'x':
			flags |= DONT_BE_SMART;
		/*@switchbreak@*/ break;
		default:
			usage ();
			myexit (EXIT_FAILURE);
		}
	}
}

/*============================================================================*/

	static void
usage (void)
/*
 * Display the usage message.
 */
{
	cfprintf (stderr, "\n");
	(void) err_progname (stderr);
	cfprintf (stderr,
	" reads a list of filenames from stdin and\n"
	"writes a list of identical files on stdout.\n\n"
	"usage: "); (void) err_progname (stderr);
	cfprintf (stderr, " [-g size] [-l | -r] [-s sep] [-0aiqVvx]\n\n"
	"  options:  -0 : input lines are 0 terminated (newline)\n"
	"            -a : don't sort files with same size alphabetically\n"
	"            -g : only output files greater than size (0)\n"
	"            -i : compare even files that are hard linked\n"
	"            -l : skip checking for hard links\n"
	"            -q : do not warn when open(2) fails\n"
	"            -r : report if identical files are hard links\n"
	"            -s : use sep as separator string for files (tab)\n"
	"            -V : output version information and exit\n");
	cfprintf (stderr,
	"            -v : verbose output with statistics on stderr\n"
	"            -x : do not try to save work by using logic\n"
	"    default: names which are hard links to the same file\n"
	"    are neither compared nor output (ex and vi for example).\n\n"
	);
}

/*============================================================================*/

	static void
comp_group (file_size size, long files, char **filelist)
{
    long i, j, k, idx, row = files - 1;
    struct stat si, sj;

    (void) memset (eq, 0, (size_t) row * row);
/*
 * The eq array is organized as follows:
 * example for files = 7, eq[6*6 = 36].
 *
 *     i=0   1   2   3   4   5
 *     +----------------------
 * j=1 | 0  *6 *12 *18 *24 *30
 *   2 | 1   7 *13 *19 *25 *31  The *-marked elements are unused but
 *   3 | 2   8  14 *20 *26 *32  simplify processing a lot.
 *   4 | 3   9  15  21 *27 *33
 *   5 | 4  10  16  22  28 *34
 *   6 | 5  11  17  23  29  35
 *
 * The diagonal elements 0, 7, 14... compute like i*files.
 */
    /*
     * The nested for loop processes the above scheme in the
     * order of the indices and ignores the *-marked elements.
     */
    for (i = 0; i < row; ++i) {
        for (j = i + 1, idx = i * files; j < files; ++j, ++idx) {
            if (eq[idx] != '\0' && !(flags & DONT_BE_SMART))
                continue;
			switch (compare (filelist[i], filelist[j], &si, &sj)) {
			case 0: /* files are equal */
				cfprintf (stdout, "%ld%s%s%s%s", (long)size,
				    sep, filelist[i], sep, filelist[j]);
				cfprintf (stdout, "%s%c%s%ld%s%ld\n", sep,
				    si.st_dev == sj.st_dev ? '=' : 'X', sep,
				    (long)si.st_nlink, sep, (long)sj.st_nlink);
				btotal += size;
				if (j == i + 1)     /* already at diagonal element */
					break;
				/* mark indices to the right up to the diagonal element */
				for (k = idx + row; k <= (j - 1) * files; k += row) {
					++eq[k];
				}
			/*@switchbreak@*/ break;
			case 1: /* files are differing */
			/*@switchbreak@*/ break;
			case 2: /* no such file error */
				/* temporary files may have disappeared,
				   I don't treat 'file not found' as an error */
			/*@switchbreak@*/ break;
			default: /* other error */
				if (!(flags & IGNORE_ERRORS)) {
					err_ret ("warning: open(2) failed for %s", Errfile);
				}
			} /* switch (compare) */
        }
	}
}

/*============================================================================*/

	static int
compare (char *f1, char* f2, struct stat *s1, struct stat *s2)
/*
 * Compare the files with names f1 and f2.
 * Return 0 for equal, 1 for differing,
 * 2 for 'no file error', 3 for other errors.
 * Write the fstats to s1 and s2.
 */
{
#if HAVE_MMAP
    caddr_t c1, c2;
#endif
    int fd1, fd2;
    file_size whole, i;

/*@-moduncon@*/
    fd1 = open (f1, O_RDONLY);
/*@=moduncon@*/
    if (fd1 < 0) {
		Errno = errno;
		Errfile = f1;
		return Errno == ENOENT ? 2 : 3;
    }
/*@-moduncon@*/
	fd2 = open (f2, O_RDONLY);
/*@=moduncon@*/
	if (fd2 < 0) {
		Errno = errno;
		Errfile = f2;
		check_sys (close (fd1) == 0); /* or we run out of file descriptors */
		return Errno == ENOENT ? 2 : 3;
	}
    if (fstat (fd1, s1) != 0) {
		Errno = errno;
		Errfile = f1;
        check_sys (close (fd1) == 0);
        check_sys (close (fd2) == 0);
		return Errno == ENOENT ? 2 : 3;
    }
    if (fstat (fd2, s2) != 0) {
		Errno = errno;
		Errfile = f2;
        check_sys (close (fd1) == 0);
        check_sys (close (fd2) == 0);
		return Errno == ENOENT ? 2 : 3;
    }
    /* Just to be sure. The file's size may have changed in the meantime */
    if (s1->st_size != s2->st_size) {
        check_sys (close (fd1) == 0);
        check_sys (close (fd2) == 0);
        return 1;
    }
    /*
     * whole is the file size rounded down
     * to a multiple of buffer_size
     */
    whole = (s1->st_size / buffer_size) * buffer_size;
    /* compare alle completely fillable buffers */
    for (i = 0; i != whole; i += buffer_size) {
#if HAVE_MMAP
        c1 = mmap (0, buffer_size, PROT_READ, MAP_SHARED, fd1, i);
        if (c1 == NULL || c1 == (caddr_t) - 1 || c1 == MAP_FAILED)
            error (E_PERROR, "mmap failed");
        c2 = mmap (0, buffer_size, PROT_READ, MAP_SHARED, fd2, i);
        if (c2 == NULL || c2 == (caddr_t) - 1 || c2 == MAP_FAILED)
            error (E_PERROR, "mmap failed");
#else
        check_sys (read (fd1, c1, buffer_size) != -1);
        check_sys (read (fd2, c2, buffer_size) != -1);
#endif
        if (memcmp (c1, c2, buffer_size) != 0) {
            check_sys (close (fd1) == 0);
            check_sys (close (fd2) == 0);
#if HAVE_MMAP
            check_sys (munmap (c1, buffer_size) == 0);
            check_sys (munmap (c2, buffer_size) == 0);
#endif
            return 1;
        }
#if HAVE_MMAP
        check_sys (munmap (c1, buffer_size) == 0);
        check_sys (munmap (c2, buffer_size) == 0);
#endif
    }
    /* Done if nothing remains -- files have identical contents */
    if (whole == s1->st_size) {
        check_sys (close (fd1) == 0);
        check_sys (close (fd2) == 0);
        return 0;
    }
    /* compare the remaining incompletely filled buffer */
#if HAVE_MMAP
    c1 = mmap (0, buffer_size, PROT_READ, MAP_SHARED, fd1, i);
    if (c1 == NULL || c1 == (caddr_t) - 1 || c1 == MAP_FAILED)
        error (E_PERROR, "mmap failed");
    c2 = mmap (0, buffer_size, PROT_READ, MAP_SHARED, fd2, i);
    if (c2 == NULL || c2 == (caddr_t) - 1 || c1 == MAP_FAILED)
        error (E_PERROR, "mmap failed");
    {
        int differs = memcmp (c1, c2, (size_t) s1->st_size % buffer_size);

        check_sys (munmap (c1, buffer_size) == 0);
        check_sys (munmap (c2, buffer_size) == 0);
        check_sys (close (fd1) == 0);
        check_sys (close (fd2) == 0);
        return differs != 0 ? 1 : 0;
    }
#else                           /* kein mmap */
    check_sys (read (fd1, c1, s1->st_size % buffer_size) != -1);
    check_sys (read (fd2, c2, s1->st_size % buffer_size) != -1);
    check_sys (close (fd1) == 0);
    check_sys (close (fd2) == 0);
    return memcmp (c1, c2, s1->st_size % buffer_size) ? 1 : 0;
#endif
}
/*==================== THAT'S IT! ============================================*/


syntax highlighted by Code2HTML, v. 0.9.1