/* please see 'please_read_before_using_this_program' and 'license.txt' for important
 * information
 */
/* this application was written by folkert@vanheusden.com
 * check http://vanheusden.com/Linux/phantom.php3 for information/new versions
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>


/* non-zero once '-d' was given on the command line: enables the diagnostic
 * messages that are written to stderr throughout this file
 */
char debug=0;

/* set by do_block() when it skipped an all-zero block; do_write() then
 * repositions the output file to cur_pos before writing and clears the flag
 */
char seeksleft=0;
/* offset in the output at which the next block belongs; do_block() advances
 * it by 512 for every block it has handled (written or skipped)
 */
off_t cur_pos=0;
/* number of all-zero blocks that were skipped; main() reports it at exit */
off_t phantomblocks=0;

/* Create (or truncate) the file 'fnam' and hand back a descriptor that is
 * open for writing.
 *
 * fnam: path of the file to create
 * mode: mode bits passed to creat() and afterwards re-applied with fchmod()
 *
 * Returns the new descriptor. Both failure cases are fatal: an error is
 * reported through perror() and the process exits with status 1.
 */
int create_file(char *fnam, int mode)
{
	int	fd;

	fd = creat(fnam, mode);
	if (fd == -1)
	{
		perror("phantom");
		if (debug)
			fprintf(stderr, "error in create_file %s\n", fnam);
		exit(1);
	}

	/* creat() doesn't set suid etc., so push the requested mode onto the
	 * freshly created file explicitly
	 */
	if (fchmod(fd, mode) == -1)
	{
		perror("phantom");
		if (debug)
			fprintf(stderr, "error in fchmod on %s\n", fnam);
		exit(1);
	}

	if (debug)
		fprintf(stderr, "create_file: %s => %d\n", fnam, fd);

	return fd;
}

/* Allocate a buffer of 'size' bytes.
 *
 * size: number of bytes to allocate
 *
 * Returns the buffer; the caller releases it with free(). When malloc()
 * fails a message is printed and the process exits with status 1, so the
 * return value never needs a NULL check.
 */
char * allocate_buffer(int size)
{
	char *dummy = malloc(size);

	if (!dummy)
	{
		/* terminate the line so the message doesn't run into later output */
		fprintf(stderr, "phantom: cannot allocate %d bytes of memory\n", size);
		exit(1);
	}

	if (debug) fprintf(stderr, "allocate_buffer: %d bytes (ok)\n", size);

	return dummy;
}

/* read() wrapper that keeps going until 'nreq' bytes have arrived or the
 * input is exhausted.
 *
 * fd:     descriptor to read from
 * buffer: destination, must have room for nreq bytes
 * nreq:   number of bytes wanted
 *
 * Returns the number of bytes stored in buffer; this is less than nreq only
 * when end-of-file was hit. A read interrupted by a signal (EINTR) is simply
 * retried; any other read error is reported and ends the process with
 * status 1.
 */
int MY_READ(int fd, char *buffer, int nreq)
{
	int	total = 0;

	while (total != nreq)
	{
		int	got = read(fd, buffer + total, nreq - total);

		if (got == 0)
		{
			/* nothing more to come: plain end-of-file */
			if (debug) fprintf(stderr, "MY_READ: EOF\n");
			break;
		}

		if (got > 0)
		{
			/* partial (or complete) read, account for it */
			total += got;
			continue;
		}

		/* got < 0: only EINTR is worth another attempt */
		if (got == -1 && errno != EINTR)
		{
			perror("phantom");
			if (debug) fprintf(stderr, "MY_READ: %d, %d\n", fd, nreq);
			exit(1);
		}
	}

	if (debug) fprintf(stderr, "MY_READ: got %d\n", total);

	return total;
}

/* write() wrapper that keeps going until all 'nreq' bytes are written.
 *
 * fd:     descriptor to write to
 * buffer: data to write
 * nreq:   number of bytes to write (0 is allowed: write() is still called
 *         once, with a length of 0)
 *
 * Returns the number of bytes written. That is nreq unless write() returned
 * 0, in which case the loop stops early. A write interrupted by a signal
 * (EINTR) is retried; any other error is reported and ends the process with
 * status 1.
 */
int MY_WRITE(int fd, char *buffer, int nreq)
{
	int	done = 0;

	for (;;)
	{
		int	put = write(fd, buffer + done, nreq - done);

		if (put == 0)
		{
			/* nothing accepted; give up instead of spinning */
			break;
		}

		if (put > 0)
		{
			done += put;
		}
		else if (put == -1 && errno != EINTR)
		{
			perror("phantom");
			if (debug) fprintf(stderr, "MY_WRITE: %d, %d (%d)\n", fd, nreq, errno);
			exit(1);
		}

		if (done == nreq)
			break;
	}

	return done;
}

/* Write 'nreq' bytes to the output file, first catching up on any blocks
 * that do_block() skipped.
 *
 * When 'seeksleft' is set, the file length is set to 'cur_pos' with
 * ftruncate() and the file offset is moved to 'cur_pos' with lseek() before
 * anything is written; the flag is then cleared. Callers pass nreq == 0 to
 * get only this effect, so that skipped blocks at the very end of the input
 * still end up in the size of the output.
 *
 * fd:     output descriptor
 * buffer: data to write
 * nreq:   number of bytes to write, may be 0
 *
 * Returns what MY_WRITE() returns. A failing ftruncate() or lseek() is
 * reported and ends the process with status 1.
 */
int do_write(int fd, char *buffer, int nreq)
{
	if (seeksleft)
	{
		if (ftruncate(fd, cur_pos) == -1)
		{
			/* same prefix as the rest of the file; perror() adds ": " itself */
			perror("phantom");
			if (debug) fprintf(stderr, "do_write: ftruncate error!\n");
			exit(1);
		}
		if (lseek(fd, cur_pos, SEEK_SET) == -1)
		{
			perror("phantom");
			if (debug) fprintf(stderr, "do_write: lseek error!\n");
			exit(1);
		}

		if (debug) fprintf(stderr, "do_write, curpos: %ld, actual pos: %ld\n", 
				   (long)cur_pos, (long)lseek(fd, 0, SEEK_CUR));

		seeksleft=0;
	}

	return MY_WRITE(fd, buffer, nreq);
}

/* Handle one 512-byte block of input: write it to the output unless every
 * byte in it is 0x00, in which case it is only counted and skipped.
 *
 * fdout: output descriptor
 * buf:   start of the block; exactly 512 bytes are examined. No particular
 *        alignment is required.
 *
 * Side effects: advances the global 'cur_pos' by 512 in both cases; for a
 * skipped block it also sets 'seeksleft' and increments 'phantomblocks'.
 */
void do_block(int fdout, char *buf)
{
	unsigned long	word;
	unsigned long	nonzero = 0;
	size_t	offset;

	/* all 0x00? OR the block together one machine word at a time. The
	 * words are fetched with memcpy() rather than through a casted
	 * pointer, so the result does not depend on how 'buf' is aligned.
	 */
	for(offset = 0; offset + sizeof word <= 512; offset += sizeof word)
	{
		memcpy(&word, buf + offset, sizeof word);
		nonzero |= word;
	}

	/* not zero, just write
	 * it would be a little more efficient to queue 512-byte blocks
	 * until blksize is reached (fstat) or a seek is encountered
	 */
	if (nonzero)	/* not 0x00 */
	{
		do_write(fdout, buf, 512);
	}
	else
	{
		if (debug) fprintf(stderr, "do_block: SEEK\n");

		/* nothing is written now; the next do_write() repositions */
		seeksleft=1;
		phantomblocks++;
	}

	cur_pos += 512;
}

/* Copy everything readable from fd_in into a newly created file, using plain
 * read()/write(); this is the variant that also works for pipes.
 *
 * fd_in:    descriptor to read from
 * fnam_out: name of the output file, created through create_file()
 * mode:     mode for the output file
 *
 * The input is consumed in 512-byte blocks that are handed to do_block();
 * a final block shorter than 512 bytes is written out as-is. Errors are
 * handled (fatally) by the helpers that are called.
 */
void do_copy_normal(int fd_in, char *fnam_out, int mode)
{
	int	fdout = create_file(fnam_out, mode);
/* retrieve this from input-fd, if applic. */
	int	blksize = 512;
	char	*buf = allocate_buffer(blksize);
	int	got;

	if (debug) fprintf(stderr, "do_copy_normal start\n");

	while ((got = MY_READ(fd_in, buf, blksize)) != 0)
	{
		char	*cursor = buf;

		/* hand over every complete block that was read */
		for (; got >= 512; got -= 512, cursor += 512)
		{
			do_block(fdout, cursor);
		}

		/* a remainder means MY_READ() ran into EOF halfway a block */
		if (got != 0)
		{
			if (debug) fprintf(stderr, "do_copy_normal: <512b; EOF\n");

			do_write(fdout, cursor, got);
			break;
		}
	}

	/* got is 0 only when the loop ended because MY_READ() returned 0 */
	if (got == 0)
	{
		if (debug) fprintf(stderr, "do_copy_normal: EOF\n");
	}

	if (debug) fprintf(stderr, "do_copy_normal: out of loop\n");

	/* force write of any phantom blocks left */
	do_write(fdout, buf, 0);

	close(fdout);
	free(buf);

	/* all done */
	if (debug) fprintf(stderr, "do_copy_normal done\n");
}

/* Return the size in bytes of the file that is open on fd_in.
 *
 * fd_in: descriptor of the file to examine
 *
 * Returns st_size as reported by fstat(). The process exits with status 1
 * when fstat() fails, and also when the size does not fit in the 'int'
 * return type: a silently truncated (possibly negative) size would make a
 * caller copy only part of the file.
 */
int file_size(int fd_in)
{
	struct stat sb;

	if (fstat(fd_in, &sb))
	{
		perror("phantom");
		exit(1);
	}

	if (sb.st_size > INT_MAX)
	{
		fprintf(stderr, "phantom: input file too large (%lld bytes)\n",
			(long long)sb.st_size);
		exit(1);
	}

	return (int)sb.st_size;
}

/* Copy the file open on fd_in into a newly created file, reading the input
 * through a read-only memory map.
 *
 * fd_in:    descriptor of the input file
 * fnam_out: name of the output file, created through create_file()
 * mode:     mode for the output file
 *
 * When the input cannot be mapped (it is empty, its size does not fit in a
 * size_t, or mmap() fails) the copy is done by do_copy_normal() instead.
 * Failing fstat(), madvise() or munmap() calls are reported and end the
 * process with status 1.
 */
void do_copy_mmap(int fd_in, char *fnam_out, int mode)
{
	struct stat	sb;
	off_t	fin_size;
	off_t	left;
	char	*memmap = NULL;
	char	*pnt;
	int	mapped = 0;
	int	fdout;

	/* fstat() is called here directly rather than through file_size(),
	 * whose 'int' return type cannot represent every off_t value
	 */
	if (fstat(fd_in, &sb) == -1)
	{
		perror("phantom");
		exit(1);
	}
	fin_size = sb.st_size;

	if (debug) fprintf(stderr, "do_copy_mmap(%ld) start\n", (long)fin_size);

	/* create memory-map; mmap() refuses a length of 0 and takes a size_t,
	 * so don't even try for sizes it cannot handle
	 */
	if (fin_size > 0 && (off_t)(size_t)fin_size == fin_size)
	{
		void *map = mmap(NULL, (size_t)fin_size, PROT_READ, MAP_SHARED, fd_in, 0);

		/* failure is signalled with MAP_FAILED, not with NULL */
		if (map != MAP_FAILED)
		{
			memmap = map;
			mapped = 1;
		}
		else if (debug)
		{
			fprintf(stderr, "phantom: mmap failed (size: %ld), errno: %d\n", (long)fin_size, errno);
		}
	}
	else if (debug)
	{
		fprintf(stderr, "do_copy_mmap: size %ld cannot be mapped\n", (long)fin_size);
	}

	/* no mapping? then do slowcopy */
	if (!mapped)
	{
		do_copy_normal(fd_in, fnam_out, mode);

		if (debug) fprintf(stderr, "do_copy_mmap end\n");
		return;
	}

	/* yes! memory mapped! */

/* Linux kernel 2.2 doesn't fully support (or at all) the madvise system-call
 * it seems 'madvise' is specified in the headerfiles, but MADV_SEQUENTIAL
 * is not, so I do a check for that one to see whether it is supported or not
 */
#ifdef MADV_SEQUENTIAL

	/* advise kernel on what we're gonna do with the input-file */
	if (madvise(memmap, (size_t)fin_size, MADV_SEQUENTIAL) == -1)
	{
		perror("phantom");
		if (debug) fprintf(stderr, "do_copy_mmap error while madvise\n");
		exit(1);
	}
#endif

	/* create outputfile */
	fdout = create_file(fnam_out, mode);

	/* go through file; the remaining length is kept in an off_t so that
	 * it also holds sizes above INT_MAX
	 */
	pnt = memmap;
	left = fin_size;
	while(left >= 512)
	{
		do_block(fdout, pnt);

		pnt += 512;
		left -= 512;
	}

	/* anything smaller than half a kb left? */
	if (left)
	{
		if (debug) fprintf(stderr, "do_copy_mmap: copy partial file (%d bytes)\n", (int)left);
		do_write(fdout, pnt, (int)left);
	}
	else
	{
		/* force write of any phantom blocks left */
		do_write(fdout, memmap, 0);
	}

	/* unmap file */
	if (munmap(memmap, (size_t)fin_size) == -1)
	{
		perror("phantom");
		if (debug) fprintf(stderr, "do_copy_mmap error while munmap\n");
		exit(1);
	}

	close(fdout);

	/* all done */
	if (debug) fprintf(stderr, "do_copy_mmap end\n");
}

/* Write the version banner, the list of command line switches and a usage
 * example to stderr.
 */
void print_usage(void)
{
	static const char *const help[] =
	{
		"phantom v1.1, (c) 2000 by F.J.J. van Heusden <folkert@vanheusden.com>\n\n",
		"-o\toutput-file - You *must* supply an output-file for obvious reasons\n",
		"-r\t...or use '-r' to replace the original file (only when an -i is given!)\n",
		"-i\tinput file - File to read from. Omit to have this program read from stdin\n",
		"-d\tSwitch on debugging\n",
		"-s\tBe quiet\n",
		"-n\tShow current filename\n",
		"-h\tThis list\n\n",
		/* i.e.: find /mnt/bv-space -type f -exec phantom -r -i "{}" \; */
		"example: 	find /mnt/bv-space -type f -exec phantom -r -i \"{}\" \\;\n\n"
	};
	size_t	line;

	if (debug)
		fprintf(stderr, "print_usage\n");

	/* none of the lines contains a conversion, so fputs() emits exactly
	 * what fprintf() would
	 */
	for (line = 0; line < sizeof help / sizeof help[0]; line++)
		fputs(help[line], stderr);
}

/* Program entry point.
 *
 * Parses the command line (-i, -o, -r, -d, -s, -n, -h; see print_usage()),
 * copies the input to the output while skipping all-zero blocks, and in
 * replace mode ('-r') renames the temporary output over the input file and
 * restores the input's owner and group.
 *
 * Returns 0 on success (and for '-h'), 1 on any usage or I/O error detected
 * here; the helper functions exit with status 1 on their own errors.
 */
int main(int argc, char *argv[])
{
	int	loop;
	char	*f_out = NULL, *f_in=NULL, silent=0, showName=0;
	char	replace_org=0;
	int	mode = S_IREAD|S_IWRITE;
	uid_t	uid = geteuid();	/* no error checking: geteuid & getegid */
	gid_t	gid = getegid();	/* are not supposed to fail */

	if (argc == 1)
	{
		print_usage();
		return 1;
	}

	for(loop=1;loop<argc;loop++)
	{
		if (strcmp(argv[loop], "-o") == 0 || strcmp(argv[loop], "-i") == 0)
		{
			/* both switches consume the next argument: make sure
			 * there is one instead of picking up argv[argc]
			 */
			if (loop + 1 >= argc)
			{
				fprintf(stderr, "Option %s requires a filename\n", argv[loop]);
				print_usage();
				return 1;
			}

			if (argv[loop][1] == 'o')
				f_out=argv[loop+1];
			else
				f_in=argv[loop+1];
			loop++;
		}
		else if (strcmp(argv[loop], "-d") == 0)
		{
			debug=1;
			fprintf(stderr, "debug-mode\n");
		}
		else if (strcmp(argv[loop], "-h") == 0)
		{
			print_usage();
			return 0;
		}
		else if (strcmp(argv[loop], "-s") == 0)
		{
			silent=1;
		}
		else if (strcmp(argv[loop], "-n") == 0)
		{
			showName=1;
		}
		else if (strcmp(argv[loop], "-r") == 0)
		{
			replace_org=1;
		}
		else
		{
			fprintf(stderr, "Unknown option: %s\n", argv[loop]);
			print_usage();
			return 1;
		}
	}

	/* f_in is NULL when reading from stdin: test the pointer before
	 * looking at what it points to
	 */
	if (showName && f_in && *f_in) fprintf(stderr, "%s\n", f_in);

	/* when replacing file, one should give the file to replace */
	if (replace_org==1 && f_in == NULL)
	{
		fprintf(stderr, "Cannot use 'stdin' for input with 'replace'-function\n");
		print_usage();
		return 1;
	}

	/* when not replacing a file, an outputfile must be given */
	if (replace_org==0 && f_out == NULL)
	{
		fprintf(stderr, "No output-file given.\n");
		print_usage();
		return 1;
	}

	/* it doesn't make sense to give an output-file when you want to replace the input-file */
	if (replace_org==1 && f_out != NULL)
	{
		fprintf(stderr, "You cannot give an outputfilename when using the 'replace'-function\n");
		print_usage();
		return 1;
	}

	/* get owner(etc.) flags from input-file (if applicable) */
	if (f_in)
	{
		struct stat	statbuf;

		if (stat(f_in, &statbuf) == -1)
		{
			perror("phantom");
			fprintf(stderr, "get-mode: stat error\n");
			return 1;
		}

		/* keep the permission, suid/sgid and sticky bits only; the
		 * file-type bits in st_mode are no business of creat()/fchmod()
		 */
		mode = statbuf.st_mode & 07777;
		gid = statbuf.st_gid;
		uid = statbuf.st_uid;
	}

	/* when replacing, write to a temp-file first */
	if (replace_org==1)
	{
		struct stat	statbuf;
		int	rc;
		/* room for: name, '.', 8 hex digits, terminating 0x00 */
		size_t	buflen = strlen(f_in) + 1 + 8 + 1;
		char	*buf = malloc(buflen);

		if (!buf)
		{
			fprintf(stderr, "Memory allocation error!\n");
			return 1;
		}

		/* hopefully, this won't loop forever :o)
		 * try names until stat() says the name is not in use; only
		 * the return value is looked at, as errno is meaningless
		 * after a successful call
		 */
		srand(time(NULL));
		do
		{
			snprintf(buf, buflen, "%s.%x%x", f_in, (unsigned)rand(), (unsigned)rand());
			rc = stat(buf, &statbuf);
		}
		while (rc == 0);

		f_out = buf;

		if (debug) fprintf(stderr, "will write to temp-file: %s\n", f_out);
	}

	if (debug) fprintf(stderr, "%s to %s with phantomblocks\n", f_in ? f_in : "(stdin)", f_out);

	/* not reading from pipe or so? do mmap */
	if (f_in != NULL)
	{
		int fd_in = open(f_in, O_RDONLY);
		if (fd_in == -1)
		{
			perror("phantom");
			return 1;
		}

		do_copy_mmap(fd_in, f_out, mode);

		close(fd_in);
	}
	/* otherwise, do plain copy */
	else
	{
		do_copy_normal(0, f_out, mode);
	}
	if (debug) fprintf(stderr, "finished generating new file\n");

	/* in replace mode; move temp-file over input-file */
	if (replace_org)
	{
		if (debug) fprintf(stderr, "replace original by tempfile\n");

		if (rename(f_out, f_in) == -1)
		{
			perror("phantom");
			if (debug) fprintf(stderr, "rename error\n");
			/* the original is still in place: don't leave the
			 * temp-file lying around next to it
			 */
			unlink(f_out);
			free(f_out);
			return 1;
		}
		free(f_out);

		/* now set uid/gid */
		if (chown(f_in, uid, gid) == -1)
		{
			perror("phantom");
			if (debug) fprintf(stderr, "chown error\n");
			return 1;
		}
	}

	if (!silent) fprintf(stderr, "Phantom blocks: %ld\n", (long)phantomblocks);

	if (debug) fprintf(stderr, "%s exit\n", argv[0]);

	return 0;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */