/* 
 * Copyright 2001 The Regents of the University of California 
 * All Rights Reserved 
 * 
 * Permission to use, copy, modify and distribute any part of this
 * iffinder software package for educational, research and non-profit
 * purposes, without fee, and without a written agreement is hereby
 * granted, provided that the above copyright notice, this paragraph
 * and the following paragraphs appear in all copies.
 * 
 * Those desiring to incorporate this into commercial products or use
 * for commercial purposes should contact the Technology Transfer
 * Office, University of California, San Diego, 9500 Gilman Drive, La
 * Jolla, CA 92093-0910, Ph: (858) 534-5815, FAX: (858) 534-7345.
 * 
 * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY
 * PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL
 * DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 * 
 * THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE
 * UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. THE UNIVERSITY
 * OF CALIFORNIA MAKES NO REPRESENTATIONS AND EXTENDS NO WARRANTIES
 * OF ANY KIND, EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
 * PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE
 * ANY PATENT, TRADEMARK OR OTHER RIGHTS.
 * 
 * The iffinder software package is developed by Ken Keys at the
 * University of California, San Diego under the Cooperative Association
 * for Internet Data Analysis (CAIDA) Program.
 */

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <signal.h>
#include <time.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>
#include <arpa/inet.h>
#include "config.h"
#include "hashtab.h"

#define USE_RAW_IP	0
#define USE_RAW_UDP	0

#define OPT_SIZE	40
#define	INITIAL_TTL	255

#ifndef INADDR_NONE
# define INADDR_NONE 0xffffffff     /* should be in <netinet/in.h> */
#endif

#define NPASS			1
#define MAX_CONCURRENT_PROBES	100
#define MAX_PROBES_PER_SEC	300
#define MAX_TRIES		3
#define TIMEOUT			10 /* seconds */
#define BASE_PORT		(32768 + 666) /* same as traceroute */

#define INTERFACE_TABLE_SIZE	25013
#define HOST_TABLE_SIZE		10007

#ifndef ICMP_UNREACH_NET_UNKNOWN
# define ICMP_UNREACH_NET_UNKNOWN	 6	/* unknown net */
#endif
#ifndef ICMP_UNREACH_HOST_UNKNOWN
# define ICMP_UNREACH_HOST_UNKNOWN	 7	/* unknown host */
#endif
#ifndef ICMP_UNREACH_ISOLATED
# define ICMP_UNREACH_ISOLATED		 8	/* src host isolated */
#endif
#ifndef ICMP_UNREACH_NET_PROHIB
# define ICMP_UNREACH_NET_PROHIB	 9	/* prohibited access */
#endif
#ifndef ICMP_UNREACH_HOST_PROHIB
# define ICMP_UNREACH_HOST_PROHIB	10	/* prohibited access */
#endif
#ifndef ICMP_UNREACH_TOSNET
# define ICMP_UNREACH_TOSNET		11	/* bad tos for net */
#endif
#ifndef ICMP_UNREACH_TOSHOST
# define ICMP_UNREACH_TOSHOST		12	/* bad tos for host */
#endif
#ifndef ICMP_UNREACH_FILTER_PROHIB
# define ICMP_UNREACH_FILTER_PROHIB	13	/* admin prohib */
#endif
#ifndef ICMP_UNREACH_HOST_PRECEDENCE
# define ICMP_UNREACH_HOST_PRECEDENCE	14	/* host prec vio. */
#endif
#ifndef ICMP_UNREACH_PRECEDENCE_CUTOFF
# define ICMP_UNREACH_PRECEDENCE_CUTOFF	15	/* prec cutoff */
#endif

#ifndef ICMP_PARAMPROB_OPTABSENT
# define ICMP_PARAMPROB_OPTABSENT	 1	/* req. opt. absent */
#endif


/* RFC 1393 - Traceroute Using an IP Option */
#ifndef IPOPT_TRACEROUTE
# define IPOPT_TRACEROUTE	82
#endif
struct ip_traceroute {		/* IP traceroute option */
    uint8_t type;		/* IPOPT_TRACEROUTE */
    uint8_t len;		/* sizeof(struct ip_traceroute) */
    uint16_t id;		/* identifier */
    uint16_t ohc;		/* outbound hop count */
    uint16_t rhc;		/* return hop count */
    struct in_addr src;		/* originator IP address */
};
#ifndef ICMP_TRACEROUTE
# define ICMP_TRACEROUTE	30
#endif
struct icmp_traceroute {	/* ICMP traceroute message */
    uint8_t type;		/* ICMP_TRACEROUTE */
    uint8_t code;		/* 0: forwarded;  1: no route */
    uint16_t cksum;		/* checksum */
    uint16_t id;		/* identifier */
    uint8_t unused1;		/* spec: unused;  us: verification of copy */
#define TR_VERIFY_UNUSED    0x42
    uint8_t unused2;		/* spec: unused;  us: experiment index */
    uint16_t ohc;		/* outbound hop count */
    uint16_t rhc;		/* return hop count */
    uint32_t speed;		/* speed of link (octets/second) */
    uint32_t mtu;		/* MTU of link */
};
/* end RFC 1393 */

#define ICMP_HDRLEN	8
#define PKTLEN (sizeof(struct ip) + OPT_SIZE + ICMP_HDRLEN + \
    sizeof(struct ip) + OPT_SIZE + sizeof(struct udphdr) + sizeof(payload))

/* Encode exp's array index and trial number into port number */
#define exp_to_port(exp)	(base_port + MAX_TRIES * ((exp) - exptab) + (exp)->tries)
/* Get array index encoded in port number */
#define port_to_index(port)	(((port) - base_port) / MAX_TRIES)
/* Get trial number encoded in port number */
#define port_to_trial(port)	(((port) - base_port) % MAX_TRIES)

/* result/discovery codes */
enum {
    DR_NONE='-', DR_NO_RESPONSE = 'X',
    DR_ERROR = 'E', DR_ICMP_ERROR='I', DR_RR = 'R',
    DR_PORT_UNREACH_SELF = 'S', DR_PORT_UNREACH_DIFF = 'D', DR_BAD_ADDR = 'A',
    DR_ICMP_TRACEROUTE = 't', DR_MATED = 'M'
};

typedef struct interface {
    struct in_addr addr;	/* IP address of interface */
    short probed;		/* how many times interface has been probed */
    short passes;		/* how many experiments done on this iface */
    uint8_t discover_how;	/* how this interface was discovered */
    uint8_t discover_type;	/* for DR_ICMP_ERROR */
    uint8_t discover_code;	/* for DR_ICMP_ERROR */
    unsigned mated:1;		/* /30-mating was tried on this addr? */
    unsigned exists:1;		/* we know for sure that this inteface exists */
    unsigned does_rr:1;		/* this interface supports record route */
    unsigned does_tr:1;		/* this interface supports traceroute */
    struct interface *unext;	/* next interface in unprobed list */
} interface_t;

typedef struct experiment {
    uint32_t exp_id;
    interface_t *iface;
    interface_t *alias;
    struct timeval expires;
    uint8_t tries;
    uint8_t orig_ttl;		/* TTL of original packet */
    uint8_t resp_ttl;		/* TTL of ICMP_UNREACH_PORT response */
    struct timeval send_time[MAX_TRIES];    /* time probe was sent */
    struct timeval rtt;
    uint8_t result_how;		/* result of experiment */
    unsigned use_rr:1;
    unsigned use_traceroute:1;
    union {
	struct {
	    uint8_t type;	/* for DR_ICMP_ERROR */
	    uint8_t code;	/* for DR_ICMP_ERROR */
	} icmp_err;
	int error;		/* for DR_ERROR */
    } result;			/* details of result of experiment */
#define result_type	result.icmp_err.type
#define result_code	result.icmp_err.code
    struct experiment *next;	/* next experiment in expiration list */
} experiment_t;

static int verbosity = 1, interrupted = 0;
static int no_probe_new = 0, use_rr = 0, use_traceroute = 0, use_mate = 0;
static const char *argv0;
static int max_concurrent_probes = MAX_CONCURRENT_PROBES;
/*static int max_probes_per_sec = MAX_PROBES_PER_SEC;*/
static int sendsock, recvsock;
static hash_tab *interface_table;
static struct sockaddr_in my_sockaddr;
static uint16_t base_port = BASE_PORT;
static experiment_t *exptab;		/* array of experiments in progress */
static int current_experiments = 0;	/* number of experiments in progress */
static uint32_t next_exp_id = 0;
static struct timeval delay_time;
static struct timeval expire_time = { TIMEOUT, 0 };
static int npass = NPASS;
static time_t start_time;

/* list of current experiments, sorted by expiry */
static experiment_t *first_exp = NULL, *last_exp = NULL;

static interface_t *first_unprobed = NULL;
static int unprobed_count = 0;

static char payload[] = "See http://mapping-project2.caida.org/";

static struct {
    int experiments;
    int probes;
    int unreach_port;
    int other_icmp_errors;
    int noise;
    int timeouts;
    int aliases;
    int new_if;
    int new_if_by_port_unreach;
    int new_if_by_icmp_err;
    int new_if_by_rr;
    int new_if_by_traceroute;
    int new_if_by_mate;
    int failed;
    int invalid_addr;
    int ifaces;
    int support_rr;	/* # of ifaces that support record route */
    int support_tr;	/* # of ifaces that support traceroute */
    int failed_mates;	/* number of /30 mates that didn't succeed */
} stats;

#ifndef timeradd
#define timeradd(tvp, uvp, vvp) /* v = t + u */                         \
	do {                                                            \
		(vvp)->tv_sec = (tvp)->tv_sec + (uvp)->tv_sec;          \
		(vvp)->tv_usec = (tvp)->tv_usec + (uvp)->tv_usec;       \
		if ((vvp)->tv_usec >= 1000000) {                        \
			(vvp)->tv_sec++;                                \
			(vvp)->tv_usec -= 1000000;                      \
		}                                                       \
	} while (0)
#endif

#ifndef timersub
#define timersub(tvp, uvp, vvp) /* v = t - u */                         \
	do {                                                            \
		(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;          \
		(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;       \
		if ((vvp)->tv_usec < 0) {                               \
			(vvp)->tv_sec--;                                \
			(vvp)->tv_usec += 1000000;                      \
		}                                                       \
	} while (0)
#endif

static void interrupt_handler(int sig)
{
    interrupted++;
}

static int eprintf(const char *fmt, ...)
{
    va_list ap;
    int result, err;

    err = errno;
    va_start(ap, fmt);
    result = vfprintf(stderr, fmt, ap);
    va_end(ap);
    errno = err;
    return result;
}

#define diag(level, args) \
	do { if (level <= verbosity) { eprintf args; } } while (0)

static const char *fmt_addr(const struct sockaddr_in *addr)
{
    return inet_ntoa(addr->sin_addr);
}

#define my_swapl(x) \
    (((x)&0x000000FFul)<<24 | ((x)&0x0000FF00ul)<<8 | \
     ((x)&0x00FF0000ul)>>8  | ((x)&0xFF000000ul)>>24)

#if WORDS_BIGENDIAN
# define ipaddr(a,b,c,d)	(((a)<<24) | ((b)<<16) | ((c)<<8) | (d))
# define ipmask(len)		(0xFFFFFFFF << (32-(len)))
#else
# define ipaddr(a,b,c,d)	((a) | ((b)<<8) | ((c)<<16) | ((d)<<24))
# define ipmask(len)		my_swapl(0xFFFFFFFF << (32-(len)))
#endif

#define ipmatch(ip, prefix, len)	((ip & ipmask(len)) == prefix)

static inline int inaddr_is_routable_unicast(struct in_addr in_addr)
{
    uint32_t addr = in_addr.s_addr;

    /* see also: http://www.cymru.com/Documents/bogon-list.html */

    return !(
#if 0
        /*
         * XXX: May 2004
         *  Many of these prefixes are still bogons, but some probably
         *  aren't.  In the interest of having only a minimal list
         *  of bogons hardcoded into programs, the majority of these
         *  prefixes have been commented out.  It is up to the user
         *  to filter out bogons from input addresses.
         */

	/* http://www.iana.org/assignments/ipv4-address-space, 2003-05-30 */
	ipmatch(addr, ipaddr(0,0,0,0), 7) ||	/* 0/8 - 1/8 */
	ipmatch(addr, ipaddr(2,0,0,0), 8) ||
	ipmatch(addr, ipaddr(5,0,0,0), 8) ||
	ipmatch(addr, ipaddr(7,0,0,0), 8) ||
	ipmatch(addr, ipaddr(23,0,0,0), 8) ||
	ipmatch(addr, ipaddr(27,0,0,0), 8) ||
	ipmatch(addr, ipaddr(31,0,0,0), 8) ||
	ipmatch(addr, ipaddr(36,0,0,0), 7) ||	/* 36/8 - 37/8 */
	ipmatch(addr, ipaddr(39,0,0,0), 8) ||
	ipmatch(addr, ipaddr(41,0,0,0), 8) ||
	ipmatch(addr, ipaddr(42,0,0,0), 8) ||
	ipmatch(addr, ipaddr(49,0,0,0), 8) ||
	ipmatch(addr, ipaddr(50,0,0,0), 8) ||
	ipmatch(addr, ipaddr(58,0,0,0), 7) ||	/* 58/8 - 59/8 */
	ipmatch(addr, ipaddr(70,0,0,0), 7) ||	/* 70/8 - 71/8 */
	ipmatch(addr, ipaddr(72,0,0,0), 5) ||	/* 72/8 - 79/8 */
	ipmatch(addr, ipaddr(83,0,0,0), 8) ||
	ipmatch(addr, ipaddr(84,0,0,0), 6) ||	/* 84/8 - 87/8 */
	ipmatch(addr, ipaddr(88,0,0,0), 5) ||	/* 88/8 - 95/8 */
	ipmatch(addr, ipaddr(96,0,0,0), 3) ||	/* 96/8 - 127/8 */

	ipmatch(addr, ipaddr(173,0,0,0), 8) ||
	ipmatch(addr, ipaddr(174,0,0,0), 7) ||	/* 174/8 - 175/8 */
	ipmatch(addr, ipaddr(176,0,0,0), 5) ||	/* 176/8 - 183/8 */
	ipmatch(addr, ipaddr(184,0,0,0), 6) ||	/* 184/8 - 187/8 */
	ipmatch(addr, ipaddr(189,0,0,0), 8) ||
	ipmatch(addr, ipaddr(190,0,0,0), 8) ||

	ipmatch(addr, ipaddr(197,0,0,0), 8) ||

	ipmatch(addr, ipaddr(224,0,0,0), 3) ||	/* 224/8 - 255/8 */
					/* includes 224-239 multicast */
#endif
#if 0
	/* ftp://ftp.isi.edu/in-notes/iana/assignments/introduction */
	/* these are covered by the larger prefixes above */
	ipmatch(addr, ipaddr(127,0,0,0),       8)  ||	/* loopback */
	ipmatch(addr, ipaddr(0,0,0,0),         32) ||	/* this host */
	ipmatch(addr, ipaddr(255,255,255,255), 32) ||	/* broadcast */
#endif

        ipmatch(addr, ipaddr(0,0,0,0),         8)  || /* this network */
        ipmatch(addr, ipaddr(127,0,0,0),       8)  || /* loopback */
        ipmatch(addr, ipaddr(169,254,0,0),    16)  || /* link local (RFC3330)*/
        ipmatch(addr, ipaddr(192,0,2,0),      24)  || /* test net (RFC3330)*/
        ipmatch(addr, ipaddr(198,18,0,0),     15)  || /* benchmark (RFC3330)*/
	ipmatch(addr, ipaddr(224,0,0,0), 4) ||	/* 224/8 - 239/8 multicast */
	ipmatch(addr, ipaddr(255,0,0,0),       8)  || /* broadcast */

	/* private - RFC 1918 */
	ipmatch(addr, ipaddr(10,0,0,0),        8)  ||
	ipmatch(addr, ipaddr(172,16,0,0),      12) ||
	ipmatch(addr, ipaddr(192,168,0,0),     16));
}


/*
 * Experiment utilities
 */

static int next_exp_slot = 0;

static void init_experiments(void)
{
    int i;

    exptab = malloc(sizeof(experiment_t) * max_concurrent_probes);
    if (!exptab) {
	fprintf(stderr, "%s: %s\n", argv0, strerror(errno));
	exit(1);
    }
    for (i = 0; i < max_concurrent_probes; i++) {
	exptab[i].iface = NULL;
    }
}

static experiment_t *new_experiment(interface_t *iface)
{
    int i;

    i = next_exp_slot;
    while (1) {
	if (!exptab[i].iface) break;
	i = (i + 1) % max_concurrent_probes;
	if (i == next_exp_slot) return NULL;
    }
    next_exp_slot = (i + 1) % max_concurrent_probes;

    current_experiments++;
    exptab[i].iface = iface;
    exptab[i].tries = 0;
    exptab[i].orig_ttl = 0;
    exptab[i].resp_ttl = 0;
    exptab[i].rtt.tv_sec = 0;
    exptab[i].rtt.tv_usec = 0;
    exptab[i].alias = NULL;
    exptab[i].exp_id = next_exp_id++;
    exptab[i].result_how = DR_NONE;
    exptab[i].use_rr = use_rr;
    exptab[i].use_traceroute = use_traceroute;
    return &exptab[i];
}

static void free_experiment(experiment_t *exp)
{
    time_t est;

    if (!exp->iface) return;

    exp->iface = NULL;
    current_experiments--;
    stats.experiments++;
    next_exp_slot = exp - exptab;

    if (interrupted) return;

    est = ((double)time(NULL) - start_time + .5*MAX_TRIES*expire_time.tv_sec) /
	(stats.experiments + current_experiments) *
	(npass * stats.ifaces + stats.failed_mates - stats.experiments);
    diag(1, (" %d, ETA %ld:%02ld:%02ld  \r", stats.experiments,
	est / 3600, (est / 60) % 60, est % 60));
}

/*
 * functions for interface hash table
 */

static int cmp_if(const void *a, const void *b)
{
    return ((interface_t*)a)->addr.s_addr != ((interface_t*)b)->addr.s_addr;
}

static unsigned long key_if(const void *a)
{
    return ((interface_t*)a)->addr.s_addr;
}

static void delete_if(void *a)
{
    interface_t *iface = a;
    stats.ifaces--;
    free(iface);
}


static interface_t *new_interface(struct in_addr addr, uint8_t how,
    uint8_t type, uint8_t code)
{
    interface_t *interface;

    interface = malloc(sizeof(interface_t));
    if (!interface) return NULL;
    interface->addr = addr;
    interface->unext = NULL;
    interface->discover_how = how;
    interface->discover_type = type;
    interface->discover_code = code;
    interface->probed = 0;
    interface->passes = 0;
    interface->mated = 0;
    interface->does_rr = 0;
    interface->does_tr = 0;
    if (how != DR_MATED)
	interface->exists = 1;
    stats.ifaces++;
    add_hash_entry(interface_table, interface);
    return interface;
}

static interface_t *find_interface(struct in_addr addr)
{
    interface_t match;
    match.addr = addr;
    return find_hash_entry(interface_table, &match);
}

static int read_ip_file(const char *filename)
{
    FILE *file;
    char line[80];
    int linenum = 0, result = 0;
    struct in_addr addr;
    interface_t *iface;

    if (!(file = fopen(filename, "r"))) {
	fprintf(stderr, "%s: %s: %s\n", argv0, filename, strerror(errno));
	return 0;
    }

    while (fgets(line, sizeof(line), file)) {
	linenum++;
	if (line[0] == '#') /* comment */
	    continue;
	line[strcspn(line, " \t\n")] = '\0';
	if (!*line) /* empty line */
	    continue;
#ifdef HAVE_INET_ATON
	if (!inet_aton(line, &addr))
#else
	addr.s_addr = inet_addr(line);
	if (addr.s_addr == INADDR_NONE)
#endif
	{
	    fprintf(stderr, "%s: %s, line %d: bad IP address '%s'\n",
		argv0, filename, linenum, line);
	    goto error;
	}
	if (!inaddr_is_routable_unicast(addr)) {
	    fprintf(stderr, "%s: %s, line %d: ignoring interface %s"
		" (not a routable unicast address)\n",
		argv0, filename, linenum, inet_ntoa(addr));
	    continue;
	}

	if (!(iface = find_interface(addr))) {
	    iface = new_interface(addr, DR_NONE, 0, 0);
	    if (!iface) {
		fprintf(stderr, "%s: %s, line %d: %s\n",
		    argv0, filename, linenum, strerror(errno));
		goto error;
	    }
	}
    }

    result = 1;
    fclose(file);
    return result;

error:
    exit(1);
}

static void init_sockets(void)
{
    int opt;

    memset(&my_sockaddr, 0, sizeof(struct sockaddr_in));
    my_sockaddr.sin_family = AF_INET;
    my_sockaddr.sin_addr.s_addr = INADDR_ANY;

    /* The "traceroute" program identifies packets belonging to it by
     * setting the source port based on pid.  We do the same, to avoid
     * conflict with simultaneous traceroute processes as well as other
     * processes running this program.
     */
    my_sockaddr.sin_port = (getpid() & 0xFFFF) | 0x8000;
    diag(0, ("Using local port %d.\n", my_sockaddr.sin_port));
    my_sockaddr.sin_port = htons(my_sockaddr.sin_port);

    /* Set up UDP sending socket */
#if USE_RAW_IP
    sendsock = socket(PF_INET, SOCK_RAW, IPPROTO_IP);
    if (sendsock < 0) {
	fprintf(stderr, "%s: create ip sender socket: %s\n",
	    argv0, strerror(errno));
	exit(1);
    }
# ifdef IP_HDRINCL
    opt = 1;
    if (setsockopt(sendsock, IPPROTO_IP, IP_HDRINCL,
	(char *)&opt, sizeof(opt)) < 0)
    {
	fprintf(stderr, "%s: set IP_HDRINCL: %s\n",
	    argv0, strerror(errno));
	exit(1);
    }
# endif
	           
#else
# if USE_RAW_UDP
    sendsock = socket(PF_INET, SOCK_RAW, IPPROTO_UDP);
    if (sendsock < 0) {
	fprintf(stderr, "%s: create udp sender socket: %s\n",
	    argv0, strerror(errno));
	exit(1);
    }
# else
    sendsock = socket(PF_INET, SOCK_DGRAM, 0);
    if (sendsock < 0) {
	fprintf(stderr, "%s: create udp sender socket: %s\n",
	    argv0, strerror(errno));
	exit(1);
    }
# endif /* USE_RAW_UDP */
    if (bind(sendsock, (struct sockaddr*)&my_sockaddr,
	sizeof(struct sockaddr_in)) < 0)
    {
	fprintf(stderr, "%s: bind sender socket (%s %d): %s\n", argv0,
	    fmt_addr(&my_sockaddr), my_sockaddr.sin_port, strerror(errno));
	exit(1);
    }
    opt = 255;
    if (setsockopt(sendsock, IPPROTO_IP, IP_TTL, &opt, sizeof(opt)) < 0) {
	fprintf(stderr, "%s: set TTL: %s\n", argv0, strerror(errno));
	exit(1);
    }
#endif /* USE_RAW_IP */

#if 0
    opt = sizeof(struct ip) + OPT_SIZE + sizeof(struct udphdr) +
	sizeof(payload);
    setsockopt(sendsock, SOL_SOCKET, SO_SNDBUF, &opt, sizeof(opt));
#endif

    /* Set up ICMP receiving socket */
    recvsock = socket(PF_INET, SOCK_RAW, IPPROTO_ICMP);
    if (recvsock < 0) {
	fprintf(stderr, "%s: create icmp reciever socket: %s\n",
	    argv0, strerror(errno));
	exit(1);
    }
}

static void dump_experiment(experiment_t *exp, char *rr_buf)
{
    int features = 0;
    char buf[9];

    printf("%-15s ", inet_ntoa(exp->iface->addr));
    printf("%-15s ", exp->alias ? inet_ntoa(exp->alias->addr) : "-");

    if (exp->orig_ttl)
	printf("%3d ", exp->orig_ttl);
    else
	printf("%3s ", "-");

    if (exp->resp_ttl)
	printf("%3d ", exp->resp_ttl);
    else
	printf("%3s ", "-");

    if (exp->rtt.tv_sec || exp->rtt.tv_usec)
	printf("%3ld.%06ld ", exp->rtt.tv_sec, exp->rtt.tv_usec);
    else
	printf("%10s ", "-");

    printf("%c", exp->result_how);
    if (exp->result_how == DR_ICMP_ERROR) {
	sprintf(buf, ":%d,%d ", exp->result_type, exp->result_code);
	printf("%-6s ", buf);
    } else if (exp->result_how == DR_ERROR) {
	printf(":%-5d ", exp->result.error);
    } else {
	printf("%-6s ", "");
    }

    printf("%c", exp->iface->discover_how);
    if (exp->iface->discover_how == DR_ICMP_ERROR) {
	sprintf(buf, ":%d:%d ",
	    exp->iface->discover_type, exp->iface->discover_code);
	printf("%-6s ", buf);
    } else {
	printf("%-6s ", "");
    }

    if (exp->iface->does_rr) {
	features++;
	putchar('R');
    }
    if (exp->iface->does_tr) {
	features++;
	putchar('t');
    }
    if (!features)
	putchar('-');

    if (rr_buf && *rr_buf) {
	fputs("       ", stdout);
	fputs(rr_buf, stdout);
    }

    putchar('\n');
    fflush(stdout);
}

/* returns -1 for error, 0 for no probe (too many tries), 1 for probe. */
static int send_probe(experiment_t *exp)
{
    interface_t *iface;
    struct sockaddr_in dest_sockaddr;
#if USE_RAW_UDP
    struct udphdr udp;
#endif
#if USE_RAW_IP
    struct ip ip;
#endif
    struct msghdr msg;
    struct iovec iov[5];
    uint16_t dport;
    uint32_t ip_opt_buf[OPT_SIZE/4]; /* uint32_t for alignment */
    char *ip_options;
    int ip_opt_len = 0;

    if (exp->tries >= MAX_TRIES)
	return 0;
    iface = exp->iface;

    memset(&dest_sockaddr, 0, sizeof(struct sockaddr_in));
    dest_sockaddr.sin_family = AF_INET;
    dest_sockaddr.sin_addr = iface->addr;
    dport = exp_to_port(exp);
    dest_sockaddr.sin_port = htons(dport);

    memset(&msg, 0, sizeof(msg));
    msg.msg_name = (caddr_t)&dest_sockaddr;
    msg.msg_namelen = sizeof(dest_sockaddr);
    msg.msg_iov = iov;
#if 0 /* These fields were zeroed by memset() (if they exist on this system). */
    msg.msg_iovlen = 0;
    msg.msg_control = NULL;
    msg.msg_controllen = 0;
    msg.msg_flags = 0;
#endif

#if USE_RAW_IP
    /* ip header */
    ip.ip_v	= IPVERSION;
    ip.ip_hl	= 0; /* we will set this after setting options */
    ip.ip_tos	= 0;
    ip.ip_len	= sizeof(struct ip);
    ip.ip_id	= 0; /* kernel will set this */
    ip.ip_off	= 0;
    ip.ip_ttl	= INITIAL_TTL;
    ip.ip_p	= IPPROTO_UDP;
    ip.ip_sum	= 0; /* kernel will set this */
    ip.ip_src	= my_sockaddr.sin_addr;
    ip.ip_dst	= iface->addr;

    iov[msg.msg_iovlen].iov_base = (char*)&ip;
    iov[msg.msg_iovlen].iov_len = sizeof(ip);
    msg.msg_iovlen++;
#endif

    /* ip options (if enabled) */
    if (exp->use_traceroute || exp->use_rr) {
	memset(ip_opt_buf, 0, OPT_SIZE);
	ip_options = (char*)ip_opt_buf;

	if (exp->use_traceroute) {
	    /* The traceroute option is not supported by most routers. */
	    struct ip_traceroute *ip_tr;
	    ip_tr = (struct ip_traceroute*)ip_opt_buf;
	    ip_tr->type = IPOPT_TRACEROUTE;
	    ip_tr->len = sizeof(struct ip_traceroute);
	    ip_tr->id = getpid() & 0xFFFF;
	    ip_tr->ohc = 0;
	    ip_tr->rhc = 0xFFFF;
	    ip_tr->src = my_sockaddr.sin_addr;
	    ip_opt_len += ip_tr->len;
	    ip_options += ip_tr->len;
	}

	if (exp->use_rr) {
#if 0
	    ip_options[0] = IPOPT_NOP; /* filler to make RR data word aligned */
	    /* You might think it's a good idea to insert a NOP before the RR
	     * to make the RR data 4-octet-aligned, thus:
	     *   NOP RR len ptr data0 data1 data2 data3 ...
	     * However, I have observed a bug in some routers when presented
	     * with such options which corrupts the RR data.
	     */
#endif
	    ip_options[IPOPT_OPTVAL] = IPOPT_RR;
	    ip_options[IPOPT_OLEN] = OPT_SIZE - ip_opt_len;
	    ip_options[IPOPT_OFFSET] = IPOPT_MINOFF;
	    ip_opt_len += ip_options[IPOPT_OLEN];
	    ip_options += ip_options[IPOPT_OLEN];
	}

	if (ip_opt_len) {
	    while (ip_opt_len % 4) {
		((char*)ip_opt_buf)[ip_opt_len] = IPOPT_NOP;
		ip_opt_len++;
	    }
#if USE_RAW_IP
	    ip.ip_len += ip_opt_len;
	    iov[msg.msg_iovlen].iov_base = ip_opt_buf;
	    iov[msg.msg_iovlen].iov_len = ip_opt_len
	    msg.msg_iovlen++;
#endif
	}
    }
#if !USE_RAW_IP
    if (setsockopt(sendsock, IPPROTO_IP, IP_OPTIONS, ip_opt_buf,
	ip_opt_len) < 0)
    {
	fprintf(stderr, "%s: set ip options: %s\n", argv0,
	    strerror(errno));
	/* make no future attempts to use options */
	use_rr = 0;
	use_traceroute = 0;
    }
#endif

#if USE_RAW_IP
    ip.ip_hl = ip.ip_len / 4;
#endif

#if USE_RAW_UDP
    /* udp header */
    udp.uh_sport	= my_sockaddr.sin_port;
    udp.uh_dport	= dest_sockaddr.sin_port;
    udp.uh_ulen		= htons(sizeof(udp) + sizeof(payload));
    udp.uh_sum		= 0;	/* not used */

    iov[msg.msg_iovlen].iov_base = (char*)&udp;
    iov[msg.msg_iovlen].iov_len = sizeof(udp);
    msg.msg_iovlen++;
    ip.ip_len += sizeof(udp);
#endif /* USE_RAW_UDP */

    /* udp payload */
    iov[msg.msg_iovlen].iov_base = payload;
    iov[msg.msg_iovlen].iov_len = sizeof(payload);
    msg.msg_iovlen++;
#if USE_RAW_IP
    ip.ip_len += sizeof(payload);
#endif

    diag(2, ("Experiment %d.%d: probing %s %d%s%s\n",
	exp->exp_id, exp->tries, inet_ntoa(iface->addr), dport,
	exp->use_rr ? " (with RR)" : "",
	exp->use_traceroute ? " (with TR)" : ""));

    /* calculate expriation time */
    gettimeofday(&exp->send_time[exp->tries], NULL);
    timeradd(&exp->send_time[exp->tries], &expire_time, &exp->expires);

    exp->tries++;
    stats.probes++;
    iface->probed++;

#if 1
    if (sendmsg(sendsock, &msg, 0) < 0) {
	fprintf(stderr, "%s: send %s: %s\n", argv0, inet_ntoa(exp->iface->addr),
	    strerror(errno));
	switch (errno) {
	    case 0:
#ifdef EHOSTUNREACH
	    case EHOSTUNREACH:
#endif
#ifdef EHOSTDOWN
	    case EHOSTDOWN:
#endif
		return 0;	/* non-fatal error */
	    default:
		interrupted++;
		return -1;	/* fatal error */
	}
    }
#else
    if (sendto(sendsock, &udp, sizeof(udp), 0, (struct sockaddr*)&dest_sockaddr,
	sizeof(dest_sockaddr)) < 0)
    {
	fprintf(stderr, "%s: send: %s\n", argv0, strerror(errno));
	return -1;
    }
#endif

    /* append to expiration list */
    *(last_exp ? &last_exp->next : &first_exp) = exp;
    last_exp = exp;
    exp->next = NULL;

    return 1;
}

static void fail(experiment_t *exp)
{
    dump_experiment(exp, NULL);
    diag(2, ("Experiment %d (%s) failed.\n", exp->exp_id,
	inet_ntoa(exp->iface->addr)));
    stats.failed++;
    if (!exp->iface->exists) {
	assert(exp->iface->discover_how == DR_MATED);
	stats.new_if--;
	stats.new_if_by_mate--;
	stats.failed_mates++;
	clear_hash_entry(interface_table, exp->iface);
    }
    free_experiment(exp);
}

static void remove_exp_from_expiration_list(experiment_t *exp)
{
    experiment_t *cur, *prev;
    for (prev = NULL, cur = first_exp; cur; prev = cur, cur = cur->next) {
	if (cur == exp) {
	    *(prev ? &prev->next : &first_exp) = exp->next;
	    if (exp == last_exp)
		last_exp = prev;
	    break;
	}
    }
    assert(cur);
}

static int retry(experiment_t *exp)
{
    int result;

    if ((result = send_probe(exp)) <= 0) {
	if (result < 0) {
	    exp->result_how = DR_ERROR;
	    exp->result.error = errno;
	}
	fail(exp);
    }
    return result;
}

static interface_t *new_unprobed_interface(struct in_addr in_addr,
    uint8_t how, uint8_t type, uint8_t code)
{
    interface_t *iface;

    stats.new_if++;
    switch (how) {
	case DR_RR: stats.new_if_by_rr++; break;
	case DR_ICMP_TRACEROUTE: stats.new_if_by_traceroute++; break;
	case DR_ICMP_ERROR: stats.new_if_by_icmp_err++; break;
	case DR_PORT_UNREACH_DIFF: stats.new_if_by_port_unreach++; break;
	case DR_MATED: stats.new_if_by_mate++; break;
    }
    iface = new_interface(in_addr, how, type, code);
    if (!iface) {
	fprintf(stderr, "%s: new interface %s: %s\n",
	    argv0, inet_ntoa(in_addr), strerror(errno));
	return NULL;
    }
    /* add to unprobed list */
    iface->unext = first_unprobed;
    first_unprobed = iface;
    unprobed_count++;
    return iface;
}

static interface_t *find_or_create_iface(struct in_addr in_addr,
    uint8_t how, uint8_t type, uint8_t code)
{
    interface_t *iface;

    if (!(iface = find_interface(in_addr))) {
	if (verbosity >= 2) {
	    eprintf("> %s is a new interface! (%c", inet_ntoa(in_addr), how);
	    if (how == DR_ICMP_ERROR)
		eprintf(":%d:%d", type, code);
	    eprintf(")\n");
	}
	if (!(iface = new_unprobed_interface(in_addr, how, type, code)))
	    return NULL;
    } else {
	iface->exists = 1;
    }
    return iface;
}

static int parse_ip_options(struct ip *ip, int len, const char *label,
    char *rr_buf)
{
    int i, offset, ip_hdrlen, options_len;
    struct in_addr in_addr;
    const unsigned char *ip_options;

    ip_hdrlen = ip->ip_hl * 4;
    options_len = ip_hdrlen - sizeof(struct ip);
    if (options_len > len)
	options_len = len;
    ip_options = (const char *)ip + sizeof(struct ip);

    while (options_len > 0) {
	switch (ip_options[IPOPT_OPTVAL]) {
	case IPOPT_EOL:
	    options_len = 0;
	    break;
	case IPOPT_NOP:
	    options_len--;
	    ip_options++;
	    break;
	case IPOPT_RR:
	    diag(3, ("\n"));
	    if (ip_options[IPOPT_OLEN] < 3) {
		diag(1, ("bad length (%d) in RR option of %s packet\n",
		    ip_options[IPOPT_OLEN], label));
		return 0;
	    }
	    offset = ip_options[IPOPT_OFFSET];
	    if (offset > ip_options[IPOPT_OLEN])
		offset = ip_options[IPOPT_OLEN];	/* truncated */
	    if (offset > options_len)
		offset = options_len;	/* truncated */
	    for (i = 3; i + sizeof(in_addr) <= offset; i += sizeof(in_addr)) {
		interface_t *iface;
		memcpy(&in_addr, ip_options+i, sizeof(in_addr));
		diag(3, ("RR (%s): %s\n", label, inet_ntoa(in_addr)));
		if (rr_buf)
		    rr_buf += sprintf(rr_buf, "%s ", inet_ntoa(in_addr));
		if (!inaddr_is_routable_unicast(in_addr)) {
		    diag(2, ("RR datum %s is not a routable unicast address.\n",
			inet_ntoa(in_addr)));
		    continue;
		} else {
		    iface = find_or_create_iface(in_addr, DR_RR, 0, 0);
		    if (!iface) return -1;
		}
		if (!iface->does_rr) {
		    iface->does_rr = 1;
		    stats.support_rr++;
		}
	    }
	    options_len -= ip_options[IPOPT_OLEN];
	    ip_options += ip_options[IPOPT_OLEN];
	    break;
	default:
	    if (ip_options[IPOPT_OLEN] < 3) {
		diag(1, ("bad length (%d) in %d option of %s packet\n",
		    ip_options[IPOPT_OLEN], ip_options[IPOPT_OPTVAL], label));
		return 0;
	    }
	    options_len -= ip_options[IPOPT_OLEN];
	    ip_options += ip_options[IPOPT_OLEN];
	    break;
	}
    }
    return 0;
}

static int handle_icmp_error(struct ip *ip, struct icmp *icmp, int len,
    struct timeval *recv_time)
{
    int type, code, port_unreachable, i, trial;
    struct ip *orig_ip;
    struct udphdr *orig_udp;
    unsigned short orig_sport, orig_dport;
    experiment_t *exp;
    interface_t *responder;
    char rr_buf[9*16+1]; /* buffer to print record route addresses */

    type = icmp->icmp_type;
    code = icmp->icmp_code;

    if (verbosity >= 2) {
	eprintf("Got ICMP");
	switch (type) {
	case ICMP_REDIRECT:
	    eprintf("_REDIRECT");
	    switch (code) {
	    case ICMP_REDIRECT_NET: eprintf("_NET"); break;
	    case ICMP_REDIRECT_HOST: eprintf("_HOST"); break;
	    case ICMP_REDIRECT_TOSNET: eprintf("_TOSNET"); break;
	    case ICMP_REDIRECT_TOSHOST: eprintf("_TOSHOST"); break;
	    default: eprintf(" code %d", code);
	    }
	    break;
	case ICMP_TIMXCEED:
	    eprintf("_TIMXCEED");
	    switch (code) {
	    case ICMP_TIMXCEED_INTRANS: eprintf("_INTRANS"); break;
	    case ICMP_TIMXCEED_REASS: eprintf("_REASS"); break;
	    default: eprintf(" code %d", code);
	    }
	    break;
	case ICMP_PARAMPROB:
	    eprintf("_PARAMPROB");
	    switch (code) {
	    case ICMP_PARAMPROB_OPTABSENT: eprintf("_OPTABSENT"); break;
	    default: eprintf(" code %d", code);
	    }
	    eprintf(", offset %d", icmp->icmp_pptr);
	    break;
	case ICMP_SOURCEQUENCH:
	    eprintf("_SOURCEQUENCH");
	    eprintf(" code %d", code);
	    break;
	case ICMP_UNREACH:
	    eprintf("_UNREACH");
	    switch (code) {
	    case ICMP_UNREACH_NET: eprintf("_NET"); break;
	    case ICMP_UNREACH_HOST: eprintf("_HOST"); break;
	    case ICMP_UNREACH_PROTOCOL: eprintf("_PROTOCOL"); break;
	    case ICMP_UNREACH_PORT: eprintf("_PORT"); break;
	    case ICMP_UNREACH_SRCFAIL: eprintf("_SRCFAIL"); break;
	    case ICMP_UNREACH_NEEDFRAG: eprintf("_NEEDFRAG"); break;
	    case ICMP_UNREACH_NET_UNKNOWN: eprintf("_NET_UNKNOWN"); break;
	    case ICMP_UNREACH_HOST_UNKNOWN: eprintf("_HOST_UNKNOWN"); break;
	    case ICMP_UNREACH_ISOLATED: eprintf("_ISOLATED"); break;
	    case ICMP_UNREACH_NET_PROHIB: eprintf("_NET_PROHIB"); break;
	    case ICMP_UNREACH_HOST_PROHIB: eprintf("_HOST_PROHIB"); break;
	    case ICMP_UNREACH_TOSNET: eprintf("_TOSNET"); break;
	    case ICMP_UNREACH_TOSHOST: eprintf("_TOSHOST"); break;
	    default: eprintf(" code %d", code);
	    }
	    break;
	default:
	    eprintf("type %d, code %d", type, code); /* shouldn't happen */
	    break;
	}
	eprintf(" from %s", inet_ntoa(ip->ip_src));
    }

    orig_ip = (struct ip*)((uint32_t*)icmp + ICMP_HDRLEN/4);
    len -= ICMP_HDRLEN;

    if (verbosity >= 2 && type == ICMP_PARAMPROB) {
	for (i = 0; i < len; i++) {
	    if (i % 16 == 0) eprintf("\n");
	    eprintf(" %02x", ((uint8_t*)orig_ip)[i]);
	}
	eprintf("\n");
    }

    if (len < sizeof(struct ip)) {
	diag(2, (": original ip header is too short (%d)\n", len));
	stats.noise++;
	return 0;
    }

    rr_buf[0] = '\0';
    if (parse_ip_options(orig_ip, len, "orig", rr_buf) < 0)
	return -1;

#if 0
    diag(2, (" original addresses: src %s,", inet_ntoa(orig_ip->ip_src)));
    diag(2, (" dst %s,", inet_ntoa(orig_ip->ip_dst)));
    diag(2, (" protocol %d\n", orig_ip->ip_p));
#endif

    if (orig_ip->ip_p != IPPROTO_UDP) {
	diag(2, (": original packet from %s", inet_ntoa(orig_ip->ip_src)));
	diag(2, (" to %s ", inet_ntoa(orig_ip->ip_dst)));
	diag(2, ("was not UDP (%d)\n", len));
	stats.noise++;
	return 0;
    }

    orig_udp = (struct udphdr*)((uint32_t*)orig_ip + orig_ip->ip_hl);
    len -= orig_ip->ip_hl * 4;
    if (len < sizeof(struct udphdr)) {
	diag(2, (": original packet from %s", inet_ntoa(orig_ip->ip_src)));
	diag(2, (" to %s ", inet_ntoa(orig_ip->ip_dst)));
	diag(2, ("had UDP header too short (%d)\n", len));
	stats.noise++;
	return 0;
    }

    orig_sport = ntohs(orig_udp->uh_sport);
    orig_dport = ntohs(orig_udp->uh_dport);

    if (orig_udp->uh_sport != my_sockaddr.sin_port) {
	diag(2, (": original packet from %s", inet_ntoa(orig_ip->ip_src)));
	diag(2, (" to %s ", inet_ntoa(orig_ip->ip_dst)));
	diag(2, ("has src port (%d) that isn't ours\n", orig_sport));
	stats.noise++;
	return 0;
    }

    /* Look up experiment using array index encoded in original dest port */
    i = port_to_index(orig_dport);
    if (i < 0 || i >= max_concurrent_probes || !exptab[i].iface ||
	exptab[i].iface->addr.s_addr != orig_ip->ip_dst.s_addr)
    {
	diag(2, (": original dst (%s %d) doesn't match experiment\n",
	    inet_ntoa(orig_ip->ip_dst), orig_dport));
	return 0;
    }
    exp = &exptab[i];
    trial = port_to_trial(orig_dport);
    diag(2, (" -> exp %d.%d (%s %d)\n",
	exp->exp_id, trial, inet_ntoa(orig_ip->ip_dst), orig_dport));
    remove_exp_from_expiration_list(exp);
    timersub(recv_time, &exp->send_time[trial], &exp->rtt);
    exp->resp_ttl = ip->ip_ttl;
    exp->orig_ttl = orig_ip->ip_ttl;

    if (!inaddr_is_routable_unicast(ip->ip_src)) {
	diag(2, ("> src (%s) is not a routable unicast address\n",
	    inet_ntoa(ip->ip_src)));
	stats.invalid_addr++;
	exp->result_how = DR_BAD_ADDR;
	fail(exp);
	return 0;
    }

    port_unreachable = (type == ICMP_UNREACH && code == ICMP_UNREACH_PORT);

    if (orig_ip->ip_dst.s_addr == ip->ip_src.s_addr) {
	responder = exp->iface;
	responder->exists = 1;
    } else {
	responder = find_or_create_iface(ip->ip_src,
	    port_unreachable ? DR_PORT_UNREACH_DIFF : DR_ICMP_ERROR,
	    type, code);
	if (!responder) return -1;
    }

    if (!port_unreachable) {
	stats.other_icmp_errors++;
	exp->result_how = DR_ICMP_ERROR;
	exp->result_type = type;
	exp->result_code = code;
	switch (type) {
	case ICMP_REDIRECT:
	    {
		interface_t *gw;
		gw = find_or_create_iface(ip->ip_src, DR_ICMP_ERROR, type,code);
		if (!gw) return -1;
		fail(exp);			/* retrying is pointless */
		return 0;
	    }
	case ICMP_PARAMPROB:
	    if (!exp->use_rr && !exp->use_traceroute) {
		/* If already tried without options, retrying is pointless */
		fail(exp);
		return 0;
	    }
	    /* Retry without options.  Note that some bad hosts give PARAMPROB
	     * with pptr=0 if they can't parse options, so we do this even
	     * if pptr doesn't point into the options. */
	    exp->use_rr = 0;
	    exp->use_traceroute = 0;
	case ICMP_UNREACH:
	    switch (code) {
	    case ICMP_UNREACH_ISOLATED:
	    case ICMP_UNREACH_NET_PROHIB:
	    case ICMP_UNREACH_HOST_PROHIB:
	    case ICMP_UNREACH_TOSNET:
	    case ICMP_UNREACH_TOSHOST:
	    case ICMP_UNREACH_FILTER_PROHIB:
	    case ICMP_UNREACH_HOST_PRECEDENCE:
	    case ICMP_UNREACH_PRECEDENCE_CUTOFF:
		fail(exp);		/* retrying is pointless */
		return 0;
	    }
	default:
	    break;
	}
	return retry(exp) < 0 ? -1 : 0;
    }

    /* The ICMP error was "port unreachable".  If responder is different
     * from the original dest, it is an alias for dest. */

    exp->alias = responder;
    if (responder != exp->iface) {
	diag(2, ("> %s is an alias for ", inet_ntoa(orig_ip->ip_dst)));
	diag(2, ("%s\n", inet_ntoa(ip->ip_src)));
	stats.aliases++;
	exp->result_how = DR_PORT_UNREACH_DIFF;
    } else {
	exp->result_how = DR_PORT_UNREACH_SELF;
    }
    stats.unreach_port++;

    dump_experiment(exp, rr_buf);
    free_experiment(exp);
    return 1;
}

static int handle_icmp_traceroute(struct ip *ip, struct icmp *icmp, int len)
{
    int type, code, i;
    experiment_t *exp = NULL;
    interface_t *responder;
    struct icmp_traceroute *tr;

    type = icmp->icmp_type;
    code = icmp->icmp_code;

    diag(2, ("Got ICMP_TRACEROUTE, code %d from %s",
	code, inet_ntoa(ip->ip_src)));

    tr = (struct icmp_traceroute*)((uint32_t*)icmp + ICMP_HDRLEN/4);
    len -= ICMP_HDRLEN;
    if (len < sizeof(struct icmp_traceroute)) {
	diag(2, (": traceroute message is too short (%d)\n", len));
	return 0;
    }

    if (ntohs(tr->id) != (getpid() & 0xFFFF)) {
	diag(2, (": traceroute id (%hd) isn't ours\n", tr->id));
	return 0;
    }

    diag(2, ("TR OHC: %hd\n", tr->ohc));
    diag(2, ("TR RHC: %hd\n", tr->rhc));
    diag(2, ("TR speed: %d\n", tr->speed));
    diag(2, ("TR MTU: %d\n", tr->mtu));

    /* Look up experiment using array index encoded in "unused" field
     * (and hope that routers copied it) */
    if (tr->unused1 == TR_VERIFY_UNUSED) {
	i = tr->unused2;
	if (i < 0 || i >= max_concurrent_probes || !exptab[i].iface) {
	    diag(2, (": original index doesn't match experiment\n"));
	} else {
	    exp = &exptab[i];
	}
    } else {
	diag(2, (": original index wasn't copied\n"));
    }

    if (!inaddr_is_routable_unicast(ip->ip_src)) {
	diag(2, ("> src (%s) is not a routable unicast address\n",
	    inet_ntoa(ip->ip_src)));
	return 0;
    }

    responder = find_or_create_iface(ip->ip_src, DR_ICMP_TRACEROUTE, type,code);
    if (!responder) return -1;

    if (!responder->does_tr) {
	responder->does_tr = 1;
	stats.support_tr++;
    }

    return 0;
}

/* Returns -1 for error, 1 if received packet ends an experiment, 0 otherwise */
static int handle_reply(void)
{
    uint32_t packet[(PKTLEN+3)/4];
    size_t pktlen, len;
    struct ip *ip;
    struct icmp *icmp;
    int ip_hdrlen, i;
    struct timeval recv_time;

    len = sizeof(packet);
    gettimeofday(&recv_time, NULL);
    pktlen = recvfrom(recvsock, packet, len, 0, NULL, NULL);
    if ((len = pktlen) < 0) {
	fprintf(stderr, "%s: recv: %s\n", argv0, strerror(errno));
	return -1;
    }

    if (verbosity >= 9) {
	for (i = 0; i < len; i++) {
	    eprintf(" %02x", packet[i]);
	    if ((i % 16) == 15) eprintf("\n");
	}
	eprintf("\n");
    }

    ip = (struct ip *)packet;
    ip_hdrlen = ip->ip_hl * 4;

    if (parse_ip_options(ip, len, "resp", NULL) < 0)
	return -1;

    if (ip->ip_p != IPPROTO_ICMP) {
	diag(3, ("Got %d bytes from %s: not ICMP\n",
	    pktlen, inet_ntoa(ip->ip_src)));
	stats.noise++;
	return 0;
    }

    if (len < ip_hdrlen + ICMP_HDRLEN) {
	diag(3, ("Got %d bytes from %s: too short\n",
	    pktlen, inet_ntoa(ip->ip_src)));
	stats.noise++;
	return 0;
    }

    icmp = (struct icmp*)(packet + ip_hdrlen/4);
    len -= ip_hdrlen;

    switch (icmp->icmp_type) {
	case ICMP_REDIRECT:
	case ICMP_TIMXCEED:
	case ICMP_PARAMPROB:
	case ICMP_SOURCEQUENCH:
	case ICMP_UNREACH:
	    return handle_icmp_error(ip, icmp, len, &recv_time);
	case ICMP_TRACEROUTE:
	    return handle_icmp_traceroute(ip, icmp, len);
    }

    diag(3, ("Got %d bytes from %s: ICMP type %d, code %d\n",
	pktlen, inet_ntoa(ip->ip_src), icmp->icmp_type, icmp->icmp_code));
    stats.noise++;
    return 0;
}

/* Returns -1 for error, or boolean need_new_next_iface */
static int iteration(interface_t *next_iface)
{
    fd_set readers;
    int count;
    experiment_t *exp;

    if (interrupted) {
	while ((exp = first_exp)) {
	    dump_experiment(exp, NULL);
	    remove_exp_from_expiration_list(exp);
	    free_experiment(exp);
	}
	if (next_iface) {
	    exp = new_experiment(next_iface);
	    dump_experiment(exp, NULL);
	    free_experiment(exp);
	    stats.experiments--;
	}
	return !!next_iface;
    }

    if (current_experiments) {
	struct timeval *tp, timeout, now;
	FD_ZERO(&readers);
	FD_SET(recvsock, &readers);

	if (current_experiments < max_concurrent_probes && next_iface) {
	    tp = &delay_time;
	} else {
	    gettimeofday(&now, NULL);
	    timeout = first_exp->expires;
	    timersub(&timeout, &now, &timeout);
	    if (timeout.tv_sec < 0 || timeout.tv_usec < 0)
		timeout.tv_sec = timeout.tv_usec = 0;
	    tp = &timeout;
	}
	count = select(recvsock+1, &readers, NULL, NULL, tp);
	if (count > 0) {
	    if (handle_reply() < 0)
		return -1;
	} else if (count < 0) {
	    fprintf(stderr, "%s: select: %s\n", argv0, strerror(errno));
	    return (errno == EINTR) ? 0 : -1;
	} else {
	    gettimeofday(&now, NULL);
	    while (first_exp && timercmp(&now, &first_exp->expires, >)){
		exp = first_exp;
		diag(2, ("Experiment %d.%d (%s) timed out.\n",
		    exp->exp_id, exp->tries-1,
		    inet_ntoa(exp->iface->addr)));
		stats.timeouts++;
		remove_exp_from_expiration_list(exp);
		/* timeout may be caused by options, so disable options */
		exp->use_rr = 0;
		exp->use_traceroute = 0;
		/* Set result only if there was no previous result */
		if (exp->result_how == DR_NONE)
		    exp->result_how = DR_NO_RESPONSE;
		if (retry(exp) < 0)
		    return -1;
	    }
	}
    }
    if (current_experiments < max_concurrent_probes && next_iface) {
	exp = new_experiment(next_iface);
	assert(exp);
	next_iface->passes++;
	if (send_probe(exp) < 0) return -1;
	return 1;
    }
    return 0;
}

static void usage_exit(const char *msg)
{
    if (msg) eprintf("%s\n", msg);
    eprintf("Usage:  %s [options] <ip_file>\n",
	argv0);
    eprintf("Options (with defaults in brackets):\n");
    eprintf("-v<V>   verbosity <V> [1]\n");
    eprintf("-w<W>   wait <W> seconds for response [%d]\n", TIMEOUT);
    eprintf("-c<C>   send at most <C> concurrent probes [%d]\n",
	MAX_CONCURRENT_PROBES);
    eprintf("-p<P>   use dest ports in the range <P> to <P>+<C>*%d-1 [%d]\n",
	MAX_TRIES, BASE_PORT);
    eprintf("-r<R>   send at most <R> probes per second [%d]\n",
	MAX_PROBES_PER_SEC);
    eprintf("-n<N>   probe each address in the list <N> times [%d]\n", NPASS);
    eprintf("-N      don't probe newly discovered addresses\n");
#if USE_RAW_IP || defined(IP_OPTIONS)
    eprintf("-R      use RECORD ROUTE option\n");
    eprintf("-T      use RFC 1393 TRACEROUTE option (not widely supported)\n");
#endif
    eprintf("-M      probe the /30-mate of every address\n");
    exit(1);
}

int main(int argc, char *argv[])
{
    interface_t *next_iface;
    int opt, iter_result;
    int pass = 0;
    char buf[32];
    struct tm *tm;
    time_t elapsed;
    int rate = MAX_PROBES_PER_SEC;

    argv0 = argv[0];

    /* command line options */
    while ((opt = getopt(argc, argv, "v:w:c:p:r:n:NRTM")) != -1) {
	switch (opt) {
	case 'v':
	    verbosity = atoi(optarg);
	    break;
	case 'p':
	    base_port = atoi(optarg);
	    break;
	case 'r':
	    rate = atoi(optarg);
	    if (rate <= 0 || rate > 1000)
		usage_exit("Rate must be in [1, 1000].");
	    break;
	case 'w':
	    expire_time.tv_sec = atoi(optarg);
	    if (expire_time.tv_sec <= 0 || expire_time.tv_sec > 1000)
		usage_exit("Timeout must be in [1, 1000].");
	    break;
	case 'c':
	    max_concurrent_probes = atoi(optarg);
	    if (max_concurrent_probes <= 0 || max_concurrent_probes > 255)
		usage_exit("Concurrent probes must be in [1, 255].");
	    break;
	case 'n':
	    npass = atoi(optarg);
	    if (npass < 0)
		usage_exit("Number of passes must be >0.");
	    break;
	case 'N':
	    no_probe_new++;
	    break;
#if USE_RAW_IP || defined(IP_OPTIONS)
	case 'R':
	    use_rr++;
	    break;
	case 'T':
	    use_traceroute++;
	    break;
#endif
	case 'M':
	    use_mate++;
	    break;
	default:
	    usage_exit(NULL);
	}
    }
    delay_time.tv_usec = 1000000.0/rate;
    if (base_port + max_concurrent_probes * MAX_TRIES - 1 > 0xFFFF) {
	eprintf("<P>+<C>*%d-1 must be no greater than %d.\n",
	    MAX_TRIES, 0xFFFF);
	usage_exit(NULL);
    }
    if (optind >= argc) {
	usage_exit(NULL);
    }

    init_experiments();
    if (npass)
	init_sockets();

    interface_table = init_hash_table("interface table", cmp_if, key_if,
	delete_if, INTERFACE_TABLE_SIZE);

    while (optind < argc)
	read_ip_file(argv[optind++]);

    signal(SIGINT, interrupt_handler);
    time(&start_time);

    printf("# iffinder revision:      %s\n", "$Revision: 1.42 $");
    tm = gmtime(&start_time);
    strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S UTC", tm);
    printf("# started:                %s\n", buf);

    printf("# options:\n");
    printf("#   base port:       %d\n", base_port);
    printf("#   rate:            %d pps\n", rate);
    printf("#   timeout:         %ld s\n", expire_time.tv_sec);
    printf("#   concurrent:      %d probes\n", max_concurrent_probes);
    printf("#   passes:          %d\n", npass);
    printf("#   probe new addrs: %s\n", no_probe_new ? "no" : "yes");
    printf("#   record route:    %s\n", use_rr ? "yes" : "no");
    printf("#   traceroute:      %s\n", use_traceroute ? "yes" : "no");
    printf("#   /30 mate:        %s\n", use_mate ? "yes" : "no");
    printf("#\n");

    printf("# Columns:\n");
    printf("# addr         - address of interface that was probed\n");
    printf("# alias        - address from which port unreachable response was received\n");
    printf("# o-TTL-r      - TTLs of original packet (sent by iffinder with TTL=%d)\n", INITIAL_TTL);
    printf("#                and ICMP error response packet\n");
    printf("# RTT          - time between sending probe and receiving response\n");
    printf("# result       - result of probe\n");
    printf("# discovr      - how addr was discovered\n");
    printf("# feature      - features supported by interface\n");
    printf("# record_route - list of Record Route addresses in original packet\n");
    printf("#\n");
    printf("# Codes:\n");
    printf("# %c  not probed or not discovered\n", DR_NONE);
    printf("# %c  no response\n", DR_NO_RESPONSE);
    printf("# %c  error (with errno)\n", DR_ERROR);
    printf("# %c  ICMP error (with type and code)\n", DR_ICMP_ERROR);
    printf("# %c  response from same address\n", DR_PORT_UNREACH_SELF);
    printf("# %c  response from different address\n", DR_PORT_UNREACH_DIFF);
    printf("# %c  response from bad address\n", DR_BAD_ADDR);
    printf("# %c  IP Record Route\n", DR_RR);
    printf("# %c  IP Traceroute\n", DR_ICMP_TRACEROUTE);
    printf("# %c  /30-mating\n", DR_MATED);

    printf("\n");
    printf("# %-13s %-15s %7s %10s %-7s %-7s %-7s %s\n",
	"addr", "alias", "o-TTL-r", "RTT", "result", "discovr", "feature", "record_route");

#define should_probe(iface) \
    ((iface)->passes < npass && \
     (!no_probe_new || (iface)->discover_how == DR_NONE))

    for (pass = 0; pass < npass; pass++) {
	{
	    diag(1, ("Pass %d: probing %d known addresses...\n",
		pass, stats.ifaces));
	    init_hash_walk(interface_table);
	    do {
		next_iface = next_hash_walk(interface_table);
	    } while (next_iface && !should_probe(next_iface));
	    while (next_iface || current_experiments) {
		iter_result = iteration(next_iface);
		if (iter_result < 0) goto end;
		if (iter_result)
		    do {
			next_iface = next_hash_walk(interface_table);
		    } while (next_iface && !should_probe(next_iface));
	    }
	}

	if (no_probe_new) {
	    first_unprobed = NULL;
	    unprobed_count = 0;
	}

	if (use_mate && !interrupted) {
	    diag(1, ("Pass %d: generating /30 mates...\n", pass));
	    init_hash_walk(interface_table);
	    while ((next_iface = next_hash_walk(interface_table))) {
		struct in_addr addr;
		interface_t *mate;
		if (next_iface->mated) continue; /* don't mate again */
		next_iface->mated = 1;
		addr = next_iface->addr;
		diag(4, ("Mating %s: ", inet_ntoa(addr)));
		if (((addr.s_addr & ipaddr(0,0,0,3)) == 0) ||
		    ((addr.s_addr & ipaddr(0,0,0,3)) == ipaddr(0,0,0,3)))
		{
		    /* not suitable for /30 mating */
		    diag(4, ("all 0/1\n"));
		    continue;
		}
		addr.s_addr ^= ipaddr(0,0,0,3);
		diag(4, ("%s ", inet_ntoa(addr)));
		if ((mate = find_interface(addr))) {
		    mate->mated = 1;
		    diag(4, ("already exists.\n"));
		    continue;
		}
		mate = new_unprobed_interface(addr, DR_MATED, 0, 0);
		if (!mate)  goto end;
		mate->mated = 1;
		diag(4, ("is new!\n"));
	    }
	}

	while (first_unprobed) {
	    if (!interrupted)
		diag(1, ("Pass %d: probing %d new addresses...\n",
		    pass, unprobed_count));
	    next_iface = first_unprobed;
	    first_unprobed = NULL;
	    unprobed_count = 0;
	    while (next_iface && !should_probe(next_iface)) {
		next_iface = next_iface->unext;
	    }
	    while (next_iface || current_experiments) {
		iter_result = iteration(next_iface);
		if (iter_result < 0) goto end;
		if (iter_result)
		    do {
			next_iface = next_iface->unext;
		    } while (next_iface && !should_probe(next_iface));
	    }
	}
    }

end:

    diag(1, ("                         \n")); /* erase progress line */

    elapsed = difftime(time(NULL), start_time);
    printf("\n");
    printf("# elapsed time:           %ld:%02ld:%02ld\n",
	elapsed / 3600, (elapsed / 60) % 60, elapsed % 60);
    printf("# experiments:            %8d\n", stats.experiments);
    printf("# failed experiments:     %8d\n", stats.failed);
    printf("# probes:                 %8d\n", stats.probes);
    printf("# responses\n");
    printf("#   from unroutable addrs:%8d\n", stats.invalid_addr);
    printf("#   good (port unreach):  %8d\n", stats.unreach_port);
    printf("#   other icmp errors:    %8d\n", stats.other_icmp_errors);
    printf("#   timeouts:             %8d\n", stats.timeouts);
    printf("#   noise:                %8d\n", stats.noise);
    printf("# aliases:                %8d\n", stats.aliases);
    printf("# interfaces:             %8d\n", stats.ifaces);
    if (use_rr)
      printf("#   that do record route: %8d\n", stats.support_rr);
    if (use_traceroute)
      printf("#   that do traceroute:   %8d\n", stats.support_tr);
    printf("# new interfaces:         %8d\n", stats.new_if);
    printf("#   by port unreachable:  %8d\n", stats.new_if_by_port_unreach);
    printf("#   by other icmp err:    %8d\n", stats.new_if_by_icmp_err);
    if (use_rr || stats.new_if_by_rr)
      printf("#   by record route:      %8d\n", stats.new_if_by_rr);
    if (use_traceroute || stats.new_if_by_traceroute)
      printf("#   by traceroute:        %8d\n", stats.new_if_by_traceroute);
    if (use_mate) {
      printf("#   by /30 mate:          %8d\n", stats.new_if_by_mate);
      printf("# failed /30 mates:       %8d\n", stats.failed_mates);
    }
    printf("\n");

    return 0;
}


syntax highlighted by Code2HTML, v. 0.9.1