// -*- c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t -*-
// Copyright (c) 2001-2007 International Computer Science Institute
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software")
// to deal in the Software without restriction, subject to the conditions
// listed in the XORP LICENSE file. These conditions include: you must
// preserve this copyright notice, and you cannot mention the copyright
// holders in advertising related to the Software without their permission.
// The Software is provided WITHOUT ANY WARRANTY, EXPRESS OR IMPLIED. This
// notice is a summary of the XORP LICENSE file; the license in that file is
// legally binding.
#ident "$XORP: xorp/fea/rawsock.cc,v 1.40 2007/02/16 22:45:49 pavlin Exp $"
//
// Raw socket support.
//
#include "fea_module.h"
#include "libxorp/xorp.h"
#include "libxorp/xlog.h"
#include "libxorp/debug.h"
#include "libxorp/ipvx.hh"
#include "libxorp/ipvxnet.hh"
#include "libxorp/utils.hh"
#ifdef HAVE_SYS_IOCTL_H
#include <sys/ioctl.h>
#endif
#ifdef HAVE_NET_IF_H
#include <net/if.h>
#endif
#ifdef HAVE_NET_IF_VAR_H
#include <net/if_var.h>
#endif
#ifdef HAVE_NET_IF_DL_H
#include <net/if_dl.h>
#endif
#ifdef HAVE_NETINET_IN_SYSTM_H
#include <netinet/in_systm.h>
#endif
#ifdef HAVE_NETINET_IP_H
#include <netinet/ip.h>
#endif
#ifdef HAVE_NETINET_IP6_H
#include <netinet/ip6.h>
#endif
#ifdef HAVE_NETINET_ICMP6_H
#include <netinet/icmp6.h>
#endif
#ifdef HAVE_NETINET6_IN6_VAR_H
#include <netinet6/in6_var.h>
#endif
#ifdef HOST_OS_WINDOWS
#include <mswsock.h>
#include "libxorp/win_io.h"
#include "ip.h"
#endif
#include "libcomm/comm_api.h"
#include "libproto/packet.hh"
#include "mrt/include/ip_mroute.h"
// XXX: _PIM_VT is needed if we want the extra features of <netinet/pim.h>
#define _PIM_VT 1
#if defined(HAVE_NETINET_PIM_H) && defined(HAVE_STRUCT_PIM_PIM_VT)
#include <netinet/pim.h>
#else
#include "mrt/include/netinet/pim.h"
#endif
#include "iftree.hh"
#include "kernel_utils.hh"
#include "rawsock.hh"
//
// Local constants definitions
//
#define IO_BUF_SIZE (64*1024) // I/O buffer(s) size
#define CMSG_BUF_SIZE (10*1024) // 'rcvcmsgbuf' and 'sndcmsgbuf'
#define SO_RCV_BUF_SIZE_MIN (48*1024) // Min. rcv socket buffer size
#define SO_RCV_BUF_SIZE_MAX (256*1024) // Desired rcv socket buffer size
#define SO_SND_BUF_SIZE_MIN (48*1024) // Min. snd socket buffer size
#define SO_SND_BUF_SIZE_MAX (256*1024) // Desired snd socket buffer size
#ifndef MINTTL
#define MINTTL 1
#endif
#ifndef IPDEFTTL
#define IPDEFTTL 64
#endif
#ifndef MAXTTL
#define MAXTTL 255
#endif
#ifndef MLD_MINLEN
# ifdef HAVE_MLD_HDR
# define MLD_MINLEN (sizeof(struct mld_hdr))
# else
# define MLD_MINLEN 24
# endif
#endif
//
// Local structures/classes, typedefs and macros
//
#ifdef HOST_OS_WINDOWS
#define cmsghdr wsacmsghdr
typedef char *caddr_t;
#ifdef __MINGW32__
#ifndef _ALIGNBYTES
#define _ALIGNBYTES (sizeof(int) - 1)
#endif
#ifndef _ALIGN
#define _ALIGN(p) (((unsigned)(p) + _ALIGNBYTES) & ~_ALIGNBYTES)
#endif
#define CMSG_DATA(cmsg) \
((unsigned char *)(cmsg) + _ALIGN(sizeof(struct cmsghdr)))
#define CMSG_NXTHDR(mhdr, cmsg) \
(((char *)(cmsg) + _ALIGN((cmsg)->cmsg_len) + \
_ALIGN(sizeof(struct cmsghdr)) > \
(char *)(mhdr)->Control.buf + (mhdr)->Control.len) ? \
(struct cmsghdr *)0 : \
(struct cmsghdr *)((char *)(cmsg) + _ALIGN((cmsg)->cmsg_len)))
#define CMSG_FIRSTHDR(mhdr) \
((mhdr)->Control.len >= sizeof(struct cmsghdr) ? \
(struct cmsghdr *)(mhdr)->Control.buf : \
(struct cmsghdr *)NULL)
#define CMSG_SPACE(l) (_ALIGN(sizeof(struct cmsghdr)) + _ALIGN(l))
#define CMSG_LEN(l) (_ALIGN(sizeof(struct cmsghdr)) + (l))
typedef INT (WINAPI * LPFN_WSARECVMSG)(SOCKET, LPWSAMSG, LPDWORD,
LPWSAOVERLAPPED,
LPWSAOVERLAPPED_COMPLETION_ROUTINE);
typedef INT (WINAPI * LPFN_WSASENDMSG)(SOCKET, LPWSAMSG, DWORD, LPDWORD,
LPWSAOVERLAPPED,
LPWSAOVERLAPPED_COMPLETION_ROUTINE);
#define WSAID_WSARECVMSG \
{ 0xf689d7c8,0x6f1f,0x436b,{0x8a,0x53,0xe5,0x4f,0xe3,0x51,0xc3,0x22} }
#define WSAID_WSASENDMSG \
{ 0xa441e712,0x754f,0x43ca,{0x84,0xa7,0x0d,0xee,0x44,0xcf,0x60,0x6d} }
#else // ! __MINGW32__
#define CMSG_FIRSTHDR(msg) WSA_CMSG_FIRSTHDR(msg)
#define CMSG_NXTHDR(msg, cmsg) WSA_CMSG_NEXTHDR(msg, cmsg)
#define CMSG_DATA(cmsg) WSA_CMSG_DATA(cmsg)
#define CMSG_SPACE(len) WSA_CMSG_SPACE(len)
#define CMSG_LEN(len) WSA_CMSG_LEN(len)
#endif // ! __MINGW32__
#ifdef HAVE_IPV6
static const GUID guidWSARecvMsg = WSAID_WSARECVMSG;
static const GUID guidWSASendMsg = WSAID_WSASENDMSG;
static LPFN_WSARECVMSG lpWSARecvMsg = NULL;
static LPFN_WSASENDMSG lpWSASendMsg = NULL; // Windows Longhorn and up
#endif // HAVE_IPV6
#endif // HOST_OS_WINDOWS
#ifndef CMSG_LEN
#define CMSG_LEN(l) (ALIGN(sizeof(struct cmsghdr)) + (l)) // XXX
#endif
//
// Local variables
//
// IPv4 Router Alert stuff
#ifndef IPTOS_PREC_INTERNETCONTROL
#define IPTOS_PREC_INTERNETCONTROL 0xc0
#endif
#ifndef IPOPT_RA
#define IPOPT_RA 148 // 0x94
#endif
static uint32_t ra_opt4;
// IPv6 Router Alert stuff
#ifdef HAVE_IPV6
#ifndef IP6OPT_RTALERT
#define IP6OPT_RTALERT 0x05
#endif
#ifndef IP6OPT_RTALERT_LEN
#define IP6OPT_RTALERT_LEN 4
#endif
#ifndef IP6OPT_RTALERT_MLD
#define IP6OPT_RTALERT_MLD 0
#endif
#ifndef IP6OPT_ROUTER_ALERT // XXX: for compatibility with older systems
#define IP6OPT_ROUTER_ALERT IP6OPT_RTALERT
#endif
#endif // HAVE_IPV6
//
#ifdef HAVE_IPV6
static uint16_t rtalert_code6;
#ifndef HAVE_RFC3542
static uint8_t ra_opt6[IP6OPT_RTALERT_LEN];
#endif
#endif // HAVE_IPV6
// XXX: This is a bit of a dirty hack
#ifdef HOST_OS_WINDOWS
#ifdef strerror
#undef strerror
#endif
#define strerror(errnum) win_strerror(GetLastError())
#endif
RawSocket::RawSocket(EventLoop& eventloop, int init_family,
uint8_t ip_protocol, const IfTree& iftree)
: _eventloop(eventloop),
_family(init_family),
_ip_protocol(ip_protocol),
_iftree(iftree),
_is_ip_hdr_included(false),
_ip_id(random())
{
// Init Router Alert related option stuff
ra_opt4 = htonl((IPOPT_RA << 24) | (0x04 << 16));
#ifdef HAVE_IPV6
rtalert_code6 = htons(IP6OPT_RTALERT_MLD); // XXX: used by MLD only (?)
#ifndef HAVE_RFC3542
ra_opt6[0] = IP6OPT_ROUTER_ALERT;
ra_opt6[1] = IP6OPT_RTALERT_LEN - 2;
memcpy(&ra_opt6[2], (caddr_t)&rtalert_code6, sizeof(rtalert_code6));
#endif // ! HAVE_RFC3542
#endif // HAVE_IPV6
// Allocate the buffers
_rcvbuf = new uint8_t[IO_BUF_SIZE];
_sndbuf = new uint8_t[IO_BUF_SIZE];
_rcvcmsgbuf = new uint8_t[CMSG_BUF_SIZE];
_sndcmsgbuf = new uint8_t[CMSG_BUF_SIZE];
// Scatter/gatter array initialization
_rcviov[0].iov_base = (caddr_t)_rcvbuf;
_rcviov[0].iov_len = IO_BUF_SIZE;
_sndiov[0].iov_base = (caddr_t)_sndbuf;
_sndiov[0].iov_len = 0;
// recvmsg() and sendmsg() related initialization
#ifndef HOST_OS_WINDOWS
switch (family()) {
case AF_INET:
_rcvmh.msg_name = (caddr_t)&_from4;
_sndmh.msg_name = (caddr_t)&_to4;
_rcvmh.msg_namelen = sizeof(_from4);
_sndmh.msg_namelen = sizeof(_to4);
break;
#ifdef HAVE_IPV6
case AF_INET6:
_rcvmh.msg_name = (caddr_t)&_from6;
_sndmh.msg_name = (caddr_t)&_to6;
_rcvmh.msg_namelen = sizeof(_from6);
_sndmh.msg_namelen = sizeof(_to6);
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
break;
}
_rcvmh.msg_iov = _rcviov;
_sndmh.msg_iov = _sndiov;
_rcvmh.msg_iovlen = 1;
_sndmh.msg_iovlen = 1;
_rcvmh.msg_control = (caddr_t)_rcvcmsgbuf;
_sndmh.msg_control = (caddr_t)_sndcmsgbuf;
_rcvmh.msg_controllen = CMSG_BUF_SIZE;
_sndmh.msg_controllen = 0;
#endif // ! HOST_OS_WINDOWS
}
RawSocket::~RawSocket()
{
string dummy_error_msg;
stop(dummy_error_msg);
// Free the buffers
delete[] _rcvbuf;
delete[] _sndbuf;
delete[] _rcvcmsgbuf;
delete[] _sndcmsgbuf;
}
int
RawSocket::start(string& error_msg)
{
if (_proto_socket_in.is_valid() && _proto_socket_out.is_valid())
return (XORP_OK);
return (open_proto_sockets(error_msg));
}
int
RawSocket::stop(string& error_msg)
{
if (! (_proto_socket_in.is_valid() && _proto_socket_out.is_valid()))
return (XORP_OK);
return (close_proto_sockets(error_msg));
}
int
RawSocket::enable_ip_hdr_include(bool is_enabled, string& error_msg)
{
UNUSED(is_enabled);
switch (family()) {
case AF_INET:
{
#ifdef IP_HDRINCL
// XXX: the setsockopt() argument must be 'int'
int bool_flag = is_enabled;
if (setsockopt(_proto_socket_out, IPPROTO_IP, IP_HDRINCL,
XORP_SOCKOPT_CAST(&bool_flag),
sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IP_HDRINCL, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
_is_ip_hdr_included = is_enabled;
#endif // IP_HDRINCL
}
break;
#ifdef HAVE_IPV6
case AF_INET6:
break; // XXX
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
return (XORP_OK);
}
int
RawSocket::enable_recv_pktinfo(bool is_enabled, string& error_msg)
{
switch (family()) {
case AF_INET:
{
// XXX: the setsockopt() argument must be 'int'
int bool_flag = is_enabled;
//
// Interface index
//
#ifdef IP_RECVIF
// XXX: BSD
if (setsockopt(_proto_socket_in, IPPROTO_IP, IP_RECVIF,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
XLOG_ERROR("setsockopt(IP_RECVIF, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#endif // IP_RECVIF
#ifdef IP_PKTINFO
// XXX: Linux
if (setsockopt(_proto_socket_in, IPPROTO_IP, IP_PKTINFO,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
XLOG_ERROR("setsockopt(IP_PKTINFO, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#endif // IP_PKTINFO
UNUSED(bool_flag);
break;
}
#ifdef HAVE_IPV6
case AF_INET6:
{
// XXX: the setsockopt() argument must be 'int'
int bool_flag = is_enabled;
//
// Interface index and address
//
#ifdef IPV6_RECVPKTINFO
// The new option (applies to receiving only)
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_RECVPKTINFO,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_RECVPKTINFO, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#else
// The old option (see RFC-2292)
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_PKTINFO,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_PKTINFO, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#endif // ! IPV6_RECVPKTINFO
//
// Hop-limit field
//
#ifdef IPV6_RECVHOPLIMIT
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_RECVHOPLIMIT,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_RECVHOPLIMIT, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#else
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_HOPLIMIT,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_HOPLIMIT, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#endif // ! IPV6_RECVHOPLIMIT
//
// Traffic class value
//
#ifdef IPV6_RECVTCLASS
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_RECVTCLASS,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_RECVTCLASS, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#endif // IPV6_RECVTCLASS
//
// Hop-by-hop options
//
#ifdef IPV6_RECVHOPOPTS
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_RECVHOPOPTS,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_RECVHOPOPTS, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#else
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_HOPOPTS,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_HOPOPTS, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#endif // ! IPV6_RECVHOPOPTS
//
// Routing header options
//
#ifdef IPV6_RECVRTHDR
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_RECVRTHDR,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_RECVRTHDR, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#else
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_RTHDR,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_RTHDR, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#endif // ! IPV6_RECVRTHDR
//
// Destination options
//
#ifdef IPV6_RECVDSTOPTS
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_RECVDSTOPTS,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_RECVDSTOPTS, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#else
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_DSTOPTS,
XORP_SOCKOPT_CAST(&bool_flag), sizeof(bool_flag)) < 0) {
error_msg = c_format("setsockopt(IPV6_DSTOPTS, %u) failed: %s",
bool_flag, strerror(errno));
return (XORP_ERROR);
}
#endif // ! IPV6_RECVDSTOPTS
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
return (XORP_OK);
}
int
RawSocket::set_multicast_ttl(int ttl, string& error_msg)
{
switch (family()) {
case AF_INET:
{
u_char ip_ttl = ttl; // XXX: In IPv4 the value argument is 'u_char'
if (setsockopt(_proto_socket_out, IPPROTO_IP, IP_MULTICAST_TTL,
XORP_SOCKOPT_CAST(&ip_ttl), sizeof(ip_ttl)) < 0) {
error_msg = c_format("setsockopt(IP_MULTICAST_TTL, %u) failed: %s",
ip_ttl, strerror(errno));
return (XORP_ERROR);
}
}
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
#ifndef HAVE_IPV6_MULTICAST
error_msg = c_format("set_multicast_ttl() failed: "
"IPv6 multicast not supported");
return (XORP_ERROR);
#else
int ip_ttl = ttl;
if (setsockopt(_proto_socket_out, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
XORP_SOCKOPT_CAST(&ip_ttl), sizeof(ip_ttl)) < 0) {
error_msg = c_format("setsockopt(IPV6_MULTICAST_HOPS, %u) failed: %s",
ip_ttl, strerror(errno));
return (XORP_ERROR);
}
#endif // HAVE_IPV6_MULTICAST
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
return (XORP_OK);
}
int
RawSocket::enable_multicast_loopback(bool is_enabled, string& error_msg)
{
switch (family()) {
case AF_INET:
{
u_char loop = is_enabled;
if (setsockopt(_proto_socket_out, IPPROTO_IP, IP_MULTICAST_LOOP,
XORP_SOCKOPT_CAST(&loop), sizeof(loop)) < 0) {
error_msg = c_format("setsockopt(IP_MULTICAST_LOOP, %u) failed: %s",
loop, strerror(errno));
return (XORP_ERROR);
}
}
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
#ifndef HAVE_IPV6_MULTICAST
error_msg = c_format("enable_multicast_loop() failed: "
"IPv6 multicast not supported");
return (XORP_ERROR);
#else
uint loop6 = is_enabled;
if (setsockopt(_proto_socket_out, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
XORP_SOCKOPT_CAST(&loop6), sizeof(loop6)) < 0) {
error_msg = c_format("setsockopt(IPV6_MULTICAST_LOOP, %u) failed: %s",
loop6, strerror(errno));
return (XORP_ERROR);
}
#endif // HAVE_IPV6_MULTICAST
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
return (XORP_OK);
}
int
RawSocket::set_default_multicast_interface(const string& if_name,
const string& vif_name,
string& error_msg)
{
// Find the interface
IfTree::IfMap::const_iterator ii = _iftree.get_if(if_name);
if (ii == _iftree.ifs().end()) {
error_msg = c_format("Setting the default multicast interface failed:"
"interface %s not found",
if_name.c_str());
return (XORP_ERROR);
}
const IfTreeInterface& fi = ii->second;
// Find the vif
IfTreeInterface::VifMap::const_iterator vi = fi.get_vif(vif_name);
if (vi == fi.vifs().end()) {
error_msg = c_format("Setting the default multicast interface failed:"
"interface %s vif %s not found",
if_name.c_str(), vif_name.c_str());
return (XORP_ERROR);
}
const IfTreeVif& fv = vi->second;
switch (family()) {
case AF_INET:
{
struct in_addr in_addr;
// Find the first address
IfTreeVif::V4Map::const_iterator ai = fv.v4addrs().begin();
if (ai == fv.v4addrs().end()) {
error_msg = c_format("Setting the default multicast interface "
"failed: "
"interface %s vif %s has no address",
if_name.c_str(), vif_name.c_str());
return (XORP_ERROR);
}
const IfTreeAddr4& fa = ai->second;
fa.addr().copy_out(in_addr);
if (setsockopt(_proto_socket_out, IPPROTO_IP, IP_MULTICAST_IF,
XORP_SOCKOPT_CAST(&in_addr), sizeof(in_addr)) < 0) {
error_msg = c_format("setsockopt(IP_MULTICAST_IF, %s) failed: %s",
cstring(fa.addr()), strerror(errno));
return (XORP_ERROR);
}
}
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
#ifndef HAVE_IPV6_MULTICAST
error_msg = c_format("set_default_multicast_interface() failed: "
"IPv6 multicast not supported");
return (XORP_ERROR);
#else
u_int pif_index = fv.pif_index();
if (setsockopt(_proto_socket_out, IPPROTO_IPV6, IPV6_MULTICAST_IF,
XORP_SOCKOPT_CAST(&pif_index), sizeof(pif_index)) < 0) {
error_msg = c_format("setsockopt(IPV6_MULTICAST_IF, %s/%s) failed: %s",
if_name.c_str(), vif_name.c_str(), strerror(errno));
return (XORP_ERROR);
}
#endif // HAVE_IPV6_MULTICAST
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
return (XORP_OK);
}
int
RawSocket::join_multicast_group(const string& if_name,
const string& vif_name,
const IPvX& group,
string& error_msg)
{
// Find the interface
IfTree::IfMap::const_iterator ii = _iftree.get_if(if_name);
if (ii == _iftree.ifs().end()) {
error_msg = c_format("Joining multicast group %s failed: "
"interface %s not found",
cstring(group),
if_name.c_str());
return (XORP_ERROR);
}
const IfTreeInterface& fi = ii->second;
// Find the vif
IfTreeInterface::VifMap::const_iterator vi = fi.get_vif(vif_name);
if (vi == fi.vifs().end()) {
error_msg = c_format("Joining multicast group %s failed: "
"interface %s vif %s not found",
cstring(group),
if_name.c_str(),
vif_name.c_str());
return (XORP_ERROR);
}
const IfTreeVif& fv = vi->second;
#if 0 // TODO: enable or disable the enabled() check?
if (! (fi.enabled() || fv.enabled())) {
error_msg = c_format("Cannot join group %s on interface %s vif %s: "
"interface/vif is DOWN",
cstring(group),
if_name.c_str(),
vif_name.c_str());
return (XORP_ERROR);
}
#endif // 0/1
switch (family()) {
case AF_INET:
{
struct ip_mreq mreq;
struct in_addr in_addr;
// Find the first address
IfTreeVif::V4Map::const_iterator ai = fv.v4addrs().begin();
if (ai == fv.v4addrs().end()) {
error_msg = c_format("Cannot join group %s on interface %s vif %s: "
"interface/vif has no address",
cstring(group),
if_name.c_str(),
vif_name.c_str());
return (XORP_ERROR);
}
const IfTreeAddr4& fa = ai->second;
fa.addr().copy_out(in_addr);
group.copy_out(mreq.imr_multiaddr);
mreq.imr_interface.s_addr = in_addr.s_addr;
if (setsockopt(_proto_socket_in, IPPROTO_IP, IP_ADD_MEMBERSHIP,
XORP_SOCKOPT_CAST(&mreq), sizeof(mreq)) < 0) {
error_msg = c_format("Cannot join group %s on interface %s vif %s: %s",
cstring(group),
if_name.c_str(),
vif_name.c_str(),
strerror(errno));
return (XORP_ERROR);
}
}
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
#ifndef HAVE_IPV6_MULTICAST
error_msg = c_format("join_multicast_group() failed: "
"IPv6 multicast not supported");
return (XORP_ERROR);
#else
struct ipv6_mreq mreq6;
group.copy_out(mreq6.ipv6mr_multiaddr);
mreq6.ipv6mr_interface = fv.pif_index();
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_JOIN_GROUP,
XORP_SOCKOPT_CAST(&mreq6), sizeof(mreq6)) < 0) {
error_msg = c_format("Cannot join group %s on interface %s vif %s: %s",
cstring(group),
if_name.c_str(),
vif_name.c_str(),
strerror(errno));
return (XORP_ERROR);
}
#endif // HAVE_IPV6_MULTICAST
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
return (XORP_OK);
}
int
RawSocket::leave_multicast_group(const string& if_name,
const string& vif_name,
const IPvX& group,
string& error_msg)
{
// Find the interface
IfTree::IfMap::const_iterator ii = _iftree.get_if(if_name);
if (ii == _iftree.ifs().end()) {
error_msg = c_format("Leaving multicast group %s failed: "
"interface %s not found",
cstring(group),
if_name.c_str());
return (XORP_ERROR);
}
const IfTreeInterface& fi = ii->second;
// Find the vif
IfTreeInterface::VifMap::const_iterator vi = fi.get_vif(vif_name);
if (vi == fi.vifs().end()) {
error_msg = c_format("Leaving multicast group %s failed: "
"interface %s vif %s not found",
cstring(group),
if_name.c_str(),
vif_name.c_str());
return (XORP_ERROR);
}
const IfTreeVif& fv = vi->second;
#if 0 // TODO: enable or disable the enabled() check?
if (! (fi.enabled() || fv.enabled())) {
error_msg = c_format("Cannot leave group %s on interface %s vif %s: "
"interface/vif is DOWN",
cstring(group),
if_name.c_str(),
vif_name.c_str());
return (XORP_ERROR);
}
#endif // 0/1
switch (family()) {
case AF_INET:
{
struct ip_mreq mreq;
struct in_addr in_addr;
// Find the first address
IfTreeVif::V4Map::const_iterator ai = fv.v4addrs().begin();
if (ai == fv.v4addrs().end()) {
error_msg = c_format("Cannot leave group %s on interface %s vif %s: "
"interface/vif has no address",
cstring(group),
if_name.c_str(),
vif_name.c_str());
return (XORP_ERROR);
}
const IfTreeAddr4& fa = ai->second;
fa.addr().copy_out(in_addr);
group.copy_out(mreq.imr_multiaddr);
mreq.imr_interface.s_addr = in_addr.s_addr;
if (setsockopt(_proto_socket_in, IPPROTO_IP, IP_DROP_MEMBERSHIP,
XORP_SOCKOPT_CAST(&mreq), sizeof(mreq)) < 0) {
error_msg = c_format("Cannot leave group %s on interface %s vif %s: %s",
cstring(group),
if_name.c_str(),
vif_name.c_str(),
strerror(errno));
return (XORP_ERROR);
}
}
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
#ifndef HAVE_IPV6_MULTICAST
error_msg = c_format("leave_multicast_group() failed: "
"IPv6 multicast not supported");
return (XORP_ERROR);
#else
struct ipv6_mreq mreq6;
group.copy_out(mreq6.ipv6mr_multiaddr);
mreq6.ipv6mr_interface = fv.pif_index();
if (setsockopt(_proto_socket_in, IPPROTO_IPV6, IPV6_LEAVE_GROUP,
XORP_SOCKOPT_CAST(&mreq6), sizeof(mreq6)) < 0) {
error_msg = c_format("Cannot leave group %s on interface %s vif %s: %s",
cstring(group),
if_name.c_str(),
vif_name.c_str(),
strerror(errno));
return (XORP_ERROR);
}
#endif // HAVE_IPV6_MULTICAST
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
return (XORP_OK);
}
int
RawSocket::open_proto_sockets(string& error_msg)
{
string dummy_error_msg;
// If necessary, open the protocol sockets
if (! _proto_socket_in.is_valid()) {
_proto_socket_in = socket(family(), SOCK_RAW, _ip_protocol);
if (!_proto_socket_in.is_valid()) {
char *errstr;
#ifdef HAVE_STRERROR
errstr = strerror(errno);
#else
errstr = "unknown error";
#endif
error_msg = c_format("Cannot open IP protocol %u raw socket: %s",
_ip_protocol, errstr);
return (XORP_ERROR);
}
} while (false);
if (! _proto_socket_out.is_valid()) {
_proto_socket_out = socket(family(), SOCK_RAW, _ip_protocol);
if (!_proto_socket_out.is_valid()) {
char *errstr;
#ifdef HAVE_STRERROR
errstr = strerror(errno);
#else
errstr = "unknown error";
#endif
error_msg = c_format("Cannot open IP protocol %u raw socket: %s",
_ip_protocol, errstr);
return (XORP_ERROR);
}
} while (false);
#ifdef HOST_OS_WINDOWS
switch (family()) {
case AF_INET:
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
// Obtain the pointer to the extension function WSARecvMsg() if needed
if (lpWSARecvMsg == NULL) {
int result;
DWORD nbytes;
result = WSAIoctl(_proto_socket_in,
SIO_GET_EXTENSION_FUNCTION_POINTER,
const_cast<GUID *>(&guidWSARecvMsg),
sizeof(guidWSARecvMsg),
&lpWSARecvMsg, sizeof(lpWSARecvMsg), &nbytes,
NULL, NULL);
if (result == SOCKET_ERROR) {
XLOG_ERROR("Cannot obtain WSARecvMsg function pointer; "
"unable to receive raw IPv6 traffic.");
lpWSARecvMsg = NULL;
}
}
// Obtain the pointer to the extension function WSASendMsg() if needed
// XXX: Only available on Windows Longhorn.
if (lpWSASendMsg == NULL) {
int result;
DWORD nbytes;
result = WSAIoctl(_proto_socket_in,
SIO_GET_EXTENSION_FUNCTION_POINTER,
const_cast<GUID *>(&guidWSASendMsg),
sizeof(guidWSASendMsg),
&lpWSASendMsg, sizeof(lpWSASendMsg), &nbytes,
NULL, NULL);
if (result == SOCKET_ERROR) {
XLOG_ERROR("Cannot obtain WSASendMsg function pointer; "
"unable to send raw IPv6 traffic.");
lpWSASendMsg = NULL;
}
}
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
#endif // HOST_OS_WINDOWS
// Set various socket options
// Lots of input buffering
if (comm_sock_set_rcvbuf(_proto_socket_in, SO_RCV_BUF_SIZE_MAX,
SO_RCV_BUF_SIZE_MIN)
< SO_RCV_BUF_SIZE_MIN) {
error_msg = c_format("Cannot set the receiver buffer size: %s",
comm_get_last_error_str());
close_proto_sockets(dummy_error_msg);
return (XORP_ERROR);
}
// Lots of output buffering
if (comm_sock_set_rcvbuf(_proto_socket_out, SO_SND_BUF_SIZE_MAX,
SO_SND_BUF_SIZE_MIN)
< SO_SND_BUF_SIZE_MIN) {
error_msg = c_format("Cannot set the sender buffer size: %s",
comm_get_last_error_str());
close_proto_sockets(dummy_error_msg);
return (XORP_ERROR);
}
// Include IP header when sending (XXX: doesn't do anything for IPv6)
if (enable_ip_hdr_include(true, error_msg) != XORP_OK) {
close_proto_sockets(dummy_error_msg);
return (XORP_ERROR);
}
// Show interest in receiving information from IP header (XXX: IPv6 only)
if (enable_recv_pktinfo(true, error_msg) != XORP_OK) {
close_proto_sockets(dummy_error_msg);
return (XORP_ERROR);
}
// Restrict multicast TTL
if (set_multicast_ttl(MINTTL, error_msg) != XORP_OK) {
close_proto_sockets(dummy_error_msg);
return (XORP_ERROR);
}
// Disable mcast loopback
if (enable_multicast_loopback(false, error_msg) != XORP_OK) {
close_proto_sockets(dummy_error_msg);
return (XORP_ERROR);
}
// Protocol-specific setup
switch (family()) {
case AF_INET:
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
if (ip_protocol() == IPPROTO_ICMPV6) {
struct icmp6_filter filter;
// Pass all ICMPv6 messages
ICMP6_FILTER_SETPASSALL(&filter);
#ifdef HAVE_IPV6_MULTICAST_ROUTING
#if 0 // TODO: XXX: used only for multicast routing purpose by MLD
if (module_id() == XORP_MODULE_MLD6IGMP) {
// Filter all non-MLD ICMPv6 messages
ICMP6_FILTER_SETBLOCKALL(&filter);
ICMP6_FILTER_SETPASS(MLD_LISTENER_QUERY, &filter);
ICMP6_FILTER_SETPASS(MLD_LISTENER_REPORT, &filter);
ICMP6_FILTER_SETPASS(MLD_LISTENER_DONE, &filter);
ICMP6_FILTER_SETPASS(MLD_MTRACE_RESP, &filter);
ICMP6_FILTER_SETPASS(MLD_MTRACE, &filter);
#ifdef MLDV2_LISTENER_REPORT
ICMP6_FILTER_SETPASS(MLDV2_LISTENER_REPORT, &filter);
#endif
}
#endif // 0
#endif // HAVE_IPV6_MULTICAST_ROUTING
if (setsockopt(_proto_socket_in, _ip_protocol, ICMP6_FILTER,
XORP_SOCKOPT_CAST(&filter), sizeof(filter)) < 0) {
close_proto_sockets(dummy_error_msg);
error_msg = c_format("setsockopt(ICMP6_FILTER) failed: %s",
strerror(errno));
return (XORP_ERROR);
}
}
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
#ifdef HOST_OS_WINDOWS
//
// Winsock requires that raw sockets be bound, either to the IPv4
// address of a physical interface, or the INADDR_ANY address,
// in order to receive traffic or to join multicast groups.
//
struct sockaddr_in sin;
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = INADDR_ANY;
if (SOCKET_ERROR == bind(_proto_socket_in, (sockaddr *)&sin,
sizeof(sockaddr_in))) {
XLOG_WARNING("bind() failed: %s\n", win_strerror(GetLastError()));
}
#endif // HOST_OS_WINDOWS
// Assign a method to read from this socket
if (eventloop().add_ioevent_cb(_proto_socket_in,
IOT_READ,
callback(this,
&RawSocket::proto_socket_read))
== false) {
error_msg = c_format("Cannot add a protocol socket to the set of "
"sockets to read from in the event loop");
return (XORP_ERROR);
}
return (XORP_OK);
}
int
RawSocket::close_proto_sockets(string& error_msg)
{
if (! (_proto_socket_in.is_valid() && _proto_socket_out.is_valid())) {
error_msg = c_format("Invalid protocol socket");
return (XORP_ERROR);
}
//
// Close the outgoing protocol socket
//
if (_proto_socket_out.is_valid()) {
comm_close(_proto_socket_out);
_proto_socket_out.clear();
}
//
// Close the incoming protocol socket
//
if (_proto_socket_in.is_valid()) {
// Remove it just in case, even though it may not be select()-ed
eventloop().remove_ioevent_cb(_proto_socket_in);
#ifdef HOST_OS_WINDOWS
switch (family()) {
case AF_INET:
break;
#ifdef HAVE_IPV6
case AF_INET6:
// Reset the pointer to the WSARecvMsg()/WSASendMsg() functions.
lpWSARecvMsg = NULL;
lpWSASendMsg = NULL;
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
#endif // HOST_OS_WINDOWS
comm_close(_proto_socket_in);
_proto_socket_in.clear();
}
return (XORP_OK);
}
void
RawSocket::proto_socket_read(XorpFd fd, IoEventType type)
{
ssize_t nbytes;
size_t ip_hdr_len = 0;
size_t ip_data_len = 0;
IPvX src_address(family());
IPvX dst_address(family());
int int_val;
int32_t ip_ttl = -1; // a.k.a. Hop-Limit in IPv6
int32_t ip_tos = -1;
bool is_router_alert = false; // Router Alert option received
uint32_t pif_index = 0;
vector<uint8_t> ext_headers_type;
vector<vector<uint8_t> > ext_headers_payload;
void* cmsg_data; // XXX: CMSG_DATA() is aligned, hence void ptr
UNUSED(fd);
UNUSED(type);
UNUSED(int_val);
UNUSED(cmsg_data);
#ifndef HOST_OS_WINDOWS
// Zero and reset various fields
_rcvmh.msg_controllen = CMSG_BUF_SIZE;
// TODO: when resetting _from4 and _from6 do we need to set the address
// family and the sockaddr len?
switch (family()) {
case AF_INET:
memset(&_from4, 0, sizeof(_from4));
_rcvmh.msg_namelen = sizeof(_from4);
break;
#ifdef HAVE_IPV6
case AF_INET6:
memset(&_from6, 0, sizeof(_from6));
_rcvmh.msg_namelen = sizeof(_from6);
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
return; // Error
}
// Read from the socket
nbytes = recvmsg(_proto_socket_in, &_rcvmh, 0);
if (nbytes < 0) {
if (errno == EINTR)
return; // OK: restart receiving
XLOG_ERROR("recvmsg() failed: %s", strerror(errno));
return; // Error
}
#else // HOST_OS_WINDOWS
switch (family()) {
case AF_INET:
{
struct sockaddr_storage from;
socklen_t from_len = sizeof(from);
nbytes = recvfrom(_proto_socket_in, XORP_BUF_CAST(_rcvbuf),
IO_BUF_SIZE, 0,
reinterpret_cast<struct sockaddr *>(&from),
&from_len);
debug_msg("Read fd %s, %d bytes\n",
_proto_socket_in.str().c_str(), XORP_INT_CAST(nbytes));
if (nbytes < 0) {
// XLOG_ERROR("recvfrom() failed: %s", strerror(errno));
return; // Error
}
}
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
WSAMSG mh;
DWORD error, nrecvd;
struct sockaddr_in6 from;
mh.name = (LPSOCKADDR)&from;
mh.namelen = sizeof(from);
mh.lpBuffers = (LPWSABUF)_rcviov;
mh.dwBufferCount = 1;
mh.Control.len = CMSG_BUF_SIZE;
mh.Control.buf = (caddr_t)_rcvcmsgbuf;
mh.dwFlags = 0;
if (lpWSARecvMsg == NULL) {
XLOG_ERROR("lpWSARecvMsg is NULL");
return; // Error
}
error = lpWSARecvMsg(_proto_socket_in, &mh, &nrecvd, NULL, NULL);
nbytes = (ssize_t)nrecvd;
debug_msg("Read fd %s, %d bytes\n",
_proto_socket_in.str().c_str(), XORP_INT_CAST(nbytes));
if (nbytes < 0) {
// XLOG_ERROR("lpWSARecvMsg() failed: %s", strerror(errno));
return; // Error
}
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
return; // Error
}
#endif // HOST_OS_WINDOWS
// Check whether this is a signal from the kernel to the user-level
switch (_ip_protocol) {
case IPPROTO_IGMP:
{
#ifndef HAVE_IPV4_MULTICAST_ROUTING
XLOG_WARNING("proto_socket_read(): "
"IGMP is unsupported on this platform");
return; // Error
#else
if (nbytes < (ssize_t)sizeof(struct igmpmsg)) {
XLOG_WARNING("proto_socket_read() failed: "
"kernel signal packet size %d is smaller than minimum size %u",
XORP_INT_CAST(nbytes),
XORP_UINT_CAST(sizeof(struct igmpmsg)));
return; // Error
}
struct igmpmsg igmpmsg;
memcpy(&igmpmsg, _rcvbuf, sizeof(igmpmsg));
if (igmpmsg.im_mbz == 0) {
//
// XXX: Packets sent up from kernel to daemon have
// igmpmsg.im_mbz = ip->ip_p = 0
//
// TODO: not implemented
// kernel_call_process(_rcvbuf, nbytes);
return; // OK
}
#endif // HAVE_IPV4_MULTICAST_ROUTING
}
break;
#ifdef HAVE_IPV6
case IPPROTO_ICMPV6:
{
if (nbytes < (ssize_t)sizeof(struct icmp6_hdr)) {
XLOG_WARNING("proto_socket_read() failed: "
"packet size %d is smaller than minimum size %u",
XORP_INT_CAST(nbytes),
XORP_UINT_CAST(sizeof(struct icmp6_hdr)));
return; // Error
}
#ifdef HAVE_IPV6_MULTICAST_ROUTING
struct mrt6msg *mrt6msg;
if (nbytes < (ssize_t)sizeof(*mrt6msg)) {
// Not a kernel signal
break;
}
mrt6msg = reinterpret_cast<struct mrt6msg *>(_rcvbuf);
if ((mrt6msg->im6_mbz == 0) || (_rcvmh.msg_controllen == 0)) {
//
// XXX: Packets sent up from kernel to daemon have
// mrt6msg->im6_mbz = icmp6_hdr->icmp6_type = 0
// Because we set ICMP6 filters on the socket,
// we should never see a real ICMPv6 packet
// with icmp6_type = 0 .
//
//
// TODO: XXX: (msg_controllen == 0) is presumably
// true for older IPv6 systems (e.g. KAME circa
// April 2000, FreeBSD-4.0) which don't have the
// 'icmp6_type = 0' mechanism.
//
// TODO: not implemented
// kernel_call_process(_rcvbuf, nbytes);
return; // OK
}
#endif // HAVE_IPV6_MULTICAST_ROUTING
}
break;
#endif // HAVE_IPV6
default:
break;
}
//
// Not a kernel signal. Pass to the registered processing function
//
//
// Input check.
// Get source and destination address, IP TTL (a.k.a. hop-limit),
// and (eventually) interface address (IPv6 only).
//
switch (family()) {
case AF_INET:
{
IpHeader4 ip4(_rcvbuf);
bool is_datalen_error = false;
// Input check
if (nbytes < (ssize_t)ip4.size()) {
XLOG_WARNING("proto_socket_read() failed: "
"packet size %d is smaller than minimum size %u",
XORP_INT_CAST(nbytes),
XORP_UINT_CAST(ip4.size()));
return; // Error
}
#ifndef HOST_OS_WINDOWS
// TODO: get rid of this and always use ip4.ip_src() ??
src_address.copy_in(_from4);
#else
src_address = ip4.ip_src();
#endif
dst_address = ip4.ip_dst();
ip_ttl = ip4.ip_ttl();
ip_tos = ip4.ip_tos();
ip_hdr_len = ip4.ip_header_len();
#ifdef IPV4_RAW_INPUT_IS_RAW
ip_data_len = ip4.ip_len() - ip_hdr_len;
#else
//
// XXX: The original value is in host order, and excludes the
// IPv4 header length.
//
ip_data_len = ip4.ip_len_host();
#endif // ! IPV4_RAW_INPUT_IS_RAW
// Check length
is_datalen_error = false;
do {
if (ip_hdr_len + ip_data_len == static_cast<size_t>(nbytes)) {
is_datalen_error = false;
break; // OK
}
if (nbytes < static_cast<ssize_t>(ip_hdr_len)) {
is_datalen_error = true;
break;
}
if (ip4.ip_p() == IPPROTO_PIM) {
if (nbytes < static_cast<ssize_t>(ip_hdr_len + PIM_REG_MINLEN)) {
is_datalen_error = true;
break;
}
struct pim pim;
memcpy(&pim, ip4.data() + ip_hdr_len, sizeof(pim));
if (PIM_VT_T(pim.pim_vt) != PIM_REGISTER) {
is_datalen_error = true;
break;
}
//
// XXX: the *BSD kernel might truncate the encapsulated data
// in PIM Register messages before passing the message up
// to user level. The reason for the truncation is to reduce
// the bandwidth overhead, but the price for this is
// messages with weird IP header. Sigh...
//
is_datalen_error = false;
break;
}
break;
} while (false);
if (is_datalen_error) {
XLOG_ERROR("proto_socket_read() failed: "
"RX packet size from %s to %s with %d bytes instead of "
"hdr+datalen=%u+%u=%u",
cstring(src_address), cstring(dst_address),
XORP_INT_CAST(nbytes),
XORP_UINT_CAST(ip_hdr_len),
XORP_UINT_CAST(ip_data_len),
XORP_UINT_CAST(ip_hdr_len + ip_data_len));
return; // Error
}
//
// Get the pif_index.
//
#ifndef HOST_OS_WINDOWS
for (struct cmsghdr *cmsgp = reinterpret_cast<struct cmsghdr *>(CMSG_FIRSTHDR(&_rcvmh));
cmsgp != NULL;
cmsgp = reinterpret_cast<struct cmsghdr *>(CMSG_NXTHDR(&_rcvmh, cmsgp))) {
if (cmsgp->cmsg_level != IPPROTO_IP)
continue;
switch (cmsgp->cmsg_type) {
#ifdef IP_RECVIF
case IP_RECVIF:
{
struct sockaddr_dl *sdl = NULL;
if (cmsgp->cmsg_len < CMSG_LEN(sizeof(struct sockaddr_dl)))
continue;
cmsg_data = CMSG_DATA(cmsgp);
sdl = reinterpret_cast<struct sockaddr_dl *>(cmsg_data);
pif_index = sdl->sdl_index;
}
break;
#endif // IP_RECVIF
#ifdef IP_PKTINFO
case IP_PKTINFO:
{
struct in_pktinfo *inp = NULL;
if (cmsgp->cmsg_len < CMSG_LEN(sizeof(struct in_pktinfo)))
continue;
cmsg_data = CMSG_DATA(cmsgp);
inp = reinterpret_cast<struct in_pktinfo *>(cmsg_data);
pif_index = inp->ipi_ifindex;
}
break;
#endif // IP_PKTINFO
default:
break;
}
}
#endif // ! HOST_OS_WINDOWS
//
// Check for Router Alert option
//
do {
const uint8_t *option_p = ip4.data() + ip4.size();
uint8_t option_value, option_len;
uint32_t test_ip_options_len = ip_hdr_len - ip4.size();
while (test_ip_options_len) {
if (test_ip_options_len < 4)
break;
option_value = *option_p;
option_len = *(option_p + 1);
if (test_ip_options_len < option_len)
break;
if ((option_value == IPOPT_RA) && (option_len == 4)) {
is_router_alert = true;
break;
}
if (option_len == 0)
break; // XXX: a guard against bogus option_len value
test_ip_options_len -= option_len;
option_p += option_len;
}
break;
} while (false);
}
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
struct in6_pktinfo *pi = NULL;
src_address.copy_in(_from6);
if (_rcvmh.msg_flags & MSG_CTRUNC) {
XLOG_ERROR("proto_socket_read() failed: "
"RX packet from %s with size of %d bytes is truncated",
cstring(src_address),
XORP_INT_CAST(nbytes));
return; // Error
}
size_t controllen = static_cast<size_t>(_rcvmh.msg_controllen);
if (controllen < sizeof(struct cmsghdr)) {
XLOG_ERROR("proto_socket_read() failed: "
"RX packet from %s has too short msg_controllen "
"(%u instead of %u)",
cstring(src_address),
XORP_UINT_CAST(controllen),
XORP_UINT_CAST(sizeof(struct cmsghdr)));
return; // Error
}
//
// Get pif_index, hop limit, Router Alert option, etc.
//
for (struct cmsghdr *cmsgp = reinterpret_cast<struct cmsghdr *>(CMSG_FIRSTHDR(&_rcvmh));
cmsgp != NULL;
cmsgp = reinterpret_cast<struct cmsghdr *>(CMSG_NXTHDR(&_rcvmh, cmsgp))) {
if (cmsgp->cmsg_level != IPPROTO_IPV6)
continue;
switch (cmsgp->cmsg_type) {
case IPV6_PKTINFO:
{
if (cmsgp->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo)))
continue;
cmsg_data = CMSG_DATA(cmsgp);
pi = reinterpret_cast<struct in6_pktinfo *>(cmsg_data);
pif_index = pi->ipi6_ifindex;
dst_address.copy_in(pi->ipi6_addr);
}
break;
case IPV6_HOPLIMIT:
{
if (cmsgp->cmsg_len < CMSG_LEN(sizeof(int)))
continue;
int_val = extract_host_int(CMSG_DATA(cmsgp));
ip_ttl = int_val;
}
break;
case IPV6_HOPOPTS:
{
//
// Check for Router Alert option
//
#ifdef HAVE_RFC3542
struct ip6_hbh *ext;
int currentlen;
uint8_t type;
size_t extlen;
socklen_t len;
void *databuf;
cmsg_data = CMSG_DATA(cmsgp);
ext = reinterpret_cast<struct ip6_hbh *>(cmsg_data);
extlen = (ext->ip6h_len + 1) * 8;
currentlen = 0;
while (true) {
currentlen = inet6_opt_next(ext, extlen, currentlen,
&type, &len, &databuf);
if (currentlen == -1)
break;
if (type == IP6OPT_ROUTER_ALERT) {
is_router_alert = true;
break;
}
}
#else // ! HAVE_RFC3542 (i.e., the old advanced API)
#ifdef HAVE_IPV6_MULTICAST_ROUTING
//
// TODO: XXX: temporary use HAVE_IPV6_MULTICAST_ROUTING
// to conditionally compile, because Linux doesn't
// have inet6_option_*
//
uint8_t *tptr = NULL;
while (inet6_option_next(cmsgp, &tptr) == 0) {
if (*tptr == IP6OPT_ROUTER_ALERT) {
is_router_alert = true;
break;
}
}
#endif // HAVE_IPV6_MULTICAST_ROUTING
#endif // ! HAVE_RFC3542
}
break;
#ifdef IPV6_TCLASS
case IPV6_TCLASS:
if (cmsgp->cmsg_len < CMSG_LEN(sizeof(int)))
continue;
int_val = extract_host_int(CMSG_DATA(cmsgp));
ip_tos = int_val;
break;
#endif // IPV6_TCLASS
#ifdef IPV6_RTHDR
case IPV6_RTHDR:
{
vector<uint8_t> opt_payload(cmsgp->cmsg_len);
if (cmsgp->cmsg_len < CMSG_LEN(sizeof(struct ip6_rthdr)))
continue;
cmsg_data = CMSG_DATA(cmsgp);
memcpy(&opt_payload[0], cmsg_data, cmsgp->cmsg_len);
// XXX: Use the protocol number instead of message type
ext_headers_type.push_back(IPPROTO_ROUTING);
ext_headers_payload.push_back(opt_payload);
}
break;
#endif // IPV6_RTHDR
#ifdef IPV6_DSTOPTS
case IPV6_DSTOPTS:
{
vector<uint8_t> opt_payload(cmsgp->cmsg_len);
if (cmsgp->cmsg_len < CMSG_LEN(sizeof(struct ip6_dest)))
continue;
cmsg_data = CMSG_DATA(cmsgp);
memcpy(&opt_payload[0], cmsg_data, cmsgp->cmsg_len);
// XXX: Use the protocol number instead of message type
ext_headers_type.push_back(IPPROTO_DSTOPTS);
ext_headers_payload.push_back(opt_payload);
}
break;
#endif // IPV6_DSTOPTS
default:
break;
}
}
ip_hdr_len = 0;
ip_data_len = nbytes;
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
return; // Error
}
// Various checks
if (! (src_address.is_unicast() || src_address.is_zero())) {
// XXX: Accept zero source addresses because of protocols like IGMPv3
XLOG_ERROR("proto_socket_read() failed: "
"invalid unicast sender address: %s", cstring(src_address));
return; // Error
}
if (! (dst_address.is_multicast() || dst_address.is_unicast())) {
XLOG_ERROR("proto_socket_read() failed: "
"invalid destination address: %s", cstring(dst_address));
return; // Error
}
if (ip_ttl < 0) {
// TODO: what about ip_ttl = 0? Is it OK?
XLOG_ERROR("proto_socket_read() failed: "
"invalid TTL (hop-limit) from %s to %s: %d",
cstring(src_address), cstring(dst_address), ip_ttl);
return; // Error
}
if (pif_index == 0) {
switch (family()) {
case AF_INET:
// TODO: take care of Linux??
break; // XXX: in IPv4 (except Linux?) there is no pif_index
#ifdef HAVE_IPV6
case AF_INET6:
XLOG_ERROR("proto_socket_read() failed: "
"invalid interface pif_index from %s to %s: %u",
cstring(src_address), cstring(dst_address),
XORP_UINT_CAST(pif_index));
return; // Error
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
return; // Error
}
}
//
// Find the interface and the vif this message was received on.
// XXX: note that in case of IPv4 (except Linux?) we may be able
// only to "guess" by using the sender or the destination address.
//
const IfTreeInterface* iftree_if = NULL;
const IfTreeVif* iftree_vif = NULL;
do {
//
// Find interface and vif based on src address, if a directly
// connected sender, or based on dst address if unicast and one
// of my local addresses.
// However, check first whether we can use 'pif_index' instead.
//
if (pif_index != 0) {
find_interface_vif_by_pif_index(pif_index, iftree_if, iftree_vif);
break;
}
if (dst_address.is_multicast()) {
find_interface_vif_same_subnet_or_p2p(src_address, iftree_if,
iftree_vif);
} else {
find_interface_vif_by_addr(dst_address, iftree_if, iftree_vif);
}
break;
} while (false);
if ((iftree_if == NULL) || (iftree_vif == NULL)) {
// No vif found. Ignore this packet.
XLOG_WARNING("proto_socket_read() failed: "
"RX packet from %s to %s: no vif found",
cstring(src_address), cstring(dst_address));
return; // Error
}
if (! (iftree_if->enabled() || iftree_vif->enabled())) {
// This vif is down. Silently ignore this packet.
return; // Error
}
// Process the result
vector<uint8_t> payload(nbytes - ip_hdr_len);
memcpy(&payload[0], _rcvbuf + ip_hdr_len, nbytes - ip_hdr_len);
process_recv_data(iftree_if->ifname(),
iftree_vif->vifname(),
src_address, dst_address, ip_ttl, ip_tos,
is_router_alert,
ext_headers_type,
ext_headers_payload,
payload);
return; // OK
}
int
RawSocket::proto_socket_write(const string& if_name,
const string& vif_name,
const IPvX& src_address,
const IPvX& dst_address,
int32_t ip_ttl,
int32_t ip_tos,
bool is_router_alert,
const vector<uint8_t>& ext_headers_type,
const vector<vector<uint8_t> >& ext_headers_payload,
const vector<uint8_t>& payload,
string& error_msg)
{
size_t ip_hdr_len = 0;
int int_val;
int ret_value = XORP_OK;
const IfTreeInterface* iftree_if = NULL;
const IfTreeVif* iftree_vif = NULL;
void* cmsg_data; // XXX: CMSG_DATA() is aligned, hence void ptr
UNUSED(int_val);
UNUSED(cmsg_data);
XLOG_ASSERT(ext_headers_type.size() == ext_headers_payload.size());
find_interface_vif_by_name(if_name, vif_name, iftree_if, iftree_vif);
if (iftree_if == NULL) {
error_msg = c_format("No interface %s", if_name.c_str());
return (XORP_ERROR);
}
if (iftree_vif == NULL) {
error_msg = c_format("No interface %s vif %s",
if_name.c_str(), vif_name.c_str());
return (XORP_ERROR);
}
if (! iftree_if->enabled()) {
error_msg = c_format("Interface %s is down",
iftree_if->ifname().c_str());
return (XORP_ERROR);
}
if (! iftree_vif->enabled()) {
error_msg = c_format("Interface %s vif %s is down",
iftree_if->ifname().c_str(),
iftree_vif->vifname().c_str());
return (XORP_ERROR);
}
if (payload.size() > IO_BUF_SIZE) {
error_msg = c_format("proto_socket_write() failed: "
"cannot send packet on interface %s vif %s "
"from %s to %s: "
"too much data: %u octets (max = %u)",
if_name.c_str(),
vif_name.c_str(),
src_address.str().c_str(),
dst_address.str().c_str(),
XORP_UINT_CAST(payload.size()),
XORP_UINT_CAST(IO_BUF_SIZE));
return (XORP_ERROR);
}
//
// Assign the TTL and TOS if they were not specified
//
switch (family()) {
case AF_INET:
// Assign the TTL
if (ip_ttl < 0) {
if (is_router_alert)
ip_ttl = MINTTL;
else
ip_ttl = IPDEFTTL;
}
// Assign the TOS
if (ip_tos < 0) {
if (is_router_alert)
ip_tos = IPTOS_PREC_INTERNETCONTROL; // Internet Control
else
ip_tos = 0;
}
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
// Assign the TTL
if (ip_ttl < 0) {
if (is_router_alert)
ip_ttl = MINTTL;
else
ip_ttl = IPDEFTTL;
}
// Assign the TOS
if (ip_tos < 0) {
if (is_router_alert) {
// TODO: XXX: IPTOS for IPv6 is bogus??
// ip_tos = IPTOS_PREC_INTERNETCONTROL; // Internet Control
ip_tos = 0;
} else {
ip_tos = 0;
}
}
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
//
// Setup the IP header (including the Router Alert option, if specified)
// In case of IPv4, if the IP_HDRINCL socket option is enabled, the IP
// header and the data are specified as a single entry in the sndiov[]
// scatter/gatter array, otherwise we use socket options to set the IP
// header information and a single sndiov[] entry with the payload.
// In case of IPv6, the IP header information is specified as
// ancillary data.
//
switch (family()) {
case AF_INET:
{
//
// XXX: In case of Linux IP_HDRINCL IP packets are not fragmented and
// are limited to the interface MTU. The raw(7) Linux manual is wrong
// by saying it is a limitation only in Linux 2.2. It is a limitation
// in 2.4 and 2.6 and there is no indication this is going to be fixed
// in the future.
// Hence, in case of Linux we do the following:
// - If the IP packet fits in the MTU, then send the packet using
// IP_HDRINCL option.
// - Otherwise, if the IP source address belongs to the outgoing
// interface, then use various socket options to specify some of
// the IP header options.
// - Otherwise, use IP_HDRINCL and fragment the IP packet in user
// space before transmitting it. Note that in this case we need
// to manage the ip_id field in the IP header.
//
// The reasoning behind the above logic is: (1) If the IP source
// address doesn't belong to one of the router's IP addresses, then
// we must use IP_HDRINCL; (2) We want to avoid as much as possible
// user-level IP packet fragmentation, because managing the ip_id
// field in user space does not guarantee the same ip_id is reused
// by the kernel as well (for the same tuple of <src, dest, protocol>).
//
// Note that in case of all other OS-es we always use the IP_HDRINCL
// option to transmit the packets.
//
bool do_fragmentation = false;
bool do_ip_hdr_include = true;
//
// Decide whether we should use IP_HDRINCL and whether we should
// do IP packet fragmentation.
//
do {
#ifndef HOST_OS_LINUX
break; // XXX: The extra processing below is for Linux only
#endif
// Calculate the final packet size and whether it fits in the MTU
ip_hdr_len = IpHeader4::SIZE;
if (is_router_alert)
ip_hdr_len += sizeof(ra_opt4);
if (ip_hdr_len + payload.size() <= iftree_if->mtu())
break;
if (iftree_vif->get_addr(src_address.get_ipv4())
!= iftree_vif->v4addrs().end()) {
do_ip_hdr_include = false;
break;
}
do_fragmentation = true;
break;
} while (false);
if (do_ip_hdr_include != _is_ip_hdr_included) {
if (enable_ip_hdr_include(do_ip_hdr_include, error_msg)
!= XORP_OK) {
XLOG_ERROR("%s", error_msg.c_str());
return (XORP_ERROR);
}
}
if (! _is_ip_hdr_included) {
//
// Use socket options to set the IP header information
//
//
// Include the Router Alert option
//
#ifdef IP_OPTIONS
if (is_router_alert) {
if (setsockopt(_proto_socket_out, IPPROTO_IP, IP_OPTIONS,
XORP_SOCKOPT_CAST(&ra_opt4),
sizeof(ra_opt4))
< 0) {
error_msg = c_format("setsockopt(IP_OPTIONS, IPOPT_RA) "
"failed: %s",
strerror(errno));
XLOG_ERROR("%s", error_msg.c_str());
return (XORP_ERROR);
}
}
#endif // IP_OPTIONS
//
// Set the TTL
//
#ifdef IP_TTL
int_val = ip_ttl;
if (setsockopt(_proto_socket_out, IPPROTO_IP, IP_TTL,
XORP_SOCKOPT_CAST(&int_val), sizeof(int_val))
< 0) {
error_msg = c_format("setsockopt(IP_TTL, %d) failed: %s",
int_val, strerror(errno));
XLOG_ERROR("%s", error_msg.c_str());
return (XORP_ERROR);
}
#endif // IP_TTL
//
// Set the TOS
//
#ifdef IP_TOS
int_val = ip_tos;
if (setsockopt(_proto_socket_out, IPPROTO_IP, IP_TOS,
XORP_SOCKOPT_CAST(&int_val), sizeof(int_val))
< 0) {
error_msg = c_format("setsockopt(IP_TOS, 0x%x) failed: %s",
int_val, strerror(errno));
XLOG_ERROR("%s", error_msg.c_str());
return (XORP_ERROR);
}
#endif // IP_TOS
//
// XXX: Bind to the source address so the outgoing IP packet
// will use that address.
//
struct sockaddr_in sin;
src_address.copy_out(sin);
if (bind(_proto_socket_out,
reinterpret_cast<struct sockaddr*>(&sin),
sizeof(sin))
< 0) {
error_msg = c_format("raw socket bind(%s) failed: %s",
cstring(src_address), strerror(errno));
XLOG_ERROR("%s", error_msg.c_str());
return (XORP_ERROR);
}
//
// Now hook the data
//
memcpy(_sndbuf, &payload[0], payload.size()); // XXX: _sndiov[0].iov_base
_sndiov[0].iov_len = payload.size();
// Transmit the packet
ret_value = proto_socket_transmit(iftree_if, iftree_vif,
src_address, dst_address,
error_msg);
break;
}
//
// Set the IPv4 header
//
IpHeader4Writer ip4(_sndbuf);
if (is_router_alert) {
// Include the Router Alert option
uint8_t* ip_option_p = ip4.data() + ip4.size();
//
// XXX: ra_opt4 is in network order, hence we write it as-is
// by using embed_host_32().
//
embed_host_32(ip_option_p, ra_opt4);
ip_hdr_len = ip4.size() + sizeof(ra_opt4);
} else {
ip_hdr_len = ip4.size();
}
ip4.set_ip_version(IPVERSION);
ip4.set_ip_header_len(ip_hdr_len);
ip4.set_ip_tos(ip_tos);
ip4.set_ip_id(0); // Let kernel fill in
ip4.set_ip_off(0);
ip4.set_ip_ttl(ip_ttl);
ip4.set_ip_p(_ip_protocol);
ip4.set_ip_sum(0); // Let kernel fill in
ip4.set_ip_src(src_address.get_ipv4());
ip4.set_ip_dst(dst_address.get_ipv4());
ip4.set_ip_len(ip_hdr_len + payload.size());
//
// Now hook the data
//
memcpy(_sndbuf + ip_hdr_len, &payload[0], payload.size()); // XXX: _sndiov[0].iov_base
_sndiov[0].iov_len = ip_hdr_len + payload.size();
if (! do_fragmentation) {
// Transmit the packet
ret_value = proto_socket_transmit(iftree_if, iftree_vif,
src_address, dst_address,
error_msg);
break;
}
//
// Perform fragmentation and transmit each fragment
//
list<vector<uint8_t> > fragments;
list<vector<uint8_t> >::iterator iter;
// Calculate and set the IPv4 packet ID
_ip_id++;
if (_ip_id == 0)
_ip_id++;
ip4.set_ip_id(_ip_id);
if (ip4.fragment(iftree_if->mtu(), fragments, false, error_msg)
!= XORP_OK) {
return (XORP_ERROR);
}
XLOG_ASSERT(! fragments.empty());
for (iter = fragments.begin(); iter != fragments.end(); ++iter) {
vector<uint8_t>& ip_fragment = *iter;
_sndiov[0].iov_len = ip_fragment.size();
memcpy(_sndbuf, &ip_fragment[0], ip_fragment.size());
ret_value = proto_socket_transmit(iftree_if, iftree_vif,
src_address, dst_address,
error_msg);
if (ret_value != XORP_OK)
break;
}
}
break;
#ifdef HAVE_IPV6
case AF_INET6:
{
int ctllen = 0;
int hbhlen = 0;
struct cmsghdr *cmsgp;
struct in6_pktinfo *sndpktinfo;
//
// XXX: unlikely IPv4, in IPv6 the 'header' is specified as
// ancillary data.
//
//
// First, estimate total length of ancillary data
//
// Space for IPV6_PKTINFO
ctllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
if (is_router_alert) {
// Space for Router Alert option
#ifdef HAVE_RFC3542
if ((hbhlen = inet6_opt_init(NULL, 0)) == -1) {
error_msg = c_format("inet6_opt_init(NULL) failed");
return (XORP_ERROR);
}
if ((hbhlen = inet6_opt_append(NULL, 0, hbhlen,
IP6OPT_ROUTER_ALERT, 2,
2, NULL)) == -1) {
error_msg = c_format("inet6_opt_append(NULL) failed");
return (XORP_ERROR);
}
if ((hbhlen = inet6_opt_finish(NULL, 0, hbhlen)) == -1) {
error_msg = c_format("inet6_opt_finish(NULL) failed");
return (XORP_ERROR);
}
ctllen += CMSG_SPACE(hbhlen);
#else
#ifdef HAVE_IPV6_MULTICAST_ROUTING
//
// TODO: XXX: temporary use HAVE_IPV6_MULTICAST_ROUTING
// to conditionally compile, because Linux doesn't
// have inet6_option_*
//
hbhlen = inet6_option_space(sizeof(ra_opt6));
ctllen += hbhlen;
#else
UNUSED(hbhlen);
#endif
#endif // ! HAVE_RFC3542
}
// Space for IPV6_TCLASS
#ifdef IPV6_TCLASS
ctllen += CMSG_SPACE(sizeof(int));
#endif
// Space for IPV6_HOPLIMIT
ctllen += CMSG_SPACE(sizeof(int));
// Space for the Extension headers
for (size_t i = 0; i < ext_headers_type.size(); i++) {
// Ignore the types that are not supported
switch (ext_headers_type[i]) {
case IPPROTO_ROUTING:
break;
case IPPROTO_DSTOPTS:
break;
default:
continue; // XXX: all other types are not supported
break;
}
ctllen += CMSG_SPACE(ext_headers_payload[i].size());
}
XLOG_ASSERT(ctllen <= CMSG_BUF_SIZE); // XXX
//
// Now setup the ancillary data
//
_sndmh.msg_controllen = ctllen;
cmsgp = CMSG_FIRSTHDR(&_sndmh);
// Add the IPV6_PKTINFO ancillary data
cmsgp->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
cmsgp->cmsg_level = IPPROTO_IPV6;
cmsgp->cmsg_type = IPV6_PKTINFO;
cmsg_data = CMSG_DATA(cmsgp);
sndpktinfo = reinterpret_cast<struct in6_pktinfo *>(cmsg_data);
memset(sndpktinfo, 0, sizeof(*sndpktinfo));
if ((iftree_if->pif_index() > 0)
&& !(dst_address.is_unicast()
&& !dst_address.is_linklocal_unicast())) {
//
// XXX: don't set the outgoing interface index if we are
// sending an unicast packet to a non-link local unicast address.
// Otherwise, the sending may fail with EHOSTUNREACH error.
//
sndpktinfo->ipi6_ifindex = iftree_if->pif_index();
} else {
sndpktinfo->ipi6_ifindex = 0; // Let kernel fill in
}
src_address.copy_out(sndpktinfo->ipi6_addr);
cmsgp = CMSG_NXTHDR(&_sndmh, cmsgp);
//
// Include the Router Alert option if needed
//
if (is_router_alert) {
#ifdef HAVE_RFC3542
int currentlen;
void *hbhbuf, *optp = NULL;
cmsgp->cmsg_len = CMSG_LEN(hbhlen);
cmsgp->cmsg_level = IPPROTO_IPV6;
cmsgp->cmsg_type = IPV6_HOPOPTS;
hbhbuf = CMSG_DATA(cmsgp);
currentlen = inet6_opt_init(hbhbuf, hbhlen);
if (currentlen == -1) {
error_msg = c_format("inet6_opt_init(len = %d) failed",
hbhlen);
return (XORP_ERROR);
}
currentlen = inet6_opt_append(hbhbuf, hbhlen, currentlen,
IP6OPT_ROUTER_ALERT, 2, 2, &optp);
if (currentlen == -1) {
error_msg = c_format("inet6_opt_append(len = %d) failed",
currentlen);
return (XORP_ERROR);
}
inet6_opt_set_val(optp, 0, &rtalert_code6, sizeof(rtalert_code6));
if (inet6_opt_finish(hbhbuf, hbhlen, currentlen) == -1) {
error_msg = c_format("inet6_opt_finish(len = %d) failed",
currentlen);
return (XORP_ERROR);
}
#else // ! HAVE_RFC3542 (i.e., the old advanced API)
#ifdef HAVE_IPV6_MULTICAST_ROUTING
//
// TODO: XXX: temporary use HAVE_IPV6_MULTICAST_ROUTING
// to conditionally compile, because Linux doesn't
// have inet6_option_*
//
if (inet6_option_init((void *)cmsgp, &cmsgp, IPV6_HOPOPTS)) {
error_msg = c_format("inet6_option_init(IPV6_HOPOPTS) failed");
return (XORP_ERROR);
}
if (inet6_option_append(cmsgp, ra_opt6, 4, 0)) {
error_msg = c_format("inet6_option_append(Router Alert) failed");
return (XORP_ERROR);
}
#endif // HAVE_IPV6_MULTICAST_ROUTING
#endif // ! HAVE_RFC3542
cmsgp = CMSG_NXTHDR(&_sndmh, cmsgp);
}
//
// Set the TTL
//
cmsgp->cmsg_len = CMSG_LEN(sizeof(int));
cmsgp->cmsg_level = IPPROTO_IPV6;
cmsgp->cmsg_type = IPV6_HOPLIMIT;
int_val = ip_ttl;
embed_host_int(CMSG_DATA(cmsgp), int_val);
cmsgp = CMSG_NXTHDR(&_sndmh, cmsgp);
//
// Set the TOS
//
#ifdef IPV6_TCLASS
cmsgp->cmsg_len = CMSG_LEN(sizeof(int));
cmsgp->cmsg_level = IPPROTO_IPV6;
cmsgp->cmsg_type = IPV6_TCLASS;
int_val = ip_tos;
embed_host_int(CMSG_DATA(cmsgp), int_val);
cmsgp = CMSG_NXTHDR(&_sndmh, cmsgp);
#endif // IPV6_TCLASS
//
// Set the Extension headers
//
for (size_t i = 0; i < ext_headers_type.size(); i++) {
uint8_t header_type = 0;
// Translate protocol number (header type) to message type
switch (ext_headers_type[i]) {
case IPPROTO_ROUTING:
header_type = IPV6_RTHDR;
break;
case IPPROTO_DSTOPTS:
header_type = IPV6_DSTOPTS;
break;
default:
continue; // XXX: all other types are not supported
break;
}
// Set the header
const vector<uint8_t>& opt_payload(ext_headers_payload[i]);
cmsgp->cmsg_len = CMSG_LEN(opt_payload.size());
cmsgp->cmsg_level = IPPROTO_IPV6;
cmsgp->cmsg_type = header_type;
cmsg_data = CMSG_DATA(cmsgp);
memcpy(cmsg_data, &opt_payload[0], opt_payload.size());
cmsgp = CMSG_NXTHDR(&_sndmh, cmsgp);
}
//
// Now hook the data
//
memcpy(_sndbuf, &payload[0], payload.size()); // XXX: _sndiov[0].iov_base
_sndiov[0].iov_len = payload.size();
// Transmit the packet
ret_value = proto_socket_transmit(iftree_if, iftree_vif,
src_address, dst_address,
error_msg);
}
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
return (ret_value);
}
int
RawSocket::proto_socket_transmit(const IfTreeInterface* iftree_if,
const IfTreeVif* iftree_vif,
const IPvX& src_address,
const IPvX& dst_address,
string& error_msg)
{
bool setloop = false;
int ret_value = XORP_OK;
//
// Adjust some IPv4 header fields
//
#ifndef IPV4_RAW_OUTPUT_IS_RAW
if (_is_ip_hdr_included && src_address.is_ipv4()) {
//
// XXX: The stored value should be in host order, and should
// include the IPv4 header length.
//
IpHeader4Writer ip4(_sndbuf);
ip4.set_ip_len_host(ip4.ip_len());
}
#endif // ! IPV4_RAW_INPUT_IS_RAW
//
// Multicast-related setting
//
if (dst_address.is_multicast()) {
if (set_default_multicast_interface(iftree_if->ifname(),
iftree_vif->vifname(),
error_msg)
!= XORP_OK) {
return (XORP_ERROR);
}
//
// XXX: we need to enable the multicast loopback so other processes
// on the same host can receive the multicast packets.
//
if (enable_multicast_loopback(true, error_msg) != XORP_OK) {
return (XORP_ERROR);
}
setloop = true;
}
//
// Transmit the packet
//
#ifndef HOST_OS_WINDOWS
// Set some sendmsg()-related fields
switch (family()) {
case AF_INET:
dst_address.copy_out(_to4);
_sndmh.msg_namelen = sizeof(_to4);
_sndmh.msg_control = NULL;
_sndmh.msg_controllen = 0;
break;
#ifdef HAVE_IPV6
case AF_INET6:
dst_address.copy_out(_to6);
kernel_adjust_sockaddr_in6_send(_to6, iftree_if->pif_index());
_sndmh.msg_namelen = sizeof(_to6);
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
if (sendmsg(_proto_socket_out, &_sndmh, 0) < 0) {
ret_value = XORP_ERROR;
if (errno == ENETDOWN) {
// TODO: check the interface status. E.g. vif_state_check(family);
} else {
error_msg = c_format("sendmsg(proto %d size %u from %s to %s "
"on interface %s vif %s) failed: %s",
_ip_protocol,
XORP_UINT_CAST(_sndiov[0].iov_len),
cstring(src_address),
cstring(dst_address),
iftree_if->ifname().c_str(),
iftree_vif->vifname().c_str(),
strerror(errno));
}
}
#else // HOST_OS_WINDOWS
// XXX: We may use WSASendMsg() on Longhorn to support IPv6.
DWORD sent, error;
struct sockaddr_storage to;
DWORD buffer_count = 1;
int to_len = 0;
memset(&to, 0, sizeof(to));
dst_address.copy_out(reinterpret_cast<struct sockaddr&>(to));
// Set some family-specific arguments
switch (family()) {
case AF_INET:
to_len = sizeof(struct sockaddr_in);
break;
#ifdef HAVE_IPV6
case AF_INET6:
to_len = sizeof(struct sockaddr_in6);
break;
#endif // HAVE_IPV6
default:
XLOG_UNREACHABLE();
error_msg = c_format("Invalid address family %d", family());
return (XORP_ERROR);
}
error = WSASendTo(_proto_socket_out,
reinterpret_cast<WSABUF *>(_sndiov),
buffer_count, &sent, 0,
reinterpret_cast<struct sockaddr *>(&to),
to_len, NULL, NULL);
if (error != 0) {
ret_value = XORP_ERROR;
error_msg = c_format("WSASendTo(proto %d size %u from %s to %s "
"on interface %s vif %s) failed: %s",
_ip_protocol,
XORP_UINT_CAST(_sndiov[0].iov_len),
cstring(src_address),
cstring(dst_address),
iftree_if->ifname().c_str(),
iftree_vif->vifname().c_str(),
win_strerror(GetLastError()));
XLOG_ERROR("%s", error_msg.c_str());
}
#endif // HOST_OS_WINDOWS
//
// Restore some multicast related settings
//
if (setloop) {
string dummy_error_msg;
enable_multicast_loopback(false, dummy_error_msg);
}
return (ret_value);
}
bool
RawSocket::find_interface_vif_by_name(const string& if_name,
const string& vif_name,
const IfTreeInterface*& iftree_if,
const IfTreeVif*& iftree_vif) const
{
IfTree::IfMap::const_iterator ii;
IfTreeInterface::VifMap::const_iterator vi;
iftree_if = NULL;
iftree_vif = NULL;
// Find the interface
ii = _iftree.get_if(if_name);
if (ii == _iftree.ifs().end())
return (false);
const IfTreeInterface& fi = ii->second;
// Find the vif
vi = fi.get_vif(vif_name);
if (vi == fi.vifs().end())
return (false);
const IfTreeVif& fv = vi->second;
// Found a match
iftree_if = &fi;
iftree_vif = &fv;
return (true);
}
bool
RawSocket::find_interface_vif_by_pif_index(uint32_t pif_index,
const IfTreeInterface*& iftree_if,
const IfTreeVif*& iftree_vif) const
{
IfTree::IfMap::const_iterator ii;
IfTreeInterface::VifMap::const_iterator vi;
iftree_if = NULL;
iftree_vif = NULL;
for (ii = _iftree.ifs().begin(); ii != _iftree.ifs().end(); ++ii) {
const IfTreeInterface& fi = ii->second;
if (fi.pif_index() != pif_index)
continue;
for (vi = fi.vifs().begin(); vi != fi.vifs().end(); ++vi) {
const IfTreeVif& fv = vi->second;
if (fv.pif_index() == pif_index) {
// Found a match
iftree_if = &fi;
iftree_vif = &fv;
return (true);
}
}
}
return (false);
}
bool
RawSocket::find_interface_vif_same_subnet_or_p2p(
const IPvX& addr,
const IfTreeInterface*& iftree_if,
const IfTreeVif*& iftree_vif) const
{
IfTree::IfMap::const_iterator ii;
IfTreeInterface::VifMap::const_iterator vi;
IfTreeVif::V4Map::const_iterator ai4;
IfTreeVif::V6Map::const_iterator ai6;
iftree_if = NULL;
iftree_vif = NULL;
for (ii = _iftree.ifs().begin(); ii != _iftree.ifs().end(); ++ii) {
const IfTreeInterface& fi = ii->second;
for (vi = fi.vifs().begin(); vi != fi.vifs().end(); ++vi) {
const IfTreeVif& fv = vi->second;
if (addr.is_ipv4()) {
IPv4 addr4 = addr.get_ipv4();
for (ai4 = fv.v4addrs().begin(); ai4 != fv.v4addrs().end(); ++ai4) {
const IfTreeAddr4& a4 = ai4->second;
// Test if same subnet
IPv4Net subnet(a4.addr(), a4.prefix_len());
if (subnet.contains(addr4)) {
// Found a match
iftree_if = &fi;
iftree_vif = &fv;
return (true);
}
// Test if same p2p
if (! a4.point_to_point())
continue;
if ((a4.addr() == addr4) || (a4.endpoint() == addr4)) {
// Found a match
iftree_if = &fi;
iftree_vif = &fv;
return (true);
}
}
continue;
}
if (addr.is_ipv6()) {
IPv6 addr6 = addr.get_ipv6();
for (ai6 = fv.v6addrs().begin(); ai6 != fv.v6addrs().end(); ++ai6) {
const IfTreeAddr6& a6 = ai6->second;
// Test if same subnet
IPv6Net subnet(a6.addr(), a6.prefix_len());
if (subnet.contains(addr6)) {
// Found a match
iftree_if = &fi;
iftree_vif = &fv;
return (true);
}
// Test if same p2p
if (! a6.point_to_point())
continue;
if ((a6.addr() == addr6) || (a6.endpoint() == addr6)) {
// Found a match
iftree_if = &fi;
iftree_vif = &fv;
return (true);
}
}
continue;
}
}
}
return (false);
}
bool
RawSocket::find_interface_vif_by_addr(
const IPvX& addr,
const IfTreeInterface*& iftree_if,
const IfTreeVif*& iftree_vif) const
{
IfTree::IfMap::const_iterator ii;
IfTreeInterface::VifMap::const_iterator vi;
IfTreeVif::V4Map::const_iterator ai4;
IfTreeVif::V6Map::const_iterator ai6;
iftree_if = NULL;
iftree_vif = NULL;
for (ii = _iftree.ifs().begin(); ii != _iftree.ifs().end(); ++ii) {
const IfTreeInterface& fi = ii->second;
for (vi = fi.vifs().begin(); vi != fi.vifs().end(); ++vi) {
const IfTreeVif& fv = vi->second;
if (addr.is_ipv4()) {
IPv4 addr4 = addr.get_ipv4();
for (ai4 = fv.v4addrs().begin(); ai4 != fv.v4addrs().end(); ++ai4) {
const IfTreeAddr4& a4 = ai4->second;
if (a4.addr() == addr4) {
// Found a match
iftree_if = &fi;
iftree_vif = &fv;
return (true);
}
}
continue;
}
if (addr.is_ipv6()) {
IPv6 addr6 = addr.get_ipv6();
for (ai6 = fv.v6addrs().begin(); ai6 != fv.v6addrs().end(); ++ai6) {
const IfTreeAddr6& a6 = ai6->second;
if (a6.addr() == addr6) {
// Found a match
iftree_if = &fi;
iftree_vif = &fv;
return (true);
}
}
continue;
}
}
}
return (false);
}
syntax highlighted by Code2HTML, v. 0.9.1