blob: 4c1393f2234c9a41f39404a6628db1479190572e [file] [log] [blame]
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <byteswap.h>
#include <gpxe/list.h>
#include <gpxe/in.h>
#include <gpxe/arp.h>
#include <gpxe/if_ether.h>
#include <gpxe/iobuf.h>
#include <gpxe/netdevice.h>
#include <gpxe/ip.h>
#include <gpxe/tcpip.h>
#include <gpxe/dhcp.h>
#include <gpxe/settings.h>
/** @file
*
* IPv4 protocol
*
*/
FILE_LICENCE ( GPL2_OR_LATER );
/* Unique IP datagram identification number */
static uint16_t next_ident = 0;
struct net_protocol ipv4_protocol;
/** List of IPv4 miniroutes */
struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
/** List of fragment reassembly buffers */
static LIST_HEAD ( frag_buffers );
/**
* Add IPv4 minirouting table entry
*
* @v netdev Network device
* @v address IPv4 address
* @v netmask Subnet mask
* @v gateway Gateway address (if any)
* @ret miniroute Routing table entry, or NULL
*/
static struct ipv4_miniroute * __malloc
add_ipv4_miniroute ( struct net_device *netdev, struct in_addr address,
struct in_addr netmask, struct in_addr gateway ) {
struct ipv4_miniroute *miniroute;
DBG ( "IPv4 add %s", inet_ntoa ( address ) );
DBG ( "/%s ", inet_ntoa ( netmask ) );
if ( gateway.s_addr )
DBG ( "gw %s ", inet_ntoa ( gateway ) );
DBG ( "via %s\n", netdev->name );
/* Allocate and populate miniroute structure */
miniroute = malloc ( sizeof ( *miniroute ) );
if ( ! miniroute ) {
DBG ( "IPv4 could not add miniroute\n" );
return NULL;
}
/* Record routing information */
miniroute->netdev = netdev_get ( netdev );
miniroute->address = address;
miniroute->netmask = netmask;
miniroute->gateway = gateway;
/* Add to end of list if we have a gateway, otherwise
* to start of list.
*/
if ( gateway.s_addr ) {
list_add_tail ( &miniroute->list, &ipv4_miniroutes );
} else {
list_add ( &miniroute->list, &ipv4_miniroutes );
}
return miniroute;
}
/**
* Delete IPv4 minirouting table entry
*
* @v miniroute Routing table entry
*/
static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
if ( miniroute->gateway.s_addr )
DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
DBG ( "via %s\n", miniroute->netdev->name );
netdev_put ( miniroute->netdev );
list_del ( &miniroute->list );
free ( miniroute );
}
/**
* Perform IPv4 routing
*
* @v dest Final destination address
* @ret dest Next hop destination address
* @ret miniroute Routing table entry to use, or NULL if no route
*
* If the route requires use of a gateway, the next hop destination
* address will be overwritten with the gateway address.
*/
static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
struct ipv4_miniroute *miniroute;
int local;
int has_gw;
/* Never attempt to route the broadcast address */
if ( dest->s_addr == INADDR_BROADCAST )
return NULL;
/* Find first usable route in routing table */
list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
if ( ! ( miniroute->netdev->state & NETDEV_OPEN ) )
continue;
local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
& miniroute->netmask.s_addr ) == 0 );
has_gw = ( miniroute->gateway.s_addr );
if ( local || has_gw ) {
if ( ! local )
*dest = miniroute->gateway;
return miniroute;
}
}
return NULL;
}
/**
* Fragment reassembly counter timeout
*
* @v timer Retry timer
* @v over If asserted, the timer is greater than @c MAX_TIMEOUT
*/
static void ipv4_frag_expired ( struct retry_timer *timer __unused,
int over ) {
if ( over ) {
DBG ( "Fragment reassembly timeout" );
/* Free the fragment buffer */
}
}
/**
* Free fragment buffer
*
* @v fragbug Fragment buffer
*/
static void free_fragbuf ( struct frag_buffer *fragbuf ) {
free ( fragbuf );
}
/**
* Fragment reassembler
*
* @v iobuf I/O buffer, fragment of the datagram
* @ret frag_iob Reassembled packet, or NULL
*/
static struct io_buffer * ipv4_reassemble ( struct io_buffer * iobuf ) {
struct iphdr *iphdr = iobuf->data;
struct frag_buffer *fragbuf;
/**
* Check if the fragment belongs to any fragment series
*/
list_for_each_entry ( fragbuf, &frag_buffers, list ) {
if ( fragbuf->ident == iphdr->ident &&
fragbuf->src.s_addr == iphdr->src.s_addr ) {
/**
* Check if the packet is the expected fragment
*
* The offset of the new packet must be equal to the
* length of the data accumulated so far (the length of
* the reassembled I/O buffer
*/
if ( iob_len ( fragbuf->frag_iob ) ==
( iphdr->frags & IP_MASK_OFFSET ) ) {
/**
* Append the contents of the fragment to the
* reassembled I/O buffer
*/
iob_pull ( iobuf, sizeof ( *iphdr ) );
memcpy ( iob_put ( fragbuf->frag_iob,
iob_len ( iobuf ) ),
iobuf->data, iob_len ( iobuf ) );
free_iob ( iobuf );
/** Check if the fragment series is over */
if ( ! ( iphdr->frags & IP_MASK_MOREFRAGS ) ) {
iobuf = fragbuf->frag_iob;
free_fragbuf ( fragbuf );
return iobuf;
}
} else {
/* Discard the fragment series */
free_fragbuf ( fragbuf );
free_iob ( iobuf );
}
return NULL;
}
}
/** Check if the fragment is the first in the fragment series */
if ( iphdr->frags & IP_MASK_MOREFRAGS &&
( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
/** Create a new fragment buffer */
fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
fragbuf->ident = iphdr->ident;
fragbuf->src = iphdr->src;
/* Set up the reassembly I/O buffer */
fragbuf->frag_iob = alloc_iob ( IP_FRAG_IOB_SIZE );
iob_pull ( iobuf, sizeof ( *iphdr ) );
memcpy ( iob_put ( fragbuf->frag_iob, iob_len ( iobuf ) ),
iobuf->data, iob_len ( iobuf ) );
free_iob ( iobuf );
/* Set the reassembly timer */
fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
fragbuf->frag_timer.expired = ipv4_frag_expired;
start_timer ( &fragbuf->frag_timer );
/* Add the fragment buffer to the list of fragment buffers */
list_add ( &fragbuf->list, &frag_buffers );
}
return NULL;
}
/**
* Add IPv4 pseudo-header checksum to existing checksum
*
* @v iobuf I/O buffer
* @v csum Existing checksum
* @ret csum Updated checksum
*/
static uint16_t ipv4_pshdr_chksum ( struct io_buffer *iobuf, uint16_t csum ) {
struct ipv4_pseudo_header pshdr;
struct iphdr *iphdr = iobuf->data;
size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
/* Build pseudo-header */
pshdr.src = iphdr->src;
pshdr.dest = iphdr->dest;
pshdr.zero_padding = 0x00;
pshdr.protocol = iphdr->protocol;
pshdr.len = htons ( iob_len ( iobuf ) - hdrlen );
/* Update the checksum value */
return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
}
/**
* Determine link-layer address
*
* @v dest IPv4 destination address
* @v src IPv4 source address
* @v netdev Network device
* @v ll_dest Link-layer destination address buffer
* @ret rc Return status code
*/
static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
struct net_device *netdev, uint8_t *ll_dest ) {
struct ll_protocol *ll_protocol = netdev->ll_protocol;
if ( dest.s_addr == INADDR_BROADCAST ) {
/* Broadcast address */
memcpy ( ll_dest, netdev->ll_broadcast,
ll_protocol->ll_addr_len );
return 0;
} else if ( IN_MULTICAST ( ntohl ( dest.s_addr ) ) ) {
return ll_protocol->mc_hash ( AF_INET, &dest, ll_dest );
} else {
/* Unicast address: resolve via ARP */
return arp_resolve ( netdev, &ipv4_protocol, &dest,
&src, ll_dest );
}
}
/**
* Transmit IP packet
*
* @v iobuf I/O buffer
* @v tcpip Transport-layer protocol
* @v st_src Source network-layer address
* @v st_dest Destination network-layer address
* @v netdev Network device to use if no route found, or NULL
* @v trans_csum Transport-layer checksum to complete, or NULL
* @ret rc Status
*
* This function expects a transport-layer segment and prepends the IP header
*/
static int ipv4_tx ( struct io_buffer *iobuf,
struct tcpip_protocol *tcpip_protocol,
struct sockaddr_tcpip *st_src,
struct sockaddr_tcpip *st_dest,
struct net_device *netdev,
uint16_t *trans_csum ) {
struct iphdr *iphdr = iob_push ( iobuf, sizeof ( *iphdr ) );
struct sockaddr_in *sin_src = ( ( struct sockaddr_in * ) st_src );
struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
struct ipv4_miniroute *miniroute;
struct in_addr next_hop;
uint8_t ll_dest[MAX_LL_ADDR_LEN];
int rc;
/* Fill up the IP header, except source address */
memset ( iphdr, 0, sizeof ( *iphdr ) );
iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
iphdr->service = IP_TOS;
iphdr->len = htons ( iob_len ( iobuf ) );
iphdr->ident = htons ( ++next_ident );
iphdr->ttl = IP_TTL;
iphdr->protocol = tcpip_protocol->tcpip_proto;
iphdr->dest = sin_dest->sin_addr;
/* Use routing table to identify next hop and transmitting netdev */
next_hop = iphdr->dest;
if ( sin_src )
iphdr->src = sin_src->sin_addr;
if ( ( next_hop.s_addr != INADDR_BROADCAST ) &&
( ! IN_MULTICAST ( ntohl ( next_hop.s_addr ) ) ) &&
( ( miniroute = ipv4_route ( &next_hop ) ) != NULL ) ) {
iphdr->src = miniroute->address;
netdev = miniroute->netdev;
}
if ( ! netdev ) {
DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
rc = -ENETUNREACH;
goto err;
}
/* Determine link-layer destination address */
if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, netdev,
ll_dest ) ) != 0 ) {
DBG ( "IPv4 has no link-layer address for %s: %s\n",
inet_ntoa ( next_hop ), strerror ( rc ) );
goto err;
}
/* Fix up checksums */
if ( trans_csum )
*trans_csum = ipv4_pshdr_chksum ( iobuf, *trans_csum );
iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
/* Print IP4 header for debugging */
DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
DBG ( "%s len %d proto %d id %04x csum %04x\n",
inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
/* Hand off to link layer */
if ( ( rc = net_tx ( iobuf, netdev, &ipv4_protocol, ll_dest ) ) != 0 ) {
DBG ( "IPv4 could not transmit packet via %s: %s\n",
netdev->name, strerror ( rc ) );
return rc;
}
return 0;
err:
free_iob ( iobuf );
return rc;
}
/**
* Process incoming packets
*
* @v iobuf I/O buffer
* @v netdev Network device
* @v ll_source Link-layer destination source
*
* This function expects an IP4 network datagram. It processes the headers
* and sends it to the transport layer.
*/
static int ipv4_rx ( struct io_buffer *iobuf, struct net_device *netdev __unused,
const void *ll_source __unused ) {
struct iphdr *iphdr = iobuf->data;
size_t hdrlen;
size_t len;
union {
struct sockaddr_in sin;
struct sockaddr_tcpip st;
} src, dest;
uint16_t csum;
uint16_t pshdr_csum;
int rc;
/* Sanity check the IPv4 header */
if ( iob_len ( iobuf ) < sizeof ( *iphdr ) ) {
DBG ( "IPv4 packet too short at %zd bytes (min %zd bytes)\n",
iob_len ( iobuf ), sizeof ( *iphdr ) );
goto err;
}
if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
goto err;
}
hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
if ( hdrlen < sizeof ( *iphdr ) ) {
DBG ( "IPv4 header too short at %zd bytes (min %zd bytes)\n",
hdrlen, sizeof ( *iphdr ) );
goto err;
}
if ( hdrlen > iob_len ( iobuf ) ) {
DBG ( "IPv4 header too long at %zd bytes "
"(packet is %zd bytes)\n", hdrlen, iob_len ( iobuf ) );
goto err;
}
if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
DBG ( "IPv4 checksum incorrect (is %04x including checksum "
"field, should be 0000)\n", csum );
goto err;
}
len = ntohs ( iphdr->len );
if ( len < hdrlen ) {
DBG ( "IPv4 length too short at %zd bytes "
"(header is %zd bytes)\n", len, hdrlen );
goto err;
}
if ( len > iob_len ( iobuf ) ) {
DBG ( "IPv4 length too long at %zd bytes "
"(packet is %zd bytes)\n", len, iob_len ( iobuf ) );
goto err;
}
/* Print IPv4 header for debugging */
DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
DBG ( "%s len %d proto %d id %04x csum %04x\n",
inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
/* Truncate packet to correct length, calculate pseudo-header
* checksum and then strip off the IPv4 header.
*/
iob_unput ( iobuf, ( iob_len ( iobuf ) - len ) );
pshdr_csum = ipv4_pshdr_chksum ( iobuf, TCPIP_EMPTY_CSUM );
iob_pull ( iobuf, hdrlen );
/* Fragment reassembly */
if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
/* Pass the fragment to ipv4_reassemble() which either
* returns a fully reassembled I/O buffer or NULL.
*/
iobuf = ipv4_reassemble ( iobuf );
if ( ! iobuf )
return 0;
}
/* Construct socket addresses and hand off to transport layer */
memset ( &src, 0, sizeof ( src ) );
src.sin.sin_family = AF_INET;
src.sin.sin_addr = iphdr->src;
memset ( &dest, 0, sizeof ( dest ) );
dest.sin.sin_family = AF_INET;
dest.sin.sin_addr = iphdr->dest;
if ( ( rc = tcpip_rx ( iobuf, iphdr->protocol, &src.st,
&dest.st, pshdr_csum ) ) != 0 ) {
DBG ( "IPv4 received packet rejected by stack: %s\n",
strerror ( rc ) );
return rc;
}
return 0;
err:
free_iob ( iobuf );
return -EINVAL;
}
/**
* Check existence of IPv4 address for ARP
*
* @v netdev Network device
* @v net_addr Network-layer address
* @ret rc Return status code
*/
static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
const struct in_addr *address = net_addr;
struct ipv4_miniroute *miniroute;
list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
if ( ( miniroute->netdev == netdev ) &&
( miniroute->address.s_addr == address->s_addr ) ) {
/* Found matching address */
return 0;
}
}
return -ENOENT;
}
/**
* Convert IPv4 address to dotted-quad notation
*
* @v in IP address
* @ret string IP address in dotted-quad notation
*/
char * inet_ntoa ( struct in_addr in ) {
static char buf[16]; /* "xxx.xxx.xxx.xxx" */
uint8_t *bytes = ( uint8_t * ) &in;
sprintf ( buf, "%d.%d.%d.%d", bytes[0], bytes[1], bytes[2], bytes[3] );
return buf;
}
/**
* Transcribe IP address
*
* @v net_addr IP address
* @ret string IP address in dotted-quad notation
*
*/
static const char * ipv4_ntoa ( const void *net_addr ) {
return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
}
/** IPv4 protocol */
struct net_protocol ipv4_protocol __net_protocol = {
.name = "IP",
.net_proto = htons ( ETH_P_IP ),
.net_addr_len = sizeof ( struct in_addr ),
.rx = ipv4_rx,
.ntoa = ipv4_ntoa,
};
/** IPv4 TCPIP net protocol */
struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
.name = "IPv4",
.sa_family = AF_INET,
.tx = ipv4_tx,
};
/** IPv4 ARP protocol */
struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
.net_protocol = &ipv4_protocol,
.check = ipv4_arp_check,
};
/******************************************************************************
*
* Settings
*
******************************************************************************
*/
/** IPv4 address setting */
struct setting ip_setting __setting = {
.name = "ip",
.description = "IPv4 address",
.tag = DHCP_EB_YIADDR,
.type = &setting_type_ipv4,
};
/** IPv4 subnet mask setting */
struct setting netmask_setting __setting = {
.name = "netmask",
.description = "IPv4 subnet mask",
.tag = DHCP_SUBNET_MASK,
.type = &setting_type_ipv4,
};
/** Default gateway setting */
struct setting gateway_setting __setting = {
.name = "gateway",
.description = "Default gateway",
.tag = DHCP_ROUTERS,
.type = &setting_type_ipv4,
};
/**
* Create IPv4 routing table based on configured settings
*
* @ret rc Return status code
*/
static int ipv4_create_routes ( void ) {
struct ipv4_miniroute *miniroute;
struct ipv4_miniroute *tmp;
struct net_device *netdev;
struct settings *settings;
struct in_addr address = { 0 };
struct in_addr netmask = { 0 };
struct in_addr gateway = { 0 };
/* Delete all existing routes */
list_for_each_entry_safe ( miniroute, tmp, &ipv4_miniroutes, list )
del_ipv4_miniroute ( miniroute );
/* Create a route for each configured network device */
for_each_netdev ( netdev ) {
settings = netdev_settings ( netdev );
/* Get IPv4 address */
address.s_addr = 0;
fetch_ipv4_setting ( settings, &ip_setting, &address );
if ( ! address.s_addr )
continue;
/* Get subnet mask */
fetch_ipv4_setting ( settings, &netmask_setting, &netmask );
/* Calculate default netmask, if necessary */
if ( ! netmask.s_addr ) {
if ( IN_CLASSA ( ntohl ( address.s_addr ) ) ) {
netmask.s_addr = htonl ( IN_CLASSA_NET );
} else if ( IN_CLASSB ( ntohl ( address.s_addr ) ) ) {
netmask.s_addr = htonl ( IN_CLASSB_NET );
} else if ( IN_CLASSC ( ntohl ( address.s_addr ) ) ) {
netmask.s_addr = htonl ( IN_CLASSC_NET );
}
}
/* Get default gateway, if present */
fetch_ipv4_setting ( settings, &gateway_setting, &gateway );
/* Configure route */
miniroute = add_ipv4_miniroute ( netdev, address,
netmask, gateway );
if ( ! miniroute )
return -ENOMEM;
}
return 0;
}
/** IPv4 settings applicator */
struct settings_applicator ipv4_settings_applicator __settings_applicator = {
.apply = ipv4_create_routes,
};
/* Drag in ICMP */
REQUIRE_OBJECT ( icmp );