blob: ac2e1245f7f7bfbbff130ca7201a4ec55f2c20c6 [file] [log] [blame]
/************************************************* -*- linux-c -*-
* Myricom 10Gb Network Interface Card Software
* Copyright 2009, Myricom, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
****************************************************************/
FILE_LICENCE ( GPL2_ONLY );
/*
* Author: Glenn Brown <glenn@myri.com>
*/
/*
* General Theory of Operation
*
* This is a minimal Myricom 10 gigabit Ethernet driver for network
* boot.
*
* Initialization
*
* myri10ge_pci_probe() is called by gPXE during initialization.
* Minimal NIC initialization is performed to minimize resources
* consumed when the driver is resident but unused.
*
* Network Boot
*
* myri10ge_net_open() is called by gPXE before attempting to network
* boot from the card. Packet buffers are allocated and the NIC
* interface is initialized.
*
* Transmit
*
* myri10ge_net_transmit() enqueues frames for transmission by writing
* descriptors to the NIC's tx ring. For simplicity and to avoid
* copies, we always have the NIC DMA up the packet. The sent I/O
* buffer is released once the NIC signals myri10ge_interrupt_handler()
* that the send has completed.
*
* Receive
*
* Receives are posted to the NIC's receive ring. The NIC fills a
* DMAable receive_completion ring with completion notifications.
* myri10ge_net_poll() polls for these receive notifications, posts
* replacement receive buffers to the NIC, and passes received frames
* to netdev_rx().
*/
/*
* Debugging levels:
* - DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
* TX overflow, corrupted packets, ...
* - DBG2() is for successful events, like packet received,
* packet transmitted, and other general notifications.
* - DBGP() prints the name of each called function on entry
*/
#include <stdint.h>
#include <byteswap.h>
#include <errno.h>
#include <gpxe/ethernet.h>
#include <gpxe/if_ether.h>
#include <gpxe/iobuf.h>
#include <gpxe/malloc.h>
#include <gpxe/netdevice.h>
#include <gpxe/pci.h>
#include <gpxe/timer.h>
#include "myri10ge_mcp.h"
/****************************************************************
* Forward declarations
****************************************************************/
/* PCI driver entry points, installed in myri10ge_driver. */
static int myri10ge_pci_probe ( struct pci_device *pci,
				const struct pci_device_id *id );
static void myri10ge_pci_remove ( struct pci_device *pci );

/* Network device operations, installed in the net_device_operations
   table built by myri10ge_pci_probe(). */
static int myri10ge_net_open ( struct net_device *netdev );
static void myri10ge_net_close ( struct net_device *netdev );
static int myri10ge_net_transmit ( struct net_device *netdev,
				   struct io_buffer *iobuf );
static void myri10ge_net_poll ( struct net_device *netdev );
static void myri10ge_net_irq ( struct net_device *netdev, int enable );
/****************************************************************
* Constants
****************************************************************/
/* Maximum ring indices, used to wrap ring indices. These must be 2**N-1
so that "counter & WRAP" maps a free-running counter onto a valid slot.
Each host-side array below is declared with 1 + WRAP entries. */
/* Wraps indices into myri10ge_private.transmit_iob[]. */
#define MYRI10GE_TRANSMIT_WRAP 1U
/* Wraps indices into myri10ge_private.receive_iob[]. */
#define MYRI10GE_RECEIVE_WRAP 7U
/* Wraps indices into myri10ge_dma_buffers.receive_completion[]. */
#define MYRI10GE_RECEIVE_COMPLETION_WRAP 31U
/****************************************************************
* Driver internal data types.
****************************************************************/
/* Structure holding all DMA buffers for a NIC, which we will
allocate as contiguous read/write DMAable memory when the NIC is
initialized. myri10ge_net_open() obtains it with malloc_dma() and
zeroes it; myri10ge_net_close() releases it with free_dma(). */
struct myri10ge_dma_buffers
{
/* The NIC DMAs receive completion notifications into this ring.
myri10ge_net_poll() treats a zero length field as "slot empty" and
clears the field again after consuming an entry. */
mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
/* Interrupt details are DMAd here before interrupting. */
mcp_irq_data_t irq_data; /* 64B */
/* NIC command completion status is DMAd here. myri10ge_command()
presets the result field to 0xFFFFFFFF and polls until it changes. */
mcp_cmd_response_t command_response; /* 8B */
};
/* Per-NIC driver state. It lives directly after the struct net_device
(see myri10ge_priv()) and is zeroed by myri10ge_net_close() and by a
failed myri10ge_net_open(). */
struct myri10ge_private
{
/* Interrupt support. Both addresses are computed in
myri10ge_net_open() from offsets reported by the firmware. */
uint32 *irq_claim; /* in NIC SRAM */
uint32 *irq_deassert; /* in NIC SRAM */
/* DMA buffers. */
struct myri10ge_dma_buffers *dma;
/*
* Transmit state.
*
* The counts here are uint32 for easy comparison with
* priv->dma->irq_data.send_done_count and with each other.
* They are free-running counters; ring slots are selected by
* masking with the corresponding wrap value.
*/
mcp_kreq_ether_send_t *transmit_ring; /* in NIC SRAM */
uint32 transmit_ring_wrap; /* NIC ring size - 1, from the firmware */
uint32 transmits_posted; /* descriptors written to the NIC */
uint32 transmits_done; /* completions reported to netdev_tx_complete() */
/* I/O buffers awaiting send completion, indexed by
count & MYRI10GE_TRANSMIT_WRAP. */
struct io_buffer *transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
/*
* Receive state.
*/
mcp_kreq_ether_recv_t *receive_post_ring; /* in NIC SRAM */
unsigned int receive_post_ring_wrap; /* NIC ring size - 1, from the firmware */
unsigned int receives_posted; /* buffers handed to the NIC */
unsigned int receives_done; /* buffers passed up via netdev_rx() */
/* I/O buffers currently posted to the NIC, indexed by
count & MYRI10GE_RECEIVE_WRAP. */
struct io_buffer *receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
/* Address for writing commands to the firmware.
BEWARE: the value must be written 32 bits at a time. */
mcp_cmd_t *command;
};
/****************************************************************
* Driver internal functions.
****************************************************************/
/* Print ring status when debugging. Use this only after a printed
value changes. The output is the done/posted counter pairs for the
transmit and receive rings, in hex, followed by the name of the
calling function. */
#define DBG2_RINGS( priv ) \
DBG2 ( "tx %x/%x rx %x/%x in %s() \n", \
( priv ) ->transmits_done, ( priv ) -> transmits_posted, \
( priv ) ->receives_done, ( priv ) -> receives_posted, \
__FUNCTION__ )
/*
 * Map a network device to this driver's private state.
 *
 * @v nd		Network device created by this driver.
 * @ret priv		The driver private data belonging to that device.
 */
static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
{
	/* The private area sits directly behind the net_device
	   structure, because the device was obtained from an allocator
	   that reserves the extra storage there. */
	return ( struct myri10ge_private * ) &nd[1];
}
/*
 * Hand an empty receive buffer to the NIC.
 *
 * @v priv		Driver state of the NIC that will fill the buffer.
 * @v iob		The I/O buffer to be filled.
 *
 * Buffers are consumed by the NIC in the order they were posted.
 */
static void myri10ge_post_receive ( struct myri10ge_private *priv,
				    struct io_buffer *iob )
{
	const unsigned int slot = priv->receives_posted;
	mcp_kreq_ether_recv_t *descriptor =
		&priv->receive_post_ring[slot & priv->receive_post_ring_wrap];

	/* Remember which I/O buffer backs this slot, so it can be given
	   to netdev_rx() once the NIC reports it filled. */
	priv->receive_iob[slot & MYRI10GE_RECEIVE_WRAP] = iob;

	/* Write the descriptor: the high address word goes first and the
	   low word only after a write barrier. */
	descriptor->addr_high = 0;
	wmb();
	descriptor->addr_low = htonl ( virt_to_bus ( iob->data ) );

	/* Account for the newly posted buffer. */
	priv->receives_posted = slot + 1;
}
/*
* Execute a command on the NIC and wait for its completion.
*
* @v priv NIC to perform the command.
* @v cmd The command to perform.
* @v data I/O copy buffer for parameters/results. All three
* words are always read and sent to the NIC, so callers
* should initialize the whole array. On success data[0]
* is overwritten with the response data.
* @ret rc 0 on success, -EIO if the NIC reported a non-zero
* result, or -ETIMEDOUT if no response arrived.
*/
static int myri10ge_command ( struct myri10ge_private *priv,
uint32 cmd,
uint32 data[3] )
{
int i;
mcp_cmd_t *command;
uint32 result;
unsigned int slept_ms;
volatile mcp_cmd_response_t *response;
DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
command = priv->command;
response = &priv->dma->command_response;
/* Mark the command as incomplete. 0xFFFFFFFF is the sentinel the
polling loop below waits to see replaced. */
response->result = 0xFFFFFFFF;
/* Pass the command to the NIC. All fields are stored in network
byte order. */
command->cmd = htonl ( cmd );
command->data0 = htonl ( data[0] );
command->data1 = htonl ( data[1] );
command->data2 = htonl ( data[2] );
command->response_addr.high = 0;
command->response_addr.low
= htonl ( virt_to_bus ( &priv->dma->command_response ) );
/* Clear the padding with 32-bit stores, holding back the last word
until after a write barrier. NOTE(review): presumably the final
word's write is what hands the command to the firmware - confirm
against the firmware documentation. */
for ( i=0; i<36; i+=4 )
* ( uint32 * ) &command->pad[i] = 0;
wmb();
* ( uint32 * ) &command->pad[36] = 0;
/* Wait up to 2 seconds for a response, polling once per
millisecond. */
for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
result = response->result;
if ( result == 0 ) {
/* Success: return the response payload to the caller. */
data[0] = ntohl ( response->data );
return 0;
} else if ( result != 0xFFFFFFFF ) {
/* The NIC wrote a non-zero result: command failed. */
DBG ( "cmd%d:0x%x\n",
cmd,
ntohl ( response->result ) );
return -EIO;
}
udelay ( 1000 );
rmb();
}
DBG ( "cmd%d:timed out\n", cmd );
return -ETIMEDOUT;
}
/*
* Handle any pending interrupt.
*
* @v netdev Device being polled for interrupts.
*
* This is called periodically (from myri10ge_net_poll()) to let the
* driver check for interrupts. It returns immediately when the NIC has
* not flagged irq_data as valid; otherwise it acknowledges the
* interrupt, completes finished transmits, updates the link state and
* re-arms interrupt generation.
*/
static void myri10ge_interrupt_handler ( struct net_device *netdev )
{
struct myri10ge_private *priv;
mcp_irq_data_t *irq_data;
uint8 valid;
priv = myri10ge_priv ( netdev );
irq_data = &priv->dma->irq_data;
/* Return if there was no interrupt. */
rmb();
valid = irq_data->valid;
if ( !valid )
return;
DBG2 ( "irq " );
/* Tell the NIC to deassert the interrupt and clear
irq_data->valid.*/
*priv->irq_deassert = 0; /* any value is OK. */
mb();
/* Handle any new receives. Only the token is returned here; the
received frames themselves are processed by myri10ge_net_poll(). */
if ( valid & 1 ) {
/* Pass the receive interrupt token back to the NIC. */
DBG2 ( "rx " );
*priv->irq_claim = htonl ( 3 );
wmb();
}
/* Handle any sent packet by freeing its I/O buffer, now that
we know it has been DMAd. */
if ( valid & 2 ) {
unsigned int nic_done_count;
DBG2 ( "snt " );
nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
/* Complete every transmit between our count and the NIC's. */
while ( priv->transmits_done != nic_done_count ) {
struct io_buffer *iob;
iob = priv->transmit_iob [priv->transmits_done
& MYRI10GE_TRANSMIT_WRAP];
DBG2 ( "%p ", iob );
netdev_tx_complete ( netdev, iob );
++priv->transmits_done;
}
}
/* Record any statistics update. */
if ( irq_data->stats_updated ) {
/* Update the link status. */
DBG2 ( "stats " );
if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
netdev_link_up ( netdev );
else
netdev_link_down ( netdev );
/* Ignore all error counters from the NIC. */
}
/* Wait for the interrupt to be deasserted, as indicated by
irq_data->valid, which is set by the NIC after the deassert.
NOTE(review): this loop has no timeout, and irq_data is not
volatile, so it relies on mb() forcing a fresh read - confirm. */
DBG2 ( "wait " );
do {
mb();
} while ( irq_data->valid );
/* Claim the interrupt to enable future interrupt generation. This
writes the word following the one used for the receive token. */
DBG2 ( "claim\n" );
* ( priv->irq_claim + 1 ) = htonl ( 3 );
mb();
}
/* Constants for reading the STRING_SPECS via the Myricom
Vendor Specific PCI configuration space capability.
The three register macros expand to offsets relative to a variable
named "vs", which must be in scope at the point of use and hold the
configuration-space offset of the capability. */
#define VS_ADDR ( vs + 0x18 )
#define VS_DATA ( vs + 0x14 )
#define VS_MODE ( vs + 0x10 )
/* Values written to VS_MODE. */
#define VS_MODE_READ32 0x3
#define VS_MODE_LOCATE 0x8
/* Value written to VS_ADDR in LOCATE mode to find the string specs. */
#define VS_LOCATE_STRING_SPECS 0x3
/*
 * Read MAC address from its 'string specs' via the vendor-specific
 * capability.  (This capability allows NIC SRAM and ROM to be read
 * before it is mapped.)
 *
 * The string specs are a sequence of NUL-terminated strings ended by
 * an empty string; the one starting with "MAC=" holds six hexadecimal
 * bytes separated by single characters.
 *
 * @v pci		The device.
 * @v mac		Buffer to store the MAC address.
 * @ret rc		Returns 0 on success, else an error code.
 */
static int mac_address_from_string_specs ( struct pci_device *pci,
					   uint8 mac[ETH_ALEN] )
{
	/* Largest string-spec area accepted, plus zeroed slack so that
	   the 4-byte "MAC=" comparison, "%s" printing and strtoul() can
	   never read beyond the buffer. */
	enum { SS_MAX = 256, SS_SLACK = 4 };
	char string_specs[SS_MAX + SS_SLACK];
	char *ptr, *limit;
	char *to = string_specs;
	uint32 addr;
	uint32 len;
	unsigned int vs;
	int mac_set = 0;

	/* Find the "vendor specific" capability. */
	vs = pci_find_capability ( pci, 9 );
	if ( vs == 0 ) {
		DBG ( "no VS\n" );
		return -ENOTSUP;
	}

	/* Locate the String specs in LANai SRAM. */
	pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
	pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
	pci_read_config_dword ( pci, VS_ADDR, &addr );
	pci_read_config_dword ( pci, VS_DATA, &len );
	DBG2 ( "ss@%x,%x\n", addr, len );

	/* Reject areas that do not fit or are not a whole number of
	   32-bit words, restoring the capability mode before leaving. */
	if ( len > SS_MAX || ( len & 3 ) ) {
		pci_write_config_byte ( pci, VS_MODE, 0 );
		DBG ( "SS too big\n" );
		return -ENOTSUP;
	}

	/* Start from an all-zero buffer so that whatever follows the
	   copied data reads as string terminators, not stack garbage. */
	memset ( string_specs, 0, sizeof ( string_specs ) );

	/* Copy in the string specs.  Use 32-bit reads for performance. */
	pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
	while ( len >= 4 ) {
		uint32 tmp;

		/* Write the complete 32-bit address: a byte-wide write
		   would only update the low 8 bits and lose the carry
		   when the area crosses a 256-byte boundary. */
		pci_write_config_dword ( pci, VS_ADDR, addr );
		pci_read_config_dword ( pci, VS_DATA, &tmp );
		tmp = ntohl ( tmp );
		memcpy ( to, &tmp, 4 );
		to += 4;
		addr += 4;
		len -= 4;
	}
	pci_write_config_byte ( pci, VS_MODE, 0 );

	/* Parse the string specs.  Only the bytes actually copied are
	   considered, and the bound is tested before dereferencing. */
	DBG2 ( "STRING_SPECS:\n" );
	ptr = string_specs;
	limit = to;
	while ( ptr < limit && *ptr != '\0' ) {
		DBG2 ( "%s\n", ptr );
		if ( memcmp ( ptr, "MAC=", 4 ) == 0 ) {
			unsigned int i;

			ptr += 4;
			for ( i=0; i<ETH_ALEN; i++ ) {
				char *end;
				unsigned long byte;

				if ( ( ptr + 2 ) > limit ) {
					DBG ( "bad MAC addr\n" );
					return -ENOTSUP;
				}
				byte = strtoul ( ptr, &end, 16 );
				/* Require at least one hex digit and a
				   value that fits in one byte. */
				if ( end == ptr || byte > 0xff ) {
					DBG ( "bad MAC addr\n" );
					return -ENOTSUP;
				}
				mac[i] = byte;
				/* Step over the separator (or, after the
				   last byte, the string terminator). */
				ptr = end + 1;
			}
			mac_set = 1;
		} else {
			/* Skip this string and its terminator. */
			while ( ptr < limit && *ptr++ );
		}
	}

	/* Verify we parsed all we need. */
	if ( !mac_set ) {
		DBG ( "no MAC addr\n" );
		return -ENOTSUP;
	}
	DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
	return 0;
}
/****************************************************************
* gPXE PCI Device Driver API functions
****************************************************************/
/*
 * Probe a PCI device and register it as a network device.
 *
 * @v pci		The device's associated pci_device structure.
 * @v id		The PCI device + vendor id (unused).
 * @ret rc		Returns zero if successfully initialized.
 *
 * This function is called very early on, while gPXE is initializing,
 * so only minimal setup happens here: the net device is allocated,
 * interrupts are disabled, the MAC address is read and the device is
 * registered.
 * This is a gPXE PCI Device Driver API function.
 */
static int myri10ge_pci_probe ( struct pci_device *pci,
				const struct pci_device_id *id __unused )
{
	static struct net_device_operations myri10ge_operations = {
		.open		= myri10ge_net_open,
		.close		= myri10ge_net_close,
		.transmit	= myri10ge_net_transmit,
		.poll		= myri10ge_net_poll,
		.irq		= myri10ge_net_irq
	};
	struct net_device *netdev;
	const char *failed_step;	/* named in the error message */
	int rc;

	DBGP ( "myri10ge_pci_probe: " );

	/* Allocate the net device together with our private state. */
	netdev = alloc_etherdev ( sizeof ( struct myri10ge_private ) );
	if ( netdev == NULL ) {
		failed_step = "alloc_etherdev";
		rc = -ENOMEM;
		goto fail;
	}

	/* Tie the net device and the PCI device together. */
	netdev_init ( netdev, &myri10ge_operations );
	pci_set_drvdata ( pci, netdev );
	netdev->dev = &pci->dev;

	/* Make sure interrupts are disabled. */
	myri10ge_net_irq ( netdev, 0 );

	/* Read the NIC HW address. */
	rc = mac_address_from_string_specs ( pci, netdev->hw_addr );
	if ( rc != 0 ) {
		failed_step = "mac_from_ss";
		goto fail_release_netdev;
	}
	DBGP ( "mac " );

	/* Enable bus master, etc. */
	adjust_pci_device ( pci );
	DBGP ( "pci " );

	/* Register the initialized network device. */
	rc = register_netdev ( netdev );
	if ( rc != 0 ) {
		failed_step = "register_netdev";
		goto fail_release_netdev;
	}

	DBGP ( "done\n" );
	return 0;

fail_release_netdev:
	netdev_nullify ( netdev );
	netdev_put ( netdev );
fail:
	DBG ( "%s:%s\n", failed_step, strerror ( rc ) );
	return rc;
}
/*
 * Detach the driver from a PCI device.
 *
 * @v pci		PCI device to remove.
 *
 * Unregisters the network device that myri10ge_pci_probe() attached
 * to the PCI device and drops the driver's reference to it.
 * This is a PCI Device Driver API function.
 */
static void myri10ge_pci_remove ( struct pci_device *pci )
{
	struct net_device *netdev;

	DBGP ( "myri10ge_pci_remove\n" );

	/* The probe routine stored the net device as driver data. */
	netdev = pci_get_drvdata ( pci );

	unregister_netdev ( netdev );
	netdev_nullify ( netdev );
	netdev_put ( netdev );
}
/****************************************************************
* gPXE Network Device Driver Operations
****************************************************************/
/*
 * Close a network device.
 *
 * @v netdev		Device to close.
 *
 * Disables interrupts, resets the NIC interface, frees the receive
 * buffers that are still posted, releases the DMA memory and clears
 * the private state.
 * This is a gPXE Network Device Driver API function.
 */
static void myri10ge_net_close ( struct net_device *netdev )
{
	struct myri10ge_private *priv;
	/* myri10ge_command() sends all three words to the NIC, so they
	   must not be left uninitialized. */
	uint32 data[3] = { 0, 0, 0 };
	int rc;

	DBGP ( "myri10ge_net_close\n" );
	priv = myri10ge_priv ( netdev );

	/* disable interrupts */
	myri10ge_net_irq ( netdev, 0 );

	/* Reset the NIC interface, so we won't get any more events from
	   the NIC.  Closing cannot fail, so a failed reset is only
	   reported; the teardown continues regardless. */
	rc = myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
	if ( rc )
		DBG ( "RESET: %s\n", strerror ( rc ) );

	/* Free receive buffers that were never filled. */
	while ( priv->receives_done != priv->receives_posted ) {
		free_iob ( priv->receive_iob[priv->receives_done
					     & MYRI10GE_RECEIVE_WRAP] );
		++priv->receives_done;
	}

	/* Release DMAable memory. */
	free_dma ( priv->dma, sizeof ( *priv->dma ) );

	/* Erase all state from the open. */
	memset ( priv, 0, sizeof ( *priv ) );
	DBG2_RINGS ( priv );
}
/*
 * Enable or disable interrupt generation by the device.
 *
 * @v netdev		Device to control.
 * @v enable		Zero to mask off IRQ, non-zero to enable IRQ.
 *
 * This is a gPXE Network Driver API function.
 */
static void myri10ge_net_irq ( struct net_device *netdev, int enable )
{
	struct pci_device *pci;
	uint16 command;

	DBGP ( "myri10ge_net_irq\n" );
	pci = ( struct pci_device * ) netdev->dev;

	/* Read-modify-write the PCI Command register: the Interrupt
	   Disable bit is cleared to enable and set to disable. */
	pci_read_config_word ( pci, PCI_COMMAND, &command );
	command = ( enable
		    ? ( command & ~PCI_COMMAND_INTX_DISABLE )
		    : ( command | PCI_COMMAND_INTX_DISABLE ) );
	pci_write_config_word ( pci, PCI_COMMAND, command );
}
/*
* Opens a network device.
*
* @v netdev Device to be opened.
* @ret rc Non-zero if failed to open.
*
* This enables tx and rx on the device.
* This is a gPXE Network Device Driver API function.
*/
static int myri10ge_net_open ( struct net_device *netdev )
{
const char *dbg; /* printed upon error return */
int rc;
struct io_buffer *iob;
struct myri10ge_private *priv;
uint32 data[3];
struct pci_device *pci_dev;
void *membase;
DBGP ( "myri10ge_net_open\n" );
priv = myri10ge_priv ( netdev );
pci_dev = ( struct pci_device * ) netdev->dev;
membase = phys_to_virt ( pci_dev->membase );
/* Compute address for passing commands to the firmware. */
priv->command = membase + MXGEFW_ETH_CMD;
/* Ensure interrupts are disabled. */
myri10ge_net_irq ( netdev, 0 );
/* Allocate cleared DMAable buffers. */
priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
if ( !priv->dma ) {
rc = -ENOMEM;
dbg = "DMA";
goto abort_with_nothing;
}
memset ( priv->dma, 0, sizeof ( *priv->dma ) );
/* Simplify following code. */
#define TRY( prefix, base, suffix ) do { \
rc = myri10ge_command ( priv, \
MXGEFW_ \
## prefix \
## base \
## suffix, \
data ); \
if ( rc ) { \
dbg = #base; \
goto abort_with_dma; \
} \
} while ( 0 )
/* Send a reset command to the card to see if it is alive,
and to reset its queue state. */
TRY ( CMD_, RESET , );
/* Set the interrupt queue size. */
data[0] = ( sizeof ( priv->dma->receive_completion )
| MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
TRY ( CMD_SET_ , INTRQ_SIZE , );
/* Set the interrupt queue DMA address. */
data[0] = virt_to_bus ( &priv->dma->receive_completion );
data[1] = 0;
TRY ( CMD_SET_, INTRQ_DMA, );
/* Get the NIC interrupt claim address. */
TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
priv->irq_claim = membase + data[0];
/* Get the NIC interrupt assert address. */
TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
priv->irq_deassert = membase + data[0];
/* Disable interrupt coalescing, which is inappropriate for the
minimal buffering we provide. */
TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
* ( ( uint32 * ) ( membase + data[0] ) ) = 0;
/* Set the NIC mac address. */
data[0] = ( netdev->ll_addr[0] << 24
| netdev->ll_addr[1] << 16
| netdev->ll_addr[2] << 8
| netdev->ll_addr[3] );
data[1] = ( ( netdev->ll_addr[4] << 8 )
| netdev->ll_addr[5] );
TRY ( SET_ , MAC_ADDRESS , );
/* Enable multicast receives, because some gPXE clients don't work
without multicast. . */
TRY ( ENABLE_ , ALLMULTI , );
/* Disable Ethernet flow control, so the NIC cannot deadlock the
network under any circumstances. */
TRY ( DISABLE_ , FLOW , _CONTROL );
/* Compute transmit ring sizes. */
data[0] = 0; /* slice 0 */
TRY ( CMD_GET_, SEND_RING, _SIZE );
priv->transmit_ring_wrap
= data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
if ( priv->transmit_ring_wrap
& ( priv->transmit_ring_wrap + 1 ) ) {
rc = -EPROTO;
dbg = "TX_RING";
goto abort_with_dma;
}
/* Compute receive ring sizes. */
data[0] = 0; /* slice 0 */
TRY ( CMD_GET_ , RX_RING , _SIZE );
priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
if ( priv->receive_post_ring_wrap
& ( priv->receive_post_ring_wrap + 1 ) ) {
rc = -EPROTO;
dbg = "RX_RING";
goto abort_with_dma;
}
/* Get NIC transmit ring address. */
data[0] = 0; /* slice 0. */
TRY ( CMD_GET_, SEND, _OFFSET );
priv->transmit_ring = membase + data[0];
/* Get the NIC receive ring address. */
data[0] = 0; /* slice 0. */
TRY ( CMD_GET_, SMALL_RX, _OFFSET );
priv->receive_post_ring = membase + data[0];
/* Set the Nic MTU. */
data[0] = ETH_FRAME_LEN;
TRY ( CMD_SET_, MTU, );
/* Tell the NIC our buffer sizes. ( We use only small buffers, so we
set both buffer sizes to the same value, which will force all
received frames to use small buffers. ) */
data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
/* Tell firmware where to DMA IRQ data */
data[0] = virt_to_bus ( &priv->dma->irq_data );
data[1] = 0;
data[2] = sizeof ( priv->dma->irq_data );
TRY ( CMD_SET_, STATS_DMA_V2, );
/* Post receives. */
while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
/* Reserve 2 extra bytes at the start of packets, since
the firmware always skips the first 2 bytes of the buffer
so TCP headers will be aligned. */
iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
if ( !iob ) {
rc = -ENOMEM;
dbg = "alloc_iob";
goto abort_with_receives_posted;
}
iob_reserve ( iob, MXGEFW_PAD );
myri10ge_post_receive ( priv, iob );
}
/* Bring up the link. */
TRY ( CMD_, ETHERNET_UP, );
DBG2_RINGS ( priv );
return 0;
abort_with_receives_posted:
while ( priv->receives_posted-- )
free_iob ( priv->receive_iob[priv->receives_posted] );
abort_with_dma:
/* Because the link is not up, we don't have to reset the NIC here. */
free_dma ( priv->dma, sizeof ( *priv->dma ) );
abort_with_nothing:
/* Erase all signs of the failed open. */
memset ( priv, 0, sizeof ( *priv ) );
DBG ( "%s: %s\n", dbg, strerror ( rc ) );
return ( rc );
}
/*
* This function allows a driver to process events during operation.
*
* @v netdev Device being polled.
*
* This is called periodically by gPXE to let the driver check the status of
* transmitted packets and to allow the driver to check for received packets.
* This is a gPXE Network Device Driver API function.
*/
static void myri10ge_net_poll ( struct net_device *netdev )
{
struct io_buffer *iob;
struct io_buffer *replacement;
struct myri10ge_dma_buffers *dma;
struct myri10ge_private *priv;
unsigned int length;
unsigned int orig_receives_posted;
DBGP ( "myri10ge_net_poll\n" );
priv = myri10ge_priv ( netdev );
dma = priv->dma;
/* Process any pending interrupt. */
myri10ge_interrupt_handler ( netdev );
/* Pass up received frames, but limit ourselves to receives posted
before this function was called, so we cannot livelock if
receives are arriving faster than we process them. */
orig_receives_posted = priv->receives_posted;
while ( priv->receives_done != orig_receives_posted ) {
/* Stop if there is no pending receive. */
length = ntohs ( dma->receive_completion
[priv->receives_done
& MYRI10GE_RECEIVE_COMPLETION_WRAP]
.length );
if ( length == 0 )
break;
/* Allocate a replacement buffer. If none is available,
stop passing up packets until a buffer is available.
Reserve 2 extra bytes at the start of packets, since
the firmware always skips the first 2 bytes of the buffer
so TCP headers will be aligned. */
replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
if ( !replacement ) {
DBG ( "NO RX BUF\n" );
break;
}
iob_reserve ( replacement, MXGEFW_PAD );
/* Pass up the received frame. */
iob = priv->receive_iob[priv->receives_done
& MYRI10GE_RECEIVE_WRAP];
iob_put ( iob, length );
netdev_rx ( netdev, iob );
/* We have consumed the packet, so clear the receive
notification. */
dma->receive_completion [priv->receives_done
& MYRI10GE_RECEIVE_COMPLETION_WRAP]
.length = 0;
wmb();
/* Replace the passed-up I/O buffer. */
myri10ge_post_receive ( priv, replacement );
++priv->receives_done;
DBG2_RINGS ( priv );
}
}
/*
* This transmits a packet.
*
* @v netdev Device to transmit from.
* @v iobuf Data to transmit.
* @ret rc Non-zero if failed to transmit: -ENOBUFS when all
* transmit slots are in use.
*
* The I/O buffer is remembered in transmit_iob[] and reported back via
* netdev_tx_complete() from myri10ge_interrupt_handler() once the NIC
* signals completion.
* This is a gPXE Network Driver API function.
*/
static int myri10ge_net_transmit ( struct net_device *netdev,
struct io_buffer *iobuf )
{
mcp_kreq_ether_send_t *kreq;
size_t len;
struct myri10ge_private *priv;
uint32 transmits_posted;
DBGP ( "myri10ge_net_transmit\n" );
priv = myri10ge_priv ( netdev );
/* Confirm space in the send ring. At most
1 + MYRI10GE_TRANSMIT_WRAP sends may be outstanding, matching the
size of transmit_iob[]. */
transmits_posted = priv->transmits_posted;
if ( transmits_posted - priv->transmits_done
> MYRI10GE_TRANSMIT_WRAP ) {
DBG ( "TX ring full\n" );
return -ENOBUFS;
}
DBG2 ( "TX %p+%d ", iobuf->data, iob_len ( iobuf ) );
DBG2_HD ( iobuf->data, 14 );
/* Record the packet being transmitted, so we can later report
send completion. */
priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
/* Pad undersized frames up to ETH_ZLEN in place, because the NIC
does not pad. */
len = iob_len ( iobuf );
if ( len < ETH_ZLEN ) {
iob_pad ( iobuf, ETH_ZLEN );
len = ETH_ZLEN;
}
/* Enqueue the packet by writing a descriptor to the NIC.
This is a bit tricky because the HW requires 32-bit writes,
but the structure has smaller fields. Words 2 and 3 of the
descriptor are therefore assembled by hand and stored whole. */
kreq = &priv->transmit_ring[transmits_posted
& priv->transmit_ring_wrap];
kreq->addr_high = 0;
kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
( ( uint32 * ) kreq ) [2] = htonl (
0x0000 << 16 /* pseudo_header_offset */
| ( len & 0xFFFF ) /* length */
);
/* The word holding the flags is written last, after a write
barrier. NOTE(review): presumably the NIC treats that word as the
"descriptor valid" signal - confirm. */
wmb();
( ( uint32 * ) kreq ) [3] = htonl (
0x00 << 24 /* pad */
| 0x01 << 16 /* rdma_count */
| 0x00 << 8 /* cksum_offset */
| ( MXGEFW_FLAGS_SMALL
| MXGEFW_FLAGS_FIRST
| MXGEFW_FLAGS_NO_TSO ) /* flags */
);
wmb();
/* Mark the slot as consumed and return. */
priv->transmits_posted = ++transmits_posted;
DBG2_RINGS ( priv );
return 0;
}
/* PCI vendor/device IDs handled by this driver; referenced by
myri10ge_driver below. */
static struct pci_device_id myri10ge_nics[] = {
/* Each of these macros must be a single line to satisfy a script. */
PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
};
/* PCI driver descriptor: ties the supported ID table to the probe and
remove entry points defined above. */
struct pci_driver myri10ge_driver __pci_driver = {
.ids = myri10ge_nics,
/* Number of entries in myri10ge_nics[]. */
.id_count = ( sizeof ( myri10ge_nics ) / sizeof ( myri10ge_nics[0] ) ) ,
.probe = myri10ge_pci_probe,
.remove = myri10ge_pci_remove
};
/*
* Local variables:
* c-basic-offset: 8
* c-indent-level: 8
* tab-width: 8
* End:
*/