diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 713c283..cc68efc 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -780,6 +780,19 @@ conf/all/forwarding - BOOLEAN proxy_ndp - BOOLEAN Do proxy ndp. +nlnotify_on_addr_add - BOOLEAN + By default, netlink messages are not sent when an IPv6 address + is added if it is in tentative state. This makes it harder + for some user-space applications to function properly. To + ensure that a netlink message is always sent when an IPv6 addr + is added, regardless of the state of the address, set this value + to 1. For the old (default) behaviour, set this value to 0. + + If only certain interfaces should have this behaviour, leave the + 'all' config set to 0 and set the individual interface's value + to 1. + + conf/interface/*: Change special settings per interface. diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 2cfc7b0..cb06938 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -123,6 +123,7 @@ unsigned long long sched_clock(void) /* return the value in ns */ return cycles_2_ns(this_offset); } +EXPORT_SYMBOL(sched_clock); static unsigned long calculate_cpu_khz(void) { diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 5cc76d0..b000678 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -493,6 +493,7 @@ unsigned long long sched_clock(void) rdtscll(a); return cycles_2_ns(a); } +EXPORT_SYMBOL(sched_clock); static unsigned long get_cmos_time(void) { diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 0cefef5..f833389 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/************************************************************** Intel PRO/100 Linux driver Copyright(c) 1999 - 2006 Intel Corporation. @@ -383,6 +383,7 @@ enum cb_command { cb_ucode = 0x0005, cb_dump = 0x0006, cb_tx_sf = 0x0008, + cb_tx_nc = 0x0010, /* 0 == controler does CRC, ie normal. 1 == CRC from memory */ cb_cid = 0x1f00, cb_i = 0x2000, cb_s = 0x4000, @@ -419,7 +420,7 @@ struct config { /*5*/ u8 X(tx_dma_max_count:7, dma_max_count_enable:1); /*6*/ u8 X(X(X(X(X(X(X(late_scb_update:1, direct_rx_dma:1), tno_intr:1), cna_intr:1), standard_tcb:1), standard_stat_counter:1), - rx_discard_overruns:1), rx_save_bad_frames:1); + rx_save_overruns:1), rx_save_bad_frames:1); /*7*/ u8 X(X(X(X(X(rx_discard_short_frames:1, tx_underrun_retry:2), pad7:2), rx_extended_rfd:1), tx_two_frames_in_fifo:1), tx_dynamic_tbd:1); @@ -549,6 +550,8 @@ struct nic { multicast_all = (1 << 2), wol_magic = (1 << 3), ich_10h_workaround = (1 << 4), + accept_all_frames = (1 << 5), + save_fcs = (1 << 6), } flags ____cacheline_aligned; enum mac mac; @@ -1009,6 +1012,16 @@ static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) config->promiscuous_mode = 0x1; /* 1=on, 0=off */ } + if(nic->flags & accept_all_frames) { + config->rx_save_overruns = 0x1; /* 1=save, 0=discard */ + config->rx_save_bad_frames = 0x1; /* 1=save, 0=discard */ + config->rx_discard_short_frames = 0x0; /* 1=discard, 0=save */ + } + + if(nic->flags & save_fcs) { + config->rx_crc_transfer = 0x1; /* 1=save, 0=discard */ + } + if(nic->flags & multicast_all) config->multicast_all = 0x1; /* 1=accept, 0=no */ @@ -1463,6 +1476,16 @@ static void e100_set_multicast_list(struct net_device *netdev) else nic->flags &= ~promiscuous; + if(netdev->flags & IFF_ACCEPT_ALL_FRAMES) + nic->flags |= accept_all_frames; + else + nic->flags &= ~accept_all_frames; + + if(netdev->flags & IFF_SAVE_FCS) + nic->flags |= save_fcs; + else + nic->flags &= ~save_fcs; + if(netdev->flags & IFF_ALLMULTI || netdev->mc_count > E100_MAX_MULTICAST_ADDRS) nic->flags |= multicast_all; @@ -1604,6 +1627,19 @@ static void e100_xmit_prepare(struct nic *nic, struct cb *cb, struct sk_buff *skb) { cb->command = nic->tx_command; + +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + /* Use the last 4 bytes of the SKB payload packet as the CRC, used for + * testing, ie sending frames with bad CRC. + */ + if (unlikely(skb->use_specified_ether_crc)) { + cb->command |= __constant_cpu_to_le16(cb_tx_nc); + } + else { + cb->command &= ~__constant_cpu_to_le16(cb_tx_nc); + } +#endif + /* interrupt every 16 packets regardless of delay */ if((nic->cbs_avail & ~15) == nic->cbs_avail) cb->command |= cpu_to_le16(cb_i); @@ -1833,7 +1869,21 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, skb_reserve(skb, sizeof(struct rfd)); skb_put(skb, actual_size); skb->protocol = eth_type_trans(skb, nic->netdev); - + /* NOTE: The config step turns on acceptance of various bogus frames + * when in loopback or promisc mode, but this code will still throw + * them away unless you also set the new 'accept_all_frames' flag. + * Perhaps the implementors meant to accept the bogus frames in + * promisc mode here?? --Ben + */ + if(unlikely(!(nic->flags & accept_all_frames))) { + if(actual_size > nic->netdev->mtu + VLAN_ETH_HLEN) { + /* Received oversized frame */ + nic->net_stats.rx_over_errors++; + } + /* We're accepting all, so pass the bogons on up the stack. */ + goto process_skb; + } + if(unlikely(!(rfd_status & cb_ok))) { /* Don't indicate if hardware indicates errors */ dev_kfree_skb_any(skb); @@ -1842,6 +1892,7 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, nic->rx_over_length_errors++; dev_kfree_skb_any(skb); } else { + process_skb: nic->net_stats.rx_packets++; nic->net_stats.rx_bytes += actual_size; nic->netdev->last_rx = jiffies; @@ -2203,6 +2254,63 @@ static int e100_set_settings(struct net_device *netdev, struct ethtool_cmd *cmd) return err; } +static int e100_set_rxall(struct net_device *netdev, u32 data) +{ + struct nic *nic = netdev->priv; + if (data) { + netdev->priv_flags |= IFF_ACCEPT_ALL_FRAMES; + nic->flags |= accept_all_frames; + } + else { + netdev->priv_flags &= ~(IFF_ACCEPT_ALL_FRAMES); + nic->flags &= ~accept_all_frames; + } + + e100_exec_cb(nic, NULL, e100_configure); + + return 0; +} + +static int e100_get_rxall(struct net_device *netdev, u32* data) +{ + struct nic *nic = netdev->priv; + if (nic->flags & accept_all_frames) { + *data = 1; + } + else { + *data = 0; + } + + return 0; +} + +static int e100_set_save_fcs(struct net_device *netdev, u32 data) +{ + struct nic *nic = netdev->priv; + if (data) { + nic->flags |= save_fcs; + } + else { + nic->flags &= ~save_fcs; + } + e100_exec_cb(nic, NULL, e100_configure); + + return 0; +} + +static int e100_get_save_fcs(struct net_device *netdev, u32* data) +{ + struct nic *nic = netdev->priv; + if (nic->flags & save_fcs) { + *data = 1; + } + else { + *data = 0; + } + + return 0; +} + static void e100_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -2500,6 +2608,10 @@ static const struct ethtool_ops e100_ethtool_ops = { .get_stats_count = e100_get_stats_count, .get_ethtool_stats = e100_get_ethtool_stats, .get_perm_addr = ethtool_op_get_perm_addr, + .set_rx_all = e100_set_rxall, + .get_rx_all = e100_get_rxall, + .set_save_fcs = e100_set_save_fcs, + .get_save_fcs = e100_get_save_fcs, }; static int e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h index f091042..0dc2ad6 100644 --- a/drivers/net/e1000/e1000.h +++ b/drivers/net/e1000/e1000.h @@ -363,4 +363,7 @@ enum e1000_state_t { __E1000_DOWN }; + +void e1000_set_multi(struct net_device *netdev); + #endif /* _E1000_H_ */ diff --git a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c index fb96c87..3d82ab8 100644 --- a/drivers/net/e1000/e1000_ethtool.c +++ b/drivers/net/e1000/e1000_ethtool.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/***************************************************************** Intel PRO/1000 Linux driver Copyright(c) 1999 - 2006 Intel Corporation. @@ -1946,6 +1946,59 @@ e1000_get_strings(struct net_device *netdev, uint32_t stringset, uint8_t *data) } } +static int e1000_ethtool_setrxall(struct net_device *netdev, uint32_t val) { + unsigned short old_flags = netdev->priv_flags; + if (val) { + netdev->priv_flags |= IFF_ACCEPT_ALL_FRAMES; + } + else { + netdev->priv_flags &= ~(IFF_ACCEPT_ALL_FRAMES); + } + + /* printk("e1000_ethtool_setrxall (%s) val: %d\n", + netdev->name, val); */ + if (old_flags != netdev->priv_flags) { + netif_tx_lock_bh(netdev); + if (netif_running(netdev)) { + /*printk("Kicking e1000 for setrxall..\n");*/ + e1000_set_multi(netdev); + } else { + /* Value will be flushed into the hardware when the device is + * brought up. + */ + } + netif_tx_unlock_bh(netdev); + } + return 0; +} + +static int e1000_ethtool_set_save_fcs(struct net_device *netdev, uint32_t val) { + netif_tx_lock_bh(netdev); + if (val) { + netdev->priv_flags |= IFF_SAVE_FCS; + } + else { + netdev->priv_flags &= ~IFF_SAVE_FCS; + } + netif_tx_unlock_bh(netdev); + return 0; +} + +static int e1000_ethtool_get_save_fcs(struct net_device *netdev, uint32_t* val) { + *val = !!(netdev->priv_flags & IFF_SAVE_FCS); + /*printk("GETRXALL, data: %d priv_flags: %hx\n", + edata.data, netdev->priv_flags);*/ + return 0; +} + +static int e1000_ethtool_getrxall(struct net_device *netdev, uint32_t* val) { + *val = !!(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES); + /*printk("GETRXALL, data: %d priv_flags: %hx\n", + edata.data, netdev->priv_flags);*/ + return 0; +} + + static const struct ethtool_ops e1000_ethtool_ops = { .get_settings = e1000_get_settings, .set_settings = e1000_set_settings, @@ -1982,6 +2035,10 @@ static const struct ethtool_ops e1000_ethtool_ops = { .get_stats_count = e1000_get_stats_count, .get_ethtool_stats = e1000_get_ethtool_stats, .get_perm_addr = ethtool_op_get_perm_addr, + .get_rx_all = e1000_ethtool_getrxall, + .set_rx_all = e1000_ethtool_setrxall, + .set_save_fcs = e1000_ethtool_set_save_fcs, + .get_save_fcs = e1000_ethtool_get_save_fcs, }; void e1000_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index c6259c7..1883c3e 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/***************************************************************** Intel PRO/1000 Linux driver Copyright(c) 1999 - 2006 Intel Corporation. @@ -149,7 +149,7 @@ static void e1000_clean_tx_ring(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring); static void e1000_clean_rx_ring(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring); -static void e1000_set_multi(struct net_device *netdev); +void e1000_set_multi(struct net_device *netdev); static void e1000_update_phy_info(unsigned long data); static void e1000_watchdog(unsigned long data); static void e1000_82547_tx_fifo_stall(unsigned long data); @@ -1003,6 +1003,9 @@ e1000_probe(struct pci_dev *pdev, if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; + /* Has ability to receive all frames (even bad CRCs and such) */ + netdev->features |= NETIF_F_RX_ALL | NETIF_F_SAVE_CRC; + netdev->features |= NETIF_F_LLTX; adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw); @@ -2418,7 +2421,7 @@ e1000_set_mac(struct net_device *netdev, void *p) * promiscuous mode, and all-multi behavior. **/ -static void +void e1000_set_multi(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); @@ -2453,6 +2456,35 @@ e1000_set_multi(struct net_device *netdev) E1000_WRITE_REG(hw, RCTL, rctl); + + /* This is useful for using ethereal or tcpdump to sniff + * packets in promiscuous mode without stripping VLAN/priority + * information, and also letting bad packets through. + * + * THIS IS NOT PRODUCTION CODE - FOR INTERNAL USE ONLY!!! + * + */ + if (netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES) { + uint32_t ctrl; + /*printk("%s: Enabling acceptance of ALL frames (bad CRC too).\n", + netdev->name); */ + /* store bad packets, promisc/multicast all, no VLAN + * filter */ + rctl = E1000_READ_REG(hw, RCTL); + rctl |= (E1000_RCTL_SBP | E1000_RCTL_UPE | E1000_RCTL_MPE); + rctl &= ~(E1000_RCTL_VFE | E1000_RCTL_CFIEN); + E1000_WRITE_REG(hw, RCTL, rctl); + /* disable VLAN tagging/striping */ + ctrl = E1000_READ_REG(hw, CTRL); + ctrl &= ~E1000_CTRL_VME; + E1000_WRITE_REG(hw, CTRL, ctrl); + } + else { + /* TODO: Do we need a way to explicitly turn this off if it was + * previously enabled, or will it magically go back to normal??? --Ben + */ + } + /* 82542 2.0 needs to be in reset to write receive address registers */ if (hw->mac_type == e1000_82542_rev2_0) @@ -2868,6 +2900,7 @@ set_itr_now: #define E1000_TX_FLAGS_VLAN 0x00000002 #define E1000_TX_FLAGS_TSO 0x00000004 #define E1000_TX_FLAGS_IPV4 0x00000008 +#define E1000_TX_FLAGS_NO_FCS 0x00000010 #define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 #define E1000_TX_FLAGS_VLAN_SHIFT 16 @@ -3128,6 +3161,13 @@ e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); } +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS)) { + txd_lower &= ~(E1000_TXD_CMD_IFCS); + /* printk("Disabling CRC in tx_queue, txd_lower: 0x%x\n", txd_lower); */ + } +#endif + i = tx_ring->next_to_use; while (count--) { @@ -3142,6 +3182,14 @@ e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + /* txd_cmd re-enables FCS, so we'll re-disable it here as desired. */ + if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS)) { + tx_desc->lower.data &= ~(cpu_to_le32(E1000_TXD_CMD_IFCS)); + /* printk("Disabling2 CRC in tx_queue, txd_lower: 0x%x\n", tx_desc->lower.data); */ + } +#endif + /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -3430,6 +3478,12 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (likely(skb->protocol == htons(ETH_P_IP))) tx_flags |= E1000_TX_FLAGS_IPV4; +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (unlikely(skb->use_specified_ether_crc)) { + tx_flags |= E1000_TX_FLAGS_NO_FCS; + } +#endif + e1000_tx_queue(adapter, tx_ring, tx_flags, e1000_tx_map(adapter, tx_ring, skb, first, max_per_txd, nr_frags, mss)); @@ -4230,7 +4284,11 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, goto next_desc; } - if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { + /* If we are accepting all frames, then do not pay attention to the + * framing errors. + */ + if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) && + !(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES)) { last_byte = *(skb->data + length - 1); if (TBI_ACCEPT(&adapter->hw, status, rx_desc->errors, length, last_byte)) { @@ -4256,6 +4314,16 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, total_rx_bytes += length; total_rx_packets++; + + // This may not be needed now. --Ben + //if (netdev->priv_flags & IFF_SAVE_FCS) { + // skb_put(skb, length); + //} + //else { + // skb_put(skb, length - ETHERNET_FCS_SIZE); + //} + + /* code added for copybreak, this should improve * performance for small packets with large amounts * of reassembly being done in the stack */ @@ -4393,7 +4461,8 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, goto next_desc; } - if (unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { + if ((unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) && + !(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES)) { dev_kfree_skb_irq(skb); goto next_desc; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 74d3ccb..83cebdc 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -120,6 +120,7 @@ struct TCP_Server_Info { struct sockaddr_in sockAddr; struct sockaddr_in6 sockAddr6; } addr; + u32 ip4_local_ip; /* if != 0, will bind locally to this IP */ wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ struct list_head pending_mid_q; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2caca06..7977cac 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -91,12 +91,14 @@ struct smb_vol { unsigned int sockopt; unsigned short int port; char * prepath; + u32 local_ip; /* allow binding to a local IP address if != 0 */ }; static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, char * netb_name, - char * server_netb_name); + char * server_netb_name, + u32 local_ip); static int ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket); @@ -191,7 +193,8 @@ cifs_reconnect(struct TCP_Server_Info *server) rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket, server->workstation_RFC1001_name, - server->server_RFC1001_name); + server->server_RFC1001_name, + server->ip4_local_ip); } if(rc) { cFYI(1,("reconnect error %d",rc)); @@ -1003,6 +1006,18 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) printk(KERN_WARNING "CIFS: domain name too long\n"); return 1; } + } else if (strnicmp(data, "local_ip", 8) == 0) { + if (!value || !*value) { + printk(KERN_WARNING "CIFS: local_ip value not specified.\n"); + return 1; /* needs_arg; */ + } + i = cifs_inet_pton(AF_INET, value, &(vol->local_ip)); + if (i < 0) { + vol->local_ip = 0; + printk(KERN_WARNING "CIFS: Could not parse local_ip: %s\n", + value); + return 1; + } } else if (strnicmp(data, "prefixpath", 10) == 0) { if (!value || !*value) { printk(KERN_WARNING @@ -1263,7 +1278,8 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) static struct cifsSesInfo * cifs_find_tcp_session(struct in_addr * target_ip_addr, struct in6_addr *target_ip6_addr, - char *userName, struct TCP_Server_Info **psrvTcp) + char *userName, struct TCP_Server_Info **psrvTcp, + u32 local_ip) { struct list_head *tmp; struct cifsSesInfo *ses; @@ -1273,7 +1289,11 @@ cifs_find_tcp_session(struct in_addr * target_ip_addr, list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); if (ses->server) { - if((target_ip_addr && + if((target_ip_addr && + /* If binding to a local IP, do not re-use sessions bound to different + * local IP addresses. + */ + (local_ip == ses->server->ip4_local_ip) && (ses->server->addr.sockAddr.sin_addr.s_addr == target_ip_addr->s_addr)) || (target_ip6_addr && memcmp(&ses->server->addr.sockAddr6.sin6_addr, @@ -1296,7 +1316,7 @@ cifs_find_tcp_session(struct in_addr * target_ip_addr, } static struct cifsTconInfo * -find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) +find_unc(__be32 new_target_ip_addr, char *uncName, char *userName, u32 local_ip) { struct list_head *tmp; struct cifsTconInfo *tcon; @@ -1311,8 +1331,9 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) ("old ip addr: %x == new ip %x ?", tcon->ses->server->addr.sockAddr.sin_addr. s_addr, new_target_ip_addr)); - if (tcon->ses->server->addr.sockAddr.sin_addr. - s_addr == new_target_ip_addr) { + if ((local_ip == tcon->ses->server->ip4_local_ip) && + (tcon->ses->server->addr.sockAddr.sin_addr. + s_addr == new_target_ip_addr)) { /* BB lock tcon, server and tcp session and increment use count here? */ /* found a match on the TCP session */ /* BB check if reconnection needed */ @@ -1414,7 +1435,8 @@ static void rfc1002mangle(char * target,char * source, unsigned int length) static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char * netbios_name, char * target_name) + char * netbios_name, char * target_name, + u32 local_ip /* in network byte order */) { int rc = 0; int connected = 0; @@ -1433,6 +1455,24 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, } } + /* Bind to the local IP address if specified */ + if (local_ip) { + struct sockaddr_in myaddr = { + .sin_family = AF_INET, + }; + myaddr.sin_addr.s_addr = local_ip; + myaddr.sin_port = 0; /* any */ + rc = (*csocket)->ops->bind(*csocket, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (rc < 0) { + printk("Tried to bind to local ip: 0x%x, but failed with error: %d\n", + local_ip, rc); + } + else { + printk("CIFS: Successfully bound to local ip: 0x%x\n", local_ip); + } + } + psin_server->sin_family = AF_INET; if(psin_server->sin_port) { /* user overrode default port */ rc = (*csocket)->ops->connect(*csocket, @@ -1720,11 +1760,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if(address_type == AF_INET) existingCifsSes = cifs_find_tcp_session(&sin_server.sin_addr, NULL /* no ipv6 addr */, - volume_info.username, &srvTcp); + volume_info.username, &srvTcp, + volume_info.local_ip); else if(address_type == AF_INET6) existingCifsSes = cifs_find_tcp_session(NULL /* no ipv4 addr */, &sin_server6.sin6_addr, - volume_info.username, &srvTcp); + volume_info.username, &srvTcp, 0); else { kfree(volume_info.UNC); kfree(volume_info.password); @@ -1743,7 +1784,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sin_server.sin_port = 0; rc = ipv4_connect(&sin_server,&csocket, volume_info.source_rfc1001_name, - volume_info.target_rfc1001_name); + volume_info.target_rfc1001_name, + volume_info.local_ip); if (rc < 0) { cERROR(1, ("Error connecting to IPv4 socket. Aborting operation")); @@ -1772,6 +1814,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* BB Add code for ipv6 case too */ srvTcp->ssocket = csocket; srvTcp->protocolType = IPV4; + srvTcp->ip4_local_ip = volume_info.local_ip; init_waitqueue_head(&srvTcp->response_q); init_waitqueue_head(&srvTcp->request_q); INIT_LIST_HEAD(&srvTcp->pending_mid_q); @@ -1914,7 +1957,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, tcon = find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, - volume_info.username); + volume_info.username, volume_info.local_ip); if (tcon) { cFYI(1, ("Found match on UNC path")); /* we can have only one retry value for a connection diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 7933e2e..c8eddc2 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -170,7 +170,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) struct nfs_client *clp; /* Don't talk to strangers */ - clp = nfs_find_client(addr, 4); + clp = nfs_find_client(addr, 4, 0xFFFFFFFF /* any */); if (clp == NULL) return SVC_DROP; dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr)); diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 72e55d8..c392815 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -23,7 +23,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres * res->bitmap[0] = res->bitmap[1] = 0; res->status = htonl(NFS4ERR_BADHANDLE); - clp = nfs_find_client(args->addr, 4); + clp = nfs_find_client(args->addr, 4, 0xFFFFFFF /* any IP */); if (clp == NULL) goto out; inode = nfs_delegation_find_inode(clp, &args->fh); @@ -62,7 +62,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) __be32 res; res = htonl(NFS4ERR_BADHANDLE); - clp = nfs_find_client(args->addr, 4); + clp = nfs_find_client(args->addr, 4, 0xFFFFFFFF /* any IP addr */); if (clp == NULL) goto out; inode = nfs_delegation_find_inode(clp, &args->fh); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 23ab145..8b5a692 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -100,7 +100,7 @@ struct rpc_program nfsacl_program = { */ static struct nfs_client *nfs_alloc_client(const char *hostname, const struct sockaddr_in *addr, - int nfsversion) + int nfsversion, u32 local_ipv4_addr) { struct nfs_client *clp; int error; @@ -127,7 +127,8 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, clp->cl_nfsversion = nfsversion; memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); - + clp->local_ipv4_addr = local_ipv4_addr; + if (hostname) { clp->cl_hostname = kstrdup(hostname, GFP_KERNEL); if (!clp->cl_hostname) @@ -232,7 +233,8 @@ void nfs_put_client(struct nfs_client *clp) * Find a client by address * - caller must hold nfs_client_lock */ -static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion, int match_port) +static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion, int match_port, + u32 local_ipv4_addr) { struct nfs_client *clp; @@ -249,6 +251,13 @@ static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int sizeof(clp->cl_addr.sin_addr)) != 0) continue; + if (local_ipv4_addr != 0xFFFFFFFF) { + // all ff's is 'ANY'. + if ((clp->local_ipv4_addr || local_ipv4_addr) + && (clp->local_ipv4_addr != local_ipv4_addr)) + continue; + } + if (!match_port || clp->cl_addr.sin_port == addr->sin_port) goto found; } @@ -264,12 +273,12 @@ found: * Find a client by IP address and protocol version * - returns NULL if no such client */ -struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion) +struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion, u32 local_ipv4_addr) { struct nfs_client *clp; spin_lock(&nfs_client_lock); - clp = __nfs_find_client(addr, nfsversion, 0); + clp = __nfs_find_client(addr, nfsversion, 0, local_ipv4_addr); spin_unlock(&nfs_client_lock); if (clp != NULL && clp->cl_cons_state != NFS_CS_READY) { nfs_put_client(clp); @@ -284,7 +293,7 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio */ static struct nfs_client *nfs_get_client(const char *hostname, const struct sockaddr_in *addr, - int nfsversion) + int nfsversion, u32 local_ipv4_addr) { struct nfs_client *clp, *new = NULL; int error; @@ -297,7 +306,7 @@ static struct nfs_client *nfs_get_client(const char *hostname, do { spin_lock(&nfs_client_lock); - clp = __nfs_find_client(addr, nfsversion, 1); + clp = __nfs_find_client(addr, nfsversion, 1, local_ipv4_addr); if (clp) goto found_client; if (new) @@ -305,7 +314,7 @@ static struct nfs_client *nfs_get_client(const char *hostname, spin_unlock(&nfs_client_lock); - new = nfs_alloc_client(hostname, addr, nfsversion); + new = nfs_alloc_client(hostname, addr, nfsversion, local_ipv4_addr); } while (new); return ERR_PTR(-ENOMEM); @@ -407,6 +416,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto, .program = &nfs_program, .version = clp->rpc_ops->version, .authflavor = flavor, + .local_ipv4_addr = clp->local_ipv4_addr, }; if (!IS_ERR(clp->cl_rpcclient)) @@ -576,7 +586,7 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat #endif /* Allocate or find a client reference we can use */ - clp = nfs_get_client(data->hostname, &data->addr, nfsvers); + clp = nfs_get_client(data->hostname, &data->addr, nfsvers, data->local_ip); if (IS_ERR(clp)) { dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); return PTR_ERR(clp); @@ -897,7 +907,7 @@ static int nfs4_set_client(struct nfs_server *server, const char *hostname, const struct sockaddr_in *addr, const char *ip_addr, rpc_authflavor_t authflavour, - int proto, int timeo, int retrans) + int proto, int timeo, int retrans, u32 local_ipv4_addr) { struct nfs_client *clp; int error; @@ -905,7 +915,7 @@ static int nfs4_set_client(struct nfs_server *server, dprintk("--> nfs4_set_client()\n"); /* Allocate or find a client reference we can use */ - clp = nfs_get_client(hostname, addr, 4); + clp = nfs_get_client(hostname, addr, 4, local_ipv4_addr); if (IS_ERR(clp)) { error = PTR_ERR(clp); goto error; @@ -966,7 +976,8 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, const char *mntpath, const char *ip_addr, rpc_authflavor_t authflavour, - struct nfs_fh *mntfh) + struct nfs_fh *mntfh, + u32 local_ipv4_addr) { struct nfs_fattr fattr; struct nfs_server *server; @@ -980,7 +991,7 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, /* Get a client record */ error = nfs4_set_client(server, hostname, addr, ip_addr, authflavour, - data->proto, data->timeo, data->retrans); + data->proto, data->timeo, data->retrans, local_ipv4_addr); if (error < 0) goto error; @@ -1053,7 +1064,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, data->authflavor, parent_server->client->cl_xprt->prot, parent_client->retrans_timeo, - parent_client->retrans_count); + parent_client->retrans_count, + parent_client->local_ipv4_addr); if (error < 0) goto error; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index a28f6ce..8ada6f6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -31,7 +31,7 @@ struct nfs_clone_mount { extern struct rpc_program nfs_program; extern void nfs_put_client(struct nfs_client *); -extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); +extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int version, u32 local_ipv4_addr); extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *, struct nfs_fh *); extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *, @@ -40,7 +40,8 @@ extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *, const char *, const char *, rpc_authflavor_t, - struct nfs_fh *); + struct nfs_fh *, + u32 local_ipv4_addr); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern void nfs_free_server(struct nfs_server *server); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 28108c8..5fe9082 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -603,6 +603,11 @@ static int nfs_compare_super(struct super_block *sb, void *data) return 0; if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) return 0; + if (old->nfs_client->local_ipv4_addr != server->nfs_client->local_ipv4_addr) { + /*printk("nfs_compare_super, old->ip: %x server->ip: %x\n", + old->local_ip, server->local_ip); */ + return 0; + } return 1; } @@ -872,7 +877,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, /* Get a volume representation */ server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, - authflavour, &mntfh); + authflavour, &mntfh, data->local_ipv4_addr); if (IS_ERR(server)) { error = PTR_ERR(server); goto out_err_noserver; diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h index 5755d57..3c7c7d2 100644 --- a/include/asm-i386/socket.h +++ b/include/asm-i386/socket.h @@ -50,4 +50,8 @@ #define SO_PEERSEC 31 #define SO_PASSSEC 34 +/* Instruct lower device to not calculate the frame + * checksum. Useful only for testing, afaik. --Ben */ +#define SO_NOFCS 50 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h index b467026..cffefd4 100644 --- a/include/asm-x86_64/socket.h +++ b/include/asm-x86_64/socket.h @@ -50,4 +50,9 @@ #define SO_PEERSEC 31 #define SO_PASSSEC 34 +/* Instruct lower device to not calculate the frame + * checksum. Useful only for testing, afaik. --Ben */ +#define SO_NOFCS 50 + + #endif /* _ASM_SOCKET_H */ diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index c26c3ad..4d1cf33 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -275,6 +275,11 @@ COMPATIBLE_IOCTL(SIOCGMIIREG) COMPATIBLE_IOCTL(SIOCSMIIREG) COMPATIBLE_IOCTL(SIOCGIFVLAN) COMPATIBLE_IOCTL(SIOCSIFVLAN) +COMPATIBLE_IOCTL(SIOCSIFMACVLAN) +COMPATIBLE_IOCTL(SIOCGIFMACVLAN) +COMPATIBLE_IOCTL(SIOCGIFREDIRDEV) +COMPATIBLE_IOCTL(SIOCSIFREDIRDEV) +COMPATIBLE_IOCTL(0x7450 /* GET_PKTGEN_INTERFACE_INFO */) COMPATIBLE_IOCTL(SIOCBRADDBR) COMPATIBLE_IOCTL(SIOCBRDELBR) /* SG stuff */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c6310ae..8ed7f16 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1,4 +1,4 @@ -/* +/* -*-linux-c-*- * ethtool.h: Defines for Linux ethtool. * * Copyright (C) 1998 David S. Miller (davem@redhat.com) @@ -309,7 +309,11 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data); * phys_id: Identify the device * get_stats: Return statistics about the device * get_perm_addr: Gets the permanent hardware address - * + * set_rx_all: Set or clear IFF_ACCEPT_ALL_FRAMES, see if.h + * get_rx_all: Return 1 if set, 0 if not. + * set_save_fcs: Set or clear IFF_SAVE_FCS, see if.h + * get_save_fcs: Return 1 if set, 0 if not. + * * Description: * * get_settings: @@ -368,6 +372,10 @@ struct ethtool_ops { int (*get_stats_count)(struct net_device *); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); int (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *); + int (*set_rx_all)(struct net_device *, u32); + int (*get_rx_all)(struct net_device *, u32 *); + int (*set_save_fcs)(struct net_device *, u32); + int (*get_save_fcs)(struct net_device *, u32 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); u32 (*get_ufo)(struct net_device *); @@ -375,6 +383,13 @@ struct ethtool_ops { }; #endif /* __KERNEL__ */ +/* for dumping net-device statistics */ +struct ethtool_ndstats { + u32 cmd; /* ETHTOOL_GNDSTATS */ + u8 data[0]; /* sizeof(struct net_device_stats) */ +}; + + /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* Get settings. */ #define ETHTOOL_SSET 0x00000002 /* Set settings. */ @@ -414,6 +429,15 @@ struct ethtool_ops { #define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */ #define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */ + +#define ETHTOOL_GNDSTATS 0x00000070 /* get standard net-device statistics */ +#define ETHTOOL_GETRXALL 0x00000071 /* Retrieve whether or not + * IFF_ACCEPT_ALL_FRAMES is set. */ +#define ETHTOOL_SETRXALL 0x00000072 /* Set IFF_ACCEPT_ALL_FRAMES */ +#define ETHTOOL_GETRXFCS 0x00000073 /* Set IFF_SAVE_FCS */ +#define ETHTOOL_SETRXFCS 0x00000074 /* Set IFF_SAVE_FCS */ + + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET diff --git a/include/linux/if.h b/include/linux/if.h index 32bf419..ce8f1d3 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -62,6 +62,14 @@ #define IFF_BONDING 0x20 /* bonding master or slave */ #define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */ +#define IFF_ACCEPT_ALL_FRAMES 0x0400 /** Accept all frames, even ones with bad CRCs. + * Should only be used in debugging/testing situations + * Do NOT enable this unless you understand the + * consequences! */ +#define IFF_SAVE_FCS 0x0800 /** Save the Frame Check Sum (FCS) on receive, if + * possible. */ +#define IFF_MAC_VLAN 0x1000 /* MAC VLAN device. */ + #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h new file mode 100644 index 0000000..0f56ae5 --- /dev/null +++ b/include/linux/if_macvlan.h @@ -0,0 +1,58 @@ +/* -*- linux-c -*- */ +#ifndef _LINUX_IF_MACVLAN_H +#define _LINUX_IF_MACVLAN_H + +/* the ioctl commands */ + +/* actions */ +#define MACVLAN_ENABLE 1 +#define MACVLAN_DISABLE 2 +#define MACVLAN_ADD 3 +#define MACVLAN_DEL 4 +//#define MACVLAN_BIND 5 +//#define MACVLAN_UNBIND 6 + +/* informative */ +#define MACVLAN_GET_NUM_PORTS 7 +#define MACVLAN_GET_PORT_NAME 8 +#define MACVLAN_GET_NUM_VLANS 9 +#define MACVLAN_GET_VLAN_NAME 10 +//#define MACVLAN_GET_NUM_MACS 11 +//#define MACVLAN_GET_MAC_NAME 12 + +#define MACVLAN_SET_PORT_FLAGS 13 +#define MACVLAN_GET_PORT_FLAGS 14 + +/* If this IOCTL succeedes, we are a MAC-VLAN interface, otherwise, we are not. */ +#define MACVLAN_IS_MACVLAN 15 +#define MACVLAN_IS_MACVLAN2 16 /* new ioctl API */ + + +#ifdef __KERNEL__ +#include +#include +extern int (*macvlan_ioctl_hook)(unsigned long arg); + +/* Returns >= 0 if it consumed the packet, otherwise let the pkt + * be processed by the netif_rx method, as if macvlan's didn't + * exist. + */ +extern int (*macvlan_handle_frame_hook)(struct sk_buff *skb); +#endif + +struct macvlan_ioctl_reply { + int32_t num; + char name[IFNAMSIZ]; +}; + +struct macvlan_ioctl { + int32_t cmd; + int32_t portidx; + int32_t ifidx; /* flags when setting port flags */ + int32_t macaddridx; + char ifname[IFNAMSIZ]; + unsigned char macaddr[8]; + struct macvlan_ioctl_reply reply; +}; + +#endif diff --git a/include/linux/if_redirdev.h b/include/linux/if_redirdev.h new file mode 100644 index 0000000..cf8055c --- /dev/null +++ b/include/linux/if_redirdev.h @@ -0,0 +1,35 @@ +/* -*- linux-c -*- */ +#ifndef _LINUX_IF_REDIRDEV_H +#define _LINUX_IF_REDIRDEV_H + +/* the ioctl commands */ + +#define REDIRDEV_ADD 2090 +#define REDIRDEV_DEL 2091 +/* If this IOCTL succeedes, we are a Redirect-Device + interface, otherwise, we are not. */ +#define REDIRDEV_IS_REDIRDEV 2092 +#define REDIRDEV_GET_BY_IDX 2093 +#define REDIRDEV_GET_BY_NAME 2094 +#define REDIRDEV_SET_QUOTA 2095 + +#ifdef __KERNEL__ +#include +#include +extern int (*redirdev_ioctl_hook)(void*); + +#endif + +/* Request and response */ +struct redirdev_ioctl { + u32 cmd; + u32 ifidx; /* when getting info by idx */ + +#define RDD_ASSOCIATED (1<<0) + u32 flags; /* 1<<0: Is the interface associated with tx-dev or not */ + u32 not_used; /* explicitly align 64-bit */ + char ifname[IFNAMSIZ]; + char txifname[IFNAMSIZ]; +}; + +#endif diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index c0f7aec..ad8aef9 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -31,6 +31,7 @@ struct ipv4_devconf int no_policy; int force_igmp_version; int promote_secondaries; + int accept_sts; void *sysctl; }; @@ -84,6 +85,7 @@ struct in_device #define IN_DEV_ARPFILTER(in_dev) (ipv4_devconf.arp_filter || (in_dev)->cnf.arp_filter) #define IN_DEV_ARP_ANNOUNCE(in_dev) (max(ipv4_devconf.arp_announce, (in_dev)->cnf.arp_announce)) #define IN_DEV_ARP_IGNORE(in_dev) (max(ipv4_devconf.arp_ignore, (in_dev)->cnf.arp_ignore)) +#define IN_DEV_ACCEPT_STS(in_dev) (max(ipv4_devconf.accept_sts, (in_dev)->cnf.accept_sts)) struct in_ifaddr { diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 713eb5e..a718555 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -178,6 +178,11 @@ struct ipv6_devconf { #endif __s32 proxy_ndp; __s32 accept_source_route; + __s32 nlnotify_on_addr_add; /* Always notify netlink on addr add, even if it is tentative. + * As currently implemented, this will often cause multiple netlink + * RTM_NEWADDR messages, as a new notification will be sent when + * the address becomes un-tentative. + */ void *sysctl; }; @@ -208,6 +213,7 @@ enum { DEVCONF_PROXY_NDP, __DEVCONF_OPTIMISTIC_DAD, DEVCONF_ACCEPT_SOURCE_ROUTE, + DEVCONF_NLNOTIFY_ON_ADDR_ADD, DEVCONF_MAX }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fea0d9d..959361b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -164,6 +164,7 @@ enum { struct neighbour; struct neigh_parms; struct sk_buff; +struct pktgen_dev; struct netif_rx_stats { @@ -323,6 +324,11 @@ struct net_device #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_GSO 2048 /* Enable software GSO. */ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_RX_ALL 16384 /* Can be configured to receive all packets, even + * ones with busted CRC. May disable VLAN filtering + * in the NIC, users should NOT enable this feature + * unless they understand the consequences. */ +#define NETIF_F_SAVE_CRC 32768 /* Can save FCS in skb, last 4 bytes for ethernet */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -524,7 +530,18 @@ struct net_device #ifdef CONFIG_NET_POLL_CONTROLLER void (*poll_controller)(struct net_device *dev); #endif - + /* Callback for when the queue is woken, used by pktgen currently */ + int (*notify_queue_woken)(struct net_device *dev); + void* nqw_data; /* To be used by the method above as needed */ + + struct pktgen_dev* pkt_dev; /* to quickly find the pkt-gen dev registered with this + * interface, if any. + */ + long dflt_skb_mark; /* Specify skb->mark for pkts received on this interface. */ +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + struct macvlan_port *macvlan_priv; +#endif + /* bridge stuff */ struct net_bridge_port *br_port; @@ -649,8 +666,13 @@ static inline void netif_wake_queue(struct net_device *dev) if (netpoll_trap()) return; #endif - if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) + if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) { __netif_schedule(dev); + + if (dev->notify_queue_woken) { + dev->notify_queue_woken(dev); + } + } } static inline void netif_stop_queue(struct net_device *dev) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index c228bde..349ac87 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -79,6 +79,8 @@ struct ip_conntrack_tuple /* The direction (for tuplehash) */ u_int8_t dir; } dst; + + u_int32_t mark; }; /* This is optimized opposed to a memset of the whole structure. Everything we @@ -114,7 +116,8 @@ static inline int ip_ct_tuple_src_equal(const struct ip_conntrack_tuple *t1, const struct ip_conntrack_tuple *t2) { return t1->src.ip == t2->src.ip - && t1->src.u.all == t2->src.u.all; + && t1->src.u.all == t2->src.u.all + && t1->mark == t2->mark; } static inline int ip_ct_tuple_dst_equal(const struct ip_conntrack_tuple *t1, @@ -122,7 +125,8 @@ static inline int ip_ct_tuple_dst_equal(const struct ip_conntrack_tuple *t1, { return t1->dst.ip == t2->dst.ip && t1->dst.u.all == t2->dst.u.all - && t1->dst.protonum == t2->dst.protonum; + && t1->dst.protonum == t2->dst.protonum + && t1->mark == t2->mark; } static inline int ip_ct_tuple_equal(const struct ip_conntrack_tuple *t1, @@ -140,7 +144,8 @@ static inline int ip_ct_tuple_mask_cmp(const struct ip_conntrack_tuple *t, || ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all) || ((t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all) || ((t->dst.protonum ^ tuple->dst.protonum) - & mask->dst.protonum)); + & mask->dst.protonum) + || ((t->mark ^ tuple->mark) & mask->mark)); } #endif /* _IP_CONNTRACK_TUPLE_H */ diff --git a/include/linux/nfs4_mount.h b/include/linux/nfs4_mount.h index 26b4c83..25fcd54 100644 --- a/include/linux/nfs4_mount.h +++ b/include/linux/nfs4_mount.h @@ -53,6 +53,7 @@ struct nfs4_mount_data { /* Pseudo-flavours to use for authentication. See RFC2623 */ int auth_flavourlen; /* 1 */ int __user *auth_flavours; /* 1 */ + u32 local_ipv4_addr; /* 2 Allow binding to a local IP address. --Ben */ }; /* bits in the flags field */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 95796e6..4e1de3c 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -29,7 +29,7 @@ struct nfs_client { const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */ unsigned long retrans_timeo; /* retransmit timeout */ unsigned int retrans_count; /* number of retransmit tries */ - + u32 local_ipv4_addr; /* local IP addr to bind to, zero for 'any' */ #ifdef CONFIG_NFS_V4 u64 cl_clientid; /* constant */ nfs4_verifier cl_confirm; diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 659c754..77700de 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -20,7 +20,7 @@ * mount-to-kernel version compatibility. Some of these aren't used yet * but here they are anyway. */ -#define NFS_MOUNT_VERSION 6 +#define NFS_MOUNT_VERSION 7 #define NFS_MAX_CONTEXT_LEN 256 struct nfs_mount_data { @@ -43,6 +43,8 @@ struct nfs_mount_data { struct nfs3_fh root; /* 4 */ int pseudoflavor; /* 5 */ char context[NFS_MAX_CONTEXT_LEN + 1]; /* 6 */ + char pad[3]; /* 7 Align the context above */ + unsigned int local_ip; /* 7 */ }; /* bits in the flags field */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 82f43ad..d0f3d06 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -285,7 +285,11 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + /* Use the last 4 bytes of the payload for the ethernet CRC. Only supported on some + * NICs, such as e1000. --Ben + */ + use_specified_ether_crc:1; __be16 protocol; void (*destructor)(struct sk_buff *skb); diff --git a/include/linux/sockios.h b/include/linux/sockios.h index abef759..367287c 100644 --- a/include/linux/sockios.h +++ b/include/linux/sockios.h @@ -94,6 +94,13 @@ #define SIOCGRARP 0x8961 /* get RARP table entry */ #define SIOCSRARP 0x8962 /* set RARP table entry */ +/* MAC address based VLAN control calls */ +#define SIOCGIFMACVLAN 0x8965 /* Mac address multiplex/demultiplex support */ +#define SIOCSIFMACVLAN 0x8966 /* Set macvlan options */ + +#define SIOCGIFREDIRDEV 0x8967 /* Redirect device get ioctl */ +#define SIOCSIFREDIRDEV 0x8968 /* Set redirect dev options */ + /* Driver configuration calls */ #define SIOCGIFMAP 0x8970 /* Get device parameters */ @@ -122,6 +129,7 @@ #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ #define SIOCBRDELIF 0x89a3 /* remove interface from bridge */ + /* Device private ioctl calls */ /* diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index a1be89d..814c68e 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -102,6 +102,7 @@ struct rpc_create_args { u32 version; rpc_authflavor_t authflavor; unsigned long flags; + u32 local_ipv4_addr; }; /* Values for "flags" field */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index f780e72..6bedd5d 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -201,7 +201,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long /* * Generic internal transport functions */ -struct rpc_xprt * xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms); +struct rpc_xprt * xprt_create_transport(int proto, struct sockaddr *addr, size_t size, struct rpc_timeout *toparms, u32 local_ipv4_addr /* 0 == don't care */); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_task *task); @@ -239,8 +239,8 @@ void xprt_disconnect(struct rpc_xprt *xprt); /* * Socket transport setup operations */ -struct rpc_xprt * xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to); -struct rpc_xprt * xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to); +struct rpc_xprt * xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to, u32 local_ipv4_addr); +struct rpc_xprt * xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to, u32 local_ipv4_addr); /* * Reserved bit positions in xprt->state diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 6f34622..2e204f1 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -483,6 +483,7 @@ enum NET_IPV4_CONF_ARP_IGNORE=19, NET_IPV4_CONF_PROMOTE_SECONDARIES=20, NET_IPV4_CONF_ARP_ACCEPT=21, + NET_IPV4_CONF_ACCEPT_STS=22, __NET_IPV4_CONF_MAX }; @@ -571,6 +572,7 @@ enum { NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, NET_IPV6_PROXY_NDP=23, NET_IPV6_ACCEPT_SOURCE_ROUTE=25, + NET_IPV6_NLNOTIFY_ON_ADDR_ADD=26, __NET_IPV6_MAX }; diff --git a/include/net/ip.h b/include/net/ip.h index e79c3e3..8609cdc 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -138,6 +138,7 @@ struct ip_reply_arg { __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ + int oif; }; void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, diff --git a/include/net/sock.h b/include/net/sock.h index 03684e7..37beb59 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -392,6 +392,10 @@ enum sock_flags { SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ + SOCK_DONT_DO_LL_FCS, /* Tell NIC not to do the ethernet FCS. Will use + * last 4 bytes of packet sent from user-space + * instead. + */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) diff --git a/kernel/panic.c b/kernel/panic.c index 525e365..2586b03 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -173,7 +173,7 @@ const char *print_tainted(void) void add_taint(unsigned flag) { - debug_locks = 0; /* can't trust the integrity of the kernel anymore */ + /* debug_locks = 0; --Ben */ /* can't trust the integrity of the kernel anymore */ tainted |= flag; } EXPORT_SYMBOL(add_taint); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 18fcb9f..f605754 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -3,7 +3,8 @@ * Ethernet-type device handling. * * Authors: Ben Greear - * Please send support related email to: vlan@scry.wanfear.com + * Please send support related email to: vlan@candelatech.com + * after subscribing using the link below. * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html * * Fixes: @@ -592,7 +593,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, { struct net_device *dev = ptr; struct vlan_group *grp = __vlan_find_group(dev->ifindex); - int i, flgs; + int i; /*, flgs; */ struct net_device *vlandev; if (!grp) @@ -614,6 +615,11 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, } break; +#if 0 + /* Don't propagate management state from base dev to VLANs. If you do this, + * then if you 'ifconfig eth0 down; ifconfig eth0 up', you also lose all the + * routes for eth0.* VLANs. --Ben + */ case NETDEV_DOWN: /* Put all VLANs for this dev in the down state too. */ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { @@ -643,6 +649,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, dev_change_flags(vlandev, flgs | IFF_UP); } break; +#endif case NETDEV_UNREGISTER: /* Delete all VLANs for this dev. */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 60a508e..9201649 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -3,7 +3,8 @@ * Ethernet-type device handling. * * Authors: Ben Greear - * Please send support related email to: vlan@scry.wanfear.com + * Please send support related email to: vlan@candelatech.com + * after subscribing using the web page below. * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html * * Fixes: Mar 22 2001: Martin Bokaemper @@ -439,6 +440,11 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = vlan_dev_get_stats(dev); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); + /* Please note, dev_queue_xmit consumes the pkt regardless of the + * return value. So, will copy the skb first and free if successful. + */ + struct sk_buff* skb2 = skb_get(skb); + /* Handle non-VLAN frames if they are sent to us, for example by DHCP. * * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING @@ -468,6 +474,10 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) skb = __vlan_put_tag(skb, veth_TCI); if (!skb) { stats->tx_dropped++; + /* Free the extra copy, assuming this is a non-recoverable + * issue and we don't want calling code to retry. + */ + kfree_skb(skb2); return 0; } @@ -485,13 +495,24 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) veth->h_vlan_proto, veth->h_vlan_TCI, veth->h_vlan_encapsulated_proto); #endif - stats->tx_packets++; /* for statics only */ - stats->tx_bytes += skb->len; - skb->dev = VLAN_DEV_INFO(dev)->real_dev; - dev_queue_xmit(skb); - return 0; + { + int rv = dev_queue_xmit(skb); + if (rv == 0) { + /* Was success, need to free the skb reference since + * we bumped up the user count above. If there was an + * error instead, then the skb2 will not be freed, and so + * the calling code will be able to re-send it. + */ + + stats->tx_packets++; /* for statics only */ + stats->tx_bytes += skb2->len; + + kfree_skb(skb2); + } + return rv; + } } int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/net/Kconfig b/net/Kconfig index 7dfc949..c17132d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -169,6 +169,8 @@ source "net/tipc/Kconfig" source "net/atm/Kconfig" source "net/bridge/Kconfig" source "net/8021q/Kconfig" +source "net/macvlan/Kconfig" +source "net/redir/Kconfig" source "net/decnet/Kconfig" source "net/llc/Kconfig" source "net/ipx/Kconfig" @@ -211,6 +213,14 @@ config NET_TCPPROBE To compile this code as a module, choose M here: the module will be called tcp_probe. +config SUPPORT_SEND_BAD_CRC + bool "Support Send Bad CRC (USE WITH CAUTION)" + ---help--- + When enabled, one can send a specially crafted packet to the ethernet + device via a raw socket and it will be sent with the last 4 bytes of + the packet as the ethernet CRC. Requires driver support. Current driver + support is limited to e100 and e1000. + endmenu endmenu diff --git a/net/Makefile b/net/Makefile index ad4d14f..e0df5ce 100644 --- a/net/Makefile +++ b/net/Makefile @@ -47,6 +47,8 @@ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ obj-$(CONFIG_NETLABEL) += netlabel/ +obj-$(CONFIG_MACVLAN) += macvlan/ +obj-$(CONFIG_REDIRDEV) += redir/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/core/dev.c b/net/core/dev.c index 2a587b8..5a6fedb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -117,6 +118,22 @@ #include #include +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) +#include "pktgen.h" + +#warning "Compiling dev.c for pktgen."; + +int (*handle_pktgen_hook)(struct sk_buff *skb) = NULL; +EXPORT_SYMBOL(handle_pktgen_hook); + +static __inline__ int handle_pktgen_rcv(struct sk_buff* skb) { + if (handle_pktgen_hook) { + return handle_pktgen_hook(skb); + } + return -1; +} +#endif + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -1761,6 +1778,23 @@ static int ing_filter(struct sk_buff *skb) } #endif + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +/* Returns >= 0 if we consume the packet. Otherwise, let + * it fall through the rest of the packet processing. + */ +int (*macvlan_handle_frame_hook)(struct sk_buff *skb) = NULL; +EXPORT_SYMBOL(macvlan_handle_frame_hook); +#endif + +/* Returns >= 0 if we consume the packet. Otherwise, let + * it fall through the rest of the packet processing. + */ +static __inline__ int handle_macvlan(struct sk_buff *skb) { + return macvlan_handle_frame_hook(skb); +} + + int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -1788,6 +1822,11 @@ int netif_receive_skb(struct sk_buff *skb) skb->h.raw = skb->nh.raw = skb->data; skb->mac_len = skb->nh.raw - skb->mac.raw; + /* Set the default 'mark' for this skb. dflt_skb_mark may be set through + * the /sys/class/net/[dev-name]/dflt_skb_mark file. + */ + skb->mark = skb->dev->dflt_skb_mark; + pt_prev = NULL; rcu_read_lock(); @@ -1829,6 +1868,32 @@ ncls: if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (skb->dev->macvlan_priv != NULL && + macvlan_handle_frame_hook != NULL) { + if (handle_macvlan(skb) >= 0) { + /* consumed by mac-vlan...it would have been + * re-sent to this method with a different + * device... + */ + goto out; + } + else { + /* Let it fall through and be processed normally */ + } + } +#endif + +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) + if ((skb->dev->pkt_dev) && + (handle_pktgen_rcv(skb) >= 0)) { + /* Pktgen may consume the packet, no need to send + * to further protocols. + */ + goto out; + } +#endif + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { if (ptype->type == type && @@ -3564,6 +3629,10 @@ EXPORT_SYMBOL(net_enable_timestamp); EXPORT_SYMBOL(net_disable_timestamp); EXPORT_SYMBOL(dev_get_flags); +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) +EXPORT_SYMBOL(handle_pktgen_rcv); +#endif + #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) EXPORT_SYMBOL(br_handle_frame_hook); EXPORT_SYMBOL(br_fdb_get_hook); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 87dc556..b16383e 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1,4 +1,4 @@ -/* +/* -*- linux-c -*- * net/core/ethtool.c - Ethtool ioctl handler * Copyright (c) 2003 Matthew Wilcox * @@ -33,6 +33,12 @@ u32 ethtool_op_get_tx_csum(struct net_device *dev) return (dev->features & NETIF_F_ALL_CSUM) != 0; } +u32 ethtool_op_get_rx_all(struct net_device *dev, u32* retval) +{ + *retval = ((dev->priv_flags & IFF_ACCEPT_ALL_FRAMES) != 0); + return 0; +} + int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) { if (data) @@ -796,6 +802,88 @@ static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) return ret; } + +static int ethtool_get_rx_all(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + int rv = 0; + + if (!dev->ethtool_ops->get_rx_all) + return -EOPNOTSUPP; + + if ((rv = dev->ethtool_ops->get_rx_all(dev, &edata.data)) < 0) { + return rv; + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + + +static int ethtool_set_rx_all(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!dev->ethtool_ops->set_rx_all) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return dev->ethtool_ops->set_rx_all(dev, id.data); +} + +static int ethtool_get_rx_fcs(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + int rv = 0; + + if (!dev->ethtool_ops->get_save_fcs) + return -EOPNOTSUPP; + + if ((rv = dev->ethtool_ops->get_save_fcs(dev, &edata.data)) < 0) { + return rv; + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + + +static int ethtool_set_rx_fcs(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!dev->ethtool_ops->set_save_fcs) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return dev->ethtool_ops->set_save_fcs(dev, id.data); +} + + +/* Handle some generic ethtool commands here */ +static int ethtool_get_netdev_stats(struct net_device *dev, void *useraddr) { + + struct ethtool_ndstats* nds = (struct ethtool_ndstats*)(useraddr); + + struct net_device_stats *stats = dev->get_stats(dev); + if (stats) { + if (copy_to_user(nds->data, stats, sizeof(*stats))) { + return -EFAULT; + } + } + else { + return -EOPNOTSUPP; + } + return 0; +} + + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct ifreq *ifr) @@ -809,9 +897,6 @@ int dev_ethtool(struct ifreq *ifr) if (!dev || !netif_device_present(dev)) return -ENODEV; - if (!dev->ethtool_ops) - goto ioctl; - if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) return -EFAULT; @@ -835,13 +920,25 @@ int dev_ethtool(struct ifreq *ifr) if (!capable(CAP_NET_ADMIN)) return -EPERM; } - - if(dev->ethtool_ops->begin) + if(dev->ethtool_ops && dev->ethtool_ops->begin) if ((rc = dev->ethtool_ops->begin(dev)) < 0) return rc; old_features = dev->features; + /* Handle some generic operations that do not require specific + * ethtool handlers. + */ + switch (ethcmd) { + case ETHTOOL_GNDSTATS: + return ethtool_get_netdev_stats(dev, useraddr); + default: + break; + } + + if (!dev->ethtool_ops) + goto ioctl; + switch (ethcmd) { case ETHTOOL_GSET: rc = ethtool_get_settings(dev, useraddr); @@ -930,6 +1027,18 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_PHYS_ID: rc = ethtool_phys_id(dev, useraddr); break; + case ETHTOOL_SETRXALL: + rc = ethtool_set_rx_all(dev, useraddr); + break; + case ETHTOOL_GETRXALL: + rc = ethtool_get_rx_all(dev, useraddr); + break; + case ETHTOOL_SETRXFCS: + rc = ethtool_set_rx_fcs(dev, useraddr); + break; + case ETHTOOL_GETRXFCS: + rc = ethtool_get_rx_fcs(dev, useraddr); + break; case ETHTOOL_GSTATS: rc = ethtool_get_stats(dev, useraddr); break; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f47f319..02b855b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -208,6 +208,29 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz return netdev_store(dev, buf, len, change_tx_queue_len); } +NETDEVICE_SHOW(dflt_skb_mark, fmt_ulong); + +static ssize_t format_state(const struct net_device *net, char *buf) +{ + return sprintf(buf, fmt_long_hex, net->state); +} + +static ssize_t nd_show_state(struct class_device *cd, char *buf) +{ + return netdev_show(cd, buf, format_state); +} + +static int change_dflt_skb_mark(struct net_device *net, unsigned long new_val) +{ + net->dflt_skb_mark = new_val; + return 0; +} + +static ssize_t store_dflt_skb_mark(struct class_device *dev, const char *buf, size_t len) +{ + return netdev_store(dev, buf, len, change_dflt_skb_mark); +} + NETDEVICE_SHOW(weight, fmt_dec); static int change_weight(struct net_device *net, unsigned long new_weight) @@ -233,10 +256,13 @@ static struct class_device_attribute net_class_attributes[] = { __ATTR(carrier, S_IRUGO, show_carrier, NULL), __ATTR(dormant, S_IRUGO, show_dormant, NULL), __ATTR(operstate, S_IRUGO, show_operstate, NULL), + __ATTR(state, S_IRUGO, nd_show_state, NULL), __ATTR(mtu, S_IRUGO | S_IWUSR, show_mtu, store_mtu), __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), + __ATTR(dflt_skb_mark, S_IRUGO | S_IWUSR, show_dflt_skb_mark, + store_dflt_skb_mark), __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight), {} }; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 04d4b93..45a009d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -160,46 +160,23 @@ #include #include /* do_div */ #include +#include /* sched_clock() */ +#include "pktgen.h" + +#define USE_NQW_CALLBACK +#ifdef USE_NQW_CALLBACK +# include +# if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +# include +# include "../macvlan/macvlan.h" +# endif +#endif -#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n" +static const char* version = "pktgen v2.68: Packet Generator for packet performance testing.\n"; /* #define PG_DEBUG(a) a */ #define PG_DEBUG(a) -/* The buckets are exponential in 'width' */ -#define LAT_BUCKETS_MAX 32 -#define IP_NAME_SZ 32 -#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ -#define MPLS_STACK_BOTTOM __constant_htonl(0x00000100) - -/* Device flag bits */ -#define F_IPSRC_RND (1<<0) /* IP-Src Random */ -#define F_IPDST_RND (1<<1) /* IP-Dst Random */ -#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ -#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ -#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ -#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ -#define F_TXSIZE_RND (1<<6) /* Transmit size is random */ -#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ -#define F_MPLS_RND (1<<8) /* Random MPLS labels */ -#define F_VID_RND (1<<9) /* Random VLAN ID */ -#define F_SVID_RND (1<<10) /* Random SVLAN ID */ - -/* Thread control flag bits */ -#define T_TERMINATE (1<<0) -#define T_STOP (1<<1) /* Stop run */ -#define T_RUN (1<<2) /* Start run */ -#define T_REMDEVALL (1<<3) /* Remove all devs */ -#define T_REMDEV (1<<4) /* Remove one dev */ - -/* If lock -- can be removed after some work */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); - -/* Used to help with determining the pkts on receive */ -#define PKTGEN_MAGIC 0xbe9be955 -#define PG_PROC_DIR "pktgen" -#define PGCTRL "pgctrl" static struct proc_dir_entry *pg_proc_dir = NULL; #define MAX_CFLOWS 65536 @@ -207,176 +184,41 @@ static struct proc_dir_entry *pg_proc_dir = NULL; #define VLAN_TAG_SIZE(x) ((x)->vlan_id == 0xffff ? 0 : 4) #define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4) -struct flow_state { - __be32 cur_daddr; - int count; -}; - -struct pktgen_dev { - - /* - * Try to keep frequent/infrequent used vars. separated. - */ - - char ifname[IFNAMSIZ]; - char result[512]; - - struct pktgen_thread *pg_thread; /* the owner */ - struct list_head list; /* Used for chaining in the thread's run-queue */ - - int running; /* if this changes to false, the test will stop */ - - /* If min != max, then we will either do a linear iteration, or - * we will do a random selection from within the range. - */ - __u32 flags; - int removal_mark; /* non-zero => the device is marked for - * removal by worker thread */ - - int min_pkt_size; /* = ETH_ZLEN; */ - int max_pkt_size; /* = ETH_ZLEN; */ - int nfrags; - __u32 delay_us; /* Default delay */ - __u32 delay_ns; - __u64 count; /* Default No packets to send */ - __u64 sofar; /* How many pkts we've sent so far */ - __u64 tx_bytes; /* How many bytes we've transmitted */ - __u64 errors; /* Errors when trying to transmit, pkts will be re-sent */ - - /* runtime counters relating to clone_skb */ - __u64 next_tx_us; /* timestamp of when to tx next */ - __u32 next_tx_ns; - - __u64 allocated_skbs; - __u32 clone_count; - int last_ok; /* Was last skb sent? - * Or a failed transmit of some sort? This will keep - * sequence numbers in order, for example. - */ - __u64 started_at; /* micro-seconds */ - __u64 stopped_at; /* micro-seconds */ - __u64 idle_acc; /* micro-seconds */ - __u32 seq_num; - - int clone_skb; /* Use multiple SKBs during packet gen. If this number - * is greater than 1, then that many copies of the same - * packet will be sent before a new packet is allocated. - * For instance, if you want to send 1024 identical packets - * before creating a new packet, set clone_skb to 1024. - */ - - char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - - struct in6_addr in6_saddr; - struct in6_addr in6_daddr; - struct in6_addr cur_in6_daddr; - struct in6_addr cur_in6_saddr; - /* For ranges */ - struct in6_addr min_in6_daddr; - struct in6_addr max_in6_daddr; - struct in6_addr min_in6_saddr; - struct in6_addr max_in6_saddr; - - /* If we're doing ranges, random or incremental, then this - * defines the min/max for those ranges. - */ - __be32 saddr_min; /* inclusive, source IP address */ - __be32 saddr_max; /* exclusive, source IP address */ - __be32 daddr_min; /* inclusive, dest IP address */ - __be32 daddr_max; /* exclusive, dest IP address */ - - __u16 udp_src_min; /* inclusive, source UDP port */ - __u16 udp_src_max; /* exclusive, source UDP port */ - __u16 udp_dst_min; /* inclusive, dest UDP port */ - __u16 udp_dst_max; /* exclusive, dest UDP port */ - - /* DSCP + ECN */ - __u8 tos; /* six most significant bits of (former) IPv4 TOS are for dscp codepoint */ - __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 (see RFC 3260, sec. 4) */ - - /* MPLS */ - unsigned nr_labels; /* Depth of stack, 0 = no MPLS */ - __be32 labels[MAX_MPLS_LABELS]; - - /* VLAN/SVLAN (802.1Q/Q-in-Q) */ - __u8 vlan_p; - __u8 vlan_cfi; - __u16 vlan_id; /* 0xffff means no vlan tag */ - - __u8 svlan_p; - __u8 svlan_cfi; - __u16 svlan_id; /* 0xffff means no svlan tag */ - - __u32 src_mac_count; /* How many MACs to iterate through */ - __u32 dst_mac_count; /* How many MACs to iterate through */ - - unsigned char dst_mac[ETH_ALEN]; - unsigned char src_mac[ETH_ALEN]; - - __u32 cur_dst_mac_offset; - __u32 cur_src_mac_offset; - __be32 cur_saddr; - __be32 cur_daddr; - __u16 cur_udp_dst; - __u16 cur_udp_src; - __u32 cur_pkt_size; - - __u8 hh[14]; - /* = { - 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, - - We fill in SRC address later - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x00 - }; - */ - __u16 pad; /* pad out the hh struct to an even 16 bytes */ - - struct sk_buff *skb; /* skb we are to transmit next, mainly used for when we - * are transmitting the same one multiple times - */ - struct net_device *odev; /* The out-going device. Note that the device should - * have it's pg_info pointer pointing back to this - * device. This will be set when the user specifies - * the out-going device name (not when the inject is - * started as it used to do.) - */ - struct flow_state *flows; - unsigned cflows; /* Concurrent flows (config) */ - unsigned lflow; /* Flow length (config) */ - unsigned nflows; /* accumulated flows (stats) */ -}; - -struct pktgen_hdr { - __be32 pgh_magic; - __be32 seq_num; - __be32 tv_sec; - __be32 tv_usec; -}; - -struct pktgen_thread { - spinlock_t if_lock; - struct list_head if_list; /* All device here */ - struct list_head th_list; - struct task_struct *tsk; - char result[512]; - u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ - - /* Field for thread to receive "posted" events terminate, stop ifs etc. */ - - u32 control; - int pid; - int cpu; - - wait_queue_head_t queue; -}; - #define REMOVE 1 #define FIND 0 +static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove); +static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); +static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); +static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, + const char *ifname); +static int pktgen_device_event(struct notifier_block *, unsigned long, void *); +static void pktgen_run_all_threads(int background); +static void pktgen_stop_all_threads_ifs(void); +static int pktgen_stop_device(struct pktgen_dev *pkt_dev); +static void pktgen_stop(struct pktgen_thread *t); +static void pktgen_clear_counters(struct pktgen_dev *pkt_dev, int seq_too); +static int pktgen_mark_device(const char *ifname); +static unsigned int scan_ip6(const char *s, char ip[16]); +static unsigned int fmt_ip6(char *s, const char ip[16]); +static void clear_nqw_hook(struct pktgen_thread* t, struct net_device* dev); +static int set_nqw_hook(struct pktgen_thread* t, struct net_device* dev, int gfp); + +/* Module parameters, defaults. */ +static int pg_count_d = 1000; /* 1000 pkts by default */ +static int pg_delay_d = 0x7FFFFFFF; /* Don't run until someone sets a different delay. */ + +static int pg_clone_skb_d; +static int debug; + +static DEFINE_MUTEX(pktgen_thread_lock); +static LIST_HEAD(pktgen_threads); + +static struct notifier_block pktgen_notifier_block = { + .notifier_call = pktgen_device_event, +}; + + /* This code works around the fact that do_div cannot handle two 64-bit numbers, and regular 64-bit division doesn't work on x86 kernels. --Ben @@ -478,13 +320,6 @@ static inline u32 pktgen_random(void) #endif } -static inline __u64 getCurMs(void) -{ - struct timeval tv; - do_gettimeofday(&tv); - return tv_to_ms(&tv); -} - static inline __u64 getCurUs(void) { struct timeval tv; @@ -497,36 +332,17 @@ static inline __u64 tv_diff(const struct timeval *a, const struct timeval *b) return tv_to_us(a) - tv_to_us(b); } -/* old include end */ - -static char version[] __initdata = VERSION; - -static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); -static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); -static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, - const char *ifname); -static int pktgen_device_event(struct notifier_block *, unsigned long, void *); -static void pktgen_run_all_threads(void); -static void pktgen_stop_all_threads_ifs(void); -static int pktgen_stop_device(struct pktgen_dev *pkt_dev); -static void pktgen_stop(struct pktgen_thread *t); -static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); -static int pktgen_mark_device(const char *ifname); -static unsigned int scan_ip6(const char *s, char ip[16]); -static unsigned int fmt_ip6(char *s, const char ip[16]); - -/* Module parameters, defaults. */ -static int pg_count_d = 1000; /* 1000 pkts by default */ -static int pg_delay_d; -static int pg_clone_skb_d; -static int debug; +/* Since the machine booted. */ +static inline __u64 getRelativeCurUs(void) { + return pg_div(sched_clock(), 1000); +} -static DEFINE_MUTEX(pktgen_thread_lock); -static LIST_HEAD(pktgen_threads); +/* Since the machine booted. */ +static inline __u64 getRelativeCurNs(void) { + return sched_clock(); +} -static struct notifier_block pktgen_notifier_block = { - .notifier_call = pktgen_device_event, -}; +/* old include end */ /* * /proc handling functions @@ -535,7 +351,7 @@ static struct notifier_block pktgen_notifier_block = { static int pgctrl_show(struct seq_file *seq, void *v) { - seq_puts(seq, VERSION); + seq_puts(seq, version); return 0; } @@ -563,10 +379,14 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf, pktgen_stop_all_threads_ifs(); else if (!strcmp(data, "start")) - pktgen_run_all_threads(); + pktgen_run_all_threads(0); + + /* Run in the background. */ + else if (!strcmp(data, "bg_start")) + pktgen_run_all_threads(1); else - printk("pktgen: Unknown command: %s\n", data); + printk("pktgen: Unknown command: \"%s\"\n", data); err = count; @@ -579,6 +399,137 @@ static int pgctrl_open(struct inode *inode, struct file *file) return single_open(file, pgctrl_show, PDE(inode)->data); } +static int pg_populate_report(struct pktgen_dev_report* rpt, struct pktgen_dev* pkt_dev) { + int i; + + memset(rpt, 0, sizeof(*rpt)); + rpt->api_version = 1; + rpt->flags = pkt_dev->flags; + strncpy(rpt->thread_name, pkt_dev->pg_thread->tsk->comm, 32); + strncpy(rpt->interface_name, pkt_dev->ifname, 32); + rpt->min_pkt_size = pkt_dev->min_pkt_size; + rpt->max_pkt_size = pkt_dev->max_pkt_size; + rpt->clone_skb = pkt_dev->clone_skb; + rpt->peer_clone_skb = pkt_dev->peer_clone_skb; + rpt->nfrags = pkt_dev->nfrags; + + strncpy(rpt->dst_min, pkt_dev->dst_min, IP_NAME_SZ); + strncpy(rpt->dst_max, pkt_dev->dst_max, IP_NAME_SZ); + strncpy(rpt->src_min, pkt_dev->src_min, IP_NAME_SZ); + strncpy(rpt->src_max, pkt_dev->src_max, IP_NAME_SZ); + + memcpy(&rpt->in6_saddr, &pkt_dev->in6_saddr, sizeof(struct in6_addr)); + memcpy(&rpt->in6_daddr, &pkt_dev->in6_daddr, sizeof(struct in6_addr)); + + /* For ranges */ + memcpy(&rpt->min_in6_daddr, &pkt_dev->min_in6_daddr, sizeof(struct in6_addr)); + memcpy(&rpt->max_in6_daddr, &pkt_dev->max_in6_daddr, sizeof(struct in6_addr)); + memcpy(&rpt->min_in6_saddr, &pkt_dev->min_in6_saddr, sizeof(struct in6_addr)); + memcpy(&rpt->max_in6_saddr, &pkt_dev->max_in6_saddr, sizeof(struct in6_addr)); + + /* If we're doing ranges, random or incremental, then this + * defines the min/max for those ranges. + */ + rpt->saddr_min = pkt_dev->saddr_min; + rpt->saddr_max = pkt_dev->saddr_max; + rpt->daddr_min = pkt_dev->daddr_min; + rpt->daddr_max = pkt_dev->daddr_max; + + rpt->udp_src_min = pkt_dev->udp_src_min; + rpt->udp_src_max = pkt_dev->udp_src_max; + rpt->udp_dst_min = pkt_dev->udp_dst_min; + rpt->udp_dst_max = pkt_dev->udp_dst_max; + + /* MPLS */ + rpt->nr_labels = pkt_dev->nr_labels; /* Depth of stack, 0 = no MPLS */ + for (i = 0; ilabels[i] = pkt_dev->labels[i]; + } + + rpt->src_mac_count = pkt_dev->src_mac_count; + rpt->dst_mac_count = pkt_dev->dst_mac_count; + + memcpy(&rpt->dst_mac, &pkt_dev->dst_mac, ETH_ALEN); + memcpy(&rpt->src_mac, &pkt_dev->src_mac, ETH_ALEN); + + rpt->nflows = pkt_dev->nflows; + rpt->cflows = pkt_dev->cflows; + rpt->lflow = pkt_dev->lflow; + + rpt->delay_ns = pkt_dev->delay_ns; + rpt->count = pkt_dev->count; /* Default No packets to send */ + rpt->sofar = pkt_dev->sofar; /* How many pkts we've sent so far */ + rpt->tx_bytes = pkt_dev->tx_bytes; /* How many bytes we've transmitted */ + rpt->errors = pkt_dev->errors; /* Errors when trying to transmit, pkts will be re-sent */ + + /* Fields relating to receiving pkts */ + rpt->avg_latency = pkt_dev->avg_latency; /* in micro-seconds */ + rpt->min_latency = pkt_dev->min_latency; + rpt->max_latency = pkt_dev->max_latency; + for (i = 0; ilatency_bkts[i] = pkt_dev->latency_bkts[i]; + } + rpt->pkts_rcvd_since_clear = pkt_dev->pkts_rcvd_since_clear; + + rpt->ooo_rcvd = pkt_dev->ooo_rcvd; + rpt->pkts_rcvd = pkt_dev->pkts_rcvd; + rpt->dup_rcvd = pkt_dev->dup_rcvd; + rpt->bytes_rcvd = pkt_dev->bytes_rcvd; + rpt->seq_gap_rcvd = pkt_dev->seq_gap_rcvd; + rpt->non_pg_pkts_rcvd = pkt_dev->non_pg_pkts_rcvd; + return 0; +}; /* populate report */ + + +int pktgen_proc_ioctl(struct inode* inode, struct file* file, unsigned int cmd, + unsigned long arg) { + int err = 0; + struct pktgen_ioctl_info args; + struct pktgen_dev* pkt_dev = NULL; + + if (copy_from_user(&args, (void*)arg, sizeof(args))) { + return -EFAULT; + } + + /* Null terminate the names */ + args.thread_name[31] = 0; + args.interface_name[31] = 0; + + /* printk("pktgen: thread_name: %s interface_name: %s\n", + * args.thread_name, args.interface_name); + */ + + switch (cmd) { + case GET_PKTGEN_INTERFACE_INFO: { + mutex_lock(&pktgen_thread_lock); + pkt_dev = __pktgen_NN_threads(args.interface_name, FIND); + if (pkt_dev) { + pg_populate_report(&(args.report), pkt_dev); + if (copy_to_user((void*)(arg), &args, sizeof(args))) { + printk("ERROR: pktgen: copy_to_user failed.\n"); + err = -EFAULT; + } + else { + err = 0; + } + } + else { + printk("ERROR: pktgen: Could not find interface -:%s:-\n", + args.interface_name); + err = -ENODEV; + } + mutex_unlock(&pktgen_thread_lock); + break; + } + default: + printk("%s: Unknown pktgen IOCTL: %x \n", __FUNCTION__, + cmd); + return -EINVAL; + } + + return err; +}/* pktgen_proc_ioctl */ + static struct file_operations pktgen_fops = { .owner = THIS_MODULE, .open = pgctrl_open, @@ -586,6 +537,7 @@ static struct file_operations pktgen_fops = { .llseek = seq_lseek, .write = pgctrl_write, .release = single_release, + .ioctl = pktgen_proc_ioctl, }; static int pktgen_if_show(struct seq_file *seq, void *v) @@ -602,10 +554,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->max_pkt_size); seq_printf(seq, - " frags: %d delay: %u clone_skb: %d ifname: %s\n", + " frags: %d delay: %lluns clone_skb: %d peer_clone_skb: %d ifname: %s\n", pkt_dev->nfrags, - 1000 * pkt_dev->delay_us + pkt_dev->delay_ns, - pkt_dev->clone_skb, pkt_dev->ifname); + (unsigned long long)pkt_dev->delay_ns, + pkt_dev->clone_skb, pkt_dev->peer_clone_skb, + pkt_dev->ifname); seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); @@ -726,11 +679,29 @@ static int pktgen_if_show(struct seq_file *seq, void *v) stopped = now; /* not really stopped, more like last-running-at */ seq_printf(seq, - "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", + "Current:\n tx-pkts: %llu tx-errors: %llu tx-bytes: %llu\n", (unsigned long long)pkt_dev->sofar, - (unsigned long long)pkt_dev->errors, (unsigned long long)sa, + (unsigned long long)pkt_dev->errors, + (unsigned long long)pkt_dev->tx_bytes); + seq_printf(seq, + " rx-pkts: %llu rx-bytes: %llu\n", + (unsigned long long)pkt_dev->pkts_rcvd, + (unsigned long long)pkt_dev->bytes_rcvd); + + seq_printf(seq, + " blocked: %s next-tx-ns: %llu (%lli) started: %lluus stopped: %lluus idle: %lluns\n", + pkt_dev->tx_blocked ? "TRUE" : "false", + (unsigned long long)pkt_dev->next_tx_ns, + (long long)(pkt_dev->next_tx_ns - getRelativeCurNs()), + (unsigned long long)sa, (unsigned long long)stopped, - (unsigned long long)pkt_dev->idle_acc); + (unsigned long long)pkt_dev->idle_acc_ns); + seq_printf(seq, + " nanodelays: %llu sleeps: %llu queue_stopped: %llu tx-early: %llu\n", + (unsigned long long)pkt_dev->nanodelays, + (unsigned long long)pkt_dev->sleeps, + (unsigned long long)pkt_dev->queue_stopped, + (unsigned long long)pkt_dev->req_tx_early); seq_printf(seq, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", @@ -1014,15 +985,11 @@ static ssize_t pktgen_if_write(struct file *file, return len; } i += len; - if (value == 0x7FFFFFFF) { - pkt_dev->delay_us = 0x7FFFFFFF; - pkt_dev->delay_ns = 0; - } else { - pkt_dev->delay_us = value / 1000; - pkt_dev->delay_ns = value % 1000; + pkt_dev->delay_ns = value; + if ((getRelativeCurNs() + pkt_dev->delay_ns) > pkt_dev->next_tx_ns) { + pkt_dev->next_tx_ns = getRelativeCurNs() + pkt_dev->delay_ns; } - sprintf(pg_result, "OK: delay=%u", - 1000 * pkt_dev->delay_us + pkt_dev->delay_ns); + sprintf(pg_result, "OK: delay=%lluns", (unsigned long long)pkt_dev->delay_ns); return count; } if (!strcmp(name, "udp_src_min")) { @@ -1088,6 +1055,17 @@ static ssize_t pktgen_if_write(struct file *file, sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb); return count; } + if (!strcmp(name, "peer_clone_skb")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->peer_clone_skb = value; + + sprintf(pg_result, "OK: peer_clone_skb=%d", pkt_dev->peer_clone_skb); + return count; + } if (!strcmp(name, "count")) { len = num_arg(&user_buffer[i], 10, &value); if (len < 0) { @@ -1201,6 +1179,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->flags &= ~F_IPV6; else { + printk("pktgen: Flag -:%s:- unknown\n", f); sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, @@ -1432,6 +1411,8 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "src_mac")) { char *v = valstr; unsigned char *m = pkt_dev->src_mac; + unsigned char old_dmac[ETH_ALEN]; + memcpy(old_dmac, m, ETH_ALEN); len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) { @@ -1461,12 +1442,16 @@ static ssize_t pktgen_if_write(struct file *file, } } + /* Set up Dest MAC */ + if (compare_ether_addr(old_dmac, pkt_dev->src_mac)) + memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + sprintf(pg_result, "OK: srcmac"); return count; } if (!strcmp(name, "clear_counters")) { - pktgen_clear_counters(pkt_dev); + pktgen_clear_counters(pkt_dev, 0); sprintf(pg_result, "OK: Clearing counters.\n"); return count; } @@ -1663,6 +1648,7 @@ static ssize_t pktgen_if_write(struct file *file, return count; } + printk("pktgen: No such parameter \"%s\"\n", name); sprintf(pkt_dev->result, "No such parameter \"%s\"", name); return -EINVAL; } @@ -1679,6 +1665,7 @@ static struct file_operations pktgen_if_fops = { .llseek = seq_lseek, .write = pktgen_if_write, .release = single_release, + .ioctl = pktgen_proc_ioctl, }; static int pktgen_thread_show(struct seq_file *seq, void *v) @@ -1688,12 +1675,15 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); - seq_printf(seq, "Name: %s max_before_softirq: %d\n", - t->tsk->comm, t->max_before_softirq); + mutex_lock(&pktgen_thread_lock); + /* versioning info. CFG_RT means we do not busy-spin, so can be configured for + * real-time scheduling if user-space so desires. */ + seq_printf(seq, "VERSION-2 CFG_RT\n"); + seq_printf(seq, "PID: %d Name: %s max_before_softirq: %d\n", + t->pid, t->tsk->comm, t->max_before_softirq); seq_printf(seq, "Running: "); - if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) if (pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->ifname); @@ -1709,8 +1699,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) else seq_printf(seq, "\nResult: NA\n"); - if_unlock(t); - + mutex_unlock(&pktgen_thread_lock); return 0; } @@ -1778,18 +1767,42 @@ static ssize_t pktgen_thread_write(struct file *file, return -EFAULT; i += len; mutex_lock(&pktgen_thread_lock); - pktgen_add_device(t, f); + t->control_arg = f; + t->control |= T_ADD_DEV; + while (t->control & T_ADD_DEV) { + schedule_timeout_interruptible(msecs_to_jiffies(10)); + } + t->control_arg = 0; mutex_unlock(&pktgen_thread_lock); ret = count; sprintf(pg_result, "OK: add_device=%s", f); goto out; } + if (!strcmp(name, "rem_device")) { + char f[32]; + memset(f, 0, 32); + len = strn_len(&user_buffer[i], sizeof(f) - 1); + if (len < 0) { + ret = len; + goto out; + } + if (copy_from_user(f, &user_buffer[i], len)) + return -EFAULT; + i += len; + pktgen_mark_device(f); + ret = count; + sprintf(pg_result, "OK: rem_device=%s", f); + goto out; + } + if (!strcmp(name, "rem_device_all")) { mutex_lock(&pktgen_thread_lock); t->control |= T_REMDEVALL; mutex_unlock(&pktgen_thread_lock); - schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ + while (t->control & T_REMDEVALL) { + schedule_timeout_interruptible(msecs_to_jiffies(10)); + } ret = count; sprintf(pg_result, "OK: rem_device_all"); goto out; @@ -1805,6 +1818,8 @@ static ssize_t pktgen_thread_write(struct file *file, goto out; } + printk("pktgen: un-known command to pktgen_thread: -:%s:-\n", name); + ret = -EINVAL; out: return ret; @@ -1822,8 +1837,10 @@ static struct file_operations pktgen_thread_fops = { .llseek = seq_lseek, .write = pktgen_thread_write, .release = single_release, + .ioctl = pktgen_proc_ioctl, }; + /* Think find or remove for NN */ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) { @@ -1834,14 +1851,14 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) pkt_dev = pktgen_find_dev(t, ifname); if (pkt_dev) { if (remove) { - if_lock(t); pkt_dev->removal_mark = 1; t->control |= T_REMDEV; - if_unlock(t); } break; } } + /*PG_DEBUG(printk("pktgen_NN_threads, ifname: %s remove: %d pkt_dev: 0x%p\n", + ifname, remove, pkt_dev)); */ return pkt_dev; } @@ -1913,13 +1930,21 @@ static int pktgen_device_event(struct notifier_block *unused, /* Associate pktgen_dev with a device. */ -static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev) +static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev, struct pktgen_thread* t) { struct net_device *odev; /* Clean old setups */ if (pkt_dev->odev) { + +#ifdef USE_NQW_CALLBACK + /* Set the nqw callback hooks */ + rtnl_lock(); + clear_nqw_hook(t, pkt_dev->odev); + rtnl_unlock(); +#endif + pkt_dev->odev->pkt_dev = NULL; dev_put(pkt_dev->odev); pkt_dev->odev = NULL; } @@ -1941,6 +1966,14 @@ static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev) } pkt_dev->odev = odev; +#ifdef USE_NQW_CALLBACK + /* Set the nqw callback hooks */ + rtnl_lock(); + set_nqw_hook(t, pkt_dev->odev, GFP_ATOMIC); + rtnl_unlock(); +#endif + + pkt_dev->odev->pkt_dev = pkt_dev; return pkt_dev->odev; out_put: @@ -1957,7 +1990,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) { /* Try once more, just in case it works now. */ if (!pkt_dev->odev) - pktgen_setup_dev(pkt_dev); + pktgen_setup_dev(pkt_dev, pkt_dev->pg_thread); if (!pkt_dev->odev) { printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n"); @@ -1970,6 +2003,9 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) if (is_zero_ether_addr(pkt_dev->src_mac)) memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, ETH_ALEN); + else + memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + /* Set up Dest MAC */ memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); @@ -2060,30 +2096,188 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) pkt_dev->nflows = 0; } -static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) -{ - __u64 start; - __u64 now; - - start = now = getCurUs(); - printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now)); - while (now < spin_until_us) { - /* TODO: optimize sleeping behavior */ - if (spin_until_us - now > jiffies_to_usecs(1) + 1) - schedule_timeout_interruptible(1); - else if (spin_until_us - now > 100) { - do_softirq(); - if (!pkt_dev->running) - return; - if (need_resched()) - schedule(); + +#ifdef USE_NQW_CALLBACK +/* Runs from interrupt */ +int pg_notify_queue_woken(struct net_device* dev) { + /* Find the thread that needs waking. */ + struct pktgen_thread* t = ((struct pg_nqw_data*)(dev->nqw_data))->pg_thread; + t->control |= T_WAKE_BLOCKED; + wake_up_interruptible(&(t->queue)); + return 0; +} + +/* Must hold RTNL lock while calling this. */ +static int set_nqw_hook(struct pktgen_thread* t, struct net_device* dev, int gfp) { + /* The notify-queue-woken magic only works for physical + * devices at this time. So, apply hook to underlying + * device. + */ + struct pg_nqw_data* nqwd; + ASSERT_RTNL(); + BUG_ON(!t); + + if (!dev) { + WARN_ON(!dev); + return -ENODEV; + } +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (dev->priv_flags & IFF_MAC_VLAN) { + struct macvlan_vlan *vlan = dev->priv; + printk("pktgen: setting nqw_hook on lower mac-vlan dev: %p\n", vlan->lowerdev); + return set_nqw_hook(t, vlan->lowerdev, gfp); + } +#endif + + if (dev->priv_flags & IFF_802_1Q_VLAN) { + printk("pktgen: setting nqw_hook on real-dev of .1q vlan: %s\n", dev->name); + return set_nqw_hook(t, VLAN_DEV_INFO(dev)->real_dev, gfp); + } + + nqwd = (struct pg_nqw_data*)(dev->nqw_data); + + if (nqwd) { + if (nqwd->magic == PG_NQW_MAGIC) { + if (nqwd->pg_thread == t) { + atomic_inc(&(nqwd->nqw_ref_count)); + + printk("pktgen: Incremented nqw_ref_count: %d device: %s\n", + (int)(atomic_read(&(nqwd->nqw_ref_count))), dev->name); + return 0; + } + else { + printk("pktgen: ERROR: set_nqw_hook: nqwd thread does not match, dev: %s", + dev->name); + return -EINVAL; + } + } + else { + printk("wanlink: WARNING: set_nqw_hook: nqwd magic is NOT WanLink, dev: %s magic: 0x%x", + dev->name, nqwd->magic); + return 0; } + } + else { + nqwd = kmalloc(sizeof(*nqwd), gfp); + if (nqwd) { + memset(nqwd, 0, sizeof(*nqwd)); + nqwd->magic = PG_NQW_MAGIC; + atomic_inc(&(nqwd->nqw_ref_count)); + nqwd->pg_thread = t; + dev->nqw_data = nqwd; + dev->notify_queue_woken = pg_notify_queue_woken; + printk("pktgen: Added nqw callback to device: %s\n", + dev->name); + return 0; + } + else { + printk("pktgen: ERROR: could not allocate nqwd for dev: %s\n", dev->name); + return -ENOBUFS; + } + } +}//set_nqw_hook + + +/* Must hold RTNL lock while calling this. */ +static void clear_nqw_hook(struct pktgen_thread* t, struct net_device* dev) { + /* The notify-queue-woken magic only works for physical + * devices at this time. So, apply hook to underlying + * device. + */ + ASSERT_RTNL(); + BUG_ON(!t); + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (dev->priv_flags & IFF_MAC_VLAN) { + struct macvlan_vlan *vlan = dev->priv; + clear_nqw_hook(t, vlan->lowerdev); + return; + } +#endif - now = getCurUs(); + if (dev->priv_flags & IFF_802_1Q_VLAN) { + clear_nqw_hook(t, VLAN_DEV_INFO(dev)->real_dev); + return; } - pkt_dev->idle_acc += now - start; -} + if (dev->nqw_data) { + struct pg_nqw_data* nqwd = (struct pg_nqw_data*)(dev->nqw_data); + if (nqwd->magic == PG_NQW_MAGIC) { + if (t != nqwd->pg_thread) { + printk("pktgen ERROR: t != nqwd->pg_thread\n"); + } + atomic_dec(&(nqwd->nqw_ref_count)); + + printk("pktgen: Decremented nqw_ref_count: %d device: %s\n", + (int)(atomic_read(&(nqwd->nqw_ref_count))), + dev->name); + + BUG_ON(atomic_read(&(nqwd->nqw_ref_count)) < 0); + + if (atomic_read(&(nqwd->nqw_ref_count)) == 0) { + printk("pktgen: Removing nqw reference from device: %s\n", + dev->name); + dev->notify_queue_woken = NULL; + dev->nqw_data = NULL; + kfree(nqwd); + } + } + else { + printk("pktgen: WARNING: clear_nqw_hook: nqwd magic is NOT PKT-GEN, dev: %s magic: 0x%x", + dev->name, nqwd->magic); + } + } + else { + printk("pktgen: Warning: nqw_data is null in clear_nqw_hook, dev: %s\n", + dev->name); + } +}//clear_nqw_hook + +#endif + + +/* delay_ns is in nano-seconds */ +static void pg_nanodelay(u64 delay_ns, struct pktgen_dev* info) { + u64 idle_start = getRelativeCurNs(); + u64 last_time; + u64 _diff; + u64 itmp = idle_start; + struct pktgen_dev *p = NULL; + struct pktgen_thread* t = info->pg_thread; + + info->nanodelays++; + info->accum_delay_ns += delay_ns; + while (info->accum_delay_ns > PG_MAX_ACCUM_DELAY_NS) { + info->sleeps++; + interruptible_sleep_on_timeout(&(t->queue), 1); + /* will wake after one tick */ + last_time = itmp; + + /* Subtract delay from all interfaces for this thread, since all are blocked when + * any are blocked. + */ + itmp = getRelativeCurNs(); + _diff = (itmp - last_time); + list_for_each_entry(p, &t->if_list, list) { + p->accum_delay_ns -= _diff; + /* Limit saving up too much time... */ + if (p->accum_delay_ns < -10000000) { + p->accum_delay_ns = -10000000; + } + } + + /* For accounting, only charge this guy for the idle though...*/ + info->idle_acc_ns += _diff; + + /* break out if we are stopped or if we should transmit (maybe our ipg changed?) */ + if (info->removal_mark || (itmp >= info->next_tx_ns) || + (t->control && T_WAKE_BLOCKED) || + (t->control && T_STOP)) { + break; + } + }/* while */ +}//pg_nanodelay + /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst @@ -2472,6 +2666,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pgh->tv_sec = htonl(timestamp.tv_sec); pgh->tv_usec = htonl(timestamp.tv_usec); } + pkt_dev->seq_num++; /* Increase the pktgen sequence number for the next packet. */ return skb; } @@ -2817,7 +3012,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, pgh->tv_sec = htonl(timestamp.tv_sec); pgh->tv_usec = htonl(timestamp.tv_usec); } - /* pkt_dev->seq_num++; FF: you really mean this? */ + pkt_dev->seq_num++; /* Increase the pktgen seq number for the next packet. */ return skb; } @@ -2831,13 +3026,209 @@ static inline struct sk_buff *fill_packet(struct net_device *odev, return fill_packet_ipv4(odev, pkt_dev); } -static void pktgen_clear_counters(struct pktgen_dev *pkt_dev) -{ - pkt_dev->seq_num = 1; - pkt_dev->idle_acc = 0; + +static void record_latency(struct pktgen_dev* pkt_dev, int latency) { + /* NOTE: Latency can be negative */ + int div = 100; + int diff; + int vl; + int i; + + pkt_dev->pkts_rcvd_since_clear++; + + if (pkt_dev->pkts_rcvd_since_clear < 100) { + div = pkt_dev->pkts_rcvd; + if (pkt_dev->pkts_rcvd_since_clear == 1) { + pkt_dev->avg_latency = latency; + } + } + + if ((div + 1) == 0) { + pkt_dev->avg_latency = 0; + } + else { + pkt_dev->avg_latency = ((pkt_dev->avg_latency * div + latency) / (div + 1)); + } + + if (latency < pkt_dev->min_latency) { + pkt_dev->min_latency = latency; + } + if (latency > pkt_dev->max_latency) { + pkt_dev->max_latency = latency; + } + + /* Place the latency in the right 'bucket' */ + diff = (latency - pkt_dev->min_latency); + for (i = 0; ilatency_bkts[i]++; + break; + } + } +}/* record latency */ + + +/* Returns < 0 if the skb is not a pktgen buffer. */ +int pktgen_receive(struct sk_buff* skb) { + /* See if we have a pktgen packet */ + /* TODO: Add support for detecting IPv6, TCP packets too. This will only + * catch UDP at the moment. --Ben + */ + /* printk("pktgen-rcv, skb->len: %d\n", skb->len); */ + + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + if (!pskb_may_pull(skb, 20 + 8 + sizeof(struct pktgen_hdr))) { + return -1; + } + + if ((skb->len >= (20 + 8 + sizeof(struct pktgen_hdr))) && + (skb->protocol == __constant_htons(ETH_P_IP))) { + struct pktgen_hdr* pgh; + + /* It's IP, and long enough, lets check the magic number. + * TODO: This is a hack not always guaranteed to catch the right + * packets. + */ + + /* printk("Length & protocol passed, skb->data: %p, raw: %p\n", + skb->data, skb->h.raw); */ + + pgh = (struct pktgen_hdr*)(skb->data + 20 + 8); + + /* + tmp = (char*)(skb->data); + for (i = 0; i<90; i++) { + printk("%02hx ", tmp[i]); + if (((i + 1) % 15) == 0) { + printk("\n"); + } + } + printk("\n"); + */ + + if (pgh->pgh_magic == __constant_ntohl(PKTGEN_MAGIC)) { + struct net_device* dev = skb->dev; + struct pktgen_dev* pkt_dev; + __u32 seq = ntohl(pgh->seq_num); + + // TODO: Need lock..maybe + pkt_dev = dev->pkt_dev; + + if (!pkt_dev) { + return -1; + } + + pkt_dev->pkts_rcvd++; + pkt_dev->bytes_rcvd += skb->len; + + /* Check for out-of-sequence packets */ + if (pkt_dev->last_seq_rcvd == seq) { + pkt_dev->dup_rcvd++; + pkt_dev->dup_since_incr++; + } + else { + __s64 rx; + __s64 tx; + struct timeval txtv; + if (skb->tstamp.off_sec || skb->tstamp.off_usec) { + skb_get_timestamp(skb, &txtv); + } + else { + do_gettimeofday(&txtv); + skb_set_timestamp(skb, &txtv); + } + rx = tv_to_us(&txtv); + + txtv.tv_usec = ntohl(pgh->tv_usec); + txtv.tv_sec = ntohl(pgh->tv_sec); + tx = tv_to_us(&txtv); + record_latency(pkt_dev, rx - tx); + + if ((pkt_dev->last_seq_rcvd + 1) == seq) { + if ((pkt_dev->peer_clone_skb > 1) && + (pkt_dev->peer_clone_skb > (pkt_dev->dup_since_incr + 1))) { + + pkt_dev->seq_gap_rcvd += (pkt_dev->peer_clone_skb - + pkt_dev->dup_since_incr - 1); + } + /* Great, in order...all is well */ + } + else if (pkt_dev->last_seq_rcvd < seq) { + /* sequence gap, means we dropped a pkt most likely */ + if (pkt_dev->peer_clone_skb > 1) { + /* We dropped more than one sequence number's worth, + * and if we're using clone_skb, then this is quite + * a few. This number still will not be exact, but + * it will be closer. + */ + pkt_dev->seq_gap_rcvd += (((seq - pkt_dev->last_seq_rcvd) * + pkt_dev->peer_clone_skb) - + pkt_dev->dup_since_incr); + } + else { + pkt_dev->seq_gap_rcvd += (seq - pkt_dev->last_seq_rcvd - 1); + } + } + else { + pkt_dev->ooo_rcvd++; /* out-of-order */ + } + + pkt_dev->dup_since_incr = 0; + } + pkt_dev->last_seq_rcvd = seq; + kfree_skb(skb); + if (debug > 1) { + printk("done with pktgen_receive, free'd pkt\n"); + } + return 0; + } + } + return -1; /* Let another protocol handle it, it's not for us! */ +}/* pktgen_receive */ + +static void pg_reset_latency_counters(struct pktgen_dev* pkt_dev) { + int i; + pkt_dev->avg_latency = 0; + pkt_dev->min_latency = 0x7fffffff; /* largest integer */ + pkt_dev->max_latency = 0x80000000; /* smallest integer */ + pkt_dev->pkts_rcvd_since_clear = 0; + for (i = 0; ilatency_bkts[i] = 0; + } +} + + +static void pktgen_clear_counters(struct pktgen_dev *pkt_dev, int seq_too) { + pkt_dev->idle_acc_ns = 0; pkt_dev->sofar = 0; pkt_dev->tx_bytes = 0; pkt_dev->errors = 0; + pkt_dev->pkts_rcvd_since_clear = 0; + + pkt_dev->ooo_rcvd = 0; + pkt_dev->dup_rcvd = 0; + pkt_dev->pkts_rcvd = 0; + pkt_dev->bytes_rcvd = 0; + pkt_dev->non_pg_pkts_rcvd = 0; + pkt_dev->seq_gap_rcvd = 0; /* dropped */ + + /* Clear some transient state */ + pkt_dev->accum_delay_ns = 0; + pkt_dev->sleeps = 0; + pkt_dev->nanodelays = 0; + + /* This is a bit of a hack, but it gets the dup counters + * in line so we don't have false alarms on dropped pkts. + */ + if (seq_too) { + pkt_dev->dup_since_incr = pkt_dev->peer_clone_skb - 1; + pkt_dev->seq_num = 0; + pkt_dev->last_seq_rcvd = 0; + } + + pg_reset_latency_counters(pkt_dev); } /* Set up structure for sending pkts, clear counters */ @@ -2849,30 +3240,31 @@ static void pktgen_run(struct pktgen_thread *t) PG_DEBUG(printk("pktgen: entering pktgen_run. %p\n", t)); - if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) { - - /* - * setup odev and create initial packet. - */ - pktgen_setup_inject(pkt_dev); - - if (pkt_dev->odev) { - pktgen_clear_counters(pkt_dev); - pkt_dev->running = 1; /* Cranke yeself! */ - pkt_dev->skb = NULL; - pkt_dev->started_at = getCurUs(); - pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */ - pkt_dev->next_tx_ns = 0; - - strcpy(pkt_dev->result, "Starting"); - started++; - } else - strcpy(pkt_dev->result, "Error starting"); + /* If already running, then ignore. */ + if (! pkt_dev->running) { + /** Clear counters before we setup the first packet. + */ + pktgen_clear_counters(pkt_dev, 1); + + /* + * setup odev and create initial packet. + */ + pktgen_setup_inject(pkt_dev); + + if (pkt_dev->odev) { + pkt_dev->running = 1; /* Cranke yeself! */ + pkt_dev->skb = NULL; + pkt_dev->started_at = getCurUs(); + /* Transmit first pkt in 20ms to give peer endpoint time to start. */ + pkt_dev->next_tx_ns = getRelativeCurNs() + 20 * 1000000; + + strcpy(pkt_dev->result, "Starting"); + started++; + } else + strcpy(pkt_dev->result, "Error starting"); + } } - if_unlock(t); - if (started) - t->control &= ~(T_STOP); } static void pktgen_stop_all_threads_ifs(void) @@ -2889,65 +3281,12 @@ static void pktgen_stop_all_threads_ifs(void) mutex_unlock(&pktgen_thread_lock); } -static int thread_is_running(struct pktgen_thread *t) -{ - struct pktgen_dev *pkt_dev; - int res = 0; - - list_for_each_entry(pkt_dev, &t->if_list, list) - if (pkt_dev->running) { - res = 1; - break; - } - return res; -} - -static int pktgen_wait_thread_run(struct pktgen_thread *t) -{ - if_lock(t); - - while (thread_is_running(t)) { - - if_unlock(t); - - msleep_interruptible(100); - - if (signal_pending(current)) - goto signal; - if_lock(t); - } - if_unlock(t); - return 1; -signal: - return 0; -} - -static int pktgen_wait_all_threads_run(void) -{ - struct pktgen_thread *t; - int sig = 1; - - mutex_lock(&pktgen_thread_lock); - - list_for_each_entry(t, &pktgen_threads, th_list) { - sig = pktgen_wait_thread_run(t); - if (sig == 0) - break; - } - - if (sig == 0) - list_for_each_entry(t, &pktgen_threads, th_list) - t->control |= (T_STOP); - mutex_unlock(&pktgen_thread_lock); - return sig; -} - -static void pktgen_run_all_threads(void) -{ +static void pktgen_run_all_threads(int background) { struct pktgen_thread *t; - PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n")); + PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads, background: %d\n", + background)); mutex_lock(&pktgen_thread_lock); @@ -2956,9 +3295,14 @@ static void pktgen_run_all_threads(void) mutex_unlock(&pktgen_thread_lock); - schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ + /* This is a hack at best...disabling, we should not have to depend on this. */ + /*schedule_timeout_interruptible(msecs_to_jiffies(125));*/ /* Propagate thread->control */ - pktgen_wait_all_threads_run(); + // Much harder to get rid of the if_lock if we allow this to block... + if (!background) { + printk("ERROR: non-background mode no longer supported.\n"); + //pktgen_wait_all_threads_run(); + } } static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) @@ -2968,7 +3312,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) total_us = pkt_dev->stopped_at - pkt_dev->started_at; - idle = pkt_dev->idle_acc; + idle = do_div(pkt_dev->idle_acc_ns, 1000); p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n", (unsigned long long)total_us, @@ -3017,22 +3361,62 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) return 0; } -static struct pktgen_dev *next_to_run(struct pktgen_thread *t) -{ - struct pktgen_dev *pkt_dev, *best = NULL; - - if_lock(t); +/** Find the adapter that needs to tx next. + * We need to take the blocked adapters into account, but can't ignore + * them forever just in case we missed the tx-queue-wake event for some + * reason. + */ +static struct pktgen_dev *next_to_run(struct pktgen_thread *t, u64 now, u64* next_running_delay) { + struct pktgen_dev *pkt_dev = NULL; + struct pktgen_dev *best = NULL; + struct pktgen_dev *best_blocked = NULL; list_for_each_entry(pkt_dev, &t->if_list, list) { if (!pkt_dev->running) continue; - if (best == NULL) - best = pkt_dev; - else if (pkt_dev->next_tx_us < best->next_tx_us) - best = pkt_dev; + if (pkt_dev->tx_blocked) { + if (best_blocked == NULL) + best_blocked = pkt_dev; + else { + if (pkt_dev->next_tx_ns < best_blocked->next_tx_ns) { + best_blocked = pkt_dev; + } + } + } + else { + if (best == NULL) + best = pkt_dev; + else { + if (pkt_dev->next_tx_ns < best->next_tx_ns) { + best = pkt_dev; + } + } + } + } + + /** If we have a blocked device that is more than 1ms late, then try it again first. + * Otherwise, take best non-blocked device. + */ + if (best) { + if (best->next_tx_ns <= now) { + *next_running_delay = 0; + } + else { + *next_running_delay = best->next_tx_ns - now; + } + } + else { + *next_running_delay = 10000000; /* 10ms */ + } + + if (best_blocked && (best_blocked->next_tx_ns < (now - PG_TRY_TX_ANYWAY_NS))) { + return best_blocked; + } + + if (best) { + return best; } - if_unlock(t); - return best; + return best_blocked; } static void pktgen_stop(struct pktgen_thread *t) @@ -3041,8 +3425,6 @@ static void pktgen_stop(struct pktgen_thread *t) PG_DEBUG(printk("pktgen: entering pktgen_stop\n")); - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) { pktgen_stop_device(pkt_dev); if (pkt_dev->skb) @@ -3050,8 +3432,6 @@ static void pktgen_stop(struct pktgen_thread *t) pkt_dev->skb = NULL; } - - if_unlock(t); } /* @@ -3065,8 +3445,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) PG_DEBUG(printk("pktgen: entering pktgen_rem_one_if\n")); - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -3081,10 +3459,15 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) break; } - - if_unlock(t); } +static void pktgen_unblock_all_ifs(struct pktgen_thread *t) { + struct pktgen_dev *p = NULL;; + list_for_each_entry(p, &t->if_list, list) + p->tx_blocked = 0; +}/* wake all writers */ + + static void pktgen_rem_all_ifs(struct pktgen_thread *t) { struct list_head *q, *n; @@ -3093,8 +3476,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) /* Remove all devices, free mem */ PG_DEBUG(printk("pktgen: entering pktgen_rem_all_ifs\n")); - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -3104,8 +3485,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) pktgen_remove_device(t, cur); } - - if_unlock(t); } static void pktgen_rem_thread(struct pktgen_thread *t) @@ -3121,34 +3500,41 @@ static void pktgen_rem_thread(struct pktgen_thread *t) mutex_unlock(&pktgen_thread_lock); } -static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) -{ - struct net_device *odev = NULL; +static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev, u64 now) { + struct net_device *odev; __u64 idle_start = 0; int ret; - + odev = pkt_dev->odev; - if (pkt_dev->delay_us || pkt_dev->delay_ns) { - u64 now; - - now = getCurUs(); - if (now < pkt_dev->next_tx_us) - spin(pkt_dev, pkt_dev->next_tx_us); - + if (pkt_dev->delay_ns || (pkt_dev->accum_delay_ns > 0)) { + if (now < pkt_dev->next_tx_ns) { + /* Don't tx early..*/ + pkt_dev->req_tx_early++; + goto out; + } + /* This is max DELAY, this has special meaning of * "never transmit" */ - if (pkt_dev->delay_us == 0x7FFFFFFF) { - pkt_dev->next_tx_us = getCurUs() + pkt_dev->delay_us; - pkt_dev->next_tx_ns = pkt_dev->delay_ns; + if (pkt_dev->delay_ns == 0x7FFFFFFF) { + pkt_dev->next_tx_ns = getRelativeCurNs() + pkt_dev->delay_ns; goto out; } } - if (netif_queue_stopped(odev) || need_resched()) { - idle_start = getCurUs(); - + if (need_resched()) { + idle_start = getRelativeCurNs(); + schedule(); + pkt_dev->idle_acc_ns += getRelativeCurNs() - idle_start; + } + + if (netif_queue_stopped(odev)) { + pkt_dev->queue_stopped++; + pkt_dev->tx_blocked = 1; + /* change tx time to now to show work was at least attempted. */ + pkt_dev->next_tx_ns = now; + if (!netif_running(odev)) { pktgen_stop_device(pkt_dev); if (pkt_dev->skb) @@ -3156,16 +3542,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->skb = NULL; goto out; } - if (need_resched()) - schedule(); - pkt_dev->idle_acc += getCurUs() - idle_start; - - if (netif_queue_stopped(odev)) { - pkt_dev->next_tx_us = getCurUs(); /* TODO */ - pkt_dev->next_tx_ns = 0; - goto out; /* Try the next interface */ - } + goto out; /* Try the next interface */ } if (pkt_dev->last_ok || !pkt_dev->skb) { @@ -3196,40 +3574,44 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) if (likely(ret == NETDEV_TX_OK)) { pkt_dev->last_ok = 1; pkt_dev->sofar++; - pkt_dev->seq_num++; pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; + pkt_dev->next_tx_ns = getRelativeCurNs() + pkt_dev->delay_ns; + pkt_dev->tx_blocked = 0; } else if (ret == NETDEV_TX_LOCKED && (odev->features & NETIF_F_LLTX)) { cpu_relax(); goto retry_now; } else { /* Retry it next time */ - + static int do_once_hsx_wrn = 1; + if (do_once_hsx_wrn) { + printk(KERN_INFO "pktgen: Hard xmit error, driver for %s doesn't do queue-stopped quite right.\n", odev->name); + printk(KERN_INFO "pktgen: Transmit request will be retried, and this error msg will not be printed again..\n"); + do_once_hsx_wrn = 0; + } + atomic_dec(&(pkt_dev->skb->users)); - if (debug && net_ratelimit()) - printk(KERN_INFO "pktgen: Hard xmit error\n"); - + pkt_dev->queue_stopped++; pkt_dev->errors++; pkt_dev->last_ok = 0; - } - - pkt_dev->next_tx_us = getCurUs(); - pkt_dev->next_tx_ns = 0; - pkt_dev->next_tx_us += pkt_dev->delay_us; - pkt_dev->next_tx_ns += pkt_dev->delay_ns; - - if (pkt_dev->next_tx_ns > 1000) { - pkt_dev->next_tx_us++; - pkt_dev->next_tx_ns -= 1000; + /* Try a little later..flag us as wanting to tx, but unable. Will try again shortly. + */ + pkt_dev->tx_blocked = 1; + /* change tx time to now to show work was at least attempted. */ + pkt_dev->next_tx_ns = now; } } else { /* Retry it next time */ + pkt_dev->queue_stopped++; pkt_dev->last_ok = 0; - pkt_dev->next_tx_us = getCurUs(); /* TODO */ - pkt_dev->next_tx_ns = 0; + /* Try a little later..flag us as wanting to tx, but unable. Will try again shortly. + */ + pkt_dev->tx_blocked = 1; + /* change tx time to now to show work was at least attempted. */ + pkt_dev->next_tx_ns = now; } netif_tx_unlock_bh(odev); @@ -3237,14 +3619,14 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { if (atomic_read(&(pkt_dev->skb->users)) != 1) { - idle_start = getCurUs(); + idle_start = getRelativeCurNs(); while (atomic_read(&(pkt_dev->skb->users)) != 1) { if (signal_pending(current)) { break; } schedule(); } - pkt_dev->idle_acc += getCurUs() - idle_start; + pkt_dev->idle_acc_ns += getRelativeCurNs() - idle_start; } /* Done with this */ @@ -3268,7 +3650,9 @@ static int pktgen_thread_worker(void *arg) int cpu = t->cpu; u32 max_before_softirq; u32 tx_since_softirq = 0; - + u64 now; + u64 next_running_delay; + BUG_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); @@ -3282,7 +3666,15 @@ static int pktgen_thread_worker(void *arg) set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { - pkt_dev = next_to_run(t); + find_best: + + if (t->control & T_WAKE_BLOCKED) { + pktgen_unblock_all_ifs(t); + t->control &= ~(T_WAKE_BLOCKED); + } + + now = getRelativeCurNs(); + pkt_dev = next_to_run(t, now, &next_running_delay); if (!pkt_dev && (t->control & (T_STOP | T_RUN | T_REMDEVALL | T_REMDEV)) @@ -3296,8 +3688,40 @@ static int pktgen_thread_worker(void *arg) __set_current_state(TASK_RUNNING); if (pkt_dev) { - - pktgen_xmit(pkt_dev); + if (pkt_dev->tx_blocked) { + /* If blocked for less than 1ms, then sleep for up to 1ms. If the + * device un-blocks, then we will be woken by the wait-queue callback. + */ + u64 tx_anyway_ns = (now - PG_TRY_TX_ANYWAY_NS); + if (pkt_dev->next_tx_ns > tx_anyway_ns) { + pg_nanodelay(min(next_running_delay, (u64)(PG_TRY_TX_ANYWAY_NS)), + pkt_dev); + /* Maybe things have changed since we went to sleep. */ + goto find_best; + } + } + + /* If the best to run should not run yet, then sleep (or accumulate sleep) */ + if (now < pkt_dev->next_tx_ns) { + /* spin(pkt_dev, pkt_dev->next_tx_us); */ + u64 next_ipg = pkt_dev->next_tx_ns - now; + + /* These will not actually busy-spin now. Will run as + * much as 1ms fast, and will sleep in 1ms units, assuming + * our tick is 1ms. + */ + pg_nanodelay(next_ipg, pkt_dev); + now = getRelativeCurNs(); + if (pkt_dev->removal_mark || + (pkt_dev->pg_thread->control && T_STOP)) { + goto skip_tx; + } + } + + + pktgen_xmit(pkt_dev, now); + + skip_tx: /* * We like to stay RUNNING but must also give @@ -3323,6 +3747,11 @@ static int pktgen_thread_worker(void *arg) t->control &= ~(T_RUN); } + if (t->control & T_ADD_DEV) { + pktgen_add_device(t, (char*)(t->control_arg)); + t->control &= ~(T_ADD_DEV); + } + if (t->control & T_REMDEVALL) { pktgen_rem_all_ifs(t); t->control &= ~(T_REMDEVALL); @@ -3352,16 +3781,12 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char *ifname) { struct pktgen_dev *p, *pkt_dev = NULL; - if_lock(t); - list_for_each_entry(p, &t->if_list, list) if (strncmp(p->ifname, ifname, IFNAMSIZ) == 0) { pkt_dev = p; break; } - - if_unlock(t); - PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev)); + /* PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev)); */ return pkt_dev; } @@ -3374,8 +3799,6 @@ static int add_dev_to_thread(struct pktgen_thread *t, { int rv = 0; - if_lock(t); - if (pkt_dev->pg_thread) { printk("pktgen: ERROR: already assigned to a thread.\n"); rv = -EBUSY; @@ -3387,12 +3810,9 @@ static int add_dev_to_thread(struct pktgen_thread *t, pkt_dev->running = 0; out: - if_unlock(t); return rv; } -/* Called under thread lock */ - static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) { struct pktgen_dev *pkt_dev; @@ -3405,7 +3825,10 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) printk("pktgen: ERROR: interface already used.\n"); return -EBUSY; } - + else { + printk("pktgen: Attempting to add device: %s\n", ifname); + } + pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); if (!pkt_dev) return -ENOMEM; @@ -3422,8 +3845,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->max_pkt_size = ETH_ZLEN; pkt_dev->nfrags = 0; pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->delay_us = pg_delay_d / 1000; - pkt_dev->delay_ns = pg_delay_d % 1000; + pkt_dev->delay_ns = pg_delay_d; pkt_dev->count = pg_count_d; pkt_dev->sofar = 0; pkt_dev->udp_src_min = 9; /* sink port */ @@ -3440,7 +3862,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) strncpy(pkt_dev->ifname, ifname, IFNAMSIZ); - if (!pktgen_setup_dev(pkt_dev)) { + if (!pktgen_setup_dev(pkt_dev, t)) { printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); if (pkt_dev->flows) vfree(pkt_dev->flows); @@ -3475,7 +3897,6 @@ static int __init pktgen_create_thread(int cpu) return -ENOMEM; } - spin_lock_init(&t->if_lock); t->cpu = cpu; INIT_LIST_HEAD(&t->if_list); @@ -3540,6 +3961,14 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Dis-associate from the interface */ if (pkt_dev->odev) { + +#ifdef USE_NQW_CALLBACK + /* Set the nqw callback hooks */ + rtnl_lock(); + clear_nqw_hook(t, pkt_dev->odev); + rtnl_unlock(); +#endif + pkt_dev->odev->pkt_dev = NULL; dev_put(pkt_dev->odev); pkt_dev->odev = NULL; } @@ -3550,6 +3979,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Clean up proc file system */ + PG_DEBUG(printk("pktgen: removing proc entry: %s (0x%p)\n", pkt_dev->ifname, pg_proc_dir)); remove_proc_entry(pkt_dev->ifname, pg_proc_dir); if (pkt_dev->flows) @@ -3564,7 +3994,17 @@ static int __init pg_init(void) struct proc_dir_entry *pe; printk(version); - + printk("sizeof report: %d, sizeof in6_addr: %d\n", + (int)(sizeof(struct pktgen_dev_report)), + (int)(sizeof(struct in6_addr))); + + if (handle_pktgen_hook) { + printk("pktgen: ERROR: pktgen is already loaded it seems..\n"); + /* Already loaded */ + return -EEXIST; + } + + pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); if (!pg_proc_dir) return -ENODEV; @@ -3601,6 +4041,9 @@ static int __init pg_init(void) return -ENODEV; } + handle_pktgen_hook = pktgen_receive; + PG_DEBUG(printk("pktgen initialization complete.\n")); + return 0; } @@ -3623,8 +4066,11 @@ static void __exit pg_cleanup(void) unregister_netdevice_notifier(&pktgen_notifier_block); /* Clean up proc file system */ + PG_DEBUG(printk("pktgen: removing proc entry: %s (0x%p)\n", PGCTRL, pg_proc_dir)); remove_proc_entry(PGCTRL, pg_proc_dir); proc_net_remove(PG_PROC_DIR); + + handle_pktgen_hook = NULL; } module_init(pg_init); diff --git a/net/core/pktgen.h b/net/core/pktgen.h new file mode 100644 index 0000000..1ef5a39 --- /dev/null +++ b/net/core/pktgen.h @@ -0,0 +1,358 @@ +/* -*-linux-c-*- + * $Id: candela_2.6.20.patch,v 1.13 2008/08/15 18:01:32 greear Exp $ + * pktgen.c: Packet Generator for performance evaluation. + * + * See pktgen.c for details of changes, etc. +*/ + + +#ifndef PKTGEN_H_INCLUDE_KERNEL__ +#define PKTGEN_H_INCLUDE_KERNEL__ + +#include +#include + +/* The buckets are exponential in 'width' */ +#define LAT_BUCKETS_MAX 32 +#define PG_MAX_ACCUM_DELAY_NS 1000000 /* one ms */ +#define PG_TRY_TX_ANYWAY_NS 1000000 /* try a blocked tx queue after 1 ms. */ + +#define IP_NAME_SZ 32 +#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ +#define MPLS_STACK_BOTTOM __constant_htonl(0x00000100) + +/* Device flag bits */ +#define F_IPSRC_RND (1<<0) /* IP-Src Random */ +#define F_IPDST_RND (1<<1) /* IP-Dst Random */ +#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ +#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ +#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ +#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ +#define F_TXSIZE_RND (1<<6) /* Transmit packet size is random */ +#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ +#define F_MPLS_RND (1<<8) /* Random MPLS labels */ +#define F_VID_RND (1<<9) /* Random VLAN ID */ +#define F_SVID_RND (1<<10) /* Random SVLAN ID */ + +/* Thread control flag bits */ +#define T_TERMINATE (1<<0) +#define T_STOP (1<<1) /* Stop run */ +#define T_RUN (1<<2) /* Start run */ +#define T_REMDEVALL (1<<3) /* Remove all devs */ +#define T_REMDEV (1<<4) /* Remove one dev */ +#define T_WAKE_BLOCKED (1<<5) /* Wake up all blocked net-devices. */ +#define T_ADD_DEV (1<<6) /* Add a device. */ + +/* Used to help with determining the pkts on receive */ +#define PKTGEN_MAGIC 0xbe9be955 +#define PG_PROC_DIR "pktgen" +#define PGCTRL "pgctrl" + + +struct flow_state { + __be32 cur_daddr; + int count; +}; + +struct pktgen_dev { + + /* + * Try to keep frequent/infrequent used vars. separated. + */ + + char ifname[IFNAMSIZ]; + char result[512]; + + struct pktgen_thread *pg_thread; /* the owner */ + struct list_head list; /* Used for chaining in the thread's run-queue */ + + int running; /* if this changes to false, the test will stop */ + + /* If min != max, then we will either do a linear iteration, or + * we will do a random selection from within the range. + */ + __u32 flags; + int removal_mark; /* non-zero => the device is marked for + * removal by worker thread */ + + __u32 min_pkt_size; /* = ETH_ZLEN; */ + __u32 max_pkt_size; /* = ETH_ZLEN; */ + __u32 nfrags; + __u64 delay_ns; /* Delay this much between sending packets. */ + __u64 count; /* Default No packets to send */ + __u64 sofar; /* How many pkts we've sent so far */ + __u64 tx_bytes; /* How many bytes we've transmitted */ + __u64 errors; /* Errors when trying to transmit, pkts will be re-sent */ + __u64 nanodelays; /* how many times have we called nano-delay on this device? */ + __s64 accum_delay_ns; /* Accumulated delay..when >= 1ms, we'll sleep on a wait queue. */ + __u64 sleeps; /* How many times have we gone to sleep on the wait queue. */ + __u64 queue_stopped; /* How many times was queue stopped when we tried to xmit? */ + /* runtime counters relating to clone_skb */ + __u64 next_tx_ns; /* timestamp of when to tx next */ + __u64 req_tx_early; /* requested to tx, but is too early for us to tx. */ + + __u64 allocated_skbs; + __u32 clone_count; + + int tx_blocked; /* Need to tx as soon as able... */ + int last_ok; /* Was last skb sent? + * Or a failed transmit of some sort? This will keep + * sequence numbers in order, for example. + */ + __u64 started_at; /* micro-seconds */ + __u64 stopped_at; /* micro-seconds */ + __u64 idle_acc_ns; /* nano-seconds */ + __u32 seq_num; + + __u32 clone_skb; /* Use multiple SKBs during packet gen. If this number + * is greater than 1, then that many copies of the same + * packet will be sent before a new packet is allocated. + * For instance, if you want to send 1024 identical packets + * before creating a new packet, set clone_skb to 1024. + */ + __u32 peer_clone_skb; /* Peer (transmitter's) clone setting. */ + + char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + + struct in6_addr in6_saddr; + struct in6_addr in6_daddr; + struct in6_addr cur_in6_daddr; + struct in6_addr cur_in6_saddr; + /* For ranges */ + struct in6_addr min_in6_daddr; + struct in6_addr max_in6_dad