diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 7e0d8da..5609853 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c @@ -124,6 +124,7 @@ unsigned long long sched_clock(void) /* return the value in ns */ return cycles_2_ns(this_offset); } +EXPORT_SYMBOL(sched_clock); static unsigned long calculate_cpu_khz(void) { diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index ffd1cb8..74b2b85 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -510,6 +510,7 @@ unsigned long long sched_clock(void) rdtscll(a); return cycles_2_ns(a); } +EXPORT_SYMBOL(sched_clock); static unsigned long get_cmos_time(void) { diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index e4f4eaf..c8806d9 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -1110,6 +1110,8 @@ static void __devexit rtl8139_remove_one (struct pci_dev *pdev) assert (dev != NULL); + flush_scheduled_work(); + unregister_netdev (dev); __rtl8139_cleanup_dev (dev); @@ -1603,18 +1605,20 @@ static void rtl8139_thread (void *_data) struct rtl8139_private *tp = netdev_priv(dev); unsigned long thr_delay = next_tick; + rtnl_lock(); + if (!netif_running(dev)) + goto out_unlock; + if (tp->watchdog_fired) { tp->watchdog_fired = 0; rtl8139_tx_timeout_task(_data); - } else if (rtnl_trylock()) { - rtl8139_thread_iter (dev, tp, tp->mmio_addr); - rtnl_unlock (); - } else { - /* unlikely race. mitigate with fast poll. 
*/ - thr_delay = HZ / 2; - } + } else + rtl8139_thread_iter(dev, tp, tp->mmio_addr); - schedule_delayed_work(&tp->thread, thr_delay); + if (tp->have_thread) + schedule_delayed_work(&tp->thread, thr_delay); +out_unlock: + rtnl_unlock (); } static void rtl8139_start_thread(struct rtl8139_private *tp) @@ -1626,19 +1630,11 @@ static void rtl8139_start_thread(struct rtl8139_private *tp) return; tp->have_thread = 1; + tp->watchdog_fired = 0; schedule_delayed_work(&tp->thread, next_tick); } -static void rtl8139_stop_thread(struct rtl8139_private *tp) -{ - if (tp->have_thread) { - cancel_rearming_delayed_work(&tp->thread); - tp->have_thread = 0; - } else - flush_scheduled_work(); -} - static inline void rtl8139_tx_clear (struct rtl8139_private *tp) { tp->cur_tx = 0; @@ -1695,12 +1691,11 @@ static void rtl8139_tx_timeout (struct net_device *dev) { struct rtl8139_private *tp = netdev_priv(dev); + tp->watchdog_fired = 1; if (!tp->have_thread) { - INIT_WORK(&tp->thread, rtl8139_tx_timeout_task, dev); + INIT_WORK(&tp->thread, rtl8139_thread, dev); schedule_delayed_work(&tp->thread, next_tick); - } else - tp->watchdog_fired = 1; - + } } static int rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev) @@ -2232,8 +2227,6 @@ static int rtl8139_close (struct net_device *dev) netif_stop_queue (dev); - rtl8139_stop_thread(tp); - if (netif_msg_ifdown(tp)) printk(KERN_DEBUG "%s: Shutting down ethercard, status was 0x%4.4x.\n", dev->name, RTL_R16 (IntrStatus)); diff --git a/drivers/net/e100.c b/drivers/net/e100.c index ce850f1..fb5c305 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/************************************************************** Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved. @@ -383,6 +383,7 @@ enum cb_command { cb_ucode = 0x0005, cb_dump = 0x0006, cb_tx_sf = 0x0008, + cb_tx_nc = 0x0010, /* 0 == controler does CRC, ie normal. 
1 == CRC from memory */ cb_cid = 0x1f00, cb_i = 0x2000, cb_s = 0x4000, @@ -419,7 +420,7 @@ struct config { /*5*/ u8 X(tx_dma_max_count:7, dma_max_count_enable:1); /*6*/ u8 X(X(X(X(X(X(X(late_scb_update:1, direct_rx_dma:1), tno_intr:1), cna_intr:1), standard_tcb:1), standard_stat_counter:1), - rx_discard_overruns:1), rx_save_bad_frames:1); + rx_save_overruns:1), rx_save_bad_frames:1); /*7*/ u8 X(X(X(X(X(rx_discard_short_frames:1, tx_underrun_retry:2), pad7:2), rx_extended_rfd:1), tx_two_frames_in_fifo:1), tx_dynamic_tbd:1); @@ -549,6 +550,8 @@ struct nic { multicast_all = (1 << 2), wol_magic = (1 << 3), ich_10h_workaround = (1 << 4), + accept_all_frames = (1 << 5), + save_fcs = (1 << 6), } flags ____cacheline_aligned; enum mac mac; @@ -1009,6 +1012,16 @@ static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) config->promiscuous_mode = 0x1; /* 1=on, 0=off */ } + if(nic->flags & accept_all_frames) { + config->rx_save_overruns = 0x1; /* 1=save, 0=discard */ + config->rx_save_bad_frames = 0x1; /* 1=save, 0=discard */ + config->rx_discard_short_frames = 0x0; /* 1=discard, 0=save */ + } + + if(nic->flags & save_fcs) { + config->rx_crc_transfer = 0x1; /* 1=save, 0=discard */ + } + if(nic->flags & multicast_all) config->multicast_all = 0x1; /* 1=accept, 0=no */ @@ -1467,6 +1480,16 @@ static void e100_set_multicast_list(struct net_device *netdev) else nic->flags &= ~promiscuous; + if(netdev->flags & IFF_ACCEPT_ALL_FRAMES) + nic->flags |= accept_all_frames; + else + nic->flags &= ~accept_all_frames; + + if(netdev->flags & IFF_SAVE_FCS) + nic->flags |= save_fcs; + else + nic->flags &= ~save_fcs; + if(netdev->flags & IFF_ALLMULTI || netdev->mc_count > E100_MAX_MULTICAST_ADDRS) nic->flags |= multicast_all; @@ -1608,6 +1631,19 @@ static void e100_xmit_prepare(struct nic *nic, struct cb *cb, struct sk_buff *skb) { cb->command = nic->tx_command; + +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + /* Use the last 4 bytes of the SKB payload packet as the CRC, used for + 
* testing, ie sending frames with bad CRC. + */ + if (unlikely(skb->use_specified_ether_crc)) { + cb->command |= __constant_cpu_to_le16(cb_tx_nc); + } + else { + cb->command &= ~__constant_cpu_to_le16(cb_tx_nc); + } +#endif + /* interrupt every 16 packets regardless of delay */ if((nic->cbs_avail & ~15) == nic->cbs_avail) cb->command |= cpu_to_le16(cb_i); @@ -1837,7 +1873,21 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, skb_reserve(skb, sizeof(struct rfd)); skb_put(skb, actual_size); skb->protocol = eth_type_trans(skb, nic->netdev); - + /* NOTE: The config step turns on acceptance of various bogus frames + * when in loopback or promisc mode, but this code will still throw + * them away unless you also set the new 'accept_all_frames' flag. + * Perhaps the implementors meant to accept the bogus frames in + * promisc mode here?? --Ben + */ + if(unlikely(!(nic->flags & accept_all_frames))) { + if(actual_size > nic->netdev->mtu + VLAN_ETH_HLEN) { + /* Received oversized frame */ + nic->net_stats.rx_over_errors++; + } + /* We're accepting all, so pass the bogons on up the stack. 
*/ + goto process_skb; + } + if(unlikely(!(rfd_status & cb_ok))) { /* Don't indicate if hardware indicates errors */ dev_kfree_skb_any(skb); @@ -1846,6 +1896,7 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, nic->rx_over_length_errors++; dev_kfree_skb_any(skb); } else { + process_skb: nic->net_stats.rx_packets++; nic->net_stats.rx_bytes += actual_size; nic->netdev->last_rx = jiffies; @@ -2209,6 +2260,63 @@ static int e100_set_settings(struct net_device *netdev, struct ethtool_cmd *cmd) return err; } +static int e100_set_rxall(struct net_device *netdev, u32 data) +{ + struct nic *nic = netdev->priv; + if (data) { + netdev->priv_flags |= IFF_ACCEPT_ALL_FRAMES; + nic->flags |= accept_all_frames; + } + else { + netdev->priv_flags &= ~(IFF_ACCEPT_ALL_FRAMES); + nic->flags &= ~accept_all_frames; + } + + e100_exec_cb(nic, NULL, e100_configure); + + return 0; +} + +static int e100_get_rxall(struct net_device *netdev, u32* data) +{ + struct nic *nic = netdev->priv; + if (nic->flags & accept_all_frames) { + *data = 1; + } + else { + *data = 0; + } + + return 0; +} + +static int e100_set_save_fcs(struct net_device *netdev, u32 data) +{ + struct nic *nic = netdev->priv; + if (data) { + nic->flags |= save_fcs; + } + else { + nic->flags &= ~save_fcs; + } + e100_exec_cb(nic, NULL, e100_configure); + + return 0; +} + +static int e100_get_save_fcs(struct net_device *netdev, u32* data) +{ + struct nic *nic = netdev->priv; + if (nic->flags & save_fcs) { + *data = 1; + } + else { + *data = 0; + } + + return 0; +} + static void e100_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -2506,6 +2614,10 @@ static struct ethtool_ops e100_ethtool_ops = { .get_stats_count = e100_get_stats_count, .get_ethtool_stats = e100_get_ethtool_stats, .get_perm_addr = ethtool_op_get_perm_addr, + .set_rx_all = e100_set_rxall, + .get_rx_all = e100_get_rxall, + .set_save_fcs = e100_set_save_fcs, + .get_save_fcs = e100_get_save_fcs, }; static int e100_do_ioctl(struct 
net_device *netdev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h index d304297..6de5129 100644 --- a/drivers/net/e1000/e1000.h +++ b/drivers/net/e1000/e1000.h @@ -366,6 +366,7 @@ int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx); /* e1000_ethtool.c */ void e1000_set_ethtool_ops(struct net_device *netdev); +void e1000_set_multi(struct net_device *netdev); /* e1000_param.c */ void e1000_check_options(struct e1000_adapter *adapter); diff --git a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c index 88a82ba..d825b19 100644 --- a/drivers/net/e1000/e1000_ethtool.c +++ b/drivers/net/e1000/e1000_ethtool.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/***************************************************************** Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. @@ -1887,6 +1887,58 @@ e1000_get_strings(struct net_device *netdev, uint32_t stringset, uint8_t *data) } } +static int e1000_ethtool_setrxall(struct net_device *netdev, uint32_t val) { + unsigned short old_flags = netdev->priv_flags; + if (val) { + netdev->priv_flags |= IFF_ACCEPT_ALL_FRAMES; + } + else { + netdev->priv_flags &= ~(IFF_ACCEPT_ALL_FRAMES); + } + + /* printk("e1000_ethtool_setrxall (%s) val: %d\n", + netdev->name, val); */ + if (old_flags != netdev->priv_flags) { + netif_tx_lock_bh(netdev); + if (netif_running(netdev)) { + /*printk("Kicking e1000 for setrxall..\n");*/ + e1000_set_multi(netdev); + } else { + /* Value will be flushed into the hardware when the device is + * brought up. 
+ */ + } + netif_tx_unlock_bh(netdev); + } + return 0; +} + +static int e1000_ethtool_set_save_fcs(struct net_device *netdev, uint32_t val) { + netif_tx_lock_bh(netdev); + if (val) { + netdev->priv_flags |= IFF_SAVE_FCS; + } + else { + netdev->priv_flags &= ~IFF_SAVE_FCS; + } + netif_tx_unlock_bh(netdev); + return 0; +} + +static int e1000_ethtool_get_save_fcs(struct net_device *netdev, uint32_t* val) { + *val = !!(netdev->priv_flags & IFF_SAVE_FCS); + /*printk("GETRXALL, data: %d priv_flags: %hx\n", + edata.data, netdev->priv_flags);*/ + return 0; +} + +static int e1000_ethtool_getrxall(struct net_device *netdev, uint32_t* val) { + *val = !!(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES); + /*printk("GETRXALL, data: %d priv_flags: %hx\n", + edata.data, netdev->priv_flags);*/ + return 0; +} + static struct ethtool_ops e1000_ethtool_ops = { .get_settings = e1000_get_settings, .set_settings = e1000_set_settings, @@ -1923,6 +1975,10 @@ static struct ethtool_ops e1000_ethtool_ops = { .get_stats_count = e1000_get_stats_count, .get_ethtool_stats = e1000_get_ethtool_stats, .get_perm_addr = ethtool_op_get_perm_addr, + .get_rx_all = e1000_ethtool_getrxall, + .set_rx_all = e1000_ethtool_setrxall, + .set_save_fcs = e1000_ethtool_set_save_fcs, + .get_save_fcs = e1000_ethtool_get_save_fcs, }; void e1000_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/e1000/e1000_hw.c b/drivers/net/e1000/e1000_hw.c index b3b9191..f69b55b 100644 --- a/drivers/net/e1000/e1000_hw.c +++ b/drivers/net/e1000/e1000_hw.c @@ -7811,9 +7811,8 @@ e1000_enable_mng_pass_thru(struct e1000_hw *hw) fwsm = E1000_READ_REG(hw, FWSM); factps = E1000_READ_REG(hw, FACTPS); - if (((fwsm & E1000_FWSM_MODE_MASK) == - (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT)) && - (factps & E1000_FACTPS_MNGCG)) + if ((((fwsm & E1000_FWSM_MODE_MASK) >> E1000_FWSM_MODE_SHIFT) == + e1000_mng_mode_pt) && !(factps & E1000_FACTPS_MNGCG)) return TRUE; } else if ((manc & E1000_MANC_SMBUS_EN) && !(manc & E1000_MANC_ASF_EN)) 
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 98ef9f8..f4ff677 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/***************************************************************** Copyright(c) 1999 - 2006 Intel Corporation. All rights reserved. @@ -137,7 +137,7 @@ static void e1000_clean_tx_ring(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring); static void e1000_clean_rx_ring(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring); -static void e1000_set_multi(struct net_device *netdev); +void e1000_set_multi(struct net_device *netdev); static void e1000_update_phy_info(unsigned long data); static void e1000_watchdog(unsigned long data); static void e1000_82547_tx_fifo_stall(unsigned long data); @@ -822,6 +822,9 @@ e1000_probe(struct pci_dev *pdev, if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; + /* Has ability to receive all frames (even bad CRCs and such) */ + netdev->features |= NETIF_F_RX_ALL | NETIF_F_SAVE_CRC; + netdev->features |= NETIF_F_LLTX; adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw); @@ -2194,7 +2197,7 @@ e1000_set_mac(struct net_device *netdev, void *p) * promiscuous mode, and all-multi behavior. **/ -static void +void e1000_set_multi(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); @@ -2229,6 +2232,35 @@ e1000_set_multi(struct net_device *netdev) E1000_WRITE_REG(hw, RCTL, rctl); + + /* This is useful for using ethereal or tcpdump to sniff + * packets in promiscuous mode without stripping VLAN/priority + * information, and also letting bad packets through. + * + * THIS IS NOT PRODUCTION CODE - FOR INTERNAL USE ONLY!!! 
+ * + */ + if (netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES) { + uint32_t ctrl; + /*printk("%s: Enabling acceptance of ALL frames (bad CRC too).\n", + netdev->name); */ + /* store bad packets, promisc/multicast all, no VLAN + * filter */ + rctl = E1000_READ_REG(hw, RCTL); + rctl |= (E1000_RCTL_SBP | E1000_RCTL_UPE | E1000_RCTL_MPE); + rctl &= ~(E1000_RCTL_VFE | E1000_RCTL_CFIEN); + E1000_WRITE_REG(hw, RCTL, rctl); + /* disable VLAN tagging/stripping */ + ctrl = E1000_READ_REG(hw, CTRL); + ctrl &= ~E1000_CTRL_VME; + E1000_WRITE_REG(hw, CTRL, ctrl); + } + else { + /* TODO: Do we need a way to explicitly turn this off if it was + * previously enabled, or will it magically go back to normal??? --Ben + */ + } + /* 82542 2.0 needs to be in reset to write receive address registers */ if (hw->mac_type == e1000_82542_rev2_0) @@ -2508,6 +2540,7 @@ e1000_watchdog(unsigned long data) #define E1000_TX_FLAGS_VLAN 0x00000002 #define E1000_TX_FLAGS_TSO 0x00000004 #define E1000_TX_FLAGS_IPV4 0x00000008 +#define E1000_TX_FLAGS_NO_FCS 0x00000010 #define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 #define E1000_TX_FLAGS_VLAN_SHIFT 16 @@ -2764,6 +2797,13 @@ e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); } +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS)) { + txd_lower &= ~(E1000_TXD_CMD_IFCS); + /* printk("Disabling CRC in tx_queue, txd_lower: 0x%x\n", txd_lower); */ + } +#endif + i = tx_ring->next_to_use; while (count--) { @@ -2778,6 +2818,14 @@ e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + /* txd_cmd re-enables FCS, so we'll re-disable it here as desired. 
*/ + if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS)) { + tx_desc->lower.data &= ~(cpu_to_le32(E1000_TXD_CMD_IFCS)); + /* printk("Disabling2 CRC in tx_queue, txd_lower: 0x%x\n", tx_desc->lower.data); */ + } +#endif + /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -3015,6 +3063,12 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (likely(skb->protocol == htons(ETH_P_IP))) tx_flags |= E1000_TX_FLAGS_IPV4; +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (unlikely(skb->use_specified_ether_crc)) { + tx_flags |= E1000_TX_FLAGS_NO_FCS; + } +#endif + e1000_tx_queue(adapter, tx_ring, tx_flags, e1000_tx_map(adapter, tx_ring, skb, first, max_per_txd, nr_frags, mss)); @@ -3684,7 +3738,11 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, goto next_desc; } - if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { + /* If we are accepting all frames, then do not pay attention to the + * framing errors. + */ + if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) && + !(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES)) { last_byte = *(skb->data + length - 1); if (TBI_ACCEPT(&adapter->hw, status, rx_desc->errors, length, last_byte)) { @@ -3702,6 +3760,16 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, } } + + // This may not be needed now. 
--Ben + //if (netdev->priv_flags & IFF_SAVE_FCS) { + // skb_put(skb, length); + //} + //else { + // skb_put(skb, length - ETHERNET_FCS_SIZE); + //} + + /* code added for copybreak, this should improve * performance for small packets with large amounts * of reassembly being done in the stack */ @@ -3839,7 +3907,8 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, goto next_desc; } - if (unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { + if ((unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) && + !(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES)) { dev_kfree_skb_irq(skb); goto next_desc; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index b24006c..0ed7e16 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -120,6 +120,7 @@ struct TCP_Server_Info { struct sockaddr_in sockAddr; struct sockaddr_in6 sockAddr6; } addr; + u32 ip4_local_ip; /* if != 0, will bind locally to this IP */ wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ struct list_head pending_mid_q; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5d394c7..0327e0c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -88,13 +88,15 @@ struct smb_vol { unsigned int rsize; unsigned int wsize; unsigned int sockopt; + u32 local_ip; /* allow binding to a local IP address if != 0 */ unsigned short int port; }; static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, char * netb_name, - char * server_netb_name); + char * server_netb_name, + u32 local_ip); static int ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket); @@ -189,7 +191,8 @@ cifs_reconnect(struct TCP_Server_Info *server) rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket, server->workstation_RFC1001_name, - server->server_RFC1001_name); + server->server_RFC1001_name, + server->ip4_local_ip); } if(rc) { cFYI(1,("reconnect error %d",rc)); @@ -993,6 +996,18 @@ cifs_parse_mount_options(char *options, const char 
*devname,struct smb_vol *vol) printk(KERN_WARNING "CIFS: domain name too long\n"); return 1; } + } else if (strnicmp(data, "local_ip", 8) == 0) { + if (!value || !*value) { + printk(KERN_WARNING "CIFS: local_ip value not specified.\n"); + return 1; /* needs_arg; */ + } + i = cifs_inet_pton(AF_INET, value, &(vol->local_ip)); + if (i < 0) { + vol->local_ip = 0; + printk(KERN_WARNING "CIFS: Could not parse local_ip: %s\n", + value); + return 1; + } } else if (strnicmp(data, "iocharset", 9) == 0) { if (!value || !*value) { printk(KERN_WARNING "CIFS: invalid iocharset specified\n"); @@ -1231,7 +1246,8 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol) static struct cifsSesInfo * cifs_find_tcp_session(struct in_addr * target_ip_addr, struct in6_addr *target_ip6_addr, - char *userName, struct TCP_Server_Info **psrvTcp) + char *userName, struct TCP_Server_Info **psrvTcp, + u32 local_ip) { struct list_head *tmp; struct cifsSesInfo *ses; @@ -1241,7 +1257,11 @@ cifs_find_tcp_session(struct in_addr * target_ip_addr, list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); if (ses->server) { - if((target_ip_addr && + if((target_ip_addr && + /* If binding to a local IP, do not re-use sessions bound to different + * local IP addresses. + */ + (local_ip == ses->server->ip4_local_ip) && (ses->server->addr.sockAddr.sin_addr.s_addr == target_ip_addr->s_addr)) || (target_ip6_addr && memcmp(&ses->server->addr.sockAddr6.sin6_addr, @@ -1264,7 +1284,7 @@ cifs_find_tcp_session(struct in_addr * target_ip_addr, } static struct cifsTconInfo * -find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) +find_unc(__be32 new_target_ip_addr, char *uncName, char *userName, u32 local_ip) { struct list_head *tmp; struct cifsTconInfo *tcon; @@ -1279,8 +1299,9 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) ("old ip addr: %x == new ip %x ?", tcon->ses->server->addr.sockAddr.sin_addr. 
s_addr, new_target_ip_addr)); - if (tcon->ses->server->addr.sockAddr.sin_addr. - s_addr == new_target_ip_addr) { + if ((local_ip == tcon->ses->server->ip4_local_ip) && + (tcon->ses->server->addr.sockAddr.sin_addr. + s_addr == new_target_ip_addr)) { /* BB lock tcon, server and tcp session and increment use count here? */ /* found a match on the TCP session */ /* BB check if reconnection needed */ @@ -1382,7 +1403,8 @@ static void rfc1002mangle(char * target,char * source, unsigned int length) static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char * netbios_name, char * target_name) + char * netbios_name, char * target_name, + u32 local_ip /* in network byte order */) { int rc = 0; int connected = 0; @@ -1401,6 +1423,24 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, } } + /* Bind to the local IP address if specified */ + if (local_ip) { + struct sockaddr_in myaddr = { + .sin_family = AF_INET, + }; + myaddr.sin_addr.s_addr = local_ip; + myaddr.sin_port = 0; /* any */ + rc = (*csocket)->ops->bind(*csocket, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (rc < 0) { + printk("Tried to bind to local ip: 0x%x, but failed with error: %d\n", + local_ip, rc); + } + else { + printk("CIFS: Successfully bound to local ip: 0x%x\n", local_ip); + } + } + psin_server->sin_family = AF_INET; if(psin_server->sin_port) { /* user overrode default port */ rc = (*csocket)->ops->connect(*csocket, @@ -1680,11 +1720,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if(address_type == AF_INET) existingCifsSes = cifs_find_tcp_session(&sin_server.sin_addr, NULL /* no ipv6 addr */, - volume_info.username, &srvTcp); + volume_info.username, &srvTcp, + volume_info.local_ip); else if(address_type == AF_INET6) existingCifsSes = cifs_find_tcp_session(NULL /* no ipv4 addr */, &sin_server6.sin6_addr, - volume_info.username, &srvTcp); + volume_info.username, &srvTcp, 0); else { kfree(volume_info.UNC); 
kfree(volume_info.password); @@ -1702,7 +1743,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sin_server.sin_port = 0; rc = ipv4_connect(&sin_server,&csocket, volume_info.source_rfc1001_name, - volume_info.target_rfc1001_name); + volume_info.target_rfc1001_name, + volume_info.local_ip); if (rc < 0) { cERROR(1, ("Error connecting to IPv4 socket. Aborting operation")); @@ -1729,6 +1771,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, /* BB Add code for ipv6 case too */ srvTcp->ssocket = csocket; srvTcp->protocolType = IPV4; + srvTcp->ip4_local_ip = volume_info.local_ip; init_waitqueue_head(&srvTcp->response_q); init_waitqueue_head(&srvTcp->request_q); INIT_LIST_HEAD(&srvTcp->pending_mid_q); @@ -1862,7 +1905,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, tcon = find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, - volume_info.username); + volume_info.username, volume_info.local_ip); if (tcon) { cFYI(1, ("Found match on UNC path")); /* we can have only one retry value for a connection diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e8a9bee..5ac99b6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -702,6 +702,11 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data) __FUNCTION__, PTR_ERR(xprt)); return (struct rpc_clnt *)xprt; } + if (data->local_ip != 0) { + printk("nfs: Configuring local ip address as: 0x%x\n", + data->local_ip); + } + xprt->local_address = data->local_ip; /* specify local IP address */ clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, data->pseudoflavor); if (IS_ERR(clnt)) { @@ -955,6 +960,11 @@ static int nfs_compare_super(struct super_block *sb, void *data) return 0; if (old->addr.sin_port != server->addr.sin_port) return 0; + if (old->local_ip != server->local_ip) { + /*printk("nfs_compare_super, old->ip: %x server->ip: %x\n", + old->local_ip, server->local_ip); */ + return 0; + } return 
!nfs_compare_fh(&old->fh, &server->fh); } @@ -1037,6 +1047,10 @@ static int nfs_get_sb(struct file_system_type *fs_type, goto out_err; } + server->local_ip = data->local_ip; /* Allow unique local mounts when + * binding to local IP addresses. + */ + /* Fire up rpciod if not yet running */ error = rpciod_up(); if (error < 0) { @@ -1272,6 +1286,11 @@ static int nfs4_compare_super(struct super_block *sb, void *data) return 0; if (strcmp(server->mnt_path, old->mnt_path) != 0) return 0; + if (old->local_ip != server->local_ip) { + /*printk("nfs_compare_super, old->ip: %x server->ip: %x\n", + old->local_ip, server->local_ip); */ + return 0; + } return 1; } diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h index 5755d57..3c7c7d2 100644 --- a/include/asm-i386/socket.h +++ b/include/asm-i386/socket.h @@ -50,4 +50,8 @@ #define SO_PEERSEC 31 #define SO_PASSSEC 34 +/* Instruct lower device to not calculate the frame + * checksum. Useful only for testing, afaik. --Ben */ +#define SO_NOFCS 50 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h index b467026..cffefd4 100644 --- a/include/asm-x86_64/socket.h +++ b/include/asm-x86_64/socket.h @@ -50,4 +50,9 @@ #define SO_PEERSEC 31 #define SO_PASSSEC 34 +/* Instruct lower device to not calculate the frame + * checksum. Useful only for testing, afaik. 
--Ben */ +#define SO_NOFCS 50 + + #endif /* _ASM_SOCKET_H */ diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index bea0255..d5ceec2 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -266,6 +266,10 @@ COMPATIBLE_IOCTL(SIOCGMIIREG) COMPATIBLE_IOCTL(SIOCSMIIREG) COMPATIBLE_IOCTL(SIOCGIFVLAN) COMPATIBLE_IOCTL(SIOCSIFVLAN) +COMPATIBLE_IOCTL(SIOCSIFMACVLAN) +COMPATIBLE_IOCTL(SIOCGIFMACVLAN) +COMPATIBLE_IOCTL(SIOCGIFREDIRDEV) +COMPATIBLE_IOCTL(SIOCSIFREDIRDEV) COMPATIBLE_IOCTL(SIOCBRADDBR) COMPATIBLE_IOCTL(SIOCBRDELBR) /* SG stuff */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c6310ae..8ed7f16 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1,4 +1,4 @@ -/* +/* -*-linux-c-*- * ethtool.h: Defines for Linux ethtool. * * Copyright (C) 1998 David S. Miller (davem@redhat.com) @@ -309,7 +309,11 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data); * phys_id: Identify the device * get_stats: Return statistics about the device * get_perm_addr: Gets the permanent hardware address - * + * set_rx_all: Set or clear IFF_ACCEPT_ALL_FRAMES, see if.h + * get_rx_all: Return 1 if set, 0 if not. + * set_save_fcs: Set or clear IFF_SAVE_FCS, see if.h + * get_save_fcs: Return 1 if set, 0 if not. 
+ * * Description: * * get_settings: @@ -368,6 +372,10 @@ struct ethtool_ops { int (*get_stats_count)(struct net_device *); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); int (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *); + int (*set_rx_all)(struct net_device *, u32); + int (*get_rx_all)(struct net_device *, u32 *); + int (*set_save_fcs)(struct net_device *, u32); + int (*get_save_fcs)(struct net_device *, u32 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); u32 (*get_ufo)(struct net_device *); @@ -375,6 +383,13 @@ struct ethtool_ops { }; #endif /* __KERNEL__ */ +/* for dumping net-device statistics */ +struct ethtool_ndstats { + u32 cmd; /* ETHTOOL_GNDSTATS */ + u8 data[0]; /* sizeof(struct net_device_stats) */ +}; + + /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* Get settings. */ #define ETHTOOL_SSET 0x00000002 /* Set settings. */ @@ -414,6 +429,15 @@ struct ethtool_ops { #define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */ #define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */ + +#define ETHTOOL_GNDSTATS 0x00000070 /* get standard net-device statistics */ +#define ETHTOOL_GETRXALL 0x00000071 /* Retrieve whether or not + * IFF_ACCEPT_ALL_FRAMES is set. */ +#define ETHTOOL_SETRXALL 0x00000072 /* Set IFF_ACCEPT_ALL_FRAMES */ +#define ETHTOOL_GETRXFCS 0x00000073 /* Get whether IFF_SAVE_FCS is set */ +#define ETHTOOL_SETRXFCS 0x00000074 /* Set IFF_SAVE_FCS */ + + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET diff --git a/include/linux/if.h b/include/linux/if.h index 374e20a..019717c 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -60,6 +60,19 @@ #define IFF_MASTER_8023AD 0x8 /* bonding master, 802.3ad. */ #define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb. */ +#define IFF_ACCEPT_LOCAL_ADDRS 0x0200 /** Accept pkts even if they come from a local + * address. 
This lets us send pkts to ourselves + * over external interfaces (when used in conjunction + * with SO_BINDTODEVICE) + */ +#define IFF_ACCEPT_ALL_FRAMES 0x0400 /** Accept all frames, even ones with bad CRCs. + * Should only be used in debugging/testing situations. + * Do NOT enable this unless you understand the + * consequences! */ +#define IFF_SAVE_FCS 0x0800 /** Save the Frame Check Sequence (FCS) on receive, if + * possible. */ +#define IFF_MAC_VLAN 0x1000 /* MAC VLAN device. */ + #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h new file mode 100644 index 0000000..0f56ae5 --- /dev/null +++ b/include/linux/if_macvlan.h @@ -0,0 +1,58 @@ +/* -*- linux-c -*- */ +#ifndef _LINUX_IF_MACVLAN_H +#define _LINUX_IF_MACVLAN_H + +/* the ioctl commands */ + +/* actions */ +#define MACVLAN_ENABLE 1 +#define MACVLAN_DISABLE 2 +#define MACVLAN_ADD 3 +#define MACVLAN_DEL 4 +//#define MACVLAN_BIND 5 +//#define MACVLAN_UNBIND 6 + +/* informative */ +#define MACVLAN_GET_NUM_PORTS 7 +#define MACVLAN_GET_PORT_NAME 8 +#define MACVLAN_GET_NUM_VLANS 9 +#define MACVLAN_GET_VLAN_NAME 10 +//#define MACVLAN_GET_NUM_MACS 11 +//#define MACVLAN_GET_MAC_NAME 12 + +#define MACVLAN_SET_PORT_FLAGS 13 +#define MACVLAN_GET_PORT_FLAGS 14 + +/* If this IOCTL succeeds, we are a MAC-VLAN interface, otherwise, we are not. */ +#define MACVLAN_IS_MACVLAN 15 +#define MACVLAN_IS_MACVLAN2 16 /* new ioctl API */ + + +#ifdef __KERNEL__ +#include +#include +extern int (*macvlan_ioctl_hook)(unsigned long arg); + +/* Returns >= 0 if it consumed the packet, otherwise let the pkt + * be processed by the netif_rx method, as if macvlans didn't + * exist. 
+ */ +extern int (*macvlan_handle_frame_hook)(struct sk_buff *skb); +#endif + +struct macvlan_ioctl_reply { + int32_t num; + char name[IFNAMSIZ]; +}; + +struct macvlan_ioctl { + int32_t cmd; + int32_t portidx; + int32_t ifidx; /* flags when setting port flags */ + int32_t macaddridx; + char ifname[IFNAMSIZ]; + unsigned char macaddr[8]; + struct macvlan_ioctl_reply reply; +}; + +#endif diff --git a/include/linux/if_redirdev.h b/include/linux/if_redirdev.h new file mode 100644 index 0000000..2acdd77 --- /dev/null +++ b/include/linux/if_redirdev.h @@ -0,0 +1,34 @@ +/* -*- linux-c -*- */ +#ifndef _LINUX_IF_REDIRDEV_H +#define _LINUX_IF_REDIRDEV_H + +/* the ioctl commands */ + +#define REDIRDEV_ADD 2090 +#define REDIRDEV_DEL 2091 +/* If this IOCTL succeeds, we are a Redirect-Device + interface, otherwise, we are not. */ +#define REDIRDEV_IS_REDIRDEV 2092 +#define REDIRDEV_GET_BY_IDX 2093 +#define REDIRDEV_GET_BY_NAME 2094 + +#ifdef __KERNEL__ +#include +#include +extern int (*redirdev_ioctl_hook)(void*); + +#endif + +/* Request and response */ +struct redirdev_ioctl { + u32 cmd; + u32 ifidx; /* when getting info by idx */ + +#define RDD_ASSOCIATED (1<<0) + u32 flags; /* 1<<0: Is the interface associated with tx-dev or not */ + u32 not_used; /* explicitly align 64-bit */ + char ifname[IFNAMSIZ]; + char txifname[IFNAMSIZ]; +}; + +#endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 50a4719..57b8204 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -158,6 +158,7 @@ enum { struct neighbour; struct neigh_parms; struct sk_buff; +struct pktgen_dev; struct netif_rx_stats { @@ -310,6 +311,11 @@ struct net_device #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_GSO 2048 /* Enable software GSO. */ #define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_RX_ALL 16384 /* Can be configured to receive all packets, even + * ones with busted CRC. 
May disable VLAN filtering + * in the NIC, users should NOT enable this feature + * unless they understand the consequences. */ +#define NETIF_F_SAVE_CRC 32768 /* Can save FCS in skb, last 4 bytes for ethernet */ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 @@ -513,9 +519,23 @@ struct net_device void (*poll_controller)(struct net_device *dev); #endif + /* Callback for when the queue is woken, used by pktgen currently */ + int (*notify_queue_woken)(struct net_device *dev); + void* nqw_data; /* To be used by the method above as needed */ + + struct pktgen_dev* pkt_dev; /* to quickly find the pkt-gen dev registered with this + * interface, if any. + */ + /* bridge stuff */ struct net_bridge_port *br_port; + long dflt_skb_mark; /* Specify skb->mark for pkts received on this interface. */ +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + struct macvlan_port *macvlan_priv; +#endif + + #ifdef CONFIG_NET_DIVERT /* this will get initialized at each interface type init routine */ struct divert_blk *divert; @@ -642,8 +662,13 @@ static inline void netif_wake_queue(struct net_device *dev) if (netpoll_trap()) return; #endif - if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) + if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) { __netif_schedule(dev); + + if (dev->notify_queue_woken) { + dev->notify_queue_woken(dev); + } + } } static inline void netif_stop_queue(struct net_device *dev) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 2fdabdb..50a3761 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -79,6 +79,8 @@ struct ip_conntrack_tuple /* The direction (for tuplehash) */ u_int8_t dir; } dst; + + u_int32_t mark; }; /* This is optimized opposed to a memset of the whole structure. 
Everything we @@ -114,7 +116,8 @@ static inline int ip_ct_tuple_src_equal(const struct ip_conntrack_tuple *t1, const struct ip_conntrack_tuple *t2) { return t1->src.ip == t2->src.ip - && t1->src.u.all == t2->src.u.all; + && t1->src.u.all == t2->src.u.all + && t1->mark == t2->mark; } static inline int ip_ct_tuple_dst_equal(const struct ip_conntrack_tuple *t1, @@ -122,7 +125,8 @@ static inline int ip_ct_tuple_dst_equal(const struct ip_conntrack_tuple *t1, { return t1->dst.ip == t2->dst.ip && t1->dst.u.all == t2->dst.u.all - && t1->dst.protonum == t2->dst.protonum; + && t1->dst.protonum == t2->dst.protonum + && t1->mark == t2->mark; } static inline int ip_ct_tuple_equal(const struct ip_conntrack_tuple *t1, @@ -140,7 +144,8 @@ static inline int ip_ct_tuple_mask_cmp(const struct ip_conntrack_tuple *t, || ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all) || ((t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all) || ((t->dst.protonum ^ tuple->dst.protonum) - & mask->dst.protonum)); + & mask->dst.protonum) + || ((t->mark ^ tuple->mark) & mask->mark)); } #endif /* _IP_CONNTRACK_TUPLE_H */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 6b4a13c..cfa089e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -37,6 +37,8 @@ struct nfs_server { struct sockaddr_in addr; struct nfs_fsid fsid; unsigned long mount_time; /* when this fs was mounted */ + u32 local_ip; /* Allow local binding in .v3 */ + #ifdef CONFIG_NFS_V4 /* Our own IP address, as a null-terminated string. * This is used to generate the clientid, and the callback address. diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 659c754..77700de 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -20,7 +20,7 @@ * mount-to-kernel version compatibility. Some of these aren't used yet * but here they are anyway. 
*/ -#define NFS_MOUNT_VERSION 6 +#define NFS_MOUNT_VERSION 7 #define NFS_MAX_CONTEXT_LEN 256 struct nfs_mount_data { @@ -43,6 +43,8 @@ struct nfs_mount_data { struct nfs3_fh root; /* 4 */ int pseudoflavor; /* 5 */ char context[NFS_MAX_CONTEXT_LEN + 1]; /* 6 */ + char pad[3]; /* 7 Align the context above */ + unsigned int local_ip; /* 7 */ }; /* bits in the flags field */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index facd9ee..c1217e4 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -238,9 +238,8 @@ enum rt_class_t RT_TABLE_DEFAULT=253, RT_TABLE_MAIN=254, RT_TABLE_LOCAL=255, - __RT_TABLE_MAX }; -#define RT_TABLE_MAX (__RT_TABLE_MAX - 1) +#define RT_TABLE_MAX 0xFFFFFFFF @@ -263,6 +262,7 @@ enum rtattr_type_t RTA_CACHEINFO, RTA_SESSION, RTA_MP_ALGO, + RTA_TABLE, __RTA_MAX }; @@ -1065,6 +1065,13 @@ extern void __rtnl_unlock(void); } \ } while(0) +static inline u32 rtm_get_table(struct rtmsg *rtm, struct rtattr **rta) +{ + return RTA_GET_U32(rta[RTA_TABLE-1]); +rtattr_failure: + return rtm->rtm_table; +} + #endif /* __KERNEL__ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 755e9cd..944f741 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -282,7 +282,11 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + /* Use the last 4 bytes of the payload for the ethernet CRC. Only supported on some + * NICs, such as e1000. --Ben + */ + use_specified_ether_crc:1; __be16 protocol; void (*destructor)(struct sk_buff *skb); @@ -308,6 +312,7 @@ struct sk_buff { #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; #endif + __u32 mark; /* These elements must be at the end, see alloc_skb() for details. 
*/ diff --git a/include/linux/sockios.h b/include/linux/sockios.h index e6b9d1d..f6c8e31 100644 --- a/include/linux/sockios.h +++ b/include/linux/sockios.h @@ -94,6 +94,13 @@ #define SIOCGRARP 0x8961 /* get RARP table entry */ #define SIOCSRARP 0x8962 /* set RARP table entry */ +/* MAC address based VLAN control calls */ +#define SIOCGIFMACVLAN 0x8965 /* Mac address multiplex/demultiplex support */ +#define SIOCSIFMACVLAN 0x8966 /* Set macvlan options */ + +#define SIOCGIFREDIRDEV 0x8967 /* Redirect device get ioctl */ +#define SIOCSIFREDIRDEV 0x8968 /* Set redirect dev options */ + /* Driver configuration calls */ #define SIOCGIFMAP 0x8970 /* Get device parameters */ @@ -122,6 +129,15 @@ #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ #define SIOCBRDELIF 0x89a3 /* remove interface from bridge */ +/* Ben's little hack land */ +#define SIOCSACCEPTLOCALADDRS 0x89ba /* Set or clear: interface accepts pkts from + * local interfaces...use with SO_BINDTODEVICE + */ +#define SIOCGACCEPTLOCALADDRS 0x89bb /* Query whether the interface accepts pkts from + * local interfaces (returned in ifr_flags) + */ + + /* Device private ioctl calls */ /* diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3a0cca2..927bbc6 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -171,7 +171,8 @@ struct rpc_xprt { reestablish_timeout; struct work_struct connect_worker; unsigned short port; - + u32 local_address; /* local IP address to bind to */ + /* * Disconnection of idle transports */ diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index a15dcf0..9464f48 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -94,7 +94,7 @@ struct dn_fib_node { struct dn_fib_table { - int n; + u32 n; int (*insert)(struct dn_fib_table *t, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, @@ -137,7 +137,7 @@ extern int dn_fib_sync_up(struct net_device *dev); /* * dn_tables.c */ -extern struct dn_fib_table *dn_fib_get_table(int n,
int creat); +extern struct dn_fib_table *dn_fib_get_table(u32 n, int creat); extern struct dn_fib_table *dn_fib_empty_table(void); extern void dn_fib_table_init(void); extern void dn_fib_table_cleanup(void); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a095d1d..4b764e2 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -149,7 +149,8 @@ struct fib_result_nl { #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ struct fib_table { - unsigned char tb_id; + struct hlist_node tb_hlist; + u32 tb_id; unsigned tb_stamp; int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); int (*tb_insert)(struct fib_table *table, struct rtmsg *r, @@ -172,14 +173,14 @@ struct fib_table { extern struct fib_table *ip_fib_local_table; extern struct fib_table *ip_fib_main_table; -static inline struct fib_table *fib_get_table(int id) +static inline struct fib_table *fib_get_table(u32 id) { if (id != RT_TABLE_LOCAL) return ip_fib_main_table; return ip_fib_local_table; } -static inline struct fib_table *fib_new_table(int id) +static inline struct fib_table *fib_new_table(u32 id) { return fib_get_table(id); } @@ -199,30 +200,14 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result } #else /* CONFIG_IP_MULTIPLE_TABLES */ -#define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL]) -#define ip_fib_main_table (fib_tables[RT_TABLE_MAIN]) +#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL) +#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN) -extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; extern int fib_lookup(const struct flowi *flp, struct fib_result *res); -extern struct fib_table *__fib_new_table(int id); +extern struct fib_table *fib_new_table(u32 id); +extern struct fib_table *fib_get_table(u32 id); extern void fib_rule_put(struct fib_rule *r); -static inline struct fib_table *fib_get_table(int id) -{ - if (id == 0) - id = RT_TABLE_MAIN; - - return fib_tables[id]; -} - -static inline struct 
fib_table *fib_new_table(int id) -{ - if (id == 0) - id = RT_TABLE_MAIN; - - return fib_tables[id] ? : __fib_new_table(id); -} - extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -248,7 +233,7 @@ extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, extern u32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ -extern struct fib_table *fib_hash_init(int id); +extern struct fib_table *fib_hash_init(u32 id); #ifdef CONFIG_IP_MULTIPLE_TABLES /* Exported by fib_rules.c */ diff --git a/include/net/sock.h b/include/net/sock.h index 324b3ea..1d9c9e6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -391,6 +391,10 @@ enum sock_flags { SOCK_RCVTSTAMP, /* %SO_TIMESTAMP setting */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ + SOCK_DONT_DO_LL_FCS, /* Tell NIC not to do the ethernet FCS. Will use + * last 4 bytes of packet sent from user-space + * instead. + */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) diff --git a/kernel/panic.c b/kernel/panic.c index 8010b9b..0c2e6dc 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -173,7 +173,7 @@ const char *print_tainted(void) void add_taint(unsigned flag) { - debug_locks = 0; /* can't trust the integrity of the kernel anymore */ + /*debug_locks = 0;*/ /* can't trust the integrity of the kernel anymore */ tainted |= flag; } EXPORT_SYMBOL(add_taint); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 18fcb9f..c4209c8 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -3,7 +3,8 @@ * Ethernet-type device handling. * * Authors: Ben Greear - * Please send support related email to: vlan@scry.wanfear.com + * Please send support related email to: vlan@candelatech.com + * after subscribing using the link below. 
VLAN Home Page: http://www.candelatech.com/~greear/vlan.html * * Fixes: diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index da9cfe9..203cd54 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -3,7 +3,8 @@ * Ethernet-type device handling. * * Authors: Ben Greear - * Please send support related email to: vlan@scry.wanfear.com + * Please send support related email to: vlan@candelatech.com + * after subscribing using the web page below. * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html * * Fixes: Mar 22 2001: Martin Bokaemper @@ -439,6 +440,11 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = vlan_dev_get_stats(dev); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); + /* Please note, dev_queue_xmit consumes the pkt regardless of the + * return value. So, take an extra reference on the skb first and drop it if successful. + */ + struct sk_buff* skb2 = skb_get(skb); + /* Handle non-VLAN frames if they are sent to us, for example by DHCP. * * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING @@ -468,6 +474,10 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) skb = __vlan_put_tag(skb, veth_TCI); if (!skb) { stats->tx_dropped++; + /* Drop the extra reference, assuming this is a non-recoverable + * issue and we don't want calling code to retry. + */ + kfree_skb(skb2); return 0; } @@ -485,13 +495,24 @@ int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) veth->h_vlan_proto, veth->h_vlan_TCI, veth->h_vlan_encapsulated_proto); #endif - stats->tx_packets++; /* for statics only */ - stats->tx_bytes += skb->len; - skb->dev = VLAN_DEV_INFO(dev)->real_dev; - dev_queue_xmit(skb); - return 0; + { + int rv = dev_queue_xmit(skb); + if (rv == 0) { + /* Was success, need to free the skb reference since + * we bumped up the user count above.
If there was an + * error instead, then the skb2 will not be freed, and so + * the calling code will be able to re-send it. + */ + + stats->tx_packets++; /* for statistics only */ + stats->tx_bytes += skb2->len; + + kfree_skb(skb2); + } + return rv; + } } int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/net/Kconfig b/net/Kconfig index 4959a4e..d345e4b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -168,6 +168,8 @@ source "net/tipc/Kconfig" source "net/atm/Kconfig" source "net/bridge/Kconfig" source "net/8021q/Kconfig" +source "net/macvlan/Kconfig" +source "net/redir/Kconfig" source "net/decnet/Kconfig" source "net/llc/Kconfig" source "net/ipx/Kconfig" @@ -237,6 +239,14 @@ config NET_TCPPROBE To compile this code as a module, choose M here: the module will be called tcp_probe. +config SUPPORT_SEND_BAD_CRC + bool "Support Send Bad CRC (USE WITH CAUTION)" + ---help--- + When enabled, one can send a specially crafted packet to the ethernet + device via a raw socket and it will be sent with the last 4 bytes of + the packet as the ethernet CRC. Requires driver support. Current driver + support is limited to e100 and e1000.
+ endmenu endmenu diff --git a/net/Makefile b/net/Makefile index 065796f..0fdf59d 100644 --- a/net/Makefile +++ b/net/Makefile @@ -46,6 +46,8 @@ obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ +obj-$(CONFIG_MACVLAN) += macvlan/ +obj-$(CONFIG_REDIRDEV) += redir/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/core/dev.c b/net/core/dev.c index a34f7c6..7c2e6c4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -118,6 +119,22 @@ #include #include +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) +#include "pktgen.h" + +#warning "Compiling dev.c for pktgen."; + +int (*handle_pktgen_hook)(struct sk_buff *skb) = NULL; +EXPORT_SYMBOL(handle_pktgen_hook); + +static __inline__ int handle_pktgen_rcv(struct sk_buff* skb) { + if (handle_pktgen_hook) { + return handle_pktgen_hook(skb); + } + return -1; +} +#endif + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -1760,6 +1777,23 @@ static int ing_filter(struct sk_buff *skb) } #endif + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +/* Returns >= 0 if we consume the packet. Otherwise, let + * it fall through the rest of the packet processing. + */ +int (*macvlan_handle_frame_hook)(struct sk_buff *skb) = NULL; +EXPORT_SYMBOL(macvlan_handle_frame_hook); +#endif + +/* Returns >= 0 if we consume the packet. Otherwise, let + * it fall through the rest of the packet processing. + */ +static __inline__ int handle_macvlan(struct sk_buff *skb) { + return macvlan_handle_frame_hook(skb); +} + + int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -1787,6 +1821,11 @@ int netif_receive_skb(struct sk_buff *skb) skb->h.raw = skb->nh.raw = skb->data; skb->mac_len = skb->nh.raw - skb->mac.raw; + /* Set the default 'mark' for this skb. 
dflt_skb_mark may be set through + * the /sys/class/net/[dev-name]/dflt_skb_mark file. + */ + skb->mark = skb->dev->dflt_skb_mark; + pt_prev = NULL; rcu_read_lock(); @@ -1830,6 +1869,32 @@ ncls: if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (skb->dev->macvlan_priv != NULL && + macvlan_handle_frame_hook != NULL) { + if (handle_macvlan(skb) >= 0) { + /* consumed by mac-vlan...it would have been + * re-sent to this method with a different + * device... + */ + goto out; + } + else { + /* Let it fall through and be processed normally */ + } + } +#endif + +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) + if ((skb->dev->pkt_dev) && + (handle_pktgen_rcv(skb) >= 0)) { + /* Pktgen may consume the packet, no need to send + * to further protocols. + */ + goto out; + } +#endif + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { if (ptype->type == type && @@ -2591,6 +2656,24 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) ifr->ifr_newname[IFNAMSIZ-1] = '\0'; return dev_change_name(dev, ifr->ifr_newname); + case SIOCSACCEPTLOCALADDRS: + if (ifr->ifr_flags) { + dev->priv_flags |= IFF_ACCEPT_LOCAL_ADDRS; + } + else { + dev->priv_flags &= ~IFF_ACCEPT_LOCAL_ADDRS; + } + return 0; + + case SIOCGACCEPTLOCALADDRS: + if (dev->priv_flags & IFF_ACCEPT_LOCAL_ADDRS) { + ifr->ifr_flags = 1; + } + else { + ifr->ifr_flags = 0; + } + return 0; + /* * Unknown or private ioctl */ @@ -2689,6 +2772,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: + case SIOCGACCEPTLOCALADDRS: dev_load(ifr.ifr_name); read_lock(&dev_base_lock); ret = dev_ifsioc(&ifr, cmd); @@ -2763,6 +2847,7 @@ int dev_ioctl(unsigned int cmd, void __user *arg) case SIOCBONDCHANGEACTIVE: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCSACCEPTLOCALADDRS: if (!capable(CAP_NET_ADMIN)) return -EPERM; /* fall through */ @@ 
-3585,6 +3670,10 @@ EXPORT_SYMBOL(net_enable_timestamp); EXPORT_SYMBOL(net_disable_timestamp); EXPORT_SYMBOL(dev_get_flags); +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) +EXPORT_SYMBOL(handle_pktgen_rcv); +#endif + #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) EXPORT_SYMBOL(br_handle_frame_hook); EXPORT_SYMBOL(br_fdb_get_hook); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 2797e28..9507c5c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1,4 +1,4 @@ -/* +/* -*- linux-c -*- * net/core/ethtool.c - Ethtool ioctl handler * Copyright (c) 2003 Matthew Wilcox * @@ -33,6 +33,12 @@ u32 ethtool_op_get_tx_csum(struct net_device *dev) return (dev->features & NETIF_F_ALL_CSUM) != 0; } +u32 ethtool_op_get_rx_all(struct net_device *dev, u32* retval) +{ + *retval = ((dev->priv_flags & IFF_ACCEPT_ALL_FRAMES) != 0); + return 0; +} + int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) { if (data) @@ -796,6 +802,88 @@ static int ethtool_get_perm_addr(struct net_device *dev, void __user *useraddr) return ret; } + +static int ethtool_get_rx_all(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + int rv = 0; + + if (!dev->ethtool_ops->get_rx_all) + return -EOPNOTSUPP; + + if ((rv = dev->ethtool_ops->get_rx_all(dev, &edata.data)) < 0) { + return rv; + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + + +static int ethtool_set_rx_all(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!dev->ethtool_ops->set_rx_all) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return dev->ethtool_ops->set_rx_all(dev, id.data); +} + +static int ethtool_get_rx_fcs(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + int rv = 0; + + if (!dev->ethtool_ops->get_save_fcs) + return -EOPNOTSUPP; + + if ((rv = dev->ethtool_ops->get_save_fcs(dev, 
&edata.data)) < 0) { + return rv; + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + + +static int ethtool_set_rx_fcs(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!dev->ethtool_ops->set_save_fcs) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return dev->ethtool_ops->set_save_fcs(dev, id.data); +} + + +/* Handle some generic ethtool commands here */ +static int ethtool_get_netdev_stats(struct net_device *dev, void *useraddr) { + + struct ethtool_ndstats* nds = (struct ethtool_ndstats*)(useraddr); + + struct net_device_stats *stats = dev->get_stats(dev); + if (stats) { + if (copy_to_user(nds->data, stats, sizeof(*stats))) { + return -EFAULT; + } + } + else { + return -EOPNOTSUPP; + } + return 0; +} + + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct ifreq *ifr) @@ -816,18 +904,28 @@ int dev_ethtool(struct ifreq *ifr) if (!dev || !netif_device_present(dev)) return -ENODEV; - if (!dev->ethtool_ops) - goto ioctl; - if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd))) return -EFAULT; - if(dev->ethtool_ops->begin) + if(dev->ethtool_ops && dev->ethtool_ops->begin) if ((rc = dev->ethtool_ops->begin(dev)) < 0) return rc; old_features = dev->features; - + + /* Handle some generic operations that do not require specific + * ethtool handlers.
+ */ + switch (ethcmd) { + case ETHTOOL_GNDSTATS: + return ethtool_get_netdev_stats(dev, useraddr); + default: + break; + } + + if (!dev->ethtool_ops) + goto ioctl; + switch (ethcmd) { case ETHTOOL_GSET: rc = ethtool_get_settings(dev, useraddr); @@ -916,6 +1014,18 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_PHYS_ID: rc = ethtool_phys_id(dev, useraddr); break; + case ETHTOOL_SETRXALL: + rc = ethtool_set_rx_all(dev, useraddr); + break; + case ETHTOOL_GETRXALL: + rc = ethtool_get_rx_all(dev, useraddr); + break; + case ETHTOOL_SETRXFCS: + rc = ethtool_set_rx_fcs(dev, useraddr); + break; + case ETHTOOL_GETRXFCS: + rc = ethtool_get_rx_fcs(dev, useraddr); + break; case ETHTOOL_GSTATS: rc = ethtool_get_stats(dev, useraddr); break; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1347276..677c672 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -208,6 +208,19 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz return netdev_store(dev, buf, len, change_tx_queue_len); } +NETDEVICE_SHOW(dflt_skb_mark, fmt_ulong); + +static int change_dflt_skb_mark(struct net_device *net, unsigned long new_val) +{ + net->dflt_skb_mark = new_val; + return 0; +} + +static ssize_t store_dflt_skb_mark(struct class_device *dev, const char *buf, size_t len) +{ + return netdev_store(dev, buf, len, change_dflt_skb_mark); +} + NETDEVICE_SHOW(weight, fmt_dec); static int change_weight(struct net_device *net, unsigned long new_weight) @@ -237,6 +250,8 @@ static struct class_device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), + __ATTR(dflt_skb_mark, S_IRUGO | S_IWUSR, show_dflt_skb_mark, + store_dflt_skb_mark), __ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight), {} }; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6a7320b..068c321 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ 
-156,44 +156,28 @@ #include #include /* do_div */ #include +#include /* sched_clock() */ +#include "pktgen.h" + +#define USE_NQW_CALLBACK + +#ifdef USE_NQW_CALLBACK +#include + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +#include +#include "../macvlan/macvlan.h" +#endif +#endif + +#define VERSION "pktgen v2.68: Packet Generator for packet performance testing.\n" + +#define PG_DEBUG(a) a +//#define PG_DEBUG(a) -#define VERSION "pktgen v2.67: Packet Generator for packet performance testing.\n" - -/* #define PG_DEBUG(a) a */ -#define PG_DEBUG(a) - -/* The buckets are exponential in 'width' */ -#define LAT_BUCKETS_MAX 32 -#define IP_NAME_SZ 32 -#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ -#define MPLS_STACK_BOTTOM __constant_htonl(0x00000100) - -/* Device flag bits */ -#define F_IPSRC_RND (1<<0) /* IP-Src Random */ -#define F_IPDST_RND (1<<1) /* IP-Dst Random */ -#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ -#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ -#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ -#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ -#define F_TXSIZE_RND (1<<6) /* Transmit size is random */ -#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ -#define F_MPLS_RND (1<<8) /* Random MPLS labels */ - -/* Thread control flag bits */ -#define T_TERMINATE (1<<0) -#define T_STOP (1<<1) /* Stop run */ -#define T_RUN (1<<2) /* Start run */ -#define T_REMDEVALL (1<<3) /* Remove all devs */ -#define T_REMDEV (1<<4) /* Remove one dev */ /* If lock -- can be removed after some work */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); -/* Used to help with determining the pkts on receive */ -#define PKTGEN_MAGIC 0xbe9be955 -#define PG_PROC_DIR "pktgen" -#define PGCTRL "pgctrl" static struct proc_dir_entry *pg_proc_dir = NULL; #define MAX_CFLOWS 65536 @@ -203,158 +187,43 @@ struct flow_state { int count; }; -struct pktgen_dev { - - /* - * Try to keep frequent/infrequent 
used vars. separated. - */ - - char ifname[IFNAMSIZ]; - char result[512]; - - struct pktgen_thread *pg_thread; /* the owner */ - struct list_head list; /* Used for chaining in the thread's run-queue */ - - int running; /* if this changes to false, the test will stop */ - - /* If min != max, then we will either do a linear iteration, or - * we will do a random selection from within the range. - */ - __u32 flags; - int removal_mark; /* non-zero => the device is marked for - * removal by worker thread */ - - int min_pkt_size; /* = ETH_ZLEN; */ - int max_pkt_size; /* = ETH_ZLEN; */ - int nfrags; - __u32 delay_us; /* Default delay */ - __u32 delay_ns; - __u64 count; /* Default No packets to send */ - __u64 sofar; /* How many pkts we've sent so far */ - __u64 tx_bytes; /* How many bytes we've transmitted */ - __u64 errors; /* Errors when trying to transmit, pkts will be re-sent */ - - /* runtime counters relating to clone_skb */ - __u64 next_tx_us; /* timestamp of when to tx next */ - __u32 next_tx_ns; - - __u64 allocated_skbs; - __u32 clone_count; - int last_ok; /* Was last skb sent? - * Or a failed transmit of some sort? This will keep - * sequence numbers in order, for example. - */ - __u64 started_at; /* micro-seconds */ - __u64 stopped_at; /* micro-seconds */ - __u64 idle_acc; /* micro-seconds */ - __u32 seq_num; - - int clone_skb; /* Use multiple SKBs during packet gen. If this number - * is greater than 1, then that many copies of the same - * packet will be sent before a new packet is allocated. - * For instance, if you want to send 1024 identical packets - * before creating a new packet, set clone_skb to 1024. 
- */ - - char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - - struct in6_addr in6_saddr; - struct in6_addr in6_daddr; - struct in6_addr cur_in6_daddr; - struct in6_addr cur_in6_saddr; - /* For ranges */ - struct in6_addr min_in6_daddr; - struct in6_addr max_in6_daddr; - struct in6_addr min_in6_saddr; - struct in6_addr max_in6_saddr; - - /* If we're doing ranges, random or incremental, then this - * defines the min/max for those ranges. - */ - __u32 saddr_min; /* inclusive, source IP address */ - __u32 saddr_max; /* exclusive, source IP address */ - __u32 daddr_min; /* inclusive, dest IP address */ - __u32 daddr_max; /* exclusive, dest IP address */ - - __u16 udp_src_min; /* inclusive, source UDP port */ - __u16 udp_src_max; /* exclusive, source UDP port */ - __u16 udp_dst_min; /* inclusive, dest UDP port */ - __u16 udp_dst_max; /* exclusive, dest UDP port */ - - /* MPLS */ - unsigned nr_labels; /* Depth of stack, 0 = no MPLS */ - __be32 labels[MAX_MPLS_LABELS]; - - __u32 src_mac_count; /* How many MACs to iterate through */ - __u32 dst_mac_count; /* How many MACs to iterate through */ - - unsigned char dst_mac[ETH_ALEN]; - unsigned char src_mac[ETH_ALEN]; +static char version[] __initdata = VERSION; - __u32 cur_dst_mac_offset; - __u32 cur_src_mac_offset; - __u32 cur_saddr; - __u32 cur_daddr; - __u16 cur_udp_dst; - __u16 cur_udp_src; - __u32 cur_pkt_size; - - __u8 hh[14]; - /* = { - 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, - - We fill in SRC address later - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x00 - }; - */ - __u16 pad; /* pad out the hh struct to an even 16 bytes */ - struct sk_buff *skb; /* skb we are to transmit next, mainly used for when we - * are transmitting the same one multiple times - */ - struct net_device *odev; /* The out-going device. 
Note that the device should - * have it's pg_info pointer pointing back to this - * device. This will be set when the user specifies - * the out-going device name (not when the inject is - * started as it used to do.) - */ - struct flow_state *flows; - unsigned cflows; /* Concurrent flows (config) */ - unsigned lflow; /* Flow length (config) */ - unsigned nflows; /* accumulated flows (stats) */ -}; +#define REMOVE 1 +#define FIND 0 -struct pktgen_hdr { - __u32 pgh_magic; - __u32 seq_num; - __u32 tv_sec; - __u32 tv_usec; -}; +static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove); +static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); +static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); +static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, + const char *ifname); +static int pktgen_device_event(struct notifier_block *, unsigned long, void *); +static void pktgen_run_all_threads(int background); +static void pktgen_stop_all_threads_ifs(void); +static int pktgen_stop_device(struct pktgen_dev *pkt_dev); +static void pktgen_stop(struct pktgen_thread *t); +static void pktgen_clear_counters(struct pktgen_dev *pkt_dev, int seq_too); +static int pktgen_mark_device(const char *ifname); +static unsigned int scan_ip6(const char *s, char ip[16]); +static unsigned int fmt_ip6(char *s, const char ip[16]); +static void clear_nqw_hook(struct pktgen_thread* t, struct net_device* dev); +static int set_nqw_hook(struct pktgen_thread* t, struct net_device* dev, int gfp); -struct pktgen_thread { - spinlock_t if_lock; - struct list_head if_list; /* All device here */ - struct list_head th_list; - int removed; - char name[32]; - char result[512]; - u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ +/* Module parameters, defaults. */ +static int pg_count_d = 1000; /* 1000 pkts by default */ +static int pg_delay_d = 0x7FFFFFFF; /* Don't run until someone sets a different delay. 
*/ - /* Field for thread to receive "posted" events terminate, stop ifs etc. */ +static int pg_clone_skb_d; +static int debug; - u32 control; - int pid; - int cpu; +static DEFINE_MUTEX(pktgen_thread_lock); +static LIST_HEAD(pktgen_threads); - wait_queue_head_t queue; +static struct notifier_block pktgen_notifier_block = { + .notifier_call = pktgen_device_event, }; -#define REMOVE 1 -#define FIND 0 /* This code works around the fact that do_div cannot handle two 64-bit numbers, and regular 64-bit division doesn't work on x86 kernels. @@ -457,13 +326,6 @@ static inline u32 pktgen_random(void) #endif } -static inline __u64 getCurMs(void) -{ - struct timeval tv; - do_gettimeofday(&tv); - return tv_to_ms(&tv); -} - static inline __u64 getCurUs(void) { struct timeval tv; @@ -476,36 +338,17 @@ static inline __u64 tv_diff(const struct timeval *a, const struct timeval *b) return tv_to_us(a) - tv_to_us(b); } -/* old include end */ - -static char version[] __initdata = VERSION; - -static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); -static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); -static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, - const char *ifname); -static int pktgen_device_event(struct notifier_block *, unsigned long, void *); -static void pktgen_run_all_threads(void); -static void pktgen_stop_all_threads_ifs(void); -static int pktgen_stop_device(struct pktgen_dev *pkt_dev); -static void pktgen_stop(struct pktgen_thread *t); -static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); -static int pktgen_mark_device(const char *ifname); -static unsigned int scan_ip6(const char *s, char ip[16]); -static unsigned int fmt_ip6(char *s, const char ip[16]); - -/* Module parameters, defaults. */ -static int pg_count_d = 1000; /* 1000 pkts by default */ -static int pg_delay_d; -static int pg_clone_skb_d; -static int debug; +/* Since the machine booted. 
*/ +static inline __u64 getRelativeCurUs(void) { + return pg_div(sched_clock(), 1000); +} -static DEFINE_MUTEX(pktgen_thread_lock); -static LIST_HEAD(pktgen_threads); +/* Since the machine booted. */ +static inline __u64 getRelativeCurNs(void) { + return sched_clock(); +} -static struct notifier_block pktgen_notifier_block = { - .notifier_call = pktgen_device_event, -}; +/* old include end */ /* * /proc handling functions @@ -542,10 +385,14 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf, pktgen_stop_all_threads_ifs(); else if (!strcmp(data, "start")) - pktgen_run_all_threads(); + pktgen_run_all_threads(0); + + /* Run in the background. */ + else if (!strcmp(data, "bg_start")) + pktgen_run_all_threads(1); else - printk("pktgen: Unknown command: %s\n", data); + printk("pktgen: Unknown command: \"%s\"\n", data); err = count; @@ -558,6 +405,137 @@ static int pgctrl_open(struct inode *inode, struct file *file) return single_open(file, pgctrl_show, PDE(inode)->data); } +static int pg_populate_report(struct pktgen_dev_report* rpt, struct pktgen_dev* pkt_dev) { + int i; + + memset(rpt, 0, sizeof(*rpt)); + rpt->api_version = 1; + rpt->flags = pkt_dev->flags; + strncpy(rpt->thread_name, pkt_dev->pg_thread->name, 32); + strncpy(rpt->interface_name, pkt_dev->ifname, 32); + rpt->min_pkt_size = pkt_dev->min_pkt_size; + rpt->max_pkt_size = pkt_dev->max_pkt_size; + rpt->clone_skb = pkt_dev->clone_skb; + rpt->peer_clone_skb = pkt_dev->peer_clone_skb; + rpt->nfrags = pkt_dev->nfrags; + + strncpy(rpt->dst_min, pkt_dev->dst_min, IP_NAME_SZ); + strncpy(rpt->dst_max, pkt_dev->dst_max, IP_NAME_SZ); + strncpy(rpt->src_min, pkt_dev->src_min, IP_NAME_SZ); + strncpy(rpt->src_max, pkt_dev->src_max, IP_NAME_SZ); + + memcpy(&rpt->in6_saddr, &pkt_dev->in6_saddr, sizeof(struct in6_addr)); + memcpy(&rpt->in6_daddr, &pkt_dev->in6_daddr, sizeof(struct in6_addr)); + + /* For ranges */ + memcpy(&rpt->min_in6_daddr, &pkt_dev->min_in6_daddr, sizeof(struct in6_addr)); + 
memcpy(&rpt->max_in6_daddr, &pkt_dev->max_in6_daddr, sizeof(struct in6_addr)); + memcpy(&rpt->min_in6_saddr, &pkt_dev->min_in6_saddr, sizeof(struct in6_addr)); + memcpy(&rpt->max_in6_saddr, &pkt_dev->max_in6_saddr, sizeof(struct in6_addr)); + + /* If we're doing ranges, random or incremental, then this + * defines the min/max for those ranges. + */ + rpt->saddr_min = pkt_dev->saddr_min; + rpt->saddr_max = pkt_dev->saddr_max; + rpt->daddr_min = pkt_dev->daddr_min; + rpt->daddr_max = pkt_dev->daddr_max; + + rpt->udp_src_min = pkt_dev->udp_src_min; + rpt->udp_src_max = pkt_dev->udp_src_max; + rpt->udp_dst_min = pkt_dev->udp_dst_min; + rpt->udp_dst_max = pkt_dev->udp_dst_max; + + /* MPLS */ + rpt->nr_labels = pkt_dev->nr_labels; /* Depth of stack, 0 = no MPLS */ + for (i = 0; ilabels[i] = pkt_dev->labels[i]; + } + + rpt->src_mac_count = pkt_dev->src_mac_count; + rpt->dst_mac_count = pkt_dev->dst_mac_count; + + memcpy(&rpt->dst_mac, &pkt_dev->dst_mac, ETH_ALEN); + memcpy(&rpt->src_mac, &pkt_dev->src_mac, ETH_ALEN); + + rpt->nflows = pkt_dev->nflows; + rpt->cflows = pkt_dev->cflows; + rpt->lflow = pkt_dev->lflow; + + rpt->delay_ns = pkt_dev->delay_ns; + rpt->count = pkt_dev->count; /* Default No packets to send */ + rpt->sofar = pkt_dev->sofar; /* How many pkts we've sent so far */ + rpt->tx_bytes = pkt_dev->tx_bytes; /* How many bytes we've transmitted */ + rpt->errors = pkt_dev->errors; /* Errors when trying to transmit, pkts will be re-sent */ + + /* Fields relating to receiving pkts */ + rpt->avg_latency = pkt_dev->avg_latency; /* in micro-seconds */ + rpt->min_latency = pkt_dev->min_latency; + rpt->max_latency = pkt_dev->max_latency; + for (i = 0; ilatency_bkts[i] = pkt_dev->latency_bkts[i]; + } + rpt->pkts_rcvd_since_clear = pkt_dev->pkts_rcvd_since_clear; + + rpt->ooo_rcvd = pkt_dev->ooo_rcvd; + rpt->pkts_rcvd = pkt_dev->pkts_rcvd; + rpt->dup_rcvd = pkt_dev->dup_rcvd; + rpt->bytes_rcvd = pkt_dev->bytes_rcvd; + rpt->seq_gap_rcvd = pkt_dev->seq_gap_rcvd; + 
rpt->non_pg_pkts_rcvd = pkt_dev->non_pg_pkts_rcvd; + return 0; +}; /* populate report */ + + +int pktgen_proc_ioctl(struct inode* inode, struct file* file, unsigned int cmd, + unsigned long arg) { + int err = 0; + struct pktgen_ioctl_info args; + struct pktgen_dev* pkt_dev = NULL; + + if (copy_from_user(&args, (void*)arg, sizeof(args))) { + return -EFAULT; + } + + /* Null terminate the names */ + args.thread_name[31] = 0; + args.interface_name[31] = 0; + + /* printk("pktgen: thread_name: %s interface_name: %s\n", + * args.thread_name, args.interface_name); + */ + + switch (cmd) { + case GET_PKTGEN_INTERFACE_INFO: { + mutex_lock(&pktgen_thread_lock); + pkt_dev = __pktgen_NN_threads(args.interface_name, FIND); + if (pkt_dev) { + pg_populate_report(&(args.report), pkt_dev); + if (copy_to_user((void*)(arg), &args, sizeof(args))) { + printk("ERROR: pktgen: copy_to_user failed.\n"); + err = -EFAULT; + } + else { + err = 0; + } + } + else { + printk("ERROR: pktgen: Could not find interface -:%s:-\n", + args.interface_name); + err = -ENODEV; + } + mutex_unlock(&pktgen_thread_lock); + break; + } + default: + printk("%s: Unknown pktgen IOCTL: %x \n", __FUNCTION__, + cmd); + return -EINVAL; + } + + return err; +}/* pktgen_proc_ioctl */ + static struct file_operations pktgen_fops = { .owner = THIS_MODULE, .open = pgctrl_open, @@ -565,6 +543,7 @@ static struct file_operations pktgen_fops = { .llseek = seq_lseek, .write = pgctrl_write, .release = single_release, + .ioctl = pktgen_proc_ioctl, }; static int pktgen_if_show(struct seq_file *seq, void *v) @@ -581,10 +560,11 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->max_pkt_size); seq_printf(seq, - " frags: %d delay: %u clone_skb: %d ifname: %s\n", + " frags: %d delay: %lluns clone_skb: %d peer_clone_skb: %d ifname: %s\n", pkt_dev->nfrags, - 1000 * pkt_dev->delay_us + pkt_dev->delay_ns, - pkt_dev->clone_skb, pkt_dev->ifname); + (unsigned long long)pkt_dev->delay_ns, + pkt_dev->clone_skb, 
pkt_dev->peer_clone_skb, + pkt_dev->ifname); seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); @@ -681,11 +661,29 @@ static int pktgen_if_show(struct seq_file *seq, void *v) stopped = now; /* not really stopped, more like last-running-at */ seq_printf(seq, - "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", + "Current:\n tx-pkts: %llu tx-errors: %llu tx-bytes: %llu\n", (unsigned long long)pkt_dev->sofar, - (unsigned long long)pkt_dev->errors, (unsigned long long)sa, + (unsigned long long)pkt_dev->errors, + (unsigned long long)pkt_dev->tx_bytes); + seq_printf(seq, + " rx-pkts: %llu rx-bytes: %llu\n", + (unsigned long long)pkt_dev->pkts_rcvd, + (unsigned long long)pkt_dev->bytes_rcvd); + + seq_printf(seq, + " blocked: %s next-tx-ns: %llu (%lli) started: %lluus stopped: %lluus idle: %lluns\n", + pkt_dev->tx_blocked ? "TRUE" : "false", + (unsigned long long)pkt_dev->next_tx_ns, + (long long)(pkt_dev->next_tx_ns - getRelativeCurNs()), + (unsigned long long)sa, (unsigned long long)stopped, - (unsigned long long)pkt_dev->idle_acc); + (unsigned long long)pkt_dev->idle_acc_ns); + seq_printf(seq, + " nanodelays: %llu sleeps: %llu queue_stopped: %llu tx-early: %llu\n", + (unsigned long long)pkt_dev->nanodelays, + (unsigned long long)pkt_dev->sleeps, + (unsigned long long)pkt_dev->queue_stopped, + (unsigned long long)pkt_dev->req_tx_early); seq_printf(seq, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", @@ -969,15 +967,11 @@ static ssize_t pktgen_if_write(struct file *file, return len; } i += len; - if (value == 0x7FFFFFFF) { - pkt_dev->delay_us = 0x7FFFFFFF; - pkt_dev->delay_ns = 0; - } else { - pkt_dev->delay_us = value / 1000; - pkt_dev->delay_ns = value % 1000; + pkt_dev->delay_ns = value; + if ((getRelativeCurNs() + pkt_dev->delay_ns) > pkt_dev->next_tx_ns) { + pkt_dev->next_tx_ns = getRelativeCurNs() + pkt_dev->delay_ns; } - sprintf(pg_result, "OK: delay=%u", - 1000 * pkt_dev->delay_us 
+ pkt_dev->delay_ns); + sprintf(pg_result, "OK: delay=%lluns", (unsigned long long)pkt_dev->delay_ns); return count; } if (!strcmp(name, "udp_src_min")) { @@ -1043,6 +1037,17 @@ static ssize_t pktgen_if_write(struct file *file, sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb); return count; } + if (!strcmp(name, "peer_clone_skb")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->peer_clone_skb = value; + + sprintf(pg_result, "OK: peer_clone_skb=%d", pkt_dev->peer_clone_skb); + return count; + } if (!strcmp(name, "count")) { len = num_arg(&user_buffer[i], 10, &value); if (len < 0) { @@ -1141,6 +1146,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->flags &= ~F_MPLS_RND; else { + printk("pktgen: Flag -:%s:- unknown\n", f); sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, @@ -1371,6 +1377,8 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "src_mac")) { char *v = valstr; unsigned char *m = pkt_dev->src_mac; + unsigned char old_dmac[ETH_ALEN]; + memcpy(old_dmac, m, ETH_ALEN); len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) { @@ -1400,12 +1408,16 @@ static ssize_t pktgen_if_write(struct file *file, } } + /* Set up Dest MAC */ + if (compare_ether_addr(old_dmac, pkt_dev->src_mac)) + memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + sprintf(pg_result, "OK: srcmac"); return count; } if (!strcmp(name, "clear_counters")) { - pktgen_clear_counters(pkt_dev); + pktgen_clear_counters(pkt_dev, 0); sprintf(pg_result, "OK: Clearing counters.\n"); return count; } @@ -1448,6 +1460,7 @@ static ssize_t pktgen_if_write(struct file *file, return count; } + printk("pktgen: No such parameter \"%s\"\n", name); sprintf(pkt_dev->result, "No such parameter \"%s\"", name); return -EINVAL; } @@ -1464,6 +1477,7 @@ static struct file_operations pktgen_if_fops = { .llseek = seq_lseek, .write = pktgen_if_write, .release = 
single_release, + .ioctl = pktgen_proc_ioctl, }; static int pktgen_thread_show(struct seq_file *seq, void *v) @@ -1473,12 +1487,17 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); - seq_printf(seq, "Name: %s max_before_softirq: %d\n", - t->name, t->max_before_softirq); + mutex_lock(&pktgen_thread_lock); + + /* versioning info. CFG_RT means we do not busy-spin, so can be configured for + * real-time scheduling if user-space so desires. */ + seq_printf(seq, "VERSION-2 CFG_RT\n"); + + seq_printf(seq, "PID: %d Name: %s max_before_softirq: %d\n", + t->pid, t->name, t->max_before_softirq); seq_printf(seq, "Running: "); - if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) if (pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->ifname); @@ -1494,8 +1513,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) else seq_printf(seq, "\nResult: NA\n"); - if_unlock(t); - + mutex_unlock(&pktgen_thread_lock); return 0; } @@ -1563,18 +1581,42 @@ static ssize_t pktgen_thread_write(struct file *file, return -EFAULT; i += len; mutex_lock(&pktgen_thread_lock); - pktgen_add_device(t, f); + t->control_arg = f; + t->control |= T_ADD_DEV; + while (t->control & T_ADD_DEV) { + schedule_timeout_interruptible(msecs_to_jiffies(10)); + } + t->control_arg = 0; mutex_unlock(&pktgen_thread_lock); ret = count; sprintf(pg_result, "OK: add_device=%s", f); goto out; } + if (!strcmp(name, "rem_device")) { + char f[32]; + memset(f, 0, 32); + len = strn_len(&user_buffer[i], sizeof(f) - 1); + if (len < 0) { + ret = len; + goto out; + } + if (copy_from_user(f, &user_buffer[i], len)) + return -EFAULT; + i += len; + pktgen_mark_device(f); + ret = count; + sprintf(pg_result, "OK: rem_device=%s", f); + goto out; + } + if (!strcmp(name, "rem_device_all")) { mutex_lock(&pktgen_thread_lock); t->control |= T_REMDEVALL; mutex_unlock(&pktgen_thread_lock); - schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ + while (t->control & 
T_REMDEVALL) { + schedule_timeout_interruptible(msecs_to_jiffies(10)); + } ret = count; sprintf(pg_result, "OK: rem_device_all"); goto out; @@ -1590,6 +1632,8 @@ static ssize_t pktgen_thread_write(struct file *file, goto out; } + printk("pktgen: un-known command to pktgen_thread: -:%s:-\n", name); + ret = -EINVAL; out: return ret; @@ -1607,8 +1651,10 @@ static struct file_operations pktgen_thread_fops = { .llseek = seq_lseek, .write = pktgen_thread_write, .release = single_release, + .ioctl = pktgen_proc_ioctl, }; + /* Think find or remove for NN */ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) { @@ -1619,14 +1665,14 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) pkt_dev = pktgen_find_dev(t, ifname); if (pkt_dev) { if (remove) { - if_lock(t); pkt_dev->removal_mark = 1; t->control |= T_REMDEV; - if_unlock(t); } break; } } + /*PG_DEBUG(printk("pktgen_NN_threads, ifname: %s remove: %d pkt_dev: 0x%p\n", + ifname, remove, pkt_dev)); */ return pkt_dev; } @@ -1698,13 +1744,21 @@ static int pktgen_device_event(struct notifier_block *unused, /* Associate pktgen_dev with a device. 
*/ -static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev) +static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev, struct pktgen_thread* t) { struct net_device *odev; /* Clean old setups */ if (pkt_dev->odev) { + +#ifdef USE_NQW_CALLBACK + /* Set the nqw callback hooks */ + rtnl_lock(); + clear_nqw_hook(t, pkt_dev->odev); + rtnl_unlock(); +#endif + pkt_dev->odev->pkt_dev = NULL; dev_put(pkt_dev->odev); pkt_dev->odev = NULL; } @@ -1726,6 +1780,14 @@ static struct net_device *pktgen_setup_dev(struct pktgen_dev *pkt_dev) } pkt_dev->odev = odev; +#ifdef USE_NQW_CALLBACK + /* Set the nqw callback hooks */ + rtnl_lock(); + set_nqw_hook(t, pkt_dev->odev, GFP_ATOMIC); + rtnl_unlock(); +#endif + + pkt_dev->odev->pkt_dev = pkt_dev; return pkt_dev->odev; out_put: @@ -1742,7 +1804,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) { /* Try once more, just in case it works now. */ if (!pkt_dev->odev) - pktgen_setup_dev(pkt_dev); + pktgen_setup_dev(pkt_dev, pkt_dev->pg_thread); if (!pkt_dev->odev) { printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n"); @@ -1755,6 +1817,9 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) if (is_zero_ether_addr(pkt_dev->src_mac)) memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, ETH_ALEN); + else + memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + /* Set up Dest MAC */ memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); @@ -1845,30 +1910,188 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) pkt_dev->nflows = 0; } -static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) -{ - __u64 start; - __u64 now; - - start = now = getCurUs(); - printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now)); - while (now < spin_until_us) { - /* TODO: optimize sleeping behavior */ - if (spin_until_us - now > jiffies_to_usecs(1) + 1) - schedule_timeout_interruptible(1); - else if (spin_until_us - now > 100) { - do_softirq(); - if (!pkt_dev->running) - 
return; - if (need_resched()) - schedule(); + +#ifdef USE_NQW_CALLBACK +/* Runs from interrupt */ +int pg_notify_queue_woken(struct net_device* dev) { + /* Find the thread that needs waking. */ + struct pktgen_thread* t = ((struct pg_nqw_data*)(dev->nqw_data))->pg_thread; + t->control |= T_WAKE_BLOCKED; + wake_up_interruptible(&(t->queue)); + return 0; +} + +/* Must hold RTNL lock while calling this. */ +static int set_nqw_hook(struct pktgen_thread* t, struct net_device* dev, int gfp) { + /* The notify-queue-woken magic only works for physical + * devices at this time. So, apply hook to underlying + * device. + */ + struct pg_nqw_data* nqwd; + ASSERT_RTNL(); + BUG_ON(!t); + + if (!dev) { + WARN_ON(!dev); + return -ENODEV; + } +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (dev->priv_flags & IFF_MAC_VLAN) { + struct macvlan_vlan *vlan = dev->priv; + printk("pktgen: setting nqw_hook on lower mac-vlan dev: %p\n", vlan->lowerdev); + return set_nqw_hook(t, vlan->lowerdev, gfp); + } +#endif + + if (dev->priv_flags & IFF_802_1Q_VLAN) { + printk("pktgen: setting nqw_hook on real-dev of .1q vlan: %s\n", dev->name); + return set_nqw_hook(t, VLAN_DEV_INFO(dev)->real_dev, gfp); + } + + nqwd = (struct pg_nqw_data*)(dev->nqw_data); + + if (nqwd) { + if (nqwd->magic == PG_NQW_MAGIC) { + if (nqwd->pg_thread == t) { + atomic_inc(&(nqwd->nqw_ref_count)); + + printk("pktgen: Incremented nqw_ref_count: %d device: %s\n", + (int)(atomic_read(&(nqwd->nqw_ref_count))), dev->name); + return 0; + } + else { + printk("pktgen: ERROR: set_nqw_hook: nqwd thread does not match, dev: %s\n", + dev->name); + return -EINVAL; + } } + else { + printk("pktgen: WARNING: set_nqw_hook: nqwd magic is NOT PKT-GEN, dev: %s magic: 0x%x\n", + dev->name, nqwd->magic); + return 0; + } + } + else { + nqwd = kmalloc(sizeof(*nqwd), gfp); + if (nqwd) { + memset(nqwd, 0, sizeof(*nqwd)); + nqwd->magic = PG_NQW_MAGIC; + atomic_inc(&(nqwd->nqw_ref_count)); + nqwd->pg_thread = t; + dev->nqw_data = nqwd; +
+ dev->notify_queue_woken = pg_notify_queue_woken; + printk("pktgen: Added nqw callback to device: %s\n", + dev->name); + return 0; + } + else { + printk("pktgen: ERROR: could not allocate nqwd for dev: %s\n", dev->name); + return -ENOBUFS; + } + } +}//set_nqw_hook + - now = getCurUs(); +/* Must hold RTNL lock while calling this. */ +static void clear_nqw_hook(struct pktgen_thread* t, struct net_device* dev) { + /* The notify-queue-woken magic only works for physical + * devices at this time. So, apply hook to underlying + * device. + */ + ASSERT_RTNL(); + BUG_ON(!t); + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (dev->priv_flags & IFF_MAC_VLAN) { + struct macvlan_vlan *vlan = dev->priv; + clear_nqw_hook(t, vlan->lowerdev); + return; } +#endif + + if (dev->priv_flags & IFF_802_1Q_VLAN) { + clear_nqw_hook(t, VLAN_DEV_INFO(dev)->real_dev); + return; + } + + if (dev->nqw_data) { + struct pg_nqw_data* nqwd = (struct pg_nqw_data*)(dev->nqw_data); + if (nqwd->magic == PG_NQW_MAGIC) { + if (t != nqwd->pg_thread) { + printk("pktgen ERROR: t != nqwd->pg_thread\n"); + } + atomic_dec(&(nqwd->nqw_ref_count)); + + printk("pktgen: Decremented nqw_ref_count: %d device: %s\n", + (int)(atomic_read(&(nqwd->nqw_ref_count))), + dev->name); + + BUG_ON(atomic_read(&(nqwd->nqw_ref_count)) < 0); + + if (atomic_read(&(nqwd->nqw_ref_count)) == 0) { + printk("pktgen: Removing nqw reference from device: %s\n", + dev->name); + dev->notify_queue_woken = NULL; + dev->nqw_data = NULL; + kfree(nqwd); + } + } + else { + printk("pktgen: WARNING: clear_nqw_hook: nqwd magic is NOT PKT-GEN, dev: %s magic: 0x%x", + dev->name, nqwd->magic); + } + } + else { + printk("pktgen: Warning: nqw_data is null in clear_nqw_hook, dev: %s\n", + dev->name); + } +}//clear_nqw_hook + +#endif + + +/* delay_ns is in nano-seconds */ +static void pg_nanodelay(u64 delay_ns, struct pktgen_dev* info) { + u64 idle_start = getRelativeCurNs(); + u64 last_time; + u64 _diff; + u64 itmp = idle_start; + 
struct pktgen_dev *p = NULL; + struct pktgen_thread* t = info->pg_thread; + + info->nanodelays++; + info->accum_delay_ns += delay_ns; + while (info->accum_delay_ns > PG_MAX_ACCUM_DELAY_NS) { + info->sleeps++; + interruptible_sleep_on_timeout(&(t->queue), 1); + /* will wake after one tick */ + last_time = itmp; + + /* Subtract delay from all interfaces for this thread, since all are blocked when + * any are blocked. + */ + itmp = getRelativeCurNs(); + _diff = (itmp - last_time); + list_for_each_entry(p, &t->if_list, list) { + p->accum_delay_ns -= _diff; + /* Limit saving up too much time... */ + if (p->accum_delay_ns < -10000000) { + p->accum_delay_ns = -10000000; + } + } + + /* For accounting, only charge this guy for the idle though...*/ + info->idle_acc_ns += _diff; + + /* break out if we are stopped, were woken by a queue-wake event (bit tests on t->control), or if we should transmit (maybe our ipg changed?) */ + if (info->removal_mark || (itmp >= info->next_tx_ns) || + (t->control & T_WAKE_BLOCKED) || + (t->control & T_STOP)) { + break; + } + }/* while */ +}//pg_nanodelay - pkt_dev->idle_acc += now - start; -} /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst @@ -2545,13 +2768,209 @@ static inline struct sk_buff *fill_packet(struct net_device *odev, return fill_packet_ipv4(odev, pkt_dev); } -static void pktgen_clear_counters(struct pktgen_dev *pkt_dev) -{ - pkt_dev->seq_num = 1; - pkt_dev->idle_acc = 0; + +static void record_latency(struct pktgen_dev* pkt_dev, int latency) { + /* NOTE: Latency can be negative */ + int div = 100; + int diff; + int vl; + int i; + + pkt_dev->pkts_rcvd_since_clear++; + + if (pkt_dev->pkts_rcvd_since_clear < 100) { + div = pkt_dev->pkts_rcvd; + if (pkt_dev->pkts_rcvd_since_clear == 1) { + pkt_dev->avg_latency = latency; + } + } + + if ((div + 1) == 0) { + pkt_dev->avg_latency = 0; + } + else { + pkt_dev->avg_latency = ((pkt_dev->avg_latency * div + latency) / (div + 1)); + } + + if (latency < pkt_dev->min_latency) { +
pkt_dev->min_latency = latency; + } + if (latency > pkt_dev->max_latency) { + pkt_dev->max_latency = latency; + } + + /* Place the latency in the right 'bucket' */ + diff = (latency - pkt_dev->min_latency); + for (i = 0; ilatency_bkts[i]++; + break; + } + } +}/* record latency */ + + +/* Returns < 0 if the skb is not a pktgen buffer. */ +int pktgen_receive(struct sk_buff* skb) { + /* See if we have a pktgen packet */ + /* TODO: Add support for detecting IPv6, TCP packets too. This will only + * catch UDP at the moment. --Ben + */ + /* printk("pktgen-rcv, skb->len: %d\n", skb->len); */ + + /* If this is a paged skb, make sure we pull up + * whatever data we need to look at. */ + if (!pskb_may_pull(skb, 20 + 8 + sizeof(struct pktgen_hdr))) { + return -1; + } + + if ((skb->len >= (20 + 8 + sizeof(struct pktgen_hdr))) && + (skb->protocol == __constant_htons(ETH_P_IP))) { + struct pktgen_hdr* pgh; + + /* It's IP, and long enough, lets check the magic number. + * TODO: This is a hack not always guaranteed to catch the right + * packets. 
+ */ + + /* printk("Length & protocol passed, skb->data: %p, raw: %p\n", + skb->data, skb->h.raw); */ + + pgh = (struct pktgen_hdr*)(skb->data + 20 + 8); + + /* + tmp = (char*)(skb->data); + for (i = 0; i<90; i++) { + printk("%02hx ", tmp[i]); + if (((i + 1) % 15) == 0) { + printk("\n"); + } + } + printk("\n"); + */ + + if (pgh->pgh_magic == __constant_ntohl(PKTGEN_MAGIC)) { + struct net_device* dev = skb->dev; + struct pktgen_dev* pkt_dev; + __u32 seq = ntohl(pgh->seq_num); + + // TODO: Need lock..maybe + pkt_dev = dev->pkt_dev; + + if (!pkt_dev) { + return -1; + } + + pkt_dev->pkts_rcvd++; + pkt_dev->bytes_rcvd += skb->len; + + /* Check for out-of-sequence packets */ + if (pkt_dev->last_seq_rcvd == seq) { + pkt_dev->dup_rcvd++; + pkt_dev->dup_since_incr++; + } + else { + __s64 rx; + __s64 tx; + struct timeval txtv; + if (skb->tstamp.off_sec || skb->tstamp.off_usec) { + skb_get_timestamp(skb, &txtv); + } + else { + do_gettimeofday(&txtv); + skb_set_timestamp(skb, &txtv); + } + rx = tv_to_us(&txtv); + + txtv.tv_usec = ntohl(pgh->tv_usec); + txtv.tv_sec = ntohl(pgh->tv_sec); + tx = tv_to_us(&txtv); + record_latency(pkt_dev, rx - tx); + + if ((pkt_dev->last_seq_rcvd + 1) == seq) { + if ((pkt_dev->peer_clone_skb > 1) && + (pkt_dev->peer_clone_skb > (pkt_dev->dup_since_incr + 1))) { + + pkt_dev->seq_gap_rcvd += (pkt_dev->peer_clone_skb - + pkt_dev->dup_since_incr - 1); + } + /* Great, in order...all is well */ + } + else if (pkt_dev->last_seq_rcvd < seq) { + /* sequence gap, means we dropped a pkt most likely */ + if (pkt_dev->peer_clone_skb > 1) { + /* We dropped more than one sequence number's worth, + * and if we're using clone_skb, then this is quite + * a few. This number still will not be exact, but + * it will be closer. 
+ */ + pkt_dev->seq_gap_rcvd += (((seq - pkt_dev->last_seq_rcvd) * + pkt_dev->peer_clone_skb) - + pkt_dev->dup_since_incr); + } + else { + pkt_dev->seq_gap_rcvd += (seq - pkt_dev->last_seq_rcvd - 1); + } + } + else { + pkt_dev->ooo_rcvd++; /* out-of-order */ + } + + pkt_dev->dup_since_incr = 0; + } + pkt_dev->last_seq_rcvd = seq; + kfree_skb(skb); + if (debug > 1) { + printk("done with pktgen_receive, free'd pkt\n"); + } + return 0; + } + } + return -1; /* Let another protocol handle it, it's not for us! */ +}/* pktgen_receive */ + +static void pg_reset_latency_counters(struct pktgen_dev* pkt_dev) { + int i; + pkt_dev->avg_latency = 0; + pkt_dev->min_latency = 0x7fffffff; /* largest integer */ + pkt_dev->max_latency = 0x80000000; /* smallest integer */ + pkt_dev->pkts_rcvd_since_clear = 0; + for (i = 0; ilatency_bkts[i] = 0; + } +} + + +static void pktgen_clear_counters(struct pktgen_dev *pkt_dev, int seq_too) { + pkt_dev->idle_acc_ns = 0; pkt_dev->sofar = 0; pkt_dev->tx_bytes = 0; pkt_dev->errors = 0; + pkt_dev->pkts_rcvd_since_clear = 0; + + pkt_dev->ooo_rcvd = 0; + pkt_dev->dup_rcvd = 0; + pkt_dev->pkts_rcvd = 0; + pkt_dev->bytes_rcvd = 0; + pkt_dev->non_pg_pkts_rcvd = 0; + pkt_dev->seq_gap_rcvd = 0; /* dropped */ + + /* Clear some transient state */ + pkt_dev->accum_delay_ns = 0; + pkt_dev->sleeps = 0; + pkt_dev->nanodelays = 0; + + /* This is a bit of a hack, but it gets the dup counters + * in line so we don't have false alarms on dropped pkts. + */ + if (seq_too) { + pkt_dev->dup_since_incr = pkt_dev->peer_clone_skb - 1; + pkt_dev->seq_num = 1; + pkt_dev->last_seq_rcvd = 0; + } + + pg_reset_latency_counters(pkt_dev); } /* Set up structure for sending pkts, clear counters */ @@ -2563,30 +2982,28 @@ static void pktgen_run(struct pktgen_thread *t) PG_DEBUG(printk("pktgen: entering pktgen_run. %p\n", t)); - if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) { - - /* - * setup odev and create initial packet. 
- */ - pktgen_setup_inject(pkt_dev); - - if (pkt_dev->odev) { - pktgen_clear_counters(pkt_dev); - pkt_dev->running = 1; /* Cranke yeself! */ - pkt_dev->skb = NULL; - pkt_dev->started_at = getCurUs(); - pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */ - pkt_dev->next_tx_ns = 0; - - strcpy(pkt_dev->result, "Starting"); - started++; - } else - strcpy(pkt_dev->result, "Error starting"); + /* If already running, then ignore. */ + if (! pkt_dev->running) { + /* + * setup odev and create initial packet. + */ + pktgen_setup_inject(pkt_dev); + + if (pkt_dev->odev) { + pktgen_clear_counters(pkt_dev, 1); + pkt_dev->running = 1; /* Cranke yeself! */ + pkt_dev->skb = NULL; + pkt_dev->started_at = getCurUs(); + /* Transmit first pkt immediately */ + pkt_dev->next_tx_ns = getRelativeCurNs(); + + strcpy(pkt_dev->result, "Starting"); + started++; + } else + strcpy(pkt_dev->result, "Error starting"); + } } - if_unlock(t); - if (started) - t->control &= ~(T_STOP); } static void pktgen_stop_all_threads_ifs(void) @@ -2603,65 +3020,12 @@ static void pktgen_stop_all_threads_ifs(void) mutex_unlock(&pktgen_thread_lock); } -static int thread_is_running(struct pktgen_thread *t) -{ - struct pktgen_dev *pkt_dev; - int res = 0; - list_for_each_entry(pkt_dev, &t->if_list, list) - if (pkt_dev->running) { - res = 1; - break; - } - return res; -} - -static int pktgen_wait_thread_run(struct pktgen_thread *t) -{ - if_lock(t); - - while (thread_is_running(t)) { - - if_unlock(t); - - msleep_interruptible(100); - - if (signal_pending(current)) - goto signal; - if_lock(t); - } - if_unlock(t); - return 1; -signal: - return 0; -} - -static int pktgen_wait_all_threads_run(void) -{ +static void pktgen_run_all_threads(int background) { struct pktgen_thread *t; - int sig = 1; - - mutex_lock(&pktgen_thread_lock); - - list_for_each_entry(t, &pktgen_threads, th_list) { - sig = pktgen_wait_thread_run(t); - if (sig == 0) - break; - } - - if (sig == 0) - list_for_each_entry(t, &pktgen_threads, th_list) - 
t->control |= (T_STOP); - mutex_unlock(&pktgen_thread_lock); - return sig; -} - -static void pktgen_run_all_threads(void) -{ - struct pktgen_thread *t; - - PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n")); + PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads, background: %d\n", + background)); mutex_lock(&pktgen_thread_lock); @@ -2670,9 +3034,14 @@ static void pktgen_run_all_threads(void) mutex_unlock(&pktgen_thread_lock); - schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ + /* This is a hack at best...disabling, we should not have to depend on this. */ + /*schedule_timeout_interruptible(msecs_to_jiffies(125));*/ /* Propagate thread->control */ - pktgen_wait_all_threads_run(); + // Much harder to get rid of the if_lock if we allow this to block... + if (!background) { + printk("ERROR: non-background mode no longer supported.\n"); + //pktgen_wait_all_threads_run(); + } } static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) @@ -2682,7 +3051,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) total_us = pkt_dev->stopped_at - pkt_dev->started_at; - idle = pkt_dev->idle_acc; + idle = pg_div(pkt_dev->idle_acc_ns, 1000); p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n", (unsigned long long)total_us, @@ -2731,22 +3100,62 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) return 0; } -static struct pktgen_dev *next_to_run(struct pktgen_thread *t) -{ - struct pktgen_dev *pkt_dev, *best = NULL; - - if_lock(t); +/** Find the adapter that needs to tx next. + * We need to take the blocked adapters into account, but can't ignore + * them forever just in case we missed the tx-queue-wake event for some + * reason.
+ */ +static struct pktgen_dev *next_to_run(struct pktgen_thread *t, u64 now, u64* next_running_delay) { + struct pktgen_dev *pkt_dev = NULL; + struct pktgen_dev *best = NULL; + struct pktgen_dev *best_blocked = NULL; list_for_each_entry(pkt_dev, &t->if_list, list) { if (!pkt_dev->running) continue; - if (best == NULL) - best = pkt_dev; - else if (pkt_dev->next_tx_us < best->next_tx_us) - best = pkt_dev; + if (pkt_dev->tx_blocked) { + if (best_blocked == NULL) + best_blocked = pkt_dev; + else { + if (pkt_dev->next_tx_ns < best_blocked->next_tx_ns) { + best_blocked = pkt_dev; + } + } + } + else { + if (best == NULL) + best = pkt_dev; + else { + if (pkt_dev->next_tx_ns < best->next_tx_ns) { + best = pkt_dev; + } + } + } } - if_unlock(t); - return best; + + /** If we have a blocked device that is more than 1ms late, then try it again first. + * Otherwise, take best non-blocked device. + */ + if (best) { + if (best->next_tx_ns <= now) { + *next_running_delay = 0; + } + else { + *next_running_delay = best->next_tx_ns - now; + } + } + else { + *next_running_delay = 10000000; /* 10ms */ + } + + if (best_blocked && (best_blocked->next_tx_ns < (now - PG_TRY_TX_ANYWAY_NS))) { + return best_blocked; + } + + if (best) { + return best; + } + return best_blocked; } static void pktgen_stop(struct pktgen_thread *t) @@ -2755,8 +3164,6 @@ static void pktgen_stop(struct pktgen_thread *t) PG_DEBUG(printk("pktgen: entering pktgen_stop\n")); - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) { pktgen_stop_device(pkt_dev); if (pkt_dev->skb) @@ -2764,8 +3171,6 @@ static void pktgen_stop(struct pktgen_thread *t) pkt_dev->skb = NULL; } - - if_unlock(t); } /* @@ -2779,8 +3184,6 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) PG_DEBUG(printk("pktgen: entering pktgen_rem_one_if\n")); - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -2795,10 +3198,15 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) break; } 
- - if_unlock(t); } +static void pktgen_unblock_all_ifs(struct pktgen_thread *t) { + struct pktgen_dev *p = NULL;; + list_for_each_entry(p, &t->if_list, list) + p->tx_blocked = 0; +}/* wake all writers */ + + static void pktgen_rem_all_ifs(struct pktgen_thread *t) { struct list_head *q, *n; @@ -2807,8 +3215,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) /* Remove all devices, free mem */ PG_DEBUG(printk("pktgen: entering pktgen_rem_all_ifs\n")); - if_lock(t); - list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); @@ -2818,8 +3224,6 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) pktgen_remove_device(t, cur); } - - if_unlock(t); } static void pktgen_rem_thread(struct pktgen_thread *t) @@ -2835,34 +3239,41 @@ static void pktgen_rem_thread(struct pktgen_thread *t) mutex_unlock(&pktgen_thread_lock); } -static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) -{ - struct net_device *odev = NULL; +static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev, u64 now) { + struct net_device *odev; __u64 idle_start = 0; int ret; - + odev = pkt_dev->odev; - if (pkt_dev->delay_us || pkt_dev->delay_ns) { - u64 now; - - now = getCurUs(); - if (now < pkt_dev->next_tx_us) - spin(pkt_dev, pkt_dev->next_tx_us); - + if (pkt_dev->delay_ns || (pkt_dev->accum_delay_ns > 0)) { + if (now < pkt_dev->next_tx_ns) { + /* Don't tx early..*/ + pkt_dev->req_tx_early++; + goto out; + } + /* This is max DELAY, this has special meaning of * "never transmit" */ - if (pkt_dev->delay_us == 0x7FFFFFFF) { - pkt_dev->next_tx_us = getCurUs() + pkt_dev->delay_us; - pkt_dev->next_tx_ns = pkt_dev->delay_ns; + if (pkt_dev->delay_ns == 0x7FFFFFFF) { + pkt_dev->next_tx_ns = getRelativeCurNs() + pkt_dev->delay_ns; goto out; } } - if (netif_queue_stopped(odev) || need_resched()) { - idle_start = getCurUs(); - + if (need_resched()) { + idle_start = getRelativeCurNs(); + schedule(); + pkt_dev->idle_acc_ns += getRelativeCurNs() - idle_start; + } + + 
if (netif_queue_stopped(odev)) { + pkt_dev->queue_stopped++; + pkt_dev->tx_blocked = 1; + /* change tx time to now to show work was at least attempted. */ + pkt_dev->next_tx_ns = now; + if (!netif_running(odev)) { pktgen_stop_device(pkt_dev); if (pkt_dev->skb) @@ -2870,16 +3281,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->skb = NULL; goto out; } - if (need_resched()) - schedule(); - pkt_dev->idle_acc += getCurUs() - idle_start; - - if (netif_queue_stopped(odev)) { - pkt_dev->next_tx_us = getCurUs(); /* TODO */ - pkt_dev->next_tx_ns = 0; - goto out; /* Try the next interface */ - } + goto out; /* Try the next interface */ } if (pkt_dev->last_ok || !pkt_dev->skb) { @@ -2910,40 +3313,44 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) if (likely(ret == NETDEV_TX_OK)) { pkt_dev->last_ok = 1; pkt_dev->sofar++; - pkt_dev->seq_num++; pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; + pkt_dev->next_tx_ns = getRelativeCurNs() + pkt_dev->delay_ns; + pkt_dev->tx_blocked = 0; } else if (ret == NETDEV_TX_LOCKED && (odev->features & NETIF_F_LLTX)) { cpu_relax(); goto retry_now; } else { /* Retry it next time */ - + static int do_once_hsx_wrn = 1; + if (do_once_hsx_wrn) { + printk(KERN_INFO "pktgen: Hard xmit error, driver for %s doesn't do queue-stopped quite right.\n", odev->name); + printk(KERN_INFO "pktgen: Transmit request will be retried, and this error msg will not be printed again..\n"); + do_once_hsx_wrn = 0; + } + atomic_dec(&(pkt_dev->skb->users)); - if (debug && net_ratelimit()) - printk(KERN_INFO "pktgen: Hard xmit error\n"); - + pkt_dev->queue_stopped++; pkt_dev->errors++; pkt_dev->last_ok = 0; - } - - pkt_dev->next_tx_us = getCurUs(); - pkt_dev->next_tx_ns = 0; - pkt_dev->next_tx_us += pkt_dev->delay_us; - pkt_dev->next_tx_ns += pkt_dev->delay_ns; - - if (pkt_dev->next_tx_ns > 1000) { - pkt_dev->next_tx_us++; - pkt_dev->next_tx_ns -= 1000; + /* Try a little later..flag us as wanting to tx, but unable. 
Will try again shortly. + */ + pkt_dev->tx_blocked = 1; + /* change tx time to now to show work was at least attempted. */ + pkt_dev->next_tx_ns = now; } } else { /* Retry it next time */ + pkt_dev->queue_stopped++; pkt_dev->last_ok = 0; - pkt_dev->next_tx_us = getCurUs(); /* TODO */ - pkt_dev->next_tx_ns = 0; + /* Try a little later..flag us as wanting to tx, but unable. Will try again shortly. + */ + pkt_dev->tx_blocked = 1; + /* change tx time to now to show work was at least attempted. */ + pkt_dev->next_tx_ns = now; } netif_tx_unlock_bh(odev); @@ -2951,14 +3358,14 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { if (atomic_read(&(pkt_dev->skb->users)) != 1) { - idle_start = getCurUs(); + idle_start = getRelativeCurNs(); while (atomic_read(&(pkt_dev->skb->users)) != 1) { if (signal_pending(current)) { break; } schedule(); } - pkt_dev->idle_acc += getCurUs() - idle_start; + pkt_dev->idle_acc_ns += getRelativeCurNs() - idle_start; } /* Done with this */ @@ -2982,7 +3389,9 @@ static void pktgen_thread_worker(struct pktgen_thread *t) sigset_t tmpsig; u32 max_before_softirq; u32 tx_since_softirq = 0; - + u64 now; + u64 next_running_delay; + daemonize("pktgen/%d", cpu); /* Block all signals except SIGKILL, SIGSTOP and SIGTERM */ @@ -3007,6 +3416,8 @@ static void pktgen_thread_worker(struct pktgen_thread *t) t->control &= ~(T_STOP); t->control &= ~(T_REMDEVALL); t->control &= ~(T_REMDEV); + t->control &= ~(T_WAKE_BLOCKED); + t->control &= ~(T_ADD_DEV); t->pid = current->pid; @@ -3024,13 +3435,52 @@ static void pktgen_thread_worker(struct pktgen_thread *t) /* * Get next dev to xmit -- if any. 
*/ + find_best: - pkt_dev = next_to_run(t); + if (t->control & T_WAKE_BLOCKED) { + pktgen_unblock_all_ifs(t); + t->control &= ~(T_WAKE_BLOCKED); + } + + now = getRelativeCurNs(); + pkt_dev = next_to_run(t, now, &next_running_delay); if (pkt_dev) { - pktgen_xmit(pkt_dev); + if (pkt_dev->tx_blocked) { + /* If blocked for less than 1ms, then sleep for up to 1ms. If the + * device un-blocks, then we will be woken by the wait-queue callback. + */ + u64 tx_anyway_ns = (pkt_dev->next_tx_ns + PG_TRY_TX_ANYWAY_NS); /* deadline after which we retry regardless; computed by addition so the u64 math cannot wrap when now < PG_TRY_TX_ANYWAY_NS */ + if (tx_anyway_ns > now) { + pg_nanodelay(min(next_running_delay, (u64)(PG_TRY_TX_ANYWAY_NS)), + pkt_dev); + /* Maybe things have changed since we went to sleep. */ + goto find_best; + } + } + + /* If the best to run should not run yet, then sleep (or accumulate sleep) */ + if (now < pkt_dev->next_tx_ns) { + /* spin(pkt_dev, pkt_dev->next_tx_us); */ + u64 next_ipg = pkt_dev->next_tx_ns - now; + + /* These will not actually busy-spin now. Will run as + * much as 1ms fast, and will sleep in 1ms units, assuming + * our tick is 1ms. + */ + pg_nanodelay(next_ipg, pkt_dev); + now = getRelativeCurNs(); + if (pkt_dev->removal_mark || + (pkt_dev->pg_thread->control & T_STOP)) { /* bitwise flag test: "&& T_STOP" was true for any pending control bit */ + goto skip_tx; + } + } + + + pktgen_xmit(pkt_dev, now); + skip_tx: /* * We like to stay RUNNING but must also give * others fair share. 
@@ -3068,6 +3518,11 @@ static void pktgen_thread_worker(struct pktgen_thread *t) t->control &= ~(T_RUN); } + if (t->control & T_ADD_DEV) { + pktgen_add_device(t, (char*)(t->control_arg)); + t->control &= ~(T_ADD_DEV); + } + if (t->control & T_REMDEVALL) { pktgen_rem_all_ifs(t); t->control &= ~(T_REMDEVALL); @@ -3098,16 +3553,12 @@ static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char *ifname) { struct pktgen_dev *p, *pkt_dev = NULL; - if_lock(t); - list_for_each_entry(p, &t->if_list, list) if (strncmp(p->ifname, ifname, IFNAMSIZ) == 0) { pkt_dev = p; break; } - - if_unlock(t); - PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev)); + /* PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname, pkt_dev)); */ return pkt_dev; } @@ -3120,8 +3571,6 @@ static int add_dev_to_thread(struct pktgen_thread *t, { int rv = 0; - if_lock(t); - if (pkt_dev->pg_thread) { printk("pktgen: ERROR: already assigned to a thread.\n"); rv = -EBUSY; @@ -3133,12 +3582,9 @@ static int add_dev_to_thread(struct pktgen_thread *t, pkt_dev->running = 0; out: - if_unlock(t); return rv; } -/* Called under thread lock */ - static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) { struct pktgen_dev *pkt_dev; @@ -3151,7 +3597,10 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) printk("pktgen: ERROR: interface already used.\n"); return -EBUSY; } - + else { + printk("pktgen: Attempting to add device: %s\n", ifname); + } + pkt_dev = kzalloc(sizeof(struct pktgen_dev), GFP_KERNEL); if (!pkt_dev) return -ENOMEM; @@ -3168,8 +3617,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->max_pkt_size = ETH_ZLEN; pkt_dev->nfrags = 0; pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->delay_us = pg_delay_d / 1000; - pkt_dev->delay_ns = pg_delay_d % 1000; + pkt_dev->delay_ns = pg_delay_d; pkt_dev->count = pg_count_d; pkt_dev->sofar = 0; pkt_dev->udp_src_min = 9; /* sink port */ @@ -3179,7 
+3627,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) strncpy(pkt_dev->ifname, ifname, IFNAMSIZ); - if (!pktgen_setup_dev(pkt_dev)) { + if (!pktgen_setup_dev(pkt_dev, t)) { printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); if (pkt_dev->flows) vfree(pkt_dev->flows); @@ -3202,7 +3650,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) return add_dev_to_thread(t, pkt_dev); } -static struct pktgen_thread *__init pktgen_find_thread(const char *name) +static struct pktgen_thread* pktgen_find_thread(const char *name) { struct pktgen_thread *t; @@ -3241,7 +3689,6 @@ static int __init pktgen_create_thread(const char *name, int cpu) } strcpy(t->name, name); - spin_lock_init(&t->if_lock); t->cpu = cpu; pe = create_proc_entry(t->name, 0600, pg_proc_dir); @@ -3304,6 +3751,14 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Dis-associate from the interface */ if (pkt_dev->odev) { + +#ifdef USE_NQW_CALLBACK + /* Set the nqw callback hooks */ + rtnl_lock(); + clear_nqw_hook(t, pkt_dev->odev); + rtnl_unlock(); +#endif + pkt_dev->odev->pkt_dev = NULL; dev_put(pkt_dev->odev); pkt_dev->odev = NULL; } @@ -3314,6 +3769,7 @@ static int pktgen_remove_device(struct pktgen_thread *t, /* Clean up proc file system */ + PG_DEBUG(printk("pktgen: removing proc entry: %s (0x%p)\n", pkt_dev->ifname, pg_proc_dir)); remove_proc_entry(pkt_dev->ifname, pg_proc_dir); if (pkt_dev->flows) @@ -3328,7 +3784,17 @@ static int __init pg_init(void) struct proc_dir_entry *pe; printk(version); - + printk("sizeof report: %d, sizeof in6_addr: %d\n", + (int)(sizeof(struct pktgen_dev_report)), + (int)(sizeof(struct in6_addr))); + + if (handle_pktgen_hook) { + printk("pktgen: ERROR: pktgen is already loaded it seems..\n"); + /* Already loaded */ + return -EEXIST; + } + + pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net); if (!pg_proc_dir) return -ENODEV; @@ -3367,6 +3833,9 @@ static int __init pg_init(void) return -ENODEV; } + handle_pktgen_hook 
= pktgen_receive; + PG_DEBUG(printk("pktgen initialization complete.\n")); + return 0; } @@ -3377,10 +3846,15 @@ static void __exit pg_cleanup(void) wait_queue_head_t queue; init_waitqueue_head(&queue); + /* Un-Register receive handler */ + handle_pktgen_hook = NULL; + /* Stop all interfaces & threads */ - + pktgen_stop_all_threads_ifs(); + list_for_each_safe(q, n, &pktgen_threads) { t = list_entry(q, struct pktgen_thread, th_list); + t->control |= (T_STOP); t->control |= (T_TERMINATE); wait_event_interruptible_timeout(queue, (t->removed == 1), HZ); @@ -3390,6 +3864,7 @@ static void __exit pg_cleanup(void) unregister_netdevice_notifier(&pktgen_notifier_block); /* Clean up proc file system */ + PG_DEBUG(printk("pktgen: removing proc entry: %s (0x%p)\n", PGCTRL, pg_proc_dir)); remove_proc_entry(PGCTRL, pg_proc_dir); proc_net_remove(PG_PROC_DIR); } diff --git a/net/core/pktgen.h b/net/core/pktgen.h new file mode 100644 index 0000000..921f3b0 --- /dev/null +++ b/net/core/pktgen.h @@ -0,0 +1,335 @@ +/* -*-linux-c-*- + * $Id: candela_2.6.13.patch,v 1.3 2005/09/30 04:45:31 greear Exp $ + * pktgen.c: Packet Generator for performance evaluation. + * + * See pktgen.c for details of changes, etc. +*/ + + +#ifndef PKTGEN_H_INCLUDE_KERNEL__ +#define PKTGEN_H_INCLUDE_KERNEL__ + +#include +#include + +/* The buckets are exponential in 'width' */ +#define LAT_BUCKETS_MAX 32 +#define PG_MAX_ACCUM_DELAY_NS 1000000 /* one ms */ +#define PG_TRY_TX_ANYWAY_NS 1000000 /* try a blocked tx queue after 1 ms. 
*/ + +#define IP_NAME_SZ 32 +#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ +#define MPLS_STACK_BOTTOM __constant_htonl(0x00000100) + +/* Device flag bits */ +#define F_IPSRC_RND (1<<0) /* IP-Src Random */ +#define F_IPDST_RND (1<<1) /* IP-Dst Random */ +#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ +#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ +#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ +#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ +#define F_TXSIZE_RND (1<<6) /* Transmit packet size is random */ +#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ +#define F_MPLS_RND (1<<8) /* Random MPLS labels */ + +/* Thread control flag bits */ +#define T_TERMINATE (1<<0) +#define T_STOP (1<<1) /* Stop run */ +#define T_RUN (1<<2) /* Start run */ +#define T_REMDEVALL (1<<3) /* Remove all devs */ +#define T_REMDEV (1<<4) /* Remove one dev */ +#define T_WAKE_BLOCKED (1<<5) /* Wake up all blocked net-devices. */ +#define T_ADD_DEV (1<<6) /* Add a device. */ + +/* Used to help with determining the pkts on receive */ +#define PKTGEN_MAGIC 0xbe9be955 +#define PG_PROC_DIR "pktgen" +#define PGCTRL "pgctrl" + +struct pktgen_dev { + + /* + * Try to keep frequent/infrequent used vars. separated. + */ + + char ifname[IFNAMSIZ]; + char result[512]; + + struct pktgen_thread *pg_thread; /* the owner */ + struct list_head list; /* Used for chaining in the thread's run-queue */ + + int running; /* if this changes to false, the test will stop */ + + /* If min != max, then we will either do a linear iteration, or + * we will do a random selection from within the range. + */ + __u32 flags; + int removal_mark; /* non-zero => the device is marked for + * removal by worker thread */ + + __u32 min_pkt_size; /* = ETH_ZLEN; */ + __u32 max_pkt_size; /* = ETH_ZLEN; */ + __u32 nfrags; + __u64 delay_ns; /* Delay this much between sending packets. 
*/ + __u64 count; /* Default No packets to send */ + __u64 sofar; /* How many pkts we've sent so far */ + __u64 tx_bytes; /* How many bytes we've transmitted */ + __u64 errors; /* Errors when trying to transmit, pkts will be re-sent */ + __u64 nanodelays; /* how many times have we called nano-delay on this device? */ + __s64 accum_delay_ns; /* Accumulated delay..when >= 1ms, we'll sleep on a wait queue. */ + __u64 sleeps; /* How many times have we gone to sleep on the wait queue. */ + __u64 queue_stopped; /* How many times was queue stopped when we tried to xmit? */ + /* runtime counters relating to clone_skb */ + __u64 next_tx_ns; /* timestamp of when to tx next */ + __u64 req_tx_early; /* requested to tx, but is too early for us to tx. */ + + __u64 allocated_skbs; + __u32 clone_count; + + int tx_blocked; /* Need to tx as soon as able... */ + int last_ok; /* Was last skb sent? + * Or a failed transmit of some sort? This will keep + * sequence numbers in order, for example. + */ + __u64 started_at; /* micro-seconds */ + __u64 stopped_at; /* micro-seconds */ + __u64 idle_acc_ns; /* nano-seconds */ + __u32 seq_num; + + __u32 clone_skb; /* Use multiple SKBs during packet gen. If this number + * is greater than 1, then that many copies of the same + * packet will be sent before a new packet is allocated. + * For instance, if you want to send 1024 identical packets + * before creating a new packet, set clone_skb to 1024. + */ + __u32 peer_clone_skb; /* Peer (transmitter's) clone setting. 
*/ + + char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + + struct in6_addr in6_saddr; + struct in6_addr in6_daddr; + struct in6_addr cur_in6_daddr; + struct in6_addr cur_in6_saddr; + /* For ranges */ + struct in6_addr min_in6_daddr; + struct in6_addr max_in6_daddr; + struct in6_addr min_in6_saddr; + struct in6_addr max_in6_saddr; + + /* If we're doing ranges, random or incremental, then this + * defines the min/max for those ranges. + */ + __u32 saddr_min; /* inclusive, source IP address */ + __u32 saddr_max; /* exclusive, source IP address */ + __u32 daddr_min; /* inclusive, dest IP address */ + __u32 daddr_max; /* exclusive, dest IP address */ + + __u16 udp_src_min; /* inclusive, source UDP port */ + __u16 udp_src_max; /* exclusive, source UDP port */ + __u16 udp_dst_min; /* inclusive, dest UDP port */ + __u16 udp_dst_max; /* exclusive, dest UDP port */ + + /* MPLS */ + unsigned nr_labels; /* Depth of stack, 0 = no MPLS */ + __be32 labels[MAX_MPLS_LABELS]; + + __u32 src_mac_count; /* How many MACs to iterate through */ + __u32 dst_mac_count; /* How many MACs to iterate through */ + + unsigned char dst_mac[ETH_ALEN]; + unsigned char src_mac[ETH_ALEN]; + + __u32 cur_dst_mac_offset; + __u32 cur_src_mac_offset; + __u32 cur_saddr; + __u32 cur_daddr; + __u16 cur_udp_dst; + __u16 cur_udp_src; + __u32 cur_pkt_size; + + __u8 hh[14]; + /* = { + 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, + + We fill in SRC address later + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00 + }; + */ + __u16 pad; /* pad out the hh struct to an even 16 bytes */ + + struct sk_buff *skb; /* skb we are to transmit next, mainly used for when we + * are transmitting the same one multiple times + */ + struct net_device *odev; /* The out-going device. Note that the device should + * have it's pg_info pointer pointing back to this + * device. 
This will be set when the user specifies + * the out-going device name (not when the inject is + * started as it used to do.) + */ + struct flow_state *flows; + unsigned cflows; /* Concurrent flows (config) */ + unsigned lflow; /* Flow length (config) */ + unsigned nflows; /* accumulated flows (stats) */ + + int avg_latency; /* in micro-seconds */ + int min_latency; + int max_latency; + __u64 latency_bkts[LAT_BUCKETS_MAX]; + __u64 pkts_rcvd_since_clear; /* with regard to clearing/resetting the latency logic */ + + + /* Fields relating to receiving pkts */ + __u32 last_seq_rcvd; + __u64 ooo_rcvd; /* out-of-order packets received */ + __u64 pkts_rcvd; /* packets received */ + __u64 dup_rcvd; /* duplicate packets received */ + __u64 bytes_rcvd; /* total bytes received, as obtained from the skb */ + __u64 seq_gap_rcvd; /* how many gaps we received. This correlates to + * dropped pkts, except perhaps in cases where we also + * have re-ordered pkts. In that case, you have to tie-break + * by looking at send v/s received pkt totals for the interfaces + * involved. + */ + __u64 non_pg_pkts_rcvd; /* Count how many non-pktgen skb's we are sent to check. */ + __u64 dup_since_incr; /* How many duplicates since the last seq number increment, + * used to detect gaps when multiskb > 1 + */ +}; + +struct pktgen_hdr { + __u32 pgh_magic; + __u32 seq_num; + __u32 tv_sec; + __u32 tv_usec; +}; + +struct pktgen_thread { + struct list_head if_list; /* All devices here */ + struct list_head th_list; + int removed; + char name[32]; + char result[512]; + u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ + + /* Field for thread to receive "posted" events terminate, stop ifs etc. 
*/ + + u32 control; + char* control_arg; + int pid; + int cpu; + + wait_queue_head_t queue; +}; + +struct pg_nqw_data { + #define PG_NQW_MAGIC 0x82743ab6 + u32 magic; + atomic_t nqw_ref_count; + struct pktgen_thread* pg_thread; +}; + +struct pktgen_dev_report { + __u32 api_version; + __u32 flags; + __u32 min_pkt_size; + __u32 max_pkt_size; + __u32 nfrags; + + __u32 clone_skb; /* Use multiple SKBs during packet gen. If this number + * is greater than 1, then that many copies of the same + * packet will be sent before a new packet is allocated. + * For instance, if you want to send 1024 identical packets + * before creating a new packet, set clone_skb to 1024. + */ + __u32 peer_clone_skb; /* Peer (transmitter's) clone setting. */ + __s32 avg_latency; /* in micro-seconds */ + __s32 min_latency; + __s32 max_latency; + + char thread_name[32]; + char interface_name[32]; + char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + unsigned char dst_mac[ETH_ALEN]; + unsigned char src_mac[ETH_ALEN]; + __u32 pad_32; /* pad to 8-byte boundary */ + + /* If we're doing ranges, random or incremental, then this + * defines the min/max for those ranges. + */ + __u32 saddr_min; /* inclusive, source IP address */