diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 17a6e46..f128106 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -866,6 +866,18 @@ conf/all/forwarding - BOOLEAN proxy_ndp - BOOLEAN Do proxy ndp. +nlnotify_on_addr_add - BOOLEAN + By default, netlink messages are not sent when an IPv6 address + is added if it is in tentative state. This makes it harder + for some user-space applications to function properly. To + ensure that a netlink message is always sent when an IPv6 addr + is added, regardless of the state of the address, set this value + to 1. For the old (default) behaviour, set this value to 0. + + If only certain interfaces should have this behaviour, leave the + 'all' config set to 0 and set the individual interface's value + to 1. + conf/interface/*: Change special settings per interface. diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 01e4f8b..ba21346 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -143,6 +143,7 @@ unsigned long long sched_clock(void) unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); #endif +EXPORT_SYMBOL(sched_clock); unsigned long native_calculate_cpu_khz(void) { diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index 5153afc..c05f544 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -62,6 +62,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) sched_clock_idle_wakeup_event(0); local_irq_restore(flags); } +EXPORT_SYMBOL(sched_clock); unsigned long long native_sched_clock(void) { diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 2d139ec..af4a026 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/************************************************************** Intel PRO/100 Linux driver Copyright(c) 
1999 - 2006 Intel Corporation. @@ -394,6 +394,7 @@ enum cb_command { cb_ucode = 0x0005, cb_dump = 0x0006, cb_tx_sf = 0x0008, + cb_tx_nc = 0x0010, /* 0 == controller does CRC, ie normal. 1 == CRC from memory */ cb_cid = 0x1f00, cb_i = 0x2000, cb_s = 0x4000, @@ -430,7 +431,7 @@ struct config { /*5*/ u8 X(tx_dma_max_count:7, dma_max_count_enable:1); /*6*/ u8 X(X(X(X(X(X(X(late_scb_update:1, direct_rx_dma:1), tno_intr:1), cna_intr:1), standard_tcb:1), standard_stat_counter:1), - rx_discard_overruns:1), rx_save_bad_frames:1); + rx_save_overruns:1), rx_save_bad_frames:1); /*7*/ u8 X(X(X(X(X(rx_discard_short_frames:1, tx_underrun_retry:2), pad7:2), rx_extended_rfd:1), tx_two_frames_in_fifo:1), tx_dynamic_tbd:1); @@ -561,6 +562,8 @@ struct nic { multicast_all = (1 << 2), wol_magic = (1 << 3), ich_10h_workaround = (1 << 4), + accept_all_frames = (1 << 5), + save_fcs = (1 << 6), } flags ____cacheline_aligned; enum mac mac; @@ -1017,6 +1020,16 @@ static void e100_configure(struct nic *nic, struct cb *cb, struct sk_buff *skb) config->promiscuous_mode = 0x1; /* 1=on, 0=off */ } + if(nic->flags & accept_all_frames) { + config->rx_save_overruns = 0x1; /* 1=save, 0=discard */ + config->rx_save_bad_frames = 0x1; /* 1=save, 0=discard */ + config->rx_discard_short_frames = 0x0; /* 1=discard, 0=save */ + } + + if(nic->flags & save_fcs) { + config->rx_crc_transfer = 0x1; /* 1=save, 0=discard */ + } + if(nic->flags & multicast_all) config->multicast_all = 0x1; /* 1=accept, 0=no */ @@ -1477,6 +1490,16 @@ static void e100_set_multicast_list(struct net_device *netdev) else nic->flags &= ~promiscuous; + if(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES) /* priv_flags is the field e100_set_rxall() updates */ + nic->flags |= accept_all_frames; + else + nic->flags &= ~accept_all_frames; + + if(netdev->priv_flags & IFF_SAVE_FCS) /* NOTE(review): e100_set_save_fcs() only sets nic->flags - confirm it should also set this bit */ + nic->flags |= save_fcs; + else + nic->flags &= ~save_fcs; + if(netdev->flags & IFF_ALLMULTI || netdev->mc_count > E100_MAX_MULTICAST_ADDRS) nic->flags |= multicast_all; @@ -1620,6 +1643,19 @@ static void e100_xmit_prepare(struct nic 
*nic, struct cb *cb, struct sk_buff *skb) { cb->command = nic->tx_command; + +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + /* Use the last 4 bytes of the SKB payload packet as the CRC, used for + * testing, ie sending frames with bad CRC. + */ + if (unlikely(skb->use_specified_ether_crc)) { + cb->command |= __constant_cpu_to_le16(cb_tx_nc); + } + else { + cb->command &= ~__constant_cpu_to_le16(cb_tx_nc); + } +#endif + /* interrupt every 16 packets regardless of delay */ if((nic->cbs_avail & ~15) == nic->cbs_avail) cb->command |= cpu_to_le16(cb_i); @@ -1867,7 +1903,21 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, skb_reserve(skb, sizeof(struct rfd)); skb_put(skb, actual_size); skb->protocol = eth_type_trans(skb, nic->netdev); - + /* NOTE: The config step turns on acceptance of various bogus frames + * when in loopback or promisc mode, but this code will still throw + * them away unless you also set the new 'accept_all_frames' flag. + * Perhaps the implementors meant to accept the bogus frames in + * promisc mode here?? --Ben + */ + if(unlikely(nic->flags & accept_all_frames)) { + if(actual_size > nic->netdev->mtu + VLAN_ETH_HLEN) { + /* Received oversized frame: count it, but still deliver it below */ + dev->stats.rx_over_errors++; + } + /* We're accepting all, so pass the bogons on up the stack. 
*/ + goto process_skb; + } + if(unlikely(!(rfd_status & cb_ok))) { /* Don't indicate if hardware indicates errors */ dev_kfree_skb_any(skb); @@ -1876,6 +1926,7 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx, nic->rx_over_length_errors++; dev_kfree_skb_any(skb); } else { + process_skb: dev->stats.rx_packets++; dev->stats.rx_bytes += actual_size; nic->netdev->last_rx = jiffies; @@ -2271,6 +2322,63 @@ static int e100_set_settings(struct net_device *netdev, struct ethtool_cmd *cmd) return err; } +static int e100_set_rxall(struct net_device *netdev, u32 data) +{ + struct nic *nic = netdev->priv; + if (data) { + netdev->priv_flags |= IFF_ACCEPT_ALL_FRAMES; + nic->flags |= accept_all_frames; + } + else { + netdev->priv_flags &= ~(IFF_ACCEPT_ALL_FRAMES); + nic->flags &= ~accept_all_frames; + } + + e100_exec_cb(nic, NULL, e100_configure); + + return 0; +} + +static int e100_get_rxall(struct net_device *netdev, u32* data) +{ + struct nic *nic = netdev->priv; + if (nic->flags & accept_all_frames) { + *data = 1; + } + else { + *data = 0; + } + + return 0; +} + +static int e100_set_save_fcs(struct net_device *netdev, u32 data) +{ + struct nic *nic = netdev->priv; + if (data) { + nic->flags |= save_fcs; + } + else { + nic->flags &= ~save_fcs; + } + e100_exec_cb(nic, NULL, e100_configure); + + return 0; +} + +static int e100_get_save_fcs(struct net_device *netdev, u32* data) +{ + struct nic *nic = netdev->priv; + if (nic->flags & save_fcs) { + *data = 1; + } + else { + *data = 0; + } + + return 0; +} + static void e100_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -2565,7 +2673,12 @@ static const struct ethtool_ops e100_ethtool_ops = { .get_strings = e100_get_strings, .phys_id = e100_phys_id, .get_ethtool_stats = e100_get_ethtool_stats, + .set_rx_all = e100_set_rxall, + .get_rx_all = e100_get_rxall, + .set_save_fcs = e100_set_save_fcs, + .get_save_fcs = e100_get_save_fcs, .get_sset_count = e100_get_sset_count, + }; static int 
e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h index 3b84028..863f46d 100644 --- a/drivers/net/e1000/e1000.h +++ b/drivers/net/e1000/e1000.h @@ -359,4 +359,7 @@ extern void e1000_set_ethtool_ops(struct net_device *netdev); extern void e1000_check_options(struct e1000_adapter *adapter); + +void e1000_set_rx_mode(struct net_device *netdev); + #endif /* _E1000_H_ */ diff --git a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c index 85e66f4..af418a8 100644 --- a/drivers/net/e1000/e1000_ethtool.c +++ b/drivers/net/e1000/e1000_ethtool.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/***************************************************************** Intel PRO/1000 Linux driver Copyright(c) 1999 - 2006 Intel Corporation. @@ -1962,6 +1962,59 @@ e1000_get_strings(struct net_device *netdev, uint32_t stringset, uint8_t *data) } } +static int e1000_ethtool_setrxall(struct net_device *netdev, uint32_t val) { + unsigned short old_flags = netdev->priv_flags; + if (val) { + netdev->priv_flags |= IFF_ACCEPT_ALL_FRAMES; + } + else { + netdev->priv_flags &= ~(IFF_ACCEPT_ALL_FRAMES); + } + + /* printk("e1000_ethtool_setrxall (%s) val: %d\n", + netdev->name, val); */ + if (old_flags != netdev->priv_flags) { + netif_tx_lock_bh(netdev); + if (netif_running(netdev)) { + /*printk("Kicking e1000 for setrxall..\n");*/ + e1000_set_rx_mode(netdev); + } else { + /* Value will be flushed into the hardware when the device is + * brought up. 
+ */ + } + netif_tx_unlock_bh(netdev); + } + return 0; +} + +static int e1000_ethtool_set_save_fcs(struct net_device *netdev, uint32_t val) { + netif_tx_lock_bh(netdev); + if (val) { + netdev->priv_flags |= IFF_SAVE_FCS; + } + else { + netdev->priv_flags &= ~IFF_SAVE_FCS; + } + netif_tx_unlock_bh(netdev); + return 0; +} + +static int e1000_ethtool_get_save_fcs(struct net_device *netdev, uint32_t* val) { + *val = !!(netdev->priv_flags & IFF_SAVE_FCS); + /*printk("GETRXALL, data: %d priv_flags: %hx\n", + edata.data, netdev->priv_flags);*/ + return 0; +} + +static int e1000_ethtool_getrxall(struct net_device *netdev, uint32_t* val) { + *val = !!(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES); + /*printk("GETRXALL, data: %d priv_flags: %hx\n", + edata.data, netdev->priv_flags);*/ + return 0; +} + + static const struct ethtool_ops e1000_ethtool_ops = { .get_settings = e1000_get_settings, .set_settings = e1000_set_settings, @@ -1991,6 +2044,10 @@ static const struct ethtool_ops e1000_ethtool_ops = { .get_strings = e1000_get_strings, .phys_id = e1000_phys_id, .get_ethtool_stats = e1000_get_ethtool_stats, + .get_rx_all = e1000_ethtool_getrxall, + .set_rx_all = e1000_ethtool_setrxall, + .set_save_fcs = e1000_ethtool_set_save_fcs, + .get_save_fcs = e1000_ethtool_get_save_fcs, .get_sset_count = e1000_get_sset_count, }; diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 0991648..187c618 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/***************************************************************** Intel PRO/1000 Linux driver Copyright(c) 1999 - 2006 Intel Corporation. 
@@ -159,7 +159,7 @@ static void e1000_clean_tx_ring(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring); static void e1000_clean_rx_ring(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring); -static void e1000_set_rx_mode(struct net_device *netdev); +void e1000_set_rx_mode(struct net_device *netdev); static void e1000_update_phy_info(unsigned long data); static void e1000_watchdog(unsigned long data); static void e1000_82547_tx_fifo_stall(unsigned long data); @@ -1046,6 +1046,9 @@ e1000_probe(struct pci_dev *pdev, if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; + /* Has ability to receive all frames (even bad CRCs and such) */ + netdev->features |= NETIF_F_RX_ALL | NETIF_F_SAVE_CRC; + netdev->features |= NETIF_F_LLTX; adapter->en_mng_pt = e1000_enable_mng_pass_thru(&adapter->hw); @@ -2488,7 +2491,7 @@ e1000_set_mac(struct net_device *netdev, void *p) * promiscuous mode, and all-multi behavior. **/ -static void +void e1000_set_rx_mode(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); @@ -2531,6 +2534,35 @@ e1000_set_rx_mode(struct net_device *netdev) E1000_WRITE_REG(hw, RCTL, rctl); + + /* This is useful for using ethereal or tcpdump to sniff + * packets in promiscuous mode without stripping VLAN/priority + * information, and also letting bad packets through. + * + * THIS IS NOT PRODUCTION CODE - FOR INTERNAL USE ONLY!!! 
+ * + */ + if (netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES) { + uint32_t ctrl; + /*printk("%s: Enabling acceptance of ALL frames (bad CRC too).\n", + netdev->name); */ + /* store bad packets, promisc/multicast all, no VLAN + * filter */ + rctl = E1000_READ_REG(hw, RCTL); + rctl |= (E1000_RCTL_SBP | E1000_RCTL_UPE | E1000_RCTL_MPE); + rctl &= ~(E1000_RCTL_VFE | E1000_RCTL_CFIEN); + E1000_WRITE_REG(hw, RCTL, rctl); + /* disable VLAN tagging/stripping */ + ctrl = E1000_READ_REG(hw, CTRL); + ctrl &= ~E1000_CTRL_VME; + E1000_WRITE_REG(hw, CTRL, ctrl); + } + else { + /* TODO: Do we need a way to explicitly turn this off if it was + * previously enabled, or will it magically go back to normal??? --Ben + */ + } + /* 82542 2.0 needs to be in reset to write receive address registers */ if (hw->mac_type == e1000_82542_rev2_0) @@ -2954,6 +2986,7 @@ set_itr_now: #define E1000_TX_FLAGS_VLAN 0x00000002 #define E1000_TX_FLAGS_TSO 0x00000004 #define E1000_TX_FLAGS_IPV4 0x00000008 +#define E1000_TX_FLAGS_NO_FCS 0x00000010 #define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 #define E1000_TX_FLAGS_VLAN_SHIFT 16 @@ -3204,6 +3237,13 @@ e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); } +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS)) { + txd_lower &= ~(E1000_TXD_CMD_IFCS); + /* printk("Disabling CRC in tx_queue, txd_lower: 0x%x\n", txd_lower); */ + } +#endif + i = tx_ring->next_to_use; while (count--) { @@ -3218,6 +3258,14 @@ e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + /* txd_cmd re-enables FCS, so we'll re-disable it here as desired. 
*/ + if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS)) { + tx_desc->lower.data &= ~(cpu_to_le32(E1000_TXD_CMD_IFCS)); + /* printk("Disabling2 CRC in tx_queue, txd_lower: 0x%x\n", tx_desc->lower.data); */ + } +#endif + /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -3495,6 +3543,12 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (likely(skb->protocol == htons(ETH_P_IP))) tx_flags |= E1000_TX_FLAGS_IPV4; +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (unlikely(skb->use_specified_ether_crc)) { + tx_flags |= E1000_TX_FLAGS_NO_FCS; + } +#endif + e1000_tx_queue(adapter, tx_ring, tx_flags, e1000_tx_map(adapter, tx_ring, skb, first, max_per_txd, nr_frags, mss)); @@ -4265,7 +4319,11 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, goto next_desc; } - if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { + /* If we are accepting all frames, then do not pay attention to the + * framing errors. + */ + if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) && + !(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES)) { last_byte = *(skb->data + length - 1); if (TBI_ACCEPT(&adapter->hw, status, rx_desc->errors, length, last_byte)) { @@ -4291,6 +4349,16 @@ e1000_clean_rx_irq(struct e1000_adapter *adapter, total_rx_bytes += length; total_rx_packets++; + + // This may not be needed now. 
--Ben + //if (netdev->priv_flags & IFF_SAVE_FCS) { + // skb_put(skb, length); + //} + //else { + // skb_put(skb, length - ETHERNET_FCS_SIZE); + //} + + /* code added for copybreak, this should improve * performance for small packets with large amounts * of reassembly being done in the stack */ @@ -4433,7 +4501,8 @@ e1000_clean_rx_irq_ps(struct e1000_adapter *adapter, goto next_desc; } - if (unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { + if ((unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) && + !(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES)) { dev_kfree_skb_irq(skb); goto next_desc; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 34c2b98..fd46902 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -38,13 +38,6 @@ struct macvlan_port { struct list_head vlans; }; -struct macvlan_dev { - struct net_device *dev; - struct list_head list; - struct hlist_node hlist; - struct macvlan_port *port; - struct net_device *lowerdev; -}; static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 69a2e19..80417d2 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -138,6 +138,7 @@ struct TCP_Server_Info { struct sockaddr_in sockAddr; struct sockaddr_in6 sockAddr6; } addr; + u32 ip4_local_ip; wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ struct list_head pending_mid_q; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 8dbfa97..750dcbb 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -95,12 +95,14 @@ struct smb_vol { unsigned int sockopt; unsigned short int port; char *prepath; + u32 local_ip; /* allow binding to a local IP address if != 0 */ }; static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, char *netb_name, - char *server_netb_name); + char *server_netb_name, + u32 local_ip); static int ipv6_connect(struct sockaddr_in6 *psin_server, struct 
socket **csocket); @@ -195,7 +197,8 @@ cifs_reconnect(struct TCP_Server_Info *server) rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket, server->workstation_RFC1001_name, - server->server_RFC1001_name); + server->server_RFC1001_name, + server->ip4_local_ip); } if (rc) { cFYI(1, ("reconnect error %d", rc)); @@ -1038,6 +1041,18 @@ cifs_parse_mount_options(char *options, const char *devname, "long\n"); return 1; } + } else if (strnicmp(data, "local_ip", 8) == 0) { + if (!value || !*value) { + printk(KERN_WARNING "CIFS: local_ip value not specified.\n"); + return 1; /* needs_arg; */ + } + i = cifs_inet_pton(AF_INET, value, &(vol->local_ip)); + if (i <= 0) { /* cifs_inet_pton() yields > 0 only for a parsable address */ + vol->local_ip = 0; + printk(KERN_WARNING "CIFS: Could not parse local_ip: %s\n", + value); + return 1; + } } else if (strnicmp(data, "prefixpath", 10) == 0) { if (!value || !*value) { printk(KERN_WARNING @@ -1319,7 +1334,8 @@ cifs_parse_mount_options(char *options, const char *devname, static struct cifsSesInfo * cifs_find_tcp_session(struct in_addr *target_ip_addr, struct in6_addr *target_ip6_addr, - char *userName, struct TCP_Server_Info **psrvTcp) + char *userName, struct TCP_Server_Info **psrvTcp, + u32 local_ip) { struct list_head *tmp; struct cifsSesInfo *ses; @@ -1329,7 +1345,11 @@ cifs_find_tcp_session(struct in_addr *target_ip_addr, list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); if (ses->server) { - if ((target_ip_addr && + if((target_ip_addr && + /* If binding to a local IP, do not re-use sessions bound to different + * local IP addresses. 
+ */ + (local_ip == ses->server->ip4_local_ip) && (ses->server->addr.sockAddr.sin_addr.s_addr == target_ip_addr->s_addr)) || (target_ip6_addr && memcmp(&ses->server->addr.sockAddr6.sin6_addr, @@ -1358,7 +1378,7 @@ cifs_find_tcp_session(struct in_addr *target_ip_addr, } static struct cifsTconInfo * -find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) +find_unc(__be32 new_target_ip_addr, char *uncName, char *userName, u32 local_ip) { struct list_head *tmp; struct cifsTconInfo *tcon; @@ -1373,8 +1393,9 @@ find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) ("old ip addr: %x == new ip %x ?", tcon->ses->server->addr.sockAddr.sin_addr. s_addr, new_target_ip_addr)); - if (tcon->ses->server->addr.sockAddr.sin_addr. - s_addr == new_target_ip_addr) { + if ((local_ip == tcon->ses->server->ip4_local_ip) && + (tcon->ses->server->addr.sockAddr.sin_addr. + s_addr == new_target_ip_addr)) { /* BB lock tcon, server and tcp session and increment use count here? */ /* found a match on the TCP session */ /* BB check if reconnection needed */ @@ -1481,7 +1502,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length) static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char *netbios_name, char *target_name) + char *netbios_name, char *target_name, + u32 local_ip /* in network byte order */) { int rc = 0; int connected = 0; @@ -1501,6 +1523,24 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, } } + /* Bind to the local IP address if specified */ + if (local_ip) { + struct sockaddr_in myaddr = { + .sin_family = AF_INET, + }; + myaddr.sin_addr.s_addr = local_ip; + myaddr.sin_port = 0; /* any */ + rc = (*csocket)->ops->bind(*csocket, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (rc < 0) { + printk("Tried to bind to local ip: 0x%x, but failed with error: %d\n", + local_ip, rc); + } + else { + printk("CIFS: Successfully bound to local ip: 0x%x\n", local_ip); + } + } + 
psin_server->sin_family = AF_INET; if (psin_server->sin_port) { /* user overrode default port */ rc = (*csocket)->ops->connect(*csocket, @@ -1912,12 +1952,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, if (address_type == AF_INET) existingCifsSes = cifs_find_tcp_session(&sin_server.sin_addr, NULL /* no ipv6 addr */, - volume_info.username, &srvTcp); + volume_info.username, &srvTcp, volume_info.local_ip); else if (address_type == AF_INET6) { cFYI(1, ("looking for ipv6 address")); existingCifsSes = cifs_find_tcp_session(NULL /* no ipv4 addr */, &sin_server6.sin6_addr, - volume_info.username, &srvTcp); + volume_info.username, &srvTcp, 0); } else { rc = -EINVAL; goto out; @@ -1938,7 +1978,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, } else rc = ipv4_connect(&sin_server, &csocket, volume_info.source_rfc1001_name, - volume_info.target_rfc1001_name); + volume_info.target_rfc1001_name, + volume_info.local_ip); if (rc < 0) { cERROR(1, ("Error connecting to IPv4 socket. 
" "Aborting operation")); @@ -1972,6 +2013,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, to the struct since the kernel thread not created yet so no need to spinlock this init of tcpStatus */ srvTcp->tcpStatus = CifsNew; + srvTcp->ip4_local_ip = volume_info.local_ip; init_MUTEX(&srvTcp->tcpSem); srvTcp->tsk = kthread_run((void *)(void *)cifs_demultiplex_thread, srvTcp, "cifsd"); if (IS_ERR(srvTcp->tsk)) { @@ -2125,7 +2167,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, tcon = find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, - volume_info.username); + volume_info.username, volume_info.local_ip); if (tcon) { cFYI(1, ("Found match on UNC path")); /* we can have only one retry value for a connection diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index c6e72ae..357f2d7 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2101,6 +2101,11 @@ COMPATIBLE_IOCTL(SIOCGMIIREG) COMPATIBLE_IOCTL(SIOCSMIIREG) COMPATIBLE_IOCTL(SIOCGIFVLAN) COMPATIBLE_IOCTL(SIOCSIFVLAN) +COMPATIBLE_IOCTL(SIOCSIFMACVLAN) +COMPATIBLE_IOCTL(SIOCGIFMACVLAN) +COMPATIBLE_IOCTL(SIOCGIFREDIRDEV) +COMPATIBLE_IOCTL(SIOCSIFREDIRDEV) +COMPATIBLE_IOCTL(0x7450 /* GET_PKTGEN_INTERFACE_INFO */) COMPATIBLE_IOCTL(SIOCBRADDBR) COMPATIBLE_IOCTL(SIOCBRDELBR) /* SG stuff */ diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 66648dd..7907e3e 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -175,7 +175,7 @@ static int nfs_callback_authenticate(struct svc_rqst *rqstp) RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); /* Don't talk to strangers */ - clp = nfs_find_client(svc_addr(rqstp), 4); + clp = nfs_find_client(svc_daddr(rqstp), svc_addr(rqstp), 4); /* (clientaddr, addr), as in decode_getattr_args() */ if (clp == NULL) return SVC_DROP; diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index bb25d21..08cbf17 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -39,6 +39,7 @@ struct cb_compound_hdr_res { struct cb_getattrargs { struct sockaddr *addr; + struct sockaddr *clientaddr; struct nfs_fh fh; 
uint32_t bitmap[2]; }; @@ -54,6 +55,7 @@ struct cb_getattrres { struct cb_recallargs { struct sockaddr *addr; + struct sockaddr *clientaddr; struct nfs_fh fh; nfs4_stateid stateid; uint32_t truncate; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 15f7785..f82afb5 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -25,7 +25,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres * res->bitmap[0] = res->bitmap[1] = 0; res->status = htonl(NFS4ERR_BADHANDLE); - clp = nfs_find_client(args->addr, 4); + clp = nfs_find_client(args->clientaddr, args->addr, 4); if (clp == NULL) goto out; @@ -68,7 +68,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy) __be32 res; res = htonl(NFS4ERR_BADHANDLE); - clp = nfs_find_client(args->addr, 4); + clp = nfs_find_client(args->clientaddr, args->addr, 4); if (clp == NULL) goto out; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 13619d2..fc4fa64 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -177,6 +177,7 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr if (unlikely(status != 0)) goto out; args->addr = svc_addr(rqstp); + args->clientaddr = svc_daddr(rqstp); status = decode_bitmap(xdr, args->bitmap); out: dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status)); @@ -189,6 +190,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, __be32 status; args->addr = svc_addr(rqstp); + args->clientaddr = svc_daddr(rqstp); status = decode_stateid(xdr, &args->stateid); if (unlikely(status != 0)) goto out; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index c5c0175..1603ecb 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -98,6 +98,7 @@ struct rpc_program nfsacl_program = { struct nfs_client_initdata { const char *hostname; const struct sockaddr *addr; + const struct sockaddr *clientaddr; size_t addrlen; const struct nfs_rpc_ops *rpc_ops; int proto; 
@@ -129,6 +130,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen); clp->cl_addrlen = cl_init->addrlen; + memcpy(&clp->cl_ipaddr, cl_init->clientaddr, cl_init->addrlen); if (cl_init->hostname) { clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL); @@ -245,7 +247,8 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1, * Find a client by IP address and protocol version * - returns NULL if no such client */ -struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion) +struct nfs_client *nfs_find_client(const struct sockaddr *clientaddr, + const struct sockaddr *addr, u32 nfsversion) { struct nfs_client *clp; @@ -264,6 +267,8 @@ struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion) if (addr->sa_family != clap->sa_family) continue; /* Match only the IP address, not the port number */ + if (!nfs_sockaddr_match_ipaddr(clientaddr, &clp->cl_ipaddr)) + continue; if (!nfs_sockaddr_match_ipaddr(addr, clap)) continue; @@ -331,6 +336,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat continue; /* Match the full socket address */ + if (memcmp(&clp->cl_ipaddr, data->clientaddr, + sizeof(clp->cl_ipaddr)) != 0) + continue; if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0) continue; @@ -463,6 +471,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { .protocol = clp->cl_proto, + .saddress = (struct sockaddr *)&clp->cl_ipaddr, .address = (struct sockaddr *)&clp->cl_addr, .addrsize = clp->cl_addrlen, .timeout = timeparms, @@ -633,6 +642,7 @@ static int nfs_init_server(struct nfs_server *server, .hostname = data->nfs_server.hostname, .addr = (const struct sockaddr *)&data->nfs_server.address, .addrlen = data->nfs_server.addrlen, + .clientaddr = (const struct sockaddr *)&data->client.address, .rpc_ops = 
&nfs_v2_clientops, .proto = data->nfs_server.protocol, }; @@ -941,7 +951,6 @@ error: */ static int nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, - const char *ip_addr, rpc_authflavor_t authflavour) { int error; @@ -959,7 +968,6 @@ static int nfs4_init_client(struct nfs_client *clp, RPC_CLNT_CREATE_DISCRTRY); if (error < 0) goto error; - memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); error = nfs_idmap_new(clp); if (error < 0) { @@ -985,7 +993,7 @@ static int nfs4_set_client(struct nfs_server *server, const char *hostname, const struct sockaddr *addr, const size_t addrlen, - const char *ip_addr, + const struct sockaddr *ip_addr, rpc_authflavor_t authflavour, int proto, const struct rpc_timeout *timeparms) { @@ -993,6 +1001,7 @@ static int nfs4_set_client(struct nfs_server *server, .hostname = hostname, .addr = addr, .addrlen = addrlen, + .clientaddr = ip_addr, .rpc_ops = &nfs_v4_clientops, .proto = proto, }; @@ -1007,7 +1016,7 @@ static int nfs4_set_client(struct nfs_server *server, error = PTR_ERR(clp); goto error; } - error = nfs4_init_client(clp, timeparms, ip_addr, authflavour); + error = nfs4_init_client(clp, timeparms, authflavour); if (error < 0) goto error_put; @@ -1041,7 +1050,7 @@ static int nfs4_init_server(struct nfs_server *server, data->nfs_server.hostname, (const struct sockaddr *)&data->nfs_server.address, data->nfs_server.addrlen, - data->client_address, + (const struct sockaddr *)&data->client.address, data->auth_flavors[0], data->nfs_server.protocol, &timeparms); @@ -1157,7 +1166,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, error = nfs4_set_client(server, data->hostname, data->addr, data->addrlen, - parent_client->cl_ipaddr, + &parent_client->cl_ipaddr, data->authflavor, parent_server->client->cl_xprt->prot, parent_server->client->cl_timeout); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 4c62be1..2048eb4 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h 
@@ -40,7 +40,12 @@ struct nfs_parsed_mount_data { unsigned int bsize; unsigned int auth_flavor_len; rpc_authflavor_t auth_flavors[1]; - char *client_address; + + struct { + struct sockaddr_storage address; + size_t addrlen; + char *hostname; + } client; struct { struct sockaddr_storage address; @@ -66,7 +71,8 @@ struct nfs_parsed_mount_data { extern struct rpc_program nfs_program; extern void nfs_put_client(struct nfs_client *); -extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32); +extern struct nfs_client *nfs_find_client(const struct sockaddr *, + const struct sockaddr *, u32); extern struct nfs_client *nfs_find_client_next(struct nfs_client *); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 49c7cd0..1270ff9 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -38,7 +38,8 @@ struct mnt_fhstatus { * * Uses default timeout parameters specified by underlying transport. 
*/ -int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, +int nfs_mount(struct sockaddr *clientaddr, struct sockaddr *addr, size_t len, + char *hostname, char *path, int version, int protocol, struct nfs_fh *fh) { struct mnt_fhstatus result = { @@ -50,6 +51,7 @@ int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path, }; struct rpc_create_args args = { .protocol = protocol, + .saddress = clientaddr, .address = addr, .addrsize = len, .servername = hostname, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3a2ff77..30a4ae7 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2882,8 +2882,8 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po for(;;) { setclientid.sc_name_len = scnprintf(setclientid.sc_name, - sizeof(setclientid.sc_name), "%s/%s %s %s %u", - clp->cl_ipaddr, + sizeof(setclientid.sc_name), "%u.%u.%u.%u/%s %s %s %u", + NIPQUAD(((struct sockaddr_in *)&clp->cl_ipaddr)->sin_addr), rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), rpc_peeraddr2str(clp->cl_rpcclient, @@ -2895,8 +2895,9 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_NETID)); setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, - sizeof(setclientid.sc_uaddr), "%s.%u.%u", - clp->cl_ipaddr, port >> 8, port & 255); + sizeof(setclientid.sc_uaddr), "%u.%u.%u.%u.%u.%u", + NIPQUAD(((struct sockaddr_in *)&clp->cl_ipaddr)->sin_addr), + port >> 8, port & 255); status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); if (status != -NFS4ERR_CLID_INUSE) diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 531379d..d5f88eb 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -494,7 +494,7 @@ static int __init root_nfs_get_handle(void) NFS_MNT3_VERSION : NFS_MNT_VERSION; set_sockaddr(&sin, servaddr, htons(mount_port)); - status = nfs_mount((struct sockaddr *) &sin, sizeof(sin), NULL, + status = nfs_mount(NULL, (struct sockaddr 
*) &sin, sizeof(sin), NULL, nfs_path, version, protocol, &fh); if (status < 0) printk(KERN_ERR "Root-NFS: Server returned error %d " diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f921902..d51b8cb 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1019,8 +1019,11 @@ static int nfs_parse_mount_options(char *raw, string = match_strdup(args); if (string == NULL) goto out_nomem; - kfree(mnt->client_address); - mnt->client_address = string; + nfs_parse_server_address(string, (struct sockaddr *) + &mnt->client.address, + &mnt->client.addrlen); + kfree(mnt->client.hostname); + mnt->client.hostname = string; break; case Opt_mounthost: string = match_strdup(args); @@ -1117,7 +1120,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, * Now ask the mount server to map our export path * to a file handle. */ - status = nfs_mount(sap, + status = nfs_mount(NULL, sap, args->mount_server.addrlen, hostname, args->nfs_server.export_path, @@ -1231,6 +1234,13 @@ static int nfs_validate_mount_data(void *options, args->bsize = data->bsize; args->auth_flavors[0] = data->pseudoflavor; + /* FIXME: Should be version 7 - why do old API versions fall + * through to new ones??? 
+ */ + memcpy(&args->client.address, &data->clientaddr, + sizeof(data->clientaddr)); + args->client.addrlen = sizeof(data->clientaddr); + /* * The legacy version 6 binary mount data from userspace has a * field used only to transport selinux information into the @@ -1798,7 +1808,10 @@ static int nfs4_validate_mount_data(void *options, c = strndup_user(data->client_addr.data, 16); if (IS_ERR(c)) return PTR_ERR(c); - args->client_address = c; + args->client.hostname = c; + args->client.addrlen = 4; + ((struct sockaddr_in *)&args->client.address)->sin_family = AF_INET; + ((struct sockaddr_in *)&args->client.address)->sin_addr.s_addr = in_aton(c); /* * Translate to nfs_parsed_mount_data, which nfs4_fill_super @@ -1861,7 +1874,7 @@ static int nfs4_validate_mount_data(void *options, dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path); - if (args->client_address == NULL) + if (args->client.hostname == NULL) goto out_no_client_address; break; @@ -1952,7 +1965,7 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.client_address); + kfree(data.client.hostname); kfree(data.nfs_server.export_path); kfree(data.nfs_server.hostname); security_free_mnt_opts(&data.lsm_opts); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 82b3a1b..342aac7 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -429,7 +429,8 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino, } } unlock_new_inode(inode); - } + } else + module_put(de->owner); return inode; out_ino: diff --git a/include/asm-x86/socket.h b/include/asm-x86/socket.h index 80af9c4..0b19fb2 100644 --- a/include/asm-x86/socket.h +++ b/include/asm-x86/socket.h @@ -54,4 +54,9 @@ #define SO_MARK 36 +/* Instruct lower device to not calculate the frame + * checksum. Useful only for testing, afaik. 
--Ben */ +#define SO_NOFCS 50 + + #endif /* _ASM_SOCKET_H */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index c8d2163..f84863f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1,4 +1,4 @@ -/* +/* -*-linux-c-*- * ethtool.h: Defines for Linux ethtool. * * Copyright (C) 1998 David S. Miller (davem@redhat.com) @@ -324,7 +324,11 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data); * self_test: Run specified self-tests * get_strings: Return a set of strings that describe the requested objects * phys_id: Identify the device - * get_stats: Return statistics about the device + * get_stats: Return statistics about the device + * set_rx_all: Set or clear IFF_ACCEPT_ALL_FRAMES, see if.h + * get_rx_all: Return 1 if set, 0 if not. + * set_save_fcs: Set or clear IFF_SAVE_FCS, see if.h + * get_save_fcs: Return 1 if set, 0 if not. * get_flags: get 32-bit flags bitmap * set_flags: set 32-bit flags bitmap * @@ -383,6 +387,10 @@ struct ethtool_ops { void (*get_strings)(struct net_device *, u32 stringset, u8 *); int (*phys_id)(struct net_device *, u32); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); + int (*set_rx_all)(struct net_device *, u32); + int (*get_rx_all)(struct net_device *, u32 *); + int (*set_save_fcs)(struct net_device *, u32); + int (*get_save_fcs)(struct net_device *, u32 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); u32 (*get_ufo)(struct net_device *); @@ -399,6 +407,13 @@ struct ethtool_ops { }; #endif /* __KERNEL__ */ +/* for dumping net-device statistics */ +struct ethtool_ndstats { + u32 cmd; /* ETHTOOL_GNDSTATS */ + u8 data[0]; /* sizeof(struct net_device_stats) */ +}; + + /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* Get settings. */ #define ETHTOOL_SSET 0x00000002 /* Set settings. 
*/ @@ -442,6 +457,15 @@ struct ethtool_ops { #define ETHTOOL_GPFLAGS 0x00000027 /* Get driver-private flags bitmap */ #define ETHTOOL_SPFLAGS 0x00000028 /* Set driver-private flags bitmap */ + +#define ETHTOOL_GNDSTATS 0x00000070 /* get standard net-device statistics */ +#define ETHTOOL_GETRXALL 0x00000071 /* Retrieve whether or not + * IFF_ACCEPT_ALL_FRAMES is set. */ +#define ETHTOOL_SETRXALL 0x00000072 /* Set IFF_ACCEPT_ALL_FRAMES */ +#define ETHTOOL_GETRXFCS 0x00000073 /* Get IFF_SAVE_FCS */ +#define ETHTOOL_SETRXFCS 0x00000074 /* Set IFF_SAVE_FCS */ + + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET diff --git a/include/linux/if.h b/include/linux/if.h index 5c9d1fa..63de1a7 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -65,6 +65,14 @@ #define IFF_SLAVE_NEEDARP 0x40 /* need ARPs for validation */ #define IFF_ISATAP 0x80 /* ISATAP interface (RFC4214) */ +#define IFF_ACCEPT_ALL_FRAMES 0x0400 /** Accept all frames, even ones with bad CRCs. + * Should only be used in debugging/testing situations + * Do NOT enable this unless you understand the + * consequences! */ +#define IFF_SAVE_FCS 0x0800 /** Save the Frame Check Sequence (FCS) on receive, if + * possible. */ +#define IFF_MAC_VLAN 0x1000 /* MAC VLAN device. 
*/ + #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 0d9d7ea..1a0f9ef 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -5,5 +5,13 @@ extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *); +struct macvlan_dev { + struct net_device *dev; + struct list_head list; + struct hlist_node hlist; + struct macvlan_port *port; + struct net_device *lowerdev; +}; + #endif /* __KERNEL__ */ #endif /* _LINUX_IF_MACVLAN_H */ diff --git a/include/linux/if_redirdev.h b/include/linux/if_redirdev.h new file mode 100644 index 0000000..cf8055c --- /dev/null +++ b/include/linux/if_redirdev.h @@ -0,0 +1,35 @@ +/* -*- linux-c -*- */ +#ifndef _LINUX_IF_REDIRDEV_H +#define _LINUX_IF_REDIRDEV_H + +/* the ioctl commands */ + +#define REDIRDEV_ADD 2090 +#define REDIRDEV_DEL 2091 +/* If this IOCTL succeeds, we are a Redirect-Device + interface, otherwise, we are not. */ +#define REDIRDEV_IS_REDIRDEV 2092 +#define REDIRDEV_GET_BY_IDX 2093 +#define REDIRDEV_GET_BY_NAME 2094 +#define REDIRDEV_SET_QUOTA 2095 + +#ifdef __KERNEL__ +#include +#include +extern int (*redirdev_ioctl_hook)(void*); + +#endif + +/* Request and response */ +struct redirdev_ioctl { + u32 cmd; + u32 ifidx; /* when getting info by idx */ + +#define RDD_ASSOCIATED (1<<0) + u32 flags; /* 1<<0: Is the interface associated with tx-dev or not */ + u32 not_used; /* explicitly align 64-bit */ + char ifname[IFNAMSIZ]; + char txifname[IFNAMSIZ]; +}; + +#endif diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index fc4e3db..1ab92bd 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -107,7 +107,8 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_ARPFILTER(in_dev) IN_DEV_ORCONF((in_dev), ARPFILTER) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_IGNORE(in_dev) 
IN_DEV_MAXCONF((in_dev), ARP_IGNORE) - +#define IN_DEV_ACCEPT_STS(in_dev) IN_DEV_MAXCONF((in_dev), ACCEPT_STS) + struct in_ifaddr { struct in_ifaddr *ifa_next; diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2584306..0d66649 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -158,6 +158,11 @@ struct ipv6_devconf { #endif __s32 proxy_ndp; __s32 accept_source_route; + __s32 nlnotify_on_addr_add; /* Always notify netlink on addr add, even if it is tentative. + * As currently implemented, this will often cause multiple netlink + * RTM_NEWADDR messages, as a new notification will be sent when + * the address becomes un-tentative. + */ #ifdef CONFIG_IPV6_OPTIMISTIC_DAD __s32 optimistic_dad; #endif @@ -192,6 +197,7 @@ enum { DEVCONF_PROXY_NDP, DEVCONF_OPTIMISTIC_DAD, DEVCONF_ACCEPT_SOURCE_ROUTE, + DEVCONF_NLNOTIFY_ON_ADDR_ADD, DEVCONF_MAX }; diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 35a8277..9bca1b8 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -30,11 +30,16 @@ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) #define SIOCGETRPF (SIOCPROTOPRIVATE+2) +#define SIOCGETVIFCNT_NG (SIOCPROTOPRIVATE+3) +#define SIOCGETSGCNT_NG (SIOCPROTOPRIVATE+4) + #define MAXVIFS 32 typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */ typedef unsigned short vifi_t; #define ALL_VIFS ((vifi_t)(-1)) +#define DFLT_MROUTE_TBL RT_TABLE_MAIN + /* * Same idea as select */ @@ -60,6 +65,11 @@ struct vifctl { struct in_addr vifc_rmt_addr; /* IPIP tunnel addr */ }; +struct vifctl_ng { + struct vifctl vif; + unsigned table_id; +} __attribute__ ((packed)); + #define VIFF_TUNNEL 0x1 /* IPIP tunnel */ #define VIFF_SRCRT 0x2 /* NI */ #define VIFF_REGISTER 0x4 /* register vif */ @@ -80,6 +90,18 @@ struct mfcctl int mfcc_expire; }; +struct mfcctl_ng +{ + struct mfcctl mfc; + unsigned int table_id; +} __attribute__ ((packed)); + +struct mrt_sockopt_simple +{ + unsigned int optval; + unsigned int table_id; +}; + /* * Group 
count retrieval for mrouted */ @@ -93,6 +115,12 @@ struct sioc_sg_req unsigned long wrong_if; }; +struct sioc_sg_req_ng +{ + struct sioc_sg_req req; + unsigned int table_id; +} __attribute__ ((packed)); + /* * To get vif packet counts */ @@ -106,6 +134,12 @@ struct sioc_vif_req unsigned long obytes; /* Out bytes */ }; +struct sioc_vif_req_ng +{ + struct sioc_vif_req vif; + unsigned int table_id; +} __attribute__ ((packed)); + /* * This is the format the mroute daemon expects to see IGMP control * data. Magically happens to be like an IP packet as per the original @@ -156,6 +190,8 @@ struct vif_device unsigned short flags; /* Control flags */ __be32 local,remote; /* Addresses(remote for tunnels)*/ int link; /* Physical interface index */ + int vif_index; /* Index in vif_table */ + unsigned int table_id; /* table-id that this vif belongs to */ }; #define VIFF_STATIC 0x8000 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ee81906..6dbbf1b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -43,6 +43,10 @@ #include +#ifdef CONFIG_IP_MROUTE +struct ipmr_table; +#endif + struct vlan_group; struct ethtool_ops; struct netpoll_info; @@ -178,6 +182,7 @@ enum { struct neighbour; struct neigh_parms; struct sk_buff; +struct pktgen_dev; struct netif_rx_stats { @@ -507,10 +512,15 @@ struct net_device #define NETIF_F_NETNS_LOCAL 8192 /* Does not change network namespaces */ #define NETIF_F_MULTI_QUEUE 16384 /* Has multiple TX/RX queues */ #define NETIF_F_LRO 32768 /* large receive offload */ - +#define NETIF_F_SAVE_CRC 65536 /* Can save FCS in skb, last 4 bytes for ethernet */ +#define NETIF_F_RX_ALL (1<<17) /* Can be configured to receive all packets, even + * ones with busted CRC. May disable VLAN filtering + * in the NIC, users should NOT enable this feature + * unless they understand the consequences. 
*/ + /* Segmentation offload features */ -#define NETIF_F_GSO_SHIFT 16 -#define NETIF_F_GSO_MASK 0xffff0000 +#define NETIF_F_GSO_SHIFT 24 +#define NETIF_F_GSO_MASK 0xff000000 #define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) #define NETIF_F_UFO (SKB_GSO_UDP << NETIF_F_GSO_SHIFT) #define NETIF_F_GSO_ROBUST (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT) @@ -713,6 +723,20 @@ struct net_device /* Network namespace this network device is inside */ struct net *nd_net; + /* Callback for when the queue is woken, used by pktgen currently */ + int (*notify_queue_woken)(struct net_device *dev); + void* nqw_data; /* To be used by the method above as needed */ + + struct pktgen_dev* pkt_dev; /* to quickly find the pkt-gen dev registered with this + * interface, if any. + */ + long dflt_skb_mark; /* Specify skb->mark for pkts received on this interface. */ + +#ifdef CONFIG_IP_MROUTE + /* IPv4 Multicast Routing Table for this device. */ + struct ipmr_table* mrt_entry; +#endif + /* bridge stuff */ struct net_bridge_port *br_port; /* macvlan */ @@ -936,8 +960,13 @@ static inline void netif_wake_queue(struct net_device *dev) return; } #endif - if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) + if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) { __netif_schedule(dev); + + if (dev->notify_queue_woken) { + dev->notify_queue_woken(dev); + } + } } /** @@ -1109,6 +1138,14 @@ extern int netdev_budget; /* Called by rtnetlink.c:rtnl_unlock() */ extern void netdev_run_todo(void); +#ifdef CONFIG_DEBUG_NETDEV +extern int netdev_debug; +extern void __dev_hold(struct net_device *, const char *); +extern void __dev_put(struct net_device *, const char *); + +#define dev_hold(dev) __dev_hold(dev, __FUNCTION__) +#define dev_put(dev) __dev_put(dev, __FUNCTION__) +#else /** * dev_put - release reference to device * @dev: network device @@ -1130,6 +1167,8 @@ static inline void dev_hold(struct net_device *dev) { atomic_inc(&dev->refcnt); } +#endif + /* Carrier loss detection, dial on demand. 
The functions netif_carrier_on * and _off may be called from IRQ context, but it is caller diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f4a0e4c..7ec5360 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -528,7 +528,7 @@ static inline void nfs3_forget_cached_acls(struct inode *inode) /* * linux/fs/mount_clnt.c */ -extern int nfs_mount(struct sockaddr *, size_t, char *, char *, +extern int nfs_mount(struct sockaddr *, struct sockaddr *, size_t, char *, char *, int, int, struct nfs_fh *); /* diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 3423c67..8e5f377 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -62,10 +62,10 @@ struct nfs_client { /* idmapper */ struct idmap * cl_idmap; - /* Our own IP address, as a null-terminated string. + /* Our own IP address. * This is used to generate the clientid, and the callback address. */ - char cl_ipaddr[48]; + struct sockaddr cl_ipaddr; unsigned char cl_id_uniquifier; #endif }; diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index df7c6b7..0c6c320 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -20,7 +20,7 @@ * mount-to-kernel version compatibility. Some of these aren't used yet * but here they are anyway. 
*/ -#define NFS_MOUNT_VERSION 6 +#define NFS_MOUNT_VERSION 7 #define NFS_MAX_CONTEXT_LEN 256 struct nfs_mount_data { @@ -43,6 +43,7 @@ struct nfs_mount_data { struct nfs3_fh root; /* 4 */ int pseudoflavor; /* 5 */ char context[NFS_MAX_CONTEXT_LEN + 1]; /* 6 */ + struct sockaddr clientaddr; /* 7 */ }; /* bits in the flags field */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bbd8d00..446f47f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -288,7 +288,8 @@ struct sk_buff { fclone:2, ipvs_property:1, peeked:1, - nf_trace:1; + nf_trace:1, + use_specified_ether_crc:1; __be16 protocol; void (*destructor)(struct sk_buff *skb); diff --git a/include/linux/sockios.h b/include/linux/sockios.h index abef759..367287c 100644 --- a/include/linux/sockios.h +++ b/include/linux/sockios.h @@ -94,6 +94,13 @@ #define SIOCGRARP 0x8961 /* get RARP table entry */ #define SIOCSRARP 0x8962 /* set RARP table entry */ +/* MAC address based VLAN control calls */ +#define SIOCGIFMACVLAN 0x8965 /* Mac address multiplex/demultiplex support */ +#define SIOCSIFMACVLAN 0x8966 /* Set macvlan options */ + +#define SIOCGIFREDIRDEV 0x8967 /* Redirect device get ioctl */ +#define SIOCSIFREDIRDEV 0x8968 /* Set redirect dev options */ + /* Driver configuration calls */ #define SIOCGIFMAP 0x8970 /* Get device parameters */ @@ -122,6 +129,7 @@ #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ #define SIOCBRDELIF 0x89a3 /* remove interface from bridge */ + /* Device private ioctl calls */ /* diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 64c9755..781d767 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -281,6 +281,11 @@ static inline struct sockaddr *svc_addr(struct svc_rqst *rqst) return (struct sockaddr *) &rqst->rq_addr; } +static inline struct sockaddr *svc_daddr(struct svc_rqst *rqst) +{ + return (struct sockaddr *) &rqst->rq_daddr; +} + /* * Check buffer bounds after decoding arguments */ diff --git 
a/include/linux/sysctl.h b/include/linux/sysctl.h index 571f01d..8dbe468 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -490,6 +490,7 @@ enum NET_IPV4_CONF_ARP_IGNORE=19, NET_IPV4_CONF_PROMOTE_SECONDARIES=20, NET_IPV4_CONF_ARP_ACCEPT=21, + NET_IPV4_CONF_ACCEPT_STS=22, __NET_IPV4_CONF_MAX }; @@ -578,6 +579,7 @@ enum { NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, NET_IPV6_PROXY_NDP=23, NET_IPV6_ACCEPT_SOURCE_ROUTE=25, + NET_IPV6_NLNOTIFY_ON_ADDR_ADD=26, __NET_IPV6_MAX }; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 496503c..a5c56b9 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -136,12 +136,25 @@ extern int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex); extern int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr); extern int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr); extern int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr); - +/*extern void ipv6_ac_destroy_dev(struct inet6_dev *idev);*/ /* Device notifier */ extern int register_inet6addr_notifier(struct notifier_block *nb); extern int unregister_inet6addr_notifier(struct notifier_block *nb); +#ifdef CONFIG_IPV6_REF_DEBUG +extern void in6_dev_atomic_inc(struct inet6_dev *idev, const char *func, int line); +extern void in6_dev_atomic_dec(struct inet6_dev *idev, const char *func, int line); +extern int in6_dev_atomic_dec_and_test(struct inet6_dev *idev, const char *func, int line); +#else +#define in6_dev_atomic_inc(idev, func, line) atomic_inc(&(idev)->refcnt) +#define in6_dev_atomic_dec(idev, func, line) atomic_dec(&(idev)->refcnt) +#define in6_dev_atomic_dec_and_test(idev, func, line) atomic_dec_and_test(&(idev)->refcnt) +#endif + +#define __in6_dev_put(idev) in6_dev_atomic_dec((idev), __func__, __LINE__) +#define in6_dev_hold(idev) in6_dev_atomic_inc((idev), __func__, __LINE__) + static inline struct inet6_dev * __in6_dev_get(struct net_device *dev) { @@ -149,40 +162,48 @@ 
__in6_dev_get(struct net_device *dev) } static inline struct inet6_dev * -in6_dev_get(struct net_device *dev) +____in6_dev_get(struct net_device *dev, const char *func, int line) { struct inet6_dev *idev = NULL; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) - atomic_inc(&idev->refcnt); + in6_dev_atomic_inc(idev, func, line); rcu_read_unlock(); return idev; } +#define in6_dev_get(dev) ____in6_dev_get((dev), __func__, __LINE__) extern void in6_dev_finish_destroy(struct inet6_dev *idev); static inline void -in6_dev_put(struct inet6_dev *idev) +____in6_dev_put(struct inet6_dev *idev, const char *func, int line) { - if (atomic_dec_and_test(&idev->refcnt)) + if (in6_dev_atomic_dec_and_test(idev, func, line)) in6_dev_finish_destroy(idev); } +#define in6_dev_put(idev) ____in6_dev_put((idev), __func__, __LINE__) -#define __in6_dev_put(idev) atomic_dec(&(idev)->refcnt) -#define in6_dev_hold(idev) atomic_inc(&(idev)->refcnt) - +#ifdef CONFIG_IPV6_REF_DEBUG +extern void in6_ifa_atomic_inc(struct inet6_ifaddr *ifa, const char *func, int line); +extern void in6_ifa_atomic_dec(struct inet6_ifaddr *ifa, const char *func, int line); +extern int in6_ifa_atomic_dec_and_test(struct inet6_ifaddr *ifa, const char *func, int line); +#else +#define in6_ifa_atomic_inc(ifa, func, line) atomic_inc(&(ifa)->refcnt) +#define in6_ifa_atomic_dec(ifa, func, line) atomic_dec(&(ifa)->refcnt) +#define in6_ifa_atomic_dec_and_test(ifa, func, line) atomic_dec_and_test(&(ifa)->refcnt) +#endif extern void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); -static inline void in6_ifa_put(struct inet6_ifaddr *ifp) +static inline void ____in6_ifa_put(struct inet6_ifaddr *ifp, const char *func, int line) { - if (atomic_dec_and_test(&ifp->refcnt)) + if (in6_ifa_atomic_dec_and_test(ifp, func, line)) inet6_ifa_finish_destroy(ifp); } - -#define __in6_ifa_put(ifp) atomic_dec(&(ifp)->refcnt) -#define in6_ifa_hold(ifp) atomic_inc(&(ifp)->refcnt) +#define in6_ifa_put(ifp) ____in6_ifa_put((ifp), __func__, 
__LINE__) +#define __in6_ifa_put(ifp) in6_ifa_atomic_dec((ifp), __func__, __LINE__) +#define in6_ifa_hold(ifp) in6_ifa_atomic_inc((ifp), __func__, __LINE__) extern void addrconf_forwarding_on(void); diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h index d9a4f7f..83b9a6e 100644 --- a/include/net/netfilter/nf_conntrack_tuple.h +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -91,6 +91,8 @@ struct nf_conntrack_tuple /* The direction (for tuplehash) */ u_int8_t dir; } dst; + + u_int32_t mark; }; struct nf_conntrack_tuple_mask @@ -130,7 +132,8 @@ static inline int __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, t1->src.u3.all[2] == t2->src.u3.all[2] && t1->src.u3.all[3] == t2->src.u3.all[3] && t1->src.u.all == t2->src.u.all && - t1->src.l3num == t2->src.l3num); + t1->src.l3num == t2->src.l3num && + t1->mark == t2->mark); } static inline int __nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1, @@ -141,7 +144,8 @@ static inline int __nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1, t1->dst.u3.all[2] == t2->dst.u3.all[2] && t1->dst.u3.all[3] == t2->dst.u3.all[3] && t1->dst.u.all == t2->dst.u.all && - t1->dst.protonum == t2->dst.protonum); + t1->dst.protonum == t2->dst.protonum && + t1->mark == t2->mark); } static inline int nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1, @@ -177,7 +181,8 @@ static inline int nf_ct_tuple_src_mask_cmp(const struct nf_conntrack_tuple *t1, return 0; if (t1->src.l3num != t2->src.l3num || - t1->dst.protonum != t2->dst.protonum) + t1->dst.protonum != t2->dst.protonum || + t1->mark != t2->mark) return 0; return 1; diff --git a/include/net/sock.h b/include/net/sock.h index fd98760..e97029f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -406,6 +406,10 @@ enum sock_flags { SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ + 
SOCK_DONT_DO_LL_FCS, /* Tell NIC not to do the ethernet FCS. Will use + * last 4 bytes of packet sent from user-space + * instead. + */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) diff --git a/kernel/panic.c b/kernel/panic.c index 24af9f8..57847ff 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -179,7 +179,7 @@ const char *print_tainted(void) void add_taint(unsigned flag) { - debug_locks = 0; /* can't trust the integrity of the kernel anymore */ + /* debug_locks = 0; --Ben */ /* can't trust the integrity of the kernel anymore */ tainted |= flag; } EXPORT_SYMBOL(add_taint); diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index c09350d..37c8fab 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -219,6 +219,7 @@ static const struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = { { NET_IPV4_CONF_ARP_IGNORE, "arp_ignore" }, { NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" }, { NET_IPV4_CONF_ARP_ACCEPT, "arp_accept" }, + { NET_IPV4_CONF_ACCEPT_STS, "accept_sts" }, {} }; @@ -494,6 +495,7 @@ static const struct trans_ctl_table trans_net_ipv6_conf_var_table[] = { { NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN, "accept_ra_rt_info_max_plen" }, { NET_IPV6_PROXY_NDP, "proxy_ndp" }, { NET_IPV6_ACCEPT_SOURCE_ROUTE, "accept_source_route" }, + { NET_IPV6_NLNOTIFY_ON_ADDR_ADD, "nlnotify_on_addr_add" }, {} }; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0796c1a..6854057 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -397,6 +397,13 @@ config DEBUG_KOBJECT If you say Y here, some extra kobject debugging messages will be sent to the syslog. +config DEBUG_NETDEV + bool "network device debugging" + depends on DEBUG_KERNEL + help + This option enables extra checking on usage and reference counting + of network devices. 
+ config DEBUG_HIGHMEM bool "Highmem debugging" depends on DEBUG_KERNEL && HIGHMEM diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index c0bac6d..91d95a2 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -435,6 +435,11 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, } break; +#if 0 + /* Don't propagate management state from base dev to VLANs. If you do this, + * then if you 'ifconfig eth0 down; ifconfig eth0 up', you also lose all the + * routes for eth0.* VLANs. --Ben + */ case NETDEV_DOWN: /* Put all VLANs for this dev in the down state too. */ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { @@ -464,7 +469,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, dev_change_flags(vlandev, flgs | IFF_UP); } break; - +#endif + case NETDEV_UNREGISTER: /* Delete all VLANs for this dev. */ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 41a76a0..af27671 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -367,6 +367,11 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) struct net_device_stats *stats = &dev->stats; struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); + /* Please note, dev_queue_xmit consumes the pkt regardless of the + * return value. So, will copy the skb first and free if successful. + */ + struct sk_buff* skb2 = skb_get(skb); + /* Handle non-VLAN frames if they are sent to us, for example by DHCP. * * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING @@ -395,6 +400,10 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) skb = __vlan_put_tag(skb, veth_TCI); if (!skb) { stats->tx_dropped++; + /* Free the extra copy, assuming this is a non-recoverable + * issue and we don't want calling code to retry. 
+ */ + kfree_skb(skb2); return 0; } @@ -412,13 +421,21 @@ static int vlan_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) veth->h_vlan_proto, veth->h_vlan_TCI, veth->h_vlan_encapsulated_proto); - stats->tx_packets++; /* for statics only */ - stats->tx_bytes += skb->len; - skb->dev = vlan_dev_info(dev)->real_dev; - dev_queue_xmit(skb); - - return 0; + { + int rv = dev_queue_xmit(skb); + if (rv == 0) { + /* Was success, need to free the skb reference since + * we bumped up the user count above. If there was an + * error instead, then the skb2 will not be freed, and so + * the calling code will be able to re-send it. + */ + stats->tx_packets++; /* for statics only */ + stats->tx_bytes += skb2->len; + kfree_skb(skb2); + } + return rv; + } } static int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, diff --git a/net/Kconfig b/net/Kconfig index 6627c6a..fee9b8c 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -183,6 +183,7 @@ source "net/tipc/Kconfig" source "net/atm/Kconfig" source "net/bridge/Kconfig" source "net/8021q/Kconfig" +source "net/redir/Kconfig" source "net/decnet/Kconfig" source "net/llc/Kconfig" source "net/ipx/Kconfig" @@ -225,6 +226,14 @@ config NET_TCPPROBE To compile this code as a module, choose M here: the module will be called tcp_probe. +config SUPPORT_SEND_BAD_CRC + bool "Support Send Bad CRC (USE WITH CAUTION)" + ---help--- + When enabled, one can send a specially crafted packet to the ethernet + device via a raw socket and it will be sent with the last 4 bytes of + the packet as the ethernet CRC. Requires driver support. Current driver + support is limited to e100 and e1000. 
+ endmenu endmenu diff --git a/net/Makefile b/net/Makefile index b7a1364..2b3cb93 100644 --- a/net/Makefile +++ b/net/Makefile @@ -50,6 +50,7 @@ obj-$(CONFIG_MAC80211) += mac80211/ obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ obj-$(CONFIG_NETLABEL) += netlabel/ +obj-$(CONFIG_REDIRDEV) += redir/ obj-$(CONFIG_IUCV) += iucv/ obj-$(CONFIG_RFKILL) += rfkill/ obj-$(CONFIG_NET_9P) += 9p/ diff --git a/net/core/dev.c b/net/core/dev.c index 37ffd7a..0ec6bf9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include @@ -119,9 +120,26 @@ #include #include #include +#include #include "net-sysfs.h" +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) +#include "pktgen.h" + +#warning "Compiling dev.c for pktgen."; + +int (*handle_pktgen_hook)(struct sk_buff *skb) = NULL; +EXPORT_SYMBOL(handle_pktgen_hook); + +static __inline__ int handle_pktgen_rcv(struct sk_buff* skb) { + if (handle_pktgen_hook) { + return handle_pktgen_hook(skb); + } + return -1; +} +#endif + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -2047,6 +2065,11 @@ int netif_receive_skb(struct sk_buff *skb) skb_reset_transport_header(skb); skb->mac_len = skb->network_header - skb->mac_header; + /* Set the default 'mark' for this skb. dflt_skb_mark may be set through + * the /sys/class/net/[dev-name]/dflt_skb_mark file. + */ + skb->mark = skb->dev->dflt_skb_mark; + pt_prev = NULL; rcu_read_lock(); @@ -2080,6 +2103,16 @@ ncls: if (!skb) goto out; +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) + if ((skb->dev->pkt_dev) && + (handle_pktgen_rcv(skb) >= 0)) { + /* Pktgen may consume the packet, no need to send + * to further protocols. 
+ */ + goto out; + } +#endif + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { @@ -3858,6 +3891,7 @@ EXPORT_SYMBOL(register_netdev); static void netdev_wait_allrefs(struct net_device *dev) { unsigned long rebroadcast_time, warning_time; + int n = 0; rebroadcast_time = warning_time = jiffies; while (atomic_read(&dev->refcnt) != 0) { @@ -3891,10 +3925,37 @@ static void netdev_wait_allrefs(struct net_device *dev) "count = %d\n", dev->name, atomic_read(&dev->refcnt)); warning_time = jiffies; + n++; } + if (n >= 3) + break; } } +#ifdef CONFIG_DEBUG_NETDEV +/* This is for debugging reference counting of devices */ +int netdev_debug __read_mostly; + +void __dev_hold(struct net_device *dev, const char *func) +{ + atomic_inc(&dev->refcnt); + if (unlikely(netdev_debug) && (strcmp(dev->name, "rddVR0") == 0)) + printk(KERN_DEBUG "%s: dev_hold %d %s\n", + dev->name, atomic_read(&dev->refcnt), func); +} +EXPORT_SYMBOL(__dev_hold); + +void __dev_put(struct net_device *dev, const char *func) +{ + BUG_ON(atomic_read(&dev->refcnt) == 0); + if (unlikely(netdev_debug) && (strcmp(dev->name, "rddVR0") == 0)) + printk(KERN_DEBUG "%s: dev_put %d %s\n", + dev->name, atomic_read(&dev->refcnt), func); + atomic_dec(&dev->refcnt); +} +EXPORT_SYMBOL(__dev_put); +#endif + /* The sequence is: * * rtnl_lock(); @@ -3954,7 +4015,7 @@ void netdev_run_todo(void) netdev_wait_allrefs(dev); /* paranoia */ - BUG_ON(atomic_read(&dev->refcnt)); + WARN_ON(atomic_read(&dev->refcnt)); BUG_TRAP(!dev->ip_ptr); BUG_TRAP(!dev->ip6_ptr); BUG_TRAP(!dev->dn_ptr); @@ -4584,6 +4645,10 @@ EXPORT_SYMBOL(net_enable_timestamp); EXPORT_SYMBOL(net_disable_timestamp); EXPORT_SYMBOL(dev_get_flags); +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) +EXPORT_SYMBOL(handle_pktgen_rcv); +#endif + #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) EXPORT_SYMBOL(br_handle_frame_hook); EXPORT_SYMBOL(br_fdb_get_hook); diff --git a/net/core/ethtool.c 
b/net/core/ethtool.c index 1163eb2..ab7cc33 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1,4 +1,4 @@ -/* +/* -*- linux-c -*- * net/core/ethtool.c - Ethtool ioctl handler * Copyright (c) 2003 Matthew Wilcox * @@ -35,6 +35,12 @@ u32 ethtool_op_get_tx_csum(struct net_device *dev) return (dev->features & NETIF_F_ALL_CSUM) != 0; } +u32 ethtool_op_get_rx_all(struct net_device *dev, u32* retval) +{ + *retval = ((dev->priv_flags & IFF_ACCEPT_ALL_FRAMES) != 0); + return 0; +} + int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) { if (data) @@ -777,6 +783,88 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr, return actor(dev, edata.data); } + +static int ethtool_get_rx_all(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + int rv = 0; + + if (!dev->ethtool_ops->get_rx_all) + return -EOPNOTSUPP; + + if ((rv = dev->ethtool_ops->get_rx_all(dev, &edata.data)) < 0) { + return rv; + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + + +static int ethtool_set_rx_all(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!dev->ethtool_ops->set_rx_all) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return dev->ethtool_ops->set_rx_all(dev, id.data); +} + +static int ethtool_get_rx_fcs(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + int rv = 0; + + if (!dev->ethtool_ops->get_save_fcs) + return -EOPNOTSUPP; + + if ((rv = dev->ethtool_ops->get_save_fcs(dev, &edata.data)) < 0) { + return rv; + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + + +static int ethtool_set_rx_fcs(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!dev->ethtool_ops->set_save_fcs) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return 
dev->ethtool_ops->set_save_fcs(dev, id.data); +} + + +/* Handle some generic ethtool commands here */ +static int ethtool_get_netdev_stats(struct net_device *dev, void *useraddr) { + + struct ethtool_ndstats* nds = (struct ethtool_ndstats*)(useraddr); + + struct net_device_stats *stats = dev->get_stats(dev); + if (stats) { + if (copy_to_user(nds->data, stats, sizeof(*stats))) { + return -EFAULT; + } + } + else { + return -EOPNOTSUPP; + } + return 0; +} + + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -790,9 +878,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) if (!dev || !netif_device_present(dev)) return -ENODEV; - if (!dev->ethtool_ops) - return -EOPNOTSUPP; - if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd))) return -EFAULT; @@ -819,12 +904,25 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) return -EPERM; } - if (dev->ethtool_ops->begin) + if (dev->ethtool_ops && dev->ethtool_ops->begin) if ((rc = dev->ethtool_ops->begin(dev)) < 0) return rc; old_features = dev->features; + /* Handle some generic operations that do not require specific + * ethtool handlers.
+ */ + switch (ethcmd) { + case ETHTOOL_GNDSTATS: + return ethtool_get_netdev_stats(dev, useraddr); + default: + break; + } + + if (!dev->ethtool_ops) + return -EOPNOTSUPP; + switch (ethcmd) { case ETHTOOL_GSET: rc = ethtool_get_settings(dev, useraddr); @@ -927,6 +1025,18 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_PHYS_ID: rc = ethtool_phys_id(dev, useraddr); break; + case ETHTOOL_SETRXALL: + rc = ethtool_set_rx_all(dev, useraddr); + break; + case ETHTOOL_GETRXALL: + rc = ethtool_get_rx_all(dev, useraddr); + break; + case ETHTOOL_SETRXFCS: + rc = ethtool_set_rx_fcs(dev, useraddr); + break; + case ETHTOOL_GETRXFCS: + rc = ethtool_get_rx_fcs(dev, useraddr); + break; case ETHTOOL_GSTATS: rc = ethtool_get_stats(dev, useraddr); break; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 7635d3f..f24b2ac 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -208,6 +208,20 @@ static ssize_t store_tx_queue_len(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } +NETDEVICE_SHOW(dflt_skb_mark, fmt_ulong); + +static int change_dflt_skb_mark(struct net_device *net, unsigned long new_val) +{ + net->dflt_skb_mark = new_val; + return 0; +} + +static ssize_t store_dflt_skb_mark(struct device *dev, struct device_attribute* attr, + const char* buf, size_t len) +{ + return netdev_store(dev, attr, buf, len, change_dflt_skb_mark); +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(iflink, S_IRUGO, show_iflink, NULL), @@ -224,6 +238,8 @@ static struct device_attribute net_class_attributes[] = { __ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags), __ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len), + __ATTR(dflt_skb_mark, S_IRUGO | S_IWUSR, show_dflt_skb_mark, + store_dflt_skb_mark), {} }; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 20e63b3..f33ac94 100644 --- a/net/core/pktgen.c +++ 
b/net/core/pktgen.c @@ -6,7 +6,7 @@ * * Alexey Kuznetsov * Ben Greear - * Jens Låås + * Jens Låås * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -167,233 +167,110 @@ #include #include /* do_div */ #include - +#include /* sched_clock() */ +#include "pktgen.h" + +#define USE_NQW_CALLBACK +#ifdef USE_NQW_CALLBACK +# include +# if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +# include +# endif +#endif #define VERSION "pktgen v2.69: Packet Generator for packet performance testing.\n" -#define IP_NAME_SZ 32 -#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ -#define MPLS_STACK_BOTTOM htonl(0x00000100) - -/* Device flag bits */ -#define F_IPSRC_RND (1<<0) /* IP-Src Random */ -#define F_IPDST_RND (1<<1) /* IP-Dst Random */ -#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ -#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ -#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ -#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ -#define F_TXSIZE_RND (1<<6) /* Transmit size is random */ -#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ -#define F_MPLS_RND (1<<8) /* Random MPLS labels */ -#define F_VID_RND (1<<9) /* Random VLAN ID */ -#define F_SVID_RND (1<<10) /* Random SVLAN ID */ -#define F_FLOW_SEQ (1<<11) /* Sequential flows */ -#define F_IPSEC_ON (1<<12) /* ipsec on for flows */ -#define F_QUEUE_MAP_RND (1<<13) /* queue map Random */ - -/* Thread control flag bits */ -#define T_TERMINATE (1<<0) -#define T_STOP (1<<1) /* Stop run */ -#define T_RUN (1<<2) /* Start run */ -#define T_REMDEVALL (1<<3) /* Remove all devs */ -#define T_REMDEV (1<<4) /* Remove one dev */ - -/* If lock -- can be removed after some work */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); - -/* Used to help with determining the pkts on receive */ -#define PKTGEN_MAGIC 0xbe9be955 -#define PG_PROC_DIR "pktgen" -#define PGCTRL "pgctrl" static 
struct proc_dir_entry *pg_proc_dir = NULL; -#define MAX_CFLOWS 65536 - -#define VLAN_TAG_SIZE(x) ((x)->vlan_id == 0xffff ? 0 : 4) -#define SVLAN_TAG_SIZE(x) ((x)->svlan_id == 0xffff ? 0 : 4) - -struct flow_state { - __be32 cur_daddr; - int count; -#ifdef CONFIG_XFRM - struct xfrm_state *x; -#endif - __u32 flags; -}; - -/* flow flag bits */ -#define F_INIT (1<<0) /* flow has been initialized */ - -struct pktgen_dev { - /* - * Try to keep frequent/infrequent used vars. separated. - */ - struct proc_dir_entry *entry; /* proc file */ - struct pktgen_thread *pg_thread;/* the owner */ - struct list_head list; /* Used for chaining in the thread's run-queue */ - - int running; /* if this changes to false, the test will stop */ - - /* If min != max, then we will either do a linear iteration, or - * we will do a random selection from within the range. - */ - __u32 flags; - int removal_mark; /* non-zero => the device is marked for - * removal by worker thread */ - - int min_pkt_size; /* = ETH_ZLEN; */ - int max_pkt_size; /* = ETH_ZLEN; */ - int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */ - int nfrags; - __u32 delay_us; /* Default delay */ - __u32 delay_ns; - __u64 count; /* Default No packets to send */ - __u64 sofar; /* How many pkts we've sent so far */ - __u64 tx_bytes; /* How many bytes we've transmitted */ - __u64 errors; /* Errors when trying to transmit, pkts will be re-sent */ - - /* runtime counters relating to clone_skb */ - __u64 next_tx_us; /* timestamp of when to tx next */ - __u32 next_tx_ns; - - __u64 allocated_skbs; - __u32 clone_count; - int last_ok; /* Was last skb sent? - * Or a failed transmit of some sort? This will keep - * sequence numbers in order, for example. - */ - __u64 started_at; /* micro-seconds */ - __u64 stopped_at; /* micro-seconds */ - __u64 idle_acc; /* micro-seconds */ - __u32 seq_num; - - int clone_skb; /* Use multiple SKBs during packet gen. 
If this number - * is greater than 1, then that many copies of the same - * packet will be sent before a new packet is allocated. - * For instance, if you want to send 1024 identical packets - * before creating a new packet, set clone_skb to 1024. - */ - - char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - - struct in6_addr in6_saddr; - struct in6_addr in6_daddr; - struct in6_addr cur_in6_daddr; - struct in6_addr cur_in6_saddr; - /* For ranges */ - struct in6_addr min_in6_daddr; - struct in6_addr max_in6_daddr; - struct in6_addr min_in6_saddr; - struct in6_addr max_in6_saddr; - - /* If we're doing ranges, random or incremental, then this - * defines the min/max for those ranges. - */ - __be32 saddr_min; /* inclusive, source IP address */ - __be32 saddr_max; /* exclusive, source IP address */ - __be32 daddr_min; /* inclusive, dest IP address */ - __be32 daddr_max; /* exclusive, dest IP address */ - - __u16 udp_src_min; /* inclusive, source UDP port */ - __u16 udp_src_max; /* exclusive, source UDP port */ - __u16 udp_dst_min; /* inclusive, dest UDP port */ - __u16 udp_dst_max; /* exclusive, dest UDP port */ - - /* DSCP + ECN */ - __u8 tos; /* six most significant bits of (former) IPv4 TOS are for dscp codepoint */ - __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6 (see RFC 3260, sec. 
4) */ - - /* MPLS */ - unsigned nr_labels; /* Depth of stack, 0 = no MPLS */ - __be32 labels[MAX_MPLS_LABELS]; - - /* VLAN/SVLAN (802.1Q/Q-in-Q) */ - __u8 vlan_p; - __u8 vlan_cfi; - __u16 vlan_id; /* 0xffff means no vlan tag */ - - __u8 svlan_p; - __u8 svlan_cfi; - __u16 svlan_id; /* 0xffff means no svlan tag */ - - __u32 src_mac_count; /* How many MACs to iterate through */ - __u32 dst_mac_count; /* How many MACs to iterate through */ - - unsigned char dst_mac[ETH_ALEN]; - unsigned char src_mac[ETH_ALEN]; - - __u32 cur_dst_mac_offset; - __u32 cur_src_mac_offset; - __be32 cur_saddr; - __be32 cur_daddr; - __u16 cur_udp_dst; - __u16 cur_udp_src; - __u16 cur_queue_map; - __u32 cur_pkt_size; - - __u8 hh[14]; - /* = { - 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, - - We fill in SRC address later - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x00 - }; - */ - __u16 pad; /* pad out the hh struct to an even 16 bytes */ +#define REMOVE 1 +#define FIND 0 - struct sk_buff *skb; /* skb we are to transmit next, mainly used for when we - * are transmitting the same one multiple times - */ - struct net_device *odev; /* The out-going device. Note that the device should - * have it's pg_info pointer pointing back to this - * device. This will be set when the user specifies - * the out-going device name (not when the inject is - * started as it used to do.) 
- */ - struct flow_state *flows; - unsigned cflows; /* Concurrent flows (config) */ - unsigned lflow; /* Flow length (config) */ - unsigned nflows; /* accumulated flows (stats) */ - unsigned curfl; /* current sequenced flow (state)*/ - - u16 queue_map_min; - u16 queue_map_max; +static char* version = VERSION; -#ifdef CONFIG_XFRM - __u8 ipsmode; /* IPSEC mode (config) */ - __u8 ipsproto; /* IPSEC type (config) */ -#endif - char result[512]; +static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove); +static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); +static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); +static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char *ifname); +static int pktgen_device_event(struct notifier_block *, unsigned long, void *); +static void pktgen_run_all_threads(int background); +static void pktgen_stop_all_threads_ifs(void); +static int pktgen_stop_device(struct pktgen_dev *pkt_dev); +static void pktgen_stop(struct pktgen_thread *t); +static void pktgen_clear_counters(struct pktgen_dev *pkt_dev, int seq_too); +static void pktgen_mark_device(const char *ifname); +static unsigned int scan_ip6(const char *s, char ip[16]); +static unsigned int fmt_ip6(char *s, const char ip[16]); +static void clear_nqw_hook(struct pktgen_thread* t, struct net_device* dev); +static int set_nqw_hook(struct pktgen_thread* t, struct net_device* dev, int gfp); + +/* Module parameters, defaults. */ +static int pg_count_d = 1000; /* 1000 pkts by default */ +static int pg_delay_d = 0x7FFFFFFF; /* Don't run until someone sets a different delay. 
*/ + +static int pg_clone_skb_d; +static int debug; + +static DEFINE_MUTEX(pktgen_thread_lock); +static LIST_HEAD(pktgen_threads); + +static struct notifier_block pktgen_notifier_block = { + .notifier_call = pktgen_device_event, }; + +/* This code works around the fact that do_div cannot handle two 64-bit + numbers, and regular 64-bit division doesn't work on x86 kernels. + --Ben +*/ -struct pktgen_hdr { - __be32 pgh_magic; - __be32 seq_num; - __be32 tv_sec; - __be32 tv_usec; -}; +#define PG_DIV 0 -struct pktgen_thread { - spinlock_t if_lock; - struct list_head if_list; /* All device here */ - struct list_head th_list; - struct task_struct *tsk; - char result[512]; +/* This was emailed to LMKL by: Chris Caputo + * Function copied/adapted/optimized from: + * + * nemesis.sourceforge.net/browse/lib/static/intmath/ix86/intmath.c.html + * + * Copyright 1994, University of Cambridge Computer Laboratory + * All Rights Reserved. + * + */ +static inline s64 divremdi3(s64 x, s64 y, int type) +{ + u64 a = (x < 0) ? -x : x; + u64 b = (y < 0) ? -y : y; + u64 res = 0, d = 1; + + if (b > 0) { + while (b < a) { + b <<= 1; + d <<= 1; + } + } - /* Field for thread to receive "posted" events terminate, stop ifs etc. */ + do { + if (a >= b) { + a -= b; + res += d; + } + b >>= 1; + d >>= 1; + } + while (d); - u32 control; - int cpu; + if (PG_DIV == type) { + return (((x ^ y) & (1ll << 63)) == 0) ? res : -(s64) res; + } else { + return ((x & (1ll << 63)) == 0) ? 
a : -(s64) a; + } +} - wait_queue_head_t queue; -}; +/* End of hacks to deal with 64-bit math on x86 */ -#define REMOVE 1 -#define FIND 0 +/** Convert to milliseconds */ +static inline __u64 tv_to_ms(const struct timeval *tv) +{ + __u64 ms = tv->tv_usec / 1000; + ms += (__u64) tv->tv_sec * (__u64) 1000; + return ms; +} /** Convert to micro-seconds */ static inline __u64 tv_to_us(const struct timeval *tv) @@ -403,44 +280,55 @@ static inline __u64 tv_to_us(const struct timeval *tv) return us; } -static __u64 getCurUs(void) +static inline __u64 pg_div(__u64 n, __u32 base) +{ + __u64 tmp = n; + do_div(tmp, base); + /* printk("pktgen: pg_div, n: %llu base: %d rv: %llu\n", + n, base, tmp); */ + return tmp; +} + +static inline __u64 pg_div64(__u64 n, __u64 base) +{ + __u64 tmp = n; +/* + * How do we know if the architecture we are running on + * supports division with 64 bit base? + * + */ +#if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__) + + do_div(tmp, base); +#else + tmp = divremdi3(n, base, PG_DIV); +#endif + return tmp; +} + +static inline __u64 getCurUs(void) { struct timeval tv; do_gettimeofday(&tv); return tv_to_us(&tv); } +static inline __u64 tv_diff(const struct timeval *a, const struct timeval *b) +{ + return tv_to_us(a) - tv_to_us(b); +} +/* Since the machine booted. */ +static inline __u64 getRelativeCurUs(void) { + return pg_div(sched_clock(), 1000); +} + +/* Since the machine booted. 
*/ +static inline __u64 getRelativeCurNs(void) { + return sched_clock(); +} + /* old include end */ -static char version[] __initdata = VERSION; - -static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); -static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); -static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, - const char *ifname); -static int pktgen_device_event(struct notifier_block *, unsigned long, void *); -static void pktgen_run_all_threads(void); -static void pktgen_stop_all_threads_ifs(void); -static int pktgen_stop_device(struct pktgen_dev *pkt_dev); -static void pktgen_stop(struct pktgen_thread *t); -static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); - -static unsigned int scan_ip6(const char *s, char ip[16]); -static unsigned int fmt_ip6(char *s, const char ip[16]); - -/* Module parameters, defaults. */ -static int pg_count_d = 1000; /* 1000 pkts by default */ -static int pg_delay_d; -static int pg_clone_skb_d; -static int debug; - -static DEFINE_MUTEX(pktgen_thread_lock); -static LIST_HEAD(pktgen_threads); - -static struct notifier_block pktgen_notifier_block = { - .notifier_call = pktgen_device_event, -}; - /* * /proc handling functions * @@ -448,7 +336,7 @@ static struct notifier_block pktgen_notifier_block = { static int pgctrl_show(struct seq_file *seq, void *v) { - seq_puts(seq, VERSION); + seq_puts(seq, version); return 0; } @@ -476,8 +364,10 @@ static ssize_t pgctrl_write(struct file *file, const char __user * buf, pktgen_stop_all_threads_ifs(); else if (!strcmp(data, "start")) - pktgen_run_all_threads(); - + pktgen_run_all_threads(0); + /* Run in the background. 
*/ + else if (!strcmp(data, "bg_start")) + pktgen_run_all_threads(1); else printk(KERN_WARNING "pktgen: Unknown command: %s\n", data); @@ -492,6 +382,137 @@ static int pgctrl_open(struct inode *inode, struct file *file) return single_open(file, pgctrl_show, PDE(inode)->data); } +static int pg_populate_report(struct pktgen_dev_report* rpt, struct pktgen_dev* pkt_dev) { + int i; + + memset(rpt, 0, sizeof(*rpt)); + rpt->api_version = 1; + rpt->flags = pkt_dev->flags; + strncpy(rpt->thread_name, pkt_dev->pg_thread->tsk->comm, 32); + strncpy(rpt->interface_name, pkt_dev->ifname, 32); + rpt->min_pkt_size = pkt_dev->min_pkt_size; + rpt->max_pkt_size = pkt_dev->max_pkt_size; + rpt->clone_skb = pkt_dev->clone_skb; + rpt->peer_clone_skb = pkt_dev->peer_clone_skb; + rpt->nfrags = pkt_dev->nfrags; + + strncpy(rpt->dst_min, pkt_dev->dst_min, IP_NAME_SZ); + strncpy(rpt->dst_max, pkt_dev->dst_max, IP_NAME_SZ); + strncpy(rpt->src_min, pkt_dev->src_min, IP_NAME_SZ); + strncpy(rpt->src_max, pkt_dev->src_max, IP_NAME_SZ); + + memcpy(&rpt->in6_saddr, &pkt_dev->in6_saddr, sizeof(struct in6_addr)); + memcpy(&rpt->in6_daddr, &pkt_dev->in6_daddr, sizeof(struct in6_addr)); + + /* For ranges */ + memcpy(&rpt->min_in6_daddr, &pkt_dev->min_in6_daddr, sizeof(struct in6_addr)); + memcpy(&rpt->max_in6_daddr, &pkt_dev->max_in6_daddr, sizeof(struct in6_addr)); + memcpy(&rpt->min_in6_saddr, &pkt_dev->min_in6_saddr, sizeof(struct in6_addr)); + memcpy(&rpt->max_in6_saddr, &pkt_dev->max_in6_saddr, sizeof(struct in6_addr)); + + /* If we're doing ranges, random or incremental, then this + * defines the min/max for those ranges. 
+ */ + rpt->saddr_min = pkt_dev->saddr_min; + rpt->saddr_max = pkt_dev->saddr_max; + rpt->daddr_min = pkt_dev->daddr_min; + rpt->daddr_max = pkt_dev->daddr_max; + + rpt->udp_src_min = pkt_dev->udp_src_min; + rpt->udp_src_max = pkt_dev->udp_src_max; + rpt->udp_dst_min = pkt_dev->udp_dst_min; + rpt->udp_dst_max = pkt_dev->udp_dst_max; + + /* MPLS */ + rpt->nr_labels = pkt_dev->nr_labels; /* Depth of stack, 0 = no MPLS */ + for (i = 0; i<MAX_MPLS_LABELS; i++) { + rpt->labels[i] = pkt_dev->labels[i]; + } + + rpt->src_mac_count = pkt_dev->src_mac_count; + rpt->dst_mac_count = pkt_dev->dst_mac_count; + + memcpy(&rpt->dst_mac, &pkt_dev->dst_mac, ETH_ALEN); + memcpy(&rpt->src_mac, &pkt_dev->src_mac, ETH_ALEN); + + rpt->nflows = pkt_dev->nflows; + rpt->cflows = pkt_dev->cflows; + rpt->lflow = pkt_dev->lflow; + + rpt->delay_ns = pkt_dev->delay_ns; + rpt->count = pkt_dev->count; /* Default No packets to send */ + rpt->sofar = pkt_dev->sofar; /* How many pkts we've sent so far */ + rpt->tx_bytes = pkt_dev->tx_bytes; /* How many bytes we've transmitted */ + rpt->errors = pkt_dev->errors; /* Errors when trying to transmit, pkts will be re-sent */ + + /* Fields relating to receiving pkts */ + rpt->avg_latency = pkt_dev->avg_latency; /* in micro-seconds */ + rpt->min_latency = pkt_dev->min_latency; + rpt->max_latency = pkt_dev->max_latency; + for (i = 0; i<LAT_BUCKETS_MAX; i++) { + rpt->latency_bkts[i] = pkt_dev->latency_bkts[i]; + } + rpt->pkts_rcvd_since_clear = pkt_dev->pkts_rcvd_since_clear; + + rpt->ooo_rcvd = pkt_dev->ooo_rcvd; + rpt->pkts_rcvd = pkt_dev->pkts_rcvd; + rpt->dup_rcvd = pkt_dev->dup_rcvd; + rpt->bytes_rcvd = pkt_dev->bytes_rcvd; + rpt->seq_gap_rcvd = pkt_dev->seq_gap_rcvd; + rpt->non_pg_pkts_rcvd = pkt_dev->non_pg_pkts_rcvd; + return 0; +}; /* populate report */ + + +int pktgen_proc_ioctl(struct inode* inode, struct file* file, unsigned int cmd, + unsigned long arg) { + int err = 0; + struct pktgen_ioctl_info args; + struct pktgen_dev* pkt_dev = NULL; + + if (copy_from_user(&args, (void*)arg, sizeof(args))) { + return
-EFAULT; + } + + /* Null terminate the names */ + args.thread_name[31] = 0; + args.interface_name[31] = 0; + + /* printk("pktgen: thread_name: %s interface_name: %s\n", + * args.thread_name, args.interface_name); + */ + + switch (cmd) { + case GET_PKTGEN_INTERFACE_INFO: { + mutex_lock(&pktgen_thread_lock); + pkt_dev = __pktgen_NN_threads(args.interface_name, FIND); + if (pkt_dev) { + pg_populate_report(&(args.report), pkt_dev); + if (copy_to_user((void*)(arg), &args, sizeof(args))) { + printk("ERROR: pktgen: copy_to_user failed.\n"); + err = -EFAULT; + } + else { + err = 0; + } + } + else { + printk("ERROR: pktgen: Could not find interface -:%s:-\n", + args.interface_name); + err = -ENODEV; + } + mutex_unlock(&pktgen_thread_lock); + break; + } + default: + printk("%s: Unknown pktgen IOCTL: %x \n", __FUNCTION__, + cmd); + return -EINVAL; + } + + return err; +}/* pktgen_proc_ioctl */ + static const struct file_operations pktgen_fops = { .owner = THIS_MODULE, .open = pgctrl_open, @@ -499,6 +520,7 @@ static const struct file_operations pktgen_fops = { .llseek = seq_lseek, .write = pgctrl_write, .release = single_release, + .ioctl = pktgen_proc_ioctl, }; static int pktgen_if_show(struct seq_file *seq, void *v) @@ -514,11 +536,12 @@ static int pktgen_if_show(struct seq_file *seq, void *v) (unsigned long long)pkt_dev->count, pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); - seq_printf(seq, - " frags: %d delay: %u clone_skb: %d ifname: %s\n", - pkt_dev->nfrags, - 1000 * pkt_dev->delay_us + pkt_dev->delay_ns, - pkt_dev->clone_skb, pkt_dev->odev->name); + seq_printf(seq, + " frags: %d delay: %lluns clone_skb: %d peer_clone_skb: %d ifname: %s\n", + pkt_dev->nfrags, + (unsigned long long)pkt_dev->delay_ns, + pkt_dev->clone_skb, pkt_dev->peer_clone_skb, + pkt_dev->ifname); seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); @@ -528,6 +551,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->queue_map_min, pkt_dev->queue_map_max); + if 
(pkt_dev->flags & F_IPV6) { char b1[128], b2[128], b3[128]; fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); @@ -652,11 +676,32 @@ static int pktgen_if_show(struct seq_file *seq, void *v) stopped = now; /* not really stopped, more like last-running-at */ seq_printf(seq, - "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", + "Current:\n tx-pkts: %llu tx-errors: %llu tx-bytes: %llu\n", (unsigned long long)pkt_dev->sofar, - (unsigned long long)pkt_dev->errors, (unsigned long long)sa, + (unsigned long long)pkt_dev->errors, + (unsigned long long)pkt_dev->tx_bytes); + seq_printf(seq, + " rx-pkts: %llu rx-bytes: %llu alloc_skbs: %llu oom_alloc_skbs: %llu\n", + (unsigned long long)pkt_dev->pkts_rcvd, + (unsigned long long)pkt_dev->bytes_rcvd, + (unsigned long long)pkt_dev->allocated_skbs, + (unsigned long long)pkt_dev->oom_on_alloc_skb); + + + seq_printf(seq, + " blocked: %s next-tx-ns: %llu (%lli) started: %lluus stopped: %lluus idle: %lluns\n", + pkt_dev->tx_blocked ? 
"TRUE" : "false", + (unsigned long long)pkt_dev->next_tx_ns, + (long long)(pkt_dev->next_tx_ns - getRelativeCurNs()), + (unsigned long long)sa, (unsigned long long)stopped, - (unsigned long long)pkt_dev->idle_acc); + (unsigned long long)pkt_dev->idle_acc_ns); + seq_printf(seq, + " nanodelays: %llu sleeps: %llu queue_stopped: %llu tx-early: %llu\n", + (unsigned long long)pkt_dev->nanodelays, + (unsigned long long)pkt_dev->sleeps, + (unsigned long long)pkt_dev->queue_stopped, + (unsigned long long)pkt_dev->req_tx_early); seq_printf(seq, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", @@ -676,7 +721,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); seq_printf(seq, " cur_queue_map: %u\n", pkt_dev->cur_queue_map); - + seq_printf(seq, " flows: %u\n", pkt_dev->nflows); if (pkt_dev->result[0]) @@ -942,15 +987,11 @@ static ssize_t pktgen_if_write(struct file *file, return len; } i += len; - if (value == 0x7FFFFFFF) { - pkt_dev->delay_us = 0x7FFFFFFF; - pkt_dev->delay_ns = 0; - } else { - pkt_dev->delay_us = value / 1000; - pkt_dev->delay_ns = value % 1000; + pkt_dev->delay_ns = value; + if ((getRelativeCurNs() + pkt_dev->delay_ns) > pkt_dev->next_tx_ns) { + pkt_dev->next_tx_ns = getRelativeCurNs() + pkt_dev->delay_ns; } - sprintf(pg_result, "OK: delay=%u", - 1000 * pkt_dev->delay_us + pkt_dev->delay_ns); + sprintf(pg_result, "OK: delay=%lluns", (unsigned long long)pkt_dev->delay_ns); return count; } if (!strcmp(name, "udp_src_min")) { @@ -1016,6 +1057,17 @@ static ssize_t pktgen_if_write(struct file *file, sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb); return count; } + if (!strcmp(name, "peer_clone_skb")) { + len = num_arg(&user_buffer[i], 10, &value); + if (len < 0) { + return len; + } + i += len; + pkt_dev->peer_clone_skb = value; + + sprintf(pg_result, "OK: peer_clone_skb=%d", pkt_dev->peer_clone_skb); + return count; + } if (!strcmp(name, "count")) { len = num_arg(&user_buffer[i], 10, 
&value); if (len < 0) { @@ -1127,12 +1179,13 @@ static ssize_t pktgen_if_write(struct file *file, else if (strcmp(f, "FLOW_SEQ") == 0) pkt_dev->flags |= F_FLOW_SEQ; - + else if (strcmp(f, "QUEUE_MAP_RND") == 0) pkt_dev->flags |= F_QUEUE_MAP_RND; else if (strcmp(f, "!QUEUE_MAP_RND") == 0) pkt_dev->flags &= ~F_QUEUE_MAP_RND; + #ifdef CONFIG_XFRM else if (strcmp(f, "IPSEC") == 0) pkt_dev->flags |= F_IPSEC_ON; @@ -1142,6 +1195,7 @@ static ssize_t pktgen_if_write(struct file *file, pkt_dev->flags &= ~F_IPV6; else { + printk("pktgen: Flag -:%s:- unknown\n", f); sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, @@ -1408,13 +1462,12 @@ static ssize_t pktgen_if_write(struct file *file, /* Set up Src MAC */ if (compare_ether_addr(old_smac, pkt_dev->src_mac)) memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); - sprintf(pg_result, "OK: srcmac"); return count; } if (!strcmp(name, "clear_counters")) { - pktgen_clear_counters(pkt_dev); + pktgen_clear_counters(pkt_dev, 0); sprintf(pg_result, "OK: Clearing counters.\n"); return count; } @@ -1468,10 +1521,10 @@ static ssize_t pktgen_if_write(struct file *file, if (!strcmp(name, "mpls")) { unsigned n, cnt; - len = get_labels(&user_buffer[i], pkt_dev); if (len < 0) return len; + i += len; cnt = sprintf(pg_result, "OK: mpls="); for (n = 0; n < pkt_dev->nr_labels; n++) @@ -1635,6 +1688,7 @@ static ssize_t pktgen_if_write(struct file *file, return count; } + printk("pktgen: No such parameter \"%s\"\n", name); sprintf(pkt_dev->result, "No such parameter \"%s\"", name); return -EINVAL; } @@ -1651,6 +1705,7 @@ static const struct file_operations pktgen_if_fops = { .llseek = seq_lseek, .write = pktgen_if_write, .release = single_release, + .ioctl = pktgen_proc_ioctl, }; static int pktgen_thread_show(struct seq_file *seq, void *v) @@ -1660,9 +1715,15 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) BUG_ON(!t); + mutex_lock(&pktgen_thread_lock); + /* versioning info. 
CFG_RT means we do not busy-spin, so can be configured for + * real-time scheduling if user-space so desires. */ + seq_printf(seq, "VERSION-2 CFG_RT\n"); + seq_printf(seq, "PID: %d Name: %s\n", + t->pid, t->tsk->comm); + seq_printf(seq, "Running: "); - if_lock(t); list_for_each_entry(pkt_dev, &t->if_list, list) if (pkt_dev->running) seq_printf(seq, "%s ", pkt_dev->odev->name); @@ -1678,8 +1739,7 @@ static int pktgen_thread_show(struct seq_file *seq, void *v) else seq_printf(seq, "\nResult: NA\n"); - if_unlock(t); - + mutex_unlock(&pktgen_thread_lock); return 0; } @@ -1747,29 +1807,55 @@ static ssize_t pktgen_thread_write(struct file *file, return -EFAULT; i += len; mutex_lock(&pktgen_thread_lock); - pktgen_add_device(t, f); + t->control_arg = f; + t->control |= T_ADD_DEV; + while (t->control & T_ADD_DEV) { + schedule_timeout_interruptible(msecs_to_jiffies(10)); + } + t->control_arg = 0; mutex_unlock(&pktgen_thread_lock); ret = count; sprintf(pg_result, "OK: add_device=%s", f); goto out; } + if (!strcmp(name, "rem_device")) { + char f[32]; + memset(f, 0, 32); + len = strn_len(&user_buffer[i], sizeof(f) - 1); + if (len < 0) { + ret = len; + goto out; + } + if (copy_from_user(f, &user_buffer[i], len)) + return -EFAULT; + i += len; + pktgen_mark_device(f); + ret = count; + sprintf(pg_result, "OK: rem_device=%s", f); + goto out; + } + if (!strcmp(name, "rem_device_all")) { mutex_lock(&pktgen_thread_lock); t->control |= T_REMDEVALL; mutex_unlock(&pktgen_thread_lock); - schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ + while (t->control & T_REMDEVALL) { + schedule_timeout_interruptible(msecs_to_jiffies(10)); + } ret = count; sprintf(pg_result, "OK: rem_device_all"); goto out; } if (!strcmp(name, "max_before_softirq")) { - sprintf(pg_result, "OK: Note! 
max_before_softirq is obsoleted -- Do not use"); - ret = count; - goto out; - } + ret = count; + sprintf(pg_result, "ERROR: max_before_softirq no longer supported"); + goto out; + } + printk("pktgen: un-known command to pktgen_thread: -:%s:-\n", name); + ret = -EINVAL; out: return ret; @@ -1787,8 +1873,10 @@ static const struct file_operations pktgen_thread_fops = { .llseek = seq_lseek, .write = pktgen_thread_write, .release = single_release, + .ioctl = pktgen_proc_ioctl, }; + /* Think find or remove for NN */ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) { @@ -1799,10 +1887,8 @@ static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) pkt_dev = pktgen_find_dev(t, ifname); if (pkt_dev) { if (remove) { - if_lock(t); pkt_dev->removal_mark = 1; t->control |= T_REMDEV; - if_unlock(t); } break; } @@ -1896,31 +1982,45 @@ static int pktgen_device_event(struct notifier_block *unused, /* Associate pktgen_dev with a device. */ -static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) +static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, struct pktgen_thread* t) { struct net_device *odev; int err; /* Clean old setups */ if (pkt_dev->odev) { +#ifdef USE_NQW_CALLBACK + /* Set the nqw callback hooks */ + rtnl_lock(); + clear_nqw_hook(t, pkt_dev->odev); + rtnl_unlock(); +#endif + pkt_dev->odev->pkt_dev = NULL; dev_put(pkt_dev->odev); pkt_dev->odev = NULL; } - odev = dev_get_by_name(&init_net, ifname); + odev = dev_get_by_name(&init_net, pkt_dev->ifname); if (!odev) { - printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname); return -ENODEV; } if (odev->type != ARPHRD_ETHER) { - printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", ifname); + printk(KERN_ERR "pktgen: not an ethernet device: \"%s\"\n", pkt_dev->ifname); err = -EINVAL; } else if (!netif_running(odev)) { - printk(KERN_ERR "pktgen: device is down: \"%s\"\n", 
ifname); + printk(KERN_ERR "pktgen: device is down: \"%s\"\n", pkt_dev->ifname); err = -ENETDOWN; } else { pkt_dev->odev = odev; +#ifdef USE_NQW_CALLBACK + /* Set the nqw callback hooks */ + rtnl_lock(); + set_nqw_hook(t, pkt_dev->odev, GFP_ATOMIC); + rtnl_unlock(); +#endif + pkt_dev->odev->pkt_dev = pkt_dev; return 0; } @@ -1933,6 +2033,10 @@ static int pktgen_setup_dev(struct pktgen_dev *pkt_dev, const char *ifname) */ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) { + /* Try once more, just in case it works now. */ + if (!pkt_dev->odev) + pktgen_setup_dev(pkt_dev, pkt_dev->pg_thread); + if (!pkt_dev->odev) { printk(KERN_ERR "pktgen: ERROR: pkt_dev->odev == NULL in " "setup_inject.\n"); @@ -1945,6 +2049,9 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) if (is_zero_ether_addr(pkt_dev->src_mac)) memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, ETH_ALEN); + else + memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + /* Set up Dest MAC */ memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); @@ -2036,28 +2143,192 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) pkt_dev->nflows = 0; } -static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) -{ - __u64 start; - __u64 now; - - start = now = getCurUs(); - while (now < spin_until_us) { - /* TODO: optimize sleeping behavior */ - if (spin_until_us - now > jiffies_to_usecs(1) + 1) - schedule_timeout_interruptible(1); - else if (spin_until_us - now > 100) { - if (!pkt_dev->running) - return; - if (need_resched()) - schedule(); + +#ifdef USE_NQW_CALLBACK +/* Runs from interrupt */ +int pg_notify_queue_woken(struct net_device* dev) { + /* Find the thread that needs waking. */ + struct pktgen_thread* t = ((struct pg_nqw_data*)(dev->nqw_data))->pg_thread; + t->control |= T_WAKE_BLOCKED; + wake_up_interruptible(&(t->queue)); + return 0; +} + +/* Must hold RTNL lock while calling this. 
*/
+/* Install pg_notify_queue_woken() on @dev on behalf of pktgen thread @t.
+ *
+ * 802.1Q VLAN devices are redirected to their real device.  If @dev already
+ * carries a pktgen hook owned by @t, only its reference count is bumped;
+ * otherwise a new pg_nqw_data is allocated with @gfp and published on @dev.
+ *
+ * Returns 0 on success (and also when @dev carries nqw_data with a foreign
+ * magic, which is left untouched), -ENODEV if @dev is NULL, -EINVAL if the
+ * existing hook belongs to another pktgen thread, -ENOBUFS if the
+ * allocation fails.
+ */
+static int set_nqw_hook(struct pktgen_thread* t, struct net_device* dev, int gfp) {
+	/* The notify-queue-woken magic only works for physical
+	 * devices at this time. So, apply hook to underlying
+	 * device.
+	 */
+	struct pg_nqw_data* nqwd;
+	ASSERT_RTNL();
+	BUG_ON(!t);
+
+	if (!dev) {
+		WARN_ON(!dev);
+		return -ENODEV;
+	}
+#if 0
+#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
+	if (dev->macvlan_port) {
+		struct macvlan_dev *vlan = netdev_priv(dev);
+		printk("pktgen: setting nqw_hook on lower mac-vlan dev: %p\n", vlan->lowerdev);
+		return set_nqw_hook(t, vlan->lowerdev, gfp);
+	}
+#endif
+#endif
+
+	if (dev->priv_flags & IFF_802_1Q_VLAN) {
+		printk("pktgen: setting nqw_hook on real-dev of .1q vlan: %s\n", dev->name);
+		return set_nqw_hook(t, vlan_dev_info(dev)->real_dev, gfp);
+	}
+
+	nqwd = (struct pg_nqw_data*)(dev->nqw_data);
+
+	if (nqwd) {
+		if (nqwd->magic == PG_NQW_MAGIC) {
+			if (nqwd->pg_thread == t) {
+				atomic_inc(&(nqwd->nqw_ref_count));
+
+				printk("pktgen: Incremented nqw_ref_count: %d device: %s\n",
+				       (int)(atomic_read(&(nqwd->nqw_ref_count))), dev->name);
+				return 0;
+			}
+			else {
+				printk("pktgen: ERROR: set_nqw_hook: nqwd thread does not match, dev: %s\n",
+				       dev->name);
+				return -EINVAL;
+			}
+		}
+		else {
+			/* Some other user owns nqw_data; leave it alone. */
+			printk("pktgen: WARNING: set_nqw_hook: nqwd magic is NOT PKT-GEN, dev: %s magic: 0x%x\n",
+			       dev->name, nqwd->magic);
+			return 0;
+		}
+	}
+	else {
+		nqwd = kzalloc(sizeof(*nqwd), gfp);
+		if (nqwd) {
+			nqwd->magic = PG_NQW_MAGIC;
+			/* First reference belongs to the installing thread. */
+			atomic_set(&(nqwd->nqw_ref_count), 1);
+			nqwd->pg_thread = t;
+			/* Publish the data before the callback that reads it. */
+			dev->nqw_data = nqwd;
+			dev->notify_queue_woken = pg_notify_queue_woken;
+			printk("pktgen: Added nqw callback to device: %s\n",
+			       dev->name);
+			return 0;
 		}
+		else {
+			printk("pktgen: ERROR: could not allocate nqwd for dev: %s\n", dev->name);
+			return -ENOBUFS;
+		}
+	}
+}//set_nqw_hook
+
+
+/* Must hold RTNL lock while calling this. 
*/
+/* Drop thread @t's reference on the pktgen queue-woken hook of @dev.
+ *
+ * 802.1Q VLAN devices are redirected to their real device.  When the
+ * reference count reaches zero the callback and nqw_data are cleared from
+ * the device and the pg_nqw_data is freed.  nqw_data with a foreign magic,
+ * or a hook owned by another pktgen thread, is left untouched.
+ */
+static void clear_nqw_hook(struct pktgen_thread* t, struct net_device* dev) {
+	/* The notify-queue-woken magic only works for physical
+	 * devices at this time. So, apply hook to underlying
+	 * device.
+	 */
+	ASSERT_RTNL();
+	BUG_ON(!t);
+
+	/* Same guard as set_nqw_hook(): nothing to clear without a device. */
+	if (!dev) {
+		WARN_ON(!dev);
+		return;
+	}
 
-		now = getCurUs();
+#if 0
+#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
+	if (dev->macvlan_port) {
+		struct macvlan_vlan *vlan = dev->priv;
+		clear_nqw_hook(t, vlan->lowerdev);
+		return;
+	}
+#endif
+#endif
+
+	if (dev->priv_flags & IFF_802_1Q_VLAN) {
+		clear_nqw_hook(t, vlan_dev_info(dev)->real_dev);
+		return;
 	}
 
-	pkt_dev->idle_acc += now - start;
-}
+	if (dev->nqw_data) {
+		struct pg_nqw_data* nqwd = (struct pg_nqw_data*)(dev->nqw_data);
+		if (nqwd->magic == PG_NQW_MAGIC) {
+			if (t != nqwd->pg_thread) {
+				/* set_nqw_hook() refuses a foreign thread without
+				 * taking a reference, so there is none to drop here.
+				 */
+				printk("pktgen ERROR: t != nqwd->pg_thread\n");
+				return;
+			}
+			atomic_dec(&(nqwd->nqw_ref_count));
+
+			printk("pktgen: Decremented nqw_ref_count: %d device: %s\n",
+			       (int)(atomic_read(&(nqwd->nqw_ref_count))),
+			       dev->name);
+
+			BUG_ON(atomic_read(&(nqwd->nqw_ref_count)) < 0);
+
+			if (atomic_read(&(nqwd->nqw_ref_count)) == 0) {
+				printk("pktgen: Removing nqw reference from device: %s\n",
+				       dev->name);
+				/* Unhook the callback before the data it uses goes away. */
+				dev->notify_queue_woken = NULL;
+				dev->nqw_data = NULL;
+				kfree(nqwd);
+			}
+		}
+		else {
+			printk("pktgen: WARNING: clear_nqw_hook: nqwd magic is NOT PKT-GEN, dev: %s magic: 0x%x\n",
+			       dev->name, nqwd->magic);
+		}
+	}
+	else {
+		printk("pktgen: Warning: nqw_data is null in clear_nqw_hook, dev: %s\n",
+		       dev->name);
+	}
+}//clear_nqw_hook
+
+#endif
+
+
+/* delay_ns is in nano-seconds
+ *
+ * Adds @delay_ns to @info's accumulated delay and, while that total exceeds
+ * PG_MAX_ACCUM_DELAY_NS, sleeps on the thread's wait queue one tick at a
+ * time.  The measured sleep time is subtracted from the accumulated delay of
+ * every device on the thread, but charged as idle time to @info only.
+ * The wait ends early when @info is marked for removal, its next_tx_ns has
+ * been reached, or the thread has T_WAKE_BLOCKED or T_STOP set.
+ */
+static void pg_nanodelay(u64 delay_ns, struct pktgen_dev* info) {
+	u64 idle_start = getRelativeCurNs();
+	u64 last_time;
+	u64 _diff;
+	u64 itmp = idle_start;
+	struct pktgen_dev *p = NULL;
+	struct pktgen_thread* t = info->pg_thread;
+
+	info->nanodelays++;
+	info->accum_delay_ns += delay_ns;
+	while (info->accum_delay_ns > PG_MAX_ACCUM_DELAY_NS) {
+		info->sleeps++;
+		interruptible_sleep_on_timeout(&(t->queue), 1);
+		/* will wake after one tick */
+		last_time = itmp;
+
+		/* Subtract delay from all interfaces for this thread, since all are blocked when
+		 * any are blocked.
+		 */
+		itmp = getRelativeCurNs();
+		_diff = (itmp - last_time);
+		list_for_each_entry(p, &t->if_list, list) {
+			p->accum_delay_ns -= _diff;
+			/* Limit saving up too much time... */
+			/* NOTE(review): relies on accum_delay_ns being a signed type -- confirm */
+			if (p->accum_delay_ns < -10000000) {
+				p->accum_delay_ns = -10000000;
+			}
+		}
+
+		/* For accounting, only charge this guy for the idle though...*/
+		info->idle_acc_ns += _diff;
+
+		/* break out if we are stopped or if we should transmit (maybe our ipg changed?) */
+		/* Test the individual flag bits: a logical && against these
+		 * non-zero constants is true for any non-zero control word.
+		 */
+		if (info->removal_mark || (itmp >= info->next_tx_ns) ||
+		    (t->control & T_WAKE_BLOCKED) ||
+		    (t->control & T_STOP)) {
+			break;
+		}
+	}/* while */
+}//pg_nanodelay
+
 static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
 {
@@ -2261,11 +2532,9 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 				t = random32() % (imx - imn) + imn;
 				s = htonl(t);
 
-				while (ipv4_is_loopback(s) ||
-				       ipv4_is_multicast(s) ||
-				       ipv4_is_lbcast(s) ||
-				       ipv4_is_zeronet(s) ||
-				       ipv4_is_local_multicast(s)) {
+				while (ipv4_is_loopback(s) || ipv4_is_multicast(s)
+				       || ipv4_is_lbcast(s) || ipv4_is_zeronet(s)
+				       || ipv4_is_local_multicast(s)) {
 					t = random32() % (imx - imn) + imn;
 					s = htonl(t);
 				}
@@ -2337,12 +2606,12 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
 		}
 		pkt_dev->cur_queue_map = t;
 	}
-
+
 	pkt_dev->flows[flow].count++;
 }
 
 
-#ifdef CONFIG_XFRM
+//#ifdef CONFIG_XFRM
 static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
 {
 	struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
@@ -2368,6 +2637,7 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
 
 	x->curlft.bytes +=skb->len;
 	x->curlft.packets++;
+
 error:
 	spin_unlock(&x->lock);
 	return err;
@@ -2424,7 +2694,6 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev,
 	}
 	return 1;
 }
-#endif
 
 static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
 {
@@ -2457,7 +2726,7 @@ static 
struct sk_buff *fill_packet_ipv4(struct net_device *odev, __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ - + int cur_pkt_size; if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2471,12 +2740,14 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mod_cur_headers(pkt_dev); datalen = (odev->hard_header_len + 16) & ~0xf; - skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + datalen + + cur_pkt_size = pkt_dev->cur_pkt_size; /* protect against race */ + skb = alloc_skb(cur_pkt_size + 64 + datalen + pkt_dev->pkt_overhead, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } + pkt_dev->seq_num++; /* Increase the pktgen sequence number for the next packet. */ skb_reserve(skb, datalen); @@ -2506,6 +2777,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct iphdr); skb_put(skb, sizeof(struct iphdr) + sizeof(struct udphdr)); + skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); iph = ip_hdr(skb); udph = udp_hdr(skb); @@ -2514,7 +2786,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, *(__be16 *) & eth[12] = protocol; /* Eth + IPh + UDPh + mpls */ - datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 - + datalen = cur_pkt_size - 14 - 20 - 8 - pkt_dev->pkt_overhead; if (datalen < sizeof(struct pktgen_hdr)) datalen = sizeof(struct pktgen_hdr); @@ -2608,11 +2880,12 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, pgh->tv_sec = htonl(timestamp.tv_sec); pgh->tv_usec = htonl(timestamp.tv_usec); } - +#if 0 #ifdef CONFIG_XFRM if (!process_ipsec(pkt_dev, skb, protocol)) return NULL; #endif +#endif return skb; } @@ -2650,9 +2923,10 @@ static unsigned int scan_ip6(const char *s, char ip[16]) } s++; } - + u = 
simple_strtoul(s, &pos, 16); i = pos - s; + if (!i) return 0; if (prefixlen == 12 && s[i] == '.') { @@ -2796,7 +3070,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, __be16 *vlan_encapsulated_proto = NULL; /* packet type ID field (or len) for VLAN tag */ __be16 *svlan_tci = NULL; /* Encapsulates priority and SVLAN ID */ __be16 *svlan_encapsulated_proto = NULL; /* packet type ID field (or len) for SVLAN tag */ - + int cur_pkt_size; + if (pkt_dev->nr_labels) protocol = htons(ETH_P_MPLS_UC); @@ -2808,7 +3083,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, */ mod_cur_headers(pkt_dev); - skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16 + + cur_pkt_size = pkt_dev->cur_pkt_size; + skb = alloc_skb(cur_pkt_size + 64 + 16 + pkt_dev->pkt_overhead, GFP_ATOMIC); if (!skb) { sprintf(pkt_dev->result, "No memory"); @@ -2843,6 +3119,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->network_header = skb->tail; skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); skb_put(skb, sizeof(struct ipv6hdr) + sizeof(struct udphdr)); + skb_set_queue_mapping(skb, pkt_dev->cur_queue_map); iph = ipv6_hdr(skb); udph = udp_hdr(skb); @@ -2851,7 +3128,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, *(__be16 *) & eth[12] = protocol; /* Eth + IPh + UDPh + mpls */ - datalen = pkt_dev->cur_pkt_size - 14 - + datalen = cur_pkt_size - 14 - sizeof(struct ipv6hdr) - sizeof(struct udphdr) - pkt_dev->pkt_overhead; @@ -2954,7 +3231,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, pgh->tv_sec = htonl(timestamp.tv_sec); pgh->tv_usec = htonl(timestamp.tv_usec); } - /* pkt_dev->seq_num++; FF: you really mean this? */ + pkt_dev->seq_num++; /* Increase the pktgen seq number for the next packet. 
*/
 	return skb;
 }
@@ -2968,13 +3245,206 @@ static inline struct sk_buff *fill_packet(struct net_device *odev,
 	return fill_packet_ipv4(odev, pkt_dev);
 }
 
-static void pktgen_clear_counters(struct pktgen_dev *pkt_dev)
-{
-	pkt_dev->seq_num = 1;
-	pkt_dev->idle_acc = 0;
+
+/* Fold one latency sample into @pkt_dev's receive statistics: the running
+ * average, the min/max, and the latency histogram.
+ *
+ * The average is weighted by pkts_rcvd for the first 99 samples after a
+ * clear and by a fixed 100 afterwards.
+ */
+static void record_latency(struct pktgen_dev* pkt_dev, int latency) {
+	/* NOTE: Latency can be negative */
+	int div = 100;
+	int diff;
+	int vl;
+	int i;
+
+	pkt_dev->pkts_rcvd_since_clear++;
+
+	if (pkt_dev->pkts_rcvd_since_clear < 100) {
+		div = pkt_dev->pkts_rcvd;
+		if (pkt_dev->pkts_rcvd_since_clear == 1) {
+			pkt_dev->avg_latency = latency;
+		}
+	}
+
+	/* Guard the division below against div == -1. */
+	if ((div + 1) == 0) {
+		pkt_dev->avg_latency = 0;
+	}
+	else {
+		pkt_dev->avg_latency = ((pkt_dev->avg_latency * div + latency) / (div + 1));
+	}
+
+	if (latency < pkt_dev->min_latency) {
+		pkt_dev->min_latency = latency;
+	}
+	if (latency > pkt_dev->max_latency) {
+		pkt_dev->max_latency = latency;
+	}
+
+	/* Place the latency in the right 'bucket' */
+	diff = (latency - pkt_dev->min_latency);
+	/* NOTE(review): the loop header and bucket test were lost in extraction
+	 * and are reconstructed from the surviving statements and the locals
+	 * declared above (bound name LAT_BUCKETS_MAX, power-of-two buckets
+	 * compared against diff) -- confirm against the upstream patch.
+	 */
+	for (i = 0; i < LAT_BUCKETS_MAX; i++) {
+		vl = (1 << i);
+		if (diff <= vl) {
+			pkt_dev->latency_bkts[i]++;
+			break;
+		}
+	}
+}/* record latency */
+
+
+/* Returns < 0 if the skb is not a pktgen buffer.
+ *
+ * A packet is treated as pktgen traffic when it is ETH_P_IP, long enough to
+ * hold 20 + 8 header bytes plus a pktgen_hdr, carries PKTGEN_MAGIC at that
+ * offset, and its receiving device has a pkt_dev attached.  In that case the
+ * receive counters, sequence/duplicate/gap accounting and latency statistics
+ * of that pkt_dev are updated, the skb is freed here, and 0 is returned.
+ * On -1 the skb has not been freed.
+ */
+int pktgen_receive(struct sk_buff* skb) {
+	/* See if we have a pktgen packet */
+	/* TODO: Add support for detecting IPv6, TCP packets too. This will only
+	 * catch UDP at the moment. --Ben
+	 */
+	/* printk("pktgen-rcv, skb->len: %d\n", skb->len); */
+
+	/* If this is a paged skb, make sure we pull up
+	 * whatever data we need to look at. */
+	if (!pskb_may_pull(skb, 20 + 8 + sizeof(struct pktgen_hdr))) {
+		return -1;
+	}
+
+	if ((skb->len >= (20 + 8 + sizeof(struct pktgen_hdr))) &&
+	    (skb->protocol == __constant_htons(ETH_P_IP))) {
+		struct pktgen_hdr* pgh;
+
+		/* It's IP, and long enough, lets check the magic number.
+		 * TODO: This is a hack not always guaranteed to catch the right
+		 * packets.
+		 */
+
+		/* printk("Length & protocol passed, skb->data: %p, raw: %p\n",
+		   skb->data, skb->h.raw); */
+
+		/* Fixed offsets: 20 and 8 bytes are skipped before the pktgen header. */
+		pgh = (struct pktgen_hdr*)(skb->data + 20 + 8);
+
+		/*
+		tmp = (char*)(skb->data);
+		for (i = 0; i<90; i++) {
+			printk("%02hx ", tmp[i]);
+			if (((i + 1) % 15) == 0) {
+				printk("\n");
+			}
+		}
+		printk("\n");
+		*/
+
+		if (pgh->pgh_magic == __constant_ntohl(PKTGEN_MAGIC)) {
+			struct net_device* dev = skb->dev;
+			struct pktgen_dev* pkt_dev;
+			__u32 seq = ntohl(pgh->seq_num);
+
+			// TODO: Need lock..maybe
+			pkt_dev = dev->pkt_dev;
+
+			if (!pkt_dev) {
+				return -1;
+			}
+
+			pkt_dev->pkts_rcvd++;
+			pkt_dev->bytes_rcvd += skb->len;
+
+			/* Check for out-of-sequence packets */
+			if (pkt_dev->last_seq_rcvd == seq) {
+				pkt_dev->dup_rcvd++;
+				pkt_dev->dup_since_incr++;
+			}
+			else {
+				__s64 rx;
+				__s64 tx;
+				struct timeval txtv;
+				/* Stamp the skb now if it has no receive timestamp yet. */
+				if (! skb->tstamp.tv64) {
+					__net_timestamp(skb);
+				}
+				skb_get_timestamp(skb, &txtv);
+				rx = tv_to_us(&txtv);
+
+				/* txtv is reused for the sender's timestamp from the header. */
+				txtv.tv_usec = ntohl(pgh->tv_usec);
+				txtv.tv_sec = ntohl(pgh->tv_sec);
+				tx = tv_to_us(&txtv);
+				record_latency(pkt_dev, rx - tx);
+
+				if ((pkt_dev->last_seq_rcvd + 1) == seq) {
+					if ((pkt_dev->peer_clone_skb > 1) &&
+					    (pkt_dev->peer_clone_skb > (pkt_dev->dup_since_incr + 1))) {
+
+						pkt_dev->seq_gap_rcvd += (pkt_dev->peer_clone_skb -
+									  pkt_dev->dup_since_incr - 1);
+					}
+					/* Great, in order...all is well */
+				}
+				else if (pkt_dev->last_seq_rcvd < seq) {
+					/* sequence gap, means we dropped a pkt most likely */
+					if (pkt_dev->peer_clone_skb > 1) {
+						/* We dropped more than one sequence number's worth,
+						 * and if we're using clone_skb, then this is quite
+						 * a few. This number still will not be exact, but
+						 * it will be closer.
+						 */
+						pkt_dev->seq_gap_rcvd += (((seq - pkt_dev->last_seq_rcvd) *
+									   pkt_dev->peer_clone_skb) -
+									  pkt_dev->dup_since_incr);
+					}
+					else {
+						pkt_dev->seq_gap_rcvd += (seq - pkt_dev->last_seq_rcvd - 1);
+					}
+				}
+				else {
+					pkt_dev->ooo_rcvd++; /* out-of-order */
+				}
+
+				pkt_dev->dup_since_incr = 0;
+			}
+			pkt_dev->last_seq_rcvd = seq;
+			kfree_skb(skb);
+			if (debug > 1) {
+				printk("done with pktgen_receive, free'd pkt\n");
+			}
+			return 0;
+		}
+	}
+	return -1; /* Let another protocol handle it, it's not for us! */
+}/* pktgen_receive */
+
+/* Reset @pkt_dev's latency statistics: average, min/max sentinels, the
+ * received-since-clear count and every histogram bucket.
+ */
+static void pg_reset_latency_counters(struct pktgen_dev* pkt_dev) {
+	int i;
+	pkt_dev->avg_latency = 0;
+	pkt_dev->min_latency = 0x7fffffff; /* largest integer */
+	pkt_dev->max_latency = 0x80000000; /* smallest integer */
+	pkt_dev->pkts_rcvd_since_clear = 0;
+	/* NOTE(review): loop header restored after extraction damage; the
+	 * bound name LAT_BUCKETS_MAX is reconstructed -- confirm.
+	 */
+	for (i = 0; i < LAT_BUCKETS_MAX; i++) {
+		pkt_dev->latency_bkts[i] = 0;
+	}
+}
+
+
+/* Zero @pkt_dev's transmit/receive counters, transient delay state and
+ * latency statistics.  When @seq_too is non-zero the sequence tracking
+ * (seq_num, last_seq_rcvd, dup_since_incr) is reset as well.
+ */
+static void pktgen_clear_counters(struct pktgen_dev *pkt_dev, int seq_too) {
+	pkt_dev->idle_acc_ns = 0;
 	pkt_dev->sofar = 0;
 	pkt_dev->tx_bytes = 0;
 	pkt_dev->errors = 0;
+	pkt_dev->pkts_rcvd_since_clear = 0;
+
+	pkt_dev->ooo_rcvd = 0;
+	pkt_dev->dup_rcvd = 0;
+	pkt_dev->pkts_rcvd = 0;
+	pkt_dev->bytes_rcvd = 0;
+	pkt_dev->non_pg_pkts_rcvd = 0;
+	pkt_dev->seq_gap_rcvd = 0; /* dropped */
+
+	/* Clear some transient state */
+	pkt_dev->accum_delay_ns = 0;
+	pkt_dev->sleeps = 0;
+	pkt_dev->nanodelays = 0;
+
+	/* This is a bit of a hack, but it gets the dup counters
+	 * in line so we don't have false alarms on dropped pkts.
+	 */
+	if (seq_too) {
+		pkt_dev->dup_since_incr = pkt_dev->peer_clone_skb - 1;
+		pkt_dev->seq_num = 0;
+		pkt_dev->last_seq_rcvd = 0;
+	}
+
+	pg_reset_latency_counters(pkt_dev);
 }
 
 /* Set up structure for sending pkts, clear counters */
@@ -2986,31 +3456,31 @@ static void pktgen_run(struct pktgen_thread *t)
 
 	pr_debug("pktgen: entering pktgen_run. %p\n", t);
 
-	if_lock(t);
 	list_for_each_entry(pkt_dev, &t->if_list, list) {
+		/* If already running, then ignore. */
+		if (! 
pkt_dev->running) { + + /** Clear counters before we setup the first inject. */ + pktgen_clear_counters(pkt_dev, 1); - /* - * setup odev and create initial packet. - */ - pktgen_setup_inject(pkt_dev); - - if (pkt_dev->odev) { - pktgen_clear_counters(pkt_dev); - pkt_dev->running = 1; /* Cranke yeself! */ - pkt_dev->skb = NULL; - pkt_dev->started_at = getCurUs(); - pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */ - pkt_dev->next_tx_ns = 0; - set_pkt_overhead(pkt_dev); - - strcpy(pkt_dev->result, "Starting"); - started++; - } else - strcpy(pkt_dev->result, "Error starting"); + /* + * setup odev and create initial packet. + */ + pktgen_setup_inject(pkt_dev); + + if (pkt_dev->odev) { + pkt_dev->running = 1; /* Cranke yeself! */ + pkt_dev->skb = NULL; + pkt_dev->started_at = getCurUs(); + /* Transmit first pkt after 20ms to let listener get started. */ + pkt_dev->next_tx_ns = getRelativeCurNs() + 20 * 1000000; + + strcpy(pkt_dev->result, "Starting"); + started++; + } else + strcpy(pkt_dev->result, "Error starting"); + } } - if_unlock(t); - if (started) - t->control &= ~(T_STOP); } static void pktgen_stop_all_threads_ifs(void) @@ -3026,66 +3496,11 @@ static void pktgen_stop_all_threads_ifs(void) mutex_unlock(&pktgen_thread_lock); } - -static int thread_is_running(struct pktgen_thread *t) -{ - struct pktgen_dev *pkt_dev; - int res = 0; - - list_for_each_entry(pkt_dev, &t->if_list, list) - if (pkt_dev->running) { - res = 1; - break; - } - return res; -} - -static int pktgen_wait_thread_run(struct pktgen_thread *t) -{ - if_lock(t); - - while (thread_is_running(t)) { - - if_unlock(t); - - msleep_interruptible(100); - - if (signal_pending(current)) - goto signal; - if_lock(t); - } - if_unlock(t); - return 1; -signal: - return 0; -} - -static int pktgen_wait_all_threads_run(void) -{ +static void pktgen_run_all_threads(int background) { struct pktgen_thread *t; - int sig = 1; - mutex_lock(&pktgen_thread_lock); - - list_for_each_entry(t, &pktgen_threads, th_list) { - 
sig = pktgen_wait_thread_run(t);
-		if (sig == 0)
-			break;
-	}
-
-	if (sig == 0)
-		list_for_each_entry(t, &pktgen_threads, th_list)
-			t->control |= (T_STOP);
-
-	mutex_unlock(&pktgen_thread_lock);
-	return sig;
-}
-
-static void pktgen_run_all_threads(void)
-{
-	struct pktgen_thread *t;
-
-	pr_debug("pktgen: entering pktgen_run_all_threads.\n");
+	pr_debug("pktgen: entering pktgen_run_all_threads, background: %d\n",
+		 background);
 
 	mutex_lock(&pktgen_thread_lock);
 
@@ -3094,9 +3509,14 @@ static void pktgen_run_all_threads(void)
 
 	mutex_unlock(&pktgen_thread_lock);
 
-	schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */
+	/* This is a hack at best...disabling, we should not have to depend on this. */
+	/*schedule_timeout_interruptible(msecs_to_jiffies(125));*/ /* Propagate thread->control */
 
-	pktgen_wait_all_threads_run();
+	// Much harder to get rid of the if_lock if we allow this to block...
+	if (!background) {
+		printk("ERROR: non-background mode no longer supported.\n");
+		//pktgen_wait_all_threads_run();
+	}
 }
 
 static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
@@ -3106,7 +3526,7 @@ static void show_results(struct pktgen_dev *pkt_dev, int nr_frags)
 
 	total_us = pkt_dev->stopped_at - pkt_dev->started_at;
 
-	idle = pkt_dev->idle_acc;
+	/* do_div() divides its first argument in place and returns the
+	 * remainder, so convert a copy and leave idle_acc_ns untouched.
+	 * NOTE(review): assumes idle is the 64-bit local declared above this hunk.
+	 */
+	idle = pkt_dev->idle_acc_ns;
+	do_div(idle, 1000);
 
 	p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n",
 		     (unsigned long long)total_us,
@@ -3155,22 +3575,80 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev)
 	return 0;
 }
 
-static struct pktgen_dev *next_to_run(struct pktgen_thread *t)
-{
-	struct pktgen_dev *pkt_dev, *best = NULL;
-
-	if_lock(t);
+/** Find the adapter that needs to tx next.
+ * We need to take the blocked adapters into account, but can't ignore
+ * them forever just in case we missed the tx-queue-wake event for some
+ * reason.
+ *
+ * @t: thread whose if_list is scanned; only running devices are considered.
+ * @now: current time, compared against each device's next_tx_ns.
+ * @next_running_delay: out; 0 if the chosen device is already due, otherwise
+ *	the time until its next_tx_ns, or 10ms when no device is running.
+ *
+ * Side effect: tx_blocked is cleared on every running device that had it set.
+ * Returns the chosen device, or NULL if none is running.
+ */
+static struct pktgen_dev *next_to_run(struct pktgen_thread *t, u64 now, u64* next_running_delay) {
+	struct pktgen_dev *pkt_dev = NULL;
+	struct pktgen_dev *best = NULL;
+	struct pktgen_dev *best_blocked = NULL;
+	struct pktgen_dev *rv = NULL;
 
 	list_for_each_entry(pkt_dev, &t->if_list, list) {
 		if (!pkt_dev->running)
 			continue;
-		if (best == NULL)
-			best = pkt_dev;
-		else if (pkt_dev->next_tx_us < best->next_tx_us)
-			best = pkt_dev;
+		if (pkt_dev->tx_blocked) {
+			if (best_blocked == NULL)
+				best_blocked = pkt_dev;
+			else {
+				if (pkt_dev->next_tx_ns < best_blocked->next_tx_ns) {
+					best_blocked = pkt_dev;
+				}
+			}
+			pkt_dev->tx_blocked = 0; /* give it another try next time */
+		}
+		else {
+			if (best == NULL)
+				best = pkt_dev;
+			else {
+				if (pkt_dev->next_tx_ns < best->next_tx_ns) {
+					best = pkt_dev;
+				}
+			}
+		}
 	}
-	if_unlock(t);
-	return best;
+
+	/** If we have both blocked and non-blocked, and non-blocked wants to transmit now, then
+	 * choose it. Otherwise, just choose whoever wants to run next.
+	 */
+	if (best_blocked && best) {
+		if (best->next_tx_ns <= now) {
+			rv = best;
+		}
+		else if (best->next_tx_ns < best_blocked->next_tx_ns) {
+			rv = best;
+		}
+		else {
+			rv = best_blocked;
+		}
+	}
+
+	/* NOTE(review): this point is only reached with rv unset when at most
+	 * one of best/best_blocked exists, and the fallbacks below then pick
+	 * that one regardless, so the PG_TRY_TX_ANYWAY_NS test does not change
+	 * the result.
+	 */
+	if (!rv) {
+		if (best_blocked && (best_blocked->next_tx_ns < (now - PG_TRY_TX_ANYWAY_NS))) {
+			rv = best_blocked;
+		}
+	}
+	if (!rv) {
+		rv = best;
+	}
+	if (!rv) {
+		rv = best_blocked;
+	}
+
+	if (rv) {
+		if (rv->next_tx_ns <= now) {
+			*next_running_delay = 0;
+		}
+		else {
+			*next_running_delay = rv->next_tx_ns - now;
+		}
+	}
+	else {
+		*next_running_delay = 10000000; /* 10ms */
+	}
+	return rv;
 }
 
 static void pktgen_stop(struct pktgen_thread *t)
@@ -3179,8 +3657,6 @@ static void pktgen_stop(struct pktgen_thread *t)
 
 	pr_debug("pktgen: entering pktgen_stop\n");
 
-	if_lock(t);
-
 	list_for_e