diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index 278771c..e1149c9 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt @@ -1,232 +1,117 @@ - - - HOWTO for the linux packet generator - ------------------------------------ - -Date: 041221 - -Enable CONFIG_NET_PKTGEN to compile and build pktgen.o either in kernel -or as module. Module is preferred. insmod pktgen if needed. Once running -pktgen creates a thread on each CPU where each thread has affinty it's CPU. -Monitoring and controlling is done via /proc. Easiest to select a suitable -a sample script and configure. - -On a dual CPU: - -ps aux | grep pkt -root 129 0.3 0.0 0 0 ? SW 2003 523:20 [pktgen/0] -root 130 0.3 0.0 0 0 ? SW 2003 509:50 [pktgen/1] - - -For montoring and control pktgen creates: - /proc/net/pktgen/pgctrl - /proc/net/pktgen/kpktgend_X - /proc/net/pktgen/ethX - - -Viewing threads -=============== -/proc/net/pktgen/kpktgend_0 -Name: kpktgend_0 max_before_softirq: 10000 -Running: -Stopped: eth1 -Result: OK: max_before_softirq=10000 - -Most important the devices assigend to thread. Note! A device can only belong -to one thread. - - -Viewing devices -=============== - -Parm section holds configured info. Current hold running stats. -Result is printed after run or after interruption. 
Example: - -/proc/net/pktgen/eth1 - -Params: count 10000000 min_pkt_size: 60 max_pkt_size: 60 - frags: 0 delay: 0 clone_skb: 1000000 ifname: eth1 - flows: 0 flowlen: 0 - dst_min: 10.10.11.2 dst_max: - src_min: src_max: - src_mac: 00:00:00:00:00:00 dst_mac: 00:04:23:AC:FD:82 - udp_src_min: 9 udp_src_max: 9 udp_dst_min: 9 udp_dst_max: 9 - src_mac_count: 0 dst_mac_count: 0 - Flags: -Current: - pkts-sofar: 10000000 errors: 39664 - started: 1103053986245187us stopped: 1103053999346329us idle: 880401us - seq_num: 10000011 cur_dst_mac_offset: 0 cur_src_mac_offset: 0 - cur_saddr: 0x10a0a0a cur_daddr: 0x20b0a0a - cur_udp_dst: 9 cur_udp_src: 9 - flows: 0 -Result: OK: 13101142(c12220741+d880401) usec, 10000000 (60byte,0frags) - 763292pps 390Mb/sec (390805504bps) errors: 39664 - -Confguring threads and devices -============================== -This is done via the /proc interface easiest done via pgset in the scripts - -Examples: - - pgset "clone_skb 1" sets the number of copies of the same packet - pgset "clone_skb 0" use single SKB for all transmits - pgset "pkt_size 9014" sets packet size to 9014 - pgset "frags 5" packet will consist of 5 fragments - pgset "count 200000" sets number of packets to send, set to zero - for continious sends untill explicitl stopped. - - pgset "delay 5000" adds delay to hard_start_xmit(). nanoseconds - - pgset "dst 10.0.0.1" sets IP destination address +How to use the Linux packet generator module. + +1. Enable CONFIG_NET_PKTGEN to compile and build pktgen.o, install it + in the place where insmod may find it. +2. Add an interface to the kpktgend_0 thread: + echo "add_interface eth1" > /proc/net/pktgen/kpktgend_0 +2a. Add more interfaces as needed. +3. Configure interfaces by setting values as defined below. The + general strategy is: echo "command" > /proc/net/pktgen/[device] + For example: echo "multiskb 100" > /proc/net/pktgen/eth1 + + "multiskb 100" Will send 100 identical pkts before creating + new packet with new timestamp, etc. 
+ "multiskb 0" Will create new skb for all transmits. + "peer_multiskb 100" Helps us determine dropped & dup pkts, sender's multiskb. + "min_pkt_size 60" sets packet minimum size to 60 (64 counting CRC) + "max_pkt_size 1514" sets packet size to 1514 (1518 counting CRC) + "frags 5" packet will consist of 5 fragments + "count 200000" sets number of packets to send, set to zero + for continious sends untill explicitly + stopped. + "ipg 5000" sets artificial gap inserted between packets + to 5000 nanoseconds + "dst 10.0.0.1" sets IP destination address (BEWARE! This generator is very aggressive!) - - pgset "dst_min 10.0.0.1" Same as dst - pgset "dst_max 10.0.0.254" Set the maximum destination IP. - pgset "src_min 10.0.0.1" Set the minimum (or only) source IP. - pgset "src_max 10.0.0.254" Set the maximum source IP. - pgset "dst6 fec0::1" IPV6 destination address - pgset "src6 fec0::2" IPV6 source address - pgset "dstmac 00:00:00:00:00:00" sets MAC destination address - pgset "srcmac 00:00:00:00:00:00" sets MAC source address - - pgset "src_mac_count 1" Sets the number of MACs we'll range through. - The 'minimum' MAC is what you set with srcmac. - - pgset "dst_mac_count 1" Sets the number of MACs we'll range through. - The 'minimum' MAC is what you set with dstmac. - - pgset "flag [name]" Set a flag to determine behaviour. Current flags - are: IPSRC_RND #IP Source is random (between min/max), - IPDST_RND, UDPSRC_RND, - UDPDST_RND, MACSRC_RND, MACDST_RND - - pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then + "dst_min 10.0.0.1" Same as dst + "dst_max 10.0.0.254" Set the maximum destination IP. + "src_min 10.0.0.1" Set the minimum (or only) source IP. + "src_max 10.0.0.254" Set the maximum source IP. + "dst_mac 00:00:00:00:00:00" sets MAC destination address + "src_mac 00:00:00:00:00:00" sets MAC source address + "src_mac_count 1" Sets the number of MACs we'll range through. The + 'minimum' MAC is what you set with srcmac. 
+ "dst_mac_count 1" Sets the number of MACs we'll range through. The + 'minimum' MAC is what you set with dstmac. + "flag [name]" Set a flag to determine behaviour. Prepend '!' to the + flag to turn it off. Current flags are: + IPSRC_RND #IP Source is random (between min/max), + IPDST_RND, UDPSRC_RND, TXSIZE_RND + UDPDST_RND, MACSRC_RND, MACDST_RND + "udp_src_min 9" set UDP source port min, If < udp_src_max, then cycle through the port range. - - pgset "udp_src_max 9" set UDP source port max. - pgset "udp_dst_min 9" set UDP destination port min, If < udp_dst_max, then + "udp_src_max 9" set UDP source port max. + "udp_dst_min 9" set UDP destination port min, If < udp_dst_max, then cycle through the port range. - pgset "udp_dst_max 9" set UDP destination port max. - - pgset "mpls 0001000a,0002000a,0000000a" set MPLS labels (in this example - outer label=16,middle label=32, - inner label=0 (IPv4 NULL)) Note that - there must be no spaces between the - arguments. Leading zeros are required. - Do not set the bottom of stack bit, - thats done automatically. If you do - set the bottom of stack bit, that - indicates that you want to randomly - generate that address and the flag - MPLS_RND will be turned on. You - can have any mix of random and fixed - labels in the label stack. - - pgset "mpls 0" turn off mpls (or any invalid argument works too!) - - pgset stop aborts injection. Also, ^C aborts generator. - - -Example scripts -=============== - -A collection of small tutorial scripts for pktgen is in expamples dir. - -pktgen.conf-1-1 # 1 CPU 1 dev -pktgen.conf-1-2 # 1 CPU 2 dev -pktgen.conf-2-1 # 2 CPU's 1 dev -pktgen.conf-2-2 # 2 CPU's 2 dev -pktgen.conf-1-1-rdos # 1 CPU 1 dev w. route DoS -pktgen.conf-1-1-ip6 # 1 CPU 1 dev ipv6 -pktgen.conf-1-1-ip6-rdos # 1 CPU 1 dev ipv6 w. route DoS -pktgen.conf-1-1-flows # 1 CPU 1 dev multiple flows. - -Run in shell: ./pktgen.conf-X-Y It does all the setup including sending. 
- - -Interrupt affinity -=================== -Note when adding devices to a specific CPU there good idea to also assign -/proc/irq/XX/smp_affinity so the TX-interrupts gets bound to the same CPU. -as this reduces cache bouncing when freeing skb's. - - -Current commands and configuration options -========================================== - -** Pgcontrol commands: - -start -stop - -** Thread commands: - -add_device -rem_device_all -max_before_softirq - - -** Device commands: - -count -clone_skb -debug - -frags -delay - -src_mac_count -dst_mac_count - -pkt_size -min_pkt_size -max_pkt_size - -mpls - -udp_src_min -udp_src_max - -udp_dst_min -udp_dst_max - -flag - IPSRC_RND - TXSIZE_RND - IPDST_RND - UDPSRC_RND - UDPDST_RND - MACSRC_RND - MACDST_RND - -dst_min -dst_max - -src_min -src_max - -dst_mac -src_mac - -clear_counters - -dst6 -src6 - -flows -flowlen - -References: -ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/ -ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/ - -Paper from Linux-Kongress in Erlangen 2004. -ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/pktgen_paper.pdf - -Thanks to: -Grant Grundler for testing on IA-64 and parisc, Harald Welte, Lennert Buytenhek -Stephen Hemminger, Andi Kleen, Dave Miller and many others. - - -Good luck with the linux net-development. + "udp_dst_max 9" set UDP destination port max. + "stop" Stops this interface from transmitting. It will still + receive packets and record their latency, etc. + "start" Starts the interface transmitting packets. + "clear_counters" Clear the packet and latency counters. + +You can start and stop threads by echoing commands to the /proc/net/pktgen/pgctrl +file. Supported commands are: + "stop kpktgend_0" Stop thread 0. + "start threadXX" Start (create) thread XX. You may wish to create one thread + per CPU. + + +You can manage the interfaces on a thread by echoing commands to +the /proc/net/pktgen/[thread] file. 
Supported commands are: + "add_interface eth1" Add interface eth1 to the chosen thread. + "rem_interface eth1" Remove interface eth1 from the chosen thread. + "max_before_softirq" Maximum loops before we cause a call to do_softirq, + this is to help mitigate starvation on the RX side. + + +You can examine various counters and parameters by reading the appropriate +proc file: + +[root@localhost lanforge]# cat /proc/net/pktgen/kpktgend_0 +VERSION-1 +Name: kpktgend_0 +Current: eth2 +Running: eth6 +Stopped: eth1 eth5 +Result: NA + + +[root@localhost lanforge]# cat /proc/net/pktgen/eth2 +VERSION-1 +Params: count 0 pkt_size: 300 frags: 0 ipg: 0 multiskb: 0 ifname "eth2" + dst_min: 172.2.1.1 dst_max: 172.2.1.6 src_min: 172.1.1.4 src_max: 172.1.1.8 + src_mac: 00:00:00:00:00:00 dst_mac: 00:00:00:00:00:00 + udp_src_min: 99 udp_src_max: 1005 udp_dst_min: 9 udp_dst_max: 9 + src_mac_count: 0 dst_mac_count: 0 + Flags: IPSRC_RND IPDST_RND UDPSRC_RND +Current: + pkts-sofar: 158835950 errors: 0 + started: 1026024703542360us elapsed: 4756326418us + idle: 1723232054307ns next_tx: 27997154666566(-3202934)ns + seq_num: 158835951 cur_dst_mac_offset: 0 cur_src_mac_offset: 0 + cur_saddr: 0x60101ac cur_daddr: 0x30102ac cur_udp_dst: 9 cur_udp_src: 966 + pkts_rcvd: 476002 bytes_rcvd: 159929440 last_seq_rcvd: 476002 ooo_rcvd: 0 + dup_rcvd: 0 seq_gap_rcvd(dropped): 0 non_pg_rcvd: 0 + avg_latency: 41us min_latency: 40us max_latency: 347us pkts_in_sample: 476002 + Buckets(us) [ 0 0 0 0 0 0 311968 164008 23 3 0 0 0 0 0 0 0 0 0 0 ] +Result: OK: ipg=0 + +[root@localhost lanforge]# cat /proc/net/pktgen/eth6 +VERSION-1 +Params: count 0 pkt_size: 300 frags: 0 ipg: 11062341 multiskb: 0 ifname "eth6" + dst_min: 90 dst_max: 90 src_min: 90 src_max: 90 + src_mac: 00:00:00:00:00:00 dst_mac: 00:00:00:00:00:00 + udp_src_min: 9 udp_src_max: 9 udp_dst_min: 9 udp_dst_max: 9 + src_mac_count: 0 dst_mac_count: 0 + Flags: +Current: + pkts-sofar: 479940 errors: 0 + started: 1026024703542707us elapsed: 4795667656us 
+ idle: 109585100905ns next_tx: 28042807786397(-79364)ns + seq_num: 479941 cur_dst_mac_offset: 0 cur_src_mac_offset: 0 + cur_saddr: 0x0 cur_daddr: 0x0 cur_udp_dst: 9 cur_udp_src: 9 + pkts_rcvd: 160323509 bytes_rcvd: 50392479910 last_seq_rcvd: 160323509 ooo_rcvd: 0 + dup_rcvd: 0 seq_gap_rcvd(dropped): 0 non_pg_rcvd: 0 + avg_latency: 230us min_latency: 36us max_latency: 1837us pkts_in_sample: 160323509 + Buckets(us) [ 0 0 0 0 0 0 287725 2618755 54130607 98979415 80358 4226649 0 0 0 0 0 0 0 0 ] +Result: OK: ipg=11062341 diff --git a/drivers/net/e100.c b/drivers/net/e100.c index 31ac001..bac0176 100644 --- a/drivers/net/e100.c +++ b/drivers/net/e100.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/************************************************************** Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved. @@ -381,6 +381,7 @@ enum cb_command { cb_ucode = 0x0005, cb_dump = 0x0006, cb_tx_sf = 0x0008, + cb_tx_nc = 0x0010, /* 0 == controler does CRC, ie normal. 
1 == CRC from memory */ cb_cid = 0x1f00, cb_i = 0x2000, cb_s = 0x4000, @@ -417,7 +418,7 @@ struct config { /*5*/ u8 X(tx_dma_max_count:7, dma_max_count_enable:1); /*6*/ u8 X(X(X(X(X(X(X(late_scb_update:1, direct_rx_dma:1), tno_intr:1), cna_intr:1), standard_tcb:1), standard_stat_counter:1), - rx_discard_overruns:1), rx_save_bad_frames:1); + rx_save_overruns:1), rx_save_bad_frames:1); /*7*/ u8 X(X(X(X(X(rx_discard_short_frames:1, tx_underrun_retry:2), pad7:2), rx_extended_rfd:1), tx_two_frames_in_fifo:1), tx_dynamic_tbd:1); @@ -547,6 +548,8 @@ struct nic { multicast_all = (1 << 2), wol_magic = (1 << 3), ich_10h_workaround = (1 << 4), + accept_all_frames = (1 << 5), + save_fcs = (1 << 6), } flags ____cacheline_aligned; enum mac mac; @@ -1006,6 +1009,16 @@ static void e100_configure(struct nic *n config->promiscuous_mode = 0x1; /* 1=on, 0=off */ } + if(nic->flags & accept_all_frames) { + config->rx_save_overruns = 0x1; /* 1=save, 0=discard */ + config->rx_save_bad_frames = 0x1; /* 1=save, 0=discard */ + config->rx_discard_short_frames = 0x0; /* 1=discard, 0=save */ + } + + if(nic->flags & save_fcs) { + config->rx_crc_transfer = 0x1; /* 1=save, 0=discard */ + } + if(nic->flags & multicast_all) config->multicast_all = 0x1; /* 1=accept, 0=no */ @@ -1464,6 +1477,16 @@ static void e100_set_multicast_list(stru else nic->flags &= ~promiscuous; + if(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES) + nic->flags |= accept_all_frames; + else + nic->flags &= ~accept_all_frames; + + if(netdev->priv_flags & IFF_SAVE_FCS) + nic->flags |= save_fcs; + else + nic->flags &= ~save_fcs; + if(netdev->flags & IFF_ALLMULTI || netdev->mc_count > E100_MAX_MULTICAST_ADDRS) nic->flags |= multicast_all; @@ -1605,6 +1628,19 @@ static void e100_xmit_prepare(struct nic struct sk_buff *skb) { cb->command = nic->tx_command; + +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + /* Use the last 4 bytes of the SKB payload packet as the CRC, used for + * testing, ie sending frames with bad CRC. 
+ */ + if (unlikely(skb->use_specified_ether_crc)) { + cb->command |= __constant_cpu_to_le16(cb_tx_nc); + } + else { + cb->command &= ~__constant_cpu_to_le16(cb_tx_nc); + } +#endif + /* interrupt every 16 packets regardless of delay */ if((nic->cbs_avail & ~15) == nic->cbs_avail) cb->command |= cpu_to_le16(cb_i); @@ -1834,7 +1870,21 @@ static int e100_rx_indicate(struct nic * skb_reserve(skb, sizeof(struct rfd)); skb_put(skb, actual_size); skb->protocol = eth_type_trans(skb, nic->netdev); - + /* NOTE: The config step turns on acceptance of various bogus frames + * when in loopback or promisc mode, but this code will still throw + * them away unless you also set the new 'accept_all_frames' flag. + * Perhaps the implementors meant to accept the bogus frames in + * promisc mode here?? --Ben + */ + if(unlikely(nic->flags & accept_all_frames)) { + if(actual_size > nic->netdev->mtu + VLAN_ETH_HLEN) { + /* Received oversized frame */ + nic->net_stats.rx_over_errors++; + } + /* We're accepting all, so pass the bogons on up the stack. 
*/ + goto process_skb; + } + if(unlikely(!(rfd_status & cb_ok))) { /* Don't indicate if hardware indicates errors */ dev_kfree_skb_any(skb); @@ -1843,6 +1893,7 @@ static int e100_rx_indicate(struct nic * nic->rx_over_length_errors++; dev_kfree_skb_any(skb); } else { + process_skb: nic->net_stats.rx_packets++; nic->net_stats.rx_bytes += actual_size; nic->netdev->last_rx = jiffies; @@ -2206,6 +2257,63 @@ static int e100_set_settings(struct net_ return err; } +static int e100_set_rxall(struct net_device *netdev, u32 data) +{ + struct nic *nic = netdev->priv; + if (data) { + netdev->priv_flags |= IFF_ACCEPT_ALL_FRAMES; + nic->flags |= accept_all_frames; + } + else { + netdev->priv_flags &= ~(IFF_ACCEPT_ALL_FRAMES); + nic->flags &= ~accept_all_frames; + } + + e100_exec_cb(nic, NULL, e100_configure); + + return 0; +} + +static int e100_get_rxall(struct net_device *netdev, u32* data) +{ + struct nic *nic = netdev->priv; + if (nic->flags & accept_all_frames) { + *data = 1; + } + else { + *data = 0; + } + + return 0; +} + +static int e100_set_save_fcs(struct net_device *netdev, u32 data) +{ + struct nic *nic = netdev->priv; + if (data) { + nic->flags |= save_fcs; + } + else { + nic->flags &= ~save_fcs; + } + e100_exec_cb(nic, NULL, e100_configure); + + return 0; +} + +static int e100_get_save_fcs(struct net_device *netdev, u32* data) +{ + struct nic *nic = netdev->priv; + if (nic->flags & save_fcs) { + *data = 1; + } + else { + *data = 0; + } + + return 0; +} + static void e100_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -2503,6 +2611,10 @@ static struct ethtool_ops e100_ethtool_o .get_stats_count = e100_get_stats_count, .get_ethtool_stats = e100_get_ethtool_stats, .get_perm_addr = ethtool_op_get_perm_addr, + .set_rx_all = e100_set_rxall, + .get_rx_all = e100_get_rxall, + .set_save_fcs = e100_set_save_fcs, + .get_save_fcs = e100_get_save_fcs, }; static int e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) diff --git 
a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h index 281de41..e8cd57f 100644 --- a/drivers/net/e1000/e1000.h +++ b/drivers/net/e1000/e1000.h @@ -356,6 +356,7 @@ int e1000_set_spd_dplx(struct e1000_adap /* e1000_ethtool.c */ void e1000_set_ethtool_ops(struct net_device *netdev); +void e1000_set_multi(struct net_device *netdev); /* e1000_param.c */ void e1000_check_options(struct e1000_adapter *adapter); diff --git a/drivers/net/e1000/e1000_ethtool.c b/drivers/net/e1000/e1000_ethtool.c index d1c705b..e8c1033 100644 --- a/drivers/net/e1000/e1000_ethtool.c +++ b/drivers/net/e1000/e1000_ethtool.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/***************************************************************** Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved. @@ -1857,6 +1857,58 @@ e1000_get_strings(struct net_device *net } } +static int e1000_ethtool_setrxall(struct net_device *netdev, uint32_t val) { + unsigned short old_flags = netdev->priv_flags; + if (val) { + netdev->priv_flags |= IFF_ACCEPT_ALL_FRAMES; + } + else { + netdev->priv_flags &= ~(IFF_ACCEPT_ALL_FRAMES); + } + + /* printk("e1000_ethtool_setrxall (%s) val: %d\n", + netdev->name, val); */ + if (old_flags != netdev->priv_flags) { + spin_lock_bh(&netdev->xmit_lock); + if (netif_running(netdev)) { + /*printk("Kicking e1000 for setrxall..\n");*/ + e1000_set_multi(netdev); + } else { + /* Value will be flushed into the hardware when the device is + * brought up. 
+ */ + } + spin_unlock_bh(&netdev->xmit_lock); + } + return 0; +} + +static int e1000_ethtool_set_save_fcs(struct net_device *netdev, uint32_t val) { + spin_lock_bh(&netdev->xmit_lock); + if (val) { + netdev->priv_flags |= IFF_SAVE_FCS; + } + else { + netdev->priv_flags &= ~IFF_SAVE_FCS; + } + spin_unlock_bh(&netdev->xmit_lock); + return 0; +} + +static int e1000_ethtool_get_save_fcs(struct net_device *netdev, uint32_t* val) { + *val = !!(netdev->priv_flags & IFF_SAVE_FCS); + /*printk("GETRXALL, data: %d priv_flags: %hx\n", + edata.data, netdev->priv_flags);*/ + return 0; +} + +static int e1000_ethtool_getrxall(struct net_device *netdev, uint32_t* val) { + *val = !!(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES); + /*printk("GETRXALL, data: %d priv_flags: %hx\n", + edata.data, netdev->priv_flags);*/ + return 0; +} + static struct ethtool_ops e1000_ethtool_ops = { .get_settings = e1000_get_settings, .set_settings = e1000_set_settings, @@ -1893,6 +1945,10 @@ #endif .get_stats_count = e1000_get_stats_count, .get_ethtool_stats = e1000_get_ethtool_stats, .get_perm_addr = ethtool_op_get_perm_addr, + .get_rx_all = e1000_ethtool_getrxall, + .set_rx_all = e1000_ethtool_setrxall, + .set_save_fcs = e1000_ethtool_set_save_fcs, + .get_save_fcs = e1000_ethtool_get_save_fcs, }; void e1000_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 97e71a4..6c967b2 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -1,4 +1,4 @@ -/******************************************************************************* +/***************************************************************** Copyright(c) 1999 - 2005 Intel Corporation. All rights reserved. 
@@ -168,7 +168,7 @@ static void e1000_clean_tx_ring(struct e struct e1000_tx_ring *tx_ring); static void e1000_clean_rx_ring(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring); -static void e1000_set_multi(struct net_device *netdev); +void e1000_set_multi(struct net_device *netdev); static void e1000_update_phy_info(unsigned long data); static void e1000_watchdog(unsigned long data); static void e1000_watchdog_task(struct e1000_adapter *adapter); @@ -740,6 +740,10 @@ #endif if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; + /* Has ability to receive all frames (even bad CRCs and such) */ + netdev->features |= NETIF_F_RX_ALL | NETIF_F_SAVE_CRC; + + /* hard_start_xmit is safe against parallel locking */ netdev->features |= NETIF_F_LLTX; @@ -2093,7 +2097,7 @@ e1000_set_mac(struct net_device *netdev, * promiscuous mode, and all-multi behavior. **/ -static void +void e1000_set_multi(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); @@ -2122,6 +2126,35 @@ e1000_set_multi(struct net_device *netde E1000_WRITE_REG(hw, RCTL, rctl); + + /* This is useful for using ethereal or tcpdump to sniff + * packets in promiscuous mode without stripping VLAN/priority + * information, and also letting bad packets through. + * + * THIS IS NOT PRODUCTION CODE - FOR INTERNAL USE ONLY!!! 
+ * + */ + if (netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES) { + uint32_t ctrl; + /*printk("%s: Enabling acceptance of ALL frames (bad CRC too).\n", + netdev->name); */ + /* store bad packets, promisc/multicast all, no VLAN + * filter */ + rctl = E1000_READ_REG(hw, RCTL); + rctl |= (E1000_RCTL_SBP | E1000_RCTL_UPE | E1000_RCTL_MPE); + rctl &= ~(E1000_RCTL_VFE | E1000_RCTL_CFIEN); + E1000_WRITE_REG(hw, RCTL, rctl); + /* disable VLAN tagging/striping */ + ctrl = E1000_READ_REG(hw, CTRL); + ctrl &= ~E1000_CTRL_VME; + E1000_WRITE_REG(hw, CTRL, ctrl); + } + else { + /* TODO: Do we need a way to explicitly turn this off if it was + * previously enabled, or will it magically go back to normal??? --Ben + */ + } + /* 82542 2.0 needs to be in reset to write receive address registers */ if (hw->mac_type == e1000_82542_rev2_0) @@ -2397,6 +2430,7 @@ #define E1000_TX_FLAGS_CSUM 0x00000001 #define E1000_TX_FLAGS_VLAN 0x00000002 #define E1000_TX_FLAGS_TSO 0x00000004 #define E1000_TX_FLAGS_IPV4 0x00000008 +#define E1000_TX_FLAGS_NO_FCS 0x00000010 #define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 #define E1000_TX_FLAGS_VLAN_SHIFT 16 @@ -2653,6 +2687,13 @@ e1000_tx_queue(struct e1000_adapter *ada txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); } +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS)) { + txd_lower &= ~(E1000_TXD_CMD_IFCS); + /* printk("Disabling CRC in tx_queue, txd_lower: 0x%x\n", txd_lower); */ + } +#endif + i = tx_ring->next_to_use; while (count--) { @@ -2667,6 +2708,14 @@ e1000_tx_queue(struct e1000_adapter *ada tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + /* txd_cmd re-enables FCS, so we'll re-disable it here as desired. 
*/ + if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS)) { + tx_desc->lower.data &= ~(cpu_to_le32(E1000_TXD_CMD_IFCS)); + /* printk("Disabling2 CRC in tx_queue, txd_lower: 0x%x\n", tx_desc->lower.data); */ + } +#endif + /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -2904,6 +2953,12 @@ #endif if (likely(skb->protocol == ntohs(ETH_P_IP))) tx_flags |= E1000_TX_FLAGS_IPV4; +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (unlikely(skb->use_specified_ether_crc)) { + tx_flags |= E1000_TX_FLAGS_NO_FCS; + } +#endif + e1000_tx_queue(adapter, tx_ring, tx_flags, e1000_tx_map(adapter, tx_ring, skb, first, max_per_txd, nr_frags, mss)); @@ -3555,7 +3610,11 @@ #endif goto next_desc; } - if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { + /* If we are accepting all frames, then do not pay attention to the + * framing errors. + */ + if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) && + !(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES)) { last_byte = *(skb->data + length - 1); if (TBI_ACCEPT(&adapter->hw, status, rx_desc->errors, length, last_byte)) { @@ -3572,6 +3631,16 @@ #endif } } + + // This may not be needed now. 
--Ben + //if (netdev->priv_flags & IFF_SAVE_FCS) { + // skb_put(skb, length); + //} + //else { + // skb_put(skb, length - ETHERNET_FCS_SIZE); + //} + + /* code added for copybreak, this should improve * performance for small packets with large amounts * of reassembly being done in the stack */ @@ -3709,7 +3778,8 @@ #endif goto next_desc; } - if (unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { + if ((unlikely(staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK)) && + !(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES)) { dev_kfree_skb_irq(skb); goto next_desc; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 006eb33..1f34c3f 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -118,6 +118,7 @@ struct TCP_Server_Info { struct sockaddr_in sockAddr; struct sockaddr_in6 sockAddr6; } addr; + u32 ip4_local_ip; /* if != 0, will bind locally to this IP */ wait_queue_head_t response_q; wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ struct list_head pending_mid_q; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index bae1479..1b4b0c1 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -94,13 +94,15 @@ struct smb_vol { unsigned int rsize; unsigned int wsize; unsigned int sockopt; + u32 local_ip; /* allow binding to a local IP address if != 0 */ unsigned short int port; }; static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, char * netb_name, - char * server_netb_name); + char * server_netb_name, + u32 local_ip); static int ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket); @@ -194,7 +196,8 @@ cifs_reconnect(struct TCP_Server_Info *s rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket, server->workstation_RFC1001_name, - server->server_RFC1001_name); + server->server_RFC1001_name, + server->ip4_local_ip); } if(rc) { cFYI(1,("reconnect error %d",rc)); @@ -983,6 +986,18 @@ cifs_parse_mount_options(char *options, printk(KERN_WARNING "CIFS: domain name too long\n"); return 1; } + 
} else if (strnicmp(data, "local_ip", 8) == 0) { + if (!value || !*value) { + printk(KERN_WARNING "CIFS: local_ip value not specified.\n"); + return 1; /* needs_arg; */ + } + i = cifs_inet_pton(AF_INET, value, &(vol->local_ip)); + if (i <= 0) { + vol->local_ip = 0; + printk(KERN_WARNING "CIFS: Could not parse local_ip: %s\n", + value); + return 1; + } } else if (strnicmp(data, "iocharset", 9) == 0) { if (!value || !*value) { printk(KERN_WARNING "CIFS: invalid iocharset specified\n"); @@ -1217,7 +1232,8 @@ cifs_parse_mount_options(char *options, static struct cifsSesInfo * cifs_find_tcp_session(struct in_addr * target_ip_addr, struct in6_addr *target_ip6_addr, - char *userName, struct TCP_Server_Info **psrvTcp) + char *userName, struct TCP_Server_Info **psrvTcp, + u32 local_ip) { struct list_head *tmp; struct cifsSesInfo *ses; @@ -1227,7 +1243,11 @@ cifs_find_tcp_session(struct in_addr * t list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); if (ses->server) { - if((target_ip_addr && + if((target_ip_addr && + /* If binding to a local IP, do not re-use sessions bound to different + * local IP addresses. + */ + (local_ip == ses->server->ip4_local_ip) && (ses->server->addr.sockAddr.sin_addr.s_addr == target_ip_addr->s_addr)) || (target_ip6_addr && memcmp(&ses->server->addr.sockAddr6.sin6_addr, @@ -1250,7 +1270,7 @@ cifs_find_tcp_session(struct in_addr * t } static struct cifsTconInfo * -find_unc(__be32 new_target_ip_addr, char *uncName, char *userName) +find_unc(__be32 new_target_ip_addr, char *uncName, char *userName, u32 local_ip) { struct list_head *tmp; struct cifsTconInfo *tcon; @@ -1265,8 +1285,9 @@ find_unc(__be32 new_target_ip_addr, char (" old ip addr: %x == new ip %x ?", tcon->ses->server->addr.sockAddr.sin_addr. s_addr, new_target_ip_addr)); - if (tcon->ses->server->addr.sockAddr.sin_addr. 
- s_addr == new_target_ip_addr) { + if ((local_ip == tcon->ses->server->ip4_local_ip) && + (tcon->ses->server->addr.sockAddr.sin_addr. + s_addr == new_target_ip_addr)) { /* BB lock tcon and server and tcp session and increment use count here? */ /* found a match on the TCP session */ /* BB check if reconnection needed */ @@ -1366,7 +1387,8 @@ static void rfc1002mangle(char * target, static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char * netbios_name, char * target_name) + char * netbios_name, char * target_name, + u32 local_ip /* in network byte order */) { int rc = 0; int connected = 0; @@ -1385,6 +1407,24 @@ ipv4_connect(struct sockaddr_in *psin_se } } + /* Bind to the local IP address if specified */ + if (local_ip) { + struct sockaddr_in myaddr = { + .sin_family = AF_INET, + }; + myaddr.sin_addr.s_addr = local_ip; + myaddr.sin_port = 0; /* any */ + rc = (*csocket)->ops->bind(*csocket, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (rc < 0) { + printk("Tried to bind to local ip: 0x%x, but failed with error: %d\n", + local_ip, rc); + } + else { + printk("CIFS: Successfully bound to local ip: 0x%x\n", local_ip); + } + } + psin_server->sin_family = AF_INET; if(psin_server->sin_port) { /* user overrode default port */ rc = (*csocket)->ops->connect(*csocket, @@ -1664,11 +1704,12 @@ cifs_mount(struct super_block *sb, struc if(address_type == AF_INET) existingCifsSes = cifs_find_tcp_session(&sin_server.sin_addr, NULL /* no ipv6 addr */, - volume_info.username, &srvTcp); + volume_info.username, &srvTcp, + volume_info.local_ip); else if(address_type == AF_INET6) existingCifsSes = cifs_find_tcp_session(NULL /* no ipv4 addr */, &sin_server6.sin6_addr, - volume_info.username, &srvTcp); + volume_info.username, &srvTcp, 0); else { kfree(volume_info.UNC); kfree(volume_info.password); @@ -1686,7 +1727,8 @@ cifs_mount(struct super_block *sb, struc sin_server.sin_port = 0; rc = ipv4_connect(&sin_server,&csocket, 
volume_info.source_rfc1001_name, - volume_info.target_rfc1001_name); + volume_info.target_rfc1001_name, + volume_info.local_ip); if (rc < 0) { cERROR(1, ("Error connecting to IPv4 socket. Aborting operation")); @@ -1713,6 +1755,7 @@ cifs_mount(struct super_block *sb, struc /* BB Add code for ipv6 case too */ srvTcp->ssocket = csocket; srvTcp->protocolType = IPV4; + srvTcp->ip4_local_ip = volume_info.local_ip; init_waitqueue_head(&srvTcp->response_q); init_waitqueue_head(&srvTcp->request_q); INIT_LIST_HEAD(&srvTcp->pending_mid_q); @@ -1839,7 +1882,7 @@ cifs_mount(struct super_block *sb, struc tcon = find_unc(sin_server.sin_addr.s_addr, volume_info.UNC, - volume_info.username); + volume_info.username, volume_info.local_ip); if (tcon) { cFYI(1, ("Found match on UNC path")); /* we can have only one retry value for a connection diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d0b991a..269d9c3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -413,6 +413,12 @@ nfs_create_client(struct nfs_server *ser __FUNCTION__, PTR_ERR(xprt)); return (struct rpc_clnt *)xprt; } + + if (data->local_ip != 0) { + printk("nfs: Configuring local ip address as: 0x%x\n", + data->local_ip); + } + xprt->local_address = data->local_ip; /* specify local IP address */ clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, data->pseudoflavor); if (IS_ERR(clnt)) { @@ -1687,6 +1693,11 @@ static int nfs_compare_super(struct supe return 0; if (old->addr.sin_port != server->addr.sin_port) return 0; + if (old->local_ip != server->local_ip) { + /*printk("nfs_compare_super, old->ip: %x server->ip: %x\n", + old->local_ip, server->local_ip); */ + return 0; + } return !nfs_compare_fh(&old->fh, &server->fh); } @@ -1769,6 +1780,10 @@ #endif /* CONFIG_NFS_V3 */ goto out_err; } + server->local_ip = data->local_ip; /* Allow unique local mounts when + * binding to local IP addresses. 
+ */ + /* Fire up rpciod if not yet running */ s = ERR_PTR(rpciod_up()); if (IS_ERR(s)) { @@ -1789,6 +1804,7 @@ #endif /* CONFIG_NFS_V3 */ deactivate_super(s); return ERR_PTR(error); } + s->s_flags |= MS_ACTIVE; return s; out_rpciod_down: @@ -2007,6 +2023,11 @@ static int nfs4_compare_super(struct sup return 0; if (strcmp(server->mnt_path, old->mnt_path) != 0) return 0; + if (old->local_ip != server->local_ip) { + /*printk("nfs4_compare_super, old->ip: %x server->ip: %x\n", + old->local_ip, server->local_ip);*/ + return 0; + } return 1; } diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h index 802ae76..69d9f0a 100644 --- a/include/asm-i386/socket.h +++ b/include/asm-i386/socket.h @@ -49,4 +49,8 @@ #define SO_ACCEPTCONN 30 #define SO_PEERSEC 31 +/* Instruct lower device to not calculate the frame + * checksum. Useful only for testing, afaik. --Ben */ +#define SO_NOFCS 50 + #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h index f2cdbea..2f205aa 100644 --- a/include/asm-x86_64/socket.h +++ b/include/asm-x86_64/socket.h @@ -49,4 +49,9 @@ #define SO_ACCEPTCONN 30 #define SO_PEERSEC 31 +/* Instruct lower device to not calculate the frame + * checksum. Useful only for testing, afaik. 
--Ben */ +#define SO_NOFCS 50 + + #endif /* _ASM_SOCKET_H */ diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 89ab677..daaf1bc 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -265,6 +265,10 @@ COMPATIBLE_IOCTL(SIOCGMIIREG) COMPATIBLE_IOCTL(SIOCSMIIREG) COMPATIBLE_IOCTL(SIOCGIFVLAN) COMPATIBLE_IOCTL(SIOCSIFVLAN) +COMPATIBLE_IOCTL(SIOCSIFMACVLAN) +COMPATIBLE_IOCTL(SIOCGIFMACVLAN) +COMPATIBLE_IOCTL(SIOCGIFREDIRDEV) +COMPATIBLE_IOCTL(SIOCSIFREDIRDEV) COMPATIBLE_IOCTL(SIOCBRADDBR) COMPATIBLE_IOCTL(SIOCBRDELBR) /* SG stuff */ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 93535f0..e0b45d8 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1,4 +1,4 @@ -/* +/* -*-linux-c-*- * ethtool.h: Defines for Linux ethtool. * * Copyright (C) 1998 David S. Miller (davem@redhat.com) @@ -307,7 +307,11 @@ int ethtool_op_set_ufo(struct net_device * phys_id: Identify the device * get_stats: Return statistics about the device * get_perm_addr: Gets the permanent hardware address - * + * set_rx_all: Set or clear IFF_ACCEPT_ALL_FRAMES, see if.h + * get_rx_all: Return 1 if set, 0 if not. + * set_save_fcs: Set or clear IFF_SAVE_FCS, see if.h + * get_save_fcs: Return 1 if set, 0 if not. 
+ * Description: * * get_settings: @@ -366,12 +370,24 @@ struct ethtool_ops { int (*get_stats_count)(struct net_device *); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); int (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *); + int (*set_rx_all)(struct net_device *, u32); + int (*get_rx_all)(struct net_device *, u32 *); + int (*set_save_fcs)(struct net_device *, u32); + int (*get_save_fcs)(struct net_device *, u32 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); u32 (*get_ufo)(struct net_device *); int (*set_ufo)(struct net_device *, u32); }; + +/* for dumping net-device statistics */ +struct ethtool_ndstats { + u32 cmd; /* ETHTOOL_GNDSTATS */ + u8 data[0]; /* sizeof(struct net_device_stats) */ +}; + + /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* Get settings. */ #define ETHTOOL_SSET 0x00000002 /* Set settings. */ @@ -409,6 +425,15 @@ #define ETHTOOL_GPERMADDR 0x00000020 /* #define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */ #define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */ + +#define ETHTOOL_GNDSTATS 0x00000070 /* get standard net-device statistics */ +#define ETHTOOL_GETRXALL 0x00000071 /* Retrieve whether or not + * IFF_ACCEPT_ALL_FRAMES is set. */ +#define ETHTOOL_SETRXALL 0x00000072 /* Set IFF_ACCEPT_ALL_FRAMES */ +#define ETHTOOL_GETRXFCS 0x00000073 /* Get IFF_SAVE_FCS */ +#define ETHTOOL_SETRXFCS 0x00000074 /* Set IFF_SAVE_FCS */ + + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET diff --git a/include/linux/if.h b/include/linux/if.h index 374e20a..814657c 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -60,6 +60,20 @@ #define IFF_SLAVE_INACTIVE 0x4 /* bondin #define IFF_MASTER_8023AD 0x8 /* bonding master, 802.3ad. */ #define IFF_MASTER_ALB 0x10 /* bonding master, balance-alb.
*/ +#define IFF_PKTGEN_RCV 0x0100 /* Registered to receive & consume Pktgen skbs */ +#define IFF_ACCEPT_LOCAL_ADDRS 0x0200 /** Accept pkts even if they come from a local + * address. This lets us send pkts to ourselves + * over external interfaces (when used in conjunction + * with SO_BINDTODEVICE). + */ +#define IFF_ACCEPT_ALL_FRAMES 0x0400 /** Accept all frames, even ones with bad CRCs. + * Should only be used in debugging/testing situations + * Do NOT enable this unless you understand the + * consequences! */ +#define IFF_SAVE_FCS 0x0800 /** Save the Frame Check Sequence (FCS) on receive, if + * possible. */ +#define IFF_MAC_VLAN 0x1000 /* MAC VLAN device. */ + #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h new file mode 100644 index 0000000..0f56ae5 --- /dev/null +++ b/include/linux/if_macvlan.h @@ -0,0 +1,58 @@ +/* -*- linux-c -*- */ +#ifndef _LINUX_IF_MACVLAN_H +#define _LINUX_IF_MACVLAN_H + +/* the ioctl commands */ + +/* actions */ +#define MACVLAN_ENABLE 1 +#define MACVLAN_DISABLE 2 +#define MACVLAN_ADD 3 +#define MACVLAN_DEL 4 +//#define MACVLAN_BIND 5 +//#define MACVLAN_UNBIND 6 + +/* informative */ +#define MACVLAN_GET_NUM_PORTS 7 +#define MACVLAN_GET_PORT_NAME 8 +#define MACVLAN_GET_NUM_VLANS 9 +#define MACVLAN_GET_VLAN_NAME 10 +//#define MACVLAN_GET_NUM_MACS 11 +//#define MACVLAN_GET_MAC_NAME 12 + +#define MACVLAN_SET_PORT_FLAGS 13 +#define MACVLAN_GET_PORT_FLAGS 14 + +/* If this IOCTL succeeds, we are a MAC-VLAN interface, otherwise, we are not. */ +#define MACVLAN_IS_MACVLAN 15 +#define MACVLAN_IS_MACVLAN2 16 /* new ioctl API */ + + +#ifdef __KERNEL__ +#include +#include +extern int (*macvlan_ioctl_hook)(unsigned long arg); + +/* Returns >= 0 if it consumed the packet, otherwise let the pkt + * be processed by the netif_rx method, as if macvlans didn't + * exist.
+ */ +extern int (*macvlan_handle_frame_hook)(struct sk_buff *skb); +#endif + +struct macvlan_ioctl_reply { + int32_t num; + char name[IFNAMSIZ]; +}; + +struct macvlan_ioctl { + int32_t cmd; + int32_t portidx; + int32_t ifidx; /* flags when setting port flags */ + int32_t macaddridx; + char ifname[IFNAMSIZ]; + unsigned char macaddr[8]; + struct macvlan_ioctl_reply reply; +}; + +#endif diff --git a/include/linux/if_redirdev.h b/include/linux/if_redirdev.h new file mode 100644 index 0000000..2acdd77 --- /dev/null +++ b/include/linux/if_redirdev.h @@ -0,0 +1,34 @@ +/* -*- linux-c -*- */ +#ifndef _LINUX_IF_REDIRDEV_H +#define _LINUX_IF_REDIRDEV_H + +/* the ioctl commands */ + +#define REDIRDEV_ADD 2090 +#define REDIRDEV_DEL 2091 +/* If this IOCTL succeeds, we are a Redirect-Device + interface, otherwise, we are not. */ +#define REDIRDEV_IS_REDIRDEV 2092 +#define REDIRDEV_GET_BY_IDX 2093 +#define REDIRDEV_GET_BY_NAME 2094 + +#ifdef __KERNEL__ +#include +#include +extern int (*redirdev_ioctl_hook)(void*); + +#endif + +/* Request and response */ +struct redirdev_ioctl { + u32 cmd; + u32 ifidx; /* when getting info by idx */ + +#define RDD_ASSOCIATED (1<<0) + u32 flags; /* 1<<0: Is the interface associated with tx-dev or not */ + u32 not_used; /* explicitly align 64-bit */ + char ifname[IFNAMSIZ]; + char txifname[IFNAMSIZ]; +}; + +#endif diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index eef0876..fa429a0 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -1,4 +1,4 @@ -/* +/* -*- linux-c -*- * VLAN An implementation of 802.1Q VLAN tagging.
* * Authors: Ben Greear diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f4169bb..99c02e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -310,6 +310,11 @@ #define NETIF_F_VLAN_CHALLENGED 1024 /* #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ #define NETIF_F_LLTX 4096 /* LockLess TX */ #define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/ +#define NETIF_F_RX_ALL 16384 /* Can be configured to receive all packets, even + * ones with busted CRC. May disable VLAN filtering + * in the NIC, users should NOT enable this feature + * unless they understand the consequences. */ +#define NETIF_F_SAVE_CRC 32768 /* Can save FCS in skb, last 4 bytes for ethernet */ struct net_device *next_sched; @@ -495,9 +500,18 @@ #ifdef CONFIG_NET_POLL_CONTROLLER void (*poll_controller)(struct net_device *dev); #endif + /* Callback for when the queue is woken, used by pktgen currently */ + int (*notify_queue_woken)(struct net_device *dev); + void* nqw_data; /* To be used by the method above as needed */ + /* bridge stuff */ struct net_bridge_port *br_port; +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + struct macvlan_port *macvlan_priv; +#endif + + #ifdef CONFIG_NET_DIVERT /* this will get initialized at each interface type init routine */ struct divert_blk *divert; @@ -618,8 +632,13 @@ #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) return; #endif - if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) + if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) { __netif_schedule(dev); + + if (dev->notify_queue_woken) { + dev->notify_queue_woken(dev); + } + } } static inline void netif_stop_queue(struct net_device *dev) diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 65dec21..0b26449 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -36,6 +36,8 @@ struct nfs_server { struct nfs_fh fh; struct sockaddr_in addr; unsigned long mount_time; /* when this fs was 
mounted */ + u32 local_ip; /* Allow local binding in .v3 */ + #ifdef CONFIG_NFS_V4 /* Our own IP address, as a null-terminated string. * This is used to generate the clientid, and the callback address. diff --git a/include/linux/nfs_mount.h b/include/linux/nfs_mount.h index 659c754..77700de 100644 --- a/include/linux/nfs_mount.h +++ b/include/linux/nfs_mount.h @@ -20,7 +20,7 @@ #include * mount-to-kernel version compatibility. Some of these aren't used yet * but here they are anyway. */ -#define NFS_MOUNT_VERSION 6 +#define NFS_MOUNT_VERSION 7 #define NFS_MAX_CONTEXT_LEN 256 struct nfs_mount_data { @@ -43,6 +43,8 @@ struct nfs_mount_data { struct nfs3_fh root; /* 4 */ int pseudoflavor; /* 5 */ char context[NFS_MAX_CONTEXT_LEN + 1]; /* 6 */ + char pad[3]; /* 7 Align the context above */ + unsigned int local_ip; /* 7 */ }; /* bits in the flags field */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index df0cdd4..20f66f8 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -238,9 +238,8 @@ enum rt_class_t RT_TABLE_DEFAULT=253, RT_TABLE_MAIN=254, RT_TABLE_LOCAL=255, - __RT_TABLE_MAX }; -#define RT_TABLE_MAX (__RT_TABLE_MAX - 1) +#define RT_TABLE_MAX 0xFFFFFFFF @@ -263,6 +262,7 @@ enum rtattr_type_t RTA_CACHEINFO, RTA_SESSION, RTA_MP_ALGO, + RTA_TABLE, __RTA_MAX }; @@ -1066,6 +1066,13 @@ #define BUG_TRAP(x) do { \ } \ } while(0) +static inline u32 rtm_get_table(struct rtmsg *rtm, struct rtattr **rta) +{ + return RTA_GET_U32(rta[RTA_TABLE-1]); +rtattr_failure: + return rtm->rtm_table; +} + #endif /* __KERNEL__ */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f8f2347..adb34ac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -265,7 +265,11 @@ struct sk_buff { nfctinfo:3; __u8 pkt_type:3, fclone:2, - ipvs_property:1; + ipvs_property:1, + /* Use the last 4 bytes of the payload for the ethernet CRC. Only supported on some + * NICs, such as e1000. 
--Ben + */ + use_specified_ether_crc:1; __be16 protocol; void (*destructor)(struct sk_buff *skb); diff --git a/include/linux/sockios.h b/include/linux/sockios.h index e6b9d1d..7082208 100644 --- a/include/linux/sockios.h +++ b/include/linux/sockios.h @@ -65,6 +65,8 @@ #define SIOCGIFPFLAGS 0x8935 #define SIOCDIFADDR 0x8936 /* delete PA address */ #define SIOCSIFHWBROADCAST 0x8937 /* set hardware broadcast addr */ #define SIOCGIFCOUNT 0x8938 /* get number of devices */ +#define SIOCGIFWEIGHT 0x8939 /* get weight of device, in stones */ +#define SIOCSIFWEIGHT 0x893a /* set weight of device, in stones */ #define SIOCGIFBR 0x8940 /* Bridging support */ #define SIOCSIFBR 0x8941 /* Set bridging options */ @@ -94,6 +96,13 @@ #define SIOCDRARP 0x8960 /* delete RARP #define SIOCGRARP 0x8961 /* get RARP table entry */ #define SIOCSRARP 0x8962 /* set RARP table entry */ +/* MAC address based VLAN control calls */ +#define SIOCGIFMACVLAN 0x8965 /* Mac address multiplex/demultiplex support */ +#define SIOCSIFMACVLAN 0x8966 /* Set macvlan options */ + +#define SIOCGIFREDIRDEV 0x8967 /* Redirect device get ioctl */ +#define SIOCSIFREDIRDEV 0x8968 /* Set redirect dev options */ + /* Driver configuration calls */ #define SIOCGIFMAP 0x8970 /* Get device parameters */ @@ -122,6 +131,15 @@ #define SIOCBRDELBR 0x89a1 /* remov #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ #define SIOCBRDELIF 0x89a3 /* remove interface from bridge */ +/* Ben's little hack land */ +#define SIOCSACCEPTLOCALADDRS 0x89ba /* Allow interfaces to accept pkts from + * local interfaces...use with SO_BINDTODEVICE + */ +#define SIOCGACCEPTLOCALADDRS 0x89bb /* Allow interfaces to accept pkts from + * local interfaces...use with SO_BINDTODEVICE + */ + + /* Device private ioctl calls */ /* diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index e8bbe81..78b1b5c 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -171,7 +171,8 @@ struct rpc_xprt { 
reestablish_timeout; struct work_struct connect_worker; unsigned short port; - + u32 local_address; /* local IP address to bind to */ + /* * Disconnection of idle transports */ diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index a15dcf0..9464f48 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -94,7 +94,7 @@ #define DN_FIB_INFO(f) ((f)->fn_info) struct dn_fib_table { - int n; + u32 n; int (*insert)(struct dn_fib_table *t, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, @@ -137,7 +137,7 @@ extern int dn_fib_sync_up(struct net_dev /* * dn_tables.c */ -extern struct dn_fib_table *dn_fib_get_table(int n, int creat); +extern struct dn_fib_table *dn_fib_get_table(u32 n, int creat); extern struct dn_fib_table *dn_fib_empty_table(void); extern void dn_fib_table_init(void); extern void dn_fib_table_cleanup(void); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index e000fa2..e626e61 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -150,7 +150,8 @@ #define FIB_RES_NETMASK(res) (0) #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ struct fib_table { - unsigned char tb_id; + struct hlist_node tb_hlist; + u32 tb_id; unsigned tb_stamp; int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); int (*tb_insert)(struct fib_table *table, struct rtmsg *r, @@ -173,14 +174,14 @@ #ifndef CONFIG_IP_MULTIPLE_TABLES extern struct fib_table *ip_fib_local_table; extern struct fib_table *ip_fib_main_table; -static inline struct fib_table *fib_get_table(int id) +static inline struct fib_table *fib_get_table(u32 id) { if (id != RT_TABLE_LOCAL) return ip_fib_main_table; return ip_fib_local_table; } -static inline struct fib_table *fib_new_table(int id) +static inline struct fib_table *fib_new_table(u32 id) { return fib_get_table(id); } @@ -200,30 +201,14 @@ static inline void fib_select_default(co } #else /* CONFIG_IP_MULTIPLE_TABLES */ -#define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL]) -#define 
ip_fib_main_table (fib_tables[RT_TABLE_MAIN]) +#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL) +#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN) -extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; extern int fib_lookup(const struct flowi *flp, struct fib_result *res); -extern struct fib_table *__fib_new_table(int id); +extern struct fib_table *fib_new_table(u32 id); +extern struct fib_table *fib_get_table(u32 id); extern void fib_rule_put(struct fib_rule *r); -static inline struct fib_table *fib_get_table(int id) -{ - if (id == 0) - id = RT_TABLE_MAIN; - - return fib_tables[id]; -} - -static inline struct fib_table *fib_new_table(int id) -{ - if (id == 0) - id = RT_TABLE_MAIN; - - return fib_tables[id] ? : __fib_new_table(id); -} - extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -249,7 +234,7 @@ extern int fib_convert_rtentry(int cmd, extern u32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ -extern struct fib_table *fib_hash_init(int id); +extern struct fib_table *fib_hash_init(u32 id); #ifdef CONFIG_IP_MULTIPLE_TABLES /* Exported by fib_rules.c */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4901ee4..613eacc 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -139,6 +139,7 @@ struct neighbour int (*output)(struct sk_buff *skb); struct sk_buff_head arp_queue; struct timer_list timer; + u32 in_timer; /* boolean, space for more flags as needed. */ struct neigh_ops *ops; u8 primary_key[0]; }; diff --git a/include/net/sock.h b/include/net/sock.h index c9fad6f..101c448 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -385,6 +385,10 @@ enum sock_flags { SOCK_NO_LARGESEND, /* whether to sent large segments or not */ SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */ SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ + SOCK_DONT_DO_LL_FCS, /* Tell NIC not to do the ethernet FCS. 
Will use + * last 4 bytes of packet sent from user-space + * instead. + */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 3948949..136e63f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -1,4 +1,4 @@ -/* +/* -*- linux-c -*- * INET 802.1Q VLAN * Ethernet-type device handling. * diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index da9cfe9..e49e252 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -439,6 +439,11 @@ int vlan_dev_hard_start_xmit(struct sk_b struct net_device_stats *stats = vlan_dev_get_stats(dev); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); + /* Please note, dev_queue_xmit consumes the pkt regardless of the + * return value. So, will copy the skb first and free if successful. + */ + struct sk_buff* skb2 = skb_get(skb); + /* Handle non-VLAN frames if they are sent to us, for example by DHCP. * * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING @@ -468,6 +473,10 @@ #endif skb = __vlan_put_tag(skb, veth_TCI); if (!skb) { stats->tx_dropped++; + /* Free the extra copy, assuming this is a non-recoverable + * issue and we don't want calling code to retry. + */ + kfree_skb(skb2); return 0; } @@ -485,13 +494,24 @@ #ifdef VLAN_DEBUG veth->h_vlan_proto, veth->h_vlan_TCI, veth->h_vlan_encapsulated_proto); #endif - stats->tx_packets++; /* for statics only */ - stats->tx_bytes += skb->len; - skb->dev = VLAN_DEV_INFO(dev)->real_dev; - dev_queue_xmit(skb); - return 0; + { + int rv = dev_queue_xmit(skb); + if (rv == 0) { + /* Was success, need to free the skb reference since + * we bumped up the user count above. If there was an + * error instead, then the skb2 will not be freed, and so + * the calling code will be able to re-send it. 
+ */ + + stats->tx_packets++; /* for statistics only */ + stats->tx_bytes += skb2->len; + + kfree_skb(skb2); + } + return rv; + } } int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/net/Kconfig b/net/Kconfig index 4193cdc..029ec20 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -161,6 +161,8 @@ source "net/tipc/Kconfig" source "net/atm/Kconfig" source "net/bridge/Kconfig" source "net/8021q/Kconfig" +source "net/macvlan/Kconfig" +source "net/redir/Kconfig" source "net/decnet/Kconfig" source "net/llc/Kconfig" source "net/ipx/Kconfig" @@ -198,6 +200,13 @@ source "net/econet/Kconfig" source "net/wanrouter/Kconfig" source "net/sched/Kconfig" +config DEBUG_NETDEV_REFCOUNT + bool "Debug network device reference counting." + help + You can say Y here if you want to debug netdevice reference + counting. This is likely to slow down the networking code + so only enable if you actually need this. + menu "Network testing" config NET_PKTGEN @@ -215,6 +224,14 @@ config NET_PKTGEN To compile this code as a module, choose M here: the module will be called pktgen. +config SUPPORT_SEND_BAD_CRC + bool "Support Send Bad CRC (USE WITH CAUTION)" + ---help--- + When enabled, one can send a specially crafted packet to the ethernet + device via a raw socket and it will be sent with the last 4 bytes of + the packet as the ethernet CRC. Requires driver support. Current driver + support is limited to e100 and e1000.
+ endmenu endmenu diff --git a/net/Makefile b/net/Makefile index 065796f..0fdf59d 100644 --- a/net/Makefile +++ b/net/Makefile @@ -46,6 +46,8 @@ obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ +obj-$(CONFIG_MACVLAN) += macvlan/ +obj-$(CONFIG_REDIRDEV) += redir/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/net/core/dev.c b/net/core/dev.c index 4fba549..692fa0f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -90,6 +90,7 @@ #include #include #include #include +#include #include #include #include @@ -116,6 +117,22 @@ #include #include #include +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) +#include "pktgen.h" + +#warning "Compiling dev.c for pktgen."; + +int (*handle_pktgen_hook)(struct sk_buff *skb) = NULL; +EXPORT_SYMBOL(handle_pktgen_hook); + +static __inline__ int handle_pktgen_rcv(struct sk_buff* skb) { + if (handle_pktgen_hook) { + return handle_pktgen_hook(skb); + } + return -1; +} +#endif + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -1668,6 +1685,23 @@ static int ing_filter(struct sk_buff *sk } #endif + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +/* Returns >= 0 if we consume the packet. Otherwise, let + * it fall through the rest of the packet processing. + */ +int (*macvlan_handle_frame_hook)(struct sk_buff *skb) = NULL; +EXPORT_SYMBOL(macvlan_handle_frame_hook); +#endif + +/* Returns >= 0 if we consume the packet. Otherwise, let + * it fall through the rest of the packet processing. 
+ */ +static __inline__ int handle_macvlan(struct sk_buff *skb) { + return macvlan_handle_frame_hook(skb); +} + + int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -1738,6 +1772,32 @@ #endif if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (skb->dev->macvlan_priv != NULL && + macvlan_handle_frame_hook != NULL) { + if (handle_macvlan(skb) >= 0) { + /* consumed by mac-vlan...it would have been + * re-sent to this method with a different + * device... + */ + goto out; + } + else { + /* Let it fall through and be processed normally */ + } + } +#endif + +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) + if ((skb->dev->priv_flags & IFF_PKTGEN_RCV) && + (handle_pktgen_rcv(skb) >= 0)) { + /* Pktgen may consume the packet, no need to send + * to further protocols. + */ + goto out; + } +#endif + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { if (ptype->type == type && @@ -2486,6 +2546,24 @@ static int dev_ifsioc(struct ifreq *ifr, ifr->ifr_newname[IFNAMSIZ-1] = '\0'; return dev_change_name(dev, ifr->ifr_newname); + case SIOCSACCEPTLOCALADDRS: + if (ifr->ifr_flags) { + dev->priv_flags |= IFF_ACCEPT_LOCAL_ADDRS; + } + else { + dev->priv_flags &= ~IFF_ACCEPT_LOCAL_ADDRS; + } + return 0; + + case SIOCGACCEPTLOCALADDRS: + if (dev->priv_flags & IFF_ACCEPT_LOCAL_ADDRS) { + ifr->ifr_flags = 1; + } + else { + ifr->ifr_flags = 0; + } + return 0; + /* * Unknown or private ioctl */ @@ -2584,6 +2662,7 @@ int dev_ioctl(unsigned int cmd, void __u case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: + case SIOCGACCEPTLOCALADDRS: dev_load(ifr.ifr_name); read_lock(&dev_base_lock); ret = dev_ifsioc(&ifr, cmd); @@ -2658,6 +2737,7 @@ int dev_ioctl(unsigned int cmd, void __u case SIOCBONDCHANGEACTIVE: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCSACCEPTLOCALADDRS: if (!capable(CAP_NET_ADMIN)) return -EPERM; /* fall 
through */ @@ -3404,6 +3484,10 @@ EXPORT_SYMBOL(net_enable_timestamp); EXPORT_SYMBOL(net_disable_timestamp); EXPORT_SYMBOL(dev_get_flags); +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) +EXPORT_SYMBOL(handle_pktgen_rcv); +#endif + #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) EXPORT_SYMBOL(br_handle_frame_hook); EXPORT_SYMBOL(br_fdb_get_hook); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index c680b7e..d93732a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1,4 +1,4 @@ -/* +/* -*- linux-c -*- * net/core/ethtool.c - Ethtool ioctl handler * Copyright (c) 2003 Matthew Wilcox * @@ -33,6 +33,12 @@ u32 ethtool_op_get_tx_csum(struct net_de return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0; } +u32 ethtool_op_get_rx_all(struct net_device *dev, u32* retval) +{ + *retval = ((dev->priv_flags & IFF_ACCEPT_ALL_FRAMES) != 0); + return 0; +} + int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) { if (data) @@ -775,6 +781,88 @@ static int ethtool_get_perm_addr(struct return ret; } + +static int ethtool_get_rx_all(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + int rv = 0; + + if (!dev->ethtool_ops->get_rx_all) + return -EOPNOTSUPP; + + if ((rv = dev->ethtool_ops->get_rx_all(dev, &edata.data)) < 0) { + return rv; + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + + +static int ethtool_set_rx_all(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!dev->ethtool_ops->set_rx_all) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return dev->ethtool_ops->set_rx_all(dev, id.data); +} + +static int ethtool_get_rx_fcs(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + int rv = 0; + + if (!dev->ethtool_ops->get_save_fcs) + return -EOPNOTSUPP; + + if ((rv = dev->ethtool_ops->get_save_fcs(dev, &edata.data)) < 0) { 
+ return rv; + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + + +static int ethtool_set_rx_fcs(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!dev->ethtool_ops->set_save_fcs) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return dev->ethtool_ops->set_save_fcs(dev, id.data); +} + + +/* Handle some generic ethtool commands here */ +static int ethtool_get_netdev_stats(struct net_device *dev, void *useraddr) { + + struct ethtool_ndstats* nds = (struct ethtool_ndstats*)(useraddr); + + struct net_device_stats *stats = dev->get_stats(dev); + if (stats) { + if (copy_to_user(nds->data, stats, sizeof(*stats))) { + return -EFAULT; + } + } + else { + return -EOPNOTSUPP; + } + return 0; +} + + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct ifreq *ifr) @@ -795,18 +883,28 @@ int dev_ethtool(struct ifreq *ifr) if (!dev || !netif_device_present(dev)) return -ENODEV; - if (!dev->ethtool_ops) - goto ioctl; - if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd))) return -EFAULT; - if(dev->ethtool_ops->begin) + if(dev->ethtool_ops && dev->ethtool_ops->begin) if ((rc = dev->ethtool_ops->begin(dev)) < 0) return rc; old_features = dev->features; - + + /* Handle some generic operations that do not require specific + * ethtool handlers.
+ */ + switch (ethcmd) { + case ETHTOOL_GNDSTATS: + return ethtool_get_netdev_stats(dev, useraddr); + default: + break; + } + + if (!dev->ethtool_ops) + goto ioctl; + switch (ethcmd) { case ETHTOOL_GSET: rc = ethtool_get_settings(dev, useraddr); @@ -895,6 +993,18 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_PHYS_ID: rc = ethtool_phys_id(dev, useraddr); break; + case ETHTOOL_SETRXALL: + rc = ethtool_set_rx_all(dev, useraddr); + break; + case ETHTOOL_GETRXALL: + rc = ethtool_get_rx_all(dev, useraddr); + break; + case ETHTOOL_SETRXFCS: + rc = ethtool_set_rx_fcs(dev, useraddr); + break; + case ETHTOOL_GETRXFCS: + rc = ethtool_get_rx_fcs(dev, useraddr); + break; case ETHTOOL_GSTATS: rc = ethtool_get_stats(dev, useraddr); break; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index c23e9c0..62882fd 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1,22 +1,12 @@ -/* - * Authors: - * Copyright 2001, 2002 by Robert Olsson - * Uppsala University and - * Swedish University of Agricultural Sciences - * - * Alexey Kuznetsov - * Ben Greear - * Jens Låås - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. +/* -*-linux-c-*- * + * Copyright 2001, 2002 by Robert Olsson + * Uppsala University, Sweden + * 2002 Ben Greear * * A tool for loading the network with preconfigurated packets. * The tool is implemented as a linux module. Parameters are output - * device, delay (to hard_xmit), number of packets, and whether + * device, IPG (interpacket gap), number of packets, and whether * to use multiple SKBs or just the same one. * pktgen uses the installed interface's output routine. * @@ -43,6 +33,16 @@ * latencies (with micro-second) precision. * * Add IOCTL interface to easily get counters & configuration. 
* --Ben Greear + + * Fix refcount off by one if first packet fails, potential null deref, + * memleak 030710- KJP + * + * * Added the IPMAC option to allow the MAC addresses to mirror IP addresses. + * -- (dhetheri) Dave Hetherington 03/09/29 + * * Allow the user to change the protocol field via 'pgset "prot 0"' command + * -- (dhetheri) Dave Hetherington 03/10/7 + * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br) + * * * Renamed multiskb to clone_skb and cleaned up sending core for two distinct * skb modes. A clone_skb=0 mode for Ben "ranges" work and a clone_skb != 0 @@ -54,307 +54,123 @@ * Also moved to /proc/net/pktgen/ * --ro * - * Sept 10: Fixed threading/locking. Lots of bone-headed and more clever - * mistakes. Also merged in DaveM's patch in the -pre6 patch. - * --Ben Greear - * - * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br) - * - * - * 021124 Finished major redesign and rewrite for new functionality. - * See Documentation/networking/pktgen.txt for how to use this. - * - * The new operation: - * For each CPU one thread/process is created at start. This process checks - * for running devices in the if_list and sends packets until count is 0 it - * also the thread checks the thread->control which is used for inter-process - * communication. controlling process "posts" operations to the threads this - * way. The if_lock should be possible to remove when add/rem_device is merged - * into this too. - * - * By design there should only be *one* "controlling" process. In practice - * multiple write accesses gives unpredictable result. Understood by "write" - * to /proc gives result code thats should be read be the "writer". - * For practical use this should be no problem. - * - * Note when adding devices to a specific CPU there good idea to also assign - * /proc/irq/XX/smp_affinity so TX-interrupts gets bound to the same CPU. 
- * --ro - * - * Fix refcount off by one if first packet fails, potential null deref, + * Fix refcount off by one if first packet fails, potential null deref, * memleak 030710- KJP * - * First "ranges" functionality for ipv6 030726 --ro - * - * Included flow support. 030802 ANK. - * - * Fixed unaligned access on IA-64 Grant Grundler - * - * Remove if fix from added Harald Welte 040419 - * ia64 compilation fix from Aron Griffis 040604 - * - * New xmit() return, do_div and misc clean up by Stephen Hemminger - * 040923 - * - * Randy Dunlap fixed u64 printk compiler waring * - * Remove FCS from BW calculation. Lennert Buytenhek - * New time handling. Lennert Buytenhek 041213 - * - * Corrections from Nikolai Malykh (nmalykh@bilim.com) - * Removed unused flags F_SET_SRCMAC & F_SET_SRCIP 041230 - * - * interruptible_sleep_on_timeout() replaced Nishanth Aravamudan - * 050103 - * - * MPLS support by Steven Whitehouse + * Sept 10: Fixed threading/locking. Lots of bone-headed and more clever + * mistakes. Also merged in DaveM's patch in the -pre6 patch. * + * See Documentation/networking/pktgen.txt for how to use this. 
*/ -#include -#include + #include -#include #include -#include -#include #include -#include -#include -#include +#include #include #include #include #include +#include #include -#include +#include #include -#include -#include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include #include #include #include -#include /* do_div */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#define VERSION "pktgen v2.67: Packet Generator for packet performance testing.\n" +#include /* for lock kernel */ +#include /* do_div */ -/* #define PG_DEBUG(a) a */ -#define PG_DEBUG(a) - -/* The buckets are exponential in 'width' */ -#define LAT_BUCKETS_MAX 32 -#define IP_NAME_SZ 32 -#define MAX_MPLS_LABELS 16 /* This is the max label stack depth */ -#define MPLS_STACK_BOTTOM __constant_htonl(0x00000100) - -/* Device flag bits */ -#define F_IPSRC_RND (1<<0) /* IP-Src Random */ -#define F_IPDST_RND (1<<1) /* IP-Dst Random */ -#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ -#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ -#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ -#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ -#define F_TXSIZE_RND (1<<6) /* Transmit size is random */ -#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ -#define F_MPLS_RND (1<<8) /* Random MPLS labels */ - -/* Thread control flag bits */ -#define T_TERMINATE (1<<0) -#define T_STOP (1<<1) /* Stop run */ -#define T_RUN (1<<2) /* Start run */ -#define T_REMDEVALL (1<<3) /* Remove all devs */ -#define T_REMDEV (1<<4) /* Remove one dev */ - -/* If lock -- can be removed after some work */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); -/* Used to help with determining the pkts on receive */ -#define PKTGEN_MAGIC 0xbe9be955 -#define PG_PROC_DIR "pktgen" -#define 
PGCTRL "pgctrl" -static struct proc_dir_entry *pg_proc_dir = NULL; +#include +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +#include "../macvlan/macvlan.h" +#endif -#define MAX_CFLOWS 65536 +#include "pktgen.h" -struct flow_state { - __u32 cur_daddr; - int count; -}; -struct pktgen_dev { +#define VERSION "pktgen version 1.9.3 (nospin)" +static char version[] __initdata = + "pktgen.c: v1.9.3 (nospin): Packet Generator for packet performance testing.\n"; - /* - * Try to keep frequent/infrequent used vars. separated. - */ +/* Used to help with determining the pkts on receive */ - char ifname[IFNAMSIZ]; - char result[512]; +#define PKTGEN_MAGIC 0xbe9be955 - struct pktgen_thread *pg_thread; /* the owner */ - struct list_head list; /* Used for chaining in the thread's run-queue */ +/* #define PG_DEBUG(a) a */ +#define PG_DEBUG(a) /* a */ - int running; /* if this changes to false, the test will stop */ +/* cycles per micro-second */ +static u32 pg_cycles_per_ns; +static u32 pg_cycles_per_us; +static u32 pg_cycles_per_ms; - /* If min != max, then we will either do a linear iteration, or - * we will do a random selection from within the range. - */ - __u32 flags; - int removal_mark; /* non-zero => the device is marked for - * removal by worker thread */ - - int min_pkt_size; /* = ETH_ZLEN; */ - int max_pkt_size; /* = ETH_ZLEN; */ - int nfrags; - __u32 delay_us; /* Default delay */ - __u32 delay_ns; - __u64 count; /* Default No packets to send */ - __u64 sofar; /* How many pkts we've sent so far */ - __u64 tx_bytes; /* How many bytes we've transmitted */ - __u64 errors; /* Errors when trying to transmit, pkts will be re-sent */ - - /* runtime counters relating to clone_skb */ - __u64 next_tx_us; /* timestamp of when to tx next */ - __u32 next_tx_ns; - - __u64 allocated_skbs; - __u32 clone_count; - int last_ok; /* Was last skb sent? - * Or a failed transmit of some sort? This will keep - * sequence numbers in order, for example. 
- */ - __u64 started_at; /* micro-seconds */ - __u64 stopped_at; /* micro-seconds */ - __u64 idle_acc; /* micro-seconds */ - __u32 seq_num; - - int clone_skb; /* Use multiple SKBs during packet gen. If this number - * is greater than 1, then that many copies of the same - * packet will be sent before a new packet is allocated. - * For instance, if you want to send 1024 identical packets - * before creating a new packet, set clone_skb to 1024. - */ - - char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - - struct in6_addr in6_saddr; - struct in6_addr in6_daddr; - struct in6_addr cur_in6_daddr; - struct in6_addr cur_in6_saddr; - /* For ranges */ - struct in6_addr min_in6_daddr; - struct in6_addr max_in6_daddr; - struct in6_addr min_in6_saddr; - struct in6_addr max_in6_saddr; - - /* If we're doing ranges, random or incremental, then this - * defines the min/max for those ranges. 
- */ - __u32 saddr_min; /* inclusive, source IP address */ - __u32 saddr_max; /* exclusive, source IP address */ - __u32 daddr_min; /* inclusive, dest IP address */ - __u32 daddr_max; /* exclusive, dest IP address */ - - __u16 udp_src_min; /* inclusive, source UDP port */ - __u16 udp_src_max; /* exclusive, source UDP port */ - __u16 udp_dst_min; /* inclusive, dest UDP port */ - __u16 udp_dst_max; /* exclusive, dest UDP port */ - - /* MPLS */ - unsigned nr_labels; /* Depth of stack, 0 = no MPLS */ - __be32 labels[MAX_MPLS_LABELS]; - - __u32 src_mac_count; /* How many MACs to iterate through */ - __u32 dst_mac_count; /* How many MACs to iterate through */ - - unsigned char dst_mac[ETH_ALEN]; - unsigned char src_mac[ETH_ALEN]; - - __u32 cur_dst_mac_offset; - __u32 cur_src_mac_offset; - __u32 cur_saddr; - __u32 cur_daddr; - __u16 cur_udp_dst; - __u16 cur_udp_src; - __u32 cur_pkt_size; - - __u8 hh[14]; - /* = { - 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, - - We fill in SRC address later - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x00 - }; - */ - __u16 pad; /* pad out the hh struct to an even 16 bytes */ - - struct sk_buff *skb; /* skb we are to transmit next, mainly used for when we - * are transmitting the same one multiple times - */ - struct net_device *odev; /* The out-going device. Note that the device should - * have it's pg_info pointer pointing back to this - * device. This will be set when the user specifies - * the out-going device name (not when the inject is - * started as it used to do.) - */ - struct flow_state *flows; - unsigned cflows; /* Concurrent flows (config) */ - unsigned lflow; /* Flow length (config) */ - unsigned nflows; /* accumulated flows (stats) */ -}; +/* Module parameters, defaults. 
*/ +static int pg_count_d = 0; /* run forever by default */ +static int pg_ipg_d = 0; +static int pg_multiskb_d = 0; +static int pg_thread_count = 1; /* Initial threads to create */ +static int debug = 0; +module_param(pg_count_d, int, 0); +module_param(pg_ipg_d, int, 0); +module_param(pg_thread_count, int, 0); +module_param(pg_multiskb_d, int, 0); +module_param(debug, int, 0); -struct pktgen_hdr { - __u32 pgh_magic; - __u32 seq_num; - __u32 tv_sec; - __u32 tv_usec; -}; -struct pktgen_thread { - spinlock_t if_lock; - struct list_head if_list; /* All device here */ - struct list_head th_list; - int removed; - char name[32]; - char result[512]; - u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ - /* Field for thread to receive "posted" events terminate, stop ifs etc. */ +/* List of all running threads */ +static struct pktgen_thread_info* pktgen_threads = NULL; +spinlock_t _pg_threadlist_lock = SPIN_LOCK_UNLOCKED; - u32 control; - int pid; - int cpu; +/* Holds interfaces for all threads */ +#define PG_INFO_HASH_MAX 32 +static struct pktgen_interface_info* pg_info_hash[PG_INFO_HASH_MAX]; +spinlock_t _pg_hash_lock = SPIN_LOCK_UNLOCKED; - wait_queue_head_t queue; -}; +#define PG_PROC_DIR "pktgen" +static struct proc_dir_entry *pg_proc_dir = NULL; -#define REMOVE 1 -#define FIND 0 +char module_fname[128]; +struct proc_dir_entry *module_proc_ent = NULL; + + +static void init_pktgen_kthread(struct pktgen_thread_info *kthread, char *name); +static int pg_rem_interface_info(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* i); +static int pg_add_interface_info(struct pktgen_thread_info* pg_thread, + const char* ifname); +static void exit_pktgen_kthread(struct pktgen_thread_info *kthread); +static void stop_pktgen_kthread(struct pktgen_thread_info *kthread); +static struct pktgen_thread_info* pg_find_thread(const char* name); +static int pg_add_thread_info(const char* name); +static struct pktgen_interface_info* 
pg_find_interface(struct pktgen_thread_info* pg_thread, + const char* ifname); +static int pktgen_device_event(struct notifier_block *, unsigned long, void *); + + +struct notifier_block pktgen_notifier_block = { + notifier_call: pktgen_device_event, +}; /* This code works around the fact that do_div cannot handle two 64-bit numbers, and regular 64-bit division doesn't work on x86 kernels. @@ -362,6 +178,7 @@ #define FIND 0 */ #define PG_DIV 0 +#define PG_REM 1 /* This was emailed to LMKL by: Chris Caputo * Function copied/adapted/optimized from: @@ -371,1309 +188,533 @@ #define PG_DIV 0 * Copyright 1994, University of Cambridge Computer Laboratory * All Rights Reserved. * + * TODO: When running on a 64-bit CPU platform, this should no longer be + * TODO: necessary. */ -static inline s64 divremdi3(s64 x, s64 y, int type) -{ - u64 a = (x < 0) ? -x : x; - u64 b = (y < 0) ? -y : y; - u64 res = 0, d = 1; - - if (b > 0) { - while (b < a) { - b <<= 1; - d <<= 1; - } - } - - do { - if (a >= b) { - a -= b; - res += d; - } - b >>= 1; - d >>= 1; - } - while (d); - - if (PG_DIV == type) { - return (((x ^ y) & (1ll << 63)) == 0) ? res : -(s64) res; - } else { - return ((x & (1ll << 63)) == 0) ? a : -(s64) a; - } -} +inline static s64 divremdi3(s64 x, s64 y, int type) { + u64 a = (x < 0) ? -x : x; + u64 b = (y < 0) ? -y : y; + u64 res = 0, d = 1; + + if (b > 0) { + while (b < a) { + b <<= 1; + d <<= 1; + } + } + + do { + if ( a >= b ) { + a -= b; + res += d; + } + b >>= 1; + d >>= 1; + } + while (d); + + if (PG_DIV == type) { + return (((x ^ y) & (1ll<<63)) == 0) ? res : -(s64)res; + } + else { + return ((x & (1ll<<63)) == 0) ? 
a : -(s64)a; + } +}/* divremdi3 */ /* End of hacks to deal with 64-bit math on x86 */ -/** Convert to milliseconds */ -static inline __u64 tv_to_ms(const struct timeval *tv) -{ - __u64 ms = tv->tv_usec / 1000; - ms += (__u64) tv->tv_sec * (__u64) 1000; - return ms; -} -/** Convert to micro-seconds */ -static inline __u64 tv_to_us(const struct timeval *tv) -{ - __u64 us = tv->tv_usec; - us += (__u64) tv->tv_sec * (__u64) 1000000; - return us; -} -static inline __u64 pg_div(__u64 n, __u32 base) -{ - __u64 tmp = n; - do_div(tmp, base); - /* printk("pktgen: pg_div, n: %llu base: %d rv: %llu\n", - n, base, tmp); */ - return tmp; +inline static void pg_lock_thread_list(const char* msg) { + if (debug > 1) { + printk("before pg_lock_thread_list, msg: %s\n", msg); + } + spin_lock(&_pg_threadlist_lock); + if (debug > 1) { + printk("after pg_lock_thread_list, msg: %s\n", msg); + } } -static inline __u64 pg_div64(__u64 n, __u64 base) -{ - __u64 tmp = n; -/* - * How do we know if the architecture we are running on - * supports division with 64 bit base? 
- * - */ -#if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__) - - do_div(tmp, base); -#else - tmp = divremdi3(n, base, PG_DIV); -#endif - return tmp; +inline static void pg_unlock_thread_list(const char* msg) { + if (debug > 1) { + printk("before pg_unlock_thread_list, msg: %s\n", msg); + } + spin_unlock(&_pg_threadlist_lock); + if (debug > 1) { + printk("after pg_unlock_thread_list, msg: %s\n", msg); + } } -static inline u32 pktgen_random(void) -{ -#if 0 - __u32 n; - get_random_bytes(&n, 4); - return n; -#else - return net_random(); -#endif +inline static void pg_lock_hash(const char* msg) { + if (debug > 1) { + printk("before pg_lock_hash, msg: %s\n", msg); + } + spin_lock(&_pg_hash_lock); + if (debug > 1) { + printk("after pg_lock_hash, msg: %s\n", msg); + } } -static inline __u64 getCurMs(void) -{ - struct timeval tv; - do_gettimeofday(&tv); - return tv_to_ms(&tv); +inline static void pg_unlock_hash(const char* msg) { + if (debug > 1) { + printk("before pg_unlock_hash, msg: %s\n", msg); + } + spin_unlock(&_pg_hash_lock); + if (debug > 1) { + printk("after pg_unlock_hash, msg: %s\n", msg); + } } -static inline __u64 getCurUs(void) -{ - struct timeval tv; - do_gettimeofday(&tv); - return tv_to_us(&tv); +inline static void pg_lock(struct pktgen_thread_info* pg_thread, const char* msg) { + if (debug > 1) { + printk("before pg_lock thread, msg: %s\n", msg); + } + spin_lock(&(pg_thread->pg_threadlock)); + if (debug > 1) { + printk("after pg_lock thread, msg: %s\n", msg); + } } -static inline __u64 tv_diff(const struct timeval *a, const struct timeval *b) -{ - return tv_to_us(a) - tv_to_us(b); +inline static void pg_unlock(struct pktgen_thread_info* pg_thread, const char* msg) { + if (debug > 1) { + printk("before pg_unlock thread, thread: %p msg: %s\n", + pg_thread, msg); + } + spin_unlock(&(pg_thread->pg_threadlock)); + if (debug > 1) { + printk("after pg_unlock thread, thread: %p msg: %s\n", +
pg_thread, msg); + } } -/* old include end */ - -static char version[] __initdata = VERSION; - -static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i); -static int pktgen_add_device(struct pktgen_thread *t, const char *ifname); -static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, - const char *ifname); -static int pktgen_device_event(struct notifier_block *, unsigned long, void *); -static void pktgen_run_all_threads(void); -static void pktgen_stop_all_threads_ifs(void); -static int pktgen_stop_device(struct pktgen_dev *pkt_dev); -static void pktgen_stop(struct pktgen_thread *t); -static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); -static int pktgen_mark_device(const char *ifname); -static unsigned int scan_ip6(const char *s, char ip[16]); -static unsigned int fmt_ip6(char *s, const char ip[16]); - -/* Module parameters, defaults. */ -static int pg_count_d = 1000; /* 1000 pkts by default */ -static int pg_delay_d; -static int pg_clone_skb_d; -static int debug; - -static DEFINE_MUTEX(pktgen_thread_lock); -static LIST_HEAD(pktgen_threads); - -static struct notifier_block pktgen_notifier_block = { - .notifier_call = pktgen_device_event, -}; - -/* - * /proc handling functions - * - */ - -static int pgctrl_show(struct seq_file *seq, void *v) -{ - seq_puts(seq, VERSION); - return 0; +/** Convert to milliseconds */ +static inline __u64 tv_to_ms(const struct timeval* tv) { + __u64 ms = tv->tv_usec / 1000; + ms += (__u64)tv->tv_sec * (__u64)1000; + return ms; } -static ssize_t pgctrl_write(struct file *file, const char __user * buf, - size_t count, loff_t * ppos) -{ - int err = 0; - char data[128]; - if (!capable(CAP_NET_ADMIN)) { - err = -EPERM; - goto out; - } - - if (count > sizeof(data)) - count = sizeof(data); - - if (copy_from_user(data, buf, count)) { - err = -EFAULT; - goto out; - } - data[count - 1] = 0; /* Make string */ - - if (!strcmp(data, "stop")) - pktgen_stop_all_threads_ifs(); - - else if (!strcmp(data,
"start")) - pktgen_run_all_threads(); - - else - printk("pktgen: Unknown command: %s\n", data); - - err = count; - -out: - return err; -} - -static int pgctrl_open(struct inode *inode, struct file *file) -{ - return single_open(file, pgctrl_show, PDE(inode)->data); +/** Convert to micro-seconds */ +static inline __u64 tv_to_us(const struct timeval* tv) { + __u64 us = tv->tv_usec; + us += (__u64)tv->tv_sec * (__u64)1000000; + return us; } -static struct file_operations pktgen_fops = { - .owner = THIS_MODULE, - .open = pgctrl_open, - .read = seq_read, - .llseek = seq_lseek, - .write = pgctrl_write, - .release = single_release, -}; - -static int pktgen_if_show(struct seq_file *seq, void *v) -{ - int i; - struct pktgen_dev *pkt_dev = seq->private; - __u64 sa; - __u64 stopped; - __u64 now = getCurUs(); - - seq_printf(seq, - "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", - (unsigned long long)pkt_dev->count, pkt_dev->min_pkt_size, - pkt_dev->max_pkt_size); - - seq_printf(seq, - " frags: %d delay: %u clone_skb: %d ifname: %s\n", - pkt_dev->nfrags, - 1000 * pkt_dev->delay_us + pkt_dev->delay_ns, - pkt_dev->clone_skb, pkt_dev->ifname); - - seq_printf(seq, " flows: %u flowlen: %u\n", pkt_dev->cflows, - pkt_dev->lflow); - - if (pkt_dev->flags & F_IPV6) { - char b1[128], b2[128], b3[128]; - fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); - fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr); - fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr); - seq_printf(seq, - " saddr: %s min_saddr: %s max_saddr: %s\n", b1, - b2, b3); - - fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr); - fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr); - fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr); - seq_printf(seq, - " daddr: %s min_daddr: %s max_daddr: %s\n", b1, - b2, b3); - - } else - seq_printf(seq, - " dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", - pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, - pkt_dev->src_max); - - seq_puts(seq, " src_mac: "); - - if (is_zero_ether_addr(pkt_dev->src_mac)) - for (i = 0; 
i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->odev->dev_addr[i], - i == 5 ? " " : ":"); - else - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->src_mac[i], - i == 5 ? " " : ":"); - - seq_printf(seq, "dst_mac: "); - for (i = 0; i < 6; i++) - seq_printf(seq, "%02X%s", pkt_dev->dst_mac[i], - i == 5 ? "\n" : ":"); - - seq_printf(seq, - " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", - pkt_dev->udp_src_min, pkt_dev->udp_src_max, - pkt_dev->udp_dst_min, pkt_dev->udp_dst_max); - - seq_printf(seq, - " src_mac_count: %d dst_mac_count: %d\n", - pkt_dev->src_mac_count, pkt_dev->dst_mac_count); - - if (pkt_dev->nr_labels) { - unsigned i; - seq_printf(seq, " mpls: "); - for(i = 0; i < pkt_dev->nr_labels; i++) - seq_printf(seq, "%08x%s", ntohl(pkt_dev->labels[i]), - i == pkt_dev->nr_labels-1 ? "\n" : ", "); - } - - seq_printf(seq, " Flags: "); - - if (pkt_dev->flags & F_IPV6) - seq_printf(seq, "IPV6 "); - - if (pkt_dev->flags & F_IPSRC_RND) - seq_printf(seq, "IPSRC_RND "); - - if (pkt_dev->flags & F_IPDST_RND) - seq_printf(seq, "IPDST_RND "); - - if (pkt_dev->flags & F_TXSIZE_RND) - seq_printf(seq, "TXSIZE_RND "); - - if (pkt_dev->flags & F_UDPSRC_RND) - seq_printf(seq, "UDPSRC_RND "); - - if (pkt_dev->flags & F_UDPDST_RND) - seq_printf(seq, "UDPDST_RND "); - - if (pkt_dev->flags & F_MPLS_RND) - seq_printf(seq, "MPLS_RND "); - - if (pkt_dev->flags & F_MACSRC_RND) - seq_printf(seq, "MACSRC_RND "); - - if (pkt_dev->flags & F_MACDST_RND) - seq_printf(seq, "MACDST_RND "); - - seq_puts(seq, "\n"); - sa = pkt_dev->started_at; - stopped = pkt_dev->stopped_at; - if (pkt_dev->running) - stopped = now; /* not really stopped, more like last-running-at */ - - seq_printf(seq, - "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", - (unsigned long long)pkt_dev->sofar, - (unsigned long long)pkt_dev->errors, (unsigned long long)sa, - (unsigned long long)stopped, - (unsigned long long)pkt_dev->idle_acc); - - 
seq_printf(seq, - " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", - pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, - pkt_dev->cur_src_mac_offset); - - if (pkt_dev->flags & F_IPV6) { - char b1[128], b2[128]; - fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr); - fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr); - seq_printf(seq, " cur_saddr: %s cur_daddr: %s\n", b2, b1); - } else - seq_printf(seq, " cur_saddr: 0x%x cur_daddr: 0x%x\n", - pkt_dev->cur_saddr, pkt_dev->cur_daddr); - - seq_printf(seq, " cur_udp_dst: %d cur_udp_src: %d\n", - pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); - - seq_printf(seq, " flows: %u\n", pkt_dev->nflows); - - if (pkt_dev->result[0]) - seq_printf(seq, "Result: %s\n", pkt_dev->result); - else - seq_printf(seq, "Result: Idle\n"); - - return 0; +static inline __u64 pg_div(__u64 n, __u32 base) { + __u64 tmp = n; + do_div(tmp, base); + /* printk("pg_div, n: %llu base: %d rv: %llu\n", + n, base, tmp); */ + return tmp; } - -static int hex32_arg(const char __user *user_buffer, __u32 *num) -{ - int i = 0; - *num = 0; - - for(; i < 8; i++) { - char c; - *num <<= 4; - if (get_user(c, &user_buffer[i])) - return -EFAULT; - if ((c >= '0') && (c <= '9')) - *num |= c - '0'; - else if ((c >= 'a') && (c <= 'f')) - *num |= c - 'a' + 10; - else if ((c >= 'A') && (c <= 'F')) - *num |= c - 'A' + 10; - else - break; - } - return i; +/* Fast, not horribly accurate, since the machine started. */ +static inline __u64 getRelativeCurMs(void) { + return pg_div(get_cycles(), pg_cycles_per_ms); } -static int count_trail_chars(const char __user * user_buffer, - unsigned int maxlen) -{ - int i; - - for (i = 0; i < maxlen; i++) { - char c; - if (get_user(c, &user_buffer[i])) - return -EFAULT; - switch (c) { - case '\"': - case '\n': - case '\r': - case '\t': - case ' ': - case '=': - break; - default: - goto done; - }; - } -done: - return i; +/* Since the epoch.
More precise over long periods of time than + * getRelativeCurMs + */ +static inline __u64 getCurMs(void) { + struct timeval tv; + do_gettimeofday(&tv); + return tv_to_ms(&tv); } -static unsigned long num_arg(const char __user * user_buffer, - unsigned long maxlen, unsigned long *num) -{ - int i = 0; - *num = 0; - - for (; i < maxlen; i++) { - char c; - if (get_user(c, &user_buffer[i])) - return -EFAULT; - if ((c >= '0') && (c <= '9')) { - *num *= 10; - *num += c - '0'; - } else - break; - } - return i; +/* Since the epoch. More precise over long periods of time than + * getRelativeCurUs + */ +static inline __u64 getCurUs(void) { + struct timeval tv; + do_gettimeofday(&tv); + return tv_to_us(&tv); } -static int strn_len(const char __user * user_buffer, unsigned int maxlen) -{ - int i = 0; - - for (; i < maxlen; i++) { - char c; - if (get_user(c, &user_buffer[i])) - return -EFAULT; - switch (c) { - case '\"': - case '\n': - case '\r': - case '\t': - case ' ': - goto done_str; - break; - default: - break; - }; - } -done_str: - return i; +/* Since the machine booted. */ +static inline __u64 getRelativeCurUs(void) { + return pg_div(get_cycles(), pg_cycles_per_us); } -static ssize_t get_labels(const char __user *buffer, struct pktgen_dev *pkt_dev) -{ - unsigned n = 0; - char c; - ssize_t i = 0; - int len; +/* Since the machine booted.
*/ +static inline __u64 getRelativeCurNs(void) { + return pg_div(get_cycles(), pg_cycles_per_ns); +} - pkt_dev->nr_labels = 0; - do { - __u32 tmp; - len = hex32_arg(&buffer[i], &tmp); - if (len <= 0) - return len; - pkt_dev->labels[n] = htonl(tmp); - if (pkt_dev->labels[n] & MPLS_STACK_BOTTOM) - pkt_dev->flags |= F_MPLS_RND; - i += len; - if (get_user(c, &buffer[i])) - return -EFAULT; - i++; - n++; - if (n >= MAX_MPLS_LABELS) - return -E2BIG; - } while(c == ','); - - pkt_dev->nr_labels = n; - return i; +static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b) { + return tv_to_us(a) - tv_to_us(b); } -static ssize_t pktgen_if_write(struct file *file, - const char __user * user_buffer, size_t count, - loff_t * offset) -{ - struct seq_file *seq = (struct seq_file *)file->private_data; - struct pktgen_dev *pkt_dev = seq->private; - int i = 0, max, len; - char name[16], valstr[32]; - unsigned long value = 0; - char *pg_result = NULL; - int tmp = 0; - char buf[128]; - pg_result = &(pkt_dev->result[0]); - if (count < 1) { - printk("pktgen: wrong command format\n"); +int pktgen_proc_ioctl(struct inode* inode, struct file* file, unsigned int cmd, + unsigned long arg) { + int err = 0; + struct pktgen_ioctl_info args; + struct pktgen_thread_info* targ = NULL; + + /* + if (!capable(CAP_NET_ADMIN)){ + return -EPERM; + } + */ + + if (copy_from_user(&args, (void*)arg, sizeof(args))) { + return -EFAULT; + } + + /* Null terminate the names */ + args.thread_name[31] = 0; + args.interface_name[31] = 0; + + /* printk("pktgen: thread_name: %s interface_name: %s\n", + * args.thread_name, args.interface_name); + */ + + switch (cmd) { + case GET_PKTGEN_INTERFACE_INFO: { + targ = pg_find_thread(args.thread_name); + if (targ) { + struct pktgen_interface_info* info; + info = pg_find_interface(targ, args.interface_name); + if (info) { + memcpy(&(args.info), info, sizeof(args.info)); + if (copy_to_user((void*)(arg), &args, sizeof(args))) { + printk("ERROR: pktgen: 
copy_to_user failed.\n"); + err = -EFAULT; + } + else { + err = 0; + } + } + else { + /* printk("ERROR: pktgen: Could not find interface -:%s:-\n", + args.interface_name);*/ + err = -ENODEV; + } + } + else { + printk("ERROR: pktgen: Could not find thread -:%s:-.\n", + args.thread_name); + err = -ENODEV; + } + break; + } + default: + /* pass on to underlying device instead?? */ + printk("%s: Unknown pktgen IOCTL: %x \n", __FUNCTION__, + cmd); return -EINVAL; - } + } + + return err; +}/* pktgen_proc_ioctl */ - max = count - i; - tmp = count_trail_chars(&user_buffer[i], max); - if (tmp < 0) { - printk("pktgen: illegal format\n"); - return tmp; - } - i += tmp; - - /* Read variable name */ - - len = strn_len(&user_buffer[i], sizeof(name) - 1); - if (len < 0) { - return len; - } - memset(name, 0, sizeof(name)); - if (copy_from_user(name, &user_buffer[i], len)) - return -EFAULT; - i += len; - - max = count - i; - len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - - i += len; - - if (debug) { - char tb[count + 1]; - if (copy_from_user(tb, user_buffer, count)) - return -EFAULT; - tb[count] = 0; - printk("pktgen: %s,%lu buffer -:%s:-\n", name, - (unsigned long)count, tb); - } - - if (!strcmp(name, "min_pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - if (value < 14 + 20 + 8) - value = 14 + 20 + 8; - if (value != pkt_dev->min_pkt_size) { - pkt_dev->min_pkt_size = value; - pkt_dev->cur_pkt_size = value; - } - sprintf(pg_result, "OK: min_pkt_size=%u", - pkt_dev->min_pkt_size); - return count; - } - - if (!strcmp(name, "max_pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - if (value < 14 + 20 + 8) - value = 14 + 20 + 8; - if (value != pkt_dev->max_pkt_size) { - pkt_dev->max_pkt_size = value; - pkt_dev->cur_pkt_size = value; - } - sprintf(pg_result, "OK: max_pkt_size=%u", - pkt_dev->max_pkt_size); - return count; - } - - /* Shortcut for 
min = max */ - - if (!strcmp(name, "pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - if (value < 14 + 20 + 8) - value = 14 + 20 + 8; - if (value != pkt_dev->min_pkt_size) { - pkt_dev->min_pkt_size = value; - pkt_dev->max_pkt_size = value; - pkt_dev->cur_pkt_size = value; - } - sprintf(pg_result, "OK: pkt_size=%u", pkt_dev->min_pkt_size); - return count; - } +static struct file_operations pktgen_fops = { + ioctl: pktgen_proc_ioctl, +}; - if (!strcmp(name, "debug")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; +/* Runs from interrupt */ +int pg_notify_queue_woken(struct net_device* dev) { + struct pktgen_thread_info* pg_thread = dev->nqw_data; + /* printk("pg_nqw, pg_thread: %p\n", pg_thread); */ + if (pg_thread && pg_thread->sleeping) { + if ((getRelativeCurNs() + 1000) > pg_thread->next_tx_ns) { + /* See if we should wake up the thread, wake + * slightly early (1000 ns) + */ + pg_thread->sleeping = 0; + wake_up_interruptible(&(pg_thread->queue)); } - i += len; - debug = value; - sprintf(pg_result, "OK: debug=%u", debug); - return count; } + return 0; +} - if (!strcmp(name, "frags")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - pkt_dev->nfrags = value; - sprintf(pg_result, "OK: frags=%u", pkt_dev->nfrags); - return count; - } - if (!strcmp(name, "delay")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - if (value == 0x7FFFFFFF) { - pkt_dev->delay_us = 0x7FFFFFFF; - pkt_dev->delay_ns = 0; - } else { - pkt_dev->delay_us = value / 1000; - pkt_dev->delay_ns = value % 1000; - } - sprintf(pg_result, "OK: delay=%u", - 1000 * pkt_dev->delay_us + pkt_dev->delay_ns); - return count; - } - if (!strcmp(name, "udp_src_min")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - if (value != pkt_dev->udp_src_min) { - pkt_dev->udp_src_min = value;
- pkt_dev->cur_udp_src = value; - } - sprintf(pg_result, "OK: udp_src_min=%u", pkt_dev->udp_src_min); - return count; - } - if (!strcmp(name, "udp_dst_min")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - if (value != pkt_dev->udp_dst_min) { - pkt_dev->udp_dst_min = value; - pkt_dev->cur_udp_dst = value; - } - sprintf(pg_result, "OK: udp_dst_min=%u", pkt_dev->udp_dst_min); - return count; - } - if (!strcmp(name, "udp_src_max")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - if (value != pkt_dev->udp_src_max) { - pkt_dev->udp_src_max = value; - pkt_dev->cur_udp_src = value; - } - sprintf(pg_result, "OK: udp_src_max=%u", pkt_dev->udp_src_max); - return count; - } - if (!strcmp(name, "udp_dst_max")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - if (value != pkt_dev->udp_dst_max) { - pkt_dev->udp_dst_max = value; - pkt_dev->cur_udp_dst = value; - } - sprintf(pg_result, "OK: udp_dst_max=%u", pkt_dev->udp_dst_max); - return count; - } - if (!strcmp(name, "clone_skb")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - pkt_dev->clone_skb = value; - sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb); - return count; - } - if (!strcmp(name, "count")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - pkt_dev->count = value; - sprintf(pg_result, "OK: count=%llu", - (unsigned long long)pkt_dev->count); - return count; - } - if (!strcmp(name, "src_mac_count")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - if (pkt_dev->src_mac_count != value) { - pkt_dev->src_mac_count = value; - pkt_dev->cur_src_mac_offset = 0; - } - sprintf(pg_result, "OK: src_mac_count=%d", - pkt_dev->src_mac_count); - return count; - } - if (!strcmp(name, "dst_mac_count")) { - len = num_arg(&user_buffer[i], 10, 
&value); - if (len < 0) { - return len; - } - i += len; - if (pkt_dev->dst_mac_count != value) { - pkt_dev->dst_mac_count = value; - pkt_dev->cur_dst_mac_offset = 0; - } - sprintf(pg_result, "OK: dst_mac_count=%d", - pkt_dev->dst_mac_count); - return count; +static void set_nqw_hook(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* info, + struct net_device* dev) { + /* The notify-queue-woken magic only works for physical + * devices at this time. So, apply hook to underlying + * device. + */ + struct pktgen_thread_info* pg_thread_nd; + /* printk("In set_nqw_hook, dev: %s\n", dev->name); */ + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (dev->priv_flags & IFF_MAC_VLAN) { + struct macvlan_vlan *vlan = dev->priv; + set_nqw_hook(pg_thread, info, vlan->lowerdev); + return; } - if (!strcmp(name, "flag")) { - char f[32]; - memset(f, 0, 32); - len = strn_len(&user_buffer[i], sizeof(f) - 1); - if (len < 0) { - return len; - } - if (copy_from_user(f, &user_buffer[i], len)) - return -EFAULT; - i += len; - if (strcmp(f, "IPSRC_RND") == 0) - pkt_dev->flags |= F_IPSRC_RND; - - else if (strcmp(f, "!IPSRC_RND") == 0) - pkt_dev->flags &= ~F_IPSRC_RND; - - else if (strcmp(f, "TXSIZE_RND") == 0) - pkt_dev->flags |= F_TXSIZE_RND; - - else if (strcmp(f, "!TXSIZE_RND") == 0) - pkt_dev->flags &= ~F_TXSIZE_RND; - - else if (strcmp(f, "IPDST_RND") == 0) - pkt_dev->flags |= F_IPDST_RND; - - else if (strcmp(f, "!IPDST_RND") == 0) - pkt_dev->flags &= ~F_IPDST_RND; - - else if (strcmp(f, "UDPSRC_RND") == 0) - pkt_dev->flags |= F_UDPSRC_RND; - - else if (strcmp(f, "!UDPSRC_RND") == 0) - pkt_dev->flags &= ~F_UDPSRC_RND; - - else if (strcmp(f, "UDPDST_RND") == 0) - pkt_dev->flags |= F_UDPDST_RND; - - else if (strcmp(f, "!UDPDST_RND") == 0) - pkt_dev->flags &= ~F_UDPDST_RND; - - else if (strcmp(f, "MACSRC_RND") == 0) - pkt_dev->flags |= F_MACSRC_RND; - - else if (strcmp(f, "!MACSRC_RND") == 0) - pkt_dev->flags &= ~F_MACSRC_RND; - - else if (strcmp(f, 
"MACDST_RND") == 0) - pkt_dev->flags |= F_MACDST_RND; - - else if (strcmp(f, "!MACDST_RND") == 0) - pkt_dev->flags &= ~F_MACDST_RND; - - else if (strcmp(f, "MPLS_RND") == 0) - pkt_dev->flags |= F_MPLS_RND; - - else if (strcmp(f, "!MPLS_RND") == 0) - pkt_dev->flags &= ~F_MPLS_RND; +#endif - else { - sprintf(pg_result, - "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", - f, - "IPSRC_RND, IPDST_RND, TXSIZE_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND\n"); - return count; - } - sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); - return count; + if (dev->priv_flags & IFF_802_1Q_VLAN) { + set_nqw_hook(pg_thread, info, VLAN_DEV_INFO(dev)->real_dev); + return; } - if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_min) - 1); - if (len < 0) { - return len; - } - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - if (strcmp(buf, pkt_dev->dst_min) != 0) { - memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min)); - strncpy(pkt_dev->dst_min, buf, len); - pkt_dev->daddr_min = in_aton(pkt_dev->dst_min); - pkt_dev->cur_daddr = pkt_dev->daddr_min; - } - if (debug) - printk("pktgen: dst_min set to: %s\n", - pkt_dev->dst_min); - i += len; - sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min); - return count; - } - if (!strcmp(name, "dst_max")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_max) - 1); - if (len < 0) { - return len; - } + pg_thread_nd = (struct pktgen_thread_info*)(dev->nqw_data); - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; + /*printk("pg_thread_nd: %p info: %p\n", pg_thread_nd, info);*/ + + if (pg_thread_nd) { + /* Just bump a reference count, it doesn't really matter which + * info is there since we always look at the parent thread anyway + */ + atomic_inc(&(pg_thread_nd->nqw_ref_count)); - buf[len] = 0; - if (strcmp(buf, pkt_dev->dst_max) != 0) { - memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max)); - 
strncpy(pkt_dev->dst_max, buf, len); - pkt_dev->daddr_max = in_aton(pkt_dev->dst_max); - pkt_dev->cur_daddr = pkt_dev->daddr_max; - } - if (debug) - printk("pktgen: dst_max set to: %s\n", - pkt_dev->dst_max); - i += len; - sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max); - return count; + /*printk("Incremented nqw_ref_count: %d device: %s\n", + (int)(atomic_read(&(pg_thread_nd->nqw_ref_count))), dev->name); */ } - if (!strcmp(name, "dst6")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); - if (len < 0) - return len; - - pkt_dev->flags |= F_IPV6; - - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - - scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); - fmt_ip6(buf, pkt_dev->in6_daddr.s6_addr); - - ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); - - if (debug) - printk("pktgen: dst6 set to: %s\n", buf); - - i += len; - sprintf(pg_result, "OK: dst6=%s", buf); - return count; + else { + /* Must be a real device, or at least some un-handled fake device. Can't + * hurt to add it here at any rate. 
+ */ + atomic_inc(&(pg_thread->nqw_ref_count)); + dev->nqw_data = pg_thread; + dev->notify_queue_woken = pg_notify_queue_woken; + /*printk("Added nqw callback to device: %s, data: %p data_nd: %p\n", + dev->name, dev->nqw_data, pg_thread_nd);*/ } - if (!strcmp(name, "dst6_min")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); - if (len < 0) - return len; +}//set_nqw_hook - pkt_dev->flags |= F_IPV6; - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - - scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); - fmt_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); - - ipv6_addr_copy(&pkt_dev->cur_in6_daddr, - &pkt_dev->min_in6_daddr); - if (debug) - printk("pktgen: dst6_min set to: %s\n", buf); - - i += len; - sprintf(pg_result, "OK: dst6_min=%s", buf); - return count; +static void clear_nqw_hook(struct pktgen_interface_info* info, + struct net_device* dev) { + /* The notify-queue-woken magic only works for physical + * devices at this time. So, apply hook to underlying + * device. 
+ */ + struct pktgen_thread_info* pg_thread; + /* printk("In clear_nqw_hook, dev: %s\n", dev->name); */ + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (dev->priv_flags & IFF_MAC_VLAN) { + struct macvlan_vlan *vlan = dev->priv; + clear_nqw_hook(info, vlan->lowerdev); + return; } - if (!strcmp(name, "dst6_max")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); - if (len < 0) - return len; - - pkt_dev->flags |= F_IPV6; - - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - - scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); - fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); - - if (debug) - printk("pktgen: dst6_max set to: %s\n", buf); +#endif - i += len; - sprintf(pg_result, "OK: dst6_max=%s", buf); - return count; + if (dev->priv_flags & IFF_802_1Q_VLAN) { + clear_nqw_hook(info, VLAN_DEV_INFO(dev)->real_dev); + return; } - if (!strcmp(name, "src6")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); - if (len < 0) - return len; - - pkt_dev->flags |= F_IPV6; - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - - scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); - fmt_ip6(buf, pkt_dev->in6_saddr.s6_addr); - - ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); - - if (debug) - printk("pktgen: src6 set to: %s\n", buf); + if (dev->nqw_data) { + pg_thread = (struct pktgen_thread_info*)(dev->nqw_data); + atomic_dec(&(pg_thread->nqw_ref_count)); + + /* printk("Decremented nqw_ref_count: %d device: %s\n", + (int)(atomic_read(&(pg_thread->nqw_ref_count))), dev->name); */ + + BUG_ON(atomic_read(&(pg_thread->nqw_ref_count)) < 0); + + if (atomic_read(&(pg_thread->nqw_ref_count)) == 0) { + /*printk("Removing nqw reference from device: %s\n", dev->name);*/ + dev->notify_queue_woken = NULL; + dev->nqw_data = NULL; + + /* Clear from all other devices too. There is an issue/bug where + * we may at times have more actual references to the pg_thread than + * our reference counter shows. 
In practice, I do not believe this + * will be a problem, but it will be slightly inefficient. A true + * fix would involve some sort of hash/list/map of physical interfaces + * to reference counts, stored in the pg_thread class. + * --Ben + */ - i += len; - sprintf(pg_result, "OK: src6=%s", buf); - return count; - } - if (!strcmp(name, "src_min")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_min) - 1); - if (len < 0) { - return len; - } - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - if (strcmp(buf, pkt_dev->src_min) != 0) { - memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min)); - strncpy(pkt_dev->src_min, buf, len); - pkt_dev->saddr_min = in_aton(pkt_dev->src_min); - pkt_dev->cur_saddr = pkt_dev->saddr_min; - } - if (debug) - printk("pktgen: src_min set to: %s\n", - pkt_dev->src_min); - i += len; - sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min); - return count; - } - if (!strcmp(name, "src_max")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_max) - 1); - if (len < 0) { - return len; - } - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - if (strcmp(buf, pkt_dev->src_max) != 0) { - memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max)); - strncpy(pkt_dev->src_max, buf, len); - pkt_dev->saddr_max = in_aton(pkt_dev->src_max); - pkt_dev->cur_saddr = pkt_dev->saddr_max; - } - if (debug) - printk("pktgen: src_max set to: %s\n", - pkt_dev->src_max); - i += len; - sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max); - return count; - } - if (!strcmp(name, "dst_mac")) { - char *v = valstr; - unsigned char old_dmac[ETH_ALEN]; - unsigned char *m = pkt_dev->dst_mac; - memcpy(old_dmac, pkt_dev->dst_mac, ETH_ALEN); - - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); - if (len < 0) { - return len; - } - memset(valstr, 0, sizeof(valstr)); - if (copy_from_user(valstr, &user_buffer[i], len)) - return -EFAULT; - i += len; - - for (*m = 0; *v && m < pkt_dev->dst_mac + 6; v++) 
{ - if (*v >= '0' && *v <= '9') { - *m *= 16; - *m += *v - '0'; - } - if (*v >= 'A' && *v <= 'F') { - *m *= 16; - *m += *v - 'A' + 10; - } - if (*v >= 'a' && *v <= 'f') { - *m *= 16; - *m += *v - 'a' + 10; - } - if (*v == ':') { - m++; - *m = 0; + { + int i; + struct pktgen_interface_info* p; + for (i = 0; iodev && p->odev->nqw_data == pg_thread) { + /*printk("Removing nqw reference from dev: %s\n", + p->odev->name);*/ + p->odev->notify_queue_woken = NULL; + p->odev->nqw_data = NULL; + } + p = p->next; + } + } } } - - /* Set up Dest MAC */ - if (compare_ether_addr(old_dmac, pkt_dev->dst_mac)) - memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, ETH_ALEN); - - sprintf(pg_result, "OK: dstmac"); - return count; - } - if (!strcmp(name, "src_mac")) { - char *v = valstr; - unsigned char *m = pkt_dev->src_mac; - - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); - if (len < 0) { - return len; - } - memset(valstr, 0, sizeof(valstr)); - if (copy_from_user(valstr, &user_buffer[i], len)) - return -EFAULT; - i += len; - - for (*m = 0; *v && m < pkt_dev->src_mac + 6; v++) { - if (*v >= '0' && *v <= '9') { - *m *= 16; - *m += *v - '0'; - } - if (*v >= 'A' && *v <= 'F') { - *m *= 16; - *m += *v - 'A' + 10; - } - if (*v >= 'a' && *v <= 'f') { - *m *= 16; - *m += *v - 'a' + 10; + else { + /* Else, if we are directly attached to this device, remove the + * nqw callback then too, because we only allow one info struct + * per net-device, so this MUST be the only one. + */ + if (dev == info->odev) { + /*printk("Removing nqw reference from real device: %s\n", + dev->name);*/ + dev->notify_queue_woken = NULL; + dev->nqw_data = NULL; } - if (*v == ':') { - m++; - *m = 0; + else { + /*printk("pktgen NOTE: dev: %s != info->odev: %s\n", + dev ? dev->name : "NULL", + info->odev ? 
info->odev->name : "NULL");*/ } } - - sprintf(pg_result, "OK: srcmac"); - return count; - } - - if (!strcmp(name, "clear_counters")) { - pktgen_clear_counters(pkt_dev); - sprintf(pg_result, "OK: Clearing counters.\n"); - return count; - } - - if (!strcmp(name, "flows")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - if (value > MAX_CFLOWS) - value = MAX_CFLOWS; - - pkt_dev->cflows = value; - sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows); - return count; - } - - if (!strcmp(name, "flowlen")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { - return len; - } - i += len; - pkt_dev->lflow = value; - sprintf(pg_result, "OK: flowlen=%u", pkt_dev->lflow); - return count; - } - - if (!strcmp(name, "mpls")) { - unsigned n, offset; - len = get_labels(&user_buffer[i], pkt_dev); - if (len < 0) { return len; } - i += len; - offset = sprintf(pg_result, "OK: mpls="); - for(n = 0; n < pkt_dev->nr_labels; n++) - offset += sprintf(pg_result + offset, - "%08x%s", ntohl(pkt_dev->labels[n]), - n == pkt_dev->nr_labels-1 ? 
"" : ","); - return count; - } - - sprintf(pkt_dev->result, "No such parameter \"%s\"", name); - return -EINVAL; -} - -static int pktgen_if_open(struct inode *inode, struct file *file) -{ - return single_open(file, pktgen_if_show, PDE(inode)->data); -} - -static struct file_operations pktgen_if_fops = { - .owner = THIS_MODULE, - .open = pktgen_if_open, - .read = seq_read, - .llseek = seq_lseek, - .write = pktgen_if_write, - .release = single_release, -}; - -static int pktgen_thread_show(struct seq_file *seq, void *v) -{ - struct pktgen_thread *t = seq->private; - struct pktgen_dev *pkt_dev; - - BUG_ON(!t); - - seq_printf(seq, "Name: %s max_before_softirq: %d\n", - t->name, t->max_before_softirq); - - seq_printf(seq, "Running: "); - - if_lock(t); - list_for_each_entry(pkt_dev, &t->if_list, list) - if (pkt_dev->running) - seq_printf(seq, "%s ", pkt_dev->ifname); - - seq_printf(seq, "\nStopped: "); - - list_for_each_entry(pkt_dev, &t->if_list, list) - if (!pkt_dev->running) - seq_printf(seq, "%s ", pkt_dev->ifname); - - if (t->result[0]) - seq_printf(seq, "\nResult: %s\n", t->result); - else - seq_printf(seq, "\nResult: NA\n"); - - if_unlock(t); - - return 0; -} - -static ssize_t pktgen_thread_write(struct file *file, - const char __user * user_buffer, - size_t count, loff_t * offset) -{ - struct seq_file *seq = (struct seq_file *)file->private_data; - struct pktgen_thread *t = seq->private; - int i = 0, max, len, ret; - char name[40]; - char *pg_result; - unsigned long value = 0; - - if (count < 1) { - // sprintf(pg_result, "Wrong command format"); - return -EINVAL; - } - - max = count - i; - len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - - i += len; - - /* Read variable name */ - - len = strn_len(&user_buffer[i], sizeof(name) - 1); - if (len < 0) - return len; - - memset(name, 0, sizeof(name)); - if (copy_from_user(name, &user_buffer[i], len)) - return -EFAULT; - i += len; - - max = count - i; - len = count_trail_chars(&user_buffer[i], 
max); - if (len < 0) - return len; - - i += len; - - if (debug) - printk("pktgen: t=%s, count=%lu\n", name, (unsigned long)count); - - if (!t) { - printk("pktgen: ERROR: No thread\n"); - ret = -EINVAL; - goto out; - } - - pg_result = &(t->result[0]); - - if (!strcmp(name, "add_device")) { - char f[32]; - memset(f, 0, 32); - len = strn_len(&user_buffer[i], sizeof(f) - 1); - if (len < 0) { - ret = len; - goto out; - } - if (copy_from_user(f, &user_buffer[i], len)) - return -EFAULT; - i += len; - mutex_lock(&pktgen_thread_lock); - pktgen_add_device(t, f); - mutex_unlock(&pktgen_thread_lock); - ret = count; - sprintf(pg_result, "OK: add_device=%s", f); - goto out; - } - - if (!strcmp(name, "rem_device_all")) { - mutex_lock(&pktgen_thread_lock); - t->control |= T_REMDEVALL; - mutex_unlock(&pktgen_thread_lock); - schedule_timeout_interruptible(msecs_to_jiffies(125)); /* Propagate thread->control */ - ret = count; - sprintf(pg_result, "OK: rem_device_all"); - goto out; - } - - if (!strcmp(name, "max_before_softirq")) { - len = num_arg(&user_buffer[i], 10, &value); - mutex_lock(&pktgen_thread_lock); - t->max_before_softirq = value; - mutex_unlock(&pktgen_thread_lock); - ret = count; - sprintf(pg_result, "OK: max_before_softirq=%lu", value); - goto out; } - - ret = -EINVAL; -out: - return ret; -} - -static int pktgen_thread_open(struct inode *inode, struct file *file) -{ - return single_open(file, pktgen_thread_show, PDE(inode)->data); -} - -static struct file_operations pktgen_thread_fops = { - .owner = THIS_MODULE, - .open = pktgen_thread_open, - .read = seq_read, - .llseek = seq_lseek, - .write = pktgen_thread_write, - .release = single_release, -}; - -/* Think find or remove for NN */ -static struct pktgen_dev *__pktgen_NN_threads(const char *ifname, int remove) -{ - struct pktgen_thread *t; - struct pktgen_dev *pkt_dev = NULL; - - list_for_each_entry(t, &pktgen_threads, th_list) { - pkt_dev = pktgen_find_dev(t, ifname); - if (pkt_dev) { - if (remove) { - if_lock(t); - 
pkt_dev->removal_mark = 1; - t->control |= T_REMDEV; - if_unlock(t); + else { + printk("pktgen: Warning: nqw_data is null in clear_nqw_hook, dev: %s\n", + dev->name); + } +}//clear_nqw_hook + + +static void remove_pg_info_from_hash(struct pktgen_interface_info* info) { + pg_lock_hash(__FUNCTION__); + { + int device_idx = info->odev ? info->odev->ifindex : 0; + int b = device_idx % PG_INFO_HASH_MAX; + struct pktgen_interface_info* p = pg_info_hash[b]; + struct pktgen_interface_info* prev = pg_info_hash[b]; + int found_one = 0; + + PG_DEBUG(printk("remove_pg_info_from_hash, p: %p info: %p device_idx: %i\n", + p, info, device_idx)); + + if (p != NULL) { + + if (p == info) { + pg_info_hash[b] = p->next_hash; + p->next_hash = NULL; + found_one = 1; + } + else { + while (prev->next_hash) { + p = prev->next_hash; + if (p == info) { + prev->next_hash = p->next_hash; + p->next_hash = NULL; + found_one = 1; + break; + } + prev = p; + } + } + } + + if (found_one) { + if (info->odev) { + info->odev->priv_flags &= ~(IFF_PKTGEN_RCV); + clear_nqw_hook(info, info->odev); } - break; } - } - return pkt_dev; + } + pg_unlock_hash(__FUNCTION__); +}/* remove_pg_info_from_hash */ + + +static void add_pg_info_to_hash(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* info) { + /* First remove it, just in case it's already there. */ + remove_pg_info_from_hash(info); + + pg_lock_hash(__FUNCTION__); + { + int device_idx = info->odev ? 
info->odev->ifindex : 0; + int b = device_idx % PG_INFO_HASH_MAX; + + PG_DEBUG(printk("add_pg_info_from_hash, b: %i info: %p device_idx: %i\n", + b, info, device_idx)); + + info->next_hash = pg_info_hash[b]; + pg_info_hash[b] = info; + + + if (info->odev) { + set_nqw_hook(pg_thread, info, info->odev); + info->odev->priv_flags |= (IFF_PKTGEN_RCV); + } + } + pg_unlock_hash(__FUNCTION__); +}/* add_pg_info_to_hash */ + + +/* Find the pktgen_interface_info for a device idx */ +struct pktgen_interface_info* find_pg_info(int device_idx) { + struct pktgen_interface_info* p = NULL; + if (debug > 1) { + printk("in find_pg_info...\n"); + } + pg_lock_hash(__FUNCTION__); + { + int b = device_idx % PG_INFO_HASH_MAX; + p = pg_info_hash[b]; + while (p) { + if (p->odev && (p->odev->ifindex == device_idx)) { + break; + } + p = p->next_hash; + } + } + pg_unlock_hash(__FUNCTION__); + return p; } -/* - * mark a device for removal - */ -static int pktgen_mark_device(const char *ifname) -{ - struct pktgen_dev *pkt_dev = NULL; - const int max_tries = 10, msec_per_try = 125; - int i = 0; - int ret = 0; - - mutex_lock(&pktgen_thread_lock); - PG_DEBUG(printk("pktgen: pktgen_mark_device marking %s for removal\n", - ifname)); - - while (1) { - - pkt_dev = __pktgen_NN_threads(ifname, REMOVE); - if (pkt_dev == NULL) - break; /* success */ - - mutex_unlock(&pktgen_thread_lock); - PG_DEBUG(printk("pktgen: pktgen_mark_device waiting for %s " - "to disappear....\n", ifname)); - schedule_timeout_interruptible(msecs_to_jiffies(msec_per_try)); - mutex_lock(&pktgen_thread_lock); - - if (++i >= max_tries) { - printk("pktgen_mark_device: timed out after waiting " - "%d msec for device %s to be removed\n", - msec_per_try * i, ifname); - ret = 1; - break; - } - - } - mutex_unlock(&pktgen_thread_lock); - - return ret; -} - -static int pktgen_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ +/* Remove an interface from our hash, dissassociate pktgen_interface_info + * from 
interface + */ +static void check_remove_device(struct pktgen_interface_info* info) { + struct pktgen_interface_info* pi = NULL; + if (info->odev) { + pi = find_pg_info(info->odev->ifindex); + if (pi != info) { + printk("ERROR: pi != info, pi: %p info: %p\n", pi, info); + } + else { + /* Remove info from our hash */ + remove_pg_info_from_hash(info); + } + + /* TODO: Wonder if we need locking here? Had rtnl_lock, but + * that can sleep and this is called with irqs disabled... + */ + info->odev->priv_flags &= ~(IFF_PKTGEN_RCV); + atomic_dec(&(info->odev->refcnt)); + info->odev = NULL; + } +}/* check_remove_device */ + + +static int pg_remove_interface_from_all_threads(const char* dev_name) { + int cnt = 0; + pg_lock_thread_list(__FUNCTION__); + { + struct pktgen_thread_info* tmp = pktgen_threads; + struct pktgen_interface_info* info = NULL; + + while (tmp) { + info = pg_find_interface(tmp, dev_name); + if (info) { + printk("pktgen: Removing interface: %s from pktgen control.\n", + dev_n