--- linux-2.6.11/include/linux/if.h	2005-03-01 23:38:08.000000000 -0800
+++ linux-2.6.11.p4s/include/linux/if.h	2005-07-11 16:06:34.000000000 -0700
@@ -52,6 +52,20 @@
 /* Private (from user) interface flags (netdevice->priv_flags). */
 #define IFF_802_1Q_VLAN 0x1	/* 802.1Q VLAN device.  */
 #define IFF_EBRIDGE	0x2	/* Ethernet bridging device.  */
+#define IFF_PKTGEN_RCV	0x4	/* Registered to receive & consume Pktgen skbs */
+#define IFF_ACCEPT_LOCAL_ADDRS 0x8 /** Accept pkts even if they come from a local
+                                    * address.  This lets us send pkts to ourselves
+                                    * over external interfaces (when used in conjunction
+                                    * with SO_BINDTODEVICE).
+                                    */
+#define IFF_ACCEPT_ALL_FRAMES 0x10 /** Accept all frames, even ones with bad CRCs.
+                                    * Should only be used in debugging/testing situations.
+                                    * Do NOT enable this unless you understand the
+                                    * consequences! */
+#define IFF_SAVE_FCS	0x20	/** Save the Frame Check Sequence (FCS) on receive, if
+                                 * possible. */
+#define IFF_MAC_VLAN	0x40	/* MAC VLAN device. */
+
 
 #define IF_GET_IFACE	0x0001	/* for querying only */
 #define IF_GET_PROTO	0x0002
--- linux-2.6.11/include/linux/netdevice.h	2005-03-01 23:38:26.000000000 -0800
+++ linux-2.6.11.p4s/include/linux/netdevice.h	2005-07-11 16:06:34.000000000 -0700
@@ -327,7 +327,9 @@
 	unsigned short		flags;	/* interface flags (a la BSD)	*/
 	unsigned short		gflags;
-	unsigned short		priv_flags; /* Like 'flags' but invisible to userspace. */
+	unsigned short		priv_flags; /* Like 'flags' but invisible to userspace,
+					     * see: if.h for flag definitions.
+					     */
 	unsigned short		unused_alignment_fixer; /* Because we need priv_flags,
 							 * and we want to be 32-bit aligned.
 							 */
 
@@ -416,6 +418,12 @@
 #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
 #define NETIF_F_TSO		2048	/* Can offload TCP/IP segmentation */
 #define NETIF_F_LLTX		4096	/* LockLess TX */
+#define NETIF_F_RX_ALL		(1<<13)	/* Can be configured to receive all packets, even
+					 * ones with busted CRC.  May disable VLAN filtering
+					 * in the NIC, users should NOT enable this feature
+					 * unless they understand the consequences. */
+#define NETIF_F_SAVE_CRC	(1<<14)	/* Can save FCS in skb, last 4 bytes for ethernet */
+
 
 	/* Called after device is detached from network. */
 	void			(*uninit)(struct net_device *dev);
@@ -477,9 +485,18 @@
 	void                    (*poll_controller)(struct net_device *dev);
 #endif
 
+	/* Callback for when the queue is woken, used by pktgen currently */
+	int (*notify_queue_woken)(struct net_device *dev);
+	void* nqw_data; /* To be used by the method above as needed */
+
 	/* bridge stuff */
 	struct net_bridge_port	*br_port;
+#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
+	struct macvlan_port	*macvlan_priv;
+#endif
+
+
 #ifdef CONFIG_NET_DIVERT
 	/* this will get initialized at each interface type init routine */
 	struct divert_blk	*divert;
@@ -614,8 +631,13 @@
 	if (netpoll_trap())
 		return;
 #endif
-	if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
+	if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state)) {
 		__netif_schedule(dev);
+
+		if (dev->notify_queue_woken) {
+			dev->notify_queue_woken(dev);
+		}
+	}
 }
 
 static inline void netif_stop_queue(struct net_device *dev)
--- linux-2.6.11/net/core/dev.c	2005-03-01 23:38:09.000000000 -0800
+++ linux-2.6.11.p4s/net/core/dev.c	2005-07-11 16:06:34.000000000 -0700
@@ -1,4 +1,4 @@
-/*
+/* -*-linux-c-*-
 *	NET3	Protocol independent device support routines.
* * This program is free software; you can redistribute it and/or @@ -88,6 +88,7 @@ #include #include #include +#include #include #include #include @@ -115,6 +116,24 @@ #endif /* CONFIG_NET_RADIO */ #include +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) +#include "pktgen.h" + +#warning "Compiling dev.c for pktgen."; + +int (*handle_pktgen_hook)(struct sk_buff *skb) = NULL; +EXPORT_SYMBOL(handle_pktgen_hook); + +static __inline__ int handle_pktgen_rcv(struct sk_buff* skb) { + if (handle_pktgen_hook) { + return handle_pktgen_hook(skb); + } + return -1; +} + +#endif + + /* This define, if set, will randomly drop a packet when congestion * is more than moderate. It helps fairness in the multi-interface * case when one of them is a hog, but it kills performance for the @@ -1214,6 +1233,19 @@ * A negative errno code is returned on a failure. A success does not * guarantee the frame will be transmitted as it may be dropped due * to congestion or traffic shaping. + * + * ----------------------------------------------------------------------------------- + * I notice this method can also return errors from the queue disciplines, + * including NET_XMIT_DROP, which is a positive value. So, errors can also + * be positive. + * + * Regardless of the return value, the skb is consumed, so it is currently + * difficult to retry a send to this method. (You can bump the ref count + * before sending to hold a reference for retry if you are careful.) + * + * When calling this method, interrupts MUST be enabled. This is because + * the BH enable code must have IRQs enabled so that it will not deadlock. + * --BLG */ int dev_queue_xmit(struct sk_buff *skb) @@ -1339,7 +1371,7 @@ =======================================================================*/ int netdev_max_backlog = 300; -int weight_p = 64; /* old backlog weight */ +int weight_p = 64; /* old backlog weight */ /* These numbers are selected based on intuition and some * experimentatiom, if you have more scientific way of doing this * please go ahead and fix things. @@ -1623,6 +1655,24 @@ } #endif + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +/* Returns >= 0 if we consume the packet. Otherwise, let + * it fall through the rest of the packet processing. + */ +int (*macvlan_handle_frame_hook)(struct sk_buff *skb) = NULL; +EXPORT_SYMBOL(macvlan_handle_frame_hook); +#endif + +/* Returns >= 0 if we consume the packet. Otherwise, let + * it fall through the rest of the packet processing. + */ +static __inline__ int handle_macvlan(struct sk_buff *skb) +{ + return macvlan_handle_frame_hook(skb); +} + + int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -1689,6 +1739,32 @@ if (handle_bridge(&skb, &pt_prev, &ret)) goto out; +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (skb->dev->macvlan_priv != NULL && + macvlan_handle_frame_hook != NULL) { + if (handle_macvlan(skb) >= 0) { + /* consumed by mac-vlan...it would have been + * re-sent to this method with a different + * device... + */ + goto out; + } + else { + /* Let it fall through and be processed normally */ + } + } +#endif + +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) + if ((skb->dev->priv_flags & IFF_PKTGEN_RCV) && + (handle_pktgen_rcv(skb) >= 0)) { + /* Pktgen may consume the packet, no need to send + * to further protocols. 
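+	 * Like the macvlan hook above, a return >= 0 from the hook means
+	 * the skb was consumed here; a negative return (e.g. when no
+	 * pktgen receiver is registered) lets the frame fall through to
+	 * the normal protocol handlers.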
+ */ + goto out; + } +#endif + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { if (ptype->type == type && @@ -2420,6 +2496,24 @@ ifr->ifr_newname[IFNAMSIZ-1] = '\0'; return dev_change_name(dev, ifr->ifr_newname); + case SIOCSACCEPTLOCALADDRS: + if (ifr->ifr_flags) { + dev->priv_flags |= IFF_ACCEPT_LOCAL_ADDRS; + } + else { + dev->priv_flags &= ~IFF_ACCEPT_LOCAL_ADDRS; + } + return 0; + + case SIOCGACCEPTLOCALADDRS: + if (dev->priv_flags & IFF_ACCEPT_LOCAL_ADDRS) { + ifr->ifr_flags = 1; + } + else { + ifr->ifr_flags = 0; + } + return 0; + /* * Unknown or private ioctl */ @@ -2518,6 +2612,7 @@ case SIOCGIFMAP: case SIOCGIFINDEX: case SIOCGIFTXQLEN: + case SIOCGACCEPTLOCALADDRS: dev_load(ifr.ifr_name); read_lock(&dev_base_lock); ret = dev_ifsioc(&ifr, cmd); @@ -2594,6 +2689,7 @@ case SIOCBONDCHANGEACTIVE: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCSACCEPTLOCALADDRS: if (!capable(CAP_NET_ADMIN)) return -EPERM; dev_load(ifr.ifr_name); @@ -3338,6 +3434,10 @@ EXPORT_SYMBOL(net_enable_timestamp); EXPORT_SYMBOL(net_disable_timestamp); +#if defined(CONFIG_NET_PKTGEN) || defined(CONFIG_NET_PKTGEN_MODULE) +EXPORT_SYMBOL(handle_pktgen_rcv); +#endif + #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) EXPORT_SYMBOL(br_handle_frame_hook); #endif --- linux-2.6.11/net/core/pktgen.c 2005-03-01 23:38:38.000000000 -0800 +++ linux-2.6.11.p4s/net/core/pktgen.c 2005-07-11 16:06:34.000000000 -0700 @@ -1,22 +1,12 @@ -/* - * Authors: - * Copyright 2001, 2002 by Robert Olsson - * Uppsala University and - * Swedish University of Agricultural Sciences - * - * Alexey Kuznetsov - * Ben Greear - * Jens Låås - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. +/* -*-linux-c-*- * + * Copyright 2001, 2002 by Robert Olsson + * Uppsala University, Sweden + * 2002 Ben Greear * * A tool for loading the network with preconfigurated packets. * The tool is implemented as a linux module. Parameters are output - * device, delay (to hard_xmit), number of packets, and whether + * device, IPG (interpacket gap), number of packets, and whether * to use multiple SKBs or just the same one. * pktgen uses the installed interface's output routine. * @@ -43,6 +33,16 @@ * latencies (with micro-second) precision. * * Add IOCTL interface to easily get counters & configuration. * --Ben Greear + + * Fix refcount off by one if first packet fails, potential null deref, + * memleak 030710- KJP + * + * * Added the IPMAC option to allow the MAC addresses to mirror IP addresses. + * -- (dhetheri) Dave Hetherington 03/09/29 + * * Allow the user to change the protocol field via 'pgset "prot 0"' command + * -- (dhetheri) Dave Hetherington 03/10/7 + * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br) + * * * Renamed multiskb to clone_skb and cleaned up sending core for two distinct * skb modes. A clone_skb=0 mode for Ben "ranges" work and a clone_skb != 0 @@ -54,305 +54,126 @@ * Also moved to /proc/net/pktgen/ * --ro * - * Sept 10: Fixed threading/locking. Lots of bone-headed and more clever - * mistakes. Also merged in DaveM's patch in the -pre6 patch. - * --Ben Greear - * - * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br) - * - * - * 021124 Finished major redesign and rewrite for new functionality. 
- * See Documentation/networking/pktgen.txt for how to use this. - * - * The new operation: - * For each CPU one thread/process is created at start. This process checks - * for running devices in the if_list and sends packets until count is 0 it - * also the thread checks the thread->control which is used for inter-process - * communication. controlling process "posts" operations to the threads this - * way. The if_lock should be possible to remove when add/rem_device is merged - * into this too. - * - * By design there should only be *one* "controlling" process. In practice - * multiple write accesses gives unpredictable result. Understood by "write" - * to /proc gives result code thats should be read be the "writer". - * For pratical use this should be no problem. - * - * Note when adding devices to a specific CPU there good idea to also assign - * /proc/irq/XX/smp_affinity so TX-interrupts gets bound to the same CPU. - * --ro - * - * Fix refcount off by one if first packet fails, potential null deref, + * Fix refcount off by one if first packet fails, potential null deref, * memleak 030710- KJP * - * First "ranges" functionality for ipv6 030726 --ro - * - * Included flow support. 030802 ANK. - * - * Fixed unaligned access on IA-64 Grant Grundler - * - * Remove if fix from added Harald Welte 040419 - * ia64 compilation fix from Aron Griffis 040604 - * - * New xmit() return, do_div and misc clean up by Stephen Hemminger - * 040923 - * - * Rany Dunlap fixed u64 printk compiler waring * - * Remove FCS from BW calculation. Lennert Buytenhek - * New time handling. Lennert Buytenhek 041213 - * - * Corrections from Nikolai Malykh (nmalykh@bilim.com) - * Removed unused flags F_SET_SRCMAC & F_SET_SRCIP 041230 + * Sept 10: Fixed threading/locking. Lots of bone-headed and more clever + * mistakes. Also merged in DaveM's patch in the -pre6 patch. * - * interruptible_sleep_on_timeout() replaced Nishanth Aravamudan - * 050103 + * See Documentation/networking/pktgen.txt for how to use this. 
*/ -#include -#include + #include -#include #include -#include -#include -#include -#include #include -#include +#include #include #include #include #include +#include #include +#include #include -#include #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include #include #include #include -#include /* do_div */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include /* for lock kernel */ +#include /* do_div */ -#define VERSION "pktgen v2.58: Packet Generator for packet performance testing.\n" -/* #define PG_DEBUG(a) a */ -#define PG_DEBUG(a) +#include +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) +#include "../macvlan/macvlan.h" +#endif + +#include "pktgen.h" -/* The buckets are exponential in 'width' */ -#define LAT_BUCKETS_MAX 32 -#define IP_NAME_SZ 32 - -/* Device flag bits */ -#define F_IPSRC_RND (1<<0) /* IP-Src Random */ -#define F_IPDST_RND (1<<1) /* IP-Dst Random */ -#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ -#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ -#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ -#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ -#define F_TXSIZE_RND (1<<6) /* Transmit size is random */ -#define F_IPV6 (1<<7) /* Interface in IPV6 Mode */ - -/* Thread control flag bits */ -#define T_TERMINATE (1<<0) -#define T_STOP (1<<1) /* Stop run */ -#define T_RUN (1<<2) /* Start run */ -#define T_REMDEV (1<<3) /* Remove all devs */ - -/* Locks */ -#define thread_lock() spin_lock(&_thread_lock) -#define thread_unlock() spin_unlock(&_thread_lock) - -/* If lock -- can be removed after some work */ -#define if_lock(t) spin_lock(&(t->if_lock)); -#define if_unlock(t) spin_unlock(&(t->if_lock)); + +#define VERSION "pktgen version 1.9.2 (nospin)" +static char version[] __initdata = + "pktgen.c: v1.9.2 (nospin): Packet Generator for packet performance testing.\n"; /* Used to help with determining the pkts on receive */ -#define PKTGEN_MAGIC 0xbe9be955 -#define PG_PROC_DIR "pktgen" -#define MAX_CFLOWS 65536 +#define PKTGEN_MAGIC 0xbe9be955 -struct flow_state -{ - __u32 cur_daddr; - int count; -}; +/* #define PG_DEBUG(a) a */ +#define PG_DEBUG(a) /* a */ -struct pktgen_dev { +/* cycles per micro-second */ +static u32 pg_cycles_per_ns; +static u32 pg_cycles_per_us; +static u32 pg_cycles_per_ms; - /* - * Try to keep frequent/infrequent used vars. separated. - */ +/* Module parameters, defaults. */ +static int pg_count_d = 0; /* run forever by default */ +static int pg_ipg_d = 0; +static int pg_multiskb_d = 0; +static int pg_thread_count = 1; /* Initial threads to create */ +static int debug = 0; - char ifname[32]; - struct proc_dir_entry *proc_ent; - char result[512]; - /* proc file names */ - char fname[80]; - struct pktgen_thread* pg_thread; /* the owner */ - struct pktgen_dev *next; /* Used for chaining in the thread's run-queue */ - int running; /* if this changes to false, the test will stop */ - - /* If min != max, then we will either do a linear iteration, or - * we will do a random selection from within the range. 
- */ - __u32 flags; +/* List of all running threads */ +static struct pktgen_thread_info* pktgen_threads = NULL; +spinlock_t _pg_threadlist_lock = SPIN_LOCK_UNLOCKED; + +/* Holds interfaces for all threads */ +#define PG_INFO_HASH_MAX 32 +static struct pktgen_interface_info* pg_info_hash[PG_INFO_HASH_MAX]; +spinlock_t _pg_hash_lock = SPIN_LOCK_UNLOCKED; - int min_pkt_size; /* = ETH_ZLEN; */ - int max_pkt_size; /* = ETH_ZLEN; */ - int nfrags; - __u32 delay_us; /* Default delay */ - __u32 delay_ns; - __u64 count; /* Default No packets to send */ - __u64 sofar; /* How many pkts we've sent so far */ - __u64 tx_bytes; /* How many bytes we've transmitted */ - __u64 errors; /* Errors when trying to transmit, pkts will be re-sent */ - - /* runtime counters relating to clone_skb */ - __u64 next_tx_us; /* timestamp of when to tx next */ - __u32 next_tx_ns; - - __u64 allocated_skbs; - __u32 clone_count; - int last_ok; /* Was last skb sent? - * Or a failed transmit of some sort? This will keep - * sequence numbers in order, for example. - */ - __u64 started_at; /* micro-seconds */ - __u64 stopped_at; /* micro-seconds */ - __u64 idle_acc; /* micro-seconds */ - __u32 seq_num; - - int clone_skb; /* Use multiple SKBs during packet gen. If this number - * is greater than 1, then that many coppies of the same - * packet will be sent before a new packet is allocated. - * For instance, if you want to send 1024 identical packets - * before creating a new packet, set clone_skb to 1024. - */ - - char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ - - struct in6_addr in6_saddr; - struct in6_addr in6_daddr; - struct in6_addr cur_in6_daddr; - struct in6_addr cur_in6_saddr; - /* For ranges */ - struct in6_addr min_in6_daddr; - struct in6_addr max_in6_daddr; - struct in6_addr min_in6_saddr; - struct in6_addr max_in6_saddr; +#define PG_PROC_DIR "pktgen" +static struct proc_dir_entry *pg_proc_dir = NULL; - /* If we're doing ranges, random or incremental, then this - * defines the min/max for those ranges. - */ - __u32 saddr_min; /* inclusive, source IP address */ - __u32 saddr_max; /* exclusive, source IP address */ - __u32 daddr_min; /* inclusive, dest IP address */ - __u32 daddr_max; /* exclusive, dest IP address */ - - __u16 udp_src_min; /* inclusive, source UDP port */ - __u16 udp_src_max; /* exclusive, source UDP port */ - __u16 udp_dst_min; /* inclusive, dest UDP port */ - __u16 udp_dst_max; /* exclusive, dest UDP port */ - - __u32 src_mac_count; /* How many MACs to iterate through */ - __u32 dst_mac_count; /* How many MACs to iterate through */ - - unsigned char dst_mac[6]; - unsigned char src_mac[6]; - - __u32 cur_dst_mac_offset; - __u32 cur_src_mac_offset; - __u32 cur_saddr; - __u32 cur_daddr; - __u16 cur_udp_dst; - __u16 cur_udp_src; - __u32 cur_pkt_size; - - __u8 hh[14]; - /* = { - 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, - - We fill in SRC address later - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x08, 0x00 - }; - */ - __u16 pad; /* pad out the hh struct to an even 16 bytes */ +char module_fname[128]; +struct proc_dir_entry *module_proc_ent = NULL; - struct sk_buff* skb; /* skb we are to transmit next, mainly used for when we - * are transmitting the same one multiple times - */ - struct net_device* odev; /* The out-going device. Note that the device should - * have it's pg_info pointer pointing back to this - * device. 
This will be set when the user specifies - * the out-going device name (not when the inject is - * started as it used to do.) - */ - struct flow_state *flows; - unsigned cflows; /* Concurrent flows (config) */ - unsigned lflow; /* Flow length (config) */ - unsigned nflows; /* accumulated flows (stats) */ -}; -struct pktgen_hdr { - __u32 pgh_magic; - __u32 seq_num; - __u32 tv_sec; - __u32 tv_usec; -}; +static void init_pktgen_kthread(struct pktgen_thread_info *kthread, char *name); +static int pg_rem_interface_info(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* i); +static int pg_add_interface_info(struct pktgen_thread_info* pg_thread, + const char* ifname); +static void exit_pktgen_kthread(struct pktgen_thread_info *kthread); +static void stop_pktgen_kthread(struct pktgen_thread_info *kthread); +static struct pktgen_thread_info* pg_find_thread(const char* name); +static int pg_add_thread_info(const char* name); +static struct pktgen_interface_info* pg_find_interface(struct pktgen_thread_info* pg_thread, + const char* ifname); +static int pktgen_device_event(struct notifier_block *, unsigned long, void *); -struct pktgen_thread { - spinlock_t if_lock; - struct pktgen_dev *if_list; /* All device here */ - struct pktgen_thread* next; - char name[32]; - char fname[128]; /* name of proc file */ - struct proc_dir_entry *proc_ent; - char result[512]; - u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ - - /* Field for thread to receive "posted" events terminate, stop ifs etc.*/ - - u32 control; - int pid; - int cpu; - wait_queue_head_t queue; +struct notifier_block pktgen_notifier_block = { + notifier_call: pktgen_device_event, }; -#define REMOVE 1 -#define FIND 0 - /* This code works around the fact that do_div cannot handle two 64-bit numbers, and regular 64-bit division doesn't work on x86 kernels. --Ben */ #define PG_DIV 0 +#define PG_REM 1 /* This was emailed to LMKL by: Chris Caputo * Function copied/adapted/optimized from: @@ -362,9 +183,10 @@ * Copyright 1994, University of Cambridge Computer Laboratory * All Rights Reserved. * + * TODO: When running on a 64-bit CPU platform, this should no longer be + * TODO: necessary. */ -inline static s64 divremdi3(s64 x, s64 y, int type) -{ +inline static s64 divremdi3(s64 x, s64 y, int type) { u64 a = (x < 0) ? -x : x; u64 b = (y < 0) ? -y : y; u64 res = 0, d = 1; @@ -392,13 +214,76 @@ else { return ((x & (1ll<<63)) == 0) ? 
a : -(s64)a; } -} +}/* divremdi3 */ /* End of hacks to deal with 64-bit math on x86 */ + + +inline static void pg_lock_thread_list(const char* msg) { + if (debug > 1) { + printk("before pg_lock_thread_list, msg: %s\n", msg); + } + spin_lock(&_pg_threadlist_lock); + if (debug > 1) { + printk("after pg_lock_thread_list, msg: %s\n", msg); + } +} + +inline static void pg_unlock_thread_list(const char* msg) { + if (debug > 1) { + printk("before pg_unlock_thread_list, msg: %s\n", msg); + } + spin_unlock(&_pg_threadlist_lock); + if (debug > 1) { + printk("after pg_unlock_thread_list, msg: %s\n", msg); + } +} + +inline static void pg_lock_hash(const char* msg) { + if (debug > 1) { + printk("before pg_lock_hash, msg: %s\n", msg); + } + spin_lock(&_pg_hash_lock); + if (debug > 1) { + printk("before pg_lock_hash, msg: %s\n", msg); + } +} + +inline static void pg_unlock_hash(const char* msg) { + if (debug > 1) { + printk("before pg_unlock_hash, msg: %s\n", msg); + } + spin_unlock(&_pg_hash_lock); + if (debug > 1) { + printk("after pg_unlock_hash, msg: %s\n", msg); + } +} + +inline static void pg_lock(struct pktgen_thread_info* pg_thread, const char* msg) { + if (debug > 1) { + printk("before pg_lock thread, msg: %s\n", msg); + } + spin_lock(&(pg_thread->pg_threadlock)); + if (debug > 1) { + printk("after pg_lock thread, msg: %s\n", msg); + } +} + +inline static void pg_unlock(struct pktgen_thread_info* pg_thread, const char* msg) { + if (debug > 1) { + printk("before pg_unlock thread, thread: %p msg: %s\n", + pg_thread, msg); + } + spin_unlock(&(pg_thread->pg_threadlock)); + if (debug > 1) { + printk("after pg_unlock thread, thread: %p msg: %s\n", + pg_thread, msg); + } +} + /** Convert to miliseconds */ -static inline __u64 tv_to_ms(const struct timeval* tv) -{ +static inline __u64 tv_to_ms(const struct timeval* tv) { __u64 ms = tv->tv_usec / 1000; ms += (__u64)tv->tv_sec * (__u64)1000; return ms; @@ -406,1138 +291,430 @@ /** Convert to micro-seconds */ -static inline __u64 tv_to_us(const struct timeval* tv) -{ +static inline __u64 tv_to_us(const struct timeval* tv) { __u64 us = tv->tv_usec; us += (__u64)tv->tv_sec * (__u64)1000000; return us; } + static inline __u64 pg_div(__u64 n, __u32 base) { __u64 tmp = n; do_div(tmp, base); - /* printk("pktgen: pg_div, n: %llu base: %d rv: %llu\n", + /* printk("pg_div, n: %llu base: %d rv: %llu\n", n, base, tmp); */ return tmp; } -static inline __u64 pg_div64(__u64 n, __u64 base) -{ - __u64 tmp = n; -/* - * How do we know if the architectrure we are running on - * supports division with 64 bit base? - * - */ -#if defined(__sparc_v9__) || defined(__powerpc64__) || defined(__alpha__) || defined(__x86_64__) || defined(__ia64__) - - do_div(tmp, base); -#else - tmp = divremdi3(n, base, PG_DIV); -#endif - return tmp; -} - -static inline u32 pktgen_random(void) -{ -#if 0 - __u32 n; - get_random_bytes(&n, 4); - return n; -#else - return net_random(); -#endif +/* Fast, not horribly accurate, since the machine started. */ +static inline __u64 getRelativeCurMs(void) { + return pg_div(get_cycles(), pg_cycles_per_ms); } -static inline __u64 getCurMs(void) -{ +/* Since the epoc. More precise over long periods of time than + * getRelativeCurMs + */ +static inline __u64 getCurMs(void) { struct timeval tv; do_gettimeofday(&tv); return tv_to_ms(&tv); } -static inline __u64 getCurUs(void) -{ +/* Since the epoc. 
More precise over long periods of time than + * getRelativeCurMs + */ +static inline __u64 getCurUs(void) { struct timeval tv; do_gettimeofday(&tv); return tv_to_us(&tv); } -static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b) -{ - return tv_to_us(a) - tv_to_us(b); +/* Since the machine booted. */ +static inline __u64 getRelativeCurUs(void) { + return pg_div(get_cycles(), pg_cycles_per_us); } +/* Since the machine booted. */ +static inline __u64 getRelativeCurNs(void) { + return pg_div(get_cycles(), pg_cycles_per_ns); +} -/* old include end */ +static inline __u64 tv_diff(const struct timeval* a, const struct timeval* b) { + return tv_to_us(a) - tv_to_us(b); +} -static char version[] __initdata = VERSION; -static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, size_t count, loff_t *ppos); -static ssize_t proc_pgctrl_write(struct file* file, const char __user * buf, size_t count, loff_t *ppos); -static int proc_if_read(char *buf , char **start, off_t offset, int len, int *eof, void *data); - -static int proc_thread_read(char *buf , char **start, off_t offset, int len, int *eof, void *data); -static int proc_if_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data); -static int proc_thread_write(struct file *file, const char __user *user_buffer, unsigned long count, void *data); -static int create_proc_dir(void); -static int remove_proc_dir(void); - -static int pktgen_remove_device(struct pktgen_thread* t, struct pktgen_dev *i); -static int pktgen_add_device(struct pktgen_thread* t, const char* ifname); -static struct pktgen_thread* pktgen_find_thread(const char* name); -static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread* t, const char* ifname); -static int pktgen_device_event(struct notifier_block *, unsigned long, void *); -static void pktgen_run_all_threads(void); -static void pktgen_stop_all_threads_ifs(void); -static int pktgen_stop_device(struct pktgen_dev *pkt_dev); -static void pktgen_stop(struct pktgen_thread* t); -static void pktgen_clear_counters(struct pktgen_dev *pkt_dev); -static struct pktgen_dev *pktgen_NN_threads(const char* dev_name, int remove); -static unsigned int scan_ip6(const char *s,char ip[16]); -static unsigned int fmt_ip6(char *s,const char ip[16]); -/* Module parameters, defaults. 
*/ -static int pg_count_d = 1000; /* 1000 pkts by default */ -static int pg_delay_d = 0; -static int pg_clone_skb_d = 0; -static int debug = 0; +int pktgen_proc_ioctl(struct inode* inode, struct file* file, unsigned int cmd, + unsigned long arg) { + int err = 0; + struct pktgen_ioctl_info args; + struct pktgen_thread_info* targ = NULL; -static spinlock_t _thread_lock = SPIN_LOCK_UNLOCKED; -static struct pktgen_thread *pktgen_threads = NULL; + /* + if (!capable(CAP_NET_ADMIN)){ + return -EPERM; + } + */ + + if (copy_from_user(&args, (void*)arg, sizeof(args))) { + return -EFAULT; + } -static char module_fname[128]; -static struct proc_dir_entry *module_proc_ent = NULL; + /* Null terminate the names */ + args.thread_name[31] = 0; + args.interface_name[31] = 0; -static struct notifier_block pktgen_notifier_block = { - .notifier_call = pktgen_device_event, -}; + /* printk("pktgen: thread_name: %s interface_name: %s\n", + * args.thread_name, args.interface_name); + */ + + switch (cmd) { + case GET_PKTGEN_INTERFACE_INFO: { + targ = pg_find_thread(args.thread_name); + if (targ) { + struct pktgen_interface_info* info; + info = pg_find_interface(targ, args.interface_name); + if (info) { + memcpy(&(args.info), info, sizeof(args.info)); + if (copy_to_user((void*)(arg), &args, sizeof(args))) { + printk("ERROR: pktgen: copy_to_user failed.\n"); + err = -EFAULT; + } + else { + err = 0; + } + } + else { + /* printk("ERROR: pktgen: Could not find interface -:%s:-\n", + args.interface_name);*/ + err = -ENODEV; + } + } + else { + printk("ERROR: pktgen: Could not find thread -:%s:-.\n", + args.thread_name); + err = -ENODEV; + } + break; + } + default: + /* pass on to underlying device instead?? */ + printk("%s: Unknown pktgen IOCTL: %x \n", __FUNCTION__, + cmd); + return -EINVAL; + } + + return err; +}/* pktgen_proc_ioctl */ static struct file_operations pktgen_fops = { - .read = proc_pgctrl_read, - .write = proc_pgctrl_write, - /* .ioctl = pktgen_ioctl, later maybe */ + ioctl: pktgen_proc_ioctl, }; -/* - * /proc handling functions - * - */ - -static struct proc_dir_entry *pg_proc_dir = NULL; -static int proc_pgctrl_read_eof=0; +/* Runs from interrupt */ +int pg_notify_queue_woken(struct net_device* dev) { + struct pktgen_thread_info* pg_thread = dev->nqw_data; + /* printk("pg_nqw, pg_thread: %p\n", pg_thread); */ + if (pg_thread && pg_thread->sleeping) { + if (getRelativeCurNs() > (pg_thread->next_tx_ns - 1000)) { + /* See if we should wake up the thread, wake + * slightly early (1000 ns) + */ + pg_thread->sleeping = 0; + wake_up_interruptible(&(pg_thread->queue)); + } + } + return 0; +} -static ssize_t proc_pgctrl_read(struct file* file, char __user * buf, - size_t count, loff_t *ppos) -{ - char data[200]; - int len = 0; - if(proc_pgctrl_read_eof) { - proc_pgctrl_read_eof=0; - len = 0; - goto out; +static void set_nqw_hook(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* info, + struct net_device* dev) { + /* The notify-queue-woken magic only works for physical + * devices at this time. So, apply hook to underlying + * device. 
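+	 * For 802.1Q and MAC VLAN devices we recurse below through
+	 * VLAN_DEV_INFO(dev)->real_dev or vlan->lowerdev until we reach
+	 * the physical NIC, so the wake-up callback always lands on the
+	 * device that actually owns the transmit queue.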
+ */ + struct pktgen_thread_info* pg_thread_nd; + /* printk("In set_nqw_hook, dev: %s\n", dev->name); */ + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (dev->priv_flags & IFF_MAC_VLAN) { + struct macvlan_vlan *vlan = dev->priv; + set_nqw_hook(pg_thread, info, vlan->lowerdev); + return; } +#endif - sprintf(data, "%s", VERSION); + if (dev->priv_flags & IFF_802_1Q_VLAN) { + set_nqw_hook(pg_thread, info, VLAN_DEV_INFO(dev)->real_dev); + return; + } - len = strlen(data); + pg_thread_nd = (struct pktgen_thread_info*)(dev->nqw_data); - if(len > count) { - len =-EFAULT; - goto out; - } + /*printk("pg_thread_nd: %p info: %p\n", pg_thread_nd, info);*/ + + if (pg_thread_nd) { + /* Just bump a reference count, it doesn't really matter which + * info is there since we always look at the parent thread anyway + */ + atomic_inc(&(pg_thread_nd->nqw_ref_count)); - if (copy_to_user(buf, data, len)) { - len =-EFAULT; - goto out; - } + /*printk("Incremented nqw_ref_count: %d device: %s\n", + (int)(atomic_read(&(pg_thread_nd->nqw_ref_count))), dev->name); */ + } + else { + /* Must be a real device, or at least some un-handled fake device. Can't + * hurt to add it here at any rate. + */ + atomic_inc(&(pg_thread->nqw_ref_count)); + dev->nqw_data = pg_thread; + dev->notify_queue_woken = pg_notify_queue_woken; + /*printk("Added nqw callback to device: %s, data: %p data_nd: %p\n", + dev->name, dev->nqw_data, pg_thread_nd);*/ + } +}//set_nqw_hook - *ppos += len; - proc_pgctrl_read_eof=1; /* EOF next call */ - out: - return len; -} +static void clear_nqw_hook(struct pktgen_interface_info* info, + struct net_device* dev) { + /* The notify-queue-woken magic only works for physical + * devices at this time. So, apply hook to underlying + * device. + */ + struct pktgen_thread_info* pg_thread; + /* printk("In clear_nqw_hook, dev: %s\n", dev->name); */ + +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE) + if (dev->priv_flags & IFF_MAC_VLAN) { + struct macvlan_vlan *vlan = dev->priv; + clear_nqw_hook(info, vlan->lowerdev); + return; + } +#endif -static ssize_t proc_pgctrl_write(struct file* file,const char __user * buf, - size_t count, loff_t *ppos) -{ - char *data = NULL; - int err = 0; + if (dev->priv_flags & IFF_802_1Q_VLAN) { + clear_nqw_hook(info, VLAN_DEV_INFO(dev)->real_dev); + return; + } - if (!capable(CAP_NET_ADMIN)){ - err = -EPERM; - goto out; - } + if (dev->nqw_data) { + pg_thread = (struct pktgen_thread_info*)(dev->nqw_data); + atomic_dec(&(pg_thread->nqw_ref_count)); - data = (void*)vmalloc ((unsigned int)count); + /* printk("Decremented nqw_ref_count: %d device: %s\n", + (int)(atomic_read(&(pg_thread->nqw_ref_count))), dev->name); */ + + BUG_ON(atomic_read(&(pg_thread->nqw_ref_count)) < 0); + + if (atomic_read(&(pg_thread->nqw_ref_count)) == 0) { + /*printk("Removing nqw reference from device: %s\n", dev->name);*/ + dev->notify_queue_woken = NULL; + dev->nqw_data = NULL; + + /* Clear from all other devices too. There is an issue/bug where + * we may at times have more actual references to the pg_thread than + * our reference counter shows. In practice, I do not believe this + * will be a problem, but it will be slightly inefficient. A true + * fix would involve some sort of hash/list/map of physical interfaces + * to reference counts, stored in the pg_thread class. 
+ * --Ben + */ - if(!data) { - err = -ENOMEM; - goto out; + { + int i; + struct pktgen_interface_info* p; + for (i = 0; iodev && p->odev->nqw_data == pg_thread) { + /*printk("Removing nqw reference from dev: %s\n", + p->odev->name);*/ + p->odev->notify_queue_woken = NULL; + p->odev->nqw_data = NULL; + } + p = p->next; + } + } + } + } + else { + /* Else, if we are directly attached to this device, remove the + * nqw callback then too, because we only allow one info struct + * per net-device, so this MUST be the only one. + */ + if (dev == info->odev) { + /*printk("Removing nqw reference from real device: %s\n", + dev->name);*/ + dev->notify_queue_woken = NULL; + dev->nqw_data = NULL; + } + else { + /*printk("pktgen NOTE: dev: %s != info->odev: %s\n", + dev ? dev->name : "NULL", + info->odev ? info->odev->name : "NULL");*/ + } + } } - if (copy_from_user(data, buf, count)) { - err =-EFAULT; - goto out_free; - } - data[count-1] = 0; /* Make string */ - - if (!strcmp(data, "stop")) - pktgen_stop_all_threads_ifs(); + else { + printk("pktgen: Warning: nqw_data is null in clear_nqw_hook, dev: %s\n", + dev->name); + } +}//clear_nqw_hook - else if (!strcmp(data, "start")) - pktgen_run_all_threads(); - else - printk("pktgen: Unknown command: %s\n", data); +static void remove_pg_info_from_hash(struct pktgen_interface_info* info) { + pg_lock_hash(__FUNCTION__); + { + int device_idx = info->odev ? info->odev->ifindex : 0; + int b = device_idx % PG_INFO_HASH_MAX; + struct pktgen_interface_info* p = pg_info_hash[b]; + struct pktgen_interface_info* prev = pg_info_hash[b]; + int found_one = 0; + + PG_DEBUG(printk("remove_pg_info_from_hash, p: %p info: %p device_idx: %i\n", + p, info, device_idx)); + + if (p != NULL) { + + if (p == info) { + pg_info_hash[b] = p->next_hash; + p->next_hash = NULL; + found_one = 1; + } + else { + while (prev->next_hash) { + p = prev->next_hash; + if (p == info) { + prev->next_hash = p->next_hash; + p->next_hash = NULL; + found_one = 1; + break; + } + prev = p; + } + } + } - err = count; + if (found_one) { + if (info->odev) { + info->odev->priv_flags &= ~(IFF_PKTGEN_RCV); + clear_nqw_hook(info, info->odev); + } + } + } + pg_unlock_hash(__FUNCTION__); +}/* remove_pg_info_from_hash */ - out_free: - vfree (data); - out: - return err; -} -static int proc_if_read(char *buf , char **start, off_t offset, - int len, int *eof, void *data) -{ - char *p; - int i; - struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); - __u64 sa; - __u64 stopped; - __u64 now = getCurUs(); +static void add_pg_info_to_hash(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* info) { + /* First remove it, just in case it's already there. 
*/ + remove_pg_info_from_hash(info); - p = buf; - p += sprintf(p, "Params: count %llu min_pkt_size: %u max_pkt_size: %u\n", - (unsigned long long) pkt_dev->count, - pkt_dev->min_pkt_size, pkt_dev->max_pkt_size); - - p += sprintf(p, " frags: %d delay: %u clone_skb: %d ifname: %s\n", - pkt_dev->nfrags, 1000*pkt_dev->delay_us+pkt_dev->delay_ns, pkt_dev->clone_skb, pkt_dev->ifname); - - p += sprintf(p, " flows: %u flowlen: %u\n", pkt_dev->cflows, pkt_dev->lflow); - - - if(pkt_dev->flags & F_IPV6) { - char b1[128], b2[128], b3[128]; - fmt_ip6(b1, pkt_dev->in6_saddr.s6_addr); - fmt_ip6(b2, pkt_dev->min_in6_saddr.s6_addr); - fmt_ip6(b3, pkt_dev->max_in6_saddr.s6_addr); - p += sprintf(p, " saddr: %s min_saddr: %s max_saddr: %s\n", b1, b2, b3); - - fmt_ip6(b1, pkt_dev->in6_daddr.s6_addr); - fmt_ip6(b2, pkt_dev->min_in6_daddr.s6_addr); - fmt_ip6(b3, pkt_dev->max_in6_daddr.s6_addr); - p += sprintf(p, " daddr: %s min_daddr: %s max_daddr: %s\n", b1, b2, b3); - - } - else - p += sprintf(p, " dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", - pkt_dev->dst_min, pkt_dev->dst_max, pkt_dev->src_min, pkt_dev->src_max); + pg_lock_hash(__FUNCTION__); + { + int device_idx = info->odev ? info->odev->ifindex : 0; + int b = device_idx % PG_INFO_HASH_MAX; - p += sprintf(p, " src_mac: "); - - if ((pkt_dev->src_mac[0] == 0) && - (pkt_dev->src_mac[1] == 0) && - (pkt_dev->src_mac[2] == 0) && - (pkt_dev->src_mac[3] == 0) && - (pkt_dev->src_mac[4] == 0) && - (pkt_dev->src_mac[5] == 0)) - - for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->odev->dev_addr[i], i == 5 ? " " : ":"); - - else - for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->src_mac[i], i == 5 ? " " : ":"); - - p += sprintf(p, "dst_mac: "); - for (i = 0; i < 6; i++) - p += sprintf(p, "%02X%s", pkt_dev->dst_mac[i], i == 5 ? 
"\n" : ":"); + PG_DEBUG(printk("add_pg_info_from_hash, b: %i info: %p device_idx: %i\n", + b, info, device_idx)); - p += sprintf(p, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", - pkt_dev->udp_src_min, pkt_dev->udp_src_max, pkt_dev->udp_dst_min, - pkt_dev->udp_dst_max); - - p += sprintf(p, " src_mac_count: %d dst_mac_count: %d \n Flags: ", - pkt_dev->src_mac_count, pkt_dev->dst_mac_count); + info->next_hash = pg_info_hash[b]; + pg_info_hash[b] = info; - if (pkt_dev->flags & F_IPV6) - p += sprintf(p, "IPV6 "); - - if (pkt_dev->flags & F_IPSRC_RND) - p += sprintf(p, "IPSRC_RND "); + if (info->odev) { + set_nqw_hook(pg_thread, info, info->odev); + info->odev->priv_flags |= (IFF_PKTGEN_RCV); + } + } + pg_unlock_hash(__FUNCTION__); +}/* add_pg_info_to_hash */ - if (pkt_dev->flags & F_IPDST_RND) - p += sprintf(p, "IPDST_RND "); - - if (pkt_dev->flags & F_TXSIZE_RND) - p += sprintf(p, "TXSIZE_RND "); - - if (pkt_dev->flags & F_UDPSRC_RND) - p += sprintf(p, "UDPSRC_RND "); - - if (pkt_dev->flags & F_UDPDST_RND) - p += sprintf(p, "UDPDST_RND "); - - if (pkt_dev->flags & F_MACSRC_RND) - p += sprintf(p, "MACSRC_RND "); - - if (pkt_dev->flags & F_MACDST_RND) - p += sprintf(p, "MACDST_RND "); - - p += sprintf(p, "\n"); - - sa = pkt_dev->started_at; - stopped = pkt_dev->stopped_at; - if (pkt_dev->running) - stopped = now; /* not really stopped, more like last-running-at */ - - p += sprintf(p, "Current:\n pkts-sofar: %llu errors: %llu\n started: %lluus stopped: %lluus idle: %lluus\n", - (unsigned long long) pkt_dev->sofar, - (unsigned long long) pkt_dev->errors, - (unsigned long long) sa, - (unsigned long long) stopped, - (unsigned long long) pkt_dev->idle_acc); +/* Find the pktgen_interface_info for a device idx */ +struct pktgen_interface_info* find_pg_info(int device_idx) { + struct pktgen_interface_info* p = NULL; + if (debug > 1) { + printk("in find_pg_info...\n"); + } + pg_lock_hash(__FUNCTION__); + { + int b = device_idx % PG_INFO_HASH_MAX; + p = pg_info_hash[b]; + while (p) { + if (p->odev && (p->odev->ifindex == device_idx)) { + break; + } + p = p->next_hash; + } + } + pg_unlock_hash(__FUNCTION__); + return p; +} - p += sprintf(p, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", - pkt_dev->seq_num, pkt_dev->cur_dst_mac_offset, pkt_dev->cur_src_mac_offset); - if(pkt_dev->flags & F_IPV6) { - char b1[128], b2[128]; - fmt_ip6(b1, pkt_dev->cur_in6_daddr.s6_addr); - fmt_ip6(b2, pkt_dev->cur_in6_saddr.s6_addr); - p += sprintf(p, " cur_saddr: %s cur_daddr: %s\n", b2, b1); - } - else - p += sprintf(p, " cur_saddr: 0x%x cur_daddr: 0x%x\n", - pkt_dev->cur_saddr, pkt_dev->cur_daddr); +/* Remove an interface from our hash, dissassociate pktgen_interface_info + * from interface + */ +static void check_remove_device(struct pktgen_interface_info* info) { + struct pktgen_interface_info* pi = NULL; + if (info->odev) { + pi = find_pg_info(info->odev->ifindex); + if (pi != info) { + printk("ERROR: pi != info, pi: %p info: %p\n", pi, info); + } + else { + /* Remove info from our hash */ + remove_pg_info_from_hash(info); + } + /* TODO: Wonder if we need locking here? Had rtnl_lock, but + * that can sleep and this is called with irqs disabled... 
+ */ + info->odev->priv_flags &= ~(IFF_PKTGEN_RCV); + atomic_dec(&(info->odev->refcnt)); + info->odev = NULL; + } +}/* check_remove_device */ + + +static int pg_remove_interface_from_all_threads(const char* dev_name) { + int cnt = 0; + pg_lock_thread_list(__FUNCTION__); + { + struct pktgen_thread_info* tmp = pktgen_threads; + struct pktgen_interface_info* info = NULL; + + while (tmp) { + info = pg_find_interface(tmp, dev_name); + if (info) { + printk("pktgen: Removing interface: %s from pktgen control.\n", + dev_name); + pg_rem_interface_info(tmp, info); + cnt++; + } + else { + /* printk("pktgen: Could not find interface: %s in rem_from_all.\n", + dev_name); */ + } + tmp = tmp->next; + } + } + pg_unlock_thread_list(__FUNCTION__); + return cnt; +}/* pg_rem_interface_from_all_threads */ - p += sprintf(p, " cur_udp_dst: %d cur_udp_src: %d\n", - pkt_dev->cur_udp_dst, pkt_dev->cur_udp_src); - p += sprintf(p, " flows: %u\n", pkt_dev->nflows); +static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { + struct net_device *dev = (struct net_device *)(ptr); - if (pkt_dev->result[0]) - p += sprintf(p, "Result: %s\n", pkt_dev->result); - else - p += sprintf(p, "Result: Idle\n"); - *eof = 1; - - return p - buf; -} - - -static int count_trail_chars(const char __user *user_buffer, unsigned int maxlen) -{ - int i; - - for (i = 0; i < maxlen; i++) { - char c; - if (get_user(c, &user_buffer[i])) - return -EFAULT; - switch (c) { - case '\"': - case '\n': - case '\r': - case '\t': - case ' ': - case '=': - break; - default: - goto done; - }; - } -done: - return i; -} - -static unsigned long num_arg(const char __user *user_buffer, unsigned long maxlen, - unsigned long *num) -{ - int i = 0; - *num = 0; - - for(; i < maxlen; i++) { - char c; - if (get_user(c, &user_buffer[i])) - return -EFAULT; - if ((c >= '0') && (c <= '9')) { - *num *= 10; - *num += c -'0'; - } else - break; - } - return i; -} - -static int strn_len(const char __user *user_buffer, unsigned int maxlen) -{ - int i = 0; - - for(; i < maxlen; i++) { - char c; - if (get_user(c, &user_buffer[i])) - return -EFAULT; - switch (c) { - case '\"': - case '\n': - case '\r': - case '\t': - case ' ': - goto done_str; - break; - default: - break; - }; - } -done_str: - - return i; -} - -static int proc_if_write(struct file *file, const char __user *user_buffer, - unsigned long count, void *data) -{ - int i = 0, max, len; - char name[16], valstr[32]; - unsigned long value = 0; - struct pktgen_dev *pkt_dev = (struct pktgen_dev*)(data); - char* pg_result = NULL; - int tmp = 0; - char buf[128]; - - pg_result = &(pkt_dev->result[0]); - - if (count < 1) { - printk("pktgen: wrong command format\n"); - return -EINVAL; - } - - max = count - i; - tmp = count_trail_chars(&user_buffer[i], max); - if (tmp < 0) { - printk("pktgen: illegal format\n"); - return tmp; - } - i += tmp; - - /* Read variable name */ - - len = strn_len(&user_buffer[i], sizeof(name) - 1); - if (len < 0) { return len; } - memset(name, 0, sizeof(name)); - if (copy_from_user(name, &user_buffer[i], len) ) - return -EFAULT; - i += len; - - max = count -i; - len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - - i += len; - - if (debug) { - char tb[count + 1]; - if (copy_from_user(tb, user_buffer, count)) - return -EFAULT; - tb[count] = 0; - printk("pktgen: %s,%lu buffer -:%s:-\n", name, count, tb); - } - - if (!strcmp(name, "min_pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (value 
< 14+20+8) - value = 14+20+8; - if (value != pkt_dev->min_pkt_size) { - pkt_dev->min_pkt_size = value; - pkt_dev->cur_pkt_size = value; - } - sprintf(pg_result, "OK: min_pkt_size=%u", pkt_dev->min_pkt_size); - return count; - } - - if (!strcmp(name, "max_pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (value < 14+20+8) - value = 14+20+8; - if (value != pkt_dev->max_pkt_size) { - pkt_dev->max_pkt_size = value; - pkt_dev->cur_pkt_size = value; - } - sprintf(pg_result, "OK: max_pkt_size=%u", pkt_dev->max_pkt_size); - return count; - } - - /* Shortcut for min = max */ - - if (!strcmp(name, "pkt_size")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (value < 14+20+8) - value = 14+20+8; - if (value != pkt_dev->min_pkt_size) { - pkt_dev->min_pkt_size = value; - pkt_dev->max_pkt_size = value; - pkt_dev->cur_pkt_size = value; - } - sprintf(pg_result, "OK: pkt_size=%u", pkt_dev->min_pkt_size); - return count; - } - - if (!strcmp(name, "debug")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - debug = value; - sprintf(pg_result, "OK: debug=%u", debug); - return count; - } - - if (!strcmp(name, "frags")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - pkt_dev->nfrags = value; - sprintf(pg_result, "OK: frags=%u", pkt_dev->nfrags); - return count; - } - if (!strcmp(name, "delay")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (value == 0x7FFFFFFF) { - pkt_dev->delay_us = 0x7FFFFFFF; - pkt_dev->delay_ns = 0; - } else { - pkt_dev->delay_us = value / 1000; - pkt_dev->delay_ns = value % 1000; - } - sprintf(pg_result, "OK: delay=%u", 1000*pkt_dev->delay_us+pkt_dev->delay_ns); - return count; - } - if (!strcmp(name, "udp_src_min")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (value != pkt_dev->udp_src_min) { - pkt_dev->udp_src_min = value; - pkt_dev->cur_udp_src = value; - } - sprintf(pg_result, "OK: udp_src_min=%u", pkt_dev->udp_src_min); - return count; - } - if (!strcmp(name, "udp_dst_min")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (value != pkt_dev->udp_dst_min) { - pkt_dev->udp_dst_min = value; - pkt_dev->cur_udp_dst = value; - } - sprintf(pg_result, "OK: udp_dst_min=%u", pkt_dev->udp_dst_min); - return count; - } - if (!strcmp(name, "udp_src_max")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (value != pkt_dev->udp_src_max) { - pkt_dev->udp_src_max = value; - pkt_dev->cur_udp_src = value; - } - sprintf(pg_result, "OK: udp_src_max=%u", pkt_dev->udp_src_max); - return count; - } - if (!strcmp(name, "udp_dst_max")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (value != pkt_dev->udp_dst_max) { - pkt_dev->udp_dst_max = value; - pkt_dev->cur_udp_dst = value; - } - sprintf(pg_result, "OK: udp_dst_max=%u", pkt_dev->udp_dst_max); - return count; - } - if (!strcmp(name, "clone_skb")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - pkt_dev->clone_skb = value; - - sprintf(pg_result, "OK: clone_skb=%d", pkt_dev->clone_skb); - return count; - } - if (!strcmp(name, "count")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - pkt_dev->count = value; - sprintf(pg_result, "OK: 
count=%llu", - (unsigned long long) pkt_dev->count); - return count; - } - if (!strcmp(name, "src_mac_count")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (pkt_dev->src_mac_count != value) { - pkt_dev->src_mac_count = value; - pkt_dev->cur_src_mac_offset = 0; - } - sprintf(pg_result, "OK: src_mac_count=%d", pkt_dev->src_mac_count); - return count; - } - if (!strcmp(name, "dst_mac_count")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (pkt_dev->dst_mac_count != value) { - pkt_dev->dst_mac_count = value; - pkt_dev->cur_dst_mac_offset = 0; - } - sprintf(pg_result, "OK: dst_mac_count=%d", pkt_dev->dst_mac_count); - return count; - } - if (!strcmp(name, "flag")) { - char f[32]; - memset(f, 0, 32); - len = strn_len(&user_buffer[i], sizeof(f) - 1); - if (len < 0) { return len; } - if (copy_from_user(f, &user_buffer[i], len)) - return -EFAULT; - i += len; - if (strcmp(f, "IPSRC_RND") == 0) - pkt_dev->flags |= F_IPSRC_RND; - - else if (strcmp(f, "!IPSRC_RND") == 0) - pkt_dev->flags &= ~F_IPSRC_RND; - - else if (strcmp(f, "TXSIZE_RND") == 0) - pkt_dev->flags |= F_TXSIZE_RND; - - else if (strcmp(f, "!TXSIZE_RND") == 0) - pkt_dev->flags &= ~F_TXSIZE_RND; - - else if (strcmp(f, "IPDST_RND") == 0) - pkt_dev->flags |= F_IPDST_RND; - - else if (strcmp(f, "!IPDST_RND") == 0) - pkt_dev->flags &= ~F_IPDST_RND; - - else if (strcmp(f, "UDPSRC_RND") == 0) - pkt_dev->flags |= F_UDPSRC_RND; - - else if (strcmp(f, "!UDPSRC_RND") == 0) - pkt_dev->flags &= ~F_UDPSRC_RND; - - else if (strcmp(f, "UDPDST_RND") == 0) - pkt_dev->flags |= F_UDPDST_RND; - - else if (strcmp(f, "!UDPDST_RND") == 0) - pkt_dev->flags &= ~F_UDPDST_RND; - - else if (strcmp(f, "MACSRC_RND") == 0) - pkt_dev->flags |= F_MACSRC_RND; - - else if (strcmp(f, "!MACSRC_RND") == 0) - pkt_dev->flags &= ~F_MACSRC_RND; - - else if (strcmp(f, "MACDST_RND") == 0) - pkt_dev->flags |= F_MACDST_RND; - - else if (strcmp(f, "!MACDST_RND") == 0) - pkt_dev->flags &= ~F_MACDST_RND; - - else { - sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! 
to un-set flag):\n%s", - f, - "IPSRC_RND, IPDST_RND, TXSIZE_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND\n"); - return count; - } - sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); - return count; - } - if (!strcmp(name, "dst_min") || !strcmp(name, "dst")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_min) - 1); - if (len < 0) { return len; } - - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - if (strcmp(buf, pkt_dev->dst_min) != 0) { - memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min)); - strncpy(pkt_dev->dst_min, buf, len); - pkt_dev->daddr_min = in_aton(pkt_dev->dst_min); - pkt_dev->cur_daddr = pkt_dev->daddr_min; - } - if(debug) - printk("pktgen: dst_min set to: %s\n", pkt_dev->dst_min); - i += len; - sprintf(pg_result, "OK: dst_min=%s", pkt_dev->dst_min); - return count; - } - if (!strcmp(name, "dst_max")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->dst_max) - 1); - if (len < 0) { return len; } - - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - - buf[len] = 0; - if (strcmp(buf, pkt_dev->dst_max) != 0) { - memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max)); - strncpy(pkt_dev->dst_max, buf, len); - pkt_dev->daddr_max = in_aton(pkt_dev->dst_max); - pkt_dev->cur_daddr = pkt_dev->daddr_max; - } - if(debug) - printk("pktgen: dst_max set to: %s\n", pkt_dev->dst_max); - i += len; - sprintf(pg_result, "OK: dst_max=%s", pkt_dev->dst_max); - return count; - } - if (!strcmp(name, "dst6")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); - if (len < 0) return len; - - pkt_dev->flags |= F_IPV6; - - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - - scan_ip6(buf, pkt_dev->in6_daddr.s6_addr); - fmt_ip6(buf, pkt_dev->in6_daddr.s6_addr); - - ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr); - - if(debug) - printk("pktgen: dst6 set to: %s\n", buf); - - i += len; - sprintf(pg_result, "OK: dst6=%s", buf); - return count; - } - if (!strcmp(name, "dst6_min")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); - if (len < 0) return len; - - pkt_dev->flags |= F_IPV6; - - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - - scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); - fmt_ip6(buf, pkt_dev->min_in6_daddr.s6_addr); - - ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->min_in6_daddr); - if(debug) - printk("pktgen: dst6_min set to: %s\n", buf); - - i += len; - sprintf(pg_result, "OK: dst6_min=%s", buf); - return count; - } - if (!strcmp(name, "dst6_max")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); - if (len < 0) return len; - - pkt_dev->flags |= F_IPV6; - - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - - scan_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); - fmt_ip6(buf, pkt_dev->max_in6_daddr.s6_addr); - - if(debug) - printk("pktgen: dst6_max set to: %s\n", buf); - - i += len; - sprintf(pg_result, "OK: dst6_max=%s", buf); - return count; - } - if (!strcmp(name, "src6")) { - len = strn_len(&user_buffer[i], sizeof(buf) - 1); - if (len < 0) return len; - - pkt_dev->flags |= F_IPV6; - - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - - scan_ip6(buf, pkt_dev->in6_saddr.s6_addr); - fmt_ip6(buf, pkt_dev->in6_saddr.s6_addr); - - ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr); - - if(debug) - printk("pktgen: src6 set to: %s\n", buf); - - i += len; - sprintf(pg_result, "OK: src6=%s", buf); - return count; - } - if (!strcmp(name, "src_min")) { - len = 
strn_len(&user_buffer[i], sizeof(pkt_dev->src_min) - 1); - if (len < 0) { return len; } - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - if (strcmp(buf, pkt_dev->src_min) != 0) { - memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min)); - strncpy(pkt_dev->src_min, buf, len); - pkt_dev->saddr_min = in_aton(pkt_dev->src_min); - pkt_dev->cur_saddr = pkt_dev->saddr_min; - } - if(debug) - printk("pktgen: src_min set to: %s\n", pkt_dev->src_min); - i += len; - sprintf(pg_result, "OK: src_min=%s", pkt_dev->src_min); - return count; - } - if (!strcmp(name, "src_max")) { - len = strn_len(&user_buffer[i], sizeof(pkt_dev->src_max) - 1); - if (len < 0) { return len; } - if (copy_from_user(buf, &user_buffer[i], len)) - return -EFAULT; - buf[len] = 0; - if (strcmp(buf, pkt_dev->src_max) != 0) { - memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max)); - strncpy(pkt_dev->src_max, buf, len); - pkt_dev->saddr_max = in_aton(pkt_dev->src_max); - pkt_dev->cur_saddr = pkt_dev->saddr_max; - } - if(debug) - printk("pktgen: src_max set to: %s\n", pkt_dev->src_max); - i += len; - sprintf(pg_result, "OK: src_max=%s", pkt_dev->src_max); - return count; - } - if (!strcmp(name, "dst_mac")) { - char *v = valstr; - unsigned char old_dmac[6]; - unsigned char *m = pkt_dev->dst_mac; - memcpy(old_dmac, pkt_dev->dst_mac, 6); - - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); - if (len < 0) { return len; } - memset(valstr, 0, sizeof(valstr)); - if( copy_from_user(valstr, &user_buffer[i], len)) - return -EFAULT; - i += len; - - for(*m = 0;*v && m < pkt_dev->dst_mac + 6; v++) { - if (*v >= '0' && *v <= '9') { - *m *= 16; - *m += *v - '0'; - } - if (*v >= 'A' && *v <= 'F') { - *m *= 16; - *m += *v - 'A' + 10; - } - if (*v >= 'a' && *v <= 'f') { - *m *= 16; - *m += *v - 'a' + 10; - } - if (*v == ':') { - m++; - *m = 0; - } - } - - /* Set up Dest MAC */ - if (memcmp(old_dmac, pkt_dev->dst_mac, 6) != 0) - memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, 6); - - sprintf(pg_result, "OK: dstmac"); - return count; - } - if (!strcmp(name, "src_mac")) { - char *v = valstr; - unsigned char *m = pkt_dev->src_mac; - - len = strn_len(&user_buffer[i], sizeof(valstr) - 1); - if (len < 0) { return len; } - memset(valstr, 0, sizeof(valstr)); - if( copy_from_user(valstr, &user_buffer[i], len)) - return -EFAULT; - i += len; - - for(*m = 0;*v && m < pkt_dev->src_mac + 6; v++) { - if (*v >= '0' && *v <= '9') { - *m *= 16; - *m += *v - '0'; - } - if (*v >= 'A' && *v <= 'F') { - *m *= 16; - *m += *v - 'A' + 10; - } - if (*v >= 'a' && *v <= 'f') { - *m *= 16; - *m += *v - 'a' + 10; - } - if (*v == ':') { - m++; - *m = 0; - } - } - - sprintf(pg_result, "OK: srcmac"); - return count; - } - - if (!strcmp(name, "clear_counters")) { - pktgen_clear_counters(pkt_dev); - sprintf(pg_result, "OK: Clearing counters.\n"); - return count; - } - - if (!strcmp(name, "flows")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - if (value > MAX_CFLOWS) - value = MAX_CFLOWS; - - pkt_dev->cflows = value; - sprintf(pg_result, "OK: flows=%u", pkt_dev->cflows); - return count; - } - - if (!strcmp(name, "flowlen")) { - len = num_arg(&user_buffer[i], 10, &value); - if (len < 0) { return len; } - i += len; - pkt_dev->lflow = value; - sprintf(pg_result, "OK: flowlen=%u", pkt_dev->lflow); - return count; - } - - sprintf(pkt_dev->result, "No such parameter \"%s\"", name); - return -EINVAL; -} - -static int proc_thread_read(char *buf , char **start, off_t offset, - int len, int *eof, void *data) -{ - char 
*p; - struct pktgen_thread *t = (struct pktgen_thread*)(data); - struct pktgen_dev *pkt_dev = NULL; - - - if (!t) { - printk("pktgen: ERROR: could not find thread in proc_thread_read\n"); - return -EINVAL; - } - - p = buf; - p += sprintf(p, "Name: %s max_before_softirq: %d\n", - t->name, t->max_before_softirq); - - p += sprintf(p, "Running: "); - - if_lock(t); - for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) - if(pkt_dev->running) - p += sprintf(p, "%s ", pkt_dev->ifname); - - p += sprintf(p, "\nStopped: "); - - for(pkt_dev = t->if_list;pkt_dev; pkt_dev = pkt_dev->next) - if(!pkt_dev->running) - p += sprintf(p, "%s ", pkt_dev->ifname); - - if (t->result[0]) - p += sprintf(p, "\nResult: %s\n", t->result); - else - p += sprintf(p, "\nResult: NA\n"); - - *eof = 1; - - if_unlock(t); - - return p - buf; -} - -static int proc_thread_write(struct file *file, const char __user *user_buffer, - unsigned long count, void *data) -{ - int i = 0, max, len, ret; - char name[40]; - struct pktgen_thread *t; - char *pg_result; - unsigned long value = 0; - - if (count < 1) { - // sprintf(pg_result, "Wrong command format"); - return -EINVAL; - } - - max = count - i; - len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - - i += len; - - /* Read variable name */ - - len = strn_len(&user_buffer[i], sizeof(name) - 1); - if (len < 0) - return len; - - memset(name, 0, sizeof(name)); - if (copy_from_user(name, &user_buffer[i], len)) - return -EFAULT; - i += len; - - max = count -i; - len = count_trail_chars(&user_buffer[i], max); - if (len < 0) - return len; - - i += len; - - if (debug) - printk("pktgen: t=%s, count=%lu\n", name, count); - - thread_lock(); - - t = (struct pktgen_thread*)(data); - if(!t) { - printk("pktgen: ERROR: No thread\n"); - ret = -EINVAL; - goto out; - } - - pg_result = &(t->result[0]); - - if (!strcmp(name, "add_device")) { - char f[32]; - memset(f, 0, 32); - len = strn_len(&user_buffer[i], sizeof(f) - 1); - if (len < 0) { - ret = len; - goto out; - } - if( copy_from_user(f, &user_buffer[i], len) ) - return -EFAULT; - i += len; - pktgen_add_device(t, f); - ret = count; - sprintf(pg_result, "OK: add_device=%s", f); - goto out; - } - - if (!strcmp(name, "rem_device_all")) { - t->control |= T_REMDEV; - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/8); /* Propagate thread->control */ - ret = count; - sprintf(pg_result, "OK: rem_device_all"); - goto out; - } - - - if (!strcmp(name, "max_before_softirq")) { - len = num_arg(&user_buffer[i], 10, &value); - t->max_before_softirq = value; - ret = count; - sprintf(pg_result, "OK: max_before_softirq=%lu", value); - goto out; - } - - ret = -EINVAL; - out: - thread_unlock(); - - return ret; -} - -static int create_proc_dir(void) -{ - int len; - /* does proc_dir already exists */ - len = strlen(PG_PROC_DIR); - - for (pg_proc_dir = proc_net->subdir; pg_proc_dir; pg_proc_dir=pg_proc_dir->next) { - if ((pg_proc_dir->namelen == len) && - (! 
memcmp(pg_proc_dir->name, PG_PROC_DIR, len))) - break; - } - - if (!pg_proc_dir) - pg_proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net); - - if (!pg_proc_dir) - return -ENODEV; - - return 0; -} - -static int remove_proc_dir(void) -{ - remove_proc_entry(PG_PROC_DIR, proc_net); - return 0; -} - -/* Think find or remove for NN */ -static struct pktgen_dev *__pktgen_NN_threads(const char* ifname, int remove) -{ - struct pktgen_thread *t; - struct pktgen_dev *pkt_dev = NULL; - - t = pktgen_threads; - - while (t) { - pkt_dev = pktgen_find_dev(t, ifname); - if (pkt_dev) { - if(remove) { - if_lock(t); - pktgen_remove_device(t, pkt_dev); - if_unlock(t); - } - break; - } - t = t->next; - } - return pkt_dev; -} - -static struct pktgen_dev *pktgen_NN_threads(const char* ifname, int remove) -{ - struct pktgen_dev *pkt_dev = NULL; - thread_lock(); - pkt_dev = __pktgen_NN_threads(ifname, remove); - thread_unlock(); - return pkt_dev; -} - -static int pktgen_device_event(struct notifier_block *unused, unsigned long event, void *ptr) -{ - struct net_device *dev = (struct net_device *)(ptr); - - /* It is OK that we do not hold the group lock right now, - * as we run under the RTNL lock. - */ + /* It is OK that we do not hold the group lock right now, + * as we run under the RTNL lock. + */ switch (event) { case NETDEV_CHANGEADDR: @@ -1548,368 +725,364 @@ break; case NETDEV_UNREGISTER: - pktgen_NN_threads(dev->name, REMOVE); + pg_remove_interface_from_all_threads(dev->name); break; }; return NOTIFY_DONE; } -/* Associate pktgen_dev with a device. */ -static struct net_device* pktgen_setup_dev(struct pktgen_dev *pkt_dev) { +/* Associate pktgen_interface_info with a device. + */ +static struct net_device* pg_setup_interface(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* info) { struct net_device *odev; - - /* Clean old setups */ - - if (pkt_dev->odev) { - dev_put(pkt_dev->odev); - pkt_dev->odev = NULL; - } - - odev = dev_get_by_name(pkt_dev->ifname); - + int keep_it = 0; + + check_remove_device(info); + + odev = dev_get_by_name(info->ifname); if (!odev) { - printk("pktgen: no such netdevice: \"%s\"\n", pkt_dev->ifname); - goto out; + printk("No such netdevice: \"%s\"\n", info->ifname); } - if (odev->type != ARPHRD_ETHER) { - printk("pktgen: not an ethernet device: \"%s\"\n", pkt_dev->ifname); - goto out_put; - } - if (!netif_running(odev)) { - printk("pktgen: device is down: \"%s\"\n", pkt_dev->ifname); - goto out_put; + else if (odev->type != ARPHRD_ETHER) { + printk("Not an ethernet device: \"%s\"\n", info->ifname); } - pkt_dev->odev = odev; - - return pkt_dev->odev; + else if (!netif_running(odev)) { + printk("Device is down: \"%s\"\n", info->ifname); + } + else if (odev->priv_flags & IFF_PKTGEN_RCV) { + printk("ERROR: Device: \"%s\" is already assigned to a pktgen interface.\n", + info->ifname); + } + else { + info->odev = odev; + info->odev->priv_flags |= (IFF_PKTGEN_RCV); -out_put: - dev_put(odev); -out: - return NULL; + /* Can't use multi-skb > 0 with virtual interfaces, because they change + * the skb->dev pointer (at least) and so it's really impossible to send + * the exact same pkt over and over again + */ + if ((odev->priv_flags & IFF_MAC_VLAN) || + (odev->priv_flags & IFF_802_1Q_VLAN)) { + if (info->multiskb > 0) { + printk("pktgen: WARNING: Cannot use multi-skb > 0 on virtual interfaces, setting to zero.\n"); + info->multiskb = 0; + } + } + + keep_it = 1; + } + + if (info->odev) { + add_pg_info_to_hash(pg_thread, info); + } -} + if ((!keep_it) && odev) { + 
dev_put(odev); + } + + return info->odev; +}/* pg_setup_interface */ -/* Read pkt_dev from the interface and set up internal pktgen_dev +/* Read info from the interface and set up internal pktgen_interface_info * structure to have the right information to create/send packets */ -static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) +static void pg_setup_inject(struct pktgen_interface_info* info) { - /* Try once more, just in case it works now. */ - if (!pkt_dev->odev) - pktgen_setup_dev(pkt_dev); - - if (!pkt_dev->odev) { - printk("pktgen: ERROR: pkt_dev->odev == NULL in setup_inject.\n"); - sprintf(pkt_dev->result, "ERROR: pkt_dev->odev == NULL in setup_inject.\n"); + if (!info->odev) { + /* Try once more, just in case it works now. */ + pg_setup_interface(info->pg_thread, info); + } + + if (!info->odev) { + printk("ERROR: info->odev == NULL in setup_inject.\n"); + sprintf(info->result, "ERROR: info->odev == NULL in setup_inject.\n"); return; } /* Default to the interface's mac if not explicitly set. */ + if (!(info->flags & F_SET_SRCMAC)) { + memcpy(&(info->hh[6]), info->odev->dev_addr, 6); + } + else { + memcpy(&(info->hh[6]), info->src_mac, 6); + } - if ((pkt_dev->src_mac[0] == 0) && - (pkt_dev->src_mac[1] == 0) && - (pkt_dev->src_mac[2] == 0) && - (pkt_dev->src_mac[3] == 0) && - (pkt_dev->src_mac[4] == 0) && - (pkt_dev->src_mac[5] == 0)) { - - memcpy(&(pkt_dev->hh[6]), pkt_dev->odev->dev_addr, 6); - } /* Set up Dest MAC */ - memcpy(&(pkt_dev->hh[0]), pkt_dev->dst_mac, 6); + memcpy(&(info->hh[0]), info->dst_mac, 6); /* Set up pkt size */ - pkt_dev->cur_pkt_size = pkt_dev->min_pkt_size; - - if(pkt_dev->flags & F_IPV6) { - /* - * Skip this automatic address setting until locks or functions - * gets exported - */ - -#ifdef NOTNOW - int i, set = 0, err=1; - struct inet6_dev *idev; - - for(i=0; i< IN6_ADDR_HSIZE; i++) - if(pkt_dev->cur_in6_saddr.s6_addr[i]) { - set = 1; - break; - } + info->cur_pkt_size = info->min_pkt_size; + + info->saddr_min = 0; + info->saddr_max = 0; + if (strlen(info->src_min) == 0) { + if (info->odev->ip_ptr) { + struct in_device *in_dev = info->odev->ip_ptr; + + if (in_dev->ifa_list) { + info->saddr_min = in_dev->ifa_list->ifa_address; + info->saddr_max = info->saddr_min; + } + } + } + else { + info->saddr_min = in_aton(info->src_min); + info->saddr_max = in_aton(info->src_max); + } - if(!set) { - - /* - * Use linklevel address if unconfigured. - * - * use ipv6_get_lladdr if/when it's get exported - */ + info->daddr_min = in_aton(info->dst_min); + info->daddr_max = in_aton(info->dst_max); + /* Initialize current values. 
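Both pg_setup_inject() above and the proc parsing later rely on in_aton() to turn a dotted-quad string into a 32-bit address. A tiny user-space sketch of that parsing for reference (no validation, mirroring the kernel helper; note that in_aton() additionally applies htonl() so the result is in network byte order, whereas here the first octet simply ends up in the most significant byte of a host-order value):

    #include <stdint.h>
    #include <stdio.h>

    /* Parse "a.b.c.d", first octet into the most significant byte. */
    static uint32_t simple_aton(const char *s)
    {
        uint32_t addr = 0;
        int octet, i;

        for (i = 0; i < 4; i++) {
            octet = 0;
            while (*s >= '0' && *s <= '9')
                octet = octet * 10 + (*s++ - '0');
            addr = (addr << 8) | (uint32_t)(octet & 0xFF);
            if (*s == '.')
                s++;
        }
        return addr;
    }

    int main(void)
    {
        printf("0x%08X\n", simple_aton("192.168.1.5"));   /* 0xC0A80105 */
        return 0;
    }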
*/ + info->cur_dst_mac_offset = 0; + info->cur_src_mac_offset = 0; + info->cur_saddr = info->saddr_min; + info->cur_daddr = info->daddr_min; + info->cur_udp_dst = info->udp_dst_min; + info->cur_udp_src = info->udp_src_min; +} + +/* delay_ns is in nano-seconds */ +static void pg_nanodelay(int delay_ns, struct pktgen_interface_info* info, + struct pktgen_thread_info* pg_thread) +{ + u64 idle_start = getRelativeCurNs(); + u64 last_time; + u64 itmp = idle_start; + info->nanodelays++; + info->accum_delay_ns += delay_ns; + while (info->accum_delay_ns > PG_MAX_ACCUM_DELAY_NS) { + info->sleeps++; + pg_thread->sleeping = 1; + interruptible_sleep_on_timeout(&(pg_thread->queue), 1); + pg_thread->sleeping = 0; + /* will wake after one tick */ + last_time = itmp; + itmp = getRelativeCurNs(); + info->accum_delay_ns -= (itmp - last_time); + info->idle_acc += (itmp - last_time); - read_lock(&addrconf_lock); - if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) { - struct inet6_ifaddr *ifp; - - read_lock_bh(&idev->lock); - for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) { - if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { - ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &ifp->addr); - err = 0; - break; - } - } - read_unlock_bh(&idev->lock); - } - read_unlock(&addrconf_lock); - if(err) printk("pktgen: ERROR: IPv6 link address not availble.\n"); - } -#endif - } - else { - pkt_dev->saddr_min = 0; - pkt_dev->saddr_max = 0; - if (strlen(pkt_dev->src_min) == 0) { - - struct in_device *in_dev; - - rcu_read_lock(); - in_dev = __in_dev_get(pkt_dev->odev); - if (in_dev) { - if (in_dev->ifa_list) { - pkt_dev->saddr_min = in_dev->ifa_list->ifa_address; - pkt_dev->saddr_max = pkt_dev->saddr_min; - } - in_dev_put(in_dev); - } - rcu_read_unlock(); - } - else { - pkt_dev->saddr_min = in_aton(pkt_dev->src_min); - pkt_dev->saddr_max = in_aton(pkt_dev->src_max); + if (!info->do_run_run) { + break; } + }/* while */ +}//pg_nanodelay - pkt_dev->daddr_min = in_aton(pkt_dev->dst_min); - pkt_dev->daddr_max = in_aton(pkt_dev->dst_max); - } - /* Initialize current values. 
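The pg_nanodelay() helper above never busy-waits sub-tick gaps: it accumulates the requested delay as a debt and only sleeps, one tick at a time, once that debt exceeds PG_MAX_ACCUM_DELAY_NS, crediting back the time actually spent asleep so overshoot carries forward. A minimal user-space sketch of the same bookkeeping; THRESHOLD_NS and now_ns() are illustrative stand-ins, not symbols from this patch:

    #include <stdint.h>
    #include <time.h>

    #define THRESHOLD_NS (10 * 1000 * 1000)   /* assumed threshold, ~10 ms */

    static int64_t now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (int64_t)ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    static int64_t accum_delay_ns;

    /* Called once per packet with the desired inter-packet gap. */
    static void nanodelay(int64_t delay_ns)
    {
        accum_delay_ns += delay_ns;
        while (accum_delay_ns > THRESHOLD_NS) {
            int64_t start = now_ns();
            struct timespec tick = { 0, 1000000 };   /* roughly one 1 ms tick */
            nanosleep(&tick, NULL);
            accum_delay_ns -= now_ns() - start;      /* subtract what really elapsed */
        }
    }

The important property is that many small gaps are merged into a few coarse sleeps, so the average rate is honoured even though individual gaps are far below the scheduler resolution.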
*/ - pkt_dev->cur_dst_mac_offset = 0; - pkt_dev->cur_src_mac_offset = 0; - pkt_dev->cur_saddr = pkt_dev->saddr_min; - pkt_dev->cur_daddr = pkt_dev->daddr_min; - pkt_dev->cur_udp_dst = pkt_dev->udp_dst_min; - pkt_dev->cur_udp_src = pkt_dev->udp_src_min; - pkt_dev->nflows = 0; -} -static void spin(struct pktgen_dev *pkt_dev, __u64 spin_until_us) +/* Returns: cycles per micro-second */ +static int calc_mhz(void) { - __u64 start; - __u64 now; + struct timeval start, stop; + u64 start_s; + u64 t1, t2; + u32 elapsed; + u32 clock_time = 0; + + do_gettimeofday(&start); + start_s = get_cycles(); + /* Spin for 50,000,000 cycles */ + do { + barrier(); + elapsed = (u32)(get_cycles() - start_s); + if (elapsed == 0) + return 0; + } while (elapsed < 50000000); + do_gettimeofday(&stop); + + t1 = tv_to_us(&start); + t2 = tv_to_us(&stop); + + clock_time = (u32)(t2 - t1); + if (clock_time == 0) { + printk("pktgen: ERROR: clock_time was zero..things may not work right, t1: %u t2: %u ...\n", + (u32)(t1), (u32)(t2)); + return 0x7FFFFFFF; + } + return elapsed / clock_time; +} - start = now = getCurUs(); - printk(KERN_INFO "sleeping for %d\n", (int)(spin_until_us - now)); - while (now < spin_until_us) { - /* TODO: optimise sleeping behavior */ - if (spin_until_us - now > (1000000/HZ)+1) { - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(1); - } else if (spin_until_us - now > 100) { - do_softirq(); - if (!pkt_dev->running) - return; - if (need_resched()) - schedule(); - } +/* Calibrate cycles per micro-second */ +static void cycles_calibrate(void) +{ + int i; - now = getCurUs(); + for (i = 0; i < 3; i++) { + u32 res = calc_mhz(); + if (res > pg_cycles_per_us) + pg_cycles_per_us = res; } - pkt_dev->idle_acc += now - start; + /* Set these up too, only need to calculate these once. 
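calc_mhz() above calibrates by spinning for a fixed 50,000,000 CPU cycles, timing that spin with gettimeofday(), and dividing cycles by elapsed microseconds. A user-space sketch of the same idea, assuming an x86 TSC (rdtsc) in place of the kernel's get_cycles():

    #include <stdint.h>
    #include <stdio.h>
    #include <sys/time.h>

    static inline uint64_t rdtsc(void)
    {
        uint32_t lo, hi;
        __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
        return ((uint64_t)hi << 32) | lo;
    }

    int main(void)
    {
        struct timeval start, stop;
        uint64_t c0, elapsed_cycles;
        long long us;

        gettimeofday(&start, NULL);
        c0 = rdtsc();
        do {
            elapsed_cycles = rdtsc() - c0;
        } while (elapsed_cycles < 50000000ULL);          /* spin for 50M cycles */
        gettimeofday(&stop, NULL);

        us = (stop.tv_sec - start.tv_sec) * 1000000LL
           + (stop.tv_usec - start.tv_usec);
        if (us <= 0)
            return 1;                                    /* clock too coarse */
        printf("~%llu cycles/us\n",
               (unsigned long long)(elapsed_cycles / (uint64_t)us));
        return 0;
    }

Taking the maximum over three runs, as cycles_calibrate() does, guards against the measurement being stretched by an interrupt or a scheduling gap.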
*/ + pg_cycles_per_ns = pg_cycles_per_us / 1000; + if (pg_cycles_per_ns == 0) { + pg_cycles_per_ns = 1; + } + pg_cycles_per_ms = pg_cycles_per_us * 1000; + + printk("pktgen: cycles_calibrate, cycles_per_ns: %d per_us: %d per_ms: %d\n", + pg_cycles_per_ns, pg_cycles_per_us, pg_cycles_per_ms); } /* Increment/randomize headers according to flags and current values * for IP src/dest, UDP src/dst port, MAC-Addr src/dst */ -static void mod_cur_headers(struct pktgen_dev *pkt_dev) { +static void mod_cur_headers(struct pktgen_interface_info* info) { __u32 imn; __u32 imx; - int flow = 0; - - if(pkt_dev->cflows) { - flow = pktgen_random() % pkt_dev->cflows; - - if (pkt_dev->flows[flow].count > pkt_dev->lflow) - pkt_dev->flows[flow].count = 0; - } - - + /* Deal with source MAC */ - if (pkt_dev->src_mac_count > 1) { + if (info->src_mac_count > 1) { __u32 mc; __u32 tmp; - - if (pkt_dev->flags & F_MACSRC_RND) - mc = pktgen_random() % (pkt_dev->src_mac_count); + if (info->flags & F_MACSRC_RND) { + mc = net_random() % (info->src_mac_count); + } else { - mc = pkt_dev->cur_src_mac_offset++; - if (pkt_dev->cur_src_mac_offset > pkt_dev->src_mac_count) - pkt_dev->cur_src_mac_offset = 0; + mc = info->cur_src_mac_offset++; + if (info->cur_src_mac_offset > info->src_mac_count) { + info->cur_src_mac_offset = 0; + } } - tmp = pkt_dev->src_mac[5] + (mc & 0xFF); - pkt_dev->hh[11] = tmp; - tmp = (pkt_dev->src_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8)); - pkt_dev->hh[10] = tmp; - tmp = (pkt_dev->src_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8)); - pkt_dev->hh[9] = tmp; - tmp = (pkt_dev->src_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8)); - pkt_dev->hh[8] = tmp; - tmp = (pkt_dev->src_mac[1] + (tmp >> 8)); - pkt_dev->hh[7] = tmp; + tmp = info->src_mac[5] + (mc & 0xFF); + info->hh[11] = tmp; + tmp = (info->src_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8)); + info->hh[10] = tmp; + tmp = (info->src_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8)); + info->hh[9] = tmp; + tmp = (info->src_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8)); + info->hh[8] = tmp; + tmp = (info->src_mac[1] + (tmp >> 8)); + info->hh[7] = tmp; } /* Deal with Destination MAC */ - if (pkt_dev->dst_mac_count > 1) { + if (info->dst_mac_count > 1) { __u32 mc; __u32 tmp; - - if (pkt_dev->flags & F_MACDST_RND) - mc = pktgen_random() % (pkt_dev->dst_mac_count); - + if (info->flags & F_MACDST_RND) { + mc = net_random() % (info->dst_mac_count); + } else { - mc = pkt_dev->cur_dst_mac_offset++; - if (pkt_dev->cur_dst_mac_offset > pkt_dev->dst_mac_count) { - pkt_dev->cur_dst_mac_offset = 0; + mc = info->cur_dst_mac_offset++; + if (info->cur_dst_mac_offset > info->dst_mac_count) { + info->cur_dst_mac_offset = 0; } } - tmp = pkt_dev->dst_mac[5] + (mc & 0xFF); - pkt_dev->hh[5] = tmp; - tmp = (pkt_dev->dst_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8)); - pkt_dev->hh[4] = tmp; - tmp = (pkt_dev->dst_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8)); - pkt_dev->hh[3] = tmp; - tmp = (pkt_dev->dst_mac[2] + ((mc >> 24) & 0xFF) + (tmp >> 8)); - pkt_dev->hh[2] = tmp; - tmp = (pkt_dev->dst_mac[1] + (tmp >> 8)); - pkt_dev->hh[1] = tmp; - } - - if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) { - if (pkt_dev->flags & F_UDPSRC_RND) - pkt_dev->cur_udp_src = ((pktgen_random() % (pkt_dev->udp_src_max - pkt_dev->udp_src_min)) + pkt_dev->udp_src_min); - + tmp = info->dst_mac[5] + (mc & 0xFF); + info->hh[5] = tmp; + tmp = (info->dst_mac[4] + ((mc >> 8) & 0xFF) + (tmp >> 8)); + info->hh[4] = tmp; + tmp = (info->dst_mac[3] + ((mc >> 16) & 0xFF) + (tmp >> 8)); + info->hh[3] = tmp; + tmp = (info->dst_mac[2] + ((mc >> 24) & 
0xFF) + (tmp >> 8)); + info->hh[2] = tmp; + tmp = (info->dst_mac[1] + (tmp >> 8)); + info->hh[1] = tmp; + } + + if (info->udp_src_min < info->udp_src_max) { + if (info->flags & F_UDPSRC_RND) { + info->cur_udp_src = ((net_random() % (info->udp_src_max - info->udp_src_min)) + + info->udp_src_min); + } else { - pkt_dev->cur_udp_src++; - if (pkt_dev->cur_udp_src >= pkt_dev->udp_src_max) - pkt_dev->cur_udp_src = pkt_dev->udp_src_min; + info->cur_udp_src++; + if (info->cur_udp_src >= info->udp_src_max) { + info->cur_udp_src = info->udp_src_min; + } } } - if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) { - if (pkt_dev->flags & F_UDPDST_RND) { - pkt_dev->cur_udp_dst = ((pktgen_random() % (pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)) + pkt_dev->udp_dst_min); + if (info->udp_dst_min < info->udp_dst_max) { + if (info->flags & F_UDPDST_RND) { + info->cur_udp_dst = ((net_random() % (info->udp_dst_max - info->udp_dst_min)) + + info->udp_dst_min); } else { - pkt_dev->cur_udp_dst++; - if (pkt_dev->cur_udp_dst >= pkt_dev->udp_dst_max) - pkt_dev->cur_udp_dst = pkt_dev->udp_dst_min; + info->cur_udp_dst++; + if (info->cur_udp_dst >= info->udp_dst_max) { + info->cur_udp_dst = info->udp_dst_min; + } } } - if (!(pkt_dev->flags & F_IPV6)) { - - if ((imn = ntohl(pkt_dev->saddr_min)) < (imx = ntohl(pkt_dev->saddr_max))) { - __u32 t; - if (pkt_dev->flags & F_IPSRC_RND) - t = ((pktgen_random() % (imx - imn)) + imn); - else { - t = ntohl(pkt_dev->cur_saddr); - t++; - if (t > imx) { - t = imn; - } - } - pkt_dev->cur_saddr = htonl(t); - } - - if (pkt_dev->cflows && pkt_dev->flows[flow].count != 0) { - pkt_dev->cur_daddr = pkt_dev->flows[flow].cur_daddr; - } else { - - if ((imn = ntohl(pkt_dev->daddr_min)) < (imx = ntohl(pkt_dev->daddr_max))) { - __u32 t; - if (pkt_dev->flags & F_IPDST_RND) { - - t = ((pktgen_random() % (imx - imn)) + imn); - t = htonl(t); - - while( LOOPBACK(t) || MULTICAST(t) || BADCLASS(t) || ZERONET(t) || LOCAL_MCAST(t) ) { - t = ((pktgen_random() % (imx - imn)) + imn); - t = htonl(t); - } - pkt_dev->cur_daddr = t; - } - - else { - t = ntohl(pkt_dev->cur_daddr); - t++; - if (t > imx) { - t = imn; - } - pkt_dev->cur_daddr = htonl(t); - } - } - if(pkt_dev->cflows) { - pkt_dev->flows[flow].cur_daddr = pkt_dev->cur_daddr; - pkt_dev->nflows++; - } - } - } - else /* IPV6 * */ - { - if(pkt_dev->min_in6_daddr.s6_addr32[0] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[1] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[2] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[3] == 0); - else { - int i; - - /* Only random destinations yet */ - - for(i=0; i < 4; i++) { - pkt_dev->cur_in6_daddr.s6_addr32[i] = - ((pktgen_random() | - pkt_dev->min_in6_daddr.s6_addr32[i]) & - pkt_dev->max_in6_daddr.s6_addr32[i]); - } - } - } + if ((imn = ntohl(info->saddr_min)) < (imx = ntohl(info->saddr_max))) { + __u32 t; + if (info->flags & F_IPSRC_RND) { + t = ((net_random() % (imx - imn)) + imn); + } + else { + t = ntohl(info->cur_saddr); + t++; + if (t > imx) { + t = imn; + } + } + info->cur_saddr = htonl(t); + } - if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) { + if ((imn = ntohl(info->daddr_min)) < (imx = ntohl(info->daddr_max))) { __u32 t; - if (pkt_dev->flags & F_TXSIZE_RND) { - t = ((pktgen_random() % (pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)) - + pkt_dev->min_pkt_size); + if (info->flags & F_IPDST_RND) { + t = ((net_random() % (imx - imn)) + imn); } else { - t = pkt_dev->cur_pkt_size + 1; - if (t > pkt_dev->max_pkt_size) - t = pkt_dev->min_pkt_size; + t = ntohl(info->cur_daddr); + t++; + if (t > imx) { + t = imn; + } } - 
pkt_dev->cur_pkt_size = t; + info->cur_daddr = htonl(t); } - pkt_dev->flows[flow].count++; -} + /* dhetheri - Make MAC address = 00:00:IP address */ + if (info->flags & F_IPMAC) { + __u32 tmp; + __u32 t; + + /* SRC MAC = 00:00:IP address */ + t = ntohl(info->cur_saddr); + tmp = info->src_mac[5] + (t & 0xFF); + info->hh[11] = tmp; + tmp = (info->src_mac[4] + ((t >> 8) & 0xFF) + (tmp >> 8)); + info->hh[10] = tmp; + tmp = (info->src_mac[3] + ((t >> 16) & 0xFF) + (tmp >> 8)); + info->hh[9] = tmp; + tmp = (info->src_mac[2] + ((t >> 24) & 0xFF) + (tmp >> 8)); + info->hh[8] = tmp; + tmp = (info->src_mac[1] + (tmp >> 8)); + info->hh[7] = tmp; + + info->cur_saddr = htonl(t); + + /* DST MAC = 00:00:IP address */ + t = ntohl(info->cur_daddr); + + tmp = info->dst_mac[5] + (t & 0xFF); + info->hh[5] = tmp; + tmp = (info->dst_mac[4] + ((t >> 8) & 0xFF) + (tmp >> 8)); + info->hh[4] = tmp; + tmp = (info->dst_mac[3] + ((t >> 16) & 0xFF) + (tmp >> 8)); + info->hh[3] = tmp; + tmp = (info->dst_mac[2] + ((t >> 24) & 0xFF) + (tmp >> 8)); + info->hh[2] = tmp; + tmp = (info->dst_mac[1] + (tmp >> 8)); + info->hh[1] = tmp; + + info->cur_daddr = htonl(t); + } /* MAC = 00:00:IP address (dhetheri) */ +}/* mod_cur_headers */ -static struct sk_buff *fill_packet_ipv4(struct net_device *odev, - struct pktgen_dev *pkt_dev) + +static struct sk_buff *fill_packet(struct net_device *odev, struct pktgen_interface_info* info) { struct sk_buff *skb = NULL; __u8 *eth; @@ -1917,10 +1090,26 @@ int datalen, iplen; struct iphdr *iph; struct pktgen_hdr *pgh = NULL; - - skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC); + + /* dhetheri - Moved out of mod_cur_headers. */ + if (info->min_pkt_size < info->max_pkt_size) { + __u32 t; + if (info->flags & F_TXSIZE_RND) { + t = ((net_random() % (info->max_pkt_size - info->min_pkt_size)) + + info->min_pkt_size); + } + else { + t = info->cur_pkt_size + 1; + if (t > info->max_pkt_size) { + t = info->min_pkt_size; + } + } + info->cur_pkt_size = t; + } + + skb = alloc_skb(info->cur_pkt_size + 64 + 16, GFP_ATOMIC); if (!skb) { - sprintf(pkt_dev->result, "No memory"); + sprintf(info->result, "No memory"); return NULL; } @@ -1934,17 +1123,17 @@ /* Update any of the values, used when we're incrementing various * fields. 
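When the F_IPMAC flag is set (the dhetheri addition above), mod_cur_headers() rewrites the trailing octets of both MACs so they track the current IPv4 addresses, adding each IP byte to the configured base MAC with carry propagation, which gives the "00:00:IP address" convention mentioned in the comments. A small user-space sketch of that mapping, assuming a host-order address and a zero base MAC:

    #include <stdint.h>
    #include <stdio.h>

    /* Add the four IP bytes to the base MAC, propagating carries upward. */
    static void ip_to_mac(const uint8_t base_mac[6], uint32_t ip, uint8_t out[6])
    {
        unsigned int tmp;

        out[0] = base_mac[0];
        tmp = base_mac[5] + (ip & 0xFF);                       out[5] = (uint8_t)tmp;
        tmp = base_mac[4] + ((ip >> 8) & 0xFF) + (tmp >> 8);   out[4] = (uint8_t)tmp;
        tmp = base_mac[3] + ((ip >> 16) & 0xFF) + (tmp >> 8);  out[3] = (uint8_t)tmp;
        tmp = base_mac[2] + ((ip >> 24) & 0xFF) + (tmp >> 8);  out[2] = (uint8_t)tmp;
        tmp = base_mac[1] + (tmp >> 8);                        out[1] = (uint8_t)tmp;
    }

    int main(void)
    {
        uint8_t base[6] = { 0, 0, 0, 0, 0, 0 };
        uint8_t mac[6];

        ip_to_mac(base, 0xC0A80105u /* 192.168.1.5 */, mac);
        printf("%02X:%02X:%02X:%02X:%02X:%02X\n",      /* 00:00:C0:A8:01:05 */
               mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
        return 0;
    }

With a zero base MAC the result is literally 00:00: followed by the IP, which makes captures easy to correlate with the generated addresses.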
*/ - mod_cur_headers(pkt_dev); + mod_cur_headers(info); - memcpy(eth, pkt_dev->hh, 12); - *(u16*)ð[12] = __constant_htons(ETH_P_IP); - - datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8; /* Eth + IPh + UDPh */ - if (datalen < sizeof(struct pktgen_hdr)) + memcpy(eth, info->hh, 14); + + datalen = info->cur_pkt_size - 14 - 20 - 8; /* Eth + IPh + UDPh */ + if (datalen < sizeof(struct pktgen_hdr)) { datalen = sizeof(struct pktgen_hdr); + } - udph->source = htons(pkt_dev->cur_udp_src); - udph->dest = htons(pkt_dev->cur_udp_dst); + udph->source = htons(info->cur_udp_src); + udph->dest = htons(info->cur_udp_dst); udph->len = htons(datalen + 8); /* DATA + udphdr */ udph->check = 0; /* No checksum */ @@ -1952,9 +1141,14 @@ iph->version = 4; iph->ttl = 32; iph->tos = 0; - iph->protocol = IPPROTO_UDP; /* UDP */ - iph->saddr = pkt_dev->cur_saddr; - iph->daddr = pkt_dev->cur_daddr; + if (info->prot) { /* dhetheri */ + iph->protocol = info->prot; /* dhetheri */ + } + else { + iph->protocol = IPPROTO_UDP; /* UDP */ + } + iph->saddr = info->cur_saddr; + iph->daddr = info->cur_daddr; iph->frag_off = 0; iplen = 20 + 8 + datalen; iph->tot_len = htons(iplen); @@ -1965,10 +1159,10 @@ skb->dev = odev; skb->pkt_type = PACKET_HOST; - if (pkt_dev->nfrags <= 0) + if (info->nfrags <= 0) { pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - else { - int frags = pkt_dev->nfrags; + } else { + int frags = info->nfrags; int i; pgh = (struct pktgen_hdr*)(((char*)(udph)) + 8); @@ -2016,1121 +1210,2321 @@ } } - /* Stamp the time, and sequence number, convert them to network byte order */ + /* Stamp the time, and sequence number, convert them to network byte order */ + if (pgh) { + pgh->pgh_magic = __constant_htonl(PKTGEN_MAGIC); + do_gettimeofday(&(pgh->timestamp)); + pgh->timestamp.tv_usec = htonl(pgh->timestamp.tv_usec); + pgh->timestamp.tv_sec = htonl(pgh->timestamp.tv_sec); + pgh->seq_num = htonl(info->seq_num); + } + info->seq_num++; + + return skb; +} + + +static void record_latency(struct pktgen_interface_info* info, int latency) { + /* NOTE: Latency can be negative */ + int div = 100; + int diff; + int vl; + int i; + + info->pkts_rcvd_since_clear++; + + if (info->pkts_rcvd_since_clear < 100) { + div = info->pkts_rcvd; + if (info->pkts_rcvd_since_clear == 1) { + info->avg_latency = latency; + } + } + + if ((div + 1) == 0) { + info->avg_latency = 0; + } + else { + info->avg_latency = ((info->avg_latency * div + latency) / (div + 1)); + } + + if (latency < info->min_latency) { + info->min_latency = latency; + } + if (latency > info->max_latency) { + info->max_latency = latency; + } + + /* Place the latency in the right 'bucket' */ + diff = (latency - info->min_latency); + for (i = 0; ilatency_bkts[i]++; + break; + } + } +}/* record latency */ + + +/* Returns < 0 if the skb is not a pktgen buffer. */ +int pktgen_receive(struct sk_buff* skb) { + /* int i; */ /* Debugging only */ + /* unsigned char* tmp; */ + + /* dhetheri */ + //printk("pktgen receive:\n"); + //tmp=(char *)(skb->data); + //for (i=0; i<90; i++) { + // printk("%02hx ", tmp[i]); + // if (((i+1) % 15) == 0) { + // printk("\n"); + // } + //} + //printk("\n"); + /* dhetheri */ + + /* See if we have a pktgen packet */ + if ((skb->len >= (20 + 8 + sizeof(struct pktgen_hdr))) && + (skb->protocol == __constant_htons(ETH_P_IP))) { + struct pktgen_hdr* pgh; + + /* It's IP, and long enough, lets check the magic number. + * TODO: This is a hack not always guaranteed to catch the right + * packets. 
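record_latency() above keeps a running average without storing samples: for roughly the first hundred packets the divisor grows with the sample count (an ordinary mean), after which it stays pinned at 100 so each new sample keeps a fixed weight of about 1/101. A simplified user-space sketch of that update (the patch's exact divisor bookkeeping differs slightly and also feeds a histogram of latency buckets):

    #include <limits.h>
    #include <stdio.h>

    struct lat_stats {
        long long samples;
        int avg, min, max;
    };

    static void record_latency(struct lat_stats *s, int latency)
    {
        int div = 100;                        /* cap the window at ~100 samples */

        s->samples++;
        if (s->samples < 100) {
            div = (int)s->samples;
            if (s->samples == 1)
                s->avg = latency;
        }
        s->avg = (s->avg * div + latency) / (div + 1);
        if (latency < s->min) s->min = latency;
        if (latency > s->max) s->max = latency;
    }

    int main(void)
    {
        struct lat_stats s = { 0, 0, INT_MAX, INT_MIN };
        int i;

        for (i = 0; i < 1000; i++)
            record_latency(&s, 100 + (i % 7));
        printf("avg=%d min=%d max=%d\n", s.avg, s.min, s.max);
        return 0;
    }

Note also that the magic-number probe that follows assumes the pktgen header sits at a fixed offset of 20 + 8 bytes (bare IP header plus UDP header), which is why the comment above calls it a hack; IP options would shift the header and defeat the check.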
+ */ + + /* printk("Length & protocol passed, skb->data: %p, raw: %p\n", + skb->data, skb->h.raw); */ + + pgh = (struct pktgen_hdr*)(skb->data + 20 + 8); + + /* + tmp = (char*)(skb->data); + for (i = 0; i<90; i++) { + printk("%02hx ", tmp[i]); + if (((i + 1) % 15) == 0) { + printk("\n"); + } + } + printk("\n"); + */ + + if (pgh->pgh_magic == __constant_ntohl(PKTGEN_MAGIC)) { + struct net_device* dev = skb->dev; + struct pktgen_interface_info* info = find_pg_info(dev->ifindex); + + /* Got one! */ + /* TODO: Check UDP checksum ?? */ + __u32 seq = ntohl(pgh->seq_num); + + if (!info) { + return -1; + } + + info->pkts_rcvd++; + info->bytes_rcvd += ((skb->tail - skb->mac.raw) + 4); /* +4 for the checksum */ + + /* Check for out-of-sequence packets */ + if (info->last_seq_rcvd == seq) { + info->dup_rcvd++; + info->dup_since_incr++; + } + else { + __s64 rx; + __s64 tx; + struct timeval txtv; + if (!skb->stamp.tv_sec) { + do_gettimeofday(&skb->stamp); + } + rx = tv_to_us(&(skb->stamp)); + + txtv.tv_usec = ntohl(pgh->timestamp.tv_usec); + txtv.tv_sec = ntohl(pgh->timestamp.tv_sec); + tx = tv_to_us(&txtv); + record_latency(info, rx - tx); + + if ((info->last_seq_rcvd + 1) == seq) { + if ((info->peer_multiskb > 1) && + (info->peer_multiskb > (info->dup_since_incr + 1))) { + + info->seq_gap_rcvd += (info->peer_multiskb - + info->dup_since_incr - 1); + } + /* Great, in order...all is well */ + } + else if (info->last_seq_rcvd < seq) { + /* sequence gap, means we dropped a pkt most likely */ + if (info->peer_multiskb > 1) { + /* We dropped more than one sequence number's worth, + * and if we're using multiskb, then this is quite + * a few. This number still will not be exact, but + * it will be closer. + */ + info->seq_gap_rcvd += (((seq - info->last_seq_rcvd) * + info->peer_multiskb) - + info->dup_since_incr); + } + else { + info->seq_gap_rcvd += (seq - info->last_seq_rcvd - 1); + } + } + else { + info->ooo_rcvd++; /* out-of-order */ + } + + info->dup_since_incr = 0; + } + info->last_seq_rcvd = seq; + kfree_skb(skb); + if (debug > 1) { + printk("done with pktgen_receive, free'd pkt\n"); + } + return 0; + } + } + return -1; /* Let another protocol handle it, it's not for us! */ +}/* pktgen_receive */ + +static void pg_reset_latency_counters(struct pktgen_interface_info* info) { + int i; + info->avg_latency = 0; + info->min_latency = 0x7fffffff; /* largest integer */ + info->max_latency = 0x80000000; /* smallest integer */ + info->pkts_rcvd_since_clear = 0; + for (i = 0; ilatency_bkts[i] = 0; + } +} + +static void pg_clear_counters(struct pktgen_interface_info* info, int seq_too) { + info->idle_acc = 0; + info->sofar = 0; + info->tx_bytes = 0; + info->errors = 0; + info->ooo_rcvd = 0; + info->dup_rcvd = 0; + info->pkts_rcvd = 0; + info->bytes_rcvd = 0; + info->non_pg_pkts_rcvd = 0; + info->seq_gap_rcvd = 0; /* dropped */ + + /* Clear some transient state */ + info->accum_delay_ns = 0; + info->sleeps = 0; + info->nanodelays = 0; + + /* This is a bit of a hack, but it gets the dup counters + * in line so we don't have false alarms on dropped pkts. + */ + if (seq_too) { + info->dup_since_incr = info->peer_multiskb - 1; + info->seq_num = 1; + info->last_seq_rcvd = 0; + } + + pg_reset_latency_counters(info); +} + +/* Adds an interface to the thread. The interface will be in + * the stopped queue untill started. 
+ */ +static int add_interface_to_thread(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* info) { + int rv = 0; + /* grab lock & insert into the stopped list */ + pg_lock(pg_thread, __FUNCTION__); + + if (info->pg_thread) { + printk("pktgen: ERROR: Already assigned to a thread.\n"); + rv = -EBUSY; + goto out; + } + + info->next = pg_thread->stopped_if_infos; + pg_thread->stopped_if_infos = info; + info->pg_thread = pg_thread; + + out: + pg_unlock(pg_thread, __FUNCTION__); + return rv; +} + +/* Set up structure for sending pkts, clear counters, add to rcv hash, + * create initial packet, and move from the stopped to the running + * interface_info list + */ +static int pg_start_interface(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* info) { + PG_DEBUG(printk("Entering pg_start_interface..\n")); + pg_setup_inject(info); + + if (!info->odev) { + return -1; + } + + PG_DEBUG(printk("About to clean counters..\n")); + pg_clear_counters(info, 1); + + info->do_run_run = 1; /* Cranke yeself! */ + + info->skb = NULL; + + info->started_at = getCurUs(); + + pg_lock(pg_thread, __FUNCTION__); + { + /* Remove from the stopped list */ + struct pktgen_interface_info* p = pg_thread->stopped_if_infos; + if (p == info) { + pg_thread->stopped_if_infos = p->next; + p->next = NULL; + } + else { + while (p) { + if (p->next == info) { + p->next = p->next->next; + info->next = NULL; + break; + } + p = p->next; + } + } + + info->next_tx_ns = 0; /* Transmit immediately */ + pg_thread->next_tx_ns = 0; + + /* Move to the front of the running list */ + info->next = pg_thread->running_if_infos; + pg_thread->running_if_infos = info; + pg_thread->running_if_sz++; + } + pg_unlock(pg_thread, __FUNCTION__); + PG_DEBUG(printk("Leaving pg_start_interface..\n")); + return 0; +}/* pg_start_interface */ + + +/* set stopped-at timer, remove from running list, do counters & statistics + * NOTE: We do not remove from the rcv hash. + */ +static int pg_stop_interface(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* info) { + __u64 total_us; + if (!info->do_run_run) { + printk("pktgen interface: %s is already stopped\n", info->ifname); + return -EINVAL; + } + + info->stopped_at = getCurMs(); + info->do_run_run = 0; + + /* The main worker loop will place it onto the stopped list if needed, + * next time this interface is asked to be re-inserted into the + * list. + */ + + total_us = info->stopped_at - info->started_at; + + { + __u64 idle = pg_div(info->idle_acc, 1000); /* convert to us */ + char *p = info->result; + __u64 pps = divremdi3(info->sofar * 1000, pg_div(total_us, 1000), PG_DIV); + __u64 bps = pps * 8 * (info->cur_pkt_size + 4); /* take 32bit ethernet CRC into account */ + + p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte) %llupps %lluMb/sec (%llubps) errors: %llu", + total_us, total_us - idle, idle, + info->sofar, + info->cur_pkt_size + 4, /* Add 4 to account for the ethernet checksum */ + pps, + bps >> 20, bps, info->errors + ); + } + return 0; +}/* pg_stop_interface */ + + +/* Re-inserts 'last' into the pg_thread's list. Calling code should + * make sure that 'last' is not already in the list. 
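pg_stop_interface() above summarises the run as packets per second and bits per second, adding 4 bytes per frame for the Ethernet FCS. A minimal user-space sketch of the same arithmetic (one thing to double-check in the patch itself: started_at is stamped with getCurUs() while stopped_at uses getCurMs(), which looks like a unit mismatch if total_us is meant to be microseconds):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long pkts     = 1000000ULL;   /* packets actually sent  */
        unsigned long long total_us = 2000000ULL;   /* elapsed time in usec   */
        unsigned int       pkt_size = 60;           /* on-wire size minus FCS */

        unsigned long long pps = pkts * 1000000ULL / total_us;
        unsigned long long bps = pps * 8ULL * (pkt_size + 4);   /* +4 for FCS */

        printf("%llu pps, %llu bps (%llu Mb/s)\n", pps, bps, bps >> 20);
        return 0;
    }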
+ */ +static struct pktgen_interface_info* pg_resort_pginfos(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* last, + int setup_cur_if) { + struct pktgen_interface_info* rv = NULL; + + pg_lock(pg_thread, __FUNCTION__); + { + struct pktgen_interface_info* p = pg_thread->running_if_infos; + + if (last) { + if (!last->do_run_run) { + /* If this guy was stopped while 'current', then + * we'll want to place him on the stopped list + * here. + */ + last->next = pg_thread->stopped_if_infos; + pg_thread->stopped_if_infos = last; + pg_thread->running_if_sz--; + } + else { + /* re-insert */ + if (!p) { + pg_thread->running_if_infos = last; + last->next = NULL; + } + else { + /* Another special case, check to see if we should go at the + * front of the queue. + */ + if (p->next_tx_ns > last->next_tx_ns) { + last->next = p; + pg_thread->running_if_infos = last; + } + else { + int inserted = 0; + while (p->next) { + if (p->next->next_tx_ns > last->next_tx_ns) { + /* Insert into the list */ + last->next = p->next; + p->next = last; + inserted = 1; + break; + } + p = p->next; + } + if (!inserted) { + /* place at the end */ + last->next = NULL; + p->next = last; + } + } + } + } + } + + /* List is re-sorted, so grab the first one to return */ + rv = pg_thread->running_if_infos; + if (rv) { + /* Pop him off of the list. We do this here because we already + * have the lock. Calling code just has to be aware of this + * feature. + */ + pg_thread->running_if_infos = rv->next; + } + } + + if (setup_cur_if) { + pg_thread->cur_if = rv; + } + + pg_unlock(pg_thread, __FUNCTION__); + return rv; +}/* pg_resort_pginfos */ + + +void pg_stop_all_ifs(struct pktgen_thread_info* pg_thread) { + struct pktgen_interface_info* next = NULL; + + pg_lock(pg_thread, __FUNCTION__); + if (pg_thread->cur_if) { + /* Move it onto the stopped list */ + pg_stop_interface(pg_thread, pg_thread->cur_if); + pg_thread->cur_if->next = pg_thread->stopped_if_infos; + pg_thread->stopped_if_infos = pg_thread->cur_if; + pg_thread->cur_if = NULL; + } + pg_unlock(pg_thread, __FUNCTION__); + + /* These have their own locking */ + next = pg_resort_pginfos(pg_thread, NULL, 0); + while (next) { + pg_stop_interface(pg_thread, next); + next = pg_resort_pginfos(pg_thread, NULL, 0); + } +}/* pg_stop_all_ifs */ + + +void pg_rem_all_ifs(struct pktgen_thread_info* pg_thread) { + struct pktgen_interface_info* next = NULL; + + /* Remove all interfaces, clean up memory */ + while ((next = pg_thread->stopped_if_infos)) { + int rv = pg_rem_interface_info(pg_thread, next); + if (rv >= 0) { + kfree(next); + } + else { + printk("ERROR: failed to rem_interface: %i\n", rv); + } + } +}/* pg_rem_all_ifs */ + + +void pg_rem_from_thread_list(struct pktgen_thread_info* pg_thread) { + /* Remove from the thread list */ + pg_lock_thread_list(__FUNCTION__); + { + struct pktgen_thread_info* tmp = pktgen_threads; + if (tmp == pg_thread) { + pktgen_threads = tmp->next; + } + else { + while (tmp) { + if (tmp->next == pg_thread) { + tmp->next = pg_thread->next; + pg_thread->next = NULL; + break; + } + tmp = tmp->next; + } + } + } + pg_unlock_thread_list(__FUNCTION__); +}/* pg_rem_from_thread_list */ + + +/* Main loop of the thread. Send pkts. 
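pg_resort_pginfos() above keeps the running interfaces in a singly linked list sorted by next_tx_ns, pops the head (the earliest deadline) for the worker loop, and re-inserts it at its new position after transmission, effectively a small priority queue. A user-space sketch of that insert/pop pattern; the struct and field names are illustrative, not the patch's:

    #include <stdint.h>
    #include <stdio.h>

    struct ifinfo {
        const char *name;
        uint64_t next_tx_ns;
        struct ifinfo *next;
    };

    /* Insert 'node' keeping the list ordered by ascending next_tx_ns. */
    static void insert_sorted(struct ifinfo **head, struct ifinfo *node)
    {
        struct ifinfo **pp = head;

        while (*pp && (*pp)->next_tx_ns <= node->next_tx_ns)
            pp = &(*pp)->next;
        node->next = *pp;
        *pp = node;
    }

    /* Pop the interface with the earliest deadline (the list head). */
    static struct ifinfo *pop_earliest(struct ifinfo **head)
    {
        struct ifinfo *n = *head;

        if (n)
            *head = n->next;
        return n;
    }

    int main(void)
    {
        struct ifinfo a = { "eth0", 300, NULL };
        struct ifinfo b = { "eth1", 100, NULL };
        struct ifinfo c = { "eth2", 200, NULL };
        struct ifinfo *head = NULL, *n;

        insert_sorted(&head, &a);
        insert_sorted(&head, &b);
        insert_sorted(&head, &c);
        while ((n = pop_earliest(&head)))                    /* eth1, eth2, eth0 */
            printf("%s %llu\n", n->name, (unsigned long long)n->next_tx_ns);
        return 0;
    }

For the handful of interfaces one thread typically drives, a sorted list is simpler than a heap and cheap enough.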
+ */ +void pg_thread_worker(struct pktgen_thread_info* pg_thread) { + struct net_device *odev = NULL; + __u64 idle_start = 0; + struct pktgen_interface_info* next = NULL; + u32 next_ipg = 0; + u64 now = 0; /* in nano-seconds */ + u32 tx_since_softirq = 0; + u32 queue_stopped = 0; + + /* setup the thread environment */ + init_pktgen_kthread(pg_thread, "kpktgend"); + + PG_DEBUG(printk("Starting up pktgen thread: %s\n", pg_thread->name)); + + /* an endless loop in which we are doing our work */ + while (! pg_thread->terminate) { + + if (signal_pending(current)) { + /* we received a request to terminate ourself */ + break; + } + + /* Re-sorts the list, inserting 'next' (which is really the last one + * we used). It pops the top one off of the queue and returns it. + * Calling code must make sure to re-insert the returned value + */ + next = pg_resort_pginfos(pg_thread, next, 1); + + /* Make sure the notify_queue_woken callback will act appropriately */ + if (next) { + pg_thread->next_tx_ns = next->next_tx_ns; + } + + if (queue_stopped > pg_thread->running_if_sz) { + /* All our devices are all fulled up, schedule and hope to run + * again soon. + */ + /* Take this opportunity to run the soft-irq */ + do_softirq(); + tx_since_softirq = 0; + + pg_thread->queues_stopped++; + pg_thread->sleeping = 1; + interruptible_sleep_on_timeout(&(pg_thread->queue), 1); + pg_thread->sleeping = 0; + queue_stopped = 0; + } + + if (next) { + + odev = next->odev; + + if (next->ipg || (next->accum_delay_ns > 0)) { + + now = getRelativeCurNs(); + if (now < next->next_tx_ns) { + next_ipg = (u32)(next->next_tx_ns - now); + + /* These will not actually busy-spin now. Will run as + * much as 1ms fast, and will sleep in 1ms units, assuming + * our tick is 1ms. + */ + pg_nanodelay(next_ipg, next, pg_thread); + if (!next->do_run_run) { + /* We were stopped while sleeping */ + continue; + } + } + + /* This is max IPG, this has special meaning of + * "never transmit" + */ + if (next->ipg == 0x7FFFFFFF) { + next->next_tx_ns = getRelativeCurNs() + next->ipg; + continue; + } + } + + if (need_resched()) { + idle_start = getRelativeCurNs(); + schedule(); + next->idle_acc += getRelativeCurNs() - idle_start; + } + + if (netif_queue_stopped(odev)) { + next->queue_stopped++; + queue_stopped++; + + if (!netif_running(odev)) { + pg_stop_interface(pg_thread, next); + } + + continue; /* Try the next interface */ + } + + if (next->last_ok || !next->skb) { + if ((++next->fp_tmp >= next->multiskb ) || (!next->skb)) { + /* build a new pkt */ + if (next->skb) { + kfree_skb(next->skb); + } + next->skb = fill_packet(odev, next); + if (next->skb == NULL) { + if (net_ratelimit()) { + printk(KERN_INFO "pktgen: Couldn't allocate skb in fill_packet.\n"); + } + schedule(); + next->fp_tmp--; /* back out increment, OOM */ + continue; + } + next->fp++; + next->fp_tmp = 0; /* reset counter */ + /* Not sure what good knowing nr_frags is... 
+ next->nr_frags = skb_shinfo(skb)->nr_frags; + */ + } + atomic_inc(&(next->skb->users)); + } + + spin_lock_bh(&odev->xmit_lock); + if (!netif_queue_stopped(odev)) { + if (odev->hard_start_xmit(next->skb, odev)) { + if (net_ratelimit()) { + printk(KERN_INFO "pktgen: Hard xmit error\n"); + } + next->errors++; + next->last_ok = 0; + next->queue_stopped++; + queue_stopped++; + if (next->ntx_fudge < 10000) { + next->ntx_fudge = 10000; + } + else { + next->ntx_fudge = next->ntx_fudge << 1; + } + } + else { + queue_stopped = 0; /* reset this, we tx'd one successfully */ + next->last_ok = 1; + next->sofar++; + next->tx_bytes += (next->cur_pkt_size + 4); /* count csum */ + next->ntx_fudge = 0; + } + } + else { /* Re-try it next time */ + queue_stopped++; + next->queue_stopped++; + next->last_ok = 0; + if (next->ntx_fudge < 10000) { + next->ntx_fudge = 10000; + } + else { + next->ntx_fudge = next->ntx_fudge << 1; + } + } + spin_unlock_bh(&odev->xmit_lock); + + next->next_tx_ns = getRelativeCurNs() + next->ipg + next->ntx_fudge; + + if (++tx_since_softirq > pg_thread->max_before_softirq) { + do_softirq(); + tx_since_softirq = 0; + } + + /* If next->count is zero, then run forever */ + if ((next->count != 0) && (next->sofar >= next->count)) { + if (atomic_read(&(next->skb->users)) != 1) { + idle_start = getRelativeCurNs(); + while (atomic_read(&(next->skb->users)) != 1) { + if (signal_pending(current)) { + break; + } + schedule(); + } + next->idle_acc += getRelativeCurNs() - idle_start; + } + pg_stop_interface(pg_thread, next); + }/* if we're done with a particular interface. */ + + }/* if could find the next interface to send on. */ + else { + /* fall asleep for a bit */ + pg_thread->sleeping = 1; + interruptible_sleep_on_timeout(&(pg_thread->queue), HZ/10); + pg_thread->sleeping = 0; + } + }//while true + + /* here we go only in case of termination of the thread */ + + PG_DEBUG(printk("pgthread: %s stopping all Interfaces.\n", pg_thread->name)); + pg_stop_all_ifs(pg_thread); + + PG_DEBUG(printk("pgthread: %s removing all Interfaces.\n", pg_thread->name)); + pg_rem_all_ifs(pg_thread); + + pg_rem_from_thread_list(pg_thread); + + /* cleanup the thread, leave */ + PG_DEBUG(printk("pgthread: %s calling exit_pktgen_kthread.\n", pg_thread->name)); + exit_pktgen_kthread(pg_thread); +} + +/* private functions */ +static void kthread_launcher(void *data) { + struct pktgen_thread_info *kthread = data; + kernel_thread((int (*)(void *))kthread->function, (void *)kthread, 0); +} + +/* create a new kernel thread. Called by the creator. */ +void start_pktgen_kthread(struct pktgen_thread_info *kthread) { + + /* initialize the semaphore: + we start with the semaphore locked. The new kernel + thread will setup its stuff and unlock it. This + control flow (the one that creates the thread) blocks + in the down operation below until the thread has reached + the up() operation. 
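start_pktgen_kthread() above hands the actual thread creation off to keventd (or a workqueue on 2.6) and then blocks on startstop_sem, which was initialised locked; the new thread performs its setup in init_pktgen_kthread() and releases the semaphore, so the creator only returns once the worker is really up. A user-space sketch of that handshake, with POSIX threads and semaphores standing in for the kernel primitives:

    #include <pthread.h>
    #include <semaphore.h>
    #include <stdio.h>

    static sem_t startstop_sem;

    static void *worker(void *arg)
    {
        (void)arg;
        /* ... per-thread setup (name, signal mask, wait queue) ... */
        printf("worker: setup done\n");
        sem_post(&startstop_sem);        /* the up() in init_pktgen_kthread()      */

        /* ... main send loop would run here ... */
        return NULL;
    }

    int main(void)
    {
        pthread_t tid;

        sem_init(&startstop_sem, 0, 0);  /* start "locked", like init_MUTEX_LOCKED */
        pthread_create(&tid, NULL, worker, NULL);

        sem_wait(&startstop_sem);        /* block until the worker is ready        */
        printf("creator: worker is up\n");

        pthread_join(tid, NULL);
        return 0;
    }

The same semaphore is re-initialised locked in stop_pktgen_kthread() and released by exit_pktgen_kthread(), so shutdown mirrors startup.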
+ */ + init_MUTEX_LOCKED(&kthread->startstop_sem); + + /* store the function to be executed in the data passed to + the launcher */ + kthread->function = pg_thread_worker; + + /* create the new thread by running a task through keventd */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) + /* initialize the task queue structure */ + kthread->tq.sync = 0; + INIT_LIST_HEAD(&kthread->tq.list); + kthread->tq.routine = kthread_launcher; + kthread->tq.data = kthread; + + /* and schedule it for execution */ + schedule_task(&kthread->tq); +#else + INIT_WORK(&(kthread->wq), kthread_launcher, kthread); + /* and schedule it for execution */ + schedule_work(&kthread->wq); + +#endif + + /* wait till it has reached the setup_thread routine */ + down(&kthread->startstop_sem); +} + +/* stop a kernel thread. Called by the removing instance */ +static void stop_pktgen_kthread(struct pktgen_thread_info *kthread) { + printk("pgthread: %s stop_pktgen_kthread.\n", kthread->name); + + if (kthread->thread == NULL) { + printk("stop_kthread: killing non existing thread!\n"); + return; + } + + /* Stop each interface */ + pg_lock(kthread, __FUNCTION__); + { + struct pktgen_interface_info* tmp = kthread->running_if_infos; + while (tmp) { + tmp->do_run_run = 0; + tmp->next_tx_ns = 0; + tmp = tmp->next; + } + if (kthread->cur_if) { + kthread->cur_if->do_run_run = 0; + kthread->cur_if->next_tx_ns = 0; + } + } + pg_unlock(kthread, __FUNCTION__); + + printk("pktgen: stopped all interfaces...\n"); + /* Wait for everything to fully stop */ + while (1) { + pg_lock(kthread, __FUNCTION__); + if (kthread->cur_if || kthread->running_if_infos) { + pg_unlock(kthread, __FUNCTION__); + if (need_resched()) { + schedule(); + } + mdelay(1); + } + else { + pg_unlock(kthread, __FUNCTION__); + break; + } + } + + printk("pktgen: About to lock kernel.\n"); + + /* this function needs to be protected with the big + kernel lock (lock_kernel()). The lock must be + grabbed before changing the terminate + flag and released after the down() call. */ + lock_kernel(); + + /* initialize the semaphore. We lock it here, the + leave_thread call of the thread to be terminated + will unlock it. As soon as we see the semaphore + unlocked, we know that the thread has exited. + */ + init_MUTEX_LOCKED(&kthread->startstop_sem); + + /* We need to do a memory barrier here to be sure that + the flags are visible on all CPUs. + */ + mb(); + + printk("pktgen: Setting terminate to true.\n"); + + /* set flag to request thread termination */ + kthread->terminate = 1; + + /* We need to do a memory barrier here to be sure that + the flags are visible on all CPUs. + */ + mb(); + kill_proc(kthread->thread->pid, SIGKILL, 1); + + printk("pktgen: Waiting for thread to stop..\n"); + /* block till thread terminated */ + down(&kthread->startstop_sem); + kthread->in_use = 0; + + /* release the big kernel lock */ + unlock_kernel(); + + /* now we are sure the thread is in zombie state. We + notify keventd to clean the process up. + */ + kill_proc(2, SIGCHLD, 1); + + printk("pgthread: %s done with stop_pktgen_kthread.\n", kthread->name); +}/* stop_pktgen_kthread */ + + +/* initialize new created thread. Called by the new thread. */ +void init_pktgen_kthread(struct pktgen_thread_info *kthread, char *name) { + /* lock the kernel. 
A new kernel thread starts without + the big kernel lock, regardless of the lock state + of the creator (the lock level is *not* inheritated) + */ + lock_kernel(); + + /* fill in thread structure */ + kthread->thread = current; + + /* set signal mask to what we want to respond */ + siginitsetinv(¤t->blocked, sigmask(SIGKILL)|sigmask(SIGINT)|sigmask(SIGTERM)); + + /* initialise wait queue */ + init_waitqueue_head(&kthread->queue); + + /* initialise termination flag */ + kthread->terminate = 0; + + /* set name of this process (max 15 chars + 0 !) */ + sprintf(current->comm, name); + + /* let others run */ + unlock_kernel(); + + /* tell the creator that we are ready and let him continue */ + up(&kthread->startstop_sem); +}/* init_pktgen_kthread */ + +/* cleanup of thread. Called by the exiting thread. */ +static void exit_pktgen_kthread(struct pktgen_thread_info *kthread) { + /* we are terminating */ + + /* lock the kernel, the exit will unlock it */ + lock_kernel(); + kthread->thread = NULL; + mb(); + + /* Clean up proc file system */ + if (strlen(kthread->fname)) { + remove_proc_entry(kthread->fname, NULL); + } + + /* notify the stop_kthread() routine that we are terminating. */ + up(&kthread->startstop_sem); + /* the kernel_thread that called clone() does a do_exit here. */ + + /* there is no race here between execution of the "killer" and real termination + of the thread (race window between up and do_exit), since both the + thread and the "killer" function are running with the kernel lock held. + The kernel lock will be freed after the thread exited, so the code + is really not executed anymore as soon as the unload functions gets + the kernel lock back. + The init process may not have made the cleanup of the process here, + but the cleanup can be done safely with the module unloaded. + */ +}/* exit_pktgen_kthread */ + + +/* proc/net/pg */ + +static char* pg_display_latency(struct pktgen_interface_info* info, char* p, int reset_latency) { + int i; + p += sprintf(p, " avg_latency: %dus min_lat: %dus max_lat: %dus pkts_in_sample: %llu\n", + info->avg_latency, info->min_latency, info->max_latency, + info->pkts_rcvd_since_clear); + p += sprintf(p, " Buckets(us) [ "); + for (i = 0; ilatency_bkts[i]); + } + p += sprintf(p, "]\n"); + + if (reset_latency) { + pg_reset_latency_counters(info); + } + return p; +} + +static int proc_pg_if_read(char *buf , char **start, off_t offset, + int len, int *eof, void *data) +{ + char *p; + int i; + struct pktgen_interface_info* info = (struct pktgen_interface_info*)(data); + __u64 sa; + __u64 stopped; + __u64 now = getCurUs(); + __u64 now_rel_ns = getRelativeCurNs(); + + p = buf; + p += sprintf(p, "VERSION-1\n"); /* Help with parsing compatibility */ + p += sprintf(p, "Params: count %llu min_pkt_size: %u max_pkt_size: %u cur_pkt_size %u\n frags: %d ipg: %u multiskb: %d ifname: %s\n", + info->count, info->min_pkt_size, info->max_pkt_size, info->cur_pkt_size, + info->nfrags, info->ipg, info->multiskb, info->ifname); + p += sprintf(p, " dst_min: %s dst_max: %s\n src_min: %s src_max: %s\n", + info->dst_min, info->dst_max, info->src_min, info->src_max); + p += sprintf(p, " src_mac: "); + for (i = 0; i < 6; i++) { + p += sprintf(p, "%02X%s", info->src_mac[i], i == 5 ? " " : ":"); + } + p += sprintf(p, "dst_mac: "); + for (i = 0; i < 6; i++) { + p += sprintf(p, "%02X%s", info->dst_mac[i], i == 5 ? 
"\n" : ":"); + } + p += sprintf(p, " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n", + info->udp_src_min, info->udp_src_max, info->udp_dst_min, + info->udp_dst_max); + p += sprintf(p, " src_mac_count: %d dst_mac_count: %d peer_multiskb: %d\n Flags: ", + info->src_mac_count, info->dst_mac_count, info->peer_multiskb); + if (info->flags & F_IPSRC_RND) { + p += sprintf(p, "IPSRC_RND "); + } + if (info->flags & F_IPDST_RND) { + p += sprintf(p, "IPDST_RND "); + } + if (info->flags & F_TXSIZE_RND) { + p += sprintf(p, "TXSIZE_RND "); + } + if (info->flags & F_UDPSRC_RND) { + p += sprintf(p, "UDPSRC_RND "); + } + if (info->flags & F_UDPDST_RND) { + p += sprintf(p, "UDPDST_RND "); + } + if (info->flags & F_MACSRC_RND) { + p += sprintf(p, "MACSRC_RND "); + } + if (info->flags & F_MACDST_RND) { + p += sprintf(p, "MACDST_RND "); + } + if (info->flags & F_IPMAC) { /* dhetheri */ + p += sprintf(p, "IPMAC "); + } + + p += sprintf(p, "\n"); + + sa = info->started_at; + stopped = info->stopped_at; + if (info->do_run_run) { + stopped = now; /* not really stopped, more like last-running-at */ + } + p += sprintf(p, "Current:\n pkts-sofar: %llu errors: %llu queue_stopped: %lu\naccum_delay: %lluns sleeps: %u nanodelays: %llu\n started: %lluus elapsed: %lluus\n idle: %lluns next_tx: %llu(%lli)ns\n", + info->sofar, info->errors, (unsigned long)(info->queue_stopped), + info->accum_delay_ns, info->sleeps, info->nanodelays, + sa, (stopped - sa), info->idle_acc, + info->next_tx_ns, (long long)(info->next_tx_ns) - (long long)(now_rel_ns)); + p += sprintf(p, " seq_num: %d cur_dst_mac_offset: %d cur_src_mac_offset: %d\n", + info->seq_num, info->cur_dst_mac_offset, info->cur_src_mac_offset); + p += sprintf(p, " cur_saddr: 0x%x cur_daddr: 0x%x cur_udp_dst: %d cur_udp_src: %d\n", + info->cur_saddr, info->cur_daddr, info->cur_udp_dst, info->cur_udp_src); + p += sprintf(p, " pkts_rcvd: %llu bytes_rcvd: %llu last_seq_rcvd: %d ooo_rcvd: %llu\n", + info->pkts_rcvd, info->bytes_rcvd, info->last_seq_rcvd, info->ooo_rcvd); + p += sprintf(p, " dup_rcvd: %llu seq_gap_rcvd(dropped): %llu non_pg_rcvd: %llu\n", + info->dup_rcvd, info->seq_gap_rcvd, info->non_pg_pkts_rcvd); + + p = pg_display_latency(info, p, 0); + + if (info->result[0]) + p += sprintf(p, "Result: %s\n", info->result); + else + p += sprintf(p, "Result: Idle\n"); + *eof = 1; + + return p - buf; +} + + +static int proc_pg_thread_read(char *buf , char **start, off_t offset, + int len, int *eof, void *data) +{ + char *p; + struct pktgen_thread_info* pg_thread = (struct pktgen_thread_info*)(data); + struct pktgen_interface_info* info = NULL; + + if (!pg_thread) { + printk("ERROR: could not find pg_thread in proc_pg_thread_read\n"); + return -EINVAL; + } + + p = buf; + p += sprintf(p, "VERSION-1 CFG_RT\n"); /* Help with parsing compatibility */ + p += sprintf(p, "PID: %i Name: %s max_before_softirq: %d queues_stopped: %u\n", + pg_thread->thread->pid, pg_thread->name, + pg_thread->max_before_softirq, pg_thread->queues_stopped); + + pg_lock(pg_thread, __FUNCTION__); + if (pg_thread->cur_if) { + p += sprintf(p, "Current: %s\n", pg_thread->cur_if->ifname); + } + else { + p += sprintf(p, "Current: NULL\n"); + } + pg_unlock(pg_thread, __FUNCTION__); + + p += sprintf(p, "Running: "); + + pg_lock(pg_thread, __FUNCTION__); + info = pg_thread->running_if_infos; + while (info) { + p += sprintf(p, "%s ", info->ifname); + info = info->next; + } + p += sprintf(p, "\nStopped: "); + info = pg_thread->stopped_if_infos; + while (info) { + p += sprintf(p, "%s ", 
info->ifname); + info = info->next; + } + + if (pg_thread->result[0]) + p += sprintf(p, "\nResult: %s\n", pg_thread->result); + else + p += sprintf(p, "\nResult: NA\n"); + *eof = 1; - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(×tamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } - pkt_dev->seq_num++; - - return skb; -} + pg_unlock(pg_thread, __FUNCTION__); + + return p - buf; +}/* proc_pg_thread_read */ -/* - * scan_ip6, fmt_ip taken from dietlibc-0.21 - * Author Felix von Leitner - * - * Slightly modified for kernel. - * Should be candidate for net/ipv4/utils.c - * --ro - */ -static unsigned int scan_ip6(const char *s,char ip[16]) +static int proc_pg_ctrl_read(char *buf , char **start, off_t offset, + int len, int *eof, void *data) { - unsigned int i; - unsigned int len=0; - unsigned long u; - char suffix[16]; - unsigned int prefixlen=0; - unsigned int suffixlen=0; - __u32 tmp; - - for (i=0; i<16; i++) ip[i]=0; - - for (;;) { - if (*s == ':') { - len++; - if (s[1] == ':') { /* Found "::", skip to part 2 */ - s+=2; - len++; - break; - } - s++; - } - { - char *tmp; - u=simple_strtoul(s,&tmp,16); - i=tmp-s; - } + char *p; + struct pktgen_thread_info* pg_thread = NULL; + + p = buf; + p += sprintf(p, "VERSION-1\n"); /* Help with parsing compatibility */ + p += sprintf(p, "Threads: "); + + pg_lock_thread_list(__FUNCTION__); + pg_thread = pktgen_threads; + while (pg_thread) { + p += sprintf(p, "%s ", pg_thread->name); + pg_thread = pg_thread->next; + } + p += sprintf(p, "\n"); + + *eof = 1; - if (!i) return 0; - if (prefixlen==12 && s[i]=='.') { + pg_unlock_thread_list(__FUNCTION__); + return p - buf; +}/* proc_pg_ctrl_read */ - /* the last 4 bytes may be written as IPv4 address */ - tmp = in_aton(s); - memcpy((struct in_addr*)(ip+12), &tmp, sizeof(tmp)); - return i+len; - } - ip[prefixlen++] = (u >> 8); - ip[prefixlen++] = (u & 255); - s += i; len += i; - if (prefixlen==16) - return len; +static int isdelim(const char c) { + switch (c) { + case '\"': + case '\n': + case '\r': + case '\t': + case ' ': + case '=': + return 1; } + return 0; +} -/* part 2, after "::" */ - for (;;) { - if (*s == ':') { - if (suffixlen==0) - break; - s++; - len++; - } else if (suffixlen!=0) + +static int count_trail_chars(const char *buf, unsigned int maxlen) { + int i; + + for (i = 0; i < maxlen; i++) { + if (!isdelim(buf[i])) { break; - { - char *tmp; - u=simple_strtol(s,&tmp,16); - i=tmp-s; } - if (!i) { - if (*s) len--; + } + + return i; +} + + +static int strncpy_token(char* dst, const char* src, int mx) { + int i; + for (i = 0; i> 8); - suffix[suffixlen++] = (u & 255); - s += i; len += i; - if (prefixlen+suffixlen==16) - break; } - for (i=0; i9?hexdigit+'a'-10:hexdigit+'0'; -} + dst[i] = 0; + return i; +}/* strncpy_token */ -static int fmt_xlong(char* s,unsigned int i) { - char* bak=s; - *s=tohex((i>>12)&0xf); if (s!=bak || *s!='0') ++s; - *s=tohex((i>>8)&0xf); if (s!=bak || *s!='0') ++s; - *s=tohex((i>>4)&0xf); if (s!=bak || *s!='0') ++s; - *s=tohex(i&0xf); - return s-bak+1; -} -static unsigned int fmt_ip6(char *s,const char ip[16]) { - unsigned int len; +static unsigned int atoui(const char *buf) { unsigned int i; - unsigned int temp; - unsigned int compressing; - int j; - - len = 0; compressing = 0; - for (j=0; j<16; j+=2) { - -#ifdef V4MAPPEDPREFIX - if (j==12 && !memcmp(ip,V4mappedprefix,12)) { - inet_ntoa_r(*(struct in_addr*)(ip+12),s); - temp=strlen(s); - return 
len+temp; + int num = 0; + + for(i = 0; buf[i]; i++) { + char c = buf[i]; + if ((c >= '0') && (c <= '9')) { + num *= 10; + num += c - '0'; } -#endif - temp = ((unsigned long) (unsigned char) ip[j] << 8) + - (unsigned long) (unsigned char) ip[j+1]; - if (temp == 0) { - if (!compressing) { - compressing=1; - if (j==0) { - *s++=':'; ++len; - } - } - } else { - if (compressing) { - compressing=0; - *s++=':'; ++len; - } - i = fmt_xlong(s,temp); len += i; s += i; - if (j<14) { - *s++ = ':'; - ++len; - } + else { + break; } } - if (compressing) { - *s++=':'; ++len; - } - *s=0; - return len; + return num; } -static struct sk_buff *fill_packet_ipv6(struct net_device *odev, - struct pktgen_dev *pkt_dev) +static int proc_pg_if_write(struct file *file, const char *user_buffer, + unsigned long count, void *data) { - struct sk_buff *skb = NULL; - __u8 *eth; - struct udphdr *udph; - int datalen; - struct ipv6hdr *iph; - struct pktgen_hdr *pgh = NULL; - - skb = alloc_skb(pkt_dev->cur_pkt_size + 64 + 16, GFP_ATOMIC); - if (!skb) { - sprintf(pkt_dev->result, "No memory"); - return NULL; - } - - skb_reserve(skb, 16); - - /* Reserve for ethernet and IP header */ - eth = (__u8 *) skb_push(skb, 14); - iph = (struct ipv6hdr *)skb_put(skb, sizeof(struct ipv6hdr)); - udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr)); - - - /* Update any of the values, used when we're incrementing various - * fields. - */ - mod_cur_headers(pkt_dev); - + char name[16]; + struct pktgen_interface_info* info = (struct pktgen_interface_info*)(data); + char* kbuf; + char* p; + char* pg_result = &(info->result[0]); + int len; + int value; - memcpy(eth, pkt_dev->hh, 12); - *(u16*)ð[12] = __constant_htons(ETH_P_IPV6); - - - datalen = pkt_dev->cur_pkt_size-14- - sizeof(struct ipv6hdr)-sizeof(struct udphdr); /* Eth + IPh + UDPh */ - - if (datalen < sizeof(struct pktgen_hdr)) { - datalen = sizeof(struct pktgen_hdr); - if (net_ratelimit()) - printk(KERN_INFO "pktgen: increased datalen to %d\n", datalen); + if (count < 1) { + sprintf(pg_result, "Wrong command format"); + return -EINVAL; + } + + kbuf = kmalloc(count, GFP_KERNEL); + if (copy_from_user(kbuf, user_buffer, count)) { + kfree(kbuf); + return -EFAULT; } - udph->source = htons(pkt_dev->cur_udp_src); - udph->dest = htons(pkt_dev->cur_udp_dst); - udph->len = htons(datalen + sizeof(struct udphdr)); - udph->check = 0; /* No checksum */ - - *(u32*)iph = __constant_htonl(0x60000000); /* Version + flow */ - - iph->hop_limit = 32; - - iph->payload_len = htons(sizeof(struct udphdr) + datalen); - iph->nexthdr = IPPROTO_UDP; - - ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr); - ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr); - - skb->mac.raw = ((u8 *)iph) - 14; - skb->protocol = __constant_htons(ETH_P_IPV6); - skb->dev = odev; - skb->pkt_type = PACKET_HOST; - - if (pkt_dev->nfrags <= 0) - pgh = (struct pktgen_hdr *)skb_put(skb, datalen); - else { - int frags = pkt_dev->nfrags; - int i; - - pgh = (struct pktgen_hdr*)(((char*)(udph)) + 8); - - if (frags > MAX_SKB_FRAGS) - frags = MAX_SKB_FRAGS; - if (datalen > frags*PAGE_SIZE) { - skb_put(skb, datalen-frags*PAGE_SIZE); - datalen = frags*PAGE_SIZE; + p = kbuf; + + while (p < (kbuf + count)) { + p += count_trail_chars(p, count - (p - kbuf)); + + if (p >= (kbuf + count)) { + break; } - i = 0; - while (datalen > 0) { - struct page *page = alloc_pages(GFP_KERNEL, 0); - skb_shinfo(skb)->frags[i].page = page; - skb_shinfo(skb)->frags[i].page_offset = 0; - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? 
datalen : PAGE_SIZE); - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; - i++; - skb_shinfo(skb)->nr_frags = i; + /* Read variable name */ + if (debug) { + printk("pg_thread: %s,%lu\n", name, count); } - while (i < frags) { - int rem; - - if (i == 0) - break; + len = strlen("stop"); + if (!strncmp(p, "stop", len)) { + p += len; + if (info->do_run_run) { + strcpy(pg_result, "Stopping"); + pg_stop_interface(info->pg_thread, info); + } + else { + strcpy(pg_result, "Already stopped...\n"); + } + goto foundcmd; + } - rem = skb_shinfo(skb)->frags[i - 1].size / 2; - if (rem == 0) - break; + len = strlen("min_pkt_size "); + if (!strncmp(p, "min_pkt_size ", len)) { + char f[32]; + p += len; - skb_shinfo(skb)->frags[i - 1].size -= rem; + p += count_trail_chars(p, count - (p - kbuf)); - skb_shinfo(skb)->frags[i] = skb_shinfo(skb)->frags[i - 1]; - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->frags[i].page = skb_shinfo(skb)->frags[i - 1].page; - skb_shinfo(skb)->frags[i].page_offset += skb_shinfo(skb)->frags[i - 1].size; - skb_shinfo(skb)->frags[i].size = rem; - i++; - skb_shinfo(skb)->nr_frags = i; + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + value = atoui(f); + + if (value < 14+20+8) + value = 14+20+8; + if (value != info->min_pkt_size) { + info->min_pkt_size = value; + info->cur_pkt_size = value; + } + sprintf(pg_result, "OK: min_pkt_size=%u", info->min_pkt_size); + goto foundcmd; } - } - /* Stamp the time, and sequence number, convert them to network byte order */ - /* should we update cloned packets too ? */ - if (pgh) { - struct timeval timestamp; - - pgh->pgh_magic = htonl(PKTGEN_MAGIC); - pgh->seq_num = htonl(pkt_dev->seq_num); - - do_gettimeofday(×tamp); - pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); - } - pkt_dev->seq_num++; - - return skb; -} + len = strlen("max_pkt_size "); + if (!strncmp(p, "max_pkt_size ", len)) { + char f[32]; + p += len; -static inline struct sk_buff *fill_packet(struct net_device *odev, - struct pktgen_dev *pkt_dev) -{ - if(pkt_dev->flags & F_IPV6) - return fill_packet_ipv6(odev, pkt_dev); - else - return fill_packet_ipv4(odev, pkt_dev); -} + p += count_trail_chars(p, (int)(count - (p - kbuf))); -static void pktgen_clear_counters(struct pktgen_dev *pkt_dev) -{ - pkt_dev->seq_num = 1; - pkt_dev->idle_acc = 0; - pkt_dev->sofar = 0; - pkt_dev->tx_bytes = 0; - pkt_dev->errors = 0; -} + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + value = atoui(f); -/* Set up structure for sending pkts, clear counters */ + if (value < 14+20+8) + value = 14+20+8; + if (value != info->max_pkt_size) { + info->max_pkt_size = value; + info->cur_pkt_size = value; + } + sprintf(pg_result, "OK: max_pkt_size=%u", info->max_pkt_size); + goto foundcmd; + } + + len = strlen("min_pkt_size "); + if (!strncmp(p, "min_pkt_size ", len)) { + char f[32]; + p += len; -static void pktgen_run(struct pktgen_thread *t) -{ - struct pktgen_dev *pkt_dev = NULL; - int started = 0; + p += count_trail_chars(p, (int)(count - (p - kbuf))); - PG_DEBUG(printk("pktgen: entering pktgen_run. 
%p\n", t)); + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + debug = atoui(f); + sprintf(pg_result, "OK: debug=%u", debug); + goto foundcmd; + } - if_lock(t); - for (pkt_dev = t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) { + len = strlen("frags "); + if (!strncmp(p, "frags ", len)) { + char f[32]; + p += len; - /* - * setup odev and create initial packet. - */ - pktgen_setup_inject(pkt_dev); + p += count_trail_chars(p, (int)(count - (p - kbuf))); - if(pkt_dev->odev) { - pktgen_clear_counters(pkt_dev); - pkt_dev->running = 1; /* Cranke yeself! */ - pkt_dev->skb = NULL; - pkt_dev->started_at = getCurUs(); - pkt_dev->next_tx_us = getCurUs(); /* Transmit immediately */ - pkt_dev->next_tx_ns = 0; + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; - strcpy(pkt_dev->result, "Starting"); - started++; + p += strlen(f); + info->nfrags = atoui(f); + sprintf(pg_result, "OK: frags=%u", info->nfrags); + goto foundcmd; } - else - strcpy(pkt_dev->result, "Error starting"); - } - if_unlock(t); - if(started) t->control &= ~(T_STOP); -} -static void pktgen_stop_all_threads_ifs(void) -{ - struct pktgen_thread *t = pktgen_threads; - - PG_DEBUG(printk("pktgen: entering pktgen_stop_all_threads.\n")); - - thread_lock(); - while(t) { - pktgen_stop(t); - t = t->next; - } - thread_unlock(); -} + len = strlen("ipg "); + if (!strncmp(p, "ipg ", len)) { + char f[32]; + p += len; -static int thread_is_running(struct pktgen_thread *t ) -{ - struct pktgen_dev *next; - int res = 0; + p += count_trail_chars(p, (int)(count - (p - kbuf))); - for(next=t->if_list; next; next=next->next) { - if(next->running) { - res = 1; - break; + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + info->ipg = atoui(f); + if ((getRelativeCurNs() + info->ipg) > info->next_tx_ns) { + info->next_tx_ns = getRelativeCurNs() + info->ipg; + } + sprintf(pg_result, "OK: ipg=%u", info->ipg); + goto foundcmd; } - } - return res; -} + + len = strlen("udp_src_min "); + if (!strncmp(p, "udp_src_min ", len)) { + char f[32]; + p += len; -static int pktgen_wait_thread_run(struct pktgen_thread *t ) -{ - if_lock(t); + p += count_trail_chars(p, (int)(count - (p - kbuf))); - while(thread_is_running(t)) { + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + value = atoui(f); + + if (value != info->udp_src_min) { + info->udp_src_min = value; + info->cur_udp_src = value; + } + sprintf(pg_result, "OK: udp_src_min=%u", info->udp_src_min); + goto foundcmd; + } + + len = strlen("udp_dst_min "); + if (!strncmp(p, "udp_dst_min ", len)) { + char f[32]; + p += len; - if_unlock(t); + p += count_trail_chars(p, (int)(count - (p - kbuf))); - msleep_interruptible(100); + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + value = atoui(f); + + if (value != info->udp_dst_min) { + info->udp_dst_min = value; + info->cur_udp_dst = value; + } + sprintf(pg_result, "OK: udp_dst_min=%u", info->udp_dst_min); + goto foundcmd; + } + + len = strlen("udp_src_max "); + if (!strncmp(p, "udp_src_max ", len)) { + char f[32]; + p += len; - if (signal_pending(current)) - goto signal; - if_lock(t); - } - if_unlock(t); - return 1; - signal: - return 0; -} + p += count_trail_chars(p, (int)(count - (p - kbuf))); -static int pktgen_wait_all_threads_run(void) -{ - struct pktgen_thread *t = pktgen_threads; - int sig = 1; - - while (t) { - sig = pktgen_wait_thread_run(t); - if( sig == 0 ) break; - thread_lock(); - 
t=t->next; - thread_unlock(); - } - if(sig == 0) { - thread_lock(); - while (t) { - t->control |= (T_STOP); - t=t->next; + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + value = atoui(f); + + if (value != info->udp_src_max) { + info->udp_src_max = value; + info->cur_udp_src = value; + } + sprintf(pg_result, "OK: udp_src_max=%u", info->udp_src_max); + goto foundcmd; } - thread_unlock(); - } - return sig; -} + + len = strlen("udp_dst_max "); + if (!strncmp(p, "udp_dst_max ", len)) { + char f[32]; + p += len; -static void pktgen_run_all_threads(void) -{ - struct pktgen_thread *t = pktgen_threads; + p += count_trail_chars(p, (int)(count - (p - kbuf))); - PG_DEBUG(printk("pktgen: entering pktgen_run_all_threads.\n")); + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + value = atoui(f); + + if (value != info->udp_dst_max) { + info->udp_dst_max = value; + info->cur_udp_dst = value; + } + sprintf(pg_result, "OK: udp_dst_max=%u", info->udp_dst_max); + goto foundcmd; + } - thread_lock(); + len = strlen("multiskb "); + if (!strncmp(p, "multiskb ", len)) { + char f[32]; + p += len; - while(t) { - t->control |= (T_RUN); - t = t->next; - } - thread_unlock(); + p += count_trail_chars(p, (int)(count - (p - kbuf))); - current->state = TASK_INTERRUPTIBLE; - schedule_timeout(HZ/8); /* Propagate thread->control */ + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; - pktgen_wait_all_threads_run(); -} + p += strlen(f); + info->multiskb = atoui(f); + + /* Can't use multi-skb > 0 with virtual interfaces, because they change + * the skb->dev pointer (at least) and so it's really impossible to send + * the exact same pkt over and over again + */ + if (info->odev && + ((info->odev->priv_flags & IFF_MAC_VLAN) || + (info->odev->priv_flags & IFF_802_1Q_VLAN))) { + if (info->multiskb > 0) { + printk("pktgen: WARNING: Cannot use multi-skb > 0 on virtual interfaces, setting to zero.\n"); + info->multiskb = 0; + } + } + + sprintf(pg_result, "OK: multiskb=%d", info->multiskb); + goto foundcmd; + } + + len = strlen("peer_multiskb "); + if (!strncmp(p, "peer_multiskb ", len)) { + char f[32]; + p += len; + p += count_trail_chars(p, (int)(count - (p - kbuf))); -static void show_results(struct pktgen_dev *pkt_dev, int nr_frags) -{ - __u64 total_us, bps, mbps, pps, idle; - char *p = pkt_dev->result; + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + info->peer_multiskb = atoui(f); + + sprintf(pg_result, "OK: peer_multiskb=%d", + info->peer_multiskb); + goto foundcmd; + } + + len = strlen("count "); + if (!strncmp(p, "count ", len)) { + char f[32]; + p += len; - total_us = pkt_dev->stopped_at - pkt_dev->started_at; + p += count_trail_chars(p, (int)(count - (p - kbuf))); - idle = pkt_dev->idle_acc; + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + info->count = atoui(f); + + sprintf(pg_result, "OK: count=%llu", info->count); + goto foundcmd; + } + + len = strlen("prot "); + if (!strncmp(p, "prot ", len)) { + char f[32]; + p += len; - p += sprintf(p, "OK: %llu(c%llu+d%llu) usec, %llu (%dbyte,%dfrags)\n", - (unsigned long long) total_us, - (unsigned long long)(total_us - idle), - (unsigned long long) idle, - (unsigned long long) pkt_dev->sofar, - pkt_dev->cur_pkt_size, nr_frags); + p += count_trail_chars(p, (int)(count - (p - kbuf))); - pps = pkt_dev->sofar * USEC_PER_SEC; + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 
0; + + p += strlen(f); + info->prot = atoui(f); + + sprintf(pg_result, "OK: prot=%u", info->prot); + goto foundcmd; + } - while ((total_us >> 32) != 0) { - pps >>= 1; - total_us >>= 1; - } + len = strlen("src_mac_count "); + if (!strncmp(p, "src_mac_count ", len)) { + char f[32]; + p += len; - do_div(pps, total_us); - - bps = pps * 8 * pkt_dev->cur_pkt_size; + p += count_trail_chars(p, (int)(count - (p - kbuf))); - mbps = bps; - do_div(mbps, 1000000); - p += sprintf(p, " %llupps %lluMb/sec (%llubps) errors: %llu", - (unsigned long long) pps, - (unsigned long long) mbps, - (unsigned long long) bps, - (unsigned long long) pkt_dev->errors); -} - + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + value = atoui(f); + + if (info->src_mac_count != value) { + info->src_mac_count = value; + info->cur_src_mac_offset = 0; + } + sprintf(pg_result, "OK: src_mac_count=%d", info->src_mac_count); + goto foundcmd; + } + + len = strlen("dst_mac_count "); + if (!strncmp(p, "dst_mac_count ", len)) { + char f[32]; + p += len; -/* Set stopped-at timer, remove from running list, do counters & statistics */ + p += count_trail_chars(p, (int)(count - (p - kbuf))); -static int pktgen_stop_device(struct pktgen_dev *pkt_dev) -{ - - if (!pkt_dev->running) { - printk("pktgen: interface: %s is already stopped\n", pkt_dev->ifname); - return -EINVAL; - } + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + value = atoui(f); + + if (info->dst_mac_count != value) { + info->dst_mac_count = value; + info->cur_dst_mac_offset = 0; + } + sprintf(pg_result, "OK: dst_mac_count=%d", info->dst_mac_count); + goto foundcmd; + } + + len = strlen("flag "); + if (!strncmp(p, "flag ", len)) { + char f[32]; + p += len; - pkt_dev->stopped_at = getCurUs(); - pkt_dev->running = 0; + p += count_trail_chars(p, (int)(count - (p - kbuf))); - show_results(pkt_dev, skb_shinfo(pkt_dev->skb)->nr_frags); + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); - if (pkt_dev->skb) - kfree_skb(pkt_dev->skb); + if (strcmp(f, "IPSRC_RND") == 0) { + info->flags |= F_IPSRC_RND; + } + else if (strcmp(f, "!IPSRC_RND") == 0) { + info->flags &= ~F_IPSRC_RND; + } + else if (strcmp(f, "TXSIZE_RND") == 0) { + info->flags |= F_TXSIZE_RND; + } + else if (strcmp(f, "!TXSIZE_RND") == 0) { + info->flags &= ~F_TXSIZE_RND; + } + else if (strcmp(f, "IPDST_RND") == 0) { + info->flags |= F_IPDST_RND; + } + else if (strcmp(f, "!IPDST_RND") == 0) { + info->flags &= ~F_IPDST_RND; + } + else if (strcmp(f, "UDPSRC_RND") == 0) { + info->flags |= F_UDPSRC_RND; + } + else if (strcmp(f, "!UDPSRC_RND") == 0) { + info->flags &= ~F_UDPSRC_RND; + } + else if (strcmp(f, "UDPDST_RND") == 0) { + info->flags |= F_UDPDST_RND; + } + else if (strcmp(f, "!UDPDST_RND") == 0) { + info->flags &= ~F_UDPDST_RND; + } + else if (strcmp(f, "MACSRC_RND") == 0) { + info->flags |= F_MACSRC_RND; + } + else if (strcmp(f, "!MACSRC_RND") == 0) { + info->flags &= ~F_MACSRC_RND; + } + else if (strcmp(f, "MACDST_RND") == 0) { + info->flags |= F_MACDST_RND; + } + else if (strcmp(f, "!MACDST_RND") == 0) { + info->flags &= ~F_MACDST_RND; + } + else if (strcmp(f, "IPMAC") == 0) { /* dhetheri */ + info->flags |= F_IPMAC; /* dhetheri */ + } /* dhetheri */ + else if (strcmp(f, "!IPMAC") == 0) { /* dhetheri */ + info->flags &= ~F_IPMAC; /* dhetheri */ + } /* dhetheri */ + else { + sprintf(pg_result, "Flag -:%s:- unknown\nAvailable flags, (prepend ! 
to un-set flag):\n%s", + f, + "IPSRC_RND, IPDST_RND, TXSIZE_RND, UDPSRC_RND, UDPDST_RND, MACSRC_RND, MACDST_RND, IPMAC\n"); + } + sprintf(pg_result, "OK: flags=0x%x", info->flags); + goto foundcmd; + } + + len = strlen("dst_min "); + if (!strncmp(p, "dst_min ", 6)) { + char f[IP_NAME_SZ]; + p += len; - pkt_dev->skb = NULL; - - return 0; -} + p += count_trail_chars(p, (int)(count - (p - kbuf))); -static struct pktgen_dev *next_to_run(struct pktgen_thread *t ) -{ - struct pktgen_dev *next, *best = NULL; - - if_lock(t); + strncpy_token(f, p, min(IP_NAME_SZ-1, (int)(count - (p - kbuf)))); + f[IP_NAME_SZ-1] = 0; + + p += strlen(f); + if (strcmp(f, info->dst_min) != 0) { + memset(info->dst_min, 0, sizeof(info->dst_min)); + strcpy(info->dst_min, f); + info->daddr_min = in_aton(info->dst_min); + info->cur_daddr = info->daddr_min; + } + if(debug) + printk("pg: dst_min set to: %s\n", info->dst_min); + sprintf(pg_result, "OK: dst_min=%s", info->dst_min); + goto foundcmd; + } + + len = strlen("dst_max "); + if (!strncmp(p, "dst_max ", len)) { + char f[IP_NAME_SZ]; + p += len; - for(next=t->if_list; next ; next=next->next) { - if(!next->running) continue; - if(best == NULL) best=next; - else if ( next->next_tx_us < best->next_tx_us) - best = next; - } - if_unlock(t); - return best; -} + p += count_trail_chars(p, (int)(count - (p - kbuf))); -static void pktgen_stop(struct pktgen_thread *t) { - struct pktgen_dev *next = NULL; + strncpy_token(f, p, min(IP_NAME_SZ-1, (int)(count - (p - kbuf)))); + f[IP_NAME_SZ-1] = 0; + + p += strlen(f); + if (strcmp(f, info->dst_max) != 0) { + memset(info->dst_max, 0, sizeof(info->dst_max)); + strcpy(info->dst_max, f); + info->daddr_max = in_aton(info->dst_max); + info->cur_daddr = info->daddr_max; + } + if(debug) + printk("pg: dst_max set to: %s\n", info->dst_max); + sprintf(pg_result, "OK: dst_max=%s", info->dst_max); + goto foundcmd; + } + + len = strlen("src_min "); + if (!strncmp(p, "src_min ", len)) { + char f[IP_NAME_SZ]; + p += len; - PG_DEBUG(printk("pktgen: entering pktgen_stop.\n")); + p += count_trail_chars(p, (int)(count - (p - kbuf))); - if_lock(t); + strncpy_token(f, p, min(IP_NAME_SZ-1, (int)(count - (p - kbuf)))); + f[IP_NAME_SZ-1] = 0; + + p += strlen(f); + if (strcmp(f, info->src_min) != 0) { + memset(info->src_min, 0, sizeof(info->src_min)); + strcpy(info->src_min, f); + info->saddr_min = in_aton(info->src_min); + info->cur_saddr = info->saddr_min; + } + if(debug) + printk("pg: src_min set to: %s\n", info->src_min); + sprintf(pg_result, "OK: src_min=%s", info->src_min); + goto foundcmd; + } + + len = strlen("src_max "); + if (!strncmp(p, "src_max ", len)) { + char f[IP_NAME_SZ]; + p += len; - for(next=t->if_list; next; next=next->next) - pktgen_stop_device(next); + p += count_trail_chars(p, (int)(count - (p - kbuf))); - if_unlock(t); -} + strncpy_token(f, p, min(IP_NAME_SZ-1, (int)(count - (p - kbuf)))); + f[IP_NAME_SZ-1] = 0; + + p += strlen(f); + if (strcmp(f, info->src_max) != 0) { + memset(info->src_max, 0, sizeof(info->src_max)); + strcpy(info->src_max, f); + info->saddr_min = in_aton(info->src_max); + info->cur_saddr = info->saddr_max; + } + if(debug) + printk("pg: src_min set to: %s\n", info->src_min); + sprintf(pg_result, "OK: src_max=%s", info->src_max); + goto foundcmd; + } + + len = strlen("dst_max "); + if (!strncmp(p, "dst_mac ", len)) { + char f[IP_NAME_SZ]; + unsigned char old_dmac[6]; + unsigned char *m = info->dst_mac; + char* v = f; + memcpy(old_dmac, info->dst_mac, 6); + + p += len; -static void pktgen_rem_all_ifs(struct 
pktgen_thread *t) -{ - struct pktgen_dev *cur, *next = NULL; - - /* Remove all devices, free mem */ - - if_lock(t); + p += count_trail_chars(p, (int)(count - (p - kbuf))); - for(cur=t->if_list; cur; cur=next) { - next = cur->next; - pktgen_remove_device(t, cur); - } + strncpy_token(f, p, min(IP_NAME_SZ-1, (int)(count - (p - kbuf)))); + f[IP_NAME_SZ-1] = 0; + + p += strlen(f); - if_unlock(t); -} + for(*m = 0;*v && m < info->dst_mac + 6; v++) { + if (*v >= '0' && *v <= '9') { + *m *= 16; + *m += *v - '0'; + } + if (*v >= 'A' && *v <= 'F') { + *m *= 16; + *m += *v - 'A' + 10; + } + if (*v >= 'a' && *v <= 'f') { + *m *= 16; + *m += *v - 'a' + 10; + } + if (*v == ':') { + m++; + *m = 0; + } + } -static void pktgen_rem_thread(struct pktgen_thread *t) -{ - /* Remove from the thread list */ + if (memcmp(old_dmac, info->dst_mac, 6) != 0) { + /* Set up Dest MAC */ + memcpy(&(info->hh[0]), info->dst_mac, 6); + } + + sprintf(pg_result, "OK: dstmac"); + goto foundcmd; + } + + len = strlen("src_mac "); + if (!strncmp(p, "src_mac ", len)) { + char f[IP_NAME_SZ]; + char* v = f; + unsigned char old_smac[6]; + unsigned char *m = info->src_mac; + memcpy(old_smac, info->src_mac, 6); + + p += len; - struct pktgen_thread *tmp = pktgen_threads; + p += count_trail_chars(p, (int)(count - (p - kbuf))); - if (strlen(t->fname)) - remove_proc_entry(t->fname, NULL); + strncpy_token(f, p, min(IP_NAME_SZ-1, (int)(count - (p - kbuf)))); + f[IP_NAME_SZ-1] = 0; + + p += strlen(f); - thread_lock(); + for(*m = 0;*v && m < info->src_mac + 6; v++) { + if (*v >= '0' && *v <= '9') { + *m *= 16; + *m += *v - '0'; + } + if (*v >= 'A' && *v <= 'F') { + *m *= 16; + *m += *v - 'A' + 10; + } + if (*v >= 'a' && *v <= 'f') { + *m *= 16; + *m += *v - 'a' + 10; + } + if (*v == ':') { + m++; + *m = 0; + } + } - if (tmp == t) - pktgen_threads = tmp->next; - else { - while (tmp) { - if (tmp->next == t) { - tmp->next = t->next; - t->next = NULL; - break; + if (memcmp(old_smac, info->src_mac, 6) != 0) { + /* Default to the interface's mac if not explicitly set. 
*/ + if ((!(info->flags & F_SET_SRCMAC)) && info->odev) { + memcpy(&(info->hh[6]), info->odev->dev_addr, 6); + } + else { + memcpy(&(info->hh[6]), info->src_mac, 6); + } } - tmp = tmp->next; + + sprintf(pg_result, "OK: srcmac"); + goto foundcmd; } - } - thread_unlock(); -} + + len = strlen("clear_counters"); + if (!strncmp(p, "clear_counters", len)) { + p += len; + + pg_clear_counters(info, 0); + sprintf(pg_result, "OK: Clearing counters...\n"); + goto foundcmd; + } + + if (!strncmp(p, "inject", 6) || !strncmp(p, "start", 5)) { -__inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) -{ - struct net_device *odev = NULL; - __u64 idle_start = 0; - int ret; + if (strncmp(p, "start", 5) == 0) { + p += 5; + } + else { + p += 6; + } + + if (info->do_run_run) { + strcpy(info->result, "Already running...\n"); + sprintf(pg_result, "Already running...\n"); + } + else { + int rv; + if ((rv = pg_start_interface(info->pg_thread, info)) >= 0) { + strcpy(info->result, "Starting"); + sprintf(pg_result, "Starting"); + } + else { + sprintf(info->result, "Error starting: %i\n", rv); + sprintf(pg_result, "Error starting: %i\n", rv); + } + } + goto foundcmd; + } - odev = pkt_dev->odev; + printk("Pktgen:pg_if_write: Unknown command -:%20s:-\n", p); + p++; + + foundcmd: + if (debug & 0x1000) { + printk("Command succeeded.\n"); + } + }/* while */ - if (pkt_dev->delay_us || pkt_dev->delay_ns) { - u64 now; + kfree(kbuf); + return count; +}/* proc_pg_if_write */ - now = getCurUs(); - if (now < pkt_dev->next_tx_us) - spin(pkt_dev, pkt_dev->next_tx_us); - /* This is max DELAY, this has special meaning of - * "never transmit" - */ - if (pkt_dev->delay_us == 0x7FFFFFFF) { - pkt_dev->next_tx_us = getCurUs() + pkt_dev->delay_us; - pkt_dev->next_tx_ns = pkt_dev->delay_ns; - goto out; - } - } +static int proc_pg_ctrl_write(struct file *file, const char *user_buffer, + unsigned long count, void *data) +{ + char name[16]; + struct pktgen_thread_info* pg_thread = NULL; + char* kbuf; + char* p; + int len; - if (netif_queue_stopped(odev) || need_resched()) { - idle_start = getCurUs(); - - if (!netif_running(odev)) { - pktgen_stop_device(pkt_dev); - goto out; - } - if (need_resched()) - schedule(); - - pkt_dev->idle_acc += getCurUs() - idle_start; - - if (netif_queue_stopped(odev)) { - pkt_dev->next_tx_us = getCurUs(); /* TODO */ - pkt_dev->next_tx_ns = 0; - goto out; /* Try the next interface */ - } + if (count < 1) { + return -EINVAL; } - - if (pkt_dev->last_ok || !pkt_dev->skb) { - if ((++pkt_dev->clone_count >= pkt_dev->clone_skb ) || (!pkt_dev->skb)) { - /* build a new pkt */ - if (pkt_dev->skb) - kfree_skb(pkt_dev->skb); - - pkt_dev->skb = fill_packet(odev, pkt_dev); - if (pkt_dev->skb == NULL) { - printk("pktgen: ERROR: couldn't allocate skb in fill_packet.\n"); - schedule(); - pkt_dev->clone_count--; /* back out increment, OOM */ - goto out; - } - pkt_dev->allocated_skbs++; - pkt_dev->clone_count = 0; /* reset counter */ - } + + kbuf = kmalloc(count, GFP_KERNEL); + if (copy_from_user(kbuf, user_buffer, count)) { + kfree(kbuf); + return -EFAULT; } - - spin_lock_bh(&odev->xmit_lock); - if (!netif_queue_stopped(odev)) { - u64 now; - - atomic_inc(&(pkt_dev->skb->users)); -retry_now: - ret = odev->hard_start_xmit(pkt_dev->skb, odev); - if (likely(ret == NETDEV_TX_OK)) { - pkt_dev->last_ok = 1; - pkt_dev->sofar++; - pkt_dev->seq_num++; - pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; - - } else if (ret == NETDEV_TX_LOCKED - && (odev->features & NETIF_F_LLTX)) { - cpu_relax(); - goto retry_now; - } else { /* Retry it next time */ - 
- atomic_dec(&(pkt_dev->skb->users)); - - if (debug && net_ratelimit()) - printk(KERN_INFO "pktgen: Hard xmit error\n"); - - pkt_dev->errors++; - pkt_dev->last_ok = 0; - pkt_dev->next_tx_us = getCurUs(); /* TODO */ - pkt_dev->next_tx_ns = 0; - } - - pkt_dev->next_tx_us += pkt_dev->delay_us; - pkt_dev->next_tx_ns += pkt_dev->delay_ns; - if (pkt_dev->next_tx_ns > 1000) { - pkt_dev->next_tx_us++; - pkt_dev->next_tx_ns -= 1000; - } - - now = getCurUs(); - if (now > pkt_dev->next_tx_us) { - /* TODO: this code is slightly wonky. */ - pkt_dev->errors++; - pkt_dev->next_tx_us = now - pkt_dev->delay_us; - pkt_dev->next_tx_ns = 0; - } - } - - else { /* Retry it next time */ - pkt_dev->last_ok = 0; - pkt_dev->next_tx_us = getCurUs(); /* TODO */ - pkt_dev->next_tx_ns = 0; - } - spin_unlock_bh(&odev->xmit_lock); + p = kbuf; - /* If pkt_dev->count is zero, then run forever */ - if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { - if (atomic_read(&(pkt_dev->skb->users)) != 1) { - idle_start = getCurUs(); - while (atomic_read(&(pkt_dev->skb->users)) != 1) { - if (signal_pending(current)) { - break; - } - schedule(); - } - pkt_dev->idle_acc += getCurUs() - idle_start; + while (p < (kbuf + count)) { + p += count_trail_chars(p, (int)(count - (p - kbuf))); + + if (p >= (kbuf + count)) { + break; } - - /* Done with this */ - pktgen_stop_device(pkt_dev); - } - out:; - } - -/* - * Main loop of the thread goes here - */ - -static void pktgen_thread_worker(struct pktgen_thread *t) -{ - DEFINE_WAIT(wait); - struct pktgen_dev *pkt_dev = NULL; - int cpu = t->cpu; - sigset_t tmpsig; - u32 max_before_softirq; - u32 tx_since_softirq = 0; - daemonize("pktgen/%d", cpu); + /* Read variable name */ + if (debug) { + printk("pg_thread: %s,%lu\n", name, count); + } - /* Block all signals except SIGKILL, SIGSTOP and SIGTERM */ + len = strlen("stop "); + if (!strncmp(p, "stop ", len)) { + char f[32]; + p += len; - spin_lock_irq(¤t->sighand->siglock); - tmpsig = current->blocked; - siginitsetinv(¤t->blocked, - sigmask(SIGKILL) | - sigmask(SIGSTOP)| - sigmask(SIGTERM)); + p += count_trail_chars(p, (int)(count - (p - kbuf))); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); - /* Migrate to the right CPU */ - set_cpus_allowed(current, cpumask_of_cpu(cpu)); - if (smp_processor_id() != cpu) - BUG(); + pg_thread = pg_find_thread(f); + if (pg_thread) { + printk("pktgen INFO: stopping thread: %s\n", + pg_thread->name); + stop_pktgen_kthread(pg_thread); + } - init_waitqueue_head(&t->queue); + goto foundcmd; + } - t->control &= ~(T_TERMINATE); - t->control &= ~(T_RUN); - t->control &= ~(T_STOP); - t->control &= ~(T_REMDEV); + len = strlen("start "); + if (!strncmp(p, "start ", len)) { + char f[32]; + p += len; - t->pid = current->pid; + p += count_trail_chars(p, (int)(count - (p - kbuf))); - PG_DEBUG(printk("pktgen: starting pktgen/%d: pid=%d\n", cpu, current->pid)); + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); - max_before_softirq = t->max_before_softirq; - - __set_current_state(TASK_INTERRUPTIBLE); - mb(); + pg_add_thread_info(f); + goto foundcmd; + } - while (1) { + printk("Pktgen:pgctrl_write: Unknown command -:%20s:-\n", p); + p++; - __set_current_state(TASK_RUNNING); - - /* - * Get next dev to xmit -- if any. 
- */ + foundcmd: + if (debug & 0x1000) { + printk("Command handled successfully.\n"); + } + + }/* while */ - pkt_dev = next_to_run(t); - - if (pkt_dev) { + kfree(kbuf); + return count; +}/* proc_pg_ctrl_write */ - pktgen_xmit(pkt_dev); - /* - * We like to stay RUNNING but must also give - * others fair share. - */ +static int proc_pg_thread_write(struct file *file, const char *user_buffer, + unsigned long count, void *data) +{ + char name[16]; + struct pktgen_thread_info* pg_thread = (struct pktgen_thread_info*)(data); + char* pg_result = &(pg_thread->result[0]); + char* kbuf; + char* p; + int len; + + if (count < 1) { + sprintf(pg_result, "Wrong command format"); + return -EINVAL; + } - tx_since_softirq += pkt_dev->last_ok; + kbuf = kmalloc(count, GFP_KERNEL); + if (copy_from_user(kbuf, user_buffer, count)) { + kfree(kbuf); + return -EFAULT; + } - if (tx_since_softirq > max_before_softirq) { - if (local_softirq_pending()) - do_softirq(); - tx_since_softirq = 0; - } - } else { - prepare_to_wait(&(t->queue), &wait, TASK_INTERRUPTIBLE); - schedule_timeout(HZ/10); - finish_wait(&(t->queue), &wait); - } - - /* - * Back from sleep, either due to the timeout or signal. - * We check if we have any "posted" work for us. - */ + p = kbuf; + + while (p < (kbuf + count)) { + p += count_trail_chars(p, (int)(count - (p - kbuf))); - if (t->control & T_TERMINATE || signal_pending(current)) - /* we received a request to terminate ourself */ - break; + if (p >= (kbuf + count)) { + break; + } - - if(t->control & T_STOP) { - pktgen_stop(t); - t->control &= ~(T_STOP); + /* Read variable name */ + if (debug) { + printk("pg_thread: %s,%lu\n", name, count); } - if(t->control & T_RUN) { - pktgen_run(t); - t->control &= ~(T_RUN); + len = strlen("add_interface "); + if (!strncmp(p, "add_interface ", len)) { + char f[32]; + p += len; + + p += count_trail_chars(p, (int)(count - (p - kbuf))); + + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + + pg_add_interface_info(pg_thread, f); + goto foundcmd; } - if(t->control & T_REMDEV) { - pktgen_rem_all_ifs(t); - t->control &= ~(T_REMDEV); + len = strlen("rem_interface "); + if (!strncmp(p, "rem_interface ", len)) { + struct pktgen_interface_info* info = NULL; + char f[32]; + p += len; + + p += count_trail_chars(p, (int)(count - (p - kbuf))); + + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); + + info = pg_find_interface(pg_thread, f); + if (info) { + pg_rem_interface_info(pg_thread, info); + } + else { + printk("ERROR: Interface: %s is not found.\n", f); + } + goto foundcmd; } - if (need_resched()) - schedule(); - } + len = strlen("max_before_softirq "); + if (!strncmp(p, "max_before_softirq", len)) { + char f[32]; + p += len; + + p += count_trail_chars(p, (int)(count - (p - kbuf))); - PG_DEBUG(printk("pktgen: %s stopping all device\n", t->name)); - pktgen_stop(t); + strncpy_token(f, p, min(31, (int)(count - (p - kbuf)))); + f[31] = 0; + + p += strlen(f); - PG_DEBUG(printk("pktgen: %s removing all device\n", t->name)); - pktgen_rem_all_ifs(t); + pg_thread->max_before_softirq = atoui(f); + + goto foundcmd; + } + + printk("Pktgen:pg_thread_write: Unknown command -:%20s:-\n", p); + p++; + + foundcmd: + strcpy(pg_result, "ok"); + } + + kfree(kbuf); + return count; +}/* proc_pg_thread_write */ - PG_DEBUG(printk("pktgen: %s removing thread.\n", t->name)); - pktgen_rem_thread(t); -} -static struct pktgen_dev *pktgen_find_dev(struct pktgen_thread *t, const char* ifname) +int create_proc_dir(void) 
{ - struct pktgen_dev *pkt_dev = NULL; - if_lock(t); + int len; + /* does proc_dir already exists */ + len = strlen(PG_PROC_DIR); - for(pkt_dev=t->if_list; pkt_dev; pkt_dev = pkt_dev->next ) { - if (strcmp(pkt_dev->ifname, ifname) == 0) { + for (pg_proc_dir = proc_net->subdir; pg_proc_dir; pg_proc_dir=pg_proc_dir->next) { + if ((pg_proc_dir->namelen == len) && + (! memcmp(pg_proc_dir->name, PG_PROC_DIR, len))) { break; } } - - if_unlock(t); - PG_DEBUG(printk("pktgen: find_dev(%s) returning %p\n", ifname,pkt_dev)); - return pkt_dev; + + if (!pg_proc_dir) { + pg_proc_dir = create_proc_entry(PG_PROC_DIR, S_IFDIR, proc_net); + } + + if (!pg_proc_dir) { + return -ENODEV; + } + + return 0; } -/* - * Adds a dev at front of if_list. - */ - -static int add_dev_to_thread(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) +int remove_proc_dir(void) { - int rv = 0; - - if_lock(t); + remove_proc_entry(PG_PROC_DIR, proc_net); + return 0; +} - if (pkt_dev->pg_thread) { - printk("pktgen: ERROR: already assigned to a thread.\n"); - rv = -EBUSY; - goto out; +static struct pktgen_interface_info* pg_find_interface(struct pktgen_thread_info* pg_thread, + const char* ifname) { + struct pktgen_interface_info* rv = NULL; + pg_lock(pg_thread, __FUNCTION__); + + if (pg_thread->cur_if && (strcmp(pg_thread->cur_if->ifname, ifname) == 0)) { + rv = pg_thread->cur_if; + goto found; + } + + rv = pg_thread->running_if_infos; + while (rv) { + if (strcmp(rv->ifname, ifname) == 0) { + goto found; + } + rv = rv->next; } - pkt_dev->next =t->if_list; t->if_list=pkt_dev; - pkt_dev->pg_thread = t; - pkt_dev->running = 0; - out: - if_unlock(t); + rv = pg_thread->stopped_if_infos; + while (rv) { + if (strcmp(rv->ifname, ifname) == 0) { + goto found; + } + rv = rv->next; + } + found: + pg_unlock(pg_thread, __FUNCTION__); return rv; -} +}/* pg_find_interface */ -/* Called under thread lock */ - -static int pktgen_add_device(struct pktgen_thread *t, const char* ifname) -{ - struct pktgen_dev *pkt_dev; - - /* We don't allow a device to be on several threads */ - if( (pkt_dev = __pktgen_NN_threads(ifname, FIND)) == NULL) { - - pkt_dev = kmalloc(sizeof(struct pktgen_dev), GFP_KERNEL); - if (!pkt_dev) +static int pg_add_interface_info(struct pktgen_thread_info* pg_thread, const char* ifname) { + struct pktgen_interface_info* i = pg_find_interface(pg_thread, ifname); + if (!i) { + i = kmalloc(sizeof(struct pktgen_interface_info), GFP_KERNEL); + if (!i) { return -ENOMEM; - - memset(pkt_dev, 0, sizeof(struct pktgen_dev)); - - pkt_dev->flows = vmalloc(MAX_CFLOWS*sizeof(struct flow_state)); - if (pkt_dev->flows == NULL) { - kfree(pkt_dev); - return -ENOMEM; - } - memset(pkt_dev->flows, 0, MAX_CFLOWS*sizeof(struct flow_state)); - - pkt_dev->min_pkt_size = ETH_ZLEN; - pkt_dev->max_pkt_size = ETH_ZLEN; - pkt_dev->nfrags = 0; - pkt_dev->clone_skb = pg_clone_skb_d; - pkt_dev->delay_us = pg_delay_d / 1000; - pkt_dev->delay_ns = pg_delay_d % 1000; - pkt_dev->count = pg_count_d; - pkt_dev->sofar = 0; - pkt_dev->udp_src_min = 9; /* sink port */ - pkt_dev->udp_src_max = 9; - pkt_dev->udp_dst_min = 9; - pkt_dev->udp_dst_max = 9; - - strncpy(pkt_dev->ifname, ifname, 31); - sprintf(pkt_dev->fname, "net/%s/%s", PG_PROC_DIR, ifname); - - if (! 
pktgen_setup_dev(pkt_dev)) { - printk("pktgen: ERROR: pktgen_setup_dev failed.\n"); - if (pkt_dev->flows) - vfree(pkt_dev->flows); - kfree(pkt_dev); + } + memset(i, 0, sizeof(struct pktgen_interface_info)); + + i->min_pkt_size = ETH_ZLEN; + i->max_pkt_size = ETH_ZLEN; + i->nfrags = 0; + i->multiskb = pg_multiskb_d; + i->peer_multiskb = 0; + i->ipg = pg_ipg_d; + i->count = pg_count_d; + i->sofar = 0; + i->hh[12] = 0x08; /* fill in protocol. Rest is filled in later. */ + i->hh[13] = 0x00; + i->udp_src_min = 9; /* sink NULL */ + i->udp_src_max = 9; + i->udp_dst_min = 9; + i->udp_dst_max = 9; + i->rcv = pktgen_receive; + + strncpy(i->ifname, ifname, 31); + sprintf(i->fname, "net/%s/%s", PG_PROC_DIR, ifname); + + if (! pg_setup_interface(pg_thread, i)) { + printk("ERROR: pg_setup_interface failed.\n"); + kfree(i); return -ENODEV; } - pkt_dev->proc_ent = create_proc_entry(pkt_dev->fname, 0600, NULL); - if (!pkt_dev->proc_ent) { - printk("pktgen: cannot create %s procfs entry.\n", pkt_dev->fname); - if (pkt_dev->flows) - vfree(pkt_dev->flows); - kfree(pkt_dev); + i->proc_ent = create_proc_entry(i->fname, 0600, 0); + if (!i->proc_ent) { + printk("pktgen: Error: cannot create %s procfs entry.\n", i->fname); + kfree(i); return -EINVAL; } - pkt_dev->proc_ent->read_proc = proc_if_read; - pkt_dev->proc_ent->write_proc = proc_if_write; - pkt_dev->proc_ent->data = (void*)(pkt_dev); - pkt_dev->proc_ent->owner = THIS_MODULE; + i->proc_ent->read_proc = proc_pg_if_read; + i->proc_ent->write_proc = proc_pg_if_write; + i->proc_ent->data = (void*)(i); + i->proc_ent->owner = THIS_MODULE; - return add_dev_to_thread(t, pkt_dev); + return add_interface_to_thread(pg_thread, i); } else { - printk("pktgen: ERROR: interface already used.\n"); + printk("pktgen ERROR: interface already exists.\n"); return -EBUSY; } -} +}/* pg_add_interface_info */ -static struct pktgen_thread *pktgen_find_thread(const char* name) -{ - struct pktgen_thread *t = NULL; - thread_lock(); +/* return the first !in_use thread structure */ +static struct pktgen_thread_info* pg_gc_thread_list_helper(void) { + struct pktgen_thread_info* rv = NULL; + + pg_lock_thread_list(__FUNCTION__); - t = pktgen_threads; - while (t) { - if (strcmp(t->name, name) == 0) + rv = pktgen_threads; + while (rv) { + if (!rv->in_use) { break; + } + rv = rv->next; + } + pg_unlock_thread_list(__FUNCTION__); + return rv; +}/* pg_find_thread */ + +static void pg_gc_thread_list(void) { + struct pktgen_thread_info* t = NULL; + struct pktgen_thread_info* w = NULL; + + while ((t = pg_gc_thread_list_helper())) { + pg_lock_thread_list(__FUNCTION__); + if (pktgen_threads == t) { + pktgen_threads = t->next; + kfree(t); + } + else { + w = pktgen_threads; + while (w) { + if (w->next == t) { + w->next = t->next; + t->next = NULL; + kfree(t); + break; + } + w = w->next; + } + } + pg_unlock_thread_list(__FUNCTION__); + } +}/* pg_gc_thread_list */ + + +static struct pktgen_thread_info* pg_find_thread(const char* name) { + struct pktgen_thread_info* rv = NULL; + + pg_gc_thread_list(); + + pg_lock_thread_list(__FUNCTION__); - t = t->next; + rv = pktgen_threads; + while (rv) { + if (strcmp(rv->name, name) == 0) { + break; + } + rv = rv->next; } - thread_unlock(); - return t; -} + pg_unlock_thread_list(__FUNCTION__); + return rv; +}/* pg_find_thread */ -static int pktgen_create_thread(const char* name, int cpu) -{ - struct pktgen_thread *t = NULL; + +static int pg_add_thread_info(const char* name) { + struct pktgen_thread_info* pg_thread = NULL; if (strlen(name) > 31) { - printk("pktgen: 
ERROR: Thread name cannot be more than 31 characters.\n"); + printk("pktgen ERROR: Thread name cannot be more than 31 characters.\n"); return -EINVAL; } - if (pktgen_find_thread(name)) { - printk("pktgen: ERROR: thread: %s already exists\n", name); + if (pg_find_thread(name)) { + printk("pktgen ERROR: Thread: %s already exists\n", name); return -EINVAL; } - t = (struct pktgen_thread*)(kmalloc(sizeof(struct pktgen_thread), GFP_KERNEL)); - if (!t) { + pg_thread = (struct pktgen_thread_info*)(kmalloc(sizeof(struct pktgen_thread_info), GFP_KERNEL)); + if (!pg_thread) { printk("pktgen: ERROR: out of memory, can't create new thread.\n"); return -ENOMEM; } - memset(t, 0, sizeof(struct pktgen_thread)); - strcpy(t->name, name); - spin_lock_init(&t->if_lock); - t->cpu = cpu; - - sprintf(t->fname, "net/%s/%s", PG_PROC_DIR, t->name); - t->proc_ent = create_proc_entry(t->fname, 0600, NULL); - if (!t->proc_ent) { - printk("pktgen: cannot create %s procfs entry.\n", t->fname); - kfree(t); + memset(pg_thread, 0, sizeof(struct pktgen_thread_info)); + strcpy(pg_thread->name, name); + spin_lock_init(&(pg_thread->pg_threadlock)); + pg_thread->in_use = 1; + pg_thread->max_before_softirq = 10000000; + + sprintf(pg_thread->fname, "net/%s/%s", PG_PROC_DIR, pg_thread->name); + pg_thread->proc_ent = create_proc_entry(pg_thread->fname, 0600, 0); + if (!pg_thread->proc_ent) { + printk("pktgen: Error: cannot create %s procfs entry.\n", pg_thread->fname); + kfree(pg_thread); return -EINVAL; } - t->proc_ent->read_proc = proc_thread_read; - t->proc_ent->write_proc = proc_thread_write; - t->proc_ent->data = (void*)(t); - t->proc_ent->owner = THIS_MODULE; - - t->next = pktgen_threads; - pktgen_threads = t; - - if (kernel_thread((void *) pktgen_thread_worker, (void *) t, - CLONE_FS | CLONE_FILES | CLONE_SIGHAND) < 0) - printk("pktgen: kernel_thread() failed for cpu %d\n", t->cpu); - - return 0; -} - -/* - * Removes a device from the thread if_list. 
- */ -static void _rem_dev_from_if_list(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) -{ - struct pktgen_dev *i, *prev = NULL; - - i = t->if_list; + pg_thread->proc_ent->read_proc = proc_pg_thread_read; + pg_thread->proc_ent->write_proc = proc_pg_thread_write; + pg_thread->proc_ent->data = (void*)(pg_thread); + pg_thread->proc_ent->owner = THIS_MODULE; - while(i) { - if(i == pkt_dev) { - if(prev) prev->next = i->next; - else t->if_list = NULL; - break; - } - prev = i; - i=i->next; - } -} + pg_thread->next = pktgen_threads; + pktgen_threads = pg_thread; -static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *pkt_dev) -{ + /* Start the thread running */ + start_pktgen_kthread(pg_thread); + return 0; +}/* pg_add_thread_info */ - PG_DEBUG(printk("pktgen: remove_device pkt_dev=%p\n", pkt_dev)); - if (pkt_dev->running) { - printk("pktgen:WARNING: trying to remove a running interface, stopping it now.\n"); - pktgen_stop_device(pkt_dev); +/* interface_info must be stopped and on the pg_thread stopped list + */ +static int pg_rem_interface_info(struct pktgen_thread_info* pg_thread, + struct pktgen_interface_info* info) { + if (info->do_run_run) { + printk("WARNING: trying to remove a running interface, stopping it now.\n"); + pg_stop_interface(pg_thread, info); } - /* Dis-associate from the interface */ - - if (pkt_dev->odev) { - dev_put(pkt_dev->odev); - pkt_dev->odev = NULL; - } + /* Diss-associate from the interface */ + check_remove_device(info); - /* And update the thread if_list */ - - _rem_dev_from_if_list(t, pkt_dev); - /* Clean up proc file system */ - - if (strlen(pkt_dev->fname)) - remove_proc_entry(pkt_dev->fname, NULL); - - if (pkt_dev->flows) - vfree(pkt_dev->flows); - kfree(pkt_dev); + if (strlen(info->fname)) { + remove_proc_entry(info->fname, NULL); + } + + pg_lock(pg_thread, __FUNCTION__); + { + /* Remove from the stopped list */ + struct pktgen_interface_info* p = pg_thread->stopped_if_infos; + if (p == info) { + pg_thread->stopped_if_infos = p->next; + p->next = NULL; + } + else { + while (p) { + if (p->next == info) { + p->next = p->next->next; + info->next = NULL; + break; + } + p = p->next; + } + } + + info->pg_thread = NULL; + } + pg_unlock(pg_thread, __FUNCTION__); + return 0; -} +}/* pg_rem_interface_info */ -static int __init pg_init(void) -{ - int cpu; + +static int __init pg_init(void) { + int i; printk(version); + /* Initialize our global variables */ + for (i = 0; iproc_fops = &pktgen_fops; + module_proc_ent->read_proc = proc_pg_ctrl_read; + module_proc_ent->write_proc = proc_pg_ctrl_write; + module_proc_ent->proc_fops = &(pktgen_fops); /* IOCTL hook */ module_proc_ent->data = NULL; + module_proc_ent->owner = THIS_MODULE; /* Register us to receive netdevice events */ register_netdevice_notifier(&pktgen_notifier_block); - for (cpu = 0; cpu < NR_CPUS ; cpu++) { - char buf[30]; + /* Register handler */ + handle_pktgen_hook = pktgen_receive; - if (!cpu_online(cpu)) - continue; - - sprintf(buf, "kpktgend_%i", cpu); - pktgen_create_thread(buf, cpu); + for (i = 0; icontrol |= (T_TERMINATE); - - wait_event_interruptible_timeout(queue, (t != pktgen_threads), HZ); + stop_pktgen_kthread(pktgen_threads); } - + /* Un-register us from receiving netdevice events */ unregister_netdevice_notifier(&pktgen_notifier_block); /* Clean up proc file system */ - remove_proc_entry(module_fname, NULL); remove_proc_dir(); + } module_init(pg_init); module_exit(pg_cleanup); -MODULE_AUTHOR("Robert Olsson "); MODULE_DESCRIPTION("Packet Generator tool"); 
MODULE_LICENSE("GPL"); -module_param(pg_count_d, int, 0); -module_param(pg_delay_d, int, 0); -module_param(pg_clone_skb_d, int, 0); -module_param(debug, int, 0); +MODULE_PARM(pg_count_d, "i"); +MODULE_PARM(pg_ipg_d, "i"); +MODULE_PARM(pg_thread_count, "i"); +MODULE_PARM(pg_multiskb_d, "i"); +MODULE_PARM(debug, "i"); --- linux-2.6.11/net/core/pktgen.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.11.p4s/net/core/pktgen.h 2005-07-11 16:06:34.000000000 -0700 @@ -0,0 +1,264 @@ +/* -*-linux-c-*- + * $Id: pg_patch.txt,v 1.2 2002/07/07 07:23:50 greear Exp $ + * pktgen.c: Packet Generator for performance evaluation. + * + * See pktgen.c for details of changes, etc. +*/ + + +#ifndef PKTGEN_H_INCLUDE_KERNEL__ +#define PKTGEN_H_INCLUDE_KERNEL__ + +#include + +/* The buckets are exponential in 'width' */ +#define LAT_BUCKETS_MAX 32 + +#define IP_NAME_SZ 32 + +#define PG_MAX_ACCUM_DELAY_NS 1000000 /* one ms */ + +/* Keep information per interface */ +struct pktgen_interface_info { + char ifname[32]; + + /* Parameters */ + + /* If min != max, then we will either do a linear iteration, or + * we will do a random selection from within the range. + */ + __u32 flags; + +#define F_IPSRC_RND (1<<0) /* IP-Src Random */ +#define F_IPDST_RND (1<<1) /* IP-Dst Random */ +#define F_UDPSRC_RND (1<<2) /* UDP-Src Random */ +#define F_UDPDST_RND (1<<3) /* UDP-Dst Random */ +#define F_MACSRC_RND (1<<4) /* MAC-Src Random */ +#define F_MACDST_RND (1<<5) /* MAC-Dst Random */ +#define F_SET_SRCMAC (1<<6) /* Specify-Src-Mac + (default is to use Interface's MAC Addr) */ +#define F_SET_SRCIP (1<<7) /* Specify-Src-IP + (default is to use Interface's IP Addr) */ +#define F_TXSIZE_RND (1<<8) /* Transmit size is random */ +#define F_IPMAC (1<<9) /* MAC address = 00:00:IP address (dhetheri) */ + + int min_pkt_size; /* = ETH_ZLEN; */ + int max_pkt_size; /* = ETH_ZLEN; */ + int nfrags; + __u32 ipg; /* Default Interpacket gap in nsec */ + __u64 count; /* Default No packets to send */ + __u64 sofar; /* How many pkts we've sent so far */ + __u64 tx_bytes; /* How many bytes we've transmitted */ + __u64 errors; /* Errors when trying to transmit, pkts will be re-sent */ + + /* runtime counters relating to multiskb */ + __u64 next_tx_ns; /* timestamp of when to tx next, in nano-seconds */ + __u64 fp; + __u32 fp_tmp; + int last_ok; /* Was last skb sent? + * Or a failed transmit of some sort? This will keep + * sequence numbers in order, for example. + */ + /* Fields relating to receiving pkts */ + __u32 last_seq_rcvd; + __u64 ooo_rcvd; /* out-of-order packets received */ + __u64 pkts_rcvd; /* packets received */ + __u64 dup_rcvd; /* duplicate packets received */ + __u64 bytes_rcvd; /* total bytes received, as obtained from the skb */ + __u64 seq_gap_rcvd; /* how many gaps we received. This coorelates to + * dropped pkts, except perhaps in cases where we also + * have re-ordered pkts. In that case, you have to tie-break + * by looking at send v/s received pkt totals for the interfaces + * involved. + */ + __u64 non_pg_pkts_rcvd; /* Count how many non-pktgen skb's we are sent to check. 
*/ + __u64 dup_since_incr; /* How many duplicates since the last seq number increment, + * used to detect gaps when multiskb > 1 + */ + int avg_latency; /* in micro-seconds */ + int min_latency; + int max_latency; + __u64 latency_bkts[LAT_BUCKETS_MAX]; + __u64 pkts_rcvd_since_clear; /* with regard to clearing/resetting the latency logic */ + + __u64 started_at; /* micro-seconds */ + __u64 stopped_at; /* micro-seconds */ + __u64 idle_acc; + __u32 seq_num; + + int multiskb; /* Use multiple SKBs during packet gen. If this number + * is greater than 1, then that many copies of the same + * packet will be sent before a new packet is allocated. + * For instance, if you want to send 1024 identical packets + * before creating a new packet, set multiskb to 1024. + */ + int peer_multiskb; /* Helps detect drops when multiskb > 1 on peer */ + int do_run_run; /* if this changes to false, the test will stop */ + + char dst_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char dst_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char src_min[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + char src_max[IP_NAME_SZ]; /* IP, ie 1.2.3.4 */ + + /* If we're doing ranges, random or incremental, then this + * defines the min/max for those ranges. + */ + __u32 saddr_min; /* inclusive, source IP address */ + __u32 saddr_max; /* exclusive, source IP address */ + __u32 daddr_min; /* inclusive, dest IP address */ + __u32 daddr_max; /* exclusive, dest IP address */ + + __u16 udp_src_min; /* inclusive, source UDP port */ + __u16 udp_src_max; /* exclusive, source UDP port */ + __u16 udp_dst_min; /* inclusive, dest UDP port */ + __u16 udp_dst_max; /* exclusive, dest UDP port */ + + __u32 src_mac_count; /* How many MACs to iterate through */ + __u32 dst_mac_count; /* How many MACs to iterate through */ + + unsigned char dst_mac[6]; + unsigned char src_mac[6]; + + __u32 cur_dst_mac_offset; + __u32 cur_src_mac_offset; + __u32 cur_saddr; + __u32 cur_daddr; + __u16 cur_udp_dst; + __u16 cur_udp_src; + __u32 cur_pkt_size; + + __u8 hh[14]; + /* = { + 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB, + + We fill in SRC address later + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00 + }; + */ + __u16 prot; /* pad out the hh struct to an even 16 bytes, prot can + * be used to specify an IP protocol too (default is 0, + * which implies UDP) + */ + char result[512]; + /* proc file names */ + char fname[80]; + + /* End of stuff that user-space should care about */ + long long accum_delay_ns; /* Used to sleep small amounts on average, w/out spinning */ + __u32 sleeps; /* How many times did it sleep on the wait queue? */ + __u32 queue_stopped; /* How many times was our network device queue stopped? */ + __u64 nanodelays; /* How many times has the nano-delay method been called? */ + u64 ntx_fudge; /* Used for exponential backoff when over-driving interfaces */ + + struct sk_buff* skb; /* skb we are to transmit next, mainly used for when we + * are transmitting the same one multiple times + */ + struct pktgen_thread_info* pg_thread; /* the owner */ + + struct pktgen_interface_info* next_hash; /* Used for chaining in the hash buckets */ + struct pktgen_interface_info* next; /* Used for chaining in the thread's run-queue */ + + + + struct net_device* odev; /* The out-going device. Note that the device should + * have its pg_info pointer pointing back to this + * device. This will be set when the user specifies + * the out-going device name (not when the inject is + * started as it used to do.)
+ */ + + struct proc_dir_entry *proc_ent; + + int (*rcv) (struct sk_buff *skb); +}; /* pktgen_interface_info */ + + +struct pktgen_hdr { + __u32 pgh_magic; + __u32 seq_num; + struct timeval timestamp; +}; + + +/* Define some IOCTLs. Just picking random numbers, basically. */ +#define GET_PKTGEN_INTERFACE_INFO 0x7450 + +struct pktgen_ioctl_info { + char thread_name[32]; + char interface_name[32]; + struct pktgen_interface_info info; +}; + + +struct pktgen_thread_info { + struct pktgen_interface_info* running_if_infos; /* list of running interfaces, current will + * not be in this list. + */ + struct pktgen_interface_info* stopped_if_infos; /* list of stopped interfaces. */ + struct pktgen_interface_info* cur_if; /* Current (running) interface we are servicing in + * the main thread loop. + */ + + int running_if_sz; + struct pktgen_thread_info* next; + char name[32]; + char fname[128]; /* name of proc file */ + struct proc_dir_entry *proc_ent; + char result[512]; + u32 max_before_softirq; /* We'll call do_softirq to prevent starvation. */ + + spinlock_t pg_threadlock; + + /* Linux task structure of thread */ + struct task_struct *thread; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) + /* Task queue need to launch thread */ + struct tq_struct tq; +#else + /* Task queue need to launch thread */ + struct work_struct wq; +#endif + /* function to be started as thread */ + void (*function) (struct pktgen_thread_info *kthread); + + /* semaphore needed on start and creation of thread. */ + struct semaphore startstop_sem; + + /* public data */ + + /* queue thread is waiting on. Gets initialized by + init_kthread, can be used by thread itself. + */ + wait_queue_head_t queue; + + /* flag to tell thread whether to die or not. + When the thread receives a signal, it must check + the value of terminate and call exit_kthread and terminate + if set. + */ + int terminate; + + int in_use; /* if 0, then we can delete or re-use this struct */ + + __u64 next_tx_ns; /* timestamp of when to tx next, in nano-seconds */ + + u32 queues_stopped; /* How many times were all queues blocked */ + char sleeping; /* Are we sleeping or not */ + char pad[3]; + + atomic_t nqw_ref_count; + + /* additional data to pass to kernel thread */ + void *arg; +};/* struct pktgen_thread_info */ + +/* Defined in dev.c */ +extern int (*handle_pktgen_hook)(struct sk_buff *skb); + +/* Returns < 0 if the skb is not a pktgen buffer. */ +int pktgen_receive(struct sk_buff* skb); + + +#endif --- linux-2.6.11/Documentation/networking/pktgen.txt 2005-03-01 23:38:09.000000000 -0800 +++ linux-2.6.11.p4s/Documentation/networking/pktgen.txt 2005-07-11 16:06:34.000000000 -0700 @@ -1,214 +1,118 @@ +How to use the Linux packet generator module. - - HOWTO for the linux packet generator - ------------------------------------ - -Date: 041221 - -Enable CONFIG_NET_PKTGEN to compile and build pktgen.o either in kernel -or as module. Module is preferred. insmod pktgen if needed. Once running -pktgen creates a thread on each CPU where each thread has affinty it's CPU. -Monitoring and controlling is done via /proc. Easiest to select a suitable -a sample script and configure. - -On a dual CPU: - -ps aux | grep pkt -root 129 0.3 0.0 0 0 ? SW 2003 523:20 [pktgen/0] -root 130 0.3 0.0 0 0 ? 
SW 2003 509:50 [pktgen/1]
-
-
-For montoring and control pktgen creates:
-        /proc/net/pktgen/pgctrl
-        /proc/net/pktgen/kpktgend_X
-        /proc/net/pktgen/ethX
-
-
-Viewing threads
-===============
-/proc/net/pktgen/kpktgend_0
-Name: kpktgend_0  max_before_softirq: 10000
-Running:
-Stopped: eth1
-Result: OK: max_before_softirq=10000
-
-Most important the devices assigend to thread. Note! A device can only belong
-to one thread.
-
-
-Viewing devices
-===============
-
-Parm section holds configured info. Current hold running stats.
-Result is printed after run or after interruption. Example:
-
-/proc/net/pktgen/eth1
-
-Params: count 10000000  min_pkt_size: 60  max_pkt_size: 60
-     frags: 0  delay: 0  clone_skb: 1000000  ifname: eth1
-     flows: 0  flowlen: 0
-     dst_min: 10.10.11.2  dst_max:
-     src_min:   src_max:
-     src_mac: 00:00:00:00:00:00  dst_mac: 00:04:23:AC:FD:82
-     udp_src_min: 9  udp_src_max: 9  udp_dst_min: 9  udp_dst_max: 9
-     src_mac_count: 0  dst_mac_count: 0
-     Flags:
-Current:
-     pkts-sofar: 10000000  errors: 39664
-     started: 1103053986245187us  stopped: 1103053999346329us  idle: 880401us
-     seq_num: 10000011  cur_dst_mac_offset: 0  cur_src_mac_offset: 0
-     cur_saddr: 0x10a0a0a  cur_daddr: 0x20b0a0a
-     cur_udp_dst: 9  cur_udp_src: 9
-     flows: 0
-Result: OK: 13101142(c12220741+d880401) usec, 10000000 (60byte,0frags)
-  763292pps 390Mb/sec (390805504bps) errors: 39664
-
-Confguring threads and devices
-==============================
-This is done via the /proc interface easiest done via pgset in the scripts
-
-Examples:
-
- pgset "clone_skb 1"     sets the number of copies of the same packet
- pgset "clone_skb 0"     use single SKB for all transmits
- pgset "pkt_size 9014"   sets packet size to 9014
- pgset "frags 5"         packet will consist of 5 fragments
- pgset "count 200000"    sets number of packets to send, set to zero
-                         for continious sends untill explicitl stopped.
-
- pgset "delay 5000"      adds delay to hard_start_xmit(). nanoseconds
-
- pgset "dst 10.0.0.1"    sets IP destination address
+1. Enable CONFIG_NET_PKTGEN to compile and build pktgen.o, and install it
+   where insmod can find it.
+2. Add an interface to the kpktgend_0 thread:
+     echo "add_interface eth1" > /proc/net/pktgen/kpktgend_0
+2a. Add more interfaces as needed.
+3. Configure interfaces by setting values as defined below.  The
+   general strategy is:  echo "command" > /proc/net/pktgen/[device]
+   For example:  echo "multiskb 100" > /proc/net/pktgen/eth1
+
+ "multiskb 100"        Will send 100 identical pkts before creating a
+                       new packet with a new timestamp, etc.
+ "multiskb 0"          Will create a new skb for all transmits.
+ "peer_multiskb 100"   Set to the sender's multiskb value; helps the
+                       receiver detect dropped and duplicate pkts.
+ "min_pkt_size 60"     sets packet minimum size to 60 (64 counting CRC)
+ "max_pkt_size 1514"   sets packet maximum size to 1514 (1518 counting CRC)
+ "frags 5"             packet will consist of 5 fragments
+ "count 200000"        sets number of packets to send, set to zero
+                       for continuous sends until explicitly
+                       stopped.
+ "ipg 5000"            sets artificial gap inserted between packets
+                       to 5000 nanoseconds
+ "dst 10.0.0.1"        sets IP destination address
                        (BEWARE! This generator is very aggressive!)
-
- pgset "dst_min 10.0.0.1"    Same as dst
- pgset "dst_max 10.0.0.254"  Set the maximum destination IP.
- pgset "src_min 10.0.0.1"    Set the minimum (or only) source IP.
- pgset "src_max 10.0.0.254"  Set the maximum source IP.
- pgset "dst6 fec0::1"        IPV6 destination address
- pgset "src6 fec0::2"        IPV6 source address
- pgset "dstmac 00:00:00:00:00:00"  sets MAC destination address
- pgset "srcmac 00:00:00:00:00:00"  sets MAC source address
-
- pgset "src_mac_count 1"  Sets the number of MACs we'll range through.
-                          The 'minimum' MAC is what you set with srcmac.
-
- pgset "dst_mac_count 1"  Sets the number of MACs we'll range through.
-                          The 'minimum' MAC is what you set with dstmac.
-
- pgset "flag [name]"      Set a flag to determine behaviour. Current flags
-                          are: IPSRC_RND #IP Source is random (between min/max),
-                               IPDST_RND, UDPSRC_RND,
-                               UDPDST_RND, MACSRC_RND, MACDST_RND
-
- pgset "udp_src_min 9"    set UDP source port min, If < udp_src_max, then
+ "dst_min 10.0.0.1"           Same as dst
+ "dst_max 10.0.0.254"         Set the maximum destination IP.
+ "src_min 10.0.0.1"           Set the minimum (or only) source IP.
+ "src_max 10.0.0.254"         Set the maximum source IP.
+ "dst_mac 00:00:00:00:00:00"  sets MAC destination address
+ "src_mac 00:00:00:00:00:00"  sets MAC source address
+ "src_mac_count 1"     Sets the number of MACs we'll range through.  The
+                       'minimum' MAC is what you set with srcmac.
+ "dst_mac_count 1"     Sets the number of MACs we'll range through.  The
+                       'minimum' MAC is what you set with dstmac.
+ "flag [name]"         Set a flag to determine behaviour.  Prepend '!' to the
+                       flag to turn it off.  Current flags are:
+                       IPSRC_RND  #IP Source is random (between min/max),
+                       IPDST_RND, UDPSRC_RND, TXSIZE_RND,
+                       UDPDST_RND, MACSRC_RND, MACDST_RND
+ "udp_src_min 9"       set UDP source port min; if < udp_src_max, then
                        cycle through the port range.
-
- pgset "udp_src_max 9"    set UDP source port max.
- pgset "udp_dst_min 9"    set UDP destination port min, If < udp_dst_max, then
+ "udp_src_max 9"       set UDP source port max.
+ "udp_dst_min 9"       set UDP destination port min; if < udp_dst_max, then
                        cycle through the port range.
- pgset "udp_dst_max 9"    set UDP destination port max.
-
- pgset stop              aborts injection. Also, ^C aborts generator.
-
-
-Example scripts
-===============
-
-A collection of small tutorial scripts for pktgen is in expamples dir.
-
-pktgen.conf-1-1          # 1 CPU 1 dev
-pktgen.conf-1-2          # 1 CPU 2 dev
-pktgen.conf-2-1          # 2 CPU's 1 dev
-pktgen.conf-2-2          # 2 CPU's 2 dev
-pktgen.conf-1-1-rdos     # 1 CPU 1 dev w. route DoS
-pktgen.conf-1-1-ip6      # 1 CPU 1 dev ipv6
-pktgen.conf-1-1-ip6-rdos # 1 CPU 1 dev ipv6 w. route DoS
-pktgen.conf-1-1-flows    # 1 CPU 1 dev multiple flows.
-
-Run in shell: ./pktgen.conf-X-Y  It does all the setup including sending.
-
-
-Interrupt affinity
-===================
-Note when adding devices to a specific CPU there good idea to also assign
-/proc/irq/XX/smp_affinity so the TX-interrupts gets bound to the same CPU.
-as this reduces cache bouncing when freeing skb's.
-
-
-Current commands and configuration options
-==========================================
-
-** Pgcontrol commands:
-
-start
-stop
-
-** Thread commands:
-
-add_device
-rem_device_all
-max_before_softirq
-
-
-** Device commands:
-
-count
-clone_skb
-debug
-
-frags
-delay
-
-src_mac_count
-dst_mac_count
-
-pkt_size
-min_pkt_size
-max_pkt_size
-
-udp_src_min
-udp_src_max
-
-udp_dst_min
-udp_dst_max
-
-flag
-  IPSRC_RND
-  TXSIZE_RND
-  IPDST_RND
-  UDPSRC_RND
-  UDPDST_RND
-  MACSRC_RND
-  MACDST_RND
-
-dst_min
-dst_max
-
-src_min
-src_max
-
-dst_mac
-src_mac
-
-clear_counters
-
-dst6
-src6
-
-flows
-flowlen
-
-References:
-ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/
-ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
-
-Paper from Linux-Kongress in Erlangen 2004.
-ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/pktgen_paper.pdf
-
-Thanks to:
-Grant Grundler for testing on IA-64 and parisc, Harald Welte, Lennert Buytenhek
-Stephen Hemminger, Andi Kleen, Dave Miller and many others.
-
+ "udp_dst_max 9"       set UDP destination port max.
+ "stop"                Stops this interface from transmitting.  It will still
+                       receive packets and record their latency, etc.
+ "start"               Starts the interface transmitting packets.
+ "clear_counters"      Clear the packet and latency counters.
+
+You can start and stop threads by echoing commands to the /proc/net/pktgen/pgctrl
+file.  Supported commands are:
+ "stop kpktgend_0"     Stop thread 0.
+ "start threadXX"      Start (create) thread XX.  You may wish to create one
+                       thread per CPU.
+
+
+You can manage the interfaces assigned to a thread by echoing commands to
+the /proc/net/pktgen/[thread] file.  Supported commands are:
+ "add_interface eth1"  Add interface eth1 to the chosen thread.
+ "rem_interface eth1"  Remove interface eth1 from the chosen thread.
+ "max_before_softirq"  Maximum loops before we force a call to do_softirq;
+                       this helps mitigate starvation on the RX side.
+
+
+You can examine various counters and parameters by reading the appropriate
+proc file:
+
+[root@localhost lanforge]# cat /proc/net/pktgen/kpktgend_0
+VERSION-1
+Name: kpktgend_0
+Current: eth2
+Running: eth6
+Stopped: eth1 eth5
+Result: NA
+
+
+[root@localhost lanforge]# cat /proc/net/pktgen/eth2
+VERSION-1
+Params: count 0  pkt_size: 300  frags: 0  ipg: 0  multiskb: 0  ifname "eth2"
+     dst_min: 172.2.1.1  dst_max: 172.2.1.6  src_min: 172.1.1.4  src_max: 172.1.1.8
+     src_mac: 00:00:00:00:00:00  dst_mac: 00:00:00:00:00:00
+     udp_src_min: 99  udp_src_max: 1005  udp_dst_min: 9  udp_dst_max: 9
+     src_mac_count: 0  dst_mac_count: 0
+     Flags: IPSRC_RND IPDST_RND UDPSRC_RND
+Current:
+     pkts-sofar: 158835950  errors: 0
+     started: 1026024703542360us  elapsed: 4756326418us
+     idle: 1723232054307ns  next_tx: 27997154666566(-3202934)ns
+     seq_num: 158835951  cur_dst_mac_offset: 0  cur_src_mac_offset: 0
+     cur_saddr: 0x60101ac  cur_daddr: 0x30102ac  cur_udp_dst: 9  cur_udp_src: 966
+     pkts_rcvd: 476002  bytes_rcvd: 159929440  last_seq_rcvd: 476002  ooo_rcvd: 0
+     dup_rcvd: 0  seq_gap_rcvd(dropped): 0  non_pg_rcvd: 0
+     avg_latency: 41us  min_latency: 40us  max_latency: 347us  pkts_in_sample: 476002
+     Buckets(us) [ 0 0 0 0 0 0 311968 164008 23 3 0 0 0 0 0 0 0 0 0 0 ]
+Result: OK: ipg=0
+
+[root@localhost lanforge]# cat /proc/net/pktgen/eth6
+VERSION-1
+Params: count 0  pkt_size: 300  frags: 0  ipg: 11062341  multiskb: 0  ifname "eth6"
+     dst_min: 90  dst_max: 90  src_min: 90  src_max: 90
+     src_mac: 00:00:00:00:00:00  dst_mac: 00:00:00:00:00:00
+     udp_src_min: 9  udp_src_max: 9  udp_dst_min: 9  udp_dst_max: 9
+     src_mac_count: 0  dst_mac_count: 0
+     Flags:
+Current:
+     pkts-sofar: 479940  errors: 0
+     started: 1026024703542707us  elapsed: 4795667656us
+     idle: 109585100905ns  next_tx: 28042807786397(-79364)ns
+     seq_num: 479941  cur_dst_mac_offset: 0  cur_src_mac_offset: 0
+     cur_saddr: 0x0  cur_daddr: 0x0  cur_udp_dst: 9  cur_udp_src: 9
+     pkts_rcvd: 160323509  bytes_rcvd: 50392479910  last_seq_rcvd: 160323509  ooo_rcvd: 0
+     dup_rcvd: 0  seq_gap_rcvd(dropped): 0  non_pg_rcvd: 0
+     avg_latency: 230us  min_latency: 36us  max_latency: 1837us  pkts_in_sample: 160323509
+     Buckets(us) [ 0 0 0 0 0 0 287725 2618755 54130607 98979415 80358 4226649 0 0 0 0 0 0 0 0 ]
+Result: OK: ipg=11062341
-Good luck with the linux net-development.
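
For scripted runs, the /proc writes documented above can be wrapped in a small
helper program.  The sketch below is illustrative only and is not part of this
patch; it assumes pktgen is loaded, that eth1 exists, and that the command
names match the ones documented above.

    /* Minimal sketch: drive the pktgen /proc interface from user space.
     * Each pg_write() call is equivalent to: echo "cmd" > /proc/net/pktgen/<name>
     */
    #include <stdio.h>
    #include <stdlib.h>

    static void pg_write(const char *name, const char *cmd)
    {
            char path[128];
            FILE *f;

            snprintf(path, sizeof(path), "/proc/net/pktgen/%s", name);
            f = fopen(path, "w");
            if (!f) {
                    perror(path);
                    exit(1);
            }
            fprintf(f, "%s\n", cmd);
            fclose(f);
    }

    int main(void)
    {
            pg_write("kpktgend_0", "add_interface eth1"); /* attach eth1 to thread 0 */
            pg_write("eth1", "multiskb 100");             /* reuse each skb 100 times */
            pg_write("eth1", "min_pkt_size 60");
            pg_write("eth1", "max_pkt_size 1514");
            pg_write("eth1", "count 200000");
            pg_write("eth1", "dst 10.0.0.1");
            pg_write("eth1", "start");                    /* begin transmitting */
            return 0;
    }
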
\ No newline at end of file --- linux-2.6.11/include/linux/if_macvlan.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.11.p4s/include/linux/if_macvlan.h 2005-07-11 16:06:34.000000000 -0700 @@ -0,0 +1,57 @@ +/* -*- linux-c -*- */ +#ifndef _LINUX_IF_MACVLAN_H +#define _LINUX_IF_MACVLAN_H + +/* the ioctl commands */ + +/* actions */ +#define MACVLAN_ENABLE 1 +#define MACVLAN_DISABLE 2 +#define MACVLAN_ADD 3 +#define MACVLAN_DEL 4 +#define MACVLAN_BIND 5 +#define MACVLAN_UNBIND 6 + +/* informative */ +#define MACVLAN_GET_NUM_PORTS 7 +#define MACVLAN_GET_PORT_NAME 8 +#define MACVLAN_GET_NUM_VLANS 9 +#define MACVLAN_GET_VLAN_NAME 10 +#define MACVLAN_GET_NUM_MACS 11 +#define MACVLAN_GET_MAC_NAME 12 + +#define MACVLAN_SET_PORT_FLAGS 13 +#define MACVLAN_GET_PORT_FLAGS 14 + +/* If this IOCTL succeedes, we are a MAC-VLAN interface, otherwise, we are not. */ +#define MACVLAN_IS_MACVLAN 15 + + +#ifdef __KERNEL__ +#include +#include +extern int (*macvlan_ioctl_hook)(unsigned long arg); + +/* Returns >= 0 if it consumed the packet, otherwise let the pkt + * be processed by the netif_rx method, as if macvlan's didn't + * exist. + */ +extern int (*macvlan_handle_frame_hook)(struct sk_buff *skb); +#endif + +struct macvlan_ioctl_reply { + int num; + char name[IFNAMSIZ]; +}; + +struct macvlan_ioctl { + int cmd; + int portidx; + char *ifname; + int ifidx; /* flags when setting port flags */ + unsigned char *macaddr; + int macaddridx; + struct macvlan_ioctl_reply *reply; +}; + +#endif --- linux-2.6.11/include/linux/sockios.h 2005-03-01 23:37:31.000000000 -0800 +++ linux-2.6.11.p4s/include/linux/sockios.h 2005-07-11 16:06:34.000000000 -0700 @@ -65,6 +65,8 @@ #define SIOCDIFADDR 0x8936 /* delete PA address */ #define SIOCSIFHWBROADCAST 0x8937 /* set hardware broadcast addr */ #define SIOCGIFCOUNT 0x8938 /* get number of devices */ +#define SIOCGIFWEIGHT 0x8939 /* get weight of device, in stones */ +#define SIOCSIFWEIGHT 0x893a /* set weight of device, in stones */ #define SIOCGIFBR 0x8940 /* Bridging support */ #define SIOCSIFBR 0x8941 /* Set bridging options */ @@ -94,6 +96,13 @@ #define SIOCGRARP 0x8961 /* get RARP table entry */ #define SIOCSRARP 0x8962 /* set RARP table entry */ +/* MAC address based VLAN control calls */ +#define SIOCGIFMACVLAN 0x8965 /* Mac address multiplex/demultiplex support */ +#define SIOCSIFMACVLAN 0x8966 /* Set macvlan options */ + +#define SIOCGIFREDIRDEV 0x8967 /* Redirect device get ioctl */ +#define SIOCSIFREDIRDEV 0x8968 /* Set redirect dev options */ + /* Driver configuration calls */ #define SIOCGIFMAP 0x8970 /* Get device parameters */ @@ -122,6 +131,15 @@ #define SIOCBRADDIF 0x89a2 /* add interface to bridge */ #define SIOCBRDELIF 0x89a3 /* remove interface from bridge */ +/* Ben's little hack land */ +#define SIOCSACCEPTLOCALADDRS 0x89ba /* Allow interfaces to accept pkts from + * local interfaces...use with SO_BINDTODEVICE + */ +#define SIOCGACCEPTLOCALADDRS 0x89bb /* Allow interfaces to accept pkts from + * local interfaces...use with SO_BINDTODEVICE + */ + + /* Device private ioctl calls */ /* --- linux-2.6.11/net/Kconfig 2005-03-01 23:38:34.000000000 -0800 +++ linux-2.6.11.p4s/net/Kconfig 2005-07-11 16:06:34.000000000 -0700 @@ -326,6 +326,21 @@ If unsure, say N. +config MACVLAN + tristate "MAC-VLAN support" + depends on EXPERIMENTAL + help + This allows one to create virtual interfaces that map packets to + or from specific MAC addresses to a particular interface. 
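
The MAC-VLAN control path added above (struct macvlan_ioctl in if_macvlan.h,
dispatched via SIOCSIFMACVLAN) can be exercised from user space roughly as in
the sketch below.  This is an illustration, not part of the patch: the port
name eth0, the vlan index 1 and the MAC address are placeholders, and error
handling is minimal.  The macvlan module must be available, since net/socket.c
calls request_module() for it on first use.

    /* Sketch: enable MAC-VLAN on eth0, create eth0#1, and bind one MAC to it. */
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>             /* IFNAMSIZ */
    #include <linux/sockios.h>      /* SIOCSIFMACVLAN */
    #include <linux/if_macvlan.h>   /* struct macvlan_ioctl, MACVLAN_* commands */

    int main(void)
    {
            int fd = socket(AF_INET, SOCK_DGRAM, 0); /* any socket carries the ioctl */
            char port[IFNAMSIZ] = "eth0";            /* placeholder lower device */
            char vlan[IFNAMSIZ] = "eth0#1";          /* name given to vlan index 1 */
            unsigned char mac[6] = { 0x11, 0x22, 0x33, 0x44, 0x55, 0x66 };
            struct macvlan_ioctl req;

            memset(&req, 0, sizeof(req));

            req.cmd = MACVLAN_ENABLE;                /* turn eth0 into a mac-vlan port */
            req.ifname = port;
            if (ioctl(fd, SIOCSIFMACVLAN, &req) < 0)
                    perror("MACVLAN_ENABLE");

            req.cmd = MACVLAN_ADD;                   /* create the vlan device eth0#1 */
            req.ifname = port;
            req.ifidx = 1;
            if (ioctl(fd, SIOCSIFMACVLAN, &req) < 0)
                    perror("MACVLAN_ADD");

            req.cmd = MACVLAN_BIND;                  /* map this MAC onto eth0#1 */
            req.ifname = vlan;
            req.macaddr = mac;
            if (ioctl(fd, SIOCSIFMACVLAN, &req) < 0)
                    perror("MACVLAN_BIND");

            return 0;
    }

The same operations are also reachable without the ioctl by writing "add_port",
"add_vlan" and "add_mac" commands to the /proc/net/macvlan files the module
creates; see the proc handlers near the end of macvlan.c below.
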
+ +config REDIRDEV + tristate "Redirect-net-device support" + depends on EXPERIMENTAL + help + This allows one to create virtual interfaces that effectively + swap tx for rx, allowing one to create bridges and similar + constructs all in the same machine. + config VLAN_8021Q tristate "802.1Q VLAN Support" ---help--- @@ -627,6 +642,14 @@ To compile this code as a module, choose M here: the module will be called pktgen. +config SUPPORT_SEND_BAD_CRC + bool "Support Send Bad CRC (USE WITH CAUTION)" + ---help--- + When enabled, one can send a specially crafted packet to the ethernet + device via a raw socket and it will be sent with the last 4 bytes of + the packet as the ethernet CRC. Requires driver support. Current driver + support is limited to e100 and e1000. + endmenu endmenu --- linux-2.6.11/net/Makefile 2005-03-01 23:38:07.000000000 -0800 +++ linux-2.6.11.p4s/net/Makefile 2005-07-11 16:06:34.000000000 -0700 @@ -42,6 +42,8 @@ obj-$(CONFIG_ECONET) += econet/ obj-$(CONFIG_VLAN_8021Q) += 8021q/ obj-$(CONFIG_IP_SCTP) += sctp/ +obj-$(CONFIG_MACVLAN) += macvlan/ +obj-$(CONFIG_REDIRDEV) += redir/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o --- linux-2.6.11/net/macvlan/Makefile 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.11.p4s/net/macvlan/Makefile 2005-07-11 16:06:34.000000000 -0700 @@ -0,0 +1,10 @@ +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +obj-$(CONFIG_MACVLAN) := macvlan.o + + --- linux-2.6.11/net/macvlan/macvlan.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.11.p4s/net/macvlan/macvlan.c 2005-08-19 16:33:10.000000000 -0700 @@ -0,0 +1,2154 @@ +/* -*- linux-c -*- +####################################################################### +# +# (C) Copyright 2001-2004 +# Alex Zeffertt, Cambridge Broadband Ltd, ajz@cambridgebroadband.com +# Re-worked by Ben Greear +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +####################################################################### +# Notes: +# +# This file implements the macvlan.o MAC address based VLAN support +# module. +# +# This provides an IOCTL interface which allows you to +# It uses an IOCTL interface which allows you to +# +# 1. enable/disable MAC address based VLANS over an ether type net_device +# 2. add/remove a MAC address based VLAN - which is an ether type net_device +# layered over the original MACVLAN enabled ether type net_device. +# 3. bind/unbind MAC addresses to/from particular MAC address based VLANs +# 4. discover the state of MAC address based VLANs on the system. +# 5. set/get port flags, including whether to bind to destination MAC +# or source mac. +# 6. Traffic to/from eth0 will not be affected. 
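+#
+# As a concrete sketch of the control flow (eth0, the vlan index and the
+# MAC address below are placeholders; the paths follow the proc entries
+# created by macvlan_init(), macvlan_port_create() and macvlan_vlan_create()
+# later in this file, when procfs support is enabled):
+#
+#    echo "add_port eth0"              > /proc/net/macvlan/config
+#    echo "add_vlan 1"                 > /proc/net/macvlan/eth0/config
+#    echo "add_mac 11:22:33:44:55:66"  > /proc/net/macvlan/eth0/eth0#1
+#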
+ +# Example: (Assuming you are using source binding) +# +# If you enable MAC address based VLANS over eth0 +# +# You may then create further VLANs, e.g. eth0#1 eth0#2 .... +# These will not receive any frames until you bind MAC addresses to them. +# If you bind 11:22:33:44:55:66 to eth0#1, then any frames received by +# eth0 with source MAC 11:22:33:44:55:66 will be routed up through eth0#1 +# instead of eth0. +# +# Example: (Assuming you are using destination (local) binding) +# +# If you enable MAC address based VLANS over eth0 +# +# You may then create further VLANs, e.g. eth0#1 eth0#2 .... +# These will not receive any frames until you bind MAC addresses to them. +# If you bind 11:22:33:44:55:66 to eth0#1, then any broadcast/multicast +# frames, or frames with a destination MAC 11:22:33:44:55:66 +# will be routed up through eth0#1 instead of eth0 +# +# For broadcasts, the packet will be duplicated for every VLAN +# with at least one MAC attached. Attaching more than one MAC +# when destination binding makes no sense...don't do it! +# +# +####################################################################### +*/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#ifdef CONFIG_PROC_FS +#include +#define MVL_PROC_DIR "macvlan" +#define MVL_PROC_CFG "config" +#define PORT_CFG_FILE_NAME "config" +static struct proc_dir_entry *mvl_proc_dir; +static struct proc_dir_entry *mvl_proc_cfg; +#endif + +#include "macvlan.h" + +/* Defined in socket.c */ +void macvlan_ioctl_set(int (*hook)(unsigned long)); + + +/*********************************************************/ +/* defines */ +/*********************************************************/ + +#if 0 +#define DEBUG(format,args...) printk(KERN_ERR format, ##args); +#else +#define DEBUG(format,args...) +#endif + + +#undef MVL_USE_RW_LOCKS +#ifdef MVL_USE_RW_LOCKS +/* Must hold this lock to make any changes to the macvlan structures. + */ +static rwlock_t mvl_cfg_lock = RW_LOCK_UNLOCKED; + +#define MVL_READ_LOCK /* printk("%i: read-lock port list\n", __LINE__); */ \ + BUG_ON(in_interrupt()); \ + read_lock(&mvl_cfg_lock); +#define MVL_READ_UNLOCK /* printk("%i: read-unlock port list\n", __LINE__); */ \ + BUG_ON(in_interrupt()); \ + read_unlock(&mvl_cfg_lock); + +#define MVL_WRITE_LOCK /* printk("%i: write-lock port list\n", __LINE__); */ \ + BUG_ON(in_interrupt()); \ + write_lock(&mvl_cfg_lock); +#define MVL_WRITE_UNLOCK /* printk("%i: write-unlock port list\n", __LINE__); */ \ + BUG_ON(in_interrupt()); \ + write_unlock(&mvl_cfg_lock); + + +#define MVL_IRQ_RLOCK(a) /* printk("%i: read-unlock port list\n", __LINE__); */ { \ + __u64 now = getCurUs(); \ + __u64 later; \ + read_lock_irqsave(&mvl_cfg_lock, a); \ + later = getCurUs(); \ + if ((later - now) > 100) { \ + printk("took: %lluus to acquire read lock, line: %i\n", \ + later - now, __LINE__); \ + }} + +#define MVL_IRQ_RUNLOCK(a) /* printk("%i: read-unlock port list\n", __LINE__); */ \ + read_unlock_irqrestore(&mvl_cfg_lock, a); +#else +/* Must hold this lock to make any changes to the macvlan structures. 
+ */ +static spinlock_t mvl_cfg_lock = SPIN_LOCK_UNLOCKED; + +#define MVL_READ_LOCK(a) MVL_WRITE_LOCK(a) +#define MVL_READ_UNLOCK(a) MVL_WRITE_UNLOCK(a) + +#define MVL_WRITE_LOCK(a) /* printk("%i: write-lock port list\n", __LINE__); */ \ + spin_lock_irqsave(&mvl_cfg_lock, a); +#define MVL_WRITE_UNLOCK(a) /* printk("%i: write-unlock port list\n", __LINE__); */ \ + spin_unlock_irqrestore(&mvl_cfg_lock, a); \ + + +#define MVL_IRQ_RLOCK(a) /* printk("%i: read-unlock port list\n", __LINE__); */ \ + spin_lock_irqsave(&mvl_cfg_lock, a); \ + +#define MVL_IRQ_RUNLOCK(a) /* printk("%i: read-unlock port list\n", __LINE__); */ \ + spin_unlock_irqrestore(&mvl_cfg_lock, a); +#endif + + +/*********************************************************/ +/* file scope variables */ +/*********************************************************/ + +static struct macvlan_port *port_list = NULL; + +static atomic_t macvlan_nports; +static atomic_t mvl_vlan_counter; + +static int debug_lvl = 0; + + +/*********************************************************/ +/* forward declarations */ +/*********************************************************/ +static int macvlan_hash_rem(const char* vlan_ifname, + const unsigned char* mac); + +#ifdef MVL_CONFIG_PROC_FS +static int read_mvl_glbl(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int write_mvl_glbl(struct file *file, const char *buffer, + unsigned long count, void *data); +static int read_mvl(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int write_mvl(struct file *file, const char *buffer, + unsigned long count, void *data); +static int read_mvl_port(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int write_mvl_port(struct file *file, const char *buffer, + unsigned long count, void *data); +#endif + + + +/*********************************************************/ +/* function definitions */ +/*********************************************************/ + +/** Convert to micro-seconds */ +static inline __u64 tv_to_us(const struct timeval* tv) { + __u64 us = tv->tv_usec; + us += (__u64)tv->tv_sec * (__u64)1000000; + return us; +} + + +/* Since the epoc. More precise over long periods of time than + * getRelativeCurMs + */ +static inline __u64 getCurUs(void) { + struct timeval tv; + do_gettimeofday(&tv); + return tv_to_us(&tv); +} + + +char toupper(char in) { + if ((in >= 'a') && (in <= 'z')) { + in -= ('a' - 'A'); + } + return in; +} + +#define iswhitespace(x)\ + ((x) == ' ' || (x) == '\n' || (x) == '\r' || (x) == '\r' ) + +#define skip_whitespace(x) { while (iswhitespace(*x)) (x)++; } + +static int copy_next_word(char *dst, char *src, int len) { + char *p; + for (p=src; p < src + len ; p++) { + if ( iswhitespace(*p)) + break; + *dst++ = *p; + } + return p - src; +} + + +static int toMacString(unsigned char* rslt_mac, const char* raw_mac) { + // Turn HEX into bytes. First, gather all the useful HEX + char tmp[12]; //More than 12 is useless, at least right now + char c; + int j = 0; //tmp's index. 
+ int i; + char tmp_bt[3]; + for (i = 0; i= '0') && (c <= '9')) || ((c >= 'A') && (c <= 'F'))) { + tmp[j] = c; + //VLOG_ERR(VLOG << " c: " << c << endl); + if (j == 11) { + break; //done + } + j++; + } + else { + if ((c == ':') || (c == ' ') || (c == '.')) { + // Ok, valid divider + } + else { + // Invalid header + return -EINVAL; + } + } + } + + if (j != 11) { + //msg->append("ERROR: Not enough HEX values in the input string.\n"); + return -EINVAL; + } + + for (i = 0; i<6; i++) { + tmp_bt[0] = tmp[i*2]; + tmp_bt[1] = tmp[i*2 +1]; + tmp_bt[2] = 0; + //VLOG_ERR(VLOG << " tmp_bt -:" << tmp_bt << ":- i: " << i << endl); + rslt_mac[i] = (unsigned char)(simple_strtol(tmp_bt, NULL, 16) & 0xFF); + //VLOG_ERR(VLOG << " rslt_mac[" << i << "] -:" << rslt_mac[i] << ":-\n"); + } + return 0; +}//toMacString + + +struct macvlan_vlan* macvlan_find_vlan_in_port(struct macvlan_port* port, + const char* ifname) { + struct macvlan_vlan* vlan; + for (vlan = port->vlan_list; vlan; vlan = vlan->next) { + if (!strcmp(vlan->dev->name, ifname)) { + return vlan; + } + } + return NULL; +} + + +/* Find port by mac-vlan interface name (eth1#777) */ +struct macvlan_port* macvlan_find_port_for_mvlan_ifname(const char* ifname) { + struct macvlan_port* port; + for (port = port_list; port; port = port->next) { + if (macvlan_find_vlan_in_port(port, ifname)) { + break; + } + } + return port; +} + +struct macvlan_port* macvlan_find_port_for_underlying_ifname(const char* ifname) { + struct macvlan_port* port; + //printk("finding port for underlying ifname: %s\n", ifname); + for (port = port_list; port; port = port->next) { + //printk("Testing port: %p name: %s\n", port, port->dev->name); + if (strcmp(port->dev->name, ifname) == 0) { + break; + } + } + //printk("done finding port: %p\n", port); + return port; +} + +/* + * Rebuild the Ethernet MAC header. This is called after an ARP + * (or in future other address resolution) has completed on this + * sk_buff. We now let ARP fill in the other fields. + * + * This routine CANNOT use cached dst->neigh! + * Really, it is used only when dst->neigh is wrong. + * + */ +int macvlan_dev_rebuild_header(struct sk_buff *skb) { + struct net_device *dev = skb->dev; + struct ethhdr *veth = (struct ethhdr *)(skb->data); + + switch (veth->h_proto) { +#ifdef CONFIG_INET + case __constant_htons(ETH_P_IP): + + return arp_find(veth->h_dest, skb); +#endif + default: + DEBUG("%s: unable to resolve type %X addresses.\n", + dev->name, (int)veth->h_proto); + + memcpy(veth->h_source, dev->dev_addr, ETH_ALEN); + break; + }; + + return 0; +} + + + +static struct net_device_stats *macvlan_get_stats(struct net_device *dev) +{ + struct macvlan_vlan *vlan = dev->priv; + + return &vlan->statistics; +} + +static int macvlan_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct macvlan_vlan *vlan = dev->priv; + int rv; + struct sk_buff* skb2; + + DEBUG("%s: \n", __PRETTY_FUNCTION__); + + skb->dev = vlan->lowerdev; + + /* Please note, dev_queue_xmit consumes the pkt regardless of the + * error value. So, will copy the skb first and free if successful. + */ + skb2 = skb_get(skb); + rv = dev_queue_xmit(skb2); /* Upon return, skb2 is considered freed */ + if (rv != 0) { + /* The skb memory should still be valid since we made a copy, + * so can return error code here. + */ + return rv; + } + else { + /* Was success, need to free the skb reference since we bumped up the + * user count above. 
+ */ + vlan->statistics.tx_packets++; + vlan->statistics.tx_bytes += skb->len; + kfree_skb(skb); + return 0; + } +}/* macvlan xmit */ + +static int macvlan_open(struct net_device *dev) +{ + netif_start_queue(dev); + return 0; +} + +static void macvlan_set_multicast_list(struct net_device *dev) +{ + /* TODO ??? */ +} + +static int macvlan_stop(struct net_device *dev) +{ + netif_stop_queue(dev); + return 0; +} + +static int macvlan_accept_fastpath(struct net_device *dev, struct dst_entry *dst) +{ + return -1; +} + + +/* + * Create the VLAN header for an arbitrary protocol layer + * + * saddr=NULL means use device source address + * daddr=NULL means leave destination address (eg unresolved arp) + * + * This is called when the SKB is moving down the stack towards the + * physical devices. + */ +int macvlan_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, void *daddr, void *saddr, + unsigned len) +{ + struct macvlan_vlan *vlan = dev->priv; + + DEBUG("%s: \n", __PRETTY_FUNCTION__); + + /* Before delegating work to the lower layer, enter our MAC-address */ + saddr = dev->dev_addr; + + dev = vlan->lowerdev; + + /* Now make the underlying real hard header */ + return dev->hard_header(skb, dev, type, daddr, saddr, len); +} + + +void macvlan_dev_destructor(struct net_device *dev) { + atomic_dec(&mvl_vlan_counter); + if (dev->priv) { + //printk("dst: %s", dev->name); + kfree(dev->priv); + dev->priv = NULL; + } + else { + //printk("dst2: %s", dev->name); + } +} + + +static int macvlan_vlan_create(const char* port_name, int newifidx) { + struct macvlan_vlan *vlan = NULL; + struct macvlan_port* port; + char newifname[IFNAMSIZ+1]; + struct net_device* td = NULL; + struct net_device* nnd = NULL; + unsigned long flags; + int rv; + + //printk("in macvlan_vlan_create "); + + //printk("malloc "); + if ((vlan = kmalloc(sizeof(*vlan), GFP_KERNEL)) == NULL) { + DEBUG("macvlan: kmalloc failure\n"); + rv = -ENOMEM; + goto outfree; + } + memset(vlan, 0, sizeof(*vlan)); + + //printk("4 "); + if ((nnd = kmalloc(sizeof(struct net_device), GFP_KERNEL)) == NULL) { + DEBUG("macvlan: kmalloc net_device failure\n"); + rv = -ENOMEM; + goto outfree; + } + memset(nnd, 0, sizeof(struct net_device)); + + + MVL_WRITE_LOCK(flags); + + //printk("--*-- "); + /* find the port to which ifname belongs */ + port = macvlan_find_port_for_underlying_ifname(port_name); + //printk("port: %p name: %s\n", port, port_name); + if (!port) { + rv = -ENODEV; + goto unlockout; + } + + BUG_ON(!port->dev); + + //printk("1 "); + if (newifidx < 0) { + /* Find the next free index */ + int i; + for (i = 0; idev->name, i); + newifname[IFNAMSIZ] = 0; + if ((td = dev_get_by_name(newifname)) == NULL) { + newifidx = i; + break; + } + dev_put(td); + } + + if (newifidx < 0) { + printk("macvlan: Could not find a free index, reached max: %i\n", i); + } + td = NULL; // Make sure we don't accidentally use this later. 
+ } + + //printk("2 "); + /* generate a name for the new vlan */ + snprintf(newifname, IFNAMSIZ, "%s#%d", port->dev->name, newifidx); + newifname[IFNAMSIZ] = 0; + + if ((td = dev_get_by_name(newifname)) != NULL) { + DEBUG("macvlan: vlan by that name already exists\n"); + rv = -EEXIST; + goto unlockout; + } + + //printk("4 "); + vlan->dev = nnd; + + //printk("5 "); + strcpy(vlan->dev->name, newifname); + ether_setup(vlan->dev); + + dev_hold(vlan->dev); /* MVL code holds reference */ + + vlan->dev->priv = vlan; + vlan->port = port; + vlan->lowerdev = port->dev; + /* Set a flag so we know this is a vlan device */ + vlan->dev->priv_flags |= (IFF_MAC_VLAN); + + //printk("6 "); + /* dev->do_ioctl = macvlan_do_ioctl; */ + vlan->dev->get_stats = macvlan_get_stats; + vlan->dev->hard_start_xmit = macvlan_xmit; + vlan->dev->hard_header = macvlan_hard_header; + vlan->dev->rebuild_header = macvlan_dev_rebuild_header; + vlan->dev->open = macvlan_open; + vlan->dev->set_multicast_list = macvlan_set_multicast_list; + vlan->dev->stop = macvlan_stop; + vlan->dev->accept_fastpath = macvlan_accept_fastpath; + vlan->dev->tx_queue_len = 0; + vlan->dev->set_mac_address = NULL; + vlan->dev->priv = vlan; + vlan->dev->destructor = macvlan_dev_destructor; + + /* This will change if you are using Destination (local) binding, + * when you add a MAC to it.. + */ + memcpy(vlan->dev->dev_addr, vlan->lowerdev->dev_addr, ETH_ALEN); + + DEBUG("macvlan: created vlan %p\n", vlan); + + atomic_inc(&port->ndevs); + + /* link to list */ + //printk("8 "); + vlan->next = port->vlan_list; + port->vlan_list = vlan; + + //printk("End of mac_vlan create1\n"); + + MVL_WRITE_UNLOCK(flags); + register_netdev(vlan->dev); + +#ifdef MVL_CONFIG_PROC_FS + //printk("7 "); + if (vlan->port->proc_dir) { + vlan->proc_ent = create_proc_read_entry(vlan->dev->name, S_IRUGO, + vlan->port->proc_dir, + read_mvl, vlan); + if (!vlan->proc_ent) { + printk("ERROR: Could not create proc entry for device: %s\n", + vlan->dev->name); + } + else { + vlan->proc_ent->write_proc = write_mvl; + vlan->proc_ent->owner = THIS_MODULE; + } + } +#endif + + //printk("End of mac_vlan create2\n"); + + atomic_inc(&mvl_vlan_counter); + //printk("9\n"); + rv = 0; + goto out; + + /* Error case, clean up vlan memory */ + unlockout: + MVL_WRITE_UNLOCK(flags); + outfree: + if (vlan) { + kfree(vlan); + } + if (nnd) { + kfree(nnd); + } + if (td) { + dev_put(td); + } + out: + return rv; +} /* macvlan_vlan_create */ + + +/* Has locking internally */ +int macvlan_vlan_cleanup(const char* ifname) { + int i; + struct macvlan_port* port; + struct macvlan_vlan* vlan; + struct macvlan_vlan* walker; + struct macvlan_vlan* prev; + unsigned long flags; + int rv; + + DEBUG(__FUNCTION__"(%p)\n",vlan); + //printk("mvl_cln: %s", ifname); + + MVL_WRITE_LOCK(flags); + /* NOTE: Cannot depend on device name, it can be changed. 
--Ben */ + port = macvlan_find_port_for_mvlan_ifname(ifname); + if (!port) { + rv = -ENODEV; + goto unlockout; + } + + //printk("1 "); + vlan = macvlan_find_vlan_in_port(port, ifname); + BUG_ON(!vlan); + + if (vlan->dev->flags & IFF_UP) { + rv = -EBUSY; + goto unlockout; + } + + //printk("2 "); + for (i = 0; iport->hash_table[i]; + struct macvlan_hash_entry* prev = NULL; + while (tmp) { + if (tmp->vlan == vlan) { + if (prev) { + prev->next = tmp->next; + kfree(tmp); + tmp = prev->next; + } + else { + vlan->port->hash_table[i] = tmp->next; + kfree(tmp); + tmp = vlan->port->hash_table[i]; + } + } + else { + prev = tmp; + tmp = tmp->next; + } + } + }/* for all hash buckets */ + //printk("3 "); + +#ifdef MVL_CONFIG_PROC_FS + if (vlan->proc_ent) { + remove_proc_entry(vlan->dev->name, vlan->port->proc_dir); + vlan->proc_ent = NULL; + } +#endif + + + /* + * remove the vlan in question from the list + */ + prev = NULL; + walker = port->vlan_list; + while (walker) { + if (walker == vlan) { + if (prev) { + prev->next = walker->next; + } + else { + port->vlan_list = walker->next; + } + break; + } + prev = walker; + walker = walker->next; + }/* while */ + BUG_ON(walker != vlan); + + atomic_dec(&port->ndevs); + + //printk("4 "); + //printk("End of mac_vlan cleanup1, ref-cnt: %i\n", atomic_read(&vlan->dev->refcnt)); + dev_put(vlan->dev); + + MVL_WRITE_UNLOCK(flags); + + //printk("End of mac_vlan cleanup2, ref-cnt: %i\n", atomic_read(&vlan->dev->refcnt)); + unregister_netdev(vlan->dev); + + /* VLAN will be deleted when the device is deleted */ + + //printk("5 "); + rv = 0; + goto out; + + unlockout: + MVL_WRITE_UNLOCK(flags); + + out: + return rv; + +} /* mac_vlan cleanup */ + + + +static int macvlan_port_set_flags(const char* ifname, int flags) { + struct macvlan_port *port; + + /* find the port to which ifname belongs */ + port = macvlan_find_port_for_underlying_ifname(ifname); + if (!port) { + return -ENODEV; + } + else { + port->flags = flags; + } + return 0; +}/* macvlan_port_set_flags */ + +static int macvlan_port_create(const char* ifname) { + struct macvlan_port *port; + struct net_device* dev; + int rv = 0; + unsigned long flags; + + MVL_WRITE_LOCK(flags); + port = macvlan_find_port_for_underlying_ifname(ifname); + if (port != NULL) { + rv = -EEXIST; + goto outunlock; + } + + dev = dev_get_by_name(ifname); + if (dev == NULL) { + rv = -ENODEV; + goto outunlock; + } + + if ((dev->macvlan_priv != NULL) + || (dev->flags & IFF_LOOPBACK) + || (dev->type != ARPHRD_ETHER)) { + printk("macvlan: lower layer failed port_create, dev: %s " + "dev->macvlan_priv=%p dev->flags=%08x dev->type=%08x\n", + dev->name, dev->macvlan_priv, dev->flags, dev->type); + dev_put(dev); + rv = -EINVAL; + goto outunlock; + } + + if ((port = kmalloc(sizeof(*port), GFP_ATOMIC)) == NULL) { + dev_put(dev); + rv = -ENOBUFS; + goto outunlock; + } + + memset(port, 0, sizeof(*port)); + port->dev = dev; + + dev->macvlan_priv = port; + + atomic_inc(&macvlan_nports); + + /* Link into our list */ + port->next = port_list; + port_list = port; + + /* Unlock our write lock on the ports... */ + MVL_WRITE_UNLOCK(flags); + + /* TODO: Could use multicast filters in some NICs at least. 
*/ + dev_set_promiscuity(dev, 1); + +#ifdef MVL_CONFIG_PROC_FS + if (mvl_proc_dir) { + port->proc_dir = proc_mkdir(port->dev->name, mvl_proc_dir); + + if (port->proc_dir) { + port->proc_ent = create_proc_read_entry(PORT_CFG_FILE_NAME, S_IRUGO, + port->proc_dir, + read_mvl_port, port); + if (port->proc_ent) { + port->proc_ent->write_proc = write_mvl_port; + port->proc_ent->owner = THIS_MODULE; + } + else { + printk("macvlan: ERROR: failed to create proc entry for port: %s\n", + port->dev->name); + } + } + } +#endif + + DEBUG("macvlan: created port=%p\n", port); + return 0; + +outunlock: + MVL_WRITE_UNLOCK(flags); + return rv; + +}/* macvlan_port_create */ + + +/* Clears all memory, kfree's it if possible. + */ +static int macvlan_port_cleanup(const char* ifname) { + struct macvlan_port *port; + struct macvlan_port *prev; + struct macvlan_port *walker; + int i; + unsigned long flags; + int rv = 0; + + MVL_WRITE_LOCK(flags); + port = macvlan_find_port_for_underlying_ifname(ifname); + if (!port) { + rv = -ENODEV; + goto outunlock; + } + + if (port->vlan_list) { + rv = -EBUSY; + goto outunlock; + } + + /* hash table should be empty at this point */ + for (i = 0 ; i < MACVLAN_HASH_LEN; i++) { + BUG_ON(port->hash_table[i]); + } + + /* Remove from our port list */ + prev = NULL; + walker = port_list; + while (walker) { + if (walker == port) { + if (prev) { + prev->next = walker->next; + } + else { + port_list = walker->next; + } + break; + } + prev = walker; + walker = walker->next; + } + BUG_ON(walker != port); + + + port->dev->macvlan_priv = NULL; + dev_put(port->dev); + + MVL_WRITE_UNLOCK(flags); + +#ifdef MVL_CONFIG_PROC_FS + if (port->proc_dir) { + if (port->proc_ent) { + remove_proc_entry(PORT_CFG_FILE_NAME, port->proc_dir); + port->proc_ent = NULL; + } + + remove_proc_entry(port->dev->name, mvl_proc_dir); + port->proc_dir = NULL; + } +#endif + + dev_set_promiscuity(port->dev, -1); + + atomic_dec(&macvlan_nports); + + kfree(port); + + return 0; + +outunlock: + MVL_WRITE_UNLOCK(flags); + return rv; + +}/* macvlan_port_cleanup */ + + +static inline struct macvlan_vlan *macvlan_hash_lookup(struct macvlan_port *port, + const unsigned char *src) { + /* + * The hashing function is to simply + * take the bottom source address byte + */ + struct macvlan_hash_entry *entry; + unsigned int bucket = VLAN_BUCKET(src); + for (entry = port->hash_table[bucket]; entry; entry = entry->next) { + if (memcmp(entry->mac, src, ETH_ALEN) == 0) { + /*DEBUG("macvlan: matched %02x:%02x:%02x:%02x:%02x:%02x to vlan %p\n", + src[0],src[1],src[2],src[3],src[4],src[5],entry->vlan); */ + return entry->vlan; + } + } + return NULL; +} + + +static int macvlan_hash_add(const char* ifname, + const unsigned char* macaddr) { + + struct macvlan_port *port; + struct macvlan_vlan *vlan; + unsigned int bucket = VLAN_BUCKET(macaddr); + struct macvlan_hash_entry* entry; + + + /* find the port in question */ + port = macvlan_find_port_for_mvlan_ifname(ifname); + if (!port) { + return -ENODEV; + } + + /* find the vlan layered over this port */ + vlan = macvlan_find_vlan_in_port(port, ifname); + BUG_ON(!vlan); + + /* check it's not already in the hash lookup table */ + if (macvlan_hash_lookup(port, macaddr)) { + DEBUG("macvlan: user tried to add mac addr twice!\n"); + return -EEXIST; + } + + if ((atomic_read(&vlan->nmacs) > 0) + && (port->flags & MVL_FILTER_ON_DEST)) { + printk("macvlan: Already have a MAC on this vlan: %s and we are filtering on DEST, so no more are allowed!\n", + ifname); + return -EINVAL; + } + + entry = 
kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) { + return -ENOBUFS; + } + memset(entry, 0, sizeof(*entry)); + + memcpy(entry->mac, macaddr, sizeof(entry->mac)); + entry->vlan = vlan; + entry->next = port->hash_table[bucket]; + port->hash_table[bucket] = entry; + DEBUG("macvlan: added %02x:%02x:%02x:%02x:%02x:%02x to vlan %p\n", + entry->src[0],entry->src[1],entry->src[2], + entry->src[3],entry->src[4],entry->src[5], + vlan); + + atomic_inc(&vlan->nmacs); + + if (port->flags & MVL_FILTER_ON_DEST) { + /* Set the MAC on the vlan device so that it sends pkts correctly. */ + memcpy(vlan->dev->dev_addr, macaddr, ETH_ALEN); + } + + return 0; +} /* macvlan_hash_add */ + +/* cleans up the mac hash entry memory (kfree). */ +static int macvlan_hash_rem(const char* vlan_ifname, + const unsigned char* mac) { + int bucket = VLAN_BUCKET(mac); + struct macvlan_port *port; + struct macvlan_hash_entry *entry; + struct macvlan_hash_entry* prev; + + /* find the port in question */ + port = macvlan_find_port_for_mvlan_ifname(vlan_ifname); + + if (!port) { + return -ENODEV; + } + + entry = port->hash_table[bucket]; + prev = NULL; + //printk("hash_rem, found port: %p bucket: %i entry: %p\n", + // port, bucket, entry); + while (entry) { + //printk("Testing entry: %p\n", entry); + if (memcmp(entry->mac, mac, ETH_ALEN) == 0) { + if (prev) { + prev->next = entry->next; + } + else { + port->hash_table[bucket] = entry->next; + } + atomic_dec(&entry->vlan->nmacs); + kfree(entry); + return 0; + } + prev = entry; + entry = entry->next; + } + + return -EINVAL; +}/* macvlan_hash_rem */ + + +static int macvlan_ioctl_deviceless_stub(unsigned long arg) { + int err = 0; + struct macvlan_ioctl req; + struct macvlan_ioctl_reply rep; + unsigned long flags; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(&req, (void *)arg, sizeof(req))) + return -EFAULT; + + memset(&rep, 0, sizeof(rep)); + + switch (req.cmd) + { + case MACVLAN_ENABLE: + { + /* + * enable creation of mac based vlans + * layered over an ethernet device + */ + char ifname[IFNAMSIZ]; + + /* Get name of ethernet device */ + if(copy_from_user(ifname, (void *)req.ifname, sizeof(ifname))) { + err = -EFAULT; + break; + } + ifname[IFNAMSIZ-1] = '\0'; + + err = macvlan_port_create(ifname); + + break; + } + case MACVLAN_DISABLE: + { + /* + * disable creation of mac based vlans + * layered over an ethernet device + */ + char ifname[IFNAMSIZ]; + + /* Get name of ethernet device */ + if(copy_from_user(ifname, (void *)req.ifname, sizeof(ifname))) { + err = -EFAULT; + break; + } + ifname[IFNAMSIZ-1] = '\0'; + + err = macvlan_port_cleanup(ifname); + + break; + } + case MACVLAN_ADD: + { + /* + * create a new mac based vlan + */ + char ifname[IFNAMSIZ]; + int ifidx; + + /* Get name of port over which we are creating a vlan */ + if(copy_from_user(ifname, (void *)req.ifname, sizeof(ifname))) { + err = -EFAULT; + break; + } + ifname[IFNAMSIZ-1] = '\0'; + + /* Get index of new vlan we are creating */ + ifidx = req.ifidx; + + /* Has internal locking. 
*/ + err = macvlan_vlan_create(ifname, ifidx); + + break; + } + case MACVLAN_SET_PORT_FLAGS: + { + /* + * Set a macvlan_port's flags + */ + char ifname[IFNAMSIZ]; + + /* Get name of port over which we are creating a vlan */ + if(copy_from_user(ifname, (void *)req.ifname, sizeof(ifname))) { + err = -EFAULT; + break; + } + ifname[IFNAMSIZ-1] = '\0'; + + MVL_WRITE_LOCK(flags); + err = macvlan_port_set_flags(ifname, req.ifidx); + MVL_WRITE_UNLOCK(flags); + + break; + } + case MACVLAN_GET_PORT_FLAGS: + { + /* + * Set a macvlan_port's flags + */ + struct macvlan_port *port; + char ifname[IFNAMSIZ]; + + /* Get name of port over which we are creating a vlan */ + if(copy_from_user(ifname, (void *)req.ifname, sizeof(ifname))) { + err = -EFAULT; + break; + } + ifname[IFNAMSIZ-1] = '\0'; + + MVL_READ_LOCK(flags); + /* find the port to which ifname belongs */ + port = macvlan_find_port_for_mvlan_ifname(ifname); + if (!port) { + err = -ENODEV; + } + else { + rep.num = port->flags; + } + MVL_READ_UNLOCK(flags); + + if (copy_to_user((void *)req.reply, &rep, sizeof(rep))) { + err = -EFAULT; + } + + break; + } + case MACVLAN_DEL: + { + /* + * destroy a mac based vlan + */ + char ifname[IFNAMSIZ]; + + /* Get name of vlan to remove */ + if (copy_from_user(ifname, (void *)req.ifname, sizeof(ifname))) { + err = -EFAULT; + break; + } + ifname[IFNAMSIZ-1] = '\0'; + + /* Has internal locking */ + err = macvlan_vlan_cleanup(ifname); + break; + } + + case MACVLAN_BIND: + { + /* + * Bind a MAC address to vlan + */ + char ifname[IFNAMSIZ]; + unsigned char macaddr[ETH_ALEN]; + + /* Get name of vlan */ + if (copy_from_user(ifname, (void *)req.ifname, sizeof(ifname))) { + err = -EFAULT; + break; + } + ifname[IFNAMSIZ-1] = '\0'; + + /* Get mac address to bind to vlan */ + if (copy_from_user(macaddr, (void *)req.macaddr, sizeof(macaddr))) { + err = -EFAULT; + break; + } + + MVL_WRITE_LOCK(flags); + err = macvlan_hash_add(ifname, macaddr); + MVL_WRITE_UNLOCK(flags); + break; + } + case MACVLAN_UNBIND: + { + /* + * Unbind a MAC address from a vlan + */ + char ifname[IFNAMSIZ]; + unsigned char macaddr[ETH_ALEN]; + + /* Get name of vlan */ + if (copy_from_user(ifname, (void *)req.ifname, sizeof(ifname))) { + err = -EFAULT; + break; + } + ifname[IFNAMSIZ-1] = '\0'; + + /* Get mac address to unbind */ + if (copy_from_user(macaddr, (void *)req.macaddr, sizeof(macaddr))) { + err = -EFAULT; + break; + } + + MVL_WRITE_LOCK(flags); + err = macvlan_hash_rem(ifname, macaddr); + MVL_WRITE_UNLOCK(flags); + break; + } + + case MACVLAN_IS_MACVLAN: + { + /* + * Give user-space a chance of determining if we are a MAC-VLAN nor not. + * (If the IOCTL fails, we are not, otherwise we are.) 
+ */ + struct macvlan_port *port; + char ifname[IFNAMSIZ]; + + /* Get name of vlan */ + if(copy_from_user(ifname, (void *)req.ifname, sizeof(ifname))) { + err = -EFAULT; + break; + } + ifname[IFNAMSIZ-1] = '\0'; + + MVL_READ_LOCK(flags); + /* find the port in question */ + port = macvlan_find_port_for_mvlan_ifname(ifname); + MVL_READ_UNLOCK(flags); + + if (!port) { + /* printk("device: %s is NOT a MAC-VLAN\n", ifname); */ + err = -ENODEV; + } + else { + /* printk("device: %s IS a MAC-VLAN\n", ifname); */ + err = 0; + } + break; + } + case MACVLAN_GET_NUM_PORTS: + { + /* + * how many ethernet devices have mac based vlans enabled over them + */ + rep.num = atomic_read(&macvlan_nports); + if (copy_to_user((void *)req.reply, &rep, sizeof(rep))) { + err = -EFAULT; + break; + } + break; + } + case MACVLAN_GET_PORT_NAME: + { + /* + * name the nth device which has mac based vlans enabled over it + */ + struct macvlan_port *port; + int n = req.portidx; + + MVL_READ_LOCK(flags); + /* find the port in question */ + for (port = port_list; port && n; port = port->next, n--); + if (!port) { + err = -ENODEV; + MVL_READ_UNLOCK(flags); + } + else { + memcpy(rep.name, port->dev->name, IFNAMSIZ); + MVL_READ_UNLOCK(flags); + + if (copy_to_user((void *)req.reply, &rep, sizeof(rep))) { + err = -EFAULT; + } + } + break; + } + case MACVLAN_GET_NUM_VLANS: + { + /* + * how many vlans are layered over the nth mac-based + * vlan enabled device + */ + + struct macvlan_port *port; + int n = req.portidx; + + MVL_READ_LOCK(flags); + /* find the port in question */ + for (port = port_list; port && n; port = port->next, n--); + + if (!port) { + err = -ENODEV; + MVL_READ_UNLOCK(flags); + } + else { + rep.num = atomic_read(&port->ndevs); + MVL_READ_UNLOCK(flags); + if (copy_to_user((void *)req.reply, &rep, sizeof(rep))) { + err = -EFAULT; + } + } + + break; + } + case MACVLAN_GET_VLAN_NAME: + { + /* + * what's the name of the mth vlan layered over the nth + * mac-based-vlan enabled ethernet device + */ + struct macvlan_port *port; + struct macvlan_vlan *vlan; + int n = req.portidx; + int m = req.ifidx; + + + MVL_READ_LOCK(flags); + /* find the port in question */ + for (port = port_list; port && n; port = port->next, n--); + if (!port) { + err = -EINVAL; + MVL_READ_UNLOCK(flags); + } + else { + /* find the vlan in question */ + for (vlan = port->vlan_list; vlan && m; vlan = vlan->next, m--); + + if (!vlan) { + err = -ENODEV; + } + else { + memcpy(rep.name, vlan->dev->name, IFNAMSIZ); + } + MVL_READ_UNLOCK(flags); + if (copy_to_user((void *)req.reply, &rep, sizeof(rep))) { + err = -EFAULT; + } + } + break; + } + case MACVLAN_GET_NUM_MACS: + { + /* + * how many mac addresses are owned by the mth vlan + * layered over the nth mac-based-vlan enabled + * ethernet device + */ + struct macvlan_port *port; + struct macvlan_vlan *vlan; + int n = req.portidx; + int m = req.ifidx; + + + MVL_READ_LOCK(flags); + /* find the port in question */ + for (port = port_list; port && n; port = port->next, n--); + + if (!port) { + err = -EINVAL; + MVL_READ_UNLOCK(flags); + } + else { + /* find the vlan in question */ + for (vlan = port->vlan_list; vlan && m; vlan = vlan->next, m--); + + if (!vlan) { + err = -ENODEV; + } + else { + rep.num = atomic_read(&vlan->nmacs); + } + MVL_READ_UNLOCK(flags); + if (copy_to_user((void *)req.reply, &rep, sizeof(rep))) { + err = -EFAULT; + } + } + break; + } + case MACVLAN_GET_MAC_NAME: + { + /* + * what's the pth mac address owned by the mth vlan + * layered over the nth mac-based-vlan enabled + * ethernet 
device + */ + struct macvlan_port *port; + struct macvlan_vlan *vlan; + struct macvlan_hash_entry *entry; + int n = req.portidx; + int m = req.ifidx; + int p = req.macaddridx; + + MVL_READ_LOCK(flags); + /* find the port in question */ + for (port = port_list; port && n; port = port->next, n--); + + if (!port) { + err = -EINVAL; + MVL_READ_UNLOCK(flags); + } + else { + /* find the vlan in question */ + for (vlan = port->vlan_list; vlan && m; vlan = vlan->next, m--); + + if (!vlan) { + err = -ENODEV; + } + else { + /* find the mac addr in question */ + int i; + for (i = 0; ihash_table[i]; + while (entry) { + if (entry->vlan == vlan) { + if (--p == 0) { + memcpy(rep.name, entry->mac, sizeof(entry->mac)); + goto found_one; + } + } + entry = entry->next; + } /* while */ + }/* for */ + + /* Didn't find one */ + err = -ENODEV; + } + + found_one: + + MVL_READ_UNLOCK(flags); + if (copy_to_user((void *)req.reply, &rep, sizeof(rep))) { + err = -EFAULT; + } + } + break; + } + default: + err = -EOPNOTSUPP; + break; + } + + /* printk("Returning err: %i\n", err); */ + return err; +}/* ioctl handler */ + + +/* Return >= 0 if packet is consumed, otherwise return < 0. */ +static inline int mvl_handle_frame_fos(struct macvlan_port* port, struct sk_buff* skb) { + struct macvlan_vlan *vlan; /* the higher layer i/f to which skbuff is mapped */ + int rv; + unsigned long flags; + + DEBUG("%s: got port: %p, not filtering on DEST\n", __PRETTY_FUNCTION__, port); + + MVL_IRQ_RLOCK(flags); + if (!(vlan = macvlan_hash_lookup(port, eth_hdr(skb)->h_source))) { + /* not for us, but don't delete it, others may consume it */ + rv = -ENODEV; + } + else { + if (!(vlan->dev->flags & IFF_UP)) { + rv = 1; /* was consumed */ + kfree_skb(skb); + } + else { + vlan->statistics.rx_packets++; + /* Count the lower-level's header to make our counters look more + * like an ethernet device. */ + vlan->statistics.rx_bytes += (skb->len + vlan->lowerdev->hard_header_len); + + skb->dev = vlan->dev; + dev_hold(skb->dev); + + MVL_IRQ_RUNLOCK(flags); + netif_rx(skb); + dev_put(skb->dev); + rv = 0; + goto out; + } + } + + MVL_IRQ_RLOCK(flags); + out: + return rv; +} /* filter on source */ + + +/* Return >= 0 if packet is consumed, otherwise return < 0. */ +static inline int mvl_handle_frame_fod(struct macvlan_port* port, struct sk_buff* skb) { + struct macvlan_vlan *vlan; /* the higher layer i/f to which skbuff is mapped */ + int rv; + unsigned long flags; + struct net_device* sdev; + + /* Filtering on destination.. */ + /* If it's a broadcast pkt, send it to all of them. Otherwise, + * send it to just one of them. + */ + if ((skb->pkt_type == PACKET_BROADCAST) || (skb->pkt_type == PACKET_MULTICAST)) { + /* never consume if we take this code branch, because it's bcast */ + DEBUG("%s: got port: %p, filtering on DEST, type is bcast or multicast\n", + __PRETTY_FUNCTION__, port); + //printk("fod: "); + MVL_IRQ_RLOCK(flags); + //printk("1 "); + for (vlan = port->vlan_list; vlan; vlan = vlan->next) { + //printk("."); + DEBUG("%s: got vlan: %s, nmacs: %i, up: %i\n", + __PRETTY_FUNCTION__, vlan->dev->name, + vlan->nmacs, (vlan->dev->flags & IFF_UP)); + if (atomic_read(&vlan->nmacs) && (vlan->dev->flags & IFF_UP)) { + struct sk_buff* nskb; + + atomic_inc(&skb->users); + nskb = skb_share_check(skb, GFP_ATOMIC); + if (!nskb) { + vlan->statistics.rx_fifo_errors++; + vlan->statistics.rx_errors++; + } + else { + vlan->statistics.rx_packets++; + /* Count the lower-level's header to make our counters + * look more like an ethernet device. 
*/ + vlan->statistics.rx_bytes += + (nskb->len + vlan->lowerdev->hard_header_len); + vlan->statistics.multicast++; + + nskb->dev = vlan->dev; + netif_rx(nskb); + } + } + } + //printk("2 "); + rv = -1; /* did not consume this pkt, merely tasted it */ + MVL_IRQ_RUNLOCK(flags); + goto out; + } + else { + struct ethhdr *eth = eth_hdr(skb); + char* d = eth->h_dest; + /* Not a broadcast, try to find our port based on DESTINATION */ + //printk("fodNB "); + MVL_IRQ_RLOCK(flags); + if (!(vlan = macvlan_hash_lookup(port, d))) { + /* not for us */ + DEBUG("%s: not a broadcast, and could not find vlan for dest: %2hx:%2hx:%2hx:%2hx:%2hx:%2hx\n", + __PRETTY_FUNCTION__, d[0], d[1], d[2], d[3], d[4], d[5]); + + rv = -ENODEV; + //printk("1 "); + } + else { + DEBUG("%s: not a broadcast, found vlan for dest: " + "%2hx:%2hx:%2hx:%2hx:%2hx:%2hx, up: %i\n", + __PRETTY_FUNCTION__, d[0], d[1], d[2], d[3], d[4], d[5], + (vlan->dev->flags & IFF_UP)); + + if (!(vlan->dev->flags & IFF_UP)) { + kfree_skb(skb); + rv = 0; /* consume */ + } + else { + vlan->statistics.rx_packets++; + /* Count the lower-level's header to make our counters + * look more like an ethernet device. */ + vlan->statistics.rx_bytes += + (skb->len + vlan->lowerdev->hard_header_len); + + skb->dev = vlan->dev; + sdev = skb->dev; + if (!(eth->h_dest[0] & 1)) { + /* if it's not multicast, see if it's + * for us, or not. + */ + if (memcmp(sdev->dev_addr, eth->h_dest, ETH_ALEN)) { + skb->pkt_type = PACKET_OTHERHOST; + } + else { + skb->pkt_type = PACKET_HOST; + } + } + dev_hold(sdev); + MVL_IRQ_RUNLOCK(flags); + //printk("2 "); + netif_rx(skb); + dev_put(sdev); + //printk("3 "); + rv = 0; + goto out; + } + } + }/* else, was not broadcast */ + + MVL_IRQ_RUNLOCK(flags); + //printk("4 "); + + out: + //printk("5 "); + return rv; +}/* filter on dest */ + + +/* global entry point when receiving a pkt from lower-level devices. Return + * >= 0 if we consume, otherwise packet will be sent to the rest of the stack + * as normal. + * + */ +static int macvlan_handle_frame(struct sk_buff *skb) +{ + struct macvlan_port *port; /* maps skbuffs arriving from a lower layer + * i/f to a higher layer i/f */ + int rv = 0; + + port = skb->dev->macvlan_priv; + if (port->flags & MVL_FILTER_ON_DEST) { + rv = mvl_handle_frame_fod(port, skb); + } + else { + rv = mvl_handle_frame_fos(port, skb); + } + + return rv; +} + + +#ifdef MVL_CONFIG_PROC_FS + +static int read_mvl_glbl(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int ret = -1; + char *p = page; + int mx_len = (4096 - (p - page)); + + if (! *eof ) { + struct macvlan_port* port; + int cnt; + unsigned long flags; + + /* Global counts here... 
*/ + p += sprintf(p, "MAC-VLAN module:\n"); + + p += sprintf(p, " port count: %i vlan_counter: %i\n", + atomic_read(&macvlan_nports), + atomic_read(&mvl_vlan_counter)); + + MVL_READ_LOCK(flags); + port = port_list; + while (port) { + p += sprintf(p, " %s num_vlans: %i flags: %x\n", + port->dev->name, atomic_read(&port->ndevs), port->flags); + + /* catch overflow */ + cnt = p - page; + if (cnt > (mx_len - 60)) { + if (mx_len - cnt >= 20) { + p += sprintf(p, "OUT_OF_SPACE!\n"); + } + break; + } + + port = port->next; + } + + ret = p - page; + MVL_READ_UNLOCK(flags); + } + return ret; +} /* read_mvl_glbl */ + +static int write_mvl_glbl(struct file *file, const char *buffer, + unsigned long count, void *data) { + char *p; + const char *end; + int ret=count; + int len; + char dev_name[2][IFNAMSIZ]; + char* tmps = NULL; + unsigned long flags; + + MVL_WRITE_LOCK(flags); + + end = buffer+count; + + for (p= (char *) buffer; p< end ; ) { + if (iswhitespace(*p)) { + p++; + continue; + } + + memset(dev_name[0], 0 ,IFNAMSIZ); + memset(dev_name[1], 0 ,IFNAMSIZ); + + len = strlen("add_port "); + if (strncmp(p, "add_port ", len)==0) + { + p += len; + + if ( (p + IFNAMSIZ) <= end) + p += copy_next_word(dev_name[0], p, IFNAMSIZ); + else + p += copy_next_word(dev_name[0], p, end-p ); + + skip_whitespace(p); + + /* This can fail, but not sure how to return failure + * to user-space here. + * NOTE: Does it's own internal locking, so release + * the lock here..then re-acquire after. + */ + MVL_WRITE_UNLOCK(flags); + macvlan_port_create(dev_name[0]); + MVL_WRITE_LOCK(flags); + goto forend; + } + + len = strlen("remove_port "); + if (strncmp(p,"remove_port ",len)==0) { + p += len; + + if ( (p + IFNAMSIZ) <= end) + p += copy_next_word(dev_name[0], p, IFNAMSIZ); + else + p += copy_next_word(dev_name[0], p, end-p ); + + skip_whitespace(p); + + MVL_WRITE_UNLOCK(flags); + macvlan_port_cleanup(dev_name[0]); + MVL_WRITE_LOCK(flags); + goto forend; + } + + len = strlen("debug_lvl "); + if (strncmp(p,"debug_lvl ",len)==0) + { + p += len; + + if ( (p + IFNAMSIZ) <= end) + p += copy_next_word(dev_name[0], p, IFNAMSIZ); + else + p += copy_next_word(dev_name[0], p, end-p ); + + skip_whitespace(p); + + debug_lvl = simple_strtoul(dev_name[0], &tmps, 10); + goto forend; + } + + printk("ERROR: Unsupported command\n"); + + forend: + p++; + } + + MVL_WRITE_UNLOCK(flags); + + return ret; +} /* write_mvl_glbl */ + +/* Proc file read for mac-vlan. */ +static int read_mvl(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int ret = -1; + if (! *eof ) { + char *p = page; + struct macvlan_vlan* vlan = (struct macvlan_vlan*)(data); + struct macvlan_hash_entry* entry; + int i; + int count = 0; + int cnt; + int mx_len = 4096; + unsigned long flags; + + + MVL_READ_LOCK(flags); + + /* Global counts here... 
*/ + p += sprintf(p, "MAC-VLAN %s:\n", vlan->dev->name); + + p += sprintf(p, " MAC count: %i lower_dev: %s macvlan-port: %s\n", + atomic_read(&vlan->nmacs), vlan->lowerdev->name, + vlan->port->dev->name); + + for (i = 0; iport->hash_table[i]; + while (entry) { + if (entry->vlan == vlan) { + /* catch overflow */ + cnt = p - page; + if (cnt > (mx_len - 40)) { + if (mx_len - cnt >= 20) { + p += sprintf(p, "OUT_OF_SPACE!\n"); + } + goto outofspace; + } + + p += sprintf(p, " [%i] %02hx:%02hx:%02hx:%02hx:%02hx:%02hx\n", + count, entry->mac[0], entry->mac[1], entry->mac[2], + entry->mac[3], entry->mac[4], entry->mac[5]); + count++; + + } + entry = entry->next; + }/* while */ + }/* for */ + + outofspace: + + ret = p - page; + + MVL_READ_UNLOCK(flags); + } + return ret; +} /* read_mvl_glbl */ + + +static int write_mvl(struct file *file, const char *buffer, + unsigned long count, void *data) { + char *p; + const char *end; + int ret=count; + int len; + char arg[MVL_MX_ARG_LEN+1]; + + struct macvlan_vlan* vlan = (struct macvlan_vlan*)(data); + char mac[ETH_ALEN]; + unsigned long flags; + + MVL_WRITE_LOCK(flags); + + end = buffer+count; + + for (p= (char *) buffer; p< end ; ) { + if (iswhitespace(*p)) { + p++; + continue; + } + + memset(arg, 0, MVL_MX_ARG_LEN+1); + + len = strlen("add_mac "); + if (strncmp(p, "add_mac ", len)==0) { + p += len; + + if ( (p + MVL_MX_ARG_LEN) <= end) + p += copy_next_word(arg, p, MVL_MX_ARG_LEN); + else + p += copy_next_word(arg, p, end-p); + + skip_whitespace(p); + + if (toMacString(mac, arg) < 0) { + printk("macvlan: MAC format is incorrect: %s\n", + arg); + } + else { + /* This can fail, but not sure how to return failure + * to user-space here. + */ + macvlan_hash_add(vlan->dev->name, mac); + } + goto forend; + } + + len = strlen("remove_mac "); + if (strncmp(p,"remove_mac ",len)==0) { + p += len; + + if ( (p + MVL_MX_ARG_LEN) <= end) + p += copy_next_word(arg, p, MVL_MX_ARG_LEN); + else + p += copy_next_word(arg, p, end-p); + + skip_whitespace(p); + + if (toMacString(mac, arg) < 0) { + printk("macvlan: MAC format is incorrect: %s\n", + arg); + } + else { + /* This can fail, but not sure how to return failure + * to user-space here. + */ + macvlan_hash_rem(vlan->dev->name, mac); + } + goto forend; + } + + printk("ERROR: Unsupported command\n"); + + forend: + p++; + } + + MVL_WRITE_UNLOCK(flags); + + return ret; +} /* write_mvl */ + + +static int read_mvl_port(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int ret = -1; + char *p = page; + int mx_len = (4096 - (p - page)); + int i; + + if (! *eof ) { + struct macvlan_port* port = (struct macvlan_port*)(data); + int cnt; + struct macvlan_vlan* vlan; + struct macvlan_hash_entry* entry; + unsigned long flags; + + MVL_READ_LOCK(flags); + + /* Global counts here... 
*/ + p += sprintf(p, "MAC-VLAN Port: %s\n", port->dev->name); + + p += sprintf(p, " vlan count: %i\n", atomic_read(&port->ndevs)); + + vlan = port->vlan_list; + while (vlan) { + p += sprintf(p, " %s\n", vlan->dev->name); + + /* catch overflow */ + cnt = p - page; + if (cnt > (mx_len - 40)) { + if (mx_len - cnt >= 20) { + p += sprintf(p, "OUT_OF_SPACE!\n"); + } + goto outofspace; + } + + vlan = vlan->next; + } + + /* MAC addr hash */ + + for (i = 0; ihash_table[i]) { + p += sprintf(p, " [%i] ", i); + entry = port->hash_table[i]; + while (entry) { + /* catch overflow */ + cnt = p - page; + if (cnt > (mx_len - 40)) { + if (mx_len - cnt >= 20) { + p += sprintf(p, "OUT_OF_SPACE!\n"); + } + goto outofspace; + } + + p += sprintf(p, " %02hx:%02hx:%02hx:%02hx:%02hx:%02hx", + entry->mac[0], entry->mac[1], entry->mac[2], + entry->mac[3], entry->mac[4], entry->mac[5]); + + entry = entry->next; + } + p += sprintf(p, "\n"); + } + } + + outofspace: + ret = p - page; + MVL_READ_UNLOCK(flags); + } + return ret; +} /* read_mvl_glbl */ + + +static int write_mvl_port(struct file *file, const char *buffer, + unsigned long count, void *data) { + char *p; + const char *end; + int ret=count; + int len; + char dev_name[2][IFNAMSIZ]; + char* tmps = NULL; + struct macvlan_port* port = (struct macvlan_port*)(data); + unsigned long flags; + + end = buffer+count; + + for (p= (char *) buffer; p< end ; ) { + if (iswhitespace(*p)) { + p++; + continue; + } + + memset(dev_name[0], 0 ,IFNAMSIZ); + memset(dev_name[1], 0 ,IFNAMSIZ); + + len = strlen("add_vlan "); + if (strncmp(p, "add_vlan ", len)==0) { + p += len; + + if ( (p + IFNAMSIZ) <= end) + p += copy_next_word(dev_name[0], p, IFNAMSIZ); + else + p += copy_next_word(dev_name[0], p, end-p ); + + skip_whitespace(p); + + /* This can fail, but not sure how to return failure + * to user-space here. + */ + /* has internal locking */ + macvlan_vlan_create(port->dev->name, + simple_strtoul(dev_name[0], &tmps, 10)); + goto forend; + } + + len = strlen("set_flags "); + if (strncmp(p, "set_flags ", len)==0) { + p += len; + + if ( (p + IFNAMSIZ) <= end) + p += copy_next_word(dev_name[0], p, IFNAMSIZ); + else + p += copy_next_word(dev_name[0], p, end-p ); + + skip_whitespace(p); + + /* This can fail, but not sure how to return failure + * to user-space here. 
+ */ + + MVL_WRITE_LOCK(flags); + macvlan_port_set_flags(port->dev->name, + simple_strtoul(dev_name[0], &tmps, 16)); + MVL_WRITE_UNLOCK(flags); + goto forend; + } + + len = strlen("remove_vlan "); + if (strncmp(p,"remove_vlan ",len)==0) { + p += len; + + if ( (p + IFNAMSIZ) <= end) + p += copy_next_word(dev_name[0], p, IFNAMSIZ); + else + p += copy_next_word(dev_name[0], p, end-p ); + + skip_whitespace(p); + + /* Has internal locking */ + macvlan_vlan_cleanup(dev_name[0]); + goto forend; + } + + printk("ERROR: Unsupported command\n"); + + forend: + p++; + } + + return ret; +} /* write_mvl_port */ + + +#endif + + +static int __init macvlan_init(void) { + printk (KERN_INFO "MAC address based VLAN support Revision: 1.4 (9-04)\n"); + + port_list = NULL; + + macvlan_ioctl_set(macvlan_ioctl_deviceless_stub); + macvlan_handle_frame_hook = macvlan_handle_frame; + +#ifdef MVL_CONFIG_PROC_FS + + mvl_proc_dir = proc_mkdir(MVL_PROC_DIR, proc_net); + if (mvl_proc_dir) { + mvl_proc_cfg = create_proc_read_entry(MVL_PROC_CFG, S_IRUGO, mvl_proc_dir, + read_mvl_glbl, NULL); + if (mvl_proc_cfg) { + mvl_proc_cfg->write_proc = write_mvl_glbl; + mvl_proc_cfg->owner = THIS_MODULE; + } + } +#endif + + + return 0; +} + +static void macvlan_cleanup(void) { + struct macvlan_port *port; + char nm[IFNAMSIZ+1]; + unsigned long flags; + int tst; + + macvlan_handle_frame_hook = NULL; + macvlan_ioctl_set(NULL); + + MVL_WRITE_LOCK(flags); + /* destroy all existing ports */ + while ((port = port_list)) { + strncpy(nm, port->dev->name, IFNAMSIZ); + MVL_WRITE_UNLOCK(flags); + if ((tst = macvlan_port_cleanup(nm)) < 0) { + printk("macvlan: WARNING: Failed port_cleanup in macvlan_cleanup, err: %d name: %s\n", tst, nm); + BUG(); + MVL_WRITE_LOCK(flags); + break; + } + MVL_WRITE_LOCK(flags); + } + MVL_WRITE_UNLOCK(flags); + +#ifdef MVL_CONFIG_PROC_FS + if (mvl_proc_cfg) { + remove_proc_entry(MVL_PROC_CFG, mvl_proc_dir); + mvl_proc_cfg = NULL; + } + if (mvl_proc_dir) { + remove_proc_entry(MVL_PROC_DIR, proc_net); + mvl_proc_dir = NULL; + } +#endif + +}/* macvlan_cleanup */ + + +module_init(macvlan_init); +module_exit(macvlan_cleanup); +MODULE_LICENSE("GPL"); --- linux-2.6.11/net/macvlan/macvlan.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.11.p4s/net/macvlan/macvlan.h 2005-07-11 16:06:34.000000000 -0700 @@ -0,0 +1,88 @@ +/* -*- linux-c -*- + +# (C) Copyright 2001-2003 +# Alex Zeffertt, Cambridge Broadband Ltd, ajz@cambridgebroadband.com +# Re-worked by Ben Greear + +*/ + +#ifndef MACVLAN_KERNEL_H_FILE__ +#define MACVLAN_KERNEL_H_FILE__ + + +/* NOTE: If you change this below, you should probably change macvlan_hash_lookup as + * well. Especially if you make this bigger. + */ +#define MACVLAN_HASH_LEN 256 + +#define VLAN_BUCKET(a) (a[5] % MACVLAN_HASH_LEN) + +/* This can be made as large as desired, and mainly helps keep bad + * IOCTL arguments from taking down the box. 
+ */ +#define MAX_MACVLANS_PER_PORT 10000 + +/* Proc file related */ +#define MVL_MX_ARG_LEN 80 + +#ifdef CONFIG_PROC_FS + +/* To use or not to use the PROC-FS */ +#define MVL_CONFIG_PROC_FS + +#endif + + +/*********************************************************/ +/* types */ +/*********************************************************/ +/* a macvlan_vlan represents an upper layer interface */ +struct macvlan_vlan { + struct net_device* dev; + struct net_device_stats statistics; + struct macvlan_vlan *next; + struct macvlan_port *port; + struct net_device *lowerdev; + atomic_t nmacs; /* the number of mac addresses bound to this vlan */ + +#ifdef MVL_CONFIG_PROC_FS + struct proc_dir_entry* proc_ent; +#endif + +}; + +struct macvlan_hash_entry { + unsigned char mac[ETH_ALEN]; /* the eth hdr source to match. Can + * match as destination too, see flags in + * macvlan_port. Cannot match on both. */ + struct macvlan_vlan *vlan; /* the vlan target */ + struct macvlan_hash_entry *next;/* next entry in list (same hash, any dev) */ +}; + + +/* + * a macvlan_port represents a mux/demux between a mac- + * based-vlan enabled ethernet device and vlans + * layered on top of it + */ +struct macvlan_port { + /* MAC to vlan lookup */ + struct macvlan_hash_entry *hash_table[MACVLAN_HASH_LEN]; + struct net_device *dev; /* the mac-based-vlan enabled ethernet device */ + atomic_t ndevs; /* number of vlans layered over dev */ + struct macvlan_vlan *vlan_list; /* list of vlans layered over this port */ + struct macvlan_port *next; /* next port */ + +#define MVL_FILTER_ON_DEST 0x1 /* 0x1 filter-on-destination (instead of source) */ + int flags; + +#ifdef MVL_CONFIG_PROC_FS + struct proc_dir_entry* proc_dir; + struct proc_dir_entry* proc_ent; +#endif + +}; + + +#endif + --- linux-2.6.11/net/packet/af_packet.c 2005-03-01 23:38:13.000000000 -0800 +++ linux-2.6.11.p4s/net/packet/af_packet.c 2005-07-11 16:06:34.000000000 -0700 @@ -72,6 +72,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -313,6 +314,13 @@ struct net_device *dev; unsigned short proto=0; int err; + int kludge = 0; + +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (sk->sk_flags & SOCK_DONT_DO_LL_FCS) { + kludge = 4; // We're doing our own CRC + } +#endif /* * Get and verify the address. @@ -333,7 +341,7 @@ */ saddr->spkt_device[13] = 0; - dev = dev_get_by_name(saddr->spkt_device); + dev = dev_get_by_name(saddr->spkt_device); /* DAMN, we aught to hash this! */ err = -ENODEV; if (dev == NULL) goto out_unlock; @@ -344,7 +352,7 @@ */ err = -EMSGSIZE; - if(len>dev->mtu+dev->hard_header_len) + if (len > (dev->mtu + dev->hard_header_len + kludge)) goto out_unlock; err = -ENOBUFS; @@ -386,6 +394,15 @@ if (err) goto out_free; +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (sk->sk_flags & SOCK_DONT_DO_LL_FCS) { + skb->general_flags |= DONT_DO_TX_CRC; + } + else { + skb->general_flags &= ~(DONT_DO_TX_CRC); + } +#endif + err = -ENETDOWN; if (!(dev->flags & IFF_UP)) goto out_free; @@ -693,6 +710,13 @@ unsigned short proto; unsigned char *addr; int ifindex, err, reserve = 0; + int kludge = 0; + +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (sk->sk_flags & SOCK_DONT_DO_LL_FCS) { + kludge = 4; // We're doing our own CRC + } +#endif /* * Get and verify the address. 
@@ -722,7 +746,7 @@ reserve = dev->hard_header_len; err = -EMSGSIZE; - if (len > dev->mtu+reserve) + if (len > (dev->mtu + reserve + kludge)) goto out_unlock; skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev), @@ -753,6 +777,15 @@ skb->dev = dev; skb->priority = sk->sk_priority; +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (sk->sk_flags & SOCK_DONT_DO_LL_FCS) { + skb->general_flags |= DONT_DO_TX_CRC; + } + else { + skb->general_flags &= ~(DONT_DO_TX_CRC); + } +#endif + err = -ENETDOWN; if (!(dev->flags & IFF_UP)) goto out_free; --- linux-2.6.11/net/socket.c 2005-03-01 23:37:58.000000000 -0800 +++ linux-2.6.11.p4s/net/socket.c 2005-07-11 16:06:35.000000000 -0700 @@ -813,6 +813,30 @@ } EXPORT_SYMBOL(vlan_ioctl_set); +static DECLARE_MUTEX(macvlan_ioctl_mutex); +static int (*macvlan_ioctl_hook)(void __user*); + +void macvlan_ioctl_set(int (*hook)(void __user*)) +{ + down(&macvlan_ioctl_mutex); + macvlan_ioctl_hook = hook; + up(&macvlan_ioctl_mutex); +} +EXPORT_SYMBOL(macvlan_ioctl_set); + + +static DECLARE_MUTEX(redirdev_ioctl_mutex); +static int (*redirdev_ioctl_hook)(void __user*); + +void redirdev_ioctl_set(int (*hook)(void __user*)) +{ + down(&redirdev_ioctl_mutex); + redirdev_ioctl_hook = hook; + up(&redirdev_ioctl_mutex); +} +EXPORT_SYMBOL(redirdev_ioctl_set); + + static DECLARE_MUTEX(dlci_ioctl_mutex); static int (*dlci_ioctl_hook)(unsigned int, void __user *); @@ -880,6 +904,28 @@ err = vlan_ioctl_hook(argp); up(&vlan_ioctl_mutex); break; + case SIOCGIFMACVLAN: + case SIOCSIFMACVLAN: + err = -ENOPKG; + if (!macvlan_ioctl_hook) + request_module("macvlan"); + + down(&macvlan_ioctl_mutex); + if (macvlan_ioctl_hook) + err = macvlan_ioctl_hook(argp); + up(&macvlan_ioctl_mutex); + break; + case SIOCGIFREDIRDEV: + case SIOCSIFREDIRDEV: + err = -ENOPKG; + if (!redirdev_ioctl_hook) + request_module("redirdev"); + + down(&redirdev_ioctl_mutex); + if (redirdev_ioctl_hook) + err = redirdev_ioctl_hook(argp); + up(&redirdev_ioctl_mutex); + break; case SIOCGIFDIVERT: case SIOCSIFDIVERT: /* Convert this to call through a hook */ --- linux-2.6.11/net/ipv4/arp.c 2005-03-01 23:38:25.000000000 -0800 +++ linux-2.6.11.p4s/net/ipv4/arp.c 2005-07-11 16:06:35.000000000 -0700 @@ -1,4 +1,4 @@ -/* linux/net/inet/arp.c +/* linux/net/inet/arp.c -*-linux-c-*- * * Version: $Id: arp.c,v 1.99 2001/08/30 22:55:42 davem Exp $ * @@ -419,6 +419,28 @@ return !inet_confirm_addr(dev, sip, tip, scope); } + +static int is_ip_on_dev(struct net_device* dev, __u32 ip) { + int rv = 0; + struct in_device* in_dev = in_dev_get(dev); + if (in_dev) { + struct in_ifaddr *ifa; + + rcu_read_lock(); + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { + if (ifa->ifa_address == ip) { + /* match */ + rv = 1; + break; + } + } + rcu_read_unlock(); + in_dev_put(in_dev); + } + return rv; +} + + static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, @@ -430,9 +452,35 @@ if (ip_route_output_key(&rt, &fl) < 0) return 1; if (rt->u.dst.dev != dev) { - NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); - flag = 1; - } + if ((dev->priv_flags & IFF_ACCEPT_LOCAL_ADDRS) && + (rt->u.dst.dev == &loopback_dev)) { + /* Accept these IFF target-ip == dev's IP */ + /* TODO: Need to force the ARP response back out the interface + * instead of letting it route locally. + */ + + if (is_ip_on_dev(dev, tip)) { + /* OK, we'll let this special case slide, so that we can + * arp from one local interface to another. This seems + * to work, but could use some review. 
--Ben + */ + /*printk("arp_filter, sip: %x tip: %x dev: %s, STS override (ip on dev)\n", + sip, tip, dev->name);*/ + } + else { + /*printk("arp_filter, sip: %x tip: %x dev: %s, IP is NOT on dev\n", + sip, tip, dev->name);*/ + NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); + flag = 1; + } + } + else { + /*printk("arp_filter, not lpbk sip: %x tip: %x dev: %s flgs: %hx dst.dev: %p lbk: %p\n", + sip, tip, dev->name, dev->priv_flags, rt->u.dst.dev, &loopback_dev);*/ + NET_INC_STATS_BH(LINUX_MIB_ARPFILTER); + flag = 1; + } + } ip_rt_put(rt); return flag; } --- linux-2.6.11/net/ipv4/fib_frontend.c 2005-03-01 23:38:33.000000000 -0800 +++ linux-2.6.11.p4s/net/ipv4/fib_frontend.c 2005-07-11 16:06:35.000000000 -0700 @@ -185,8 +185,17 @@ if (fib_lookup(&fl, &res)) goto last_resort; - if (res.type != RTN_UNICAST) - goto e_inval_res; + + if (res.type != RTN_UNICAST) { + if ((res.type == RTN_LOCAL) && + (dev->priv_flags & IFF_ACCEPT_LOCAL_ADDRS)) { + /* All is OK */ + } + else { + goto e_inval_res; + } + } + *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); #ifdef CONFIG_IP_ROUTE_MULTIPATH --- linux-2.6.11/net/8021q/vlan_dev.c 2005-03-01 23:38:26.000000000 -0800 +++ linux-2.6.11.p4s/net/8021q/vlan_dev.c 2005-07-11 16:06:35.000000000 -0700 @@ -438,6 +438,11 @@ struct net_device_stats *stats = vlan_dev_get_stats(dev); struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data); + /* Please note, dev_queue_xmit consumes the pkt regardless of the + * return value. So, will copy the skb first and free if successful. + */ + struct sk_buff* skb2 = skb_get(skb); + /* Handle non-VLAN frames if they are sent to us, for example by DHCP. * * NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING @@ -467,6 +472,10 @@ skb = __vlan_put_tag(skb, veth_TCI); if (!skb) { stats->tx_dropped++; + /* Free the extra copy, assuming this is a non-recoverable + * issue and we don't want calling code to retry. + */ + kfree_skb(skb2); return 0; } @@ -484,13 +493,24 @@ veth->h_vlan_proto, veth->h_vlan_TCI, veth->h_vlan_encapsulated_proto); #endif - stats->tx_packets++; /* for statics only */ - stats->tx_bytes += skb->len; - skb->dev = VLAN_DEV_INFO(dev)->real_dev; - dev_queue_xmit(skb); - return 0; + { + int rv = dev_queue_xmit(skb); + if (rv == 0) { + /* Was success, need to free the skb reference since + * we bumped up the user count above. If there was an + * error instead, then the skb2 will not be freed, and so + * the calling code will be able to re-send it. + */ + + stats->tx_packets++; /* for statics only */ + stats->tx_bytes += skb2->len; + + kfree_skb(skb2); + } + return rv; + } } int vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) --- linux-2.6.11/net/8021q/vlan.c 2005-03-01 23:38:19.000000000 -0800 +++ linux-2.6.11.p4s/net/8021q/vlan.c 2005-07-11 16:06:35.000000000 -0700 @@ -1,4 +1,4 @@ -/* +/* -*- linux-c -*- * INET 802.1Q VLAN * Ethernet-type device handling. * --- linux-2.6.11/include/linux/if_vlan.h 2005-03-01 23:37:49.000000000 -0800 +++ linux-2.6.11.p4s/include/linux/if_vlan.h 2005-07-11 16:06:35.000000000 -0700 @@ -1,4 +1,4 @@ -/* +/* -*- linux-c -*- * VLAN An implementation of 802.1Q VLAN tagging. * * Authors: Ben Greear --- linux-2.6.11/include/linux/ethtool.h 2005-03-01 23:37:50.000000000 -0800 +++ linux-2.6.11.p4s/include/linux/ethtool.h 2005-07-11 16:06:35.000000000 -0700 @@ -1,4 +1,4 @@ -/* +/* -*-linux-c-*- * ethtool.h: Defines for Linux ethtool. * * Copyright (C) 1998 David S. 
Miller (davem@redhat.com) @@ -293,6 +293,10 @@ * get_strings: Return a set of strings that describe the requested objects * phys_id: Identify the device * get_stats: Return statistics about the device + * set_rx_all: Set or clear IFF_ACCEPT_ALL_FRAMES, see if.h + * get_rx_all: Return 1 if set, 0 if not. + * set_save_fcs: Set or clear IFF_SAVE_FCS, see if.h + * get_save_fcs: Return 1 if set, 0 if not. * * Description: * @@ -351,10 +355,22 @@ int (*phys_id)(struct net_device *, u32); int (*get_stats_count)(struct net_device *); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); + int (*set_rx_all)(struct net_device *, u32); + int (*get_rx_all)(struct net_device *, u32 *); + int (*set_save_fcs)(struct net_device *, u32); + int (*get_save_fcs)(struct net_device *, u32 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); }; + +/* for dumping net-device statistics */ +struct ethtool_ndstats { + u32 cmd; /* ETHTOOL_GNDSTATS */ + u8 data[0]; /* sizeof(struct net_device_stats) */ +}; + + /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* Get settings. */ #define ETHTOOL_SSET 0x00000002 /* Set settings. */ @@ -389,6 +405,15 @@ #define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ #define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ + +#define ETHTOOL_GNDSTATS 0x00000070 /* get standard net-device statistics */ +#define ETHTOOL_GETRXALL 0x00000071 /* Retrieve whether or not + * IFF_ACCEPT_ALL_FRAMES is set. */ +#define ETHTOOL_SETRXALL 0x00000072 /* Set IFF_ACCEPT_ALL_FRAMES */ +#define ETHTOOL_GETRXFCS 0x00000073 /* Set IFF_SAVE_FCS */ +#define ETHTOOL_SETRXFCS 0x00000074 /* Set IFF_SAVE_FCS */ + + /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET #define SPARC_ETH_SSET ETHTOOL_SSET --- linux-2.6.11/net/core/ethtool.c 2005-03-01 23:38:37.000000000 -0800 +++ linux-2.6.11.p4s/net/core/ethtool.c 2005-07-11 16:06:35.000000000 -0700 @@ -1,4 +1,4 @@ -/* +/* -*- linux-c -*- * net/core/ethtool.c - Ethtool ioctl handler * Copyright (c) 2003 Matthew Wilcox * @@ -32,6 +32,12 @@ return (dev->features & NETIF_F_IP_CSUM) != 0; } +u32 ethtool_op_get_rx_all(struct net_device *dev, u32* retval) +{ + *retval = ((dev->priv_flags & IFF_ACCEPT_ALL_FRAMES) != 0); + return 0; +} + int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) { if (data) @@ -674,6 +680,88 @@ return ret; } + +static int ethtool_get_rx_all(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + int rv = 0; + + if (!dev->ethtool_ops->get_rx_all) + return -EOPNOTSUPP; + + if ((rv = dev->ethtool_ops->get_rx_all(dev, &edata.data)) < 0) { + return rv; + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + + +static int ethtool_set_rx_all(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!dev->ethtool_ops->set_rx_all) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return dev->ethtool_ops->set_rx_all(dev, id.data); +} + +static int ethtool_get_rx_fcs(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + int rv = 0; + + if (!dev->ethtool_ops->get_save_fcs) + return -EOPNOTSUPP; + + if ((rv = dev->ethtool_ops->get_save_fcs(dev, &edata.data)) < 0) { + return rv; + } + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + + +static int ethtool_set_rx_fcs(struct net_device *dev, void *useraddr) +{ + struct 
ethtool_value id; + + if (!dev->ethtool_ops->set_save_fcs) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return dev->ethtool_ops->set_save_fcs(dev, id.data); +} + + +/* Handle some generic ethtool commands here */ +static int ethtool_get_netdev_stats(struct net_device *dev, void *useraddr) { + + struct ethtool_ndstats* nds = (struct ethtool_ndstats*)(useraddr); + + struct net_device_stats *stats = dev->get_stats(dev); + if (stats) { + if (copy_to_user(nds->data, stats, sizeof(*stats))) { + return -EFAULT; + } + } + else { + return -EOPNOTSUPP; + } + return 0; +} + + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct ifreq *ifr) @@ -693,16 +781,26 @@ if (!dev || !netif_device_present(dev)) return -ENODEV; - if (!dev->ethtool_ops) - goto ioctl; - if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) return -EFAULT; - if(dev->ethtool_ops->begin) + if(dev->ethtool_ops && dev->ethtool_ops->begin) if ((rc = dev->ethtool_ops->begin(dev)) < 0) return rc; + /* Handle some generic operations that do not require specific + * ethtool handlers. + */ + switch (ethcmd) { + case ETHTOOL_GNDSTATS: + return ethtool_get_netdev_stats(dev, useraddr); + default: + break; + } + + if (!dev->ethtool_ops) + goto ioctl; + switch (ethcmd) { case ETHTOOL_GSET: rc = ethtool_get_settings(dev, useraddr); @@ -792,6 +890,18 @@ case ETHTOOL_PHYS_ID: rc = ethtool_phys_id(dev, useraddr); break; + case ETHTOOL_SETRXALL: + rc = ethtool_set_rx_all(dev, useraddr); + break; + case ETHTOOL_GETRXALL: + rc = ethtool_get_rx_all(dev, useraddr); + break; + case ETHTOOL_SETRXFCS: + rc = ethtool_set_rx_fcs(dev, useraddr); + break; + case ETHTOOL_GETRXFCS: + rc = ethtool_get_rx_fcs(dev, useraddr); + break; case ETHTOOL_GSTATS: rc = ethtool_get_stats(dev, useraddr); break; --- linux-2.6.11/drivers/net/e1000/e1000_main.c 2005-03-01 23:38:37.000000000 -0800 +++ linux-2.6.11.p4s/drivers/net/e1000/e1000_main.c 2005-07-11 16:06:35.000000000 -0700 @@ -1,4 +1,4 @@ -/******************************************************************************* +/** -*-linux-c -*- *************************************************************** Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. @@ -130,7 +130,7 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter); static void e1000_clean_tx_ring(struct e1000_adapter *adapter); static void e1000_clean_rx_ring(struct e1000_adapter *adapter); -static void e1000_set_multi(struct net_device *netdev); +void e1000_set_multi(struct net_device *netdev); static void e1000_update_phy_info(unsigned long data); static void e1000_watchdog(unsigned long data); static void e1000_82547_tx_fifo_stall(unsigned long data); @@ -520,6 +520,10 @@ if(pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; + /* Has ability to receive all frames (even bad CRCs and such) */ + netdev->features |= NETIF_F_RX_ALL | NETIF_F_SAVE_CRC; + + /* hard_start_xmit is safe against parallel locking */ netdev->features |= NETIF_F_LLTX; @@ -1295,7 +1299,7 @@ * promiscuous mode, and all-multi behavior. **/ -static void +void e1000_set_multi(struct net_device *netdev) { struct e1000_adapter *adapter = netdev->priv; @@ -1323,6 +1327,35 @@ E1000_WRITE_REG(hw, RCTL, rctl); + + /* This is useful for using ethereal or tcpdump to sniff + * packets in promiscuous mode without stripping VLAN/priority + * information, and also letting bad packets through. + * + * THIS IS NOT PRODUCTION CODE - FOR INTERNAL USE ONLY!!! 
+ * + */ + if (netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES) { + uint32_t ctrl; + /*printk("%s: Enabling acceptance of ALL frames (bad CRC too).\n", + netdev->name); */ + /* store bad packets, promisc/multicast all, no VLAN + * filter */ + rctl = E1000_READ_REG(hw, RCTL); + rctl |= (E1000_RCTL_SBP | E1000_RCTL_UPE | E1000_RCTL_MPE); + rctl &= ~(E1000_RCTL_VFE | E1000_RCTL_CFIEN); + E1000_WRITE_REG(hw, RCTL, rctl); + /* disable VLAN tagging/striping */ + ctrl = E1000_READ_REG(hw, CTRL); + ctrl &= ~E1000_CTRL_VME; + E1000_WRITE_REG(hw, CTRL, ctrl); + } + else { + /* TODO: Do we need a way to explicitly turn this off if it was + * previously enabled, or will it magically go back to normal??? --Ben + */ + } + /* 82542 2.0 needs to be in reset to write receive address registers */ if(hw->mac_type == e1000_82542_rev2_0) @@ -1519,6 +1552,7 @@ #define E1000_TX_FLAGS_CSUM 0x00000001 #define E1000_TX_FLAGS_VLAN 0x00000002 #define E1000_TX_FLAGS_TSO 0x00000004 +#define E1000_TX_FLAGS_NO_FCS 0x00000008 #define E1000_TX_FLAGS_VLAN_MASK 0xffff0000 #define E1000_TX_FLAGS_VLAN_SHIFT 16 @@ -1723,6 +1757,13 @@ txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK); } +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS)) { + txd_lower &= ~(E1000_TXD_CMD_IFCS); + /* printk("Disabling CRC in tx_queue, txd_lower: 0x%x\n", txd_lower); */ + } +#endif + i = tx_ring->next_to_use; while(count--) { @@ -1737,6 +1778,14 @@ tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd); +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + /* txd_cmd re-enables FCS, so we'll re-disable it here as desired. */ + if (unlikely(tx_flags & E1000_TX_FLAGS_NO_FCS)) { + tx_desc->lower.data &= ~(cpu_to_le32(E1000_TXD_CMD_IFCS)); + /* printk("Disabling2 CRC in tx_queue, txd_lower: 0x%x\n", tx_desc->lower.data); */ + } +#endif + /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -1874,6 +1923,12 @@ else if(likely(e1000_tx_csum(adapter, skb))) tx_flags |= E1000_TX_FLAGS_CSUM; +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + if (unlikely(skb->general_flags & DONT_DO_TX_CRC)) { + tx_flags |= E1000_TX_FLAGS_NO_FCS; + } +#endif + e1000_tx_queue(adapter, e1000_tx_map(adapter, skb, first, max_per_txd, nr_frags, mss), tx_flags); @@ -2333,7 +2388,11 @@ goto next_desc; } - if(unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK)) { + /* If we are accepting all frames, then do not pay attention to the + * framing errors. + */ + if (unlikely(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) && + !(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES)) { last_byte = *(skb->data + length - 1); if(TBI_ACCEPT(&adapter->hw, rx_desc->status, rx_desc->errors, length, last_byte)) { @@ -2351,7 +2410,12 @@ } /* Good Receive */ - skb_put(skb, length - ETHERNET_FCS_SIZE); + if (netdev->priv_flags & IFF_SAVE_FCS) { + skb_put(skb, length); + } + else { + skb_put(skb, length - ETHERNET_FCS_SIZE); + } /* Receive Checksum Offload */ e1000_rx_checksum(adapter, rx_desc, skb); --- linux-2.6.11/drivers/net/e1000/e1000_ethtool.c 2005-03-01 23:38:08.000000000 -0800 +++ linux-2.6.11.p4s/drivers/net/e1000/e1000_ethtool.c 2005-07-11 16:06:35.000000000 -0700 @@ -1,4 +1,4 @@ -/******************************************************************************* +/*** -*-linux-c-*- ************************************************************** Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. 
@@ -39,6 +39,7 @@ extern void e1000_down(struct e1000_adapter *adapter); extern void e1000_reset(struct e1000_adapter *adapter); extern int e1000_set_spd_dplx(struct e1000_adapter *adapter, uint16_t spddplx); +extern void e1000_set_multi(struct net_device *netdev); extern int e1000_setup_rx_resources(struct e1000_adapter *adapter); extern int e1000_setup_tx_resources(struct e1000_adapter *adapter); extern void e1000_free_rx_resources(struct e1000_adapter *adapter); @@ -1629,6 +1630,58 @@ } } +static int e1000_ethtool_setrxall(struct net_device *netdev, uint32_t val) { + unsigned short old_flags = netdev->priv_flags; + if (val) { + netdev->priv_flags |= IFF_ACCEPT_ALL_FRAMES; + } + else { + netdev->priv_flags &= ~(IFF_ACCEPT_ALL_FRAMES); + } + + /* printk("e1000_ethtool_setrxall (%s) val: %d\n", + netdev->name, val); */ + if (old_flags != netdev->priv_flags) { + spin_lock_bh(&netdev->xmit_lock); + if (netif_running(netdev)) { + /*printk("Kicking e1000 for setrxall..\n");*/ + e1000_set_multi(netdev); + } else { + /* Value will be flushed into the hardware when the device is + * brought up. + */ + } + spin_unlock_bh(&netdev->xmit_lock); + } + return 0; +} + +static int e1000_ethtool_set_save_fcs(struct net_device *netdev, uint32_t val) { + spin_lock_bh(&netdev->xmit_lock); + if (val) { + netdev->priv_flags |= IFF_SAVE_FCS; + } + else { + netdev->priv_flags &= ~IFF_SAVE_FCS; + } + spin_unlock_bh(&netdev->xmit_lock); + return 0; +} + +static int e1000_ethtool_get_save_fcs(struct net_device *netdev, uint32_t* val) { + *val = !!(netdev->priv_flags & IFF_SAVE_FCS); + /*printk("GETRXALL, data: %d priv_flags: %hx\n", + edata.data, netdev->priv_flags);*/ + return 0; +} + +static int e1000_ethtool_getrxall(struct net_device *netdev, uint32_t* val) { + *val = !!(netdev->priv_flags & IFF_ACCEPT_ALL_FRAMES); + /*printk("GETRXALL, data: %d priv_flags: %hx\n", + edata.data, netdev->priv_flags);*/ + return 0; +} + struct ethtool_ops e1000_ethtool_ops = { .get_settings = e1000_get_settings, .set_settings = e1000_set_settings, @@ -1664,6 +1717,10 @@ .phys_id = e1000_phys_id, .get_stats_count = e1000_get_stats_count, .get_ethtool_stats = e1000_get_ethtool_stats, + .get_rx_all = e1000_ethtool_getrxall, + .set_rx_all = e1000_ethtool_setrxall, + .set_save_fcs = e1000_ethtool_set_save_fcs, + .get_save_fcs = e1000_ethtool_get_save_fcs, }; void e1000_set_ethtool_ops(struct net_device *netdev) --- linux-2.6.11/drivers/net/e100.c 2005-03-01 23:38:33.000000000 -0800 +++ linux-2.6.11.p4s/drivers/net/e100.c 2005-07-11 16:10:00.000000000 -0700 @@ -1,4 +1,4 @@ -/******************************************************************************* +/*** -*-linux-c-*- *********************************************************** Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved. @@ -364,6 +364,7 @@ cb_ucode = 0x0005, cb_dump = 0x0006, cb_tx_sf = 0x0008, + cb_tx_nc = 0x0010, /* 0 == controler does CRC, ie normal. 
1 == CRC from memory */ cb_cid = 0x1f00, cb_i = 0x2000, cb_s = 0x4000, @@ -400,7 +401,7 @@ /*5*/ u8 X(tx_dma_max_count:7, dma_max_count_enable:1); /*6*/ u8 X(X(X(X(X(X(X(late_scb_update:1, direct_rx_dma:1), tno_intr:1), cna_intr:1), standard_tcb:1), standard_stat_counter:1), - rx_discard_overruns:1), rx_save_bad_frames:1); + rx_save_overruns:1), rx_save_bad_frames:1); /*7*/ u8 X(X(X(X(X(rx_discard_short_frames:1, tx_underrun_retry:2), pad7:2), rx_extended_rfd:1), tx_two_frames_in_fifo:1), tx_dynamic_tbd:1); @@ -530,6 +531,8 @@ multicast_all = (1 << 2), wol_magic = (1 << 3), ich_10h_workaround = (1 << 4), + accept_all_frames = (1 << 5), + save_fcs = (1 << 6), } flags ____cacheline_aligned; enum mac mac; @@ -961,6 +964,16 @@ config->promiscuous_mode = 0x1; /* 1=on, 0=off */ } + if(nic->flags & accept_all_frames) { + config->rx_save_overruns = 0x1; /* 1=save, 0=discard */ + config->rx_save_bad_frames = 0x1; /* 1=save, 0=discard */ + config->rx_discard_short_frames = 0x0; /* 1=discard, 0=save */ + } + + if(nic->flags & save_fcs) { + config->rx_crc_transfer = 0x1; /* 1=save, 0=discard */ + } + if(nic->flags & multicast_all) config->multicast_all = 0x1; /* 1=accept, 0=no */ @@ -1140,6 +1153,16 @@ else nic->flags &= ~promiscuous; + if(netdev->flags & IFF_ACCEPT_ALL_FRAMES) + nic->flags |= accept_all_frames; + else + nic->flags &= ~accept_all_frames; + + if(netdev->flags & IFF_SAVE_FCS) + nic->flags |= save_fcs; + else + nic->flags &= ~save_fcs; + if(netdev->flags & IFF_ALLMULTI || netdev->mc_count > E100_MAX_MULTICAST_ADDRS) nic->flags |= multicast_all; @@ -1279,6 +1302,19 @@ struct sk_buff *skb) { cb->command = nic->tx_command; + +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + /* Use the last 4 bytes of the SKB payload packet as the CRC, used for + * testing, ie sending frames with bad CRC. + */ + if (unlikely(skb->general_flags & DONT_DO_TX_CRC)) { + cb->command |= __constant_cpu_to_le16(cb_tx_nc); + } + else { + cb->command &= ~__constant_cpu_to_le16(cb_tx_nc); + } +#endif + cb->u.tcb.tbd_array = cb->dma_addr + offsetof(struct cb, u.tcb.tbd); cb->u.tcb.tcb_byte_count = 0; cb->u.tcb.threshold = nic->tx_threshold; @@ -1486,7 +1522,21 @@ skb_reserve(skb, sizeof(struct rfd)); skb_put(skb, actual_size); skb->protocol = eth_type_trans(skb, nic->netdev); - + /* NOTE: The config step turns on acceptance of various bogus frames + * when in loopback or promisc mode, but this code will still throw + * them away unless you also set the new 'accept_all_frames' flag. + * Perhaps the implementors meant to accept the bogus frames in + * promisc mode here?? --Ben + */ + if(unlikely(!(nic->flags & accept_all_frames))) { + if(actual_size > nic->netdev->mtu + VLAN_ETH_HLEN) { + /* Received oversized frame */ + nic->net_stats.rx_over_errors++; + } + /* We're accepting all, so pass the bogons on up the stack. 
*/ + goto process_skb; + } + if(unlikely(!(rfd_status & cb_ok))) { /* Don't indicate if hardware indicates errors */ nic->net_stats.rx_dropped++; @@ -1497,6 +1547,7 @@ nic->net_stats.rx_dropped++; dev_kfree_skb_any(skb); } else { + process_skb: nic->net_stats.rx_packets++; nic->net_stats.rx_bytes += actual_size; nic->netdev->last_rx = jiffies; @@ -1810,6 +1861,63 @@ return err; } +static int e100_set_rxall(struct net_device *netdev, u32 data) +{ + struct nic *nic = netdev->priv; + if (data) { + netdev->priv_flags |= IFF_ACCEPT_ALL_FRAMES; + nic->flags |= accept_all_frames; + } + else { + netdev->priv_flags &= ~(IFF_ACCEPT_ALL_FRAMES); + nic->flags &= ~accept_all_frames; + } + + e100_exec_cb(nic, NULL, e100_configure); + + return 0; +} + +static int e100_get_rxall(struct net_device *netdev, u32* data) +{ + struct nic *nic = netdev->priv; + if (nic->flags & accept_all_frames) { + *data = 1; + } + else { + *data = 0; + } + + return 0; +} + +static int e100_set_save_fcs(struct net_device *netdev, u32 data) +{ + struct nic *nic = netdev->priv; + if (data) { + nic->flags |= save_fcs; + } + else { + nic->flags &= ~save_fcs; + } + e100_exec_cb(nic, NULL, e100_configure); + + return 0; +} + +static int e100_get_save_fcs(struct net_device *netdev, u32* data) +{ + struct nic *nic = netdev->priv; + if (nic->flags & save_fcs) { + *data = 1; + } + else { + *data = 0; + } + + return 0; +} + static void e100_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *info) { @@ -2105,6 +2213,10 @@ .phys_id = e100_phys_id, .get_stats_count = e100_get_stats_count, .get_ethtool_stats = e100_get_ethtool_stats, + .set_rx_all = e100_set_rxall, + .get_rx_all = e100_get_rxall, + .set_save_fcs = e100_set_save_fcs, + .get_save_fcs = e100_get_save_fcs, }; static int e100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) --- linux-2.6.11/include/asm-i386/socket.h 2005-03-01 23:37:49.000000000 -0800 +++ linux-2.6.11.p4s/include/asm-i386/socket.h 2005-07-11 16:06:35.000000000 -0700 @@ -47,4 +47,8 @@ #define SO_PEERSEC 31 +/* Instruct lower device to not calculate the frame + * checksum. Useful only for testing, afaik. --Ben */ +#define SO_NOFCS 50 + #endif /* _ASM_SOCKET_H */ --- linux-2.6.11/include/linux/skbuff.h 2005-03-01 23:38:38.000000000 -0800 +++ linux-2.6.11.p4s/include/linux/skbuff.h 2005-07-11 16:06:35.000000000 -0700 @@ -273,6 +273,11 @@ #endif +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC +#define DONT_DO_TX_CRC (1<<0) + unsigned int general_flags; +#endif + /* These elements must be at the end, see alloc_skb() for details. 
*/ unsigned int truesize; atomic_t users; --- linux-2.6.11/net/core/skbuff.c 2005-03-01 23:38:17.000000000 -0800 +++ linux-2.6.11.p4s/net/core/skbuff.c 2005-07-11 16:06:35.000000000 -0700 @@ -155,6 +155,10 @@ skb_shinfo(skb)->tso_size = 0; skb_shinfo(skb)->tso_segs = 0; skb_shinfo(skb)->frag_list = NULL; + +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + skb->general_flags = 0; +#endif out: return skb; nodata: --- linux-2.6.11/include/net/sock.h 2005-03-01 23:38:17.000000000 -0800 +++ linux-2.6.11.p4s/include/net/sock.h 2005-07-11 16:06:35.000000000 -0700 @@ -391,6 +391,7 @@ SOCK_DESTROY, SOCK_BROADCAST, SOCK_TIMESTAMP, + SOCK_DONT_DO_LL_FCS, }; static inline void sock_set_flag(struct sock *sk, enum sock_flags flag) --- linux-2.6.11/net/core/sock.c 2005-03-01 23:38:17.000000000 -0800 +++ linux-2.6.11.p4s/net/core/sock.c 2005-07-11 16:06:35.000000000 -0700 @@ -332,6 +332,18 @@ sock_warn_obsolete_bsdism("setsockopt"); break; +#ifdef CONFIG_SUPPORT_SEND_BAD_CRC + case SO_NOFCS: + /* printk("SO_NOFCS, valbool: %d, sk: %p\n", + (int)(valbool), sk); */ + if (valbool) { + sk->sk_flags |= SOCK_DONT_DO_LL_FCS; + } + else { + sk->sk_flags &= ~(SOCK_DONT_DO_LL_FCS); + } + break; +#endif case SO_PASSCRED: sock->passcred = valbool; break; --- linux-2.6.11/include/linux/if_redirdev.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.11.p4s/include/linux/if_redirdev.h 2005-07-11 16:06:35.000000000 -0700 @@ -0,0 +1,34 @@ +/* -*- linux-c -*- */ +#ifndef _LINUX_IF_REDIRDEV_H +#define _LINUX_IF_REDIRDEV_H + +/* the ioctl commands */ + +#define REDIRDEV_ADD 2090 +#define REDIRDEV_DEL 2091 +/* If this IOCTL succeedes, we are a Redirect-Device + interface, otherwise, we are not. */ +#define REDIRDEV_IS_REDIRDEV 2092 +#define REDIRDEV_GET_BY_IDX 2093 +#define REDIRDEV_GET_BY_NAME 2094 + +#ifdef __KERNEL__ +#include +#include +extern int (*redirdev_ioctl_hook)(void*); + +#endif + +/* Request and response */ +struct redirdev_ioctl { + u32 cmd; + u32 ifidx; /* when getting info by idx */ + +#define RDD_ASSOCIATED (1<<0) + u32 flags; /* 1<<0: Is the interface associated with tx-dev or not */ + u32 not_used; /* explicitly align 64-bit */ + char ifname[IFNAMSIZ]; + char txifname[IFNAMSIZ]; +}; + +#endif --- linux-2.6.11/net/redir/redirdev.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.11.p4s/net/redir/redirdev.c 2005-08-19 18:27:07.000000000 -0700 @@ -0,0 +1,889 @@ +/* -*- linux-c -*- +####################################################################### +# +# (C) Copyright 2005 +# Ben Greear +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307 USA +####################################################################### +# Notes: +# +# This file implements the Redirect-net-device module. A pair of +# redir devices linked to each other act like two ethernet interfaces +# connected with a cross-over cable. 
+# +# This provides an IOCTL interface which allows you to +# It uses an IOCTL interface which allows you to +# +# 1. create redirect device +# 2. delete redirect device +# +####################################################################### +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef CONFIG_PROC_FS +#include +#define RDD_PROC_DIR "redirdev" +#define RDD_PROC_CFG "config" +static struct proc_dir_entry *rdd_proc_dir; +static struct proc_dir_entry *rdd_proc_cfg; +#endif + +#include "redirdev.h" + +/* Defined in socket.c */ +void redirdev_ioctl_set(int (*hook)(void*)); +static int redirdev_device_event(struct notifier_block *unused, + unsigned long event, void *ptr); + +static struct notifier_block redirdev_notifier_block = { + .notifier_call = redirdev_device_event, +}; + +/*********************************************************/ +/* defines */ +/*********************************************************/ + +/* Must hold this lock to make any changes to the Redirect-Device structures. + */ +static spinlock_t rdd_cfg_lock = SPIN_LOCK_UNLOCKED; + + +/*********************************************************/ +/* file scope variables */ +/*********************************************************/ + +static struct redirdev* rdds = NULL; + +static atomic_t rdd_dev_counter; + +static int debug_lvl = 0; + + +/*********************************************************/ +/* forward declarations */ +/*********************************************************/ + +#ifdef RDD_CONFIG_PROC_FS +static int read_rdd_glbl(char *page, char **start, off_t off, + int count, int *eof, void *data); +static int write_rdd_glbl(struct file *file, const char *buffer, + unsigned long count, void *data); +#endif + + + +/*********************************************************/ +/* function definitions */ +/*********************************************************/ + + +#define iswhitespace(x)\ + ((x) == ' ' || (x) == '\n' || (x) == '\r' || (x) == '\r' ) + +#define skip_whitespace(x) { while (iswhitespace(*x)) (x)++; } + +static int copy_next_word(char *dst, char *src, int len) { + char *p; + for (p=src; p < src + len ; p++) { + if ( iswhitespace(*p)) + break; + *dst++ = *p; + } + return p - src; +} + +/* Grab the RDD lock before calling this method. */ +struct redirdev* rdd_find_dev_by_name(const char* ifname) { + struct redirdev* d; + //printk("finding port for underlying ifname: %s\n", ifname); + for (d = rdds; d; d = d->next) { + //printk("Testing port: %p name: %s\n", port, port->dev->name); + if (strcmp(d->dev->name, ifname) == 0) { + break; + } + } + //printk("done finding port: %p\n", port); + return d; +} + +/* Grab the RDD lock before calling this method. */ +struct redirdev* rdd_find_dev_by_txdev_name(const char* ifname) { + struct redirdev* d; + for (d = rdds; d; d = d->next) { + if (d->tx_dev) { + if (strcmp(d->tx_dev->name, ifname) == 0) { + break; + } + } + } + return d; +} + + +static struct net_device_stats *redirdev_get_stats(struct net_device *dev) +{ + struct redirdev* rdd = dev->priv; + + return &rdd->statistics; +} + +/** Bump our tx counters and then act as if this was received from + * the network on the tx_dev device. Since we don't do any CSUM + * activity in this driver, make sure SKB as marked as not checksummed + * yet. 
+ */ +static int redirdev_xmit(struct sk_buff *iskb, struct net_device *dev) { + struct redirdev* rdd = dev->priv; + struct net_device_stats* txs; + + if (unlikely(!rdd->tx_dev)) { + printk("ERROR: tx_dev null in redirdev_xmit.\n"); + kfree_skb(iskb); + rdd->statistics.tx_errors++; + return 0; + } + + //printk("%s: dev: %s tx_dev: %s\n", + // __PRETTY_FUNCTION__, dev->name, rdd->tx_dev->name); + + if (netif_running(rdd->tx_dev)) { + + /* We need to free the old skb so that the socket + * account works correctly. We'll make a copy and + * then forward that to the other device. + */ + + struct sk_buff* skb = skb_clone(iskb, GFP_ATOMIC); + + kfree_skb(iskb); //Let the sending socket reclaim it's memory + + if (!skb) { + rdd->statistics.tx_dropped++; + } + else { + int rv; + skb->dev = rdd->tx_dev; + + /* We didn't calculate the csum, so mark as such. */ + skb->ip_summed = CHECKSUM_UNNECESSARY;//NONE; + + rdd->statistics.tx_packets++; + rdd->statistics.tx_bytes += skb->len; + + txs = rdd->tx_dev->get_stats(rdd->tx_dev); + txs->rx_packets++; + txs->rx_bytes += skb->len; + + /* Call this on the receiving net device. This assumes + * that all devices are ethernet or ethernet-like. Valid + * for now. TODO: Generalize tx_dev ?? + */ + skb->pkt_type = PACKET_HOST; //Reset this to default. + + skb->protocol = eth_type_trans(skb, skb->dev); + + if (skb->dst) { + dst_release(skb->dst); + skb->dst = NULL; + } + + //printk("skb->protocol: %x pkt_type: %u\n", + // (unsigned int)(skb->protocol), + // (unsigned int)(skb->pkt_type)); + rv = netif_rx(skb); + if (rv != NET_RX_SUCCESS) { + // TODO: Remove + //printk("netif_rx rv: %i\n", (int)(rv)); + } + rdd->tx_dev->last_rx = jiffies; + rdd->dev->trans_start = jiffies; + } + } + else { + /* Chunk the packet and log some errors */ + rdd->statistics.tx_errors++; + kfree_skb(iskb); + } + return 0; +}/* redir xmit */ + +static int redirdev_open(struct net_device *dev) { + struct redirdev* rdd = dev->priv; + if (!rdd->tx_dev) { + rdd->tx_dev = dev_get_by_name(rdd->tx_dev_name); + } + if (!rdd->tx_dev) { + printk("redir: Could not start device %s because tx_dev: %s is not found.\n", + dev->name, rdd->tx_dev_name); + return -ENODEV; + } + else { + printk("redirdev: Starting device: %s\n", dev->name); + netif_start_queue(dev); + return 0; + } +} + +//static void redirdev_set_multicast_list(struct net_device *dev) { + /* TODO ??? 
*/ +//} + +static int redirdev_stop(struct net_device *dev) { + struct redirdev* rdd = dev->priv; + printk("redirdev: stopping device: %s\n", dev->name); + netif_stop_queue(dev); + if (rdd->tx_dev) { + struct net_device* tmp = rdd->tx_dev; + rdd->tx_dev = NULL; + printk(" releasing reference to dev: %s\n", tmp->name); + dev_put(tmp); + } + printk(" done stopping %s\n", dev->name); + return 0; +} + + +void redirdev_dev_destructor(struct net_device *dev) { + atomic_dec(&rdd_dev_counter); + if (dev->priv) { + //printk("dst: %s", dev->name); + kfree(dev->priv); + dev->priv = NULL; + } + else { + //printk("dst2: %s", dev->name); + } +} + +int redirdev_change_mtu(struct net_device *dev, int new_mtu) { + dev->mtu = new_mtu; + return 0; +} + +static int redirdev_create(const char* newifname, + const char* txdevname) { + struct redirdev *rdd = NULL; + struct net_device* td = NULL; + struct net_device* nnd = NULL; + struct net_device* txd = NULL; + unsigned long flags; + int rv; + + if ((strlen(txdevname) == 0) || + (strlen(newifname) == 0)) { + printk("redirdev: ERROR: Must specify ifname and txifname" + " when creating redirect devices!\n"); + rv = -ENODEV; + goto out; + } + + printk("redirdev: creating interface: -:%s:- with tx_dev: -:%s:-\n", + newifname, txdevname); + + + //printk("malloc "); + if ((rdd = kmalloc(sizeof(*rdd), GFP_KERNEL)) == NULL) { + //printk("redirdev: kmalloc failure\n"); + rv = -ENOMEM; + goto outfree; + } + memset(rdd, 0, sizeof(*rdd)); + + //printk("4 "); + if ((nnd = kmalloc(sizeof(struct net_device), GFP_KERNEL)) == NULL) { + //printk("redirdev: kmalloc net_device failure\n"); + rv = -ENOMEM; + goto outfree; + } + memset(nnd, 0, sizeof(struct net_device)); + + if ((td = dev_get_by_name(newifname)) != NULL) { + //printk("redirdev: device by that name already exists\n"); + rv = -EEXIST; + goto outfree; + } + + /* If it's not here yet, no problem, will associate later */ + txd = dev_get_by_name(txdevname); + strncpy(rdd->tx_dev_name, txdevname, IFNAMSIZ); + + //printk("4 "); + rdd->dev = nnd; + + //printk("5 "); + strncpy(rdd->dev->name, newifname, IFNAMSIZ-1); + rdd->dev->name[IFNAMSIZ-1] = 0; //Ensure null termination. 
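+ /* ether_setup() fills in the generic Ethernet defaults (type, MTU, broadcast address, header ops); the redirdev-specific handlers and a mostly-random MAC address are plugged in just below. */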
+ ether_setup(rdd->dev); + + dev_hold(rdd->dev); /* RDD code holds reference */ + + rdd->dev->priv = rdd; + rdd->tx_dev = txd; + + //printk("6 "); + rdd->dev->get_stats = redirdev_get_stats; + rdd->dev->hard_start_xmit = redirdev_xmit; + rdd->dev->change_mtu = redirdev_change_mtu; + rdd->dev->open = redirdev_open; + rdd->dev->stop = redirdev_stop; + rdd->dev->destructor = redirdev_dev_destructor; + + // Defaults are fine for these + //rdd->dev->rebuild_header = redirdev_dev_rebuild_header; + //rdd->dev->set_multicast_list = redirdev_set_multicast_list; + //rdd->dev->hard_header = redirdev_hard_header; + + rdd->dev->dev_addr[0] = 0; + rdd->dev->dev_addr[1] = net_random(); + rdd->dev->dev_addr[2] = net_random(); + rdd->dev->dev_addr[3] = net_random(); + rdd->dev->dev_addr[4] = net_random(); + rdd->dev->dev_addr[5] = net_random(); + + /* No qdisc for us */ + rdd->dev->qdisc = NULL; + rdd->dev->tx_queue_len = 0; + + //printk("redirdev: created redirect-device %p\n", vlan); + + /* link to list */ + //printk("8 "); + spin_lock_irqsave(&rdd_cfg_lock, flags); + rdd->next = rdds; + rdds = rdd; + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + + //printk("End of redirdev_create, registering rdd->dev: %p (%s)\n", + // rdd->dev, rdd->dev->name); + + register_netdev(rdd->dev); + + //printk("End of mac_vlan create2\n"); + + atomic_inc(&rdd_dev_counter); + //printk("9\n"); + rv = 0; + goto out; + + /* Error case, clean up vlan memory */ + outfree: + if (rdd) { + kfree(rdd); + } + if (nnd) { + kfree(nnd); + } + if (td) { + dev_put(td); + } + if (txd) { + dev_put(txd); + } + out: + return rv; +} /* redirdev_create */ + +static int redirdev_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) { + struct net_device* dev = ptr; + struct redirdev* rdd; + unsigned long flags; + + spin_lock_irqsave(&rdd_cfg_lock, flags); + rdd = rdd_find_dev_by_txdev_name(dev->name); + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + + if (!rdd) { + //printk("redirdev: Ignoring event: %lu for device: %s\n", + // event, dev->name); + goto out; + } + + + /* It is OK that we do not hold the group lock right now, + * as we run under the RTNL lock. + */ + + switch (event) { + case NETDEV_CHANGE: + case NETDEV_UP: + case NETDEV_DOWN: + //printk("redirdev: Ignoring change/up/down for device: %s\n", + // dev->name); + /* Ignore for now */ + break; + + case NETDEV_UNREGISTER: + /* Stop the redir-dev too */ + printk("Device: %s is going away, closing redir-device: %s too.\n", + dev->name, rdd->dev->name); + dev_close(rdd->dev); + break; + + }; + +out: + return NOTIFY_DONE; +} + +/* Has locking internally */ +int redirdev_cleanup(const char* ifname, int force) { + struct redirdev* d; //walker + struct redirdev* prev = NULL; + unsigned long flags; + int rv; + + //printk(__FUNCTION__"(%p)\n",vlan); + //printk("rdd_cln: %s", ifname); + + spin_lock_irqsave(&rdd_cfg_lock, flags); + for (d = rdds; d; d = d->next) { + if (strcmp(d->dev->name, ifname) == 0) { + if ((d->dev->flags & IFF_UP) && (!force)) { + rv = -EBUSY; + goto unlockout; + } + + // Un-link from the list. 
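+ // The entry is only unlinked while rdd_cfg_lock is held; dev_close()/unregister_netdev() below run after the lock is dropped, since they can sleep.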
+ if (prev) { + prev->next = d->next; + d->next = NULL; + } + else { + // This means we're first in line + rdds = d->next; + d->next = NULL; + } + + break; + } + prev = d; + } + + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + + if (d) { + if (d->dev->flags & IFF_UP) { + BUG_ON(!force); + + rtnl_lock(); + dev_close(d->dev); + rtnl_unlock(); + } + + if (d->tx_dev) { + dev_put(d->tx_dev); + } + + dev_put(d->dev); + unregister_netdev(d->dev); + rv = 0; + } + else { + rv = -ENODEV; + } + goto out; + + unlockout: + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + + out: + return rv; +} /* redirdev cleanup */ + + +static int redirdev_ioctl_deviceless_stub(void* arg) { + int err = 0; + struct redirdev_ioctl req; + unsigned long flags; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (copy_from_user(&req, arg, sizeof(req))) + return -EFAULT; + + switch (req.cmd) { + case REDIRDEV_ADD: { + /* + * create a new redirect device + */ + req.txifname[IFNAMSIZ-1] = '\0'; + req.ifname[IFNAMSIZ-1] = '\0'; + printk("Creating redir via ioctl, ifname: %s txifname: %s\n", + req.ifname, req.txifname); + + /* Has internal locking. */ + err = redirdev_create(req.ifname, req.txifname); + break; + } + case REDIRDEV_DEL: { + /* + * destroy a redirect device + */ + req.ifname[IFNAMSIZ-1] = '\0'; + + /* Has internal locking */ + err = redirdev_cleanup(req.ifname, 0); + break; + } + + case REDIRDEV_IS_REDIRDEV: { + /* + * Give user-space a chance of determining if we are a redirect-device + * or not. + * (If the IOCTL fails, we are not, otherwise we are.) + */ + struct redirdev* rdd; + req.ifname[IFNAMSIZ-1] = '\0'; + + spin_lock_irqsave(&rdd_cfg_lock, flags); + /* find the port in question */ + rdd = rdd_find_dev_by_name(req.ifname); + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + + if (!rdd) { + /* printk("device: %s is NOT a REDIR device\n", ifname); */ + err = -ENODEV; + } + else { + /* printk("device: %s IS a MAC-VLAN\n", ifname); */ + err = 0; + } + break; + } + case REDIRDEV_GET_BY_IDX: { + /* + * get the nth redirdev name + */ + struct redirdev *rdd; + int n = req.ifidx; + + spin_lock_irqsave(&rdd_cfg_lock, flags); + /* find the port in question */ + for (rdd = rdds; rdd && n; rdd = rdd->next, n--); + if (!rdd) { + err = -ENODEV; + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + } + else { + memcpy(req.ifname, rdd->dev->name, IFNAMSIZ); + memcpy(req.txifname, rdd->tx_dev_name, IFNAMSIZ); + if (rdd->tx_dev) { + req.flags |= RDD_ASSOCIATED; + } + else { + req.flags &= ~RDD_ASSOCIATED; + } + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + + if (copy_to_user(arg, &req, sizeof(req))) { + err = -EFAULT; + } + } + break; + } + case REDIRDEV_GET_BY_NAME: { + /* + * get info on the specified redirect device + */ + struct redirdev *rdd; + req.ifname[IFNAMSIZ-1] = '\0'; + + spin_lock_irqsave(&rdd_cfg_lock, flags); + /* find the port in question */ + rdd = rdd_find_dev_by_name(req.ifname); + + if (!rdd) { + err = -ENODEV; + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + } + else { + memcpy(req.ifname, rdd->dev->name, IFNAMSIZ); + memcpy(req.txifname, rdd->tx_dev_name, IFNAMSIZ); + if (rdd->tx_dev) { + req.flags |= RDD_ASSOCIATED; + } + else { + req.flags &= ~RDD_ASSOCIATED; + } + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + + if (copy_to_user(arg, &req, sizeof(req))) { + err = -EFAULT; + } + } + break; + } + default: + printk("ERROR: Un-supported redirdev ioctl command: %u\n", + (unsigned int)(req.cmd)); + send_sig(SIGSEGV, current, 1); // TODO: Remove + err = -EOPNOTSUPP; + break; + }//switch + + /* 
printk("Returning err: %i\n", err); */ + return err; +}/* ioctl handler */ + + +#ifdef RDD_CONFIG_PROC_FS + +static int read_rdd_glbl(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int ret = -1; + char *p = page; + int mx_len = (4096 - (p - page)); + + if (! *eof ) { + struct redirdev* rdd; + int cnt; + unsigned long flags; + + /* Global counts here... */ + p += sprintf(p, "Redirect-Device module:\n"); + + p += sprintf(p, " redirect-devices: %i\n", + atomic_read(&rdd_dev_counter)); + + spin_lock_irqsave(&rdd_cfg_lock, flags); + rdd = rdds; + while (rdd) { + if (rdd->tx_dev) { + p += sprintf(p, " %s tx-dev: %s\n", + rdd->dev->name, rdd->tx_dev->name); + } + else { + p += sprintf(p, " %s tx-dev: [%s]\n", + rdd->dev->name, rdd->tx_dev_name); + } + + /* catch overflow */ + cnt = p - page; + if (cnt > (mx_len - 60)) { + if (mx_len - cnt >= 20) { + p += sprintf(p, "OUT_OF_SPACE!\n"); + } + break; + } + + rdd = rdd->next; + } + + ret = p - page; + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + } + return ret; +} /* read_rdd_glbl */ + +static int write_rdd_glbl(struct file *file, const char *buffer, + unsigned long count, void *data) { + char *p; + const char *end; + int ret=count; + int len; + char dev_name[2][IFNAMSIZ]; + char* tmps = NULL; + int tmp_rv; + char ss[50]; + end = buffer + count; + + snprintf(ss, 50, "redir proc cmd: %%.%lus", count); + + printk(ss, buffer); + + for (p= (char *) buffer; p< end ; ) { + if (iswhitespace(*p)) { + p++; + continue; + } + + memset(dev_name[0], 0 ,IFNAMSIZ); + memset(dev_name[1], 0 ,IFNAMSIZ); + + len = strlen("add_rdd "); + if (strncmp(p, "add_rdd ", len)==0) + { + p += len; + + if ( (p + IFNAMSIZ) <= end) + p += copy_next_word(dev_name[0], p, IFNAMSIZ); + else + p += copy_next_word(dev_name[0], p, end-p ); + + skip_whitespace(p); + + if ( (p + IFNAMSIZ) <= end) + p += copy_next_word(dev_name[1], p, IFNAMSIZ); + else + p += copy_next_word(dev_name[1], p, end-p ); + + skip_whitespace(p); + + /* This can fail, but not sure how to return failure + * to user-space here. + * NOTE: Does it's own internal locking. 
+ */ + redirdev_create(dev_name[0], dev_name[1]); + goto forend; + } + + len = strlen("remove_rdd "); + if (strncmp(p,"remove_rdd ", len)==0) { + p += len; + + if ( (p + IFNAMSIZ) <= end) + p += copy_next_word(dev_name[0], p, IFNAMSIZ); + else + p += copy_next_word(dev_name[0], p, end-p ); + + skip_whitespace(p); + + tmp_rv = redirdev_cleanup(dev_name[0], 0); + if (tmp_rv < 0) { + printk("redirdev: ERROR: Failed redirdev_cleanup, error: %d\n", tmp_rv); + } + + goto forend; + } + + len = strlen("debug_lvl "); + if (strncmp(p,"debug_lvl ",len)==0) + { + p += len; + + if ( (p + IFNAMSIZ) <= end) + p += copy_next_word(dev_name[0], p, IFNAMSIZ); + else + p += copy_next_word(dev_name[0], p, end-p ); + + skip_whitespace(p); + + debug_lvl = simple_strtoul(dev_name[0], &tmps, 10); + goto forend; + } + + printk("ERROR: Unsupported command\n"); + + forend: + p++; + } + + return ret; +} /* write_rdd_glbl */ + +#endif + + +static int __init redirdev_init(void) { + int err; + printk(KERN_INFO "Redirect-Network-Device: 1.0 \n"); + + rdds = NULL; + + redirdev_ioctl_set(redirdev_ioctl_deviceless_stub); + +#ifdef RDD_CONFIG_PROC_FS + + rdd_proc_dir = proc_mkdir(RDD_PROC_DIR, proc_net); + if (rdd_proc_dir) { + rdd_proc_cfg = create_proc_read_entry(RDD_PROC_CFG, S_IRUGO, rdd_proc_dir, + read_rdd_glbl, NULL); + if (rdd_proc_cfg) { + rdd_proc_cfg->write_proc = write_rdd_glbl; + rdd_proc_cfg->owner = THIS_MODULE; + } + } +#endif + + /* Register us to receive netdevice events */ + err = register_netdevice_notifier(&redirdev_notifier_block); + if (err < 0) { + printk("ERROR: redirdev: Failed to register netdevice notifier callback!\n"); + } + + return 0; +} + +static void redirdev_module_cleanup(void) { + char nm[IFNAMSIZ+1]; + unsigned long flags; + + redirdev_ioctl_set(NULL); + + spin_lock_irqsave(&rdd_cfg_lock, flags); + /* destroy all redirect devices */ + while (rdds) { + strncpy(nm, rdds->dev->name, IFNAMSIZ); + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + if (redirdev_cleanup(nm, 1) < 0) { + printk("redirdev: ERROR: Failed redir_cleanup in redir_module_cleanup\n"); + + } + spin_lock_irqsave(&rdd_cfg_lock, flags); + } + spin_unlock_irqrestore(&rdd_cfg_lock, flags); + + /* Un-register us from receiving netdevice events */ + unregister_netdevice_notifier(&redirdev_notifier_block); + +#ifdef RDD_CONFIG_PROC_FS + if (rdd_proc_cfg) { + remove_proc_entry(RDD_PROC_CFG, rdd_proc_dir); + rdd_proc_cfg = NULL; + } + if (rdd_proc_dir) { + remove_proc_entry(RDD_PROC_DIR, proc_net); + rdd_proc_dir = NULL; + } +#endif + +}/* redirdev_cleanup */ + + +module_init(redirdev_init); +module_exit(redirdev_module_cleanup); +MODULE_LICENSE("GPL"); --- linux-2.6.11/net/redir/redirdev.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.11.p4s/net/redir/redirdev.h 2005-07-11 16:06:35.000000000 -0700 @@ -0,0 +1,39 @@ +/* -*- linux-c -*- + +# (C) Copyright 2005 +# Ben Greear +# Released under the GPL version 2 +*/ + +#ifndef REDIRDEV_KERNEL_H_FILE__ +#define REDIRDEV_KERNEL_H_FILE__ + + +/* Proc file related */ +#define RDD_MX_ARG_LEN 80 + +#ifdef CONFIG_PROC_FS + +/* To use or not to use the PROC-FS */ +#define RDD_CONFIG_PROC_FS + +#endif + + +/*********************************************************/ +/* types */ +/*********************************************************/ +struct redirdev { + /* Can be NULL if not yet associated */ + struct net_device* tx_dev; /* Call rx on this device when a packet + * is _transmitted_ on this redirect + * device. 
+ */ + struct net_device* dev; /* the device struct this belongs too */ + struct redirdev *next; + char tx_dev_name[IFNAMSIZ]; + struct net_device_stats statistics; +}; + +#endif + --- linux-2.6.11/net/redir/Makefile 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.11.p4s/net/redir/Makefile 2005-07-11 16:06:35.000000000 -0700 @@ -0,0 +1,10 @@ +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definition is now in the main makefile... + +obj-$(CONFIG_REDIRDEV) := redirdev.o + +
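
A quick way to exercise the redirect-device module is its proc interface, /proc/net/redirdev/config (the path follows from RDD_PROC_DIR and RDD_PROC_CFG in redirdev.c, and writes are parsed by write_rdd_glbl above). Writing "add_rdd <newdev> <txdev>" creates a device, and creating two devices that point at each other gives the cross-over-cable behavior described in the file's banner comment. The sketch below is illustrative only; rdd0/rdd1 are example names, and the same commands can of course be written with a simple shell echo instead.

/* mk_rdd_pair.c - minimal sketch: create a linked pair of redirect devices
 * via /proc/net/redirdev/config.  Run as root with the redirdev module
 * loaded. */
#include <stdio.h>

static int rdd_cmd(const char *cmd)
{
    FILE *f = fopen("/proc/net/redirdev/config", "w");
    if (!f) {
        perror("open /proc/net/redirdev/config");
        return -1;
    }
    fputs(cmd, f);   /* one command per write, parsed by write_rdd_glbl */
    fclose(f);
    return 0;
}

int main(void)
{
    /* rdd0 transmits into rdd1 and vice versa; the tx_dev does not have to
     * exist yet, redirdev_open() looks it up by name when the device is
     * brought up. */
    rdd_cmd("add_rdd rdd0 rdd1\n");
    rdd_cmd("add_rdd rdd1 rdd0\n");
    return 0;
}

Once both devices are brought up, redirdev_xmit() forwards every frame transmitted on one of them into the receive path of the other.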
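
The per-port MAC-VLAN proc handler (write_mvl_port above) accepts three commands: "add_vlan <id>" (parsed base 10 and handed to macvlan_vlan_create()), "set_flags <hex>" (for example 1 for MVL_FILTER_ON_DEST from macvlan.h), and "remove_vlan <ifname>". The proc path for a given port is created in a part of the patch not shown here, so this sketch takes the file name on the command line rather than assuming one.

/* mvl_port_cfg.c - sketch: drive a MAC-VLAN port's proc config file.
 * usage: mvl_port_cfg <path-to-port-config-file> */
#include <stdio.h>

int main(int argc, char **argv)
{
    FILE *f;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <macvlan-port-config-file>\n", argv[0]);
        return 1;
    }
    f = fopen(argv[1], "w");
    if (!f) {
        perror("fopen");
        return 1;
    }
    fputs("add_vlan 7\n", f);    /* create MAC-VLAN number 7 on this port */
    fputs("set_flags 1\n", f);   /* 0x1 == MVL_FILTER_ON_DEST             */
    fclose(f);
    return 0;
}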
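
The new ETHTOOL_SETRXALL/GETRXALL and ETHTOOL_SETRXFCS/GETRXFCS commands are plain ethtool_value operations, so they can be driven through the normal SIOCETHTOOL ioctl once a driver provides the set_rx_all/set_save_fcs hooks (e100 and e1000 in this patch); ETHTOOL_GNDSTATS works the same way but copies a struct net_device_stats back instead. A minimal sketch, assuming the command values from the ethtool.h hunk above, a local copy of the kernel's ethtool_value layout, and a placeholder interface name of eth0; set operations need CAP_NET_ADMIN.

/* rx_all_ctl.c - sketch: enable accept-all-frames and save-FCS on a port. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/sockios.h>

#define ETHTOOL_SETRXALL 0x00000072   /* from the patched ethtool.h */
#define ETHTOOL_SETRXFCS 0x00000074

struct ethtool_value {                /* local copy of the kernel struct */
    unsigned int cmd;
    unsigned int data;
};

static int ethtool_set(int fd, const char *ifname, unsigned int cmd,
                       unsigned int val)
{
    struct ethtool_value ev;
    struct ifreq ifr;

    ev.cmd = cmd;
    ev.data = val;
    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
    ifr.ifr_data = (char *)&ev;
    return ioctl(fd, SIOCETHTOOL, &ifr);
}

int main(void)
{
    int fd = socket(AF_INET, SOCK_DGRAM, 0);

    if (fd < 0) {
        perror("socket");
        return 1;
    }
    if (ethtool_set(fd, "eth0", ETHTOOL_SETRXALL, 1) < 0)
        perror("ETHTOOL_SETRXALL");   /* sets IFF_ACCEPT_ALL_FRAMES */
    if (ethtool_set(fd, "eth0", ETHTOOL_SETRXFCS, 1) < 0)
        perror("ETHTOOL_SETRXFCS");   /* sets IFF_SAVE_FCS */
    close(fd);
    return 0;
}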
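
Finally, the SO_NOFCS socket option (value 50 in the asm-i386/socket.h hunk) together with the af_packet.c changes lets a raw packet socket carry its own 4-byte FCS: with the option set, the skb is flagged DONT_DO_TX_CRC, the MTU check gains four bytes of slack, and a driver built with CONFIG_SUPPORT_SEND_BAD_CRC does not append an FCS, so the last four bytes placed in the buffer go out as the frame's CRC. A hedged sketch; eth0 is a placeholder test port, the FCS bytes are deliberately wrong, and root is required for the packet socket.

/* bad_fcs_tx.c - sketch: send one broadcast frame with a bogus FCS. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>

#define SO_NOFCS 50   /* value this patch adds to include/asm-i386/socket.h */

int main(void)
{
    unsigned char frame[64];
    struct sockaddr_ll sll;
    int one = 1;
    int fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

    if (fd < 0) {
        perror("socket");
        return 1;
    }
    /* Ask the stack/driver not to generate the FCS for this socket. */
    if (setsockopt(fd, SOL_SOCKET, SO_NOFCS, &one, sizeof(one)) < 0)
        perror("setsockopt(SO_NOFCS)");

    memset(frame, 0, sizeof(frame));
    memset(frame, 0xff, ETH_ALEN);               /* broadcast destination */
    frame[12] = 0x08;                            /* ethertype 0x0800 */
    frame[13] = 0x00;
    memset(frame + sizeof(frame) - 4, 0xde, 4);  /* deliberately bad FCS */

    memset(&sll, 0, sizeof(sll));
    sll.sll_family = AF_PACKET;
    sll.sll_ifindex = if_nametoindex("eth0");    /* placeholder test port */
    sll.sll_halen = ETH_ALEN;
    memset(sll.sll_addr, 0xff, ETH_ALEN);

    if (sendto(fd, frame, sizeof(frame), 0,
               (struct sockaddr *)&sll, sizeof(sll)) < 0)
        perror("sendto");
    close(fd);
    return 0;
}

On the receive side, the same test is easiest to observe after enabling ETHTOOL_SETRXALL and ETHTOOL_SETRXFCS on the peer interface, so the mangled frame is not dropped and the FCS is left on the skb.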