diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/Documentation/web100/locking.txt linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/Documentation/web100/locking.txt
*** linux-2.4.20.orig/Documentation/web100/locking.txt	Thu Jan 1 01:00:00 1970
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/Documentation/web100/locking.txt	Mon Jan 19 17:41:50 2004
***************
*** 0 ****
--- 1,33 ----
+ Web100 Locking Model for Linux 2.4
+ John Heffner
+ August 2, 2001
+
+
+ 1. Lookup Structures
+
+ The connection entries are kept linked together simultaneously in a table
+ and in a list. Only entries in these structures can be looked up. To
+ protect these lookup structures, we have a single global reader-writer
+ spinlock, web100_linkage_lock. Since we grab the lock both from user space
+ and in the bottom half, we must do a [read/write]_lock_bh. As this disables
+ the local BH's, this lock should *not* be held for very long.
+
+
+ 2. Data Integrity
+
+ The statistics are protected by the sock's lock. Any code modifying or
+ reading the statistics should hold the sock lock while doing so. We assume
+ that if the socket is gone, the statistics should not be modified, so
+ readers need not hold any lock.
+
+
+ 3. Statistics Destruction
+
+ A statistics structure keeps a count of the number of references to it,
+ wc_users. When a lookup is performed, the reference count should be
+ incremented (while the linkage lock is held) by calling web100_stats_use.
+ When the reference is no longer needed, decrement the count by calling
+ web100_stats_unuse. The latter function will free the statistics when there
+ are no remaining references. The lookup structures keep one reference. The
+ sock also keeps one, since the sock may be destroyed before it ever enters
+ the ESTABLISHED state.
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/Documentation/web100/proc_interface.txt linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/Documentation/web100/proc_interface.txt
*** linux-2.4.20.orig/Documentation/web100/proc_interface.txt	Thu Jan 1 01:00:00 1970
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/Documentation/web100/proc_interface.txt	Mon Jan 19 17:41:50 2004
***************
*** 0 ****
--- 1,102 ----
+ WEB100 proc interface notes
+ ===========================
+
+ The web100 modifications to the kernel collect information about the
+ state of a TCP transfer in a kernel data structure that is linked
+ out of the "sock" TCP structure in sock.h. Please see
+ "include/net/web100_stats.h" for the structure definition.
+
+ The API for this structure is provided through the /proc interface.
+ This document provides a brief description of this interface. Please
+ see fs/proc/web100.c for source code.
+
+ First, the kernel creates the /proc/web100 directory and the file
+ /proc/web100/header at system boot time.
+
+ Each new TCP connection is assigned a unique, unchanging number
+ (similar to a pid), and its directory name is that number as ASCII
+ decimal. These directories persist for about sixty seconds after the
+ connection is terminated (goes into a CLOSED or TIME_WAIT state). The
+ connection stats will not change after the connection is terminated.
+ (So a connection whose state variable is TIME_WAIT is not necessarily
+ still in TIME_WAIT.) It should be noted that what is meant by a
+ "connection" here is actually one side of a connection. If a
+ connection is created from the local host to the local host, two
+ connection ID's will be created.
+
+ When writing an application to read from the proc interface, it should be
+ taken into consideration that the directories and their files can disappear at
+ any time (they do so at an interrupt level).
So if a file open fails on a
+ file you just looked up (say, with glob), that's probably normal and the
+ program should handle it gracefully.
+
+ Another seemingly strange thing that can happen is that stats for multiple
+ connections with the same four-tuple can show up. No more than one of the
+ connections may be in any state but CLOSED or TIME_WAIT. This behavior is
+ correct, and should be handled as such.
+
+ The algorithms governing the connection numbers are not yet final.
+ Currently, for simplification, it is only possible to have 32768
+ connections.
+
+ Inside each connection directory is an identical set of files. One is
+ spec-ascii, which contains the connection four-tuple in human-readable
+ format. One can, for example, see all outgoing ssh connections by executing
+ "grep ':22$' /proc/web100/*/spec-ascii" from the command prompt.
+
+ The remaining files provide access to states of TCP-KIS variables in
+ local host byte-order. Since the number, names, and contents of these
+ files can and will change with releases, they are described in a
+ header file -- /proc/web100/header. A file named spec, which contains the
+ variables describing the connection's four-tuple, should be present
+ for any release.
+
+ The header file is in human-readable format as follows:
+     <version string>
+
+     /<filename>
+     <varname> <offset> <type> <length>
+     <varname> <offset> <type> <length>
+     ...
+
+     /<filename>
+     ...
+ The filename is the name of the file inside each connection directory. (The
+ / is prepended to make it clear it is a new file, not a new variable in the
+ previous file. There is also an empty line before each filename.) Each
+ file has an arbitrary number of variables, and there are an arbitrary number
+ of files.
The type is an integer, and is currently defined something like:
+
+ enum {
+ 	WEB100_TYPE_INTEGER,
+ 	WEB100_TYPE_INTEGER32,
+ 	WEB100_TYPE_IP_ADDRESS,
+ 	WEB100_TYPE_COUNTER32,
+ 	WEB100_TYPE_GAUGE32,
+ 	WEB100_TYPE_UNSIGNED32,
+ 	WEB100_TYPE_TIME_TICKS,
+ 	WEB100_TYPE_COUNTER64,
+ 	WEB100_TYPE_UNSIGNED16
+ };
+
+ in the kernel source file fs/proc/web100.c. These correspond to
+ MIB-II types. (RFC2578)
+
+ To read variables, seek to the appropriate offset, then read the appropriate
+ amount of data. (Length is implied by the type.) Multiple variables may be
+ read with a single read, and will be read atomically when doing so.
+ Currently, all variables are readable, but this may not be true in the
+ future.
+
+ To write variables, seek to the appropriate offset, and write the
+ appropriate amount of data. Only a single variable may be written at one
+ time. If variables must be atomically written, a variable should be used as
+ a flag to signal that the write is done, and the kernel code depending on
+ the variables should be written to handle this.
+
+ See: http://www.web100.org
+ Please send comments to prog@web100.org
+
+ John Heffner, Matt Mathis, R. Reddy
+ August 2000, Jan 2001
+
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/Documentation/web100/sysctl.txt linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/Documentation/web100/sysctl.txt
*** linux-2.4.20.orig/Documentation/web100/sysctl.txt	Thu Jan 1 01:00:00 1970
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/Documentation/web100/sysctl.txt	Mon Jan 19 17:41:50 2004
***************
*** 0 ****
--- 1,50 ----
+ Web100 sysctl variables
+ John Heffner
+ October 10, 2002
+
+ net.ipv4.WAD_FloydAIMD
+ 	This value is used for WAD_FloydAIMD by a connection when its KIS
+ 	variable is 0. This variable requires that private extensions be
+ 	enabled.
+
+ net.ipv4.WAD_IFQ
+ 	This value is used for WAD_IFQ by a connection when its KIS
+ 	variable is 0. This variable requires that Net100 extensions be
+ 	enabled.
+
+ net.ipv4.WAD_MaxBurst
+ 	This value is used for WAD_MaxBurst by a connection when its KIS
+ 	variable is 0. This variable requires that Net100 extensions be
+ 	enabled.
+
+ net.ipv4.web100_default_wscale
+ 	This will be the minimum window scale advertised.
+
+ net.ipv4.web100_no_metrics_save
+ 	When non-zero, TCP metrics will not be saved to the route dest
+ 	cache. NOTE: values already in the cache will not be flushed
+ 	by writing to this variable. To do so, as root write to
+ 	net.ipv4.route.flush. This variable requires that Net100
+ 	extensions be enabled.
+
+ net.ipv4.web100_rbufmode
+ 	The X_RBufMode KIS variable for each connection is set to this value
+ 	upon creation of the statistics structure.
+
+ net.ipv4.web100_rcvbuf_emu
+ 	If this is non-zero and RBufMode is 1, then we will set LimRwin
+ 	when an application does a setsockopt(SO_RCVBUF).
+
+ net.ipv4.web100_sbufmode
+ 	The X_SBufMode KIS variable for each connection is set to this value
+ 	upon creation of the statistics structure.
+
+ net.ipv4.web100_sndbuf_emu
+ 	If this is non-zero and SBufMode is 1, then we will set LimCwnd
+ 	when an application does a setsockopt(SO_SNDBUF).
+
+ net.ipv4.web100_fperms
+ 	Sets the file permissions of the files in /proc/web100/*/
+
+ net.ipv4.web100_gid
+ 	Sets the group of the files in /proc/web100/*/
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/Documentation/web100/tuning.txt linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/Documentation/web100/tuning.txt
*** linux-2.4.20.orig/Documentation/web100/tuning.txt	Thu Jan 1 01:00:00 1970
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/Documentation/web100/tuning.txt	Mon Jan 19 17:41:50 2004
***************
*** 0 ****
--- 1,36 ----
+ One of the primary features of the 2.1 release of the Web100 kernel patch is
+ that it contains a new style of TCP buffer management which effectively
+ "auto-tunes" both sending and receiving flows.
The algorithms used are very + similar to those described in + , and will be further + described in a future Web100 paper. + + Enabling/Disabling Autotuning + ----------------------------- + System-wide and per-connection controls have been provided for enabling and + disabling these experimental algorithms. KIS variables X_SBufMode and + X_RBufMode are the per-connection controls. For each, a value of 0 uses the + classic Linux buffering, and a value of 1 uses the Web100 autotuning. + Currently no other values are accepted. The system-wide sysctl variables + net.ipv4.web100_sbufmode and net.ipv4.web100_rbufmode are defaults for the + KIS variables, loaded at connection startup. Changing the sysctl variables + will NOT affect currently established connections. + + Mis-tuning + ---------- + For diagnostic or demonstration purposes, it may be useful to mis-tune + connections. Previously, this was done by writing to the SndbufSet or + RcvbufSet and then STuneMode or RTuneMode variables. These are now + deprecated. The suggested method of mis-tuning now is to use LimCwnd and + LimRwin. These have precise implementation-independent definitions -- they + are simply clamps on cwnd and rwin. + + Support for legacy applications + ------------------------------ + To provide backward compatibility, the old tuning variables do still have + functionality, though it has been somewhat altered. SndbufSet and RcvbufSet + immediately set sndbuf and rcvbuf, respectively. They also set LimCwnd and + LimRwin. The deprecated variables SMaxWinBuf and RMaxWinBuf also have the + same effects. The new variables X_Sndbuf and X_Rcvbuf as well as the + deprecated variables SAppBuf and RAppBuf write to sndbuf and rcvbuf but do + not set LimCwnd and LimRwin. 
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/Makefile linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/Makefile
*** linux-2.4.20.orig/Makefile	Thu Nov 28 23:53:16 2002
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/Makefile	Mon Jan 19 17:41:50 2004
***************
*** 1,7 ****
  VERSION = 2
  PATCHLEVEL = 4
  SUBLEVEL = 20
! EXTRAVERSION =
  
  KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
  
--- 1,7 ----
  VERSION = 2
  PATCHLEVEL = 4
  SUBLEVEL = 20
! EXTRAVERSION = _altAIMD-0.3_web100-2.3.3_sacks
  
  KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
  
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/arch/alpha/kernel/time.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/arch/alpha/kernel/time.c
*** linux-2.4.20.orig/arch/alpha/kernel/time.c	Mon Feb 25 19:37:52 2002
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/arch/alpha/kernel/time.c	Mon Jan 19 17:41:50 2004
*************** time_init(void)
*** 395,400 ****
--- 395,429 ----
  	}
  }
  
+ #ifdef CONFIG_WEB100_STATS
+ /*
+  * Store a timestamp in *time, expressed as jiffies scaled by
+  * 1000000/HZ.  SMP builds use jiffies alone; UP builds add an offset
+  * derived from the cycle counter delta since state.last_time.
+  */
+ void get_mono_time(__u64 *time)
+ {
+ #ifdef CONFIG_SMP
+ 	*time = jiffies * (1000000 / HZ);
+ #else
+ 	unsigned long flags;
+ 	unsigned long delta_cycles, delta_usec, partial_tick, now;
+
+ 	read_lock_irqsave(&xtime_lock, flags);
+ 	delta_cycles = rpcc() - state.last_time;
+ 	partial_tick = state.partial_tick;
+ 	now = jiffies;
+ 	read_unlock_irqrestore(&xtime_lock, flags);
+
+ 	delta_usec = (delta_cycles * state.scaled_ticks_per_cycle
+ 			+ partial_tick) * 15625;
+ 	delta_usec = ((delta_usec / ((1UL << (FIX_SHIFT-6-1)) * HZ)) + 1) / 2;
+
+ 	*time = now * (1000000 / HZ) + delta_usec;
+ #endif
+ }
+ #endif
+
  /*
   * Use the cycle counter to estimate an displacement from the last time
   * tick.
Unfortunately the Alpha designers made only the low 32-bits of
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/arch/i386/kernel/time.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/arch/i386/kernel/time.c
*** linux-2.4.20.orig/arch/i386/kernel/time.c	Thu Nov 28 23:53:09 2002
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/arch/i386/kernel/time.c	Mon Jan 19 17:41:50 2004
*************** static unsigned long (*do_gettimeoffset)
*** 262,267 ****
--- 262,278 ----
  
  #endif
  
+ #ifdef CONFIG_WEB100_STATS
+ void get_mono_time(__u64 *time)
+ {
+ 	unsigned long flags;
+ 	/* *time = jiffies scaled by 1000000/HZ plus do_gettimeoffset(), both read under xtime_lock */
+ 	read_lock_irqsave(&xtime_lock, flags);
+ 	*time = (__u64)jiffies * (1000000 / HZ) + do_gettimeoffset();
+ 	read_unlock_irqrestore(&xtime_lock, flags);
+ }
+ #endif
+
  /*
   * This version of gettimeofday has microsecond resolution
   * and better than microsecond precision on fast x86 machines with TSC.
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/arch/ia64/kernel/time.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/arch/ia64/kernel/time.c
*** linux-2.4.20.orig/arch/ia64/kernel/time.c	Thu Nov 28 23:53:09 2002
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/arch/ia64/kernel/time.c	Mon Jan 19 17:41:50 2004
*************** gettimeoffset (void)
*** 83,88 ****
--- 83,100 ----
  	return (elapsed_cycles*local_cpu_data->usec_per_cyc) >> IA64_USEC_PER_CYC_SHIFT;
  }
  
+ #ifdef CONFIG_WEB100_STATS
+ void
+ get_mono_time(__u64 *time)
+ {
+ 	unsigned long flags;
+ 	/* *time = jiffies scaled by 1000000/HZ plus gettimeoffset(), both read under xtime_lock */
+ 	read_lock_irqsave(&xtime_lock, flags);
+ 	*time = (__u64)jiffies * (1000000 / HZ) + gettimeoffset();
+ 	read_unlock_irqrestore(&xtime_lock, flags);
+ }
+ #endif
+
  void
  do_settimeofday (struct timeval *tv)
  {
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/arch/ppc/kernel/time.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/arch/ppc/kernel/time.c
*** linux-2.4.20.orig/arch/ppc/kernel/time.c	Thu Nov 28 23:53:11 2002
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/arch/ppc/kernel/time.c	Mon Jan 19 17:41:50 2004
***************
unsigned mulhwu_scale_factor(unsigned in
*** 432,434 ****
--- 432,461 ----
  	return mlt;
  }
  
+ #ifdef CONFIG_WEB100_STATS
+ /*
+  * Store a timestamp in *time in the same units the i386/ia64
+  * get_mono_time() use: a jiffies count scaled by 1000000/HZ plus an
+  * intra-jiffy offset.
+  * NOTE(review): assumes mulhwu(tb_to_us, ticks) converts timebase ticks
+  * to microseconds and that tb_last_stamp tracks jiffies -- confirm.
+  */
+ void get_mono_time(__u64 *time)
+ {
+ 	unsigned delta, lost_ticks;
+ 	unsigned long flags, wall;
+
+ 	read_lock_irqsave(&xtime_lock, flags);
+ 	delta = tb_ticks_since(tb_last_stamp);
+ #ifdef CONFIG_SMP
+ 	if( !smp_tb_synchronized)
+ 		delta = 0;
+ #endif
+ 	wall = wall_jiffies;
+ 	lost_ticks = jiffies - wall;
+ 	read_unlock_irqrestore(&xtime_lock, flags);
+
+ 	/* The converted offset is added to the jiffy base; it used to be
+ 	 * multiplied by 1000000/HZ with no base at all. */
+ 	*time = (__u64)wall * (1000000/HZ)
+ 		+ mulhwu(tb_to_us, tb_ticks_per_jiffy * lost_ticks + delta);
+ }
+ #endif
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/fs/proc/Makefile linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/fs/proc/Makefile
*** linux-2.4.20.orig/fs/proc/Makefile	Wed May 9 00:41:32 2001
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/fs/proc/Makefile	Mon Jan 19 17:41:50 2004
*************** ifeq ($(CONFIG_PROC_DEVICETREE),y)
*** 18,21 ****
--- 18,25 ----
  obj-y += proc_devtree.o
  endif
  
+ ifeq ($(CONFIG_WEB100_STATS),y)
+ obj-y += web100.o
+ endif
+
  include $(TOPDIR)/Rules.make
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/fs/proc/root.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/fs/proc/root.c
*** linux-2.4.20.orig/fs/proc/root.c	Sat Aug 3 01:39:45 2002
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/fs/proc/root.c	Mon Jan 19 17:41:50 2004
*************** void __init proc_root_init(void)
*** 68,73 ****
--- 68,77 ----
  	proc_rtas_init();
  #endif
  	proc_bus = proc_mkdir("bus", 0);
+
+ #ifdef CONFIG_WEB100_STATS
+ 	proc_web100_init();
+ #endif
  }
  
  static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry)
diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/fs/proc/web100.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/fs/proc/web100.c
*** linux-2.4.20.orig/fs/proc/web100.c	Thu Jan 1 01:00:00 1970
--- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/fs/proc/web100.c	Mon Jan 19 17:41:50 2004
***************
*** 0 ****
--- 1,1385 ----
+ /*
+  * fs/proc/web100.c
+  *
+  *
Copyright (C) 2001 Matt Mathis
+  * Copyright (C) 2001 John Heffner
+  *
+  * The Web 100 project. See http://www.web100.org
+  *
+  * This program is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU General Public License
+  * as published by the Free Software Foundation; either version
+  * 2 of the License, or (at your option) any later version.
+  *
+  */
+
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+ #include
+
+ #define WEB100MIB_BLOCK_SIZE (PAGE_SIZE - 1024)	/* parenthesized so it expands safely inside larger expressions */
+
+ extern __u32 sysctl_wmem_default;
+ extern __u32 sysctl_wmem_max;
+
+ struct proc_dir_entry *proc_web100_dir;
+ static struct proc_dir_entry *proc_web100_header;
+
+
+ /*
+  * Web100 variable reading/writing
+  */
+
+ enum web100_connection_inos {
+ 	PROC_CONN_SPEC_ASCII = 1,
+ 	PROC_CONN_SPEC,
+ 	PROC_CONN_READ,
+ 	PROC_CONN_TEST,
+ 	PROC_CONN_TUNE,
+ 	PROC_CONN_HIGH_INO	/* Keep at the end */
+ };
+
+ enum {
+ 	WEB100_TYPE_INTEGER = 0,
+ 	WEB100_TYPE_INTEGER32,
+ 	WEB100_TYPE_INET_ADDRESS_IPV4,
+ 	WEB100_TYPE_IP_ADDRESS = WEB100_TYPE_INET_ADDRESS_IPV4, /* Deprecated */
+ 	WEB100_TYPE_COUNTER32,
+ 	WEB100_TYPE_GAUGE32,
+ 	WEB100_TYPE_UNSIGNED32,
+ 	WEB100_TYPE_TIME_TICKS,
+ 	WEB100_TYPE_COUNTER64,
+ 	WEB100_TYPE_INET_PORT_NUMBER,
+ 	WEB100_TYPE_UNSIGNED16 = WEB100_TYPE_INET_PORT_NUMBER, /* Deprecated */
+ 	WEB100_TYPE_INET_ADDRESS,
+ 	WEB100_TYPE_INET_ADDRESS_IPV6,
+ };
+
+ struct web100_var;
+ typedef int (*web100_rwfunc_t)(void *buf, struct web100stats *stats,
+ 	struct web100_var *vp);
+
+ /* The printed variable description should look something like this (in ASCII):
+  * varname offset type
+  * where offset is the offset into the file.
+  */
+ struct web100_var {
+ 	char *name;
+ 	__u32 type;
+ 	int len;
+
+ 	web100_rwfunc_t read;
+ 	unsigned long read_data;	/* read handler-specific data */
+
+ 	web100_rwfunc_t write;
+ 	unsigned long write_data;	/* write handler-specific data */
+
+ 	struct web100_var *next;
+ };
+
+ struct web100_file {
+ 	int len;
+ 	char *name;
+ 	int low_ino;
+ 	mode_t mode;
+
+ 	struct web100_var *first_var;
+ };
+
+ #define F(name,ino,perm) { sizeof (name) - 1, (name), (ino), (perm), NULL }
+ static struct web100_file web100_file_arr[] = {
+ 	F("spec-ascii", PROC_CONN_SPEC_ASCII, S_IFREG | S_IRUGO),
+ 	F("spec", PROC_CONN_SPEC, S_IFREG | S_IRUGO),
+ 	F("read", PROC_CONN_READ, 0),
+ 	F("test", PROC_CONN_TEST, 0),
+ 	F("tune", PROC_CONN_TUNE, 0) };
+ #undef F
+ #define WEB100_FILE_ARR_SIZE (sizeof (web100_file_arr) / sizeof (struct web100_file))
+
+ /*
+  * Map a low inode number to its web100_file_arr entry.
+  * This works only if the array is built in the correct order, i.e. the
+  * entry with low inode number N sits at index N - 1.  Returns NULL for a
+  * number outside the array, so that callers' NULL checks can take effect.
+  */
+ static inline struct web100_file *web100_file_lookup(int ino) {
+ 	if (ino < 1 || ino > (int)WEB100_FILE_ARR_SIZE)
+ 		return NULL;
+ 	return &web100_file_arr[ino - 1];
+ }
+
+ /*
+  * Allocate a variable descriptor and push it on the front of
+  * file->first_var.  The length is derived from the type; an unknown
+  * type is logged and given length 0.  Panics if the allocation fails.
+  */
+ static void add_var(struct web100_file *file, char *name, int type,
+ 	web100_rwfunc_t read, unsigned long read_data,
+ 	web100_rwfunc_t write, unsigned long write_data)
+ {
+ 	struct web100_var *var;
+
+ 	/* Again, assuming add_var is only called at init.
+ 	 */
+ 	if ((var = kmalloc(sizeof (struct web100_var), GFP_KERNEL)) == NULL)
+ 		panic("No memory available for Web100 var.\n");
+
+ 	var->name = name;
+ 	var->type = type;
+ 	switch (type) {
+ 	case WEB100_TYPE_INET_PORT_NUMBER:
+ 		var->len = 2;
+ 		break;
+ 	case WEB100_TYPE_INTEGER:
+ 	case WEB100_TYPE_INTEGER32:
+ 	case WEB100_TYPE_COUNTER32:
+ 	case WEB100_TYPE_GAUGE32:
+ 	case WEB100_TYPE_UNSIGNED32:
+ 	case WEB100_TYPE_TIME_TICKS:
+ 		var->len = 4;
+ 		break;
+ 	case WEB100_TYPE_COUNTER64:
+ 		var->len = 8;
+ 		break;
+ 	case WEB100_TYPE_INET_ADDRESS:
+ 		var->len = 17;
+ 		break;
+ 	default:
+ 		printk("Web100: Warning: Adding variable of unknown type.\n");
+ 		var->len = 0;
+ 	}
+
+ 	var->read = read;
+ 	var->read_data = read_data;
+
+ 	var->write = write;
+ 	var->write_data = write_data;
+
+ 	var->next = file->first_var;
+ 	file->first_var = var;
+ }
+
+
+ /*
+  * proc filesystem routines
+  */
+
+ /* Create an inode on sb with number ino, current timestamps and
+  * uid/gid 0.  Returns NULL if new_inode() fails. */
+ static struct inode *proc_web100_make_inode(struct super_block *sb, int ino)
+ {
+ 	struct inode *inode;
+
+ 	inode = new_inode(sb);
+ 	if (!inode)
+ 		goto out;
+
+ 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ 	inode->i_ino = ino;
+
+ 	inode->i_uid = 0;
+ 	inode->i_gid = 0;
+
+ out:
+ 	return inode;
+ }
+
+ /* Inode number layout: top bit set, cid in bits 8..30, per-file low
+  * inode number in the low byte. */
+ static inline ino_t ino_from_cid(int cid)
+ {
+ 	return (cid << 8) | 0x80000000;
+ }
+
+ static inline ino_t ino_from_parts(ino_t dir_ino, __u16 low_ino)
+ {
+ 	return (dir_ino & ~0xff) | low_ino;
+ }
+
+ static inline int cid_from_ino(ino_t ino)
+ {
+ 	return (ino & 0x7fffff00) >> 8;
+ }
+
+ static inline int low_from_ino(ino_t ino)
+ {
+ 	return ino & 0xff;
+ }
+
+ /* Take a reference on the connection's stats for the lifetime of the
+  * open file; fails with -ENOENT if the stats are gone or marked dead. */
+ static int connection_file_open(struct inode *inode, struct file *file)
+ {
+ 	int cid = cid_from_ino(inode->i_ino);
+ 	struct web100stats *stats;
+
+ 	read_lock_bh(&web100_linkage_lock);
+ 	stats = web100stats_lookup(cid);
+ 	if (stats == NULL || stats->wc_dead) {
+ 		read_unlock_bh(&web100_linkage_lock);
+ 		return -ENOENT;
+ 	}
+ 	web100_stats_use(stats);
+ 	read_unlock_bh(&web100_linkage_lock);
+
+ 	return 0;
+ }
+
+ static int
connection_file_release(struct inode *inode, struct file *file)
+ {
+ 	int cid = cid_from_ino(inode->i_ino);
+ 	struct web100stats *stats;
+
+ 	read_lock_bh(&web100_linkage_lock);
+ 	stats = web100stats_lookup(cid);
+ 	if (stats == NULL) {
+ 		read_unlock_bh(&web100_linkage_lock);
+ 		return -ENOENT;
+ 	}
+ 	read_unlock_bh(&web100_linkage_lock);
+ 	web100_stats_unuse(stats);
+
+ 	return 0;
+ }
+
+ /** /proc/web100/<cid>/{spec,read,test,tune} **/
+ /*
+  * Common read/write path for the binary per-connection files.
+  * *ppos must fall exactly on a variable boundary (-ESPIPE otherwise).
+  * Whole variables are transferred through a bounce page, while the
+  * sock lock is held, until nbytes is exhausted or the next variable
+  * would not fit.  Returns the number of bytes transferred or a
+  * negative errno.  The bounce page is released on every exit path.
+  */
+ static ssize_t connection_file_rw(int read, struct file *file,
+ 	char *buf, size_t nbytes, loff_t *ppos)
+ {
+ 	int low_ino = low_from_ino(file->f_dentry->d_inode->i_ino);
+ 	int cid = cid_from_ino(file->f_dentry->d_inode->i_ino);
+ 	struct web100stats *stats;
+ 	struct web100_file *fp;
+ 	struct web100_var *vp;
+ 	int pos;
+ 	int n;
+ 	int err;
+ 	web100_rwfunc_t rwfunc;
+ 	char *page;
+
+ 	/* We're only going to let them read one page at a time.
+ 	 * We shouldn't ever read more than a page, anyway, though.
+ 	 */
+ 	if (nbytes > PAGE_SIZE)
+ 		nbytes = PAGE_SIZE;
+
+ 	if ((err = verify_area(read ? VERIFY_WRITE : VERIFY_READ, buf, nbytes)) < 0)
+ 		return err;
+
+ 	if ((page = (char *)get_free_page(GFP_KERNEL)) == NULL)
+ 		return -ENOMEM;
+
+ 	if (!read) {
+ 		if (copy_from_user(page, buf, nbytes)) {
+ 			err = -EFAULT;
+ 			goto out_free;
+ 		}
+ 	}
+
+ 	fp = web100_file_lookup(low_ino);
+ 	if (fp == NULL) {
+ 		printk("Unregistered Web100 file.\n");
+ 		err = 0;
+ 		goto out_free;
+ 	}
+
+ 	read_lock_bh(&web100_linkage_lock);
+ 	stats = web100stats_lookup(cid);
+ 	read_unlock_bh(&web100_linkage_lock);
+ 	if (stats == NULL) {
+ 		err = -ENOENT;
+ 		goto out_free;
+ 	}
+
+ 	lock_sock(stats->wc_sk);
+
+ 	/* TODO: seek in constant time, not linear. -JWH */
+ 	pos = 0;
+ 	n = 0;
+ 	vp = fp->first_var;
+ 	while (vp && nbytes > n) {
+ 		if (pos > *ppos) {
+ 			err = -ESPIPE;
+ 			goto out_unlock;
+ 		}
+ 		if (pos == *ppos) {
+ 			if (vp->len > nbytes - n)
+ 				break;
+
+ 			if (read)
+ 				rwfunc = vp->read;
+ 			else
+ 				rwfunc = vp->write;
+ 			if (rwfunc == NULL) {
+ 				err = -EACCES;
+ 				goto out_unlock;
+ 			}
+
+ 			err = rwfunc(page + n, stats, vp);
+
+ 			if (err < 0)
+ 				goto out_unlock;
+ 			n += vp->len;
+ 			*ppos += vp->len;
+ 		}
+ 		pos += vp->len;
+ 		vp = vp->next;
+ 	}
+ 	err = n;
+
+ out_unlock:
+ 	release_sock(stats->wc_sk);
+
+ 	/* Copy out only on success, after the sock lock has been dropped. */
+ 	if (err >= 0 && read) {
+ 		if (copy_to_user(buf, page, n))
+ 			err = -EFAULT;
+ 	}
+ out_free:
+ 	free_page((unsigned long)page);
+
+ 	return err;
+ }
+
+ static ssize_t connection_file_read(struct file *file,
+ 	char *buf, size_t nbytes, loff_t *ppos)
+ {
+ 	return connection_file_rw(1, file, buf, nbytes, ppos);
+ }
+
+ static ssize_t connection_file_write(struct file *file,
+ 	const char *buf, size_t nbytes, loff_t *ppos)
+ {
+ 	return connection_file_rw(0, file, (char *)buf, nbytes, ppos);
+ }
+
+ static struct file_operations connection_file_fops = {
+ 	open:		connection_file_open,
+ 	release:	connection_file_release,
+ 	read:		connection_file_read,
+ 	write:		connection_file_write
+ };
+
+
+ /*
+  * Format the eight 16-bit groups at addr (network byte order) as hex
+  * text in dest, replacing the longest run of two or more zero groups
+  * with "::".  Returns the number of characters written, not counting
+  * the terminating NUL.
+  */
+ static size_t v6addr_str(char *dest, short *addr)
+ {
+ 	int start = -1, end = -1;
+ 	int i, j;
+ 	int pos;
+
+ 	/* Find longest subsequence of 0's in addr */
+ 	for (i = 0; i < 8; i++) {
+ 		if (addr[i] == 0) {
+ 			/* Test the bound first so addr[8] is never read. */
+ 			for (j = i + 1; j < 8 && addr[j] == 0; j++);
+ 			if (j - i > end - start) {
+ 				end = j;
+ 				start = i;
+ 			}
+ 			i = j;
+ 		}
+ 	}
+ 	if (end - start == 1)
+ 		start = -1;
+
+ 	pos = 0;
+ 	for (i = 0; i < 8; i++) {
+ 		if (i > 0)
+ 			pos += sprintf(dest + pos, ":");
+ 		if (i == start) {
+ 			pos += sprintf(dest + pos, ":");
+ 			i += end - start - 1;
+ 		} else {
+ 			pos += sprintf(dest + pos, "%hx", ntohs(addr[i]));
+ 		}
+ 	}
+ 	/* An all-zero address has no group after the run to supply the
+ 	 * second ':', so add it here to produce "::". */
+ 	if (start == 0 && end == 8)
+ 		pos += sprintf(dest + pos, ":");
+
+ 	return pos;
+ }
+
+ /** /proc/web100/<cid>/spec-ascii **/
+ static ssize_t connection_spec_ascii_read(struct file * file, char * buf,
+ 	size_t nbytes, loff_t *ppos)
+ {
+ 	__u32 local_addr, remote_addr;
+ 	__u16 local_port, remote_port;
+ 	int cid;
+ 	struct web100stats *stats;
+ 	struct web100directs *vars;
+ 	char tmpbuf[100];
+ 	int len = 0;
+
+ 	/* The whole line is produced by the first read; later reads are EOF. */
+ 	if (*ppos != 0)
+ 		return 0;
+
+ 	cid = cid_from_ino(file->f_dentry->d_parent->d_inode->i_ino);
+
+ 	read_lock_bh(&web100_linkage_lock);
+ 	stats = web100stats_lookup(cid);
+ 	read_unlock_bh(&web100_linkage_lock);
+ 	if (stats == NULL)
+ 		return -ENOENT;
+ 	vars = &stats->wc_vars;
+
+ 	if (vars->LocalAddressType == WC_ADDRTYPE_IPV4) {
+ 		/* These values should not change while stats are linked.
+ 		 * We don't need to lock the sock. */
+ 		local_addr = ntohl(vars->LocalAddress.v4addr);
+ 		remote_addr = ntohl(vars->RemAddress.v4addr);
+ 		local_port = vars->LocalPort;
+ 		remote_port = vars->RemPort;
+
+ 		len = sprintf(tmpbuf, "%d.%d.%d.%d:%d %d.%d.%d.%d:%d\n",
+ 			(local_addr >> 24) & 0xff,
+ 			(local_addr >> 16) & 0xff,
+ 			(local_addr >> 8) & 0xff,
+ 			local_addr & 0xff,
+ 			local_port,
+ 			(remote_addr >> 24) & 0xff,
+ 			(remote_addr >> 16) & 0xff,
+ 			(remote_addr >> 8) & 0xff,
+ 			remote_addr & 0xff,
+ 			remote_port);
+ 	} else if (vars->LocalAddressType == WC_ADDRTYPE_IPV6) {
+ 		local_port = vars->LocalPort;
+ 		remote_port = vars->RemPort;
+
+ 		len += v6addr_str(tmpbuf + len, (short *)&vars->LocalAddress.v6addr.addr);
+ 		len += sprintf(tmpbuf + len, ".%d ", local_port);
+ 		len += v6addr_str(tmpbuf + len, (short *)&vars->RemAddress.v6addr.addr);
+ 		len += sprintf(tmpbuf + len, ".%d\n", remote_port);
+ 	} else {
+ 		printk(KERN_ERR "connection_spec_ascii_read: LocalAddressType invalid\n");
+ 		return 0;
+ 	}
+
+ 	len = len > nbytes ? nbytes : len;
+ 	if (copy_to_user(buf, tmpbuf, len))
+ 		return -EFAULT;
+ 	*ppos += len;
+ 	return len;
+ }
+
+ static struct file_operations connection_spec_ascii_fops = {
+ 	open:		connection_file_open,
+ 	release:	connection_file_release,
+ 	read:		connection_spec_ascii_read
+ };
+
+
+ /** /proc/web100/<cid>/ **/
+ /*
+  * List ".", ".." and then one entry per web100_file_arr element,
+  * resuming from filp->f_pos.  Returns 0 when filldir reports the
+  * caller's buffer is full, 1 once the listing is complete.
+  */
+ static int connection_dir_readdir(struct file *filp,
+ 	void *dirent, filldir_t filldir)
+ {
+ 	int i;
+ 	struct inode *inode = filp->f_dentry->d_inode;
+ 	struct web100_file *p;
+
+ 	i = filp->f_pos;
+ 	switch (i) {
+ 	case 0:
+ 		if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
+ 			return 0;
+ 		i++;
+ 		filp->f_pos++;
+ 		/* fall through */
+ 	case 1:
+ 		if (filldir(dirent, "..", 2, i, proc_web100_dir->low_ino, DT_DIR) < 0)
+ 			return 0;
+ 		i++;
+ 		filp->f_pos++;
+ 		/* fall through */
+ 	default:
+ 		i -= 2;
+ 		/* Stop at the end of the array as well as on a NULL name:
+ 		 * web100_file_arr is not defined with a terminating entry. */
+ 		for (; i < WEB100_FILE_ARR_SIZE && web100_file_arr[i].name; i++) {
+ 			p = &web100_file_arr[i];
+ 			if (filldir(dirent, p->name, p->len, filp->f_pos,
+ 					ino_from_parts(inode->i_ino, p->low_ino),
+ 					p->mode >> 12) < 0)
+ 				return 0;
+ 			filp->f_pos++;
+ 		}
+ 	}
+
+ 	return 1;
+ }
+
+ /*
+  * Resolve a file name inside a connection directory.  The name must
+  * match a web100_file_arr entry and the connection's stats must still
+  * be linked; the new inode takes the socket owner's uid and the
+  * sysctl-configured gid.
+  */
+ static struct dentry *connection_dir_lookup(struct inode *dir,
+ 	struct dentry *dentry)
+ {
+ 	struct inode *inode;
+ 	struct web100_file *p;
+ 	struct web100stats *stats;
+ 	uid_t uid;
+ 	unsigned i;
+
+ 	inode = NULL;
+ 	/* Bound the scan by the array size: web100_file_arr is not defined
+ 	 * with a terminating entry, so p->name alone cannot end the loop. */
+ 	for (i = 0, p = &web100_file_arr[0];
+ 	     i < WEB100_FILE_ARR_SIZE && p->name; i++, p++) {
+ 		if (p->len != dentry->d_name.len)
+ 			continue;
+ 		if (!memcmp(dentry->d_name.name, p->name, p->len))
+ 			break;
+ 	}
+ 	if (i >= WEB100_FILE_ARR_SIZE || !p->name)
+ 		return ERR_PTR(-ENOENT);
+
+ 	read_lock_bh(&web100_linkage_lock);
+ 	if ((stats = web100stats_lookup(cid_from_ino(dir->i_ino))) == NULL) {
+ 		read_unlock_bh(&web100_linkage_lock);
+ 		printk("connection_dir_lookup: stats == NULL\n");
+ 		return ERR_PTR(-ENOENT);
+ 	}
+ 	uid = sock_i_uid(stats->wc_sk);
+ 	read_unlock_bh(&web100_linkage_lock);
+
+ 	inode = proc_web100_make_inode(dir->i_sb, ino_from_parts(dir->i_ino, p->low_ino));
+ 	if (!inode)
+ 		return ERR_PTR(-ENOMEM);
+ 	inode->i_mode = p->mode ?
p->mode : S_IFREG | sysctl_web100_fperms;
+ 	inode->i_uid = uid;
+ 	inode->i_gid = sysctl_web100_gid;
+
+ 	switch (p->low_ino) {
+ 	case PROC_CONN_SPEC_ASCII:
+ 		inode->i_fop = &connection_spec_ascii_fops;
+ 		break;
+ 	case PROC_CONN_SPEC:
+ 	case PROC_CONN_READ:
+ 	case PROC_CONN_TEST:
+ 	case PROC_CONN_TUNE:
+ 		inode->i_fop = &connection_file_fops;
+ 		break;
+ 	default:
+ 		printk("Web100: impossible type (%d)\n", p->low_ino);
+ 		iput(inode);
+ 		return ERR_PTR(-EINVAL);
+ 	}
+
+ 	d_add(dentry, inode);
+ 	return NULL;
+ }
+
+ static struct inode_operations connection_dir_iops = {
+ 	lookup:		connection_dir_lookup
+ };
+
+ static struct file_operations connection_dir_fops = {
+ 	readdir:	connection_dir_readdir
+ };
+
+
+ /** /proc/web100/header **/
+ /*
+  * Regenerate the header text on every call into a one-page scratch
+  * buffer and copy out the part that overlaps [*ppos, *ppos + nbytes).
+  * The text is the version string, then for each file with variables a
+  * "/<name>" line followed by one "<name> <offset> <type> <len>" line
+  * per variable.  Returns the number of bytes copied, 0 at end of file,
+  * or a negative errno.  The scratch page is freed on every exit path.
+  */
+ static ssize_t header_read(struct file * file, char * buf,
+ 	size_t nbytes, loff_t *ppos)
+ {
+ 	int len = 0;
+ 	loff_t offset;
+ 	char *tmpbuf;
+ 	struct web100_file *fp;
+ 	struct web100_var *vp;
+ 	int n, tmp;
+ 	int i;
+ 	int ret = 0;
+
+ 	/* We will assume the variable description list will not change
+ 	 * after init. (True at least right now.) Otherwise, we would have
+ 	 * to have a lock on it.
+ 	 */
+
+ 	if ((tmpbuf = (char *)get_free_page(GFP_KERNEL)) == NULL)
+ 		return -ENOMEM;
+
+ 	offset = sprintf(tmpbuf, "%s\n", web100_version_string);
+
+ 	for (i = 0; i < WEB100_FILE_ARR_SIZE; i++) {
+ 		int file_offset = 0;
+
+ 		if ((fp = &web100_file_arr[i]) == NULL)
+ 			continue;
+
+ 		if (fp->first_var == NULL)
+ 			continue;
+
+ 		offset += sprintf(tmpbuf + offset, "\n/%s\n", fp->name);
+
+ 		vp = fp->first_var;
+ 		while (vp) {
+ 			/* Scratch page nearly full: hand out this chunk if it
+ 			 * overlaps the requested range, then reuse the page. */
+ 			if (offset > WEB100MIB_BLOCK_SIZE) {
+ 				len += offset;
+ 				if (*ppos < len) {
+ 					n = min(offset, min_t(loff_t, nbytes, len - *ppos));
+ 					if (copy_to_user(buf, tmpbuf + max_t(loff_t, *ppos - len + offset, 0), n)) {
+ 						ret = -EFAULT;
+ 						goto out;
+ 					}
+ 					buf += n;
+ 					if (nbytes == n) {
+ 						*ppos += n;
+ 						ret = n;
+ 						goto out;
+ 					}
+ 				}
+ 				offset = 0;
+ 			}
+
+ 			offset += sprintf(tmpbuf + offset, "%s %d %d %d\n",
+ 				vp->name, file_offset, vp->type, vp->len);
+ 			file_offset += vp->len;
+
+ 			vp = vp->next;
+ 		}
+ 	}
+ 	len += offset;
+ 	if (*ppos < len) {
+ 		n = min(offset, min_t(loff_t, nbytes, len - *ppos));
+ 		if (copy_to_user(buf, tmpbuf + max_t(loff_t, *ppos - len + offset, 0), n)) {
+ 			ret = -EFAULT;
+ 			goto out;
+ 		}
+ 		if (nbytes <= len - *ppos) {
+ 			*ppos += nbytes;
+ 			ret = nbytes;
+ 			goto out;
+ 		} else {
+ 			tmp = len - *ppos;
+ 			*ppos = len;
+ 			ret = tmp;
+ 			goto out;
+ 		}
+ 	}
+
+ out:
+ 	free_page((unsigned long)tmpbuf);
+ 	return ret;
+ }
+
+ static struct file_operations header_file_operations = {
+ 	read:		header_read
+ };
+
+
+ /** /proc/web100/ **/
+ #define FIRST_CONNECTION_ENTRY 256
+ #define NUMBUF_LEN 11
+
+ /*
+  * Fill cids[] with up to max connection ids of non-dead stats, skipping
+  * the first (pos - FIRST_CONNECTION_ENTRY) of them.  Returns the number
+  * of ids stored.
+  */
+ static int get_connection_list(int pos, int *cids, int max)
+ {
+ 	struct web100stats *stats;
+ 	int n;
+
+ 	pos -= FIRST_CONNECTION_ENTRY;
+ 	n = 0;
+
+ 	read_lock_bh(&web100_linkage_lock);
+
+ 	stats = web100stats_first;
+ 	while (stats && n < max) {
+ 		if (!stats->wc_dead) {
+ 			if (pos <= 0)
+ 				cids[n++] = stats->wc_cid;
+ 			else
+ 				pos--;
+ 		}
+
+ 		stats = stats->wc_next;
+ 	}
+
+ 	read_unlock_bh(&web100_linkage_lock);
+
+ 	return n;
+ }
+
+ static int cid_to_str(int cid, char *buf)
+ {
+ 	int len, tmp, i;
+
+ 	if (cid == 0) {
/* a special case */ + len = 1; + } else { + tmp = cid; + for (len = 0; len < NUMBUF_LEN - 1 && tmp > 0; len++) + tmp /= 10; + } + + for (i = 0; i < len; i++) { + buf[len - i - 1] = '0' + (cid % 10); + cid /= 10; + } + buf[len] = '\0'; + + return len; + } + + static int web100_dir_readdir(struct file *filp, + void *dirent, filldir_t filldir) + { + int err; + unsigned n, i; + int *cids; + int len; + ino_t ino; + char name[NUMBUF_LEN]; + int n_conns; + + if (filp->f_pos < FIRST_CONNECTION_ENTRY) { + if ((err = proc_readdir(filp, dirent, filldir)) < 0) + return err; + filp->f_pos = FIRST_CONNECTION_ENTRY; + } + n_conns = WEB100_MAX_CONNS * 2; + do { + n_conns /= 2; + cids = kmalloc(n_conns * sizeof (int), GFP_KERNEL); + } while (cids == NULL && n_conns > 0); + if (cids == NULL) + return -ENOMEM; + n = get_connection_list(filp->f_pos, cids, n_conns); + + for (i = 0; i < n; i++) { + ino = ino_from_cid(cids[i]); + len = cid_to_str(cids[i], name); + if (filldir(dirent, name, len, filp->f_pos, + ino, DT_DIR) < 0) { + break; + } + filp->f_pos++; + } + + kfree(cids); + + return 0; + } + + static inline struct dentry *web100_dir_dent(void) + { + struct qstr qstr; + + qstr.name = "web100"; + qstr.len = 6; + qstr.hash = full_name_hash(qstr.name, qstr.len); + + return d_lookup(proc_mnt->mnt_sb->s_root, &qstr); + } + + void web100_proc_nlink_update(nlink_t nlink) + { + struct dentry *dent; + + dent = web100_dir_dent(); + if (dent) + dent->d_inode->i_nlink = nlink; + dput(dent); + } + + int web100_proc_dointvec_update(ctl_table *ctl, int write, struct file *filp, + void *buffer, size_t *lenp) + { + unsigned n, i; + int *cids; + int err; + struct qstr qstr; + struct dentry *web100_dent, *conn_dent, *dent; + struct inode *inode; + struct web100_file *p; + char name[NUMBUF_LEN]; + + if ((err = proc_dointvec(ctl, write, filp, buffer, lenp)) != 0) + return err; + + if ((web100_dent = web100_dir_dent()) == NULL) + return 0; + + if ((cids = kmalloc(WEB100_MAX_CONNS * sizeof (int), 
GFP_KERNEL)) == NULL) { + /* drop the reference taken by web100_dir_dent() before bailing out */ + dput(web100_dent); + return -ENOMEM; + } + n = get_connection_list(FIRST_CONNECTION_ENTRY, cids, WEB100_MAX_CONNS); + for (i = 0; i < n; i++) { + qstr.len = cid_to_str(cids[i], name); + qstr.name = name; + qstr.hash = full_name_hash(qstr.name, qstr.len); + if ((conn_dent = d_lookup(web100_dent, &qstr)) != NULL) { + for (p = &web100_file_arr[0]; p->name; p++) { + qstr.name = p->name; + qstr.len = p->len; + qstr.hash = full_name_hash(qstr.name, qstr.len); + if ((dent = d_lookup(conn_dent, &qstr)) != NULL) { + inode = dent->d_inode; + if ((inode->i_mode = p->mode) == 0) + inode->i_mode = S_IFREG | sysctl_web100_fperms; + inode->i_gid = sysctl_web100_gid; + dput(dent); + } + } + dput(conn_dent); + } + } + dput(web100_dent); + kfree(cids); + printk("web100_proc_dointvec_update: set fperms = %d, gid = %d\n", + sysctl_web100_fperms, sysctl_web100_gid); + + return 0; + } + + static int web100_proc_connection_revalidate(struct dentry *dentry, int flags) + { + int ret = 1; + + if (dentry->d_inode == NULL) + return 0; + read_lock_bh(&web100_linkage_lock); + if (web100stats_lookup(cid_from_ino(dentry->d_inode->i_ino)) == NULL) { + ret = 0; + d_drop(dentry); + } + read_unlock_bh(&web100_linkage_lock); + + return ret; + } + + static struct dentry_operations web100_dir_dentry_operations = { + d_revalidate: web100_proc_connection_revalidate + }; + + static struct dentry *web100_dir_lookup(struct inode *dir, + struct dentry *dentry) + { + char *name; + int len; + int cid; + unsigned c; + struct inode *inode; + unsigned long ino; + struct web100stats *stats; + + if (proc_lookup(dir, dentry) == NULL) + return NULL; + + cid = 0; + name = (char *)(dentry->d_name.name); + len = dentry->d_name.len; + if (len <= 0) /* I don't think this can happen */ + return ERR_PTR(-EINVAL); + while (len-- > 0) { + c = *name - '0'; + name++; + cid *= 10; + cid += c; + if (c > 9 || c < 0 || (cid == 0 && len != 0) || cid >= WEB100_MAX_CONNS) { + cid = -1; + break; + } + } + if (cid < 0) + return 
ERR_PTR(-ENOENT); + + read_lock_bh(&web100_linkage_lock); + stats = web100stats_lookup(cid); + if (stats == NULL || stats->wc_dead) { + read_unlock_bh(&web100_linkage_lock); + return ERR_PTR(-ENOENT); + } + read_unlock_bh(&web100_linkage_lock); + + ino = ino_from_cid(cid); + inode = proc_web100_make_inode(dir->i_sb, ino); + if (inode == NULL) + return ERR_PTR(-ENOMEM); + inode->i_nlink = 2; + inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; + inode->i_flags |= S_IMMUTABLE; /* ? */ + inode->i_op = &connection_dir_iops; + inode->i_fop = &connection_dir_fops; + + dentry->d_op = &web100_dir_dentry_operations; + d_add(dentry, inode); + return NULL; + } + + static struct file_operations web100_dir_fops = { + readdir: web100_dir_readdir + }; + + static struct inode_operations web100_dir_iops = { + lookup: web100_dir_lookup + }; + + + /* + * Read/write handlers + */ + + /* A read handler for reading directly from the stats */ + /* read_data is the byte offset into struct web100stats */ + static int read_stats(void *buf, struct web100stats *stats, + struct web100_var *vp) + { + memcpy(buf, (char *)stats + vp->read_data, vp->len); + + return 0; + } + + /* A write handler for writing directly to the stats */ + /* write_data is a byte offset into struct web100stats */ + static int write_stats(void *buf, struct web100stats *stats, + struct web100_var *vp) + { + memcpy((char *)stats + vp->write_data, buf, vp->len); + + return 0; + } + + int read_LimCwnd(void *buf, struct web100stats *stats, struct web100_var *vp) + { + struct tcp_opt *tp = &stats->wc_sk->tp_pinfo.af_tcp; + __u32 tmp = (__u32)(tp->snd_cwnd_clamp * tp->mss_cache); + + memcpy(buf, &tmp, 4); + + return 0; + } + + int write_LimCwnd(void *buf, struct web100stats *stats, struct web100_var *vp) + { + struct tcp_opt *tp = &stats->wc_sk->tp_pinfo.af_tcp; + + tp->snd_cwnd_clamp = min(*(__u32 *)buf / tp->mss_cache, 65535U); + + return 0; + } + + int write_LimRwin(void *buf, struct web100stats *stats, struct web100_var *vp) + { + 
__u32 val = *(__u32 *)buf; + struct tcp_opt *tp = &stats->wc_sk->tp_pinfo.af_tcp; + + stats->wc_vars.LimRwin = tp->window_clamp = + min(val, 65535U << tp->rcv_wscale); + + return 0; + } + + extern __u32 sysctl_wmem_default; + extern __u32 sysctl_rmem_default; + + int write_SBufMode(void *buf, struct web100stats *stats, struct web100_var *vp) + { + __u32 val = *(__u32 *)buf; + struct sock *sk = stats->wc_sk; + + switch (val) { + case WC_BUFMODE_OS: + sk->userlocks &= ~SOCK_SNDBUF_LOCK; + break; + case WC_BUFMODE_WEB100: + sk->userlocks |= SOCK_SNDBUF_LOCK; + sk->sndbuf = sysctl_wmem_default; + sk->write_space(sk); + break; + default: + return 1; + } + stats->wc_vars.X_SBufMode = val; + + return 0; + } + + int write_RBufMode(void *buf, struct web100stats *stats, struct web100_var *vp) + { + __u32 val = *(__u32 *)buf; + struct sock *sk = stats->wc_sk; + + switch (val) { + case WC_BUFMODE_OS: + sk->userlocks &= ~SOCK_RCVBUF_LOCK; + stats->wc_vars.LimRwin = sk->tp_pinfo.af_tcp.window_clamp; + break; + case WC_BUFMODE_WEB100: + sk->userlocks |= SOCK_RCVBUF_LOCK; + sk->rcvbuf = sysctl_rmem_default; + stats->wc_vars.LimRwin = 0xffffffff; + break; + default: + return 1; + } + stats->wc_vars.X_RBufMode = val; + + return 0; + } + + /* A read handler for reading directly from the sk */ + /* read_data is a byte offset into the sk */ + static int read_sk(void *buf, struct web100stats *stats, + struct web100_var *vp) + { + /* Fill data with 0's if the connection is gone. 
*/ + if (stats->wc_sk == NULL) + memset(buf, 0, vp->len); + else + memcpy(buf, (char *)(stats->wc_sk) + vp->read_data, vp->len); + + return 0; + } + + static int write_sk(void *buf, struct web100stats *stats, struct web100_var *vp) + { + if (stats->wc_sk == NULL) + return 1; + else + memcpy((char *)(stats->wc_sk) + vp->write_data, buf, vp->len); + + return 0; + } + + #ifdef HAVE_MONO_TIME + extern void get_mono_time(__u64 *time); + #endif + + /* clean the clock */ + __u64 web100_mono_time() + { + #ifdef HAVE_MONO_TIME + __u64 time; + get_mono_time(&time); + return time; + #else + struct timeval now; + static struct timeval before; + + do_gettimeofday(&now); + + /* assure monotonic, no matter what */ + if ((now.tv_sec > before.tv_sec) || + ((now.tv_sec == before.tv_sec) && (now.tv_usec > before.tv_usec))) { + before = now; + } else { + before.tv_usec++; + if (before.tv_usec >= 1000000) { + before.tv_usec -= 1000000; + before.tv_sec++; + } + } + + return (1000000ULL * (__u64)before.tv_sec + before.tv_usec); + #endif + } + + /* A read handler to get the low part of the current time in usec */ + static int read_now(void *buf, struct web100stats *stats, + struct web100_var *vp) + { + __u64 val; + + val = web100_mono_time(); + val -= stats->wc_start_monotime; + memcpy(buf, (char *)&val, vp->len); + + return 0; + } + + #ifdef CONFIG_WEB100_NET100 + static int write_mss(void *buf, struct web100stats *stats, struct web100_var *vp) + { + struct sock *sk = stats->wc_sk; + struct tcp_opt *tp; + __u32 val = *(__u32 *)buf; + + if (sk == NULL) + return 1; + tp = &sk->tp_pinfo.af_tcp; + + if (val > tp->mss_cache) + return 1; + if (val < 1) + return 1; + + tp->mss_cache = val; + web100_update_mss(tp); + + return 0; + } + #endif + + static int write_sndbuf(void *buf, struct web100stats *stats, struct web100_var *vp) + { + (__u32)(stats->wc_sk->sndbuf) = *(__u32 *)buf; + + return write_LimCwnd(buf, stats, vp); + } + + static int write_rcvbuf(void *buf, struct web100stats *stats, 
struct web100_var *vp) + { + (__u32)(stats->wc_sk->rcvbuf) = *(__u32 *)buf; + + return write_LimRwin(buf, stats, vp); + } + + static int rw_noop(void *buf, struct web100stats *stats, struct web100_var *vp) + { + return 0; + } + + /* + * init + */ + + void __init proc_web100_init(void) + { + /* Set up the proc files. */ + proc_web100_dir = proc_mkdir("web100", NULL); + proc_web100_dir->proc_iops = &web100_dir_iops; + proc_web100_dir->proc_fops = &web100_dir_fops; + + proc_web100_header = create_proc_entry("header", S_IFREG | S_IRUGO, + proc_web100_dir); + proc_web100_header->proc_fops = &header_file_operations; + + /* Set up the contents of the proc files. */ + #define OFFSET_IN(type,var) ((unsigned long)(&(((type *)NULL)->var))) + #define OFFSET_ST(field) ((unsigned long)(&(((struct web100stats *)NULL)->wc_vars.field))) + #define OFFSET_SK(field) ((unsigned long)(&(((struct sock *)NULL)->field))) + + #define ADD_RO_STATSVAR(ino,name,type) \ + add_var(web100_file_lookup(ino), #name, type, \ + read_stats, OFFSET_ST(name), NULL, 0) + + #define ADD_RO_STATSRENAME(ino,name,type,var) \ + add_var(web100_file_lookup(ino), name, type, \ + read_stats, OFFSET_ST(var), NULL, 0) + + #define ADD_RO_STATSVAR_DEP(ino,name,type) \ + add_var(web100_file_lookup(ino), "_" #name, type, \ + read_stats, OFFSET_ST(name), NULL, 0) + + #define ADD_WO_STATSVAR(ino,name,type) \ + add_var(web100_file_lookup(ino), #name, type, NULL, 0, \ + write_stats, OFFSET_ST(name)) + + #define ADD_WO_STATSVAR_DEP(ino,name,type) \ + add_var(web100_file_lookup(ino), "_" #name, type, NULL, 0, \ + write_stats, OFFSET_ST(name)) + + #define ADD_RW_STATSVAR(ino,name,type) \ + add_var(web100_file_lookup(ino), #name, type, \ + read_stats, OFFSET_ST(name), \ + write_stats, OFFSET_ST(name)) + + #define ADD_RW_STATSVAR_DEP(ino,name,type) \ + add_var(web100_file_lookup(ino), "_" #name, type, \ + read_stats, OFFSET_ST(name), \ + write_stats, OFFSET_ST(name)) + + #define ADD_RO_SKVAR(ino,name,type,var) \ + 
add_var(web100_file_lookup(ino), #name, type, \ + read_sk, OFFSET_SK(var), NULL, 0) + + #define ADD_RW_SKVAR(ino,name,type,var) \ + add_var(web100_file_lookup(ino), #name, type, \ + read_sk, OFFSET_SK(var), write_sk, OFFSET_SK(var)) + + #define ADD_NOOP(ino,name,type) \ + add_var(web100_file_lookup(ino), #name, type, \ + rw_noop, 0, rw_noop, 0) + + /* spec */ + ADD_RO_STATSVAR(PROC_CONN_SPEC, LocalAddressType, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_SPEC, LocalAddress, WEB100_TYPE_INET_ADDRESS); + ADD_RO_STATSVAR(PROC_CONN_SPEC, LocalPort, WEB100_TYPE_INET_PORT_NUMBER); + ADD_RO_STATSVAR(PROC_CONN_SPEC, RemAddress, WEB100_TYPE_INET_ADDRESS); + ADD_RO_STATSVAR(PROC_CONN_SPEC, RemPort, WEB100_TYPE_INET_PORT_NUMBER); + ADD_RO_STATSRENAME(PROC_CONN_SPEC, "_RemoteAddress", WEB100_TYPE_INET_ADDRESS, RemAddress); + ADD_RO_STATSRENAME(PROC_CONN_SPEC, "_RemotePort", WEB100_TYPE_INET_PORT_NUMBER, RemPort); + + /* read */ + /* STATE */ + ADD_RO_STATSVAR(PROC_CONN_READ, State, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, SACKEnabled, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, TimestampsEnabled, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, NagleEnabled, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, ECNEnabled, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, SndWinScale, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, RcvWinScale, WEB100_TYPE_INTEGER); + + /* SYN OPTIONS */ + ADD_RO_STATSVAR(PROC_CONN_READ, ActiveOpen, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, MSSRcvd, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, WinScaleRcvd, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, WinScaleSent, WEB100_TYPE_INTEGER); + + /* DATA */ + ADD_RO_STATSVAR(PROC_CONN_READ, PktsOut, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DataPktsOut, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, AckPktsOut, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, 
DataBytesOut, WEB100_TYPE_COUNTER64); + ADD_RO_STATSVAR(PROC_CONN_READ, PktsIn, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DataPktsIn, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, AckPktsIn, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DataBytesIn, WEB100_TYPE_COUNTER64); + ADD_RO_STATSVAR(PROC_CONN_READ, SndUna, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SndNxt, WEB100_TYPE_INTEGER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SndMax, WEB100_TYPE_COUNTER32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_snd_una", WEB100_TYPE_COUNTER32, SndUna); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_snd_nxt", WEB100_TYPE_INTEGER32, SndNxt); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_snd_max", WEB100_TYPE_COUNTER32, SndMax); + ADD_RO_STATSVAR(PROC_CONN_READ, ThruBytesAcked, WEB100_TYPE_COUNTER64); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_ThruBytesSent", WEB100_TYPE_COUNTER64, ThruBytesAcked); + ADD_RO_STATSVAR(PROC_CONN_READ, SndISS, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, SendWraps, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, RcvNxt, WEB100_TYPE_COUNTER32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_rcv_nxt", WEB100_TYPE_COUNTER32, RcvNxt); + ADD_RO_STATSVAR(PROC_CONN_READ, ThruBytesReceived, WEB100_TYPE_COUNTER64); + ADD_RO_STATSVAR(PROC_CONN_READ, RecvISS, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, RecvWraps, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, StartTime, WEB100_TYPE_INTEGER32); + ADD_RO_STATSVAR(PROC_CONN_READ, StartTimeSec, WEB100_TYPE_INTEGER32); + ADD_RO_STATSVAR(PROC_CONN_READ, StartTimeUsec, WEB100_TYPE_INTEGER32); + add_var(web100_file_lookup(PROC_CONN_READ), "Duration", WEB100_TYPE_COUNTER64, read_now, 0, NULL, 0); + add_var(web100_file_lookup(PROC_CONN_READ), "_CurrTime", WEB100_TYPE_COUNTER64, read_now, 0, NULL, 0); + + /* SENDER CONGESTION */ + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTransSender", WEB100_TYPE_COUNTER32, SndLimTrans[WC_SNDLIM_SENDER]); 
+ ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimBytesSender", WEB100_TYPE_COUNTER64, SndLimBytes[WC_SNDLIM_SENDER]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTimeSender", WEB100_TYPE_COUNTER32, SndLimTime[WC_SNDLIM_SENDER]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTransCwnd", WEB100_TYPE_COUNTER32, SndLimTrans[WC_SNDLIM_CWND]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimBytesCwnd", WEB100_TYPE_COUNTER64, SndLimBytes[WC_SNDLIM_CWND]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTimeCwnd", WEB100_TYPE_COUNTER32, SndLimTime[WC_SNDLIM_CWND]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTransRwin", WEB100_TYPE_COUNTER32, SndLimTrans[WC_SNDLIM_RWIN]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimBytesRwin", WEB100_TYPE_COUNTER64, SndLimBytes[WC_SNDLIM_RWIN]); + ADD_RO_STATSRENAME(PROC_CONN_READ, "SndLimTimeRwin", WEB100_TYPE_COUNTER32, SndLimTime[WC_SNDLIM_RWIN]); + ADD_RO_STATSVAR(PROC_CONN_READ, SlowStart, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, CongAvoid, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, CongestionSignals, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, OtherReductions, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, X_OtherReductionsCV, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, X_OtherReductionsCM, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, CongestionOverCount, WEB100_TYPE_COUNTER32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_Recoveries", WEB100_TYPE_COUNTER32, CongestionSignals); + ADD_RO_STATSVAR(PROC_CONN_READ, CurCwnd, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentCwnd", WEB100_TYPE_GAUGE32, CurCwnd); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxCwnd, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, CurSsthresh, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentSsthresh", WEB100_TYPE_GAUGE32, CurSsthresh); + add_var(web100_file_lookup(PROC_CONN_READ), "LimCwnd", WEB100_TYPE_GAUGE32, read_LimCwnd, 0, NULL, 0); + 
ADD_RO_STATSVAR(PROC_CONN_READ, MaxSsthresh, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinSsthresh, WEB100_TYPE_GAUGE32); + + /* SENDER PATH MODEL */ + ADD_RO_STATSVAR(PROC_CONN_READ, FastRetran, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, Timeouts, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SubsequentTimeouts, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, CurTimeoutCount, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrTimeoutCount", WEB100_TYPE_GAUGE32, CurTimeoutCount); + ADD_RO_STATSVAR(PROC_CONN_READ, AbruptTimeouts, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, PktsRetrans, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, BytesRetrans, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DupAcksIn, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SACKsRcvd, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SACKBlocksRcvd, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, PreCongSumCwnd, WEB100_TYPE_COUNTER32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_SumCwndAtCong", WEB100_TYPE_COUNTER32, PreCongSumCwnd); + ADD_RO_STATSVAR(PROC_CONN_READ, PreCongSumRTT, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR_DEP(PROC_CONN_READ, PreCongCountRTT, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, PostCongSumRTT, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, PostCongCountRTT, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, ECERcvd, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SendStall, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, QuenchRcvd, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, RetranThresh, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, NonRecovDA, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, AckAfterFR, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, DSACKDups, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, SampleRTT, 
WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_SampledRTT", WEB100_TYPE_GAUGE32, SampleRTT); + ADD_RO_STATSVAR(PROC_CONN_READ, SmoothedRTT, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, RTTVar, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxRTT, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinRTT, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, SumRTT, WEB100_TYPE_COUNTER64); + ADD_RO_STATSVAR(PROC_CONN_READ, CountRTT, WEB100_TYPE_COUNTER32); + ADD_RO_STATSVAR(PROC_CONN_READ, CurRTO, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentRTO", WEB100_TYPE_GAUGE32, CurRTO); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxRTO, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinRTO, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, CurMSS, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentMSS", WEB100_TYPE_GAUGE32, CurMSS); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxMSS, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinMSS, WEB100_TYPE_GAUGE32); + + /* SENDER BUFFER */ + #define PROC_CONN_XTEST PROC_CONN_READ /* lazy */ + ADD_RO_SKVAR(PROC_CONN_READ, _Sndbuf, WEB100_TYPE_GAUGE32, sndbuf); + ADD_RO_STATSVAR(PROC_CONN_READ, CurRetxQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurRetranQueue", WEB100_TYPE_GAUGE32, CurRetxQueue); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxRetxQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_MaxRetranQueue", WEB100_TYPE_GAUGE32, MaxRetxQueue); + ADD_RO_STATSVAR(PROC_CONN_READ, CurAppWQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxAppWQueue, WEB100_TYPE_GAUGE32); + + /* SENDER BUFFER TUNING - See below */ + + /* LOCAL RECEIVER */ + ADD_RO_STATSVAR(PROC_CONN_READ, CurRwinSent, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentRwinSent", WEB100_TYPE_GAUGE32, CurRwinSent); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxRwinSent, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, 
MinRwinSent, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, LimRwin, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, DupAcksOut, WEB100_TYPE_COUNTER32); + ADD_RO_SKVAR(PROC_CONN_READ, _Rcvbuf, WEB100_TYPE_GAUGE32, rcvbuf); + ADD_RO_STATSVAR(PROC_CONN_READ, CurReasmQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxReasmQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, CurAppRQueue, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxAppRQueue, WEB100_TYPE_GAUGE32); + ADD_RO_SKVAR(PROC_CONN_XTEST, X_rcv_ssthresh, WEB100_TYPE_GAUGE32, tp_pinfo.af_tcp.rcv_ssthresh); + ADD_RO_SKVAR(PROC_CONN_XTEST, X_wnd_clamp, WEB100_TYPE_GAUGE32, tp_pinfo.af_tcp.window_clamp); + ADD_RO_STATSVAR(PROC_CONN_XTEST, X_dbg1, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_XTEST, X_dbg2, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_XTEST, X_dbg3, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_XTEST, X_dbg4, WEB100_TYPE_GAUGE32); + + /* OBSERVED RECEIVER */ + ADD_RO_STATSVAR(PROC_CONN_READ, CurRwinRcvd, WEB100_TYPE_GAUGE32); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_CurrentRwinRcvd", WEB100_TYPE_GAUGE32, CurRwinRcvd); + ADD_RO_STATSVAR(PROC_CONN_READ, MaxRwinRcvd, WEB100_TYPE_GAUGE32); + ADD_RO_STATSVAR(PROC_CONN_READ, MinRwinRcvd, WEB100_TYPE_GAUGE32); + + /* CONNECTION ID */ + ADD_RO_STATSVAR(PROC_CONN_READ, LocalAddressType, WEB100_TYPE_INTEGER); + ADD_RO_STATSVAR(PROC_CONN_READ, LocalAddress, WEB100_TYPE_INET_ADDRESS); + ADD_RO_STATSVAR(PROC_CONN_READ, LocalPort, WEB100_TYPE_INET_PORT_NUMBER); + ADD_RO_STATSVAR(PROC_CONN_READ, RemAddress, WEB100_TYPE_INET_ADDRESS); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_RemoteAddress", WEB100_TYPE_INET_ADDRESS, RemAddress); + ADD_RO_STATSVAR(PROC_CONN_READ, RemPort, WEB100_TYPE_INET_PORT_NUMBER); + ADD_RO_STATSRENAME(PROC_CONN_READ, "_RemotePort", WEB100_TYPE_INET_PORT_NUMBER, RemPort); + + ADD_RO_STATSVAR(PROC_CONN_READ, X_RcvRTT, WEB100_TYPE_GAUGE32); + + /* tune */ + 
add_var(web100_file_lookup(PROC_CONN_TUNE), "LimCwnd", + WEB100_TYPE_GAUGE32, read_LimCwnd, 0, + write_LimCwnd, 0); + add_var(web100_file_lookup(PROC_CONN_TUNE), "LimRwin", + WEB100_TYPE_GAUGE32, read_stats, OFFSET_ST(LimRwin), + write_LimRwin, 0); + #ifdef CONFIG_WEB100_NET100 + add_var(web100_file_lookup(PROC_CONN_TUNE), "CurMSS", + WEB100_TYPE_GAUGE32, read_stats, OFFSET_ST(CurMSS), + write_mss, 0); + #endif + add_var(web100_file_lookup(PROC_CONN_TUNE), "X_SBufMode", + WEB100_TYPE_INTEGER, read_stats, OFFSET_ST(X_SBufMode), + write_SBufMode, 0); + add_var(web100_file_lookup(PROC_CONN_TUNE), "X_RBufMode", + WEB100_TYPE_INTEGER, read_stats, OFFSET_ST(X_RBufMode), + write_RBufMode, 0); + + ADD_RW_SKVAR(PROC_CONN_TUNE, X_Sndbuf, WEB100_TYPE_GAUGE32, sndbuf); + ADD_RW_SKVAR(PROC_CONN_TUNE, X_Rcvbuf, WEB100_TYPE_GAUGE32, rcvbuf); + + ADD_NOOP(PROC_CONN_TUNE, _STuneMode, WEB100_TYPE_INTEGER); + ADD_RO_SKVAR(PROC_CONN_TUNE, _SndbufGet, WEB100_TYPE_GAUGE32, sndbuf); + add_var(web100_file_lookup(PROC_CONN_TUNE), "_SndbufSet", + WEB100_TYPE_GAUGE32, read_sk, OFFSET_SK(sndbuf), + write_sndbuf, 0); + ADD_RW_SKVAR(PROC_CONN_TUNE, _SAppBuf, WEB100_TYPE_GAUGE32, sndbuf); + add_var(web100_file_lookup(PROC_CONN_TUNE), "_SMaxWinBuf", + WEB100_TYPE_GAUGE32, read_sk, OFFSET_SK(sndbuf), + write_sndbuf, 0); + ADD_NOOP(PROC_CONN_TUNE, _SXtra, WEB100_TYPE_GAUGE32); + ADD_NOOP(PROC_CONN_TUNE, _STuneErr, WEB100_TYPE_INTEGER); + + ADD_NOOP(PROC_CONN_TUNE, _RTuneMode, WEB100_TYPE_INTEGER); + ADD_RO_SKVAR(PROC_CONN_TUNE, _RcvbufGet, WEB100_TYPE_GAUGE32, rcvbuf); + add_var(web100_file_lookup(PROC_CONN_TUNE), "_RcvbufSet", + WEB100_TYPE_GAUGE32, read_sk, OFFSET_SK(rcvbuf), + write_rcvbuf, 0); + ADD_RW_SKVAR(PROC_CONN_TUNE, _RAppBuf, WEB100_TYPE_GAUGE32, rcvbuf); + add_var(web100_file_lookup(PROC_CONN_TUNE), "_RMaxWinBuf", + WEB100_TYPE_GAUGE32, read_sk, OFFSET_SK(rcvbuf), + write_rcvbuf, 0); + ADD_NOOP(PROC_CONN_TUNE, _RXtra, WEB100_TYPE_GAUGE32); + ADD_NOOP(PROC_CONN_TUNE, _RTuneErr, 
WEB100_TYPE_INTEGER); + + #ifdef CONFIG_WEB100_NET100 + ADD_RW_STATSVAR(PROC_CONN_TUNE, WAD_IFQ, WEB100_TYPE_GAUGE32); + ADD_RW_STATSVAR(PROC_CONN_TUNE, WAD_MaxBurst, WEB100_TYPE_GAUGE32); + ADD_RW_STATSVAR(PROC_CONN_TUNE, WAD_AI, WEB100_TYPE_GAUGE32); + ADD_RW_STATSVAR(PROC_CONN_TUNE, WAD_MD, WEB100_TYPE_GAUGE32); + #endif + } diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/asm-alpha/timex.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/asm-alpha/timex.h *** linux-2.4.20.orig/include/asm-alpha/timex.h Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/asm-alpha/timex.h Mon Jan 19 17:41:50 2004 *************** *** 17,22 **** --- 17,26 ---- * which isn't an evil thing. */ + #ifdef CONFIG_WEB100_STATS + #define HAVE_MONO_TIME 1 + #endif + typedef unsigned int cycles_t; extern cycles_t cacheflush_time; diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/asm-i386/timex.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/asm-i386/timex.h *** linux-2.4.20.orig/include/asm-i386/timex.h Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/asm-i386/timex.h Mon Jan 19 17:41:50 2004 *************** *** 20,25 **** --- 20,29 ---- (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ << (SHIFT_SCALE-SHIFT_HZ)) / HZ) + #ifdef CONFIG_WEB100_STATS + #define HAVE_MONO_TIME 1 + #endif + /* * Standard way to access the cycle counter on i586+ CPUs. * Currently only used on SMP. diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/asm-ia64/timex.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/asm-ia64/timex.h *** linux-2.4.20.orig/include/asm-ia64/timex.h Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/asm-ia64/timex.h Mon Jan 19 17:41:51 2004 *************** *** 10,15 **** --- 10,19 ---- * Also removed cacheflush_time as it's entirely unused. 
*/ + #ifdef CONFIG_WEB100_STATS + #define HAVE_MONO_TIME 1 + #endif + typedef unsigned long cycles_t; static inline cycles_t diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/asm-ppc/timex.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/asm-ppc/timex.h *** linux-2.4.20.orig/include/asm-ppc/timex.h Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/asm-ppc/timex.h Mon Jan 19 17:41:51 2004 *************** *** 19,24 **** --- 19,28 ---- (1000000/CLOCK_TICK_FACTOR) / (CLOCK_TICK_RATE/CLOCK_TICK_FACTOR)) \ << (SHIFT_SCALE-SHIFT_HZ)) / HZ) + #ifdef CONFIG_WEB100_STATS + #define HAVE_MONO_TIME 1 + #endif + typedef unsigned long cycles_t; /* diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/linux/netlink.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/linux/netlink.h *** linux-2.4.20.orig/include/linux/netlink.h Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/linux/netlink.h Mon Jan 19 17:41:51 2004 *************** *** 8,13 **** --- 8,16 ---- #define NETLINK_TCPDIAG 4 /* TCP socket monitoring */ #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ #define NETLINK_ARPD 8 + #ifdef CONFIG_WEB100_STATS + #define NETLINK_WEB100 10 + #endif #define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */ #define NETLINK_IP6_FW 13 #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/linux/proc_fs.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/linux/proc_fs.h *** linux-2.4.20.orig/include/linux/proc_fs.h Sat Aug 3 01:39:45 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/linux/proc_fs.h Mon Jan 19 17:41:51 2004 *************** extern struct proc_dir_entry *proc_root_ *** 86,91 **** --- 86,95 ---- extern void proc_root_init(void); extern void proc_misc_init(void); + #ifdef CONFIG_WEB100_STATS + extern void proc_web100_init(void); + #endif + struct dentry 
*proc_pid_lookup(struct inode *dir, struct dentry * dentry); void proc_pid_delete_inode(struct inode *inode); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/linux/sysctl.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/linux/sysctl.h *** linux-2.4.20.orig/include/linux/sysctl.h Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/linux/sysctl.h Mon Jan 19 17:41:51 2004 *************** enum *** 292,298 **** NET_IPV4_NONLOCAL_BIND=88, NET_IPV4_ICMP_RATELIMIT=89, NET_IPV4_ICMP_RATEMASK=90, ! NET_TCP_TW_REUSE=91 }; enum { --- 292,350 ---- NET_IPV4_NONLOCAL_BIND=88, NET_IPV4_ICMP_RATELIMIT=89, NET_IPV4_ICMP_RATEMASK=90, ! NET_TCP_TW_REUSE=91, ! ! NET_TCP_MODERATE_ON_TXQ=92, ! NET_TCP_MODERATE_CWND=93, ! ! #ifdef CONFIG_LIMITED_SLOW_START ! NET_TCP_SS_MAX_SSTHRESH=94, ! #endif ! ! #ifdef CONFIG_ALTAIMD ! NET_TCP_ALTAIMD=95, ! ! /* Scalable TCP */ ! NET_TCP_SCALABLE_HIGHWIN=96, ! NET_TCP_SCALABLE_1_ON_A=97, ! NET_TCP_SCALABLE_1_ON_B=100, ! ! /* GRIDDT */ ! NET_TCP_GRIDDT_MSS_REF=101, ! NET_TCP_GRIDDT_RTT_REF=102, ! NET_TCP_GRIDDT_ADD_CNT_CLAMP=103, ! #endif ! ! #ifdef CONFIG_RFC3465 ! NET_TCP_ABC=104, ! NET_TCP_ABC_L=105, ! #endif ! ! #ifdef CONFIG_TCP_PKTDROP ! NET_IPV4_TCP_PKTDROP_RATE=106, ! #endif ! ! #ifdef CONFIG_TCP_SACK ! NET_TCP_NEW_RENO=107, ! NET_TCP_SLACK_RTO=108, ! #endif ! ! #ifdef CONFIG_WEB100 ! NET_IPV4_WEB100_DEFAULT_WSCALE, ! #endif ! #ifdef CONFIG_WEB100_NET100 ! NET_IPV4_WEB100_NO_METRICS_SAVE, ! NET_IPV4_WAD_IFQ, ! NET_IPV4_WAD_MAX_BURST, ! #endif ! #ifdef CONFIG_WEB100_STATS ! NET_IPV4_WEB100_SBUFMODE, ! NET_IPV4_WEB100_RBUFMODE, ! NET_IPV4_WEB100_FPERMS, ! NET_IPV4_WEB100_GID, ! NET_IPV4_WEB100_SNDBUF_EMU, ! NET_IPV4_WEB100_RCVBUF_EMU, ! 
#endif }; enum { *************** extern int proc_doulongvec_minmax(ctl_ta *** 658,663 **** --- 710,719 ---- void *, size_t *); extern int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int, struct file *, void *, size_t *); + #ifdef CONFIG_WEB100_STATS + extern int web100_proc_dointvec_update(ctl_table *, int, struct file *, + void *, size_t *); + #endif extern int do_sysctl (int *name, int nlen, void *oldval, size_t *oldlenp, diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/linux/tcp.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/linux/tcp.h *** linux-2.4.20.orig/include/linux/tcp.h Thu Nov 22 19:47:11 2001 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/linux/tcp.h Mon Jan 19 17:41:51 2004 *************** enum { *** 128,133 **** --- 128,150 ---- #define TCP_INFO 11 /* Information about this connection. */ #define TCP_QUICKACK 12 /* Block/reenable quick acks */ + #ifdef CONFIG_ALTAIMD + + enum { + AIMD_VANILLA = 0, + AIMD_HSTCP = 1, + AIMD_SCALABLE = 2, + AIMD_HTCP = 3, + AIMD_GRIDDT = 4 + }; + + /* Scalable TCP options */ + #define TCP_SCALABLE_HIGHWIN 13 /* threshold for scalable activity */ + #define TCP_SCALABLE_1_ON_A 14 /* window increases to cwnd + a per ack */ + #define TCP_SCALABLE_1_ON_B 15 /* window decreases to (1-b)*cwnd on cong */ + + #endif + #define TCPI_OPT_TIMESTAMPS 1 #define TCPI_OPT_SACK 2 #define TCPI_OPT_WSCALE 4 diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/net/snmp.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/net/snmp.h *** linux-2.4.20.orig/include/net/snmp.h Thu Nov 22 19:47:11 2001 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/net/snmp.h Mon Jan 19 17:41:51 2004 *************** struct linux_mib *** 256,261 **** --- 256,271 ---- unsigned long TCPAbortOnLinger; unsigned long TCPAbortFailed; unsigned long TCPMemoryPressures; + #ifdef CONFIG_TCP_SACK + unsigned long TCPHintClears; + unsigned long TCPMarkHeadHintHits; + unsigned long 
TCPUpdateScoreHintHits; + unsigned long TCPXmitRetranLostHintHits; + unsigned long TCPXmitRetranForwardHintHits; + unsigned long TCPSackFastPathHintHits; + unsigned long TCPSackBlockEasyHits; + unsigned long TCPSackBlockNormalised; + #endif unsigned long __pad[0]; } ____cacheline_aligned; diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/net/sock.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/net/sock.h *** linux-2.4.20.orig/include/net/sock.h Sat Aug 3 01:39:46 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/net/sock.h Mon Jan 19 17:41:51 2004 *************** struct atm_vcc; *** 103,108 **** --- 103,112 ---- #include #endif + #ifdef CONFIG_WEB100_STATS + #include + #endif + #include #include *************** struct tcp_sack_block { *** 245,250 **** --- 249,270 ---- __u32 end_seq; }; + + #ifdef CONFIG_TCP_SACK2 + struct sacked_list_item { + struct sk_buff *skb; + struct sacked_list_item *prev; + struct sacked_list_item *next; + }; + + struct sacked_list_details { + struct sacked_list_item *sacked_list_head; + struct sacked_list_item *sacked_list_tail; + __u16 sacked_list_size; + }; + #endif + + struct tcp_opt { int tcp_header_len; /* Bytes of tcp header to send */ *************** struct tcp_opt { *** 316,332 **** __u32 left_out; /* Packets which leaved network */ __u32 retrans_out; /* Retransmitted packets out */ - /* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ __u32 snd_ssthresh; /* Slow start size threshold */ __u32 snd_cwnd; /* Sending congestion window */ ! __u16 snd_cwnd_cnt; /* Linear increase counter */ ! __u16 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ __u32 snd_cwnd_used; __u32 snd_cwnd_stamp; /* Two commonly used timers in both sender and receiver paths. 
*/ unsigned long timeout; struct timer_list retransmit_timer; /* Resend (no ack) */ --- 336,374 ---- __u32 left_out; /* Packets which leaved network */ __u32 retrans_out; /* Retransmitted packets out */ /* * Slow start and congestion control (see also Nagle, and Karn & Partridge) */ __u32 snd_ssthresh; /* Slow start size threshold */ + + #ifdef CONFIG_LIMITED_SLOW_START + __u32 snd_lss_k; /* Linear slow start increment */ + __u16 snd_ssthresh_cnt1; /* Linear slow start counter for Limited Slow Start */ + __u16 snd_ssthresh_cnt2; /* Linear slow start counter for Limited Slow Start */ + #endif + __u32 snd_cwnd; /* Sending congestion window */ ! __u32 snd_cwnd_cnt; /* Linear increase counter */ ! __u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ __u32 snd_cwnd_used; __u32 snd_cwnd_stamp; + #ifdef CONFIG_ALTAIMD + /* HTCP variables */ + __u16 snd_ccount; /*number of RTT's since last back-off */ + __u16 snd_cwnd_cnt2; /* counter used as part of snd_ccount calculation */ + __u32 snd_minRTT; /* minimum RTT */ + __u8 snd_decreasenum; /*current backoff factor <<7 */ + __u8 snd_decreasenum2; /*min backoff factor <<7 */ + __u32 snd_packetcount;/* number of packets acked since snd_lasttime */ + __u32 snd_lasttime; + __u32 snd_maxB; /* max throughput achieved in current congestion epoch */ + __u32 snd_oldmaxB; /* max throughput achieved in previous congestion epoch */ + __u32 snd_minB; /* min throughput achieved in current congestion epoch */ + __u32 snd_Bi; /* current achieved throughput */ + __u32 snd_modecount; + #endif + /* Two commonly used timers in both sender and receiver paths. 
*/ unsigned long timeout; struct timer_list retransmit_timer; /* Resend (no ack) */ *************** struct tcp_opt { *** 334,339 **** --- 376,385 ---- struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ + #ifdef CONFIG_TCP_SACK2 + struct sacked_list_details sacked_list; + #endif + struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ struct sk_buff *send_head; /* Front of stuff to transmit */ struct page *sndmsg_page; /* Cached page for sendmsg */ *************** struct tcp_opt { *** 394,399 **** --- 440,467 ---- __u8 urg_mode; /* In urgent mode */ __u32 snd_up; /* Urgent pointer */ + #ifdef CONFIG_ALTAIMD + __u32 rh_target; /* target when rate-halving */ + + /* HSTCP TCP options */ + __u8 hstcp_entry_index; /* index into hstcp_table[] of the current AIMD parameters */ + + /* Scalable TCP options */ + __u32 scalable_highwin; /* when does scalable kick in */ + __u8 scalable_1_on_a; /* increase by a each ack */ + __u8 scalable_1_on_b; /* decrease by (1-b)cwnd each cong event */ + + /* GridDT */ + __u8 griddt_snd_cwnd_inc; /* Cwnd additive increment */ + __u32 griddt_snd_cwnd_add_cnt;/* Counter to update snd_cwnd_inc */ + __u32 griddt_min_rtt; /* Min RTT over the life of the connection */ + #endif + + #ifdef CONFIG_RFC3465 + /* Appropriate Byte Counting (RFC3465) */ + __u32 bytes_acked; + #endif + /* The syn_wait_lock is necessary only to avoid tcp_get_info having * to grab the main lock sock while browsing the listening hash * (otherwise it's deadlock prone). 
*************** struct tcp_opt { *** 418,423 **** --- 486,538 ---- int linger2; unsigned long last_synq_overflow; + + #ifdef CONFIG_TCP_SACK + /* retrans queue hinting */ + struct sk_buff* mark_head_lost_skb_hint; + int mark_head_lost_cnt_hint; + + struct sk_buff* update_scoreboard_skb_hint; + + struct sk_buff* xmit_retransmit_queue_lost_skb_hint; + int xmit_retransmit_queue_lost_cnt_hint; + struct sk_buff* xmit_retransmit_queue_forward_skb_hint; + int xmit_retransmit_queue_forward_cnt_hint; + + /* SACK fastpath */ + struct tcp_sack_block recv_sack_cache[4]; + int sackfastpath_facket_cnt_hint; + struct sk_buff* sackfastpath_skb_hint; + #endif + + /* CONFIG_WEB100_STATS */ + /* For storing Web100 protocol-specific instrument data */ + #ifdef CONFIG_WEB100_STATS + struct web100stats *tcp_stats; + #endif + + int rcv_space; /* space available for rcv queue */ + int rcv_alloc; /* space used by rcv queue */ + __u32 rcv_hi_seq; /* highest received valid sequence number */ + __u32 rcv_prev_tstamp; /* last timestamp sent */ + + /* For DRS-style window measurement */ + __u32 rcv_rtt; + __u32 rcv_rtt_seq; + unsigned long rcv_rtt_time; + __u32 rcv_winest_seq; + unsigned long rcv_winest_time; + + /* For timestamps window measurement */ + __u8 rcv_tswin_pending; /* Measurement pending */ + __u32 rcv_tswin_tstamp; /* time stamp we are waiting to be echoed */ + __u32 rcv_tswin_seq; /* rcv_hi_seq when we first sent rcv_tswin_tstamp */ + + #ifdef CONFIG_TCP_PKTDROP + /* YTL: TCP Packet Dropping Counter */ + __u32 pktdrop_cnt; /* looped counter for number of packets in */ + #endif + }; *************** struct sock { *** 586,592 **** #endif /* CONFIG_SPX */ } tp_pinfo; ! int err, err_soft; /* Soft holds errors that don't cause failure but are the cause of a persistent failure not just --- 701,707 ---- #endif /* CONFIG_SPX */ } tp_pinfo; ! 
int err, err_soft; /* Soft holds errors that don't cause failure but are the cause of a persistent failure not just *************** struct sock { *** 658,664 **** #endif } protinfo; - /* This part is used for the timeout functions. */ struct timer_list timer; /* This is the sock cleanup timer. */ struct timeval stamp; --- 773,778 ---- *************** struct sock { *** 678,683 **** --- 792,799 ---- int (*backlog_rcv) (struct sock *sk, struct sk_buff *skb); void (*destruct)(struct sock *sk); + + int retx_alloc; }; /* The per-socket spinlock must be held here. */ *************** do { spin_lock_bh(&((__sk)->lock.slock)) *** 803,808 **** --- 919,925 ---- #define bh_lock_sock(__sk) spin_lock(&((__sk)->lock.slock)) #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->lock.slock)) + extern struct sock * sk_alloc(int family, int priority, int zero_it); extern void sk_free(struct sock *sk); *************** sock_recv_timestamp(struct msghdr *msg, *** 1301,1304 **** --- 1418,1509 ---- extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; + #ifdef CONFIG_TCP_SACK2 + #define sacked_list_head(tp) ((tp)->sacked_list.sacked_list_head) + #define sacked_list_tail(tp) ((tp)->sacked_list.sacked_list_tail) + #define sacked_list_size(tp) ((tp)->sacked_list.sacked_list_size) + #define sacked_list_lock(tp) ((tp)->sacked_list.lock) /* NOTE(review): struct sacked_list_details declares no 'lock' member; unusable as written */ + /* Unlink sl from tp's sacked list, fix up head/tail/size, then kfree() it. Logs and returns if either argument is NULL. */ + static inline void free_sacked_list (struct sacked_list_item *sl, struct tcp_opt *tp) + { + if (sl == NULL) { printk("free_sacked_list: sl==NULL\n"); return; } + if (tp == NULL) { printk("free_sacked_list: tp==NULL\n"); return; } + + if (sl->prev != NULL) + (sl->prev)->next = sl->next; + if (sl->next != NULL) + (sl->next)->prev = sl->prev; + if (sacked_list_head(tp) == sl && sacked_list_head(tp) != NULL) + sacked_list_head(tp)=sl->next; + if (sacked_list_tail(tp) == sl && sacked_list_tail(tp) != NULL) + sacked_list_tail(tp)=sl->prev; + if (sacked_list_size(tp) == 0) + printk("free_sacked_list(): sacked_list_size(tp) < 0\n"); + else + sacked_list_size(tp)--; + + kfree(sl); + + } + + 
static inline void create_sacked_list (struct tcp_opt *tp) + { + if (tp == NULL) { printk("create_sacked_list: tp==NULL\n"); return; } + + sacked_list_head(tp) = NULL; + sacked_list_tail(tp) = NULL; + sacked_list_size(tp) = 0; + } + /* Free every item on tp's sacked list and reset head/tail/size to empty. */ + static inline void purge_sacked_list (struct tcp_opt *tp) + { + if (tp == NULL) { printk("purge_sacked_list: tp==NULL\n"); return; } + + while (sacked_list_head(tp) != NULL) + free_sacked_list (sacked_list_head(tp), tp); + + sacked_list_head(tp) = NULL; + sacked_list_tail(tp) = NULL; + sacked_list_size(tp) = 0; + } + /* Purge tp's sacked list and leave it in the empty state. */ + static inline void destroy_sacked_list (struct tcp_opt *tp) + { + if (tp == NULL) { printk("destroy_sacked_list: tp==NULL\n"); return; } + + purge_sacked_list(tp); + sacked_list_head(tp) = NULL; + sacked_list_tail(tp) = NULL; + sacked_list_size(tp) = 0; + } + /* Append skb to the tail of tp's sacked list. Returns 0 on success, 1 if tp is NULL, the list is full, or allocation fails. */ + static inline int add_sacked_list_tail (struct sk_buff *skb, struct tcp_opt *tp) + { + struct sacked_list_item *sl_free; + + if (tp == NULL) { printk("add_sacked_list_tail: tp==NULL\n"); return 1; } + + if (sacked_list_size(tp) >= DFLT_TCP_SACKED_LIST_SIZE) + /* out of space */ + return 1; + + sl_free = kmalloc(sizeof(struct sacked_list_item), GFP_ATOMIC); + if (sl_free == NULL) + /* out of space */ + return 1; + + sl_free->prev = sacked_list_tail(tp); + sl_free->next = NULL; + sl_free->skb = skb; + if (sacked_list_tail(tp) != NULL) + (sacked_list_tail(tp))->next = sl_free; + sacked_list_tail(tp) = sl_free; + if (sacked_list_head(tp) == NULL) + sacked_list_head(tp)=sacked_list_tail(tp); + sacked_list_size(tp)++; + + return 0; + } + #endif + #endif /* _SOCK_H */ diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/net/tcp.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/net/tcp.h *** linux-2.4.20.orig/include/net/tcp.h Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/net/tcp.h Mon Jan 19 17:41:51 2004 *************** *** 31,36 **** --- 31,40 ---- #include #include + #ifdef CONFIG_WEB100_STATS + #include + #endif + /* This is for all connections with a 
full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. *************** extern int sysctl_tcp_app_win; *** 461,470 **** --- 465,541 ---- extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_tw_reuse; + #ifdef CONFIG_ALTAIMD + extern int sysctl_tcp_altAIMD; + + /* hstcp */ + struct hstcp_entry { + __u32 cwnd; + __u8 a_val; + __u8 b_val; + }; + extern struct hstcp_entry hstcp_table[]; + + /* scalabletcp */ + extern int sysctl_tcp_scalable_highwin; + extern int sysctl_tcp_scalable_1_on_a; + extern int sysctl_tcp_scalable_1_on_b; + + /* YTL: GridDT Stuff - is this the best place to put it??? */ + extern int sysctl_tcp_griddt_mss_ref; + extern __u32 sysctl_tcp_griddt_rtt_ref; + extern __u32 sysctl_tcp_griddt_add_cnt_clamp; + #endif + + #ifdef CONFIG_RFC3465 + extern int sysctl_tcp_abc; + extern int sysctl_tcp_abc_L; + #endif + + extern int sysctl_tcp_moderate_on_txq; + + #ifdef CONFIG_MODCWND + extern int sysctl_tcp_moderate_cwnd; + #endif + + #ifdef CONFIG_WEB100 + extern int sysctl_web100_default_wscale; + #endif + #ifdef CONFIG_WEB100_NET100 + extern int sysctl_web100_no_metrics_save; + extern int sysctl_WAD_IFQ; + extern int sysctl_WAD_MaxBurst; + #endif + #ifdef CONFIG_WEB100_STATS + extern int sysctl_web100_sbufmode; + extern int sysctl_web100_rbufmode; + extern int sysctl_web100_fperms; + extern int sysctl_web100_gid; + extern int sysctl_web100_sndbuf_emu; + extern int sysctl_web100_rcvbuf_emu; + #endif + + #ifdef CONFIG_TCP_PKTDROP + extern __u32 sysctl_tcp_pktdrop_rate; + #endif + extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; + #ifdef CONFIG_WEB100_STATS + extern atomic_t tcp_rwin_announced; + #endif + + #ifdef CONFIG_LIMITED_SLOW_START + extern int sysctl_tcp_ss_max_ssthresh; + #endif + + #ifdef CONFIG_TCP_SACK + extern int sysctl_tcp_new_reno; + extern int sysctl_tcp_slack_rto; + #endif + 
struct open_request; struct or_calltable { *************** extern int tcp_retransmit_skb(struct soc *** 796,801 **** --- 867,876 ---- extern void tcp_xmit_retransmit_queue(struct sock *); extern void tcp_simple_retransmit(struct sock *); + #ifdef CONFIG_TCP_SACK + extern void clear_all_retrans_hints(struct tcp_opt *); + #endif + extern void tcp_send_probe0(struct sock *); extern void tcp_send_partial(struct sock *); extern int tcp_write_wakeup(struct sock *); *************** static inline void tcp_fast_path_check(s *** 946,952 **** { if (skb_queue_len(&tp->out_of_order_queue) == 0 && tp->rcv_wnd && ! atomic_read(&sk->rmem_alloc) < sk->rcvbuf && !tp->urg_data) tcp_fast_path_on(tp); } --- 1021,1032 ---- { if (skb_queue_len(&tp->out_of_order_queue) == 0 && tp->rcv_wnd && ! #ifdef CONFIG_WEB100_STATS ! (sysctl_web100_rbufmode == 1 || ! atomic_read(&sk->rmem_alloc) < sk->rcvbuf) && ! #else /* keep the stock receive-buffer test when Web100 stats are compiled out */ ! atomic_read(&sk->rmem_alloc) < sk->rcvbuf && ! #endif !tp->urg_data) tcp_fast_path_on(tp); } *************** struct tcp_skb_cb { *** 1024,1029 **** --- 1102,1109 ---- __u16 urg_ptr; /* Valid w/URG flags is set. 
*/ __u32 ack_seq; /* Sequence number ACK'd */ + + __u8 transmitted; }; #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) *************** struct tcp_skb_cb { *** 1034,1039 **** --- 1114,1131 ---- (skb != (struct sk_buff *)&(sk)->write_queue); \ skb=skb->next) + #ifdef CONFIG_TCP_SACK + #define for_retrans_queue_from(skb, skb_init, sk, tp) \ + for (skb = (struct sk_buff *)(skb_init); \ + (skb != (tp)->send_head) && \ + (skb != (struct sk_buff *)&(sk)->write_queue); \ + skb=skb->next) + + #define hint_advance(skb, snd_una) \ + if( (skb) && !after(TCP_SKB_CB(skb)->end_seq, snd_una)) \ + skb=NULL; + #endif + #include *************** static inline int tcp_wspace(struct sock *** 1051,1056 **** --- 1143,1168 ---- return sk->sndbuf - sk->wmem_queued; } + #ifdef CONFIG_ALTAIMD + /* HTCP functions */ + static inline void htcp_reset(struct tcp_opt *tp) + { + tp->snd_ccount=0; + tp->snd_cwnd_cnt2=0; + } + + static inline void decreasenum_check(struct tcp_opt *tp) + { + if (tp->snd_decreasenum < 1<<6) tp->snd_decreasenum=1<<6; /* 0.5 */ + if (tp->snd_decreasenum > 102) tp->snd_decreasenum=102; /* 102>>7 is 0.8 */ + } + + static inline void decreasenum_reset(struct tcp_opt *tp) + { + tp->snd_decreasenum=1<<6; /* 0.5 */ + tp->snd_modecount=0; + } + #endif /* This determines how many packets are "in the network" to the best * of our knowledge. 
In many cases it is conservative, but where *************** static __inline__ unsigned int tcp_packe *** 1071,1076 **** --- 1183,1209 ---- return tp->packets_out - tp->left_out + tp->retrans_out; } + #ifdef CONFIG_ALTAIMD + /* Move tp->hstcp_entry_index up or down hstcp_table[] until it matches the current snd_cwnd, publish the a/b values to Web100, and return the selected entry. */ + static inline struct hstcp_entry altAIMD_get_hstcp_entry( struct tcp_opt *tp ) + { + if ( tp->snd_cwnd > hstcp_table[tp->hstcp_entry_index].cwnd ) { + while ( tp->snd_cwnd > hstcp_table[tp->hstcp_entry_index].cwnd ) /* NOTE(review): assumes hstcp_table ends in a sentinel cwnd - confirm */ + tp->hstcp_entry_index++; + } else if ( tp->hstcp_entry_index > 0 && tp->snd_cwnd < hstcp_table[tp->hstcp_entry_index-1].cwnd) { /* index 0 has no predecessor: never read hstcp_table[-1] */ + while ( tp->hstcp_entry_index > 0 && tp->snd_cwnd < hstcp_table[tp->hstcp_entry_index-1].cwnd ) + tp->hstcp_entry_index--; + } + + WEB100_VAR_SET(tp, WAD_AI, hstcp_table[tp->hstcp_entry_index].a_val ); + WEB100_VAR_SET(tp, WAD_MD, hstcp_table[tp->hstcp_entry_index].b_val ); + + return hstcp_table[tp->hstcp_entry_index]; + } + + + #endif + /* Recalculate snd_ssthresh, we want to set it to: * * one half the current congestion window, but no *************** static __inline__ unsigned int tcp_packe *** 1078,1084 **** --- 1211,1240 ---- */ static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp) { + + /* store the amount to reduce cwnd by rate halving */ + #ifdef CONFIG_ALTAIMD + /* if HSTCP */ + if( sysctl_tcp_altAIMD == AIMD_HSTCP ){ + /* not forgetting that b_val is the decrease factor scaled by 256 (hence the >> 8) */ + tp->rh_target = max( ( tp->snd_cwnd * (256 - altAIMD_get_hstcp_entry( tp ).b_val) ) >> 8, 2U); + } + /* if scalable tcp (a zero 1_on_b would divide by zero, so fall through to the branches below) */ + else if ( sysctl_tcp_altAIMD == AIMD_SCALABLE && sysctl_tcp_scalable_1_on_b > 0 && ( tp->snd_cwnd > sysctl_tcp_scalable_highwin ) ){ + tp->rh_target = max(tp->snd_cwnd-(tp->snd_cwnd/sysctl_tcp_scalable_1_on_b), 2U); + } + else if ( sysctl_tcp_altAIMD == AIMD_HTCP ) { + tp->rh_target = max((tp->snd_cwnd*tp->snd_decreasenum)>>7, 2U); + } + /* otherwise do normal halving */ + else { + tp->rh_target = max(tp->snd_cwnd >> 1U, 2U); + } + + return tp->rh_target; + #else return max(tp->snd_cwnd >> 1U, 2U); + 
#endif } /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd. *************** static inline void __tcp_enter_cwr(struc *** 1130,1143 **** --- 1286,1306 ---- tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1U); tp->snd_cwnd_cnt = 0; + #ifdef CONFIG_ALTAIMD + htcp_reset(tp); + #endif tp->high_seq = tp->snd_nxt; tp->snd_cwnd_stamp = tcp_time_stamp; TCP_ECN_queue_cwr(tp); + WEB100_UPDATE_FUNC(tp, web100_update_congestion(tp, 0)); } static inline void tcp_enter_cwr(struct tcp_opt *tp) { tp->prior_ssthresh = 0; + #ifdef CONFIG_RFC3465 + tp->bytes_acked=0; + #endif if (tp->ca_state < TCP_CA_CWR) { __tcp_enter_cwr(tp); tp->ca_state = TCP_CA_CWR; *************** extern __u32 tcp_init_cwnd(struct tcp_op *** 1151,1159 **** --- 1314,1355 ---- */ static __inline__ __u32 tcp_max_burst(struct tcp_opt *tp) { + #ifdef CONFIG_WEB100_NET100 + return (NET100_WAD(tp, WAD_MaxBurst, sysctl_WAD_MaxBurst)); + #endif return 3; } + /* CWND moderation, preventing bursts due to too big ACKs + * in dubious situations. + */ + static __inline__ void tcp_moderate_cwnd(struct tcp_opt *tp) + { + + #ifdef CONFIG_MODCWND + if ( sysctl_tcp_moderate_cwnd ) { + #endif + + #ifdef CONFIG_WEB100_STATS + { + u32 t = tcp_packets_in_flight(tp) + tcp_max_burst(tp); + if (t < tp->snd_cwnd) { + tp->snd_cwnd = t; + WEB100_VAR_INC(tp, OtherReductions); + WEB100_VAR_INC(tp, X_OtherReductionsCM); + } + }; + #else + tp->snd_cwnd = min(tp->snd_cwnd, + tcp_packets_in_flight(tp)+tcp_max_burst(tp)); + #endif + tp->snd_cwnd_stamp = tcp_time_stamp; + + #ifdef CONFIG_MODCWND + } + #endif + } + static __inline__ int tcp_minshall_check(struct tcp_opt *tp) { return after(tp->snd_sml,tp->snd_una) && *************** tcp_nagle_check(struct tcp_opt *tp, stru *** 1188,1194 **** /* This checks if the data bearing packet SKB (usually tp->send_head) * should be put on the wire right now. */ ! 
static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, unsigned cur_mss, int nonagle) { /* RFC 1122 - section 4.2.3.4 --- 1384,1394 ---- /* This checks if the data bearing packet SKB (usually tp->send_head) * should be put on the wire right now. */ ! /* Web100: ! * Modified to return WC_SNDLIM_NONE when ok, reason if not ok. ! * The name is changed because we have changed the return value. ! */ ! static __inline__ int tcp_snd_wait(struct tcp_opt *tp, struct sk_buff *skb, unsigned cur_mss, int nonagle) { /* RFC 1122 - section 4.2.3.4 *************** static __inline__ int tcp_snd_test(struc *** 1211,1225 **** * to get new data) and if room at tail of skb is * not enough to save something seriously (<32 for now). */ ! /* Don't be strict about the congestion window for the * final FIN frame. -DaveM */ return ((nonagle==1 || tp->urg_mode ! || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && ! ((tcp_packets_in_flight(tp) < tp->snd_cwnd) || ! (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && ! !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); } static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp) --- 1411,1435 ---- * to get new data) and if room at tail of skb is * not enough to save something seriously (<32 for now). */ ! if ((tcp_packets_in_flight(tp) >= tp->snd_cwnd) && ! !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) ! return WC_SNDLIM_CWND; ! if (after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)) ! return WC_SNDLIM_RWIN; ! if (!(nonagle == 1 || tp->urg_mode || ! !tcp_nagle_check(tp, skb, cur_mss, nonagle))) ! return WC_SNDLIM_SENDER; ! return WC_SNDLIM_NONE; ! #if 0 /* Don't be strict about the congestion window for the * final FIN frame. -DaveM */ return ((nonagle==1 || tp->urg_mode ! || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && ! ((tcp_packets_in_flight(tp) < tp->snd_cwnd) || ! (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && ! !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); ! 
#endif } static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp) *************** static __inline__ void __tcp_push_pendin *** 1247,1253 **** if (skb) { if (!tcp_skb_is_last(sk, skb)) nonagle = 1; ! if (!tcp_snd_test(tp, skb, cur_mss, nonagle) || tcp_write_xmit(sk, nonagle)) tcp_check_probe_timer(sk, tp); } --- 1457,1463 ---- if (skb) { if (!tcp_skb_is_last(sk, skb)) nonagle = 1; ! if (tcp_snd_wait(tp, skb, cur_mss, nonagle) != WC_SNDLIM_NONE || tcp_write_xmit(sk, nonagle)) tcp_check_probe_timer(sk, tp); } *************** static __inline__ int tcp_may_send_now(s *** 1265,1272 **** struct sk_buff *skb = tp->send_head; return (skb && ! tcp_snd_test(tp, skb, tcp_current_mss(sk), ! tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle)); } static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq) --- 1475,1483 ---- struct sk_buff *skb = tp->send_head; return (skb && ! tcp_snd_wait(tp, skb, tcp_current_mss(sk), ! tcp_skb_is_last(sk, skb) ? 1 : tp->nonagle) ! == WC_SNDLIM_NONE); } static __inline__ void tcp_init_wl(struct tcp_opt *tp, u32 ack, u32 seq) *************** static char *statename[]={ *** 1364,1369 **** --- 1575,1582 ---- static __inline__ void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->state; + + WEB100_VAR_SET(&sk->tp_pinfo.af_tcp, State, web100_state(state)); switch (state) { case TCP_ESTABLISHED: *************** static inline void tcp_select_initial_wi *** 1520,1527 **** if (*rcv_wscale && sysctl_tcp_app_win && space>=mss && space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2) (*rcv_wscale)--; } ! /* Set initial window to value enough for senders, * following RFC1414. Senders, not following this RFC, * will be satisfied with 2. 
--- 1733,1744 ---- if (*rcv_wscale && sysctl_tcp_app_win && space>=mss && space - max((space>>sysctl_tcp_app_win), mss>>*rcv_wscale) < 65536/2) (*rcv_wscale)--; + #ifdef CONFIG_WEB100 + (*rcv_wscale) = max((__u8)sysctl_web100_default_wscale, + (*rcv_wscale)); + #endif } ! /* Set initial window to value enough for senders, * following RFC1414. Senders, not following this RFC, * will be satisfied with 2. *************** static __inline__ void tcp_openreq_init( *** 1668,1679 **** #define TCP_MEM_QUANTUM ((int)PAGE_SIZE) ! static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb) { sk->tp_pinfo.af_tcp.queue_shrunk = 1; sk->wmem_queued -= skb->truesize; sk->forward_alloc += skb->truesize; ! __kfree_skb(skb); } static inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb) --- 1885,1898 ---- #define TCP_MEM_QUANTUM ((int)PAGE_SIZE) ! static inline void tcp_uncharge_skb(struct sock *sk, struct sk_buff *skb) { sk->tp_pinfo.af_tcp.queue_shrunk = 1; sk->wmem_queued -= skb->truesize; sk->forward_alloc += skb->truesize; ! #ifdef CONFIG_WEB100_STATS ! web100_update_writeq(sk); ! 
#endif } static inline void tcp_charge_skb(struct sock *sk, struct sk_buff *skb) *************** static inline void tcp_charge_skb(struct *** 1682,1687 **** --- 1901,1950 ---- sk->forward_alloc -= skb->truesize; } + #ifdef CONFIG_WEB100_STATS + extern atomic_t tcp_retx_mem; + /* Undo tcp_retx_charge_skb(): drop skb's sequence length from CurRetxQueue (when stats exist), its truesize from tcp_retx_mem and sk->retx_alloc, and release the sock reference. */ + static inline void tcp_retx_uncharge_skb(struct sock *sk, struct sk_buff *skb) + { + unsigned len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if (tp->tcp_stats) tp->tcp_stats->wc_vars.CurRetxQueue -= len; /* tcp_stats may be NULL, cf. WEB100_CHECK */ + + atomic_sub(skb->truesize, &tcp_retx_mem); + sk->retx_alloc -= skb->truesize; + sock_put(sk); + } + /* Mark skb as transmitted and move its accounting from the write queue to the retransmit pool; takes a sock reference that tcp_retx_uncharge_skb() releases. */ + static inline void tcp_retx_charge_skb(struct sock *sk, struct sk_buff *skb) + { + unsigned len = TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct web100directs *vars = tp->tcp_stats ? &tp->tcp_stats->wc_vars : NULL; /* tcp_stats may be NULL, cf. WEB100_CHECK */ + + if (vars) vars->CurRetxQueue += len; + if (vars && vars->MaxRetxQueue < vars->CurRetxQueue) + vars->MaxRetxQueue = vars->CurRetxQueue; + + TCP_SKB_CB(skb)->transmitted = 1; + sock_hold(sk); + tcp_uncharge_skb(sk, skb); + atomic_add(skb->truesize, &tcp_retx_mem); + sk->retx_alloc += skb->truesize; + } + #endif + + static inline void tcp_free_skb(struct sock *sk, struct sk_buff *skb) + { + #ifdef CONFIG_WEB100_STATS + if (TCP_SKB_CB(skb)->transmitted) + tcp_retx_uncharge_skb(sk, skb); + else + #endif + tcp_uncharge_skb(sk, skb); + __kfree_skb(skb); + } + extern void __tcp_mem_reclaim(struct sock *sk); extern int tcp_mem_schedule(struct sock *sk, int size, int kind); *************** static inline void tcp_enter_memory_pres *** 1701,1706 **** --- 1964,1972 ---- static inline void tcp_moderate_sndbuf(struct sock *sk) { + #ifdef CONFIG_WEB100_STATS + if (sysctl_web100_sbufmode != 1) + #endif if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) { sk->sndbuf = min(sk->sndbuf, sk->wmem_queued/2); sk->sndbuf = max(sk->sndbuf, SOCK_MIN_SNDBUF); *************** static inline struct page * tcp_alloc_pa *** 1747,1753 **** static inline void 
tcp_writequeue_purge(struct sock *sk) { struct sk_buff *skb; ! while ((skb = __skb_dequeue(&sk->write_queue)) != NULL) tcp_free_skb(sk, skb); tcp_mem_reclaim(sk); --- 2013,2023 ---- static inline void tcp_writequeue_purge(struct sock *sk) { struct sk_buff *skb; ! #ifdef CONFIG_TCP_SACK2 ! struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); ! ! purge_sacked_list(tp); ! #endif while ((skb = __skb_dequeue(&sk->write_queue)) != NULL) tcp_free_skb(sk, skb); tcp_mem_reclaim(sk); diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/net/web100.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/net/web100.h *** linux-2.4.20.orig/include/net/web100.h Thu Jan 1 01:00:00 1970 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/net/web100.h Mon Jan 19 17:41:51 2004 *************** *** 0 **** --- 1,122 ---- + /* + * include/net/web100.h + * + * Copyright (C) 2001 Matt Mathis + * Copyright (C) 2001 John Heffner + * + * The Web 100 project. See http://www.web100.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + + #ifndef _WEB100_H + #define _WEB100_H + + #include + + #define WEB100_MAX_CONNS (1<<15) + + #define WEB100_DELAY_MAX HZ + + /* Netlink */ + #define WC_NL_TYPE_CONNECT 0 + #define WC_NL_TYPE_DISCONNECT 1 + + struct web100_netlink_msg { + int type; + int cid; + }; + + /* The syntax of this version string is subject to future changes */ + extern char *web100_version_string; + + /* Stats structures */ + extern struct web100stats *web100stats_arr[]; + extern struct web100stats *web100stats_first; + + /* For locking the creation and destruction of stats structures. 
*/ + extern rwlock_t web100_linkage_lock; + + /* For /proc/web100 */ + extern struct web100stats *web100stats_lookup(int cid); + + /* For the TCP code */ + extern int web100_stats_create(struct sock *sk); + extern void web100_stats_destroy(struct web100stats *stats); + extern void web100_stats_free(struct web100stats *stats); + extern void web100_stats_establish(struct sock *sk); + + extern void web100_tune_sndbuf_ack(struct sock *sk); + extern void web100_tune_sndbuf_snd(struct sock *sk); + extern void web100_tune_rcvbuf(struct sock *sk); + + extern void web100_update_snd_nxt(struct tcp_opt *tp); + extern void web100_update_rtt(struct tcp_opt *tp, unsigned long rtt_sample); + extern void web100_update_timeout(struct tcp_opt *tp); + extern void web100_update_mss(struct tcp_opt *tp); + extern void web100_update_cwnd(struct tcp_opt *tp); + extern void web100_update_rwin_rcvd(struct tcp_opt *tp); + extern void web100_update_sndlim(struct tcp_opt *tp, int why); + extern void web100_update_rcv_nxt(struct tcp_opt *tp); + extern void web100_update_rwin_sent(struct tcp_opt *tp); + extern void web100_update_congestion(struct tcp_opt *tp, int why); + extern void web100_update_segsend(struct tcp_opt *tp, struct sk_buff *skb); + extern void web100_update_segrecv(struct tcp_opt *tp, struct sk_buff *skb); + extern void web100_update_rcvbuf(struct sock *sk, int rcvbuf); + extern void web100_update_writeq(struct sock *sk); + extern void web100_update_recvq(struct sock *sk); + extern void web100_update_ofoq(struct sock *sk); + #ifdef CONFIG_WEB100_NET100 + extern void web100_update_floyd_aimd(struct tcp_opt *tp); + #endif + + extern void web100_stats_init(void); + + /* For the IP code */ + extern int web100_delay_output(struct sk_buff *skb, int (*output)(struct sk_buff *)); + + extern __u64 web100_mono_time(void); + + /* You may have to hold web100_linkage_lock here to prevent + stats from disappearing. 
*/ + static inline void web100_stats_use(struct web100stats *stats) + { + sock_hold(stats->wc_sk); + atomic_inc(&stats->wc_users); + } + + /* You MUST NOT hold web100_linkage_lock here. */ + static inline void web100_stats_unuse(struct web100stats *stats) + { + if (atomic_dec_and_test(&stats->wc_users)) { + struct sock *sk = stats->wc_sk; + web100_stats_free(stats); + sock_put(sk); + } + } + + /* A mapping between Linux and Web100 states. This could easily just + * be an array. */ + static inline int web100_state(int state) + { + switch (state) { + case TCP_ESTABLISHED: return WC_STATE_ESTABLISHED; + case TCP_SYN_SENT: return WC_STATE_SYNSENT; + case TCP_SYN_RECV: return WC_STATE_SYNRECEIVED; + case TCP_FIN_WAIT1: return WC_STATE_FINWAIT1; + case TCP_FIN_WAIT2: return WC_STATE_FINWAIT2; + case TCP_TIME_WAIT: return WC_STATE_TIMEWAIT; + case TCP_CLOSE: return WC_STATE_CLOSED; + case TCP_CLOSE_WAIT: return WC_STATE_CLOSEWAIT; + case TCP_LAST_ACK: return WC_STATE_LASTACK; + case TCP_LISTEN: return WC_STATE_LISTEN; + case TCP_CLOSING: return WC_STATE_CLOSING; + default: return 0; + } + } + + #endif /* _WEB100_H */ diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/include/net/web100_stats.h linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/net/web100_stats.h *** linux-2.4.20.orig/include/net/web100_stats.h Thu Jan 1 01:00:00 1970 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/include/net/web100_stats.h Mon Jan 19 17:41:51 2004 *************** *** 0 **** --- 1,353 ---- + /* + * include/net/web100_stats.h + * + * Copyright (C) 2001 Matt Mathis + * Copyright (C) 2001 John Heffner + * Copyright (C) 2000 Jeff Semke + * + * The Web 100 project. See http://www.web100.org + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + + /* TODO: make sure that the time duration states below include: + Congestion Avoidance, Slow Start, Timeouts, Idle Application, and + Window Limited cases */ + /* TODO: Consider adding sysctl variable to enable/disable WC stats updates. + Probably should still create stats structures if compiled with WC support, + even if sysctl(wc) is turned off. That would allow the stats to be updated + if the sysctl(wc) is turned back on. */ + /* TODO: Add all variables needed to do user-level auto-tuning, including + writeable parameters */ + + + #ifndef _WEB100_STATS_H + #define _WEB100_STATS_H + + enum wc_sndlim_states { + WC_SNDLIM_NONE = -1, + WC_SNDLIM_SENDER, + WC_SNDLIM_CWND, + WC_SNDLIM_RWIN, + WC_SNDLIM_STARTUP, + WC_SNDLIM_NSTATES /* Keep at end */ + }; + + #ifndef CONFIG_WEB100_STATS + + #define WEB100_VAR_INC(tp,var) do {} while (0) + #define WEB100_VAR_DEC(tp,var) do {} while (0) + #define WEB100_VAR_SET(tp,var,val) do {} while (0) + #define WEB100_VAR_ADD(tp,var,val) do {} while (0) + #define WEB100_UPDATE_FUNC(tp,func) do {} while (0) + #define NET100_WAD(tp, var, def) (def) + + #else /* CONFIG_WEB100_STATS */ /* { */ + + #include + + #define WEB100_CHECK(tp,expr) \ + do { if ((tp)->tcp_stats) (expr); } while (0) + #define WEB100_VAR_INC(tp,var) \ + WEB100_CHECK(tp, ((tp)->tcp_stats->wc_vars.var)++) + #define WEB100_VAR_DEC(tp,var) \ + WEB100_CHECK(tp, ((tp)->tcp_stats->wc_vars.var)--) + #define WEB100_VAR_ADD(tp,var,val) \ + WEB100_CHECK(tp, ((tp)->tcp_stats->wc_vars.var) += (val)) + #define WEB100_VAR_SET(tp,var,val) \ + WEB100_CHECK(tp, ((tp)->tcp_stats->wc_vars.var) = (val)) + #define WEB100_UPDATE_FUNC(tp,func) \ + WEB100_CHECK(tp, func) + #ifdef CONFIG_WEB100_NET100 + #define NET100_WAD(tp, var, def) \ + (((tp)->tcp_stats && (tp)->tcp_stats->wc_vars.var) ? 
(tp)->tcp_stats->wc_vars.var : (def)) + #else + #define NET100_WAD(tp, var, def) (def) + #endif + + /* SMIv2 types - RFC 1902 */ + typedef __s32 INTEGER; + typedef INTEGER Integer32; + typedef __u32 IpAddress; + typedef __u32 Counter32; + typedef __u32 Unsigned32; + typedef Unsigned32 Gauge32; + typedef __u32 TimeTicks; + typedef __u64 Counter64; + typedef __u32 Unsigned16; + + /* New inet address types specified in INET-ADDRESS-MIB */ + typedef Unsigned16 InetPortNumber; + typedef enum { + WC_ADDRTYPE_UNKNOWN = 0, + WC_ADDRTYPE_IPV4, + WC_ADDRTYPE_IPV6, + WC_ADDRTYPE_DNS = 16 + } InetAddressType; + typedef IpAddress InetAddresIPv4; + typedef struct { + __u8 addr[16]; + __u8 type; + } InetAddresIPv6; + typedef union { + InetAddresIPv4 v4addr; + InetAddresIPv6 v6addr; + } InetAddress; + + typedef enum { + truthValueTrue = 1, + truthValueFalse = 2 + } TruthValue; + + enum wc_states { + WC_STATE_CLOSED = 1, + WC_STATE_LISTEN, + WC_STATE_SYNSENT, + WC_STATE_SYNRECEIVED, + WC_STATE_ESTABLISHED, + WC_STATE_FINWAIT1, + WC_STATE_FINWAIT2, + WC_STATE_CLOSEWAIT, + WC_STATE_LASTACK, + WC_STATE_CLOSING, + WC_STATE_TIMEWAIT, + WC_STATE_DELETECB + }; + + enum wc_stunemodes { + WC_STUNEMODE_DEFAULT = 0, /* OS native */ + WC_STUNEMODE_SETSOCKOPT, /* OS native setsockopt() */ + WC_STUNEMODE_FIXED, /* Manual via the web100 API */ + WC_STUNEMODE_AUTO, + WC_STUNEMODE_EXP1, + WC_STUNEMODE_EXP2 + }; + + enum wc_rtunemodes { + WC_RTUNEMODE_DEFAULT = 0, + WC_RTUNEMODE_SETSOCKOPT, + WC_RTUNEMODE_FIXED, + WC_RTUNEMODE_AUTO, + WC_RTUNEMODE_EXP1, + WC_RTUNEMODE_EXP2 + }; + + enum wc_bufmodes { + WC_BUFMODE_OS = 0, + WC_BUFMODE_WEB100, + }; + + enum { + WC_SE_BELOW_DATA_WINDOW = 1, + WC_SE_ABOVE_DATA_WINDOW, + WC_SE_BELOW_ACK_WINDOW, + WC_SE_ABOVE_ACK_WINDOW, + WC_SE_BELOW_TSW_WINDOW, + WC_SE_ABOVE_TSW_WINDOW, + WC_SE_DATA_CHECKSUM + }; + + + /* + * Variables that can be read and written directly. + * + * Should contain most variables from TCP-KIS 0.1. 
Commented feilds are + * either not implemented or have handlers and do not need struct storage. + */ + struct web100directs { + /* STATE */ + INTEGER State; + TruthValue SACKEnabled; + TruthValue TimestampsEnabled; + TruthValue NagleEnabled; + TruthValue ECNEnabled; + Integer32 SndWinScale; + Integer32 RcvWinScale; + + /* SYN OPTIONS */ + INTEGER ActiveOpen; + /* Gauge32 MSSSent; */ + Gauge32 MSSRcvd; + Integer32 WinScaleRcvd; + Integer32 WinScaleSent; + /* INTEGER SACKokSent; */ + /* INTEGER SACKokRcvd; */ + /* INTEGER TimestampSent; */ + /* INTEGER TimestampRcvd; */ + + /* DATA */ + Counter32 PktsOut; + Counter32 DataPktsOut; + Counter32 AckPktsOut; /* DEPRICATED */ + Counter64 DataBytesOut; + Counter32 PktsIn; + Counter32 DataPktsIn; + Counter32 AckPktsIn; /* DEPRICATED */ + Counter64 DataBytesIn; + /* Counter32 SoftErrors; */ + /* INTEGER SoftErrorReason; */ + Counter32 SndUna; + Counter32 SndNxt; + Integer32 SndMax; + Counter64 ThruBytesAcked; + Counter32 SndISS; /* SndInitial */ + Counter32 SendWraps; /* DEPRICATED */ + Counter32 RcvNxt; + Counter64 ThruBytesReceived; + Counter32 RecvISS; /* RecInitial */ + Counter32 RecvWraps; /* DEPRICATED */ + /* Counter64 Duration; */ + Integer32 StartTime; /* DEPRICATED */ + Integer32 StartTimeSec; + Integer32 StartTimeUsec; + + /* SENDER CONGESTION */ + Counter32 SndLimTrans[WC_SNDLIM_NSTATES]; + Counter32 SndLimTime[WC_SNDLIM_NSTATES]; + Counter64 SndLimBytes[WC_SNDLIM_NSTATES]; + Counter32 SlowStart; + Counter32 CongAvoid; + Counter32 CongestionSignals; + Counter32 OtherReductions; + Counter32 X_OtherReductionsCV; + Counter32 X_OtherReductionsCM; + Counter32 CongestionOverCount; + Gauge32 CurCwnd; + Gauge32 MaxCwnd; + /* Gauge32 LimCwnd; */ + Gauge32 CurSsthresh; + Gauge32 MaxSsthresh; + Gauge32 MinSsthresh; + + /* SENDER PATH MODEL */ + Counter32 FastRetran; + Counter32 Timeouts; + Counter32 SubsequentTimeouts; + Gauge32 CurTimeoutCount; + Counter32 AbruptTimeouts; + Counter32 PktsRetrans; + Counter32 BytesRetrans; 
+ Counter32 DupAcksIn; + Counter32 SACKsRcvd; + Counter32 SACKBlocksRcvd; + Counter32 PreCongSumCwnd; + Counter32 PreCongSumRTT; + Counter32 PreCongCountRTT; /* DEPRICATED */ + Counter32 PostCongSumRTT; + Counter32 PostCongCountRTT; + /* Counter32 ECNsignals; */ + Counter32 ECERcvd; + Counter32 SendStall; + Counter32 QuenchRcvd; + Gauge32 RetranThresh; + /* Counter32 SndDupAckEpisodes; */ + /* Counter64 SumBytesReordered; */ + Counter32 NonRecovDA; + Counter32 AckAfterFR; + Counter32 DSACKDups; + Gauge32 SampleRTT; + Gauge32 SmoothedRTT; + Gauge32 RTTVar; + Gauge32 MaxRTT; + Gauge32 MinRTT; + Counter64 SumRTT; + Counter32 CountRTT; + Gauge32 CurRTO; + Gauge32 MaxRTO; + Gauge32 MinRTO; + Gauge32 CurMSS; + Gauge32 MaxMSS; + Gauge32 MinMSS; + + /* LOCAL SENDER BUFFER */ + Gauge32 CurRetxQueue; + Gauge32 MaxRetxQueue; + Gauge32 CurAppWQueue; + Gauge32 MaxAppWQueue; + + /* LOCAL RECEIVER */ + Gauge32 CurRwinSent; + Gauge32 MaxRwinSent; + Gauge32 MinRwinSent; + Integer32 LimRwin; + /* Counter32 DupAckEpisodes; */ + Counter32 DupAcksOut; + /* Counter32 CERcvd; */ + /* Counter32 ECNSent; */ + /* Counter32 ECNNonceRcvd; */ + Gauge32 CurReasmQueue; + Gauge32 MaxReasmQueue; + Gauge32 CurAppRQueue; + Gauge32 MaxAppRQueue; + Gauge32 X_rcv_ssthresh; + Gauge32 X_wnd_clamp; + Gauge32 X_dbg1; + Gauge32 X_dbg2; + Gauge32 X_dbg3; + Gauge32 X_dbg4; + + /* OBSERVED RECEIVER */ + Gauge32 CurRwinRcvd; + Gauge32 MaxRwinRcvd; + Gauge32 MinRwinRcvd; + + /* CONNECTION ID */ + InetAddressType LocalAddressType; + InetAddress LocalAddress; + InetPortNumber LocalPort; + /* InetAddressType RemAddressType; */ + InetAddress RemAddress; + InetPortNumber RemPort; + /* Integer32 IdId; */ + + Gauge32 X_RcvRTT; + + INTEGER X_SBufMode; + INTEGER X_RBufMode; + + #ifdef CONFIG_WEB100_NET100 + /* support for the NET100 Work Around Deamon (WAD) */ + Gauge32 WAD_IFQ; + Gauge32 WAD_MaxBurst; + Gauge32 WAD_AI; + Gauge32 WAD_MD; + #endif + }; + + struct web100stats { + int wc_cid; + + struct sock *wc_sk; + + 
atomic_t wc_users; + __u8 wc_dead; + + struct web100stats *wc_next; + struct web100stats *wc_prev; + + struct web100stats *wc_hash_next; + struct web100stats *wc_hash_prev; + + struct web100stats *wc_death_next; + + int wc_limstate; + __u64 wc_limstate_bytes; + struct timeval wc_limstate_time; + + __u64 wc_start_monotime; + + int wc_lss_k; + int wc_lss_cnt1; + int wc_lss_cnt2; + int wc_flindex; + + struct web100directs wc_vars; + }; + + #endif /* CONFIG_WEB100_STATS */ /* } */ + + #endif /*_WEB100_STATS_H */ diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv4/#Config.inf# linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/#Config.inf# *** linux-2.4.20.orig/net/ipv4/#Config.inf# Thu Jan 1 01:00:00 1970 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/#Config.inf# Mon Jan 19 17:41:51 2004 *************** *** 0 **** --- 1,6 ---- + + + + + + diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv4/Config.in linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/Config.in *** linux-2.4.20.orig/net/ipv4/Config.in Fri Dec 21 17:42:05 2001 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/Config.in Mon Jan 19 17:41:51 2004 *************** bool ' IP: TCP syncookie support (disab *** 44,46 **** --- 44,104 ---- if [ "$CONFIG_NETFILTER" != "n" ]; then source net/ipv4/netfilter/Config.in fi + + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' IP: TCP SACK Patch (EXPERIMENTAL)' CONFIG_TCP_SACK_ON + if [ "$CONFIG_TCP_SACK_ON" = "y" ]; then + bool ' IP: Tom Kelly TCP SACK Fast-Path Patch' CONFIG_TCP_SACK 0 + bool ' IP: Douglas Leigh TCP SACK Patch' CONFIG_TCP_SACK2 0 + fi + if [ "$CONFIG_TCP_SACK2" = "y" ]; then + int ' IP: TCP SACK List Size' DFLT_TCP_SACKED_LIST_SIZE 15000 + fi + fi + + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' IP: Alternative TCP AIMD Availablity (EXPERIMENTAL)' CONFIG_ALTAIMD + if [ "$CONFIG_ALTAIMD" = "y" ]; then + int ' TCP: Default Alternative AIMD Stack' CONFIG_DFLT_ALTAIMD 0 + fi + bool
' IP: Alternative TCP SlowStart Availablity [Limited SlowStart]' CONFIG_LIMITED_SLOW_START + if [ "$CONFIG_LIMITED_SLOW_START" = "y" ]; then + int ' TCP: Default Max SsThresh Value' CONFIG_SS_MAX_SSTHRESH 100 + fi + fi + + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' IP: Appropiate Byte Sizing Availablity (ABC - RFC3465)' CONFIG_RFC3465 + if [ "$CONFIG_RFC3465" = "y" ]; then + int ' TCP: ABC ON by default' CONFIG_DFLT_RFC3465 0 + fi + fi + + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' IP: TCP Moderate Congestion Window Toggle Availablity' CONFIG_MODCWND + if [ "$CONFIG_MODCWND" = "y" ]; then + int ' TCP: Moderate Congestion Window ON by default' CONFIG_DFLT_MODCWND 1 + fi + fi + + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' Web100 networking enhancements' CONFIG_WEB100 + if [ "$CONFIG_WEB100" = "y" ]; then + bool ' Web100: TCP statistics' CONFIG_WEB100_STATS + if [ "$CONFIG_WEB100_STATS" = "y" ]; then + int ' Web100: Default file permissions' CONFIG_WEB100_FPERMS 0666 + int ' Web100: Default gid' CONFIG_WEB100_GID 0 + bool ' Web100: Net100 extensions' CONFIG_WEB100_NET100 + # Netlink is always enabled now. 
+ bool ' Web100: netlink event notification service' CONFIG_WEB100_NETLINK + fi + int ' Web100: default winscale initial value' CONFIG_WEB100_WINSCALE_VAL 7 + fi + fi + + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + bool ' IP: TCP Packet Dropping Available (EXPERIMENTAL)' CONFIG_TCP_PKTDROP + if [ "$CONFIG_TCP_PKTDROP" = "y" ]; then + int ' IP: TCP Packet Dropping Rate Default Value' CONFIG_TCP_PKTDROP_RATE 0 + fi + fi diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv4/Makefile linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/Makefile *** linux-2.4.20.orig/net/ipv4/Makefile Fri Dec 21 17:42:05 2001 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/Makefile Mon Jan 19 17:41:51 2004 *************** obj-$(CONFIG_NET_IPIP) += ipip.o *** 25,29 **** --- 25,30 ---- obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_IP_PNP) += ipconfig.o + obj-$(CONFIG_WEB100_STATS) += web100_stats.o include $(TOPDIR)/Rules.make diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv4/proc.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/proc.c *** linux-2.4.20.orig/net/ipv4/proc.c Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/proc.c Mon Jan 19 17:41:51 2004 *************** int netstat_get_info(char *buffer, char *** 192,198 **** --- 192,205 ---- " TCPDSACKOldSent TCPDSACKOfoSent TCPDSACKRecv TCPDSACKOfoRecv" " TCPAbortOnSyn TCPAbortOnData TCPAbortOnClose" " TCPAbortOnMemory TCPAbortOnTimeout TCPAbortOnLinger" + #ifndef CONFIG_TCP_SACK " TCPAbortFailed TCPMemoryPressures\n" + #else + " TCPAbortFailed TCPMemoryPressures" + " TCPHintClears TCPMarkHeadHintHits TCPUpdateScoreHintHits" + " TCPXmitRetranLostHintHits TCPXmitRetranForwardHintHits" + " TCPSackBlockEasyHits TCPSackFastPathHintHits TCPSackNormalised\n" + #endif "TcpExt:"); for (i=0; iwmem_queued < sk->sndbuf && + (sysctl_web100_sbufmode != 1 || + sk->wmem_queued / PAGE_SIZE + 1 < sysctl_tcp_mem[0] - 
+ atomic_read(&tcp_memory_allocated)); + #endif return sk->wmem_queued < sk->sndbuf; } *************** wait_for_memory: *** 919,926 **** } out: ! if (copied) tcp_push(sk, tp, flags, mss_now, tp->nonagle); return copied; do_error: --- 947,958 ---- } out: ! if (copied) { tcp_push(sk, tp, flags, mss_now, tp->nonagle); + #ifdef CONFIG_WEB100_STATS + web100_update_writeq(sk); + #endif + } return copied; do_error: *************** new_segment: *** 1169,1174 **** --- 1201,1212 ---- __tcp_push_pending_frames(sk, tp, mss_now, 1); } else if (skb == tp->send_head) tcp_push_one(sk, mss_now); + + #if 0 + /* why is this here? WEB100_XXX */ + WEB100_UPDATE_FUNC(tp, web100_update_cwnd(tp)); + #endif + continue; wait_for_sndbuf: *************** wait_for_memory: *** 1185,1192 **** } out: ! if (copied) tcp_push(sk, tp, flags, mss_now, tp->nonagle); TCP_CHECK_TIMER(sk); release_sock(sk); return copied; --- 1223,1234 ---- } out: ! if (copied) { tcp_push(sk, tp, flags, mss_now, tp->nonagle); + #ifdef CONFIG_WEB100_STATS + web100_update_writeq(sk); + #endif + } TCP_CHECK_TIMER(sk); release_sock(sk); return copied; *************** int tcp_recvmsg(struct sock *sk, struct *** 1543,1548 **** --- 1585,1593 ---- BUG_TRAP(flags&MSG_PEEK); skb = skb->next; } while (skb != (struct sk_buff *)&sk->receive_queue); + #ifdef CONFIG_WEB100_STATS + web100_update_recvq(sk); + #endif /* Well, if we have backlog, try to process it now yet. 
*/ *************** int tcp_disconnect(struct sock *sk, int *** 2099,2104 **** --- 2144,2154 ---- tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->receive_queue); tcp_writequeue_purge(sk); + + #ifdef CONFIG_TCP_SACK2 + purge_sacked_list(tp); + #endif + __skb_queue_purge(&tp->out_of_order_queue); sk->dport = 0; *************** int tcp_disconnect(struct sock *sk, int *** 2123,2128 **** --- 2173,2184 ---- tp->packets_out = 0; tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; + #ifdef CONFIG_RFC3465 + tp->bytes_acked = 0; + #endif + #ifdef CONFIG_ALTAIMD + tp->hstcp_entry_index=0; + #endif tp->ca_state = TCP_CA_Open; tcp_clear_retrans(tp); tcp_delack_init(tp); *************** int tcp_setsockopt(struct sock *sk, int *** 2280,2285 **** --- 2336,2342 ---- err = -EINVAL; break; } + WEB100_VAR_SET(tp, NagleEnabled, !val); tp->nonagle = (val == 0) ? 0 : 1; if (val) tcp_push_pending_frames(sk, tp); *************** int tcp_setsockopt(struct sock *sk, int *** 2301,2306 **** --- 2358,2364 ---- err = -EINVAL; break; } + WEB100_VAR_SET(tp, NagleEnabled, !val); if (val != 0) { tp->nonagle = 2; } else { *************** void __init tcp_init(void) *** 2582,2588 **** tcp_ehash = (struct tcp_ehash_bucket *) __get_free_pages(GFP_ATOMIC, order); } while (tcp_ehash == NULL && --order > 0); ! if (!tcp_ehash) panic("Failed to allocate TCP established hash table\n"); for (i = 0; i < (tcp_ehash_size<<1); i++) { --- 2640,2646 ---- tcp_ehash = (struct tcp_ehash_bucket *) __get_free_pages(GFP_ATOMIC, order); } while (tcp_ehash == NULL && --order > 0); ! 
if (!tcp_ehash) panic("Failed to allocate TCP established hash table\n"); for (i = 0; i < (tcp_ehash_size<<1); i++) { *************** void __init tcp_init(void) *** 2642,2645 **** --- 2700,2707 ---- tcp_ehash_size<<1, tcp_bhash_size); tcpdiag_init(); + + #ifdef CONFIG_WEB100_STATS + web100_stats_init(); + #endif } diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv4/tcp_input.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/tcp_input.c *** linux-2.4.20.orig/net/ipv4/tcp_input.c Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/tcp_input.c Mon Jan 19 17:43:43 2004 *************** int sysctl_tcp_stdurg = 0; *** 87,92 **** --- 87,226 ---- int sysctl_tcp_rfc1337 = 0; int sysctl_tcp_max_orphans = NR_FILE; + /* TCP Packet Dropping */ + #ifdef CONFIG_TCP_PKTDROP + __u32 sysctl_tcp_pktdrop_rate = CONFIG_TCP_PKTDROP_RATE; + #endif + + #ifdef CONFIG_LIMITED_SLOW_START + int sysctl_tcp_ss_max_ssthresh = CONFIG_SS_MAX_SSTHRESH; + #endif + + #if CONFIG_ALTAIMD + /* Initial value of sysctl_tcp_altAIMD, selected at build time by CONFIG_DFLT_ALTAIMD. */ + #if CONFIG_DFLT_ALTAIMD == AIMD_VANILLA + int sysctl_tcp_altAIMD = AIMD_VANILLA; + #elif CONFIG_DFLT_ALTAIMD == AIMD_HSTCP + int sysctl_tcp_altAIMD = AIMD_HSTCP; + #elif CONFIG_DFLT_ALTAIMD == AIMD_SCALABLE + int sysctl_tcp_altAIMD = AIMD_SCALABLE; + #elif CONFIG_DFLT_ALTAIMD == AIMD_GRIDDT + int sysctl_tcp_altAIMD = AIMD_GRIDDT; + #elif CONFIG_DFLT_ALTAIMD == AIMD_HTCP + int sysctl_tcp_altAIMD = AIMD_HTCP; + #endif + + /* HSTCP */ + /* first value is cwnd segments, additive increase, multiplicative decrease (*8) */ + /* structure to hold the hstcp parameters */ + struct hstcp_entry hstcp_table[] = { + {0,1,128}, + {38, 1, 128}, + {118, 2, 112}, + {221, 3, 104}, + {347, 4, 98}, + {495, 5, 93}, + {663, 6, 89}, + {851, 7, 86}, + {1058, 8, 83}, + {1284, 9, 81}, + {1529, 10, 78}, + {1793, 11, 76}, + {2076, 12, 74}, + {2378, 13, 72}, + {2699, 14, 71}, + {3039, 15, 69}, + {3399, 16, 68}, + {3778, 17, 66}, + {4177, 18, 65}, + {4596, 19, 64}, + {5036, 20, 62}, +
{5497, 21, 61}, + {5979, 22, 60}, + {6483, 23, 59}, + {7009, 24, 58}, + {7558, 25, 57}, + {8130, 26, 56}, + {8726, 27, 55}, + {9346, 28, 54}, + {9991, 29, 53}, + {10661, 30, 52}, + {11358, 31, 52}, + {12082, 32, 51}, + {12834, 33, 50}, + {13614, 34, 49}, + {14424, 35, 48}, + {15265, 36, 48}, + {16137, 37, 47}, + {17042, 38, 46}, + {17981, 39, 45}, + {18955, 40, 45}, + {19965, 41, 44}, + {21013, 42, 43}, + {22101, 43, 43}, + {23230, 44, 42}, + {24402, 45, 41}, + {25618, 46, 41}, + {26881, 47, 40}, + {28193, 48, 39}, + {29557, 49, 39}, + {30975, 50, 38}, + {32450, 51, 38}, + {33986, 52, 37}, + {35586, 53, 36}, + {37253, 54, 36}, + {38992, 55, 35}, + {40808, 56, 35}, + {42707, 57, 34}, + {44694, 58, 33}, + {46776, 59, 33}, + {48961, 60, 32}, + {51258, 61, 32}, + {53677, 62, 31}, + {56230, 63, 30}, + {58932, 64, 30}, + {61799, 65, 29}, + {64851, 66, 28}, + {68113, 67, 28}, + {71617, 68, 27}, + {75401, 69, 26}, + {79517, 70, 26}, + {84035, 71, 25}, + {89053, 72, 24}, + {94717, 73, 23}, + }; + + /* Scalable TCP sysctl */ + #define DFLT_SCALABLE_HIGHWIN 16 + #define DFLT_SCALABLE_1_ON_A 100 + #define DFLT_SCALABLE_1_ON_B 8 + int sysctl_tcp_scalable_highwin = DFLT_SCALABLE_HIGHWIN; + int sysctl_tcp_scalable_1_on_a = DFLT_SCALABLE_1_ON_A; + int sysctl_tcp_scalable_1_on_b = DFLT_SCALABLE_1_ON_B; + + /* GridDT sysctl */ + #define DFLT_GRIDDT_MSS_REF 1500 + #define DFLT_GRIDDT_RTT_REF 10 + #define DFLT_GRIDDT_ADD_CNT_CLAMP 1000 + int sysctl_tcp_griddt_mss_ref = DFLT_GRIDDT_MSS_REF; + __u32 sysctl_tcp_griddt_rtt_ref = DFLT_GRIDDT_RTT_REF; + __u32 sysctl_tcp_griddt_add_cnt_clamp = DFLT_GRIDDT_ADD_CNT_CLAMP; + + #endif + + #ifdef CONFIG_RFC3465 + int sysctl_tcp_abc = CONFIG_DFLT_RFC3465; + int sysctl_tcp_abc_L = 2; + #endif + + #ifdef CONFIG_MODCWND + int sysctl_tcp_moderate_cwnd = CONFIG_DFLT_MODCWND; + #endif + + #ifdef CONFIG_TCP_SACK + int sysctl_tcp_slack_rto = 0; + #endif + #define FLAG_DATA 0x01 /* Incoming frame contained data. 
*/ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */ *************** static void tcp_init_buffer_space(struct *** 286,296 **** struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); int maxwin; ! if (!(sk->userlocks&SOCK_RCVBUF_LOCK)) ! tcp_fixup_rcvbuf(sk); ! if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) ! tcp_fixup_sndbuf(sk); maxwin = tcp_full_space(sk); if (tp->window_clamp >= maxwin) { --- 420,439 ---- struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); int maxwin; ! #ifdef CONFIG_WEB100_STATS ! if (sysctl_web100_rbufmode != 1) ! #endif ! if (!(sk->userlocks&SOCK_RCVBUF_LOCK)) ! tcp_fixup_rcvbuf(sk); ! #ifdef CONFIG_WEB100_STATS ! if (sysctl_web100_sbufmode != 1) ! #endif ! if (!(sk->userlocks&SOCK_SNDBUF_LOCK)) ! tcp_fixup_sndbuf(sk); + #ifdef CONFIG_WEB100_STATS + tp->rcv_space = tp->rcv_wnd; + #endif maxwin = tcp_full_space(sk); if (tp->window_clamp >= maxwin) { *************** static void tcp_clamp_window(struct sock *** 347,352 **** --- 490,597 ---- } } + #ifdef CONFIG_WEB100_STATS + /* Receiver "autotuning" code. + * + * Note that some of these algorithms are based on or similar to + * Dynamic Right-Sizing (DRS) by Wu Feng and Mike Fisk of LANL. + * + * + * Details on this code can be found at + * + */ + + static inline void tcp_rcv_rtt_update(struct tcp_opt *tp, __u32 rcv_rtt, int win_dep) + { + rcv_rtt++; /* Truncated, round up. 
*/ + if (tp->rcv_rtt == 0) + tp->rcv_rtt = rcv_rtt; /* First measurement */ + else if (rcv_rtt < tp->rcv_rtt || win_dep) + tp->rcv_rtt = min(tp->rcv_rtt, rcv_rtt); + else + tp->rcv_rtt = (7 * tp->rcv_rtt + rcv_rtt) / 8; + + WEB100_VAR_SET(tp, X_RcvRTT, tp->rcv_rtt * 1000000 / HZ); + } + + static inline void tcp_rcv_rtt_measure(struct tcp_opt *tp) + { + if (tp->rcv_rtt_time == 0) + goto new_measure; + + if (before(tp->rcv_nxt, tp->rcv_rtt_seq)) + return; + + tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_time, 1); + + new_measure: + tp->rcv_rtt_seq = tp->rcv_nxt + tp->rcv_wnd; + tp->rcv_rtt_time = jiffies; + } + + static inline void tcp_rcv_win_to_space(struct tcp_opt *tp, int win) + { + win = max(2*win, (int)tp->rcv_space/2); + tp->rcv_space = max(win, 6*tp->advmss); + } + + static inline void tcp_rcv_space_adjust(struct sock *sk) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + int time; + + if (tp->rcv_winest_time == 0) + goto new_measure; + + time = jiffies - tp->rcv_winest_time; + if (time < tp->rcv_rtt || tp->rcv_rtt == 0) + return; + + tcp_rcv_win_to_space(tp, tp->rcv_hi_seq - tp->rcv_winest_seq); + + new_measure: + tp->rcv_winest_seq = tp->rcv_hi_seq; + tp->rcv_winest_time = jiffies; + } + + /* Conclude a receiver-side window measurement using timestamps */ + static inline void tcp_rcv_tswin_measure(struct sock *sk) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if (tp->rcv_tswin_pending && + !after(tp->rcv_tswin_tstamp, tp->rcv_tsecr)) { + tp->rcv_tswin_pending = 0; + tcp_rcv_win_to_space(tp, tp->rcv_hi_seq - tp->rcv_tswin_seq); + } + } + + /* Call this every time we get an acceptable segment (checksum ok, in window), + and we are in the ESTABLISHED state when it arrives. 
*/ + static void tcp_good_seg_rcvd(struct sock *sk, struct sk_buff *skb) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if (TCP_SKB_CB(skb)->end_seq == 0) { + #if 0 + printk("tcp_good_seg_rcvd: end_seq == 0\n"); + #endif + return; + } + else if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_hi_seq)) + tp->rcv_hi_seq = TCP_SKB_CB(skb)->end_seq; + web100_update_recvq(sk); + web100_update_ofoq(sk); + + tcp_rcv_rtt_measure(tp); + + if (tp->tstamp_ok) + tcp_rcv_tswin_measure(sk); + else + tcp_rcv_space_adjust(sk); + } + #endif /* CONFIG_WEB100_STATS */ + /* There is something which you must keep in mind when you analyze the * behavior of the tp->ato delayed ack timeout interval. When a * connection starts up, we want to ack as quickly as possible. The *************** static void tcp_event_data_recv(struct s *** 397,402 **** --- 642,648 ---- if (skb->len >= 128) tcp_grow_window(sk, tp, skb); + WEB100_UPDATE_FUNC(tp, web100_update_rcv_nxt(tp)); } /* Called to compute a smoothed rtt estimate. The data fed to this *************** static __inline__ void tcp_set_rto(struc *** 487,492 **** --- 733,744 ---- */ tp->rto = (tp->srtt >> 3) + tp->rttvar; + #ifdef CONFIG_TCP_SACK + if(sysctl_tcp_slack_rto){ + tp->rto = tp->rto << 1; + } + #endif + /* 2. Fixups made earlier cannot be right. 
* If we do not estimate RTO correctly without them, * all the algo is pure shit and should be replaced *************** void tcp_update_metrics(struct sock *sk) *** 511,516 **** --- 763,773 ---- { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct dst_entry *dst = __sk_dst_get(sk); + + #ifdef CONFIG_WEB100_NET100 + if (sysctl_web100_no_metrics_save) + return; + #endif dst_confirm(dst); *************** __u32 tcp_init_cwnd(struct tcp_opt *tp) *** 613,618 **** --- 870,909 ---- return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } + #ifdef CONFIG_ALTAIMD + /* GridDT : additive increment evaluation, called by tcp_cong_avoid. + * Returns (mss_ref / mss_clamp) * (min_rtt / rtt_ref)^2 using integer + * division, each factor being 1 when its ratio would be below 1 or its divisor is 0. + */ + + __u32 add_inc(struct tcp_opt *tp ) + { + __u32 mssval, rttval; + + if ( tp->mss_clamp == 0 || tp->mss_clamp >= sysctl_tcp_griddt_mss_ref) { /* never divide by a zero MSS clamp */ + mssval = 1; + } else { + mssval = sysctl_tcp_griddt_mss_ref / tp->mss_clamp; + } + + /* min RTT over the life of the connection */ + if (tp->srtt < tp->griddt_min_rtt || tp->griddt_min_rtt==0) { + tp->griddt_min_rtt = tp->srtt; + } + + if (sysctl_tcp_griddt_rtt_ref == 0 || tp->griddt_min_rtt <= sysctl_tcp_griddt_rtt_ref) { /* a zero reference RTT must not be used as divisor */ + rttval = 1; + } else { + rttval = tp->griddt_min_rtt / sysctl_tcp_griddt_rtt_ref; + rttval *= rttval; + } + + return mssval * rttval; + + } + #endif + + /* Initialize metrics on socket. */ static void tcp_init_metrics(struct sock *sk) *************** static void tcp_init_metrics(struct sock *** 635,640 **** --- 926,932 ---- if (dst->reordering && tp->reordering != dst->reordering) { tp->sack_ok &= ~2; tp->reordering = dst->reordering; + WEB100_VAR_SET(tp, RetranThresh, tp->reordering); } if (dst->rtt == 0) *************** static void tcp_update_reordering(struct *** 687,692 **** --- 979,985 ---- { if (metric > tp->reordering) { tp->reordering = min(TCP_MAX_REORDERING, metric); + WEB100_VAR_SET(tp, RetranThresh, tp->reordering); /* This exciting event is worth to be remembered. 
8) */ if (ts) *************** static void tcp_update_reordering(struct *** 708,713 **** --- 1001,1018 ---- } } + #ifdef CONFIG_TCP_SACK2 + static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb) + { + return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto); + } + + static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp) + { + return tp->packets_out && tcp_skb_timedout(tp, skb_peek(&sk->write_queue)); + } + #endif + /* This procedure tags the retransmission queue when SACKs arrive. * * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). *************** static void tcp_update_reordering(struct *** 756,761 **** --- 1061,1076 ---- * Both of these heuristics are not used in Loss state, when we cannot * account for retransmits accurately. */ + + /* tcp_sacktag_write_queue rewritten to use a avoid a walk of entire write queue every ack. For large + * window sizes, this is too slow. The new code restricts the walk to only those packets on the write queue which have not yet been + * sacked (these are the only packets that require attention here) and so computation scales as + * o(number of lost packets) rather than o(cwnd). + * - DL 10/9/03 + * + * TO DO: add support for DSACKS, look into piggy-backing sacked-list on sk_buff's. + */ + static int tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) { *************** tcp_sacktag_write_queue(struct sock *sk, *** 763,785 **** unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; int reord = tp->packets_out; int prior_fackets; u32 lost_retrans = 0; int flag = 0; int i; if (!tp->sacked_out) tp->fackets_out = 0; prior_fackets = tp->fackets_out; for (i=0; istart_seq); __u32 end_seq = ntohl(sp->end_seq); int fack_count = 0; int dup_sack = 0; ! /* Check for D-SACK. 
*/ if (i == 0) { u32 ack = TCP_SKB_CB(ack_skb)->ack_seq; --- 1078,1145 ---- unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; + #ifndef CONFIG_TCP_SACK2 int reord = tp->packets_out; + #endif + #ifdef CONFIG_TCP_SACK + int dup_sack = 0; + #endif int prior_fackets; u32 lost_retrans = 0; int flag = 0; + #ifdef CONFIG_TCP_SACK2 + int i; int j; + __u32 start_seq[4]; + __u32 end_seq[4]; + __u32 temp; + u32 ack = TCP_SKB_CB(ack_skb)->ack_seq; + u8 sacked; + struct sacked_list_item *sl = sacked_list_head(tp); + struct sk_buff *skb; + int in_sack; + struct sacked_list_item *temp2; + #else int i; + #endif + #ifndef CONFIG_TCP_SACK2 + WEB100_VAR_INC(tp, SACKsRcvd); + WEB100_VAR_ADD(tp, SACKBlocksRcvd, num_sacks); + #else + //printk("sacktag_write_queue ..."); + #endif if (!tp->sacked_out) tp->fackets_out = 0; prior_fackets = tp->fackets_out; + #ifndef CONFIG_TCP_SACK2 + #ifndef CONFIG_TCP_SACK for (i=0; istart_seq); __u32 end_seq = ntohl(sp->end_seq); int fack_count = 0; int dup_sack = 0; ! #else ! /* SACK fastpath: ! * if the only SACK change is the increase of the end_seq of ! * the first block then only apply that SACK block ! * and use retrans queue hinting otherwise slowpath */ ! flag = 1; ! for ( i=0; irecv_sack_cache[i].start_seq != start_seq)){ ! flag = 0; ! }else if((tp->recv_sack_cache[i].start_seq != start_seq) && ! (tp->recv_sack_cache[i].end_seq != end_seq)){ ! flag = 0; ! } ! tp->recv_sack_cache[i].start_seq = start_seq; ! tp->recv_sack_cache[i].end_seq = end_seq; ! #endif /* Check for D-SACK. */ if (i == 0) { u32 ack = TCP_SKB_CB(ack_skb)->ack_seq; *************** tcp_sacktag_write_queue(struct sock *sk, *** 795,800 **** --- 1155,1163 ---- tp->sack_ok |= 4; NET_INC_STATS_BH(TCPDSACKOfoRecv); } + + if (dup_sack) + WEB100_VAR_INC(tp, DSACKDups); /* D-SACK for already forgotten data... * Do dumb counting. 
*/ *************** tcp_sacktag_write_queue(struct sock *sk, *** 802,843 **** !after(end_seq, prior_snd_una) && after(end_seq, tp->undo_marker)) tp->undo_retrans--; /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can * contain valid SACK info. */ if (before(ack, prior_snd_una-tp->max_window)) return 0; } /* Event "B" in the comment above. */ if (after(end_seq, tp->high_seq)) flag |= FLAG_DATA_LOST; for_retrans_queue(skb, sk, tp) { u8 sacked = TCP_SKB_CB(skb)->sacked; int in_sack; /* The retransmission queue is always in order, so * we can short-circuit the walk early. */ ! if(!before(TCP_SKB_CB(skb)->seq, end_seq)) break; ! fack_count++; in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); /* Account D-SACK for retransmitted packet. */ if ((dup_sack && in_sack) && (sacked & TCPCB_RETRANS) && after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) tp->undo_retrans--; /* The frame is ACKed. */ if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) { if (sacked&TCPCB_RETRANS) { if ((dup_sack && in_sack) && (sacked&TCPCB_SACKED_ACKED)) --- 1165,1342 ---- !after(end_seq, prior_snd_una) && after(end_seq, tp->undo_marker)) tp->undo_retrans--; + #else + WEB100_VAR_INC(tp, SACKsRcvd); + WEB100_VAR_ADD(tp, SACKBlocksRcvd, num_sacks); + #endif /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can * contain valid SACK info. 
*/ if (before(ack, prior_snd_una-tp->max_window)) + #ifdef CONFIG_TCP_SACK2 + { + //printk("old ACK\n"); + #endif return 0; } + #ifdef CONFIG_TCP_SACK + } + + if(flag) { + num_sacks=1; + NET_INC_STATS_BH(TCPSackBlockEasyHits); + }else{ + int j; + tp->sackfastpath_skb_hint = NULL; + + /* order SACK blocks to allow in order walk of the retrans queue */ + for(i=num_sacks-1; i > 0; i--){ + for(j=0; jrecv_sack_cache[j+1].start_seq); + sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq); + sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq); + sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq); + NET_INC_STATS_BH(TCPSackBlockNormalised); + } + + } + } + + } + + /* clear flag as used for different purpose in following code */ + flag = 0; + + for (i=0; istart_seq); + __u32 end_seq = ntohl(sp->end_seq); + int fack_count; + + /* Use SACK fastpath hint if valid */ + if( tp->sackfastpath_skb_hint != NULL){ + skb = tp->sackfastpath_skb_hint; + fack_count = tp->sackfastpath_facket_cnt_hint; + NET_INC_STATS_BH(TCPSackFastPathHintHits); + }else{ + skb = sk->write_queue.next; + fack_count = 0; + } + + #elif defined CONFIG_TCP_SACK2 + for (i=0; istart_seq); + end_seq[i] = ntohl(sp->end_seq); + } + for (i=0; i start_seq[j]) { + temp=start_seq[i]; start_seq[i]=start_seq[j]; start_seq[j]=temp; + temp=end_seq[i]; end_seq[i]=end_seq[j]; end_seq[j]=temp; + } + } + } + + if (sl == NULL) { + skb = (sk)->write_queue.next; + if (add_sacked_list_tail (skb, tp)) { + // printk("sacktag error: initial sacked_list out of space\n"); + return 0; /* error - out of sacked_list space yet sacked_list empty*/ + } + sl = sacked_list_tail(tp); + } + BUG_TRAP(sl !=NULL); + + skb = sl->skb; + BUG_TRAP(skb !=NULL); + + for (i=0; ihigh_seq)) + #else + if (after(end_seq[i], tp->high_seq)) + #endif flag |= FLAG_DATA_LOST; + #ifndef CONFIG_TCP_SACK2 + #ifndef CONFIG_TCP_SACK for_retrans_queue(skb, sk, tp) { + #else + for_retrans_queue_from(skb,skb, sk, tp) { + #endif //!config_tcp_sack u8 sacked 
= TCP_SKB_CB(skb)->sacked; int in_sack; + #ifdef CONFIG_TCP_SACK + tp->sackfastpath_skb_hint = skb; + tp->sackfastpath_facket_cnt_hint = fack_count; + #endif + #else // !config_tcp_sack2 + while ((skb != (tp)->send_head) && (skb != (struct sk_buff *)&(sk)->write_queue)) { + #endif + + #ifndef CONFIG_TCP_SACK2 /* The retransmission queue is always in order, so * we can short-circuit the walk early. */ ! if( i+1seq, end_seq) ) ! #else ! if (sl == NULL) { ! /* run off end of sacked_list - do we need to extend it ? */ ! for (skb=skb->next;((skb != (tp)->send_head) && (skb != (struct sk_buff *)&(sk)->write_queue)); skb=skb->next) { ! BUG_TRAP(skb !=NULL); ! sacked = TCP_SKB_CB(skb)->sacked; ! if ( !(sacked&TCPCB_SACKED_ACKED) ) { ! if (add_sacked_list_tail (skb, tp)) { ! //printk("sacktag: sacked_list out of space\n"); ! return 0; /* out of sacked_list space */ ! } ! sl = sacked_list_tail(tp); ! #endif break; ! #ifdef CONFIG_TCP_SACK2 ! } ! } ! if (sl == NULL) { ! // printk("left, return 0.\n"); ! return 0; /* rest of write_queue in flight is already sacked ! */ ! } ! } ! skb = sl->skb; ! BUG_TRAP(skb !=NULL); ! #endif ! #ifndef CONFIG_TCP_SACK2 fack_count++; in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) && !before(end_seq, TCP_SKB_CB(skb)->end_seq); + #else + if (!before(TCP_SKB_CB(skb)->seq, end_seq[i])) + break; + #endif + #ifndef CONFIG_TCP_SACK2 /* Account D-SACK for retransmitted packet. */ if ((dup_sack && in_sack) && (sacked & TCPCB_RETRANS) && after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker)) tp->undo_retrans--; + #else + sacked = TCP_SKB_CB(skb)->sacked; + in_sack = !after(start_seq[i], TCP_SKB_CB(skb)->seq) && + !before(end_seq[i], TCP_SKB_CB(skb)->end_seq); + #endif /* The frame is ACKed. 
*/ if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) { + #ifndef CONFIG_TCP_SACK2 if (sacked&TCPCB_RETRANS) { if ((dup_sack && in_sack) && (sacked&TCPCB_SACKED_ACKED)) *************** tcp_sacktag_write_queue(struct sock *sk, *** 848,867 **** !(sacked&TCPCB_SACKED_ACKED)) reord = min(fack_count, reord); } ! /* Nothing to do; acked frame is about to be dropped. */ continue; } if ((sacked&TCPCB_SACKED_RETRANS) && after(end_seq, TCP_SKB_CB(skb)->ack_seq) && (!lost_retrans || after(end_seq, lost_retrans))) lost_retrans = end_seq; if (!in_sack) continue; if (!(sacked&TCPCB_SACKED_ACKED)) { if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, * we do not clear RETRANS, believing --- 1347,1390 ---- !(sacked&TCPCB_SACKED_ACKED)) reord = min(fack_count, reord); } ! #endif /* Nothing to do; acked frame is about to be dropped. */ + #ifdef CONFIG_TCP_SACK2 + sl = sl->next; + #endif continue; } if ((sacked&TCPCB_SACKED_RETRANS) && + #ifndef CONFIG_TCP_SACK2 after(end_seq, TCP_SKB_CB(skb)->ack_seq) && (!lost_retrans || after(end_seq, lost_retrans))) lost_retrans = end_seq; if (!in_sack) continue; + #else + after(end_seq[i], TCP_SKB_CB(skb)->ack_seq) && + (!lost_retrans || after(end_seq[i], lost_retrans))) + lost_retrans = end_seq[i]; + + if (!in_sack) { + + if (!(sacked&TCPCB_TAGBITS) && after(TCP_SKB_CB(skb)->end_seq,tp->snd_una) ) { + /* If packet lies in a hole, force retransmit */ + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out++; + } else if (sacked & TCPCB_SACKED_RETRANS && tcp_skb_timedout(tp,skb)) { + TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->retrans_out--; + } + sl = sl->next; + #endif + #ifndef CONFIG_TCP_SACK2 if (!(sacked&TCPCB_SACKED_ACKED)) { + #else + } else { + #endif if (sacked & TCPCB_SACKED_RETRANS) { /* If the segment is not tagged as lost, * we do not clear RETRANS, believing *************** tcp_sacktag_write_queue(struct sock *sk, *** 871,888 **** --- 1394,1423 ---- 
TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); tp->lost_out--; tp->retrans_out--; + #ifdef CONFIG_TCP_SACK + /* clear lost hint */ + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + #endif + } } else { + #ifndef CONFIG_TCP_SACK2 /* New sack for not retransmitted frame, * which was in hole. It is reordering. */ if (!(sacked & TCPCB_RETRANS) && fack_count < prior_fackets) reord = min(fack_count, reord); + #endif if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; tp->lost_out--; + #ifdef CONFIG_TCP_SACK + /* clear lost hint */ + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + #endif + } } *************** tcp_sacktag_write_queue(struct sock *sk, *** 890,903 **** flag |= FLAG_DATA_SACKED; tp->sacked_out++; if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); } ! /* D-SACK. We can detect redundant retransmission * in S|R and plain R frames and clear it. * undo_retrans is decreased above, L|R frames * are accounted above as well. --- 1425,1450 ---- flag |= FLAG_DATA_SACKED; tp->sacked_out++; + #ifndef CONFIG_TCP_SACK2 if (fack_count > tp->fackets_out) tp->fackets_out = fack_count; } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); + #else + /* unlink skbuff from sacked_list */ + temp2=sl->next; + free_sacked_list(sl, tp); + sl=temp2; + #endif } + #ifdef CONFIG_TCP_SACK2 + } /* while */ + } /* for */ + #endif ! #ifndef CONFIG_TCP_SACK2 ! /* D-SACK. We can detect redundant retransmission * in S|R and plain R frames and clear it. * undo_retrans is decreased above, L|R frames * are accounted above as well. *************** tcp_sacktag_write_queue(struct sock *sk, *** 909,914 **** --- 1456,1462 ---- } } } + #endif /* Check for lost retransmit. This superb idea is * borrowed from "ratehalving". Event "C". 
*************** tcp_sacktag_write_queue(struct sock *sk, *** 918,924 **** */ if (lost_retrans && tp->ca_state == TCP_CA_Recovery) { struct sk_buff *skb; ! for_retrans_queue(skb, sk, tp) { if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) break; --- 1466,1474 ---- */ if (lost_retrans && tp->ca_state == TCP_CA_Recovery) { struct sk_buff *skb; ! #ifdef CONFIG_TCP_SACK2 ! // printk ("lost retransmit\n"); ! #endif for_retrans_queue(skb, sk, tp) { if (after(TCP_SKB_CB(skb)->seq, lost_retrans)) break; *************** tcp_sacktag_write_queue(struct sock *sk, *** 936,950 **** --- 1486,1511 ---- TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; flag |= FLAG_DATA_SACKED; NET_INC_STATS_BH(TCPLostRetransmit); + #ifdef CONFIG_TCP_SACK + /* clear lost hint */ + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + #endif } } } } + #ifdef CONFIG_TCP_SACK2 + tp->fackets_out = tp->sacked_out + tp->lost_out; + #endif tp->left_out = tp->sacked_out + tp->lost_out; + #ifndef CONFIG_TCP_SACK2 if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss) tcp_update_reordering(tp, (tp->fackets_out+1)-reord, 0); + #else + printk("left.\n"); + #endif #if FASTRETRANS_DEBUG > 0 BUG_TRAP((int)tp->sacked_out >= 0); *************** tcp_sacktag_write_queue(struct sock *sk, *** 957,962 **** --- 1518,1527 ---- void tcp_clear_retrans(struct tcp_opt *tp) { + + #ifdef CONFIG_TCP_SACK + clear_all_retrans_hints(tp); + #endif tp->left_out = 0; tp->retrans_out = 0; *************** void tcp_enter_loss(struct sock *sk, int *** 977,992 **** struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct sk_buff *skb; int cnt = 0; ! /* Reduce ssthresh if it has not yet been made inside this window. 
*/ if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(tp); tp->snd_ssthresh = tcp_recalc_ssthresh(tp); } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; tcp_clear_retrans(tp); --- 1542,1570 ---- struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; struct sk_buff *skb; int cnt = 0; ! #ifdef CONFIG_WEB100_STATS ! WEB100_UPDATE_FUNC(tp, web100_update_congestion(tp, 0)); ! #endif /* Reduce ssthresh if it has not yet been made inside this window. */ if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(tp); tp->snd_ssthresh = tcp_recalc_ssthresh(tp); + #ifdef CONFIG_ALTAIMD + decreasenum_reset(tp); + #endif } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; + #ifdef CONFIG_ALTAIMD + htcp_reset(tp); + #endif + + #ifdef CONFIG_RFC3465 + tp->bytes_acked = 0; + #endif + tp->snd_cwnd_stamp = tcp_time_stamp; tcp_clear_retrans(tp); *************** void tcp_enter_loss(struct sock *sk, int *** 996,1001 **** --- 1574,1582 ---- if (!how) tp->undo_marker = tp->snd_una; + #ifdef CONFIG_TCP_SACK2 + purge_sacked_list (tp); + #endif for_retrans_queue(skb, sk, tp) { cnt++; if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) *************** void tcp_enter_loss(struct sock *sk, int *** 1013,1018 **** --- 1594,1600 ---- tcp_sync_left_out(tp); tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); + WEB100_VAR_SET(tp, RetranThresh, tp->reordering); tp->ca_state = TCP_CA_Loss; tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); *************** static inline int tcp_fackets_out(struct *** 1046,1051 **** --- 1628,1634 ---- return IsReno(tp) ? 
tp->sacked_out+1 : tp->fackets_out; } + #ifndef CONFIG_TCP_SACK2 static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb) { return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto); *************** static inline int tcp_head_timedout(stru *** 1055,1060 **** --- 1638,1644 ---- { return tp->packets_out && tcp_skb_timedout(tp, skb_peek(&sk->write_queue)); } + #endif /* Linux NewReno/SACK/FACK/ECN state machine. * -------------------------------------- *************** static int *** 1153,1160 **** --- 1737,1746 ---- tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp) { /* Trick#1: The loss is proven. */ + #ifndef CONFIG_TCP_SACK2 if (tp->lost_out) return 1; + #endif /* Not-A-Trick#2 : Classic rule... */ if (tcp_fackets_out(tp) > tp->reordering) *************** static void *** 1233,1248 **** --- 1819,1864 ---- tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_seq) { struct sk_buff *skb; + #ifdef CONFIG_TCP_SACK + int cnt = 0; + #else int cnt = packets; + #endif BUG_TRAP(cnt <= tp->packets_out); + #ifndef CONFIG_TCP_SACK for_retrans_queue(skb, sk, tp) { if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) + #else + if ( tp->mark_head_lost_skb_hint != NULL ) { + skb = tp->mark_head_lost_skb_hint; + cnt = tp->mark_head_lost_cnt_hint; + net_statistics[smp_processor_id()*2].TCPMarkHeadHintHits += tp->mark_head_lost_cnt_hint; + /*NET_INC_STATS_BH(TCPMarkHeadHintHits);*/ + }else{ + skb = sk->write_queue.next; + } + + for_retrans_queue_from(skb, skb, sk, tp) { + /* TODO: do this better */ + /* this is not the most efficient way to do this... 
*/ + tp->mark_head_lost_skb_hint = skb; + tp->mark_head_lost_cnt_hint = cnt; + if (++cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) + #endif break; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out++; + #ifdef CONFIG_TCP_SACK + /* clear xmit_retransmit_queue hints + * if this is beyond hint */ + if(tp->xmit_retransmit_queue_lost_skb_hint != NULL && + before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->xmit_retransmit_queue_lost_skb_hint)->seq) ){ + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + } + #endif } } tcp_sync_left_out(tp); *************** static void tcp_update_scoreboard(struct *** 1269,1306 **** if (tcp_head_timedout(sk, tp)) { struct sk_buff *skb; for_retrans_queue(skb, sk, tp) { if (tcp_skb_timedout(tp, skb) && !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out++; } } tcp_sync_left_out(tp); } } - /* CWND moderation, preventing bursts due to too big ACKs - * in dubious situations. - */ - static __inline__ void tcp_moderate_cwnd(struct tcp_opt *tp) - { - tp->snd_cwnd = min(tp->snd_cwnd, - tcp_packets_in_flight(tp)+tcp_max_burst(tp)); - tp->snd_cwnd_stamp = tcp_time_stamp; - } - /* Decrease cwnd each second ack. 
*/ static void tcp_cwnd_down(struct tcp_opt *tp) { int decr = tp->snd_cwnd_cnt + 1; tp->snd_cwnd_cnt = decr&1; decr >>= 1; if (decr && tp->snd_cwnd > tp->snd_ssthresh/2) tp->snd_cwnd -= decr; tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); tp->snd_cwnd_stamp = tcp_time_stamp; --- 1885,1954 ---- if (tcp_head_timedout(sk, tp)) { struct sk_buff *skb; + #ifndef CONFIG_TCP_SACK for_retrans_queue(skb, sk, tp) { if (tcp_skb_timedout(tp, skb) && !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out++; + #else + if( tp->update_scoreboard_skb_hint != NULL ) { + skb = tp->update_scoreboard_skb_hint; + NET_INC_STATS_BH(TCPUpdateScoreHintHits); + }else{ + skb = sk->write_queue.next; + } + + for_retrans_queue_from(skb,skb, sk, tp) { + if(tcp_skb_timedout(tp, skb)){ + if(!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)){ + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + tp->lost_out++; + /* clear xmit_retrans hint */ + if(tp->xmit_retransmit_queue_lost_skb_hint != NULL && + before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->xmit_retransmit_queue_lost_skb_hint)->seq) ){ + + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + } + } + }else{ + break; + #endif } } + #ifdef CONFIG_TCP_SACK + tp->update_scoreboard_skb_hint = skb; + #endif tcp_sync_left_out(tp); } } /* Decrease cwnd each second ack. 
*/ static void tcp_cwnd_down(struct tcp_opt *tp) { int decr = tp->snd_cwnd_cnt + 1; + #ifdef CONFIG_ALTAIMD + htcp_reset(tp); + #endif + tp->snd_cwnd_cnt = decr&1; decr >>= 1; + #ifdef CONFIG_ALTAIMD + if ( sysctl_tcp_altAIMD == AIMD_HTCP ) { + if (decr && tp->snd_cwnd > tp->snd_ssthresh) + tp->snd_cwnd -= decr; + } else + /* decrease cwnd each second ack */ + if( decr && tp->snd_cwnd > tp->rh_target){ + tp->snd_cwnd -= decr; + } + #else if (decr && tp->snd_cwnd > tp->snd_ssthresh/2) tp->snd_cwnd -= decr; + #endif tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); tp->snd_cwnd_stamp = tcp_time_stamp; *************** static void DBGUNDO(struct sock *sk, str *** 1334,1350 **** --- 1982,2012 ---- static void tcp_undo_cwr(struct tcp_opt *tp, int undo) { if (tp->prior_ssthresh) { + #ifdef CONFIG_ALTAIMD + if( sysctl_tcp_altAIMD == AIMD_HTCP ) + tp->snd_cwnd = max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/tp->snd_decreasenum); + else + tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); + #else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); + #endif if (undo && tp->prior_ssthresh > tp->snd_ssthresh) { tp->snd_ssthresh = tp->prior_ssthresh; TCP_ECN_withdraw_cwr(tp); } + WEB100_VAR_INC(tp, CongestionOverCount); } else { tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh); } tcp_moderate_cwnd(tp); tp->snd_cwnd_stamp = tcp_time_stamp; + + #ifdef CONFIG_TCP_SACK + /* There is something screwy going on with the retrans hints after + an undo */ + clear_all_retrans_hints(tp); + #endif } static inline int tcp_may_undo(struct tcp_opt *tp) *************** static int tcp_try_undo_loss(struct sock *** 1427,1432 **** --- 2089,2097 ---- for_retrans_queue(skb, sk, tp) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } + #ifdef CONFIG_TCP_SACK + clear_all_retrans_hints(tp); + #endif DBGUNDO(sk, tp, "partial loss"); tp->lost_out = 0; tp->left_out = tp->sacked_out; *************** static int tcp_try_undo_loss(struct sock *** 1443,1449 **** --- 2108,2118 ---- static __inline__ void 
tcp_complete_cwr(struct tcp_opt *tp) { + #ifdef CONFIG_ALTAIMD + tp->snd_cwnd = min(tp->snd_cwnd, tp->rh_target); + #else tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); + #endif tp->snd_cwnd_stamp = tcp_time_stamp; } *************** static void tcp_try_to_open(struct sock *** 1462,1470 **** --- 2131,2148 ---- if (tp->left_out || tp->retrans_out || + #ifndef CONFIG_TCP_SACK2 tp->undo_marker) + #else + tp->undo_marker) { + #endif state = TCP_CA_Disorder; + #ifdef CONFIG_TCP_SACK2 + tp->retrans_out=0; + tp->undo_marker=0; + } + #endif if (tp->ca_state != state) { tp->ca_state = state; tp->high_seq = tp->snd_nxt; *************** tcp_fastretrans_alert(struct sock *sk, u *** 1514,1520 **** --- 2192,2202 ---- if ((flag&FLAG_DATA_LOST) && before(tp->snd_una, tp->high_seq) && tp->ca_state != TCP_CA_Open && + #ifndef CONFIG_TCP_SACK2 tp->fackets_out > tp->reordering) { + #else + tp->fackets_out > tp->reordering && IsReno(tp) ) { + #endif tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); NET_INC_STATS_BH(TCPLoss); } *************** tcp_fastretrans_alert(struct sock *sk, u *** 1617,1622 **** --- 2299,2331 ---- tp->undo_marker = tp->snd_una; tp->undo_retrans = tp->retrans_out; + #ifdef CONFIG_ALTAIMD + /* calculate backoff factor to use */ + // printk("calculate backoff factor to use: maxB=%u, minB=%u, oldmaxB=%u, modecount=%u\n", tp->snd_maxB, tp->snd_minB, tp->snd_oldmaxB, tp->snd_modecount); + if (tp->snd_decreasenum2 == 0) tp->snd_decreasenum2=1<<7; /* initialise decreasenum2 */ + if ((5*tp->snd_maxB > 4*tp->snd_oldmaxB)&&(5*tp->snd_maxB < 6*tp->snd_oldmaxB) && sysctl_tcp_altAIMD == AIMD_HTCP ) { + tp->snd_modecount++; + if (tp->snd_modecount > 1 && tp->snd_minB>0) { + u32 d = tp->snd_decreasenum*tp->snd_oldmaxB/tp->snd_minB; + // printk("d=%u. ",d); + if ( d < tp->snd_decreasenum2 + && tp->snd_minB < tp->snd_oldmaxB /* sanity check */ + && tp->snd_ccount>3) { + // printk("decreasenum2 updated. 
"); + tp->snd_decreasenum2 = d; + } + tp->snd_decreasenum=tp->snd_decreasenum2; + } else + tp->snd_decreasenum = 1<<6; /* 0.5 */ + } else { + tp->snd_modecount = 0; + tp->snd_decreasenum = 1<<6; /* 0.5 */ + } + // printk("decreasenum2=%u\n",tp->snd_decreasenum2); + decreasenum_check(tp); + if (tp->snd_ccount>3) tp->snd_oldmaxB = tp->snd_maxB; + #endif + if (tp->ca_state < TCP_CA_CWR) { if (!(flag&FLAG_ECE)) tp->prior_ssthresh = tcp_current_ssthresh(tp); *************** tcp_fastretrans_alert(struct sock *sk, u *** 1625,1634 **** --- 2334,2357 ---- } tp->snd_cwnd_cnt = 0; + + #ifdef CONFIG_ALTAIMD + htcp_reset(tp); + #endif + + #ifdef CONFIG_RFC3465 + tp->bytes_acked = 0; + #endif tp->ca_state = TCP_CA_Recovery; + WEB100_UPDATE_FUNC(tp, web100_update_congestion(tp, 0)); + WEB100_VAR_INC(tp, FastRetran); /* WEB100_XXX */ } + #ifndef CONFIG_TCP_SACK2 if (is_dupack || tcp_head_timedout(sk, tp)) + #else + if (IsReno(tp) && (is_dupack || tcp_head_timedout(sk, tp))) + #endif tcp_update_scoreboard(sk, tp); tcp_cwnd_down(tp); tcp_xmit_retransmit_queue(sk); *************** tcp_fastretrans_alert(struct sock *sk, u *** 1637,1644 **** /* Read draft-ietf-tcplw-high-performance before mucking * with this code. (Superceeds RFC1323) */ ! static void tcp_ack_saw_tstamp(struct tcp_opt *tp, int flag) { __u32 seq_rtt; /* RTTM Rule: A TSecr value received in a segment is used to --- 2360,2368 ---- /* Read draft-ietf-tcplw-high-performance before mucking * with this code. (Superceeds RFC1323) */ ! static void tcp_ack_saw_tstamp(struct sock *sk, int flag) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; __u32 seq_rtt; /* RTTM Rule: A TSecr value received in a segment is used to *************** static void tcp_ack_saw_tstamp(struct tc *** 1657,1670 **** * in window is lost... Voila. --ANK (010210) */ seq_rtt = tcp_time_stamp - tp->rcv_tsecr; tcp_rtt_estimator(tp, seq_rtt); tcp_set_rto(tp); tp->backoff = 0; tcp_bound_rto(tp); } ! 
static void tcp_ack_no_tstamp(struct tcp_opt *tp, u32 seq_rtt, int flag) { /* We don't have a timestamp. Can only use * packets that are not retransmitted to determine * rtt estimates. Also, we must not reset the --- 2381,2398 ---- * in window is lost... Voila. --ANK (010210) */ seq_rtt = tcp_time_stamp - tp->rcv_tsecr; + tcp_rtt_estimator(tp, seq_rtt); tcp_set_rto(tp); + WEB100_UPDATE_FUNC(tp, web100_update_rtt(tp, seq_rtt)); tp->backoff = 0; tcp_bound_rto(tp); } ! static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + /* We don't have a timestamp. Can only use * packets that are not retransmitted to determine * rtt estimates. Also, we must not reset the *************** static void tcp_ack_no_tstamp(struct tcp *** 1679,1719 **** tcp_rtt_estimator(tp, seq_rtt); tcp_set_rto(tp); tp->backoff = 0; tcp_bound_rto(tp); } static __inline__ void ! tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt) { /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ if (tp->saw_tstamp && tp->rcv_tsecr) ! tcp_ack_saw_tstamp(tp, flag); else if (seq_rtt >= 0) ! tcp_ack_no_tstamp(tp, seq_rtt, flag); } /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ static __inline__ void tcp_cong_avoid(struct tcp_opt *tp) { ! if (tp->snd_cwnd <= tp->snd_ssthresh) { /* In "safe" area, increase. */ ! if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; ! } else { ! /* In dangerous area, increase slowly. ! * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd ! */ ! if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { ! if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; ! tp->snd_cwnd_cnt=0; ! } else tp->snd_cwnd_cnt++; ! } ! tp->snd_cwnd_stamp = tcp_time_stamp; } /* Restart timer after forward progress on connection. 
--- 2407,2735 ---- tcp_rtt_estimator(tp, seq_rtt); tcp_set_rto(tp); + WEB100_UPDATE_FUNC(tp, web100_update_rtt(tp, seq_rtt)); tp->backoff = 0; tcp_bound_rto(tp); } static __inline__ void ! tcp_ack_update_rtt(struct sock *sk, int flag, s32 seq_rtt) { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ if (tp->saw_tstamp && tp->rcv_tsecr) ! tcp_ack_saw_tstamp(sk, flag); else if (seq_rtt >= 0) ! tcp_ack_no_tstamp(sk, seq_rtt, flag); } + #ifdef CONFIG_RFC3465 + /* Macro increments cwnd for the number of bytes acknowledged */ + #define TCP_ABC_COUNT( tp, sysctl_tcp_abc_L, mss_now, incrs_applied ) \ + while( tp->bytes_acked > mss_now && incrs_applied < sysctl_tcp_abc_L ) { \ + tp->bytes_acked -= mss_now; \ + tp->snd_cwnd++; \ + incrs_applied++; \ + } + #endif + + /* This is Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ + #ifdef CONFIG_RFC3465 + static __inline__ void tcp_cong_avoid(struct tcp_opt *tp, unsigned int mss_now) + #else static __inline__ void tcp_cong_avoid(struct tcp_opt *tp) + #endif { ! #ifdef CONFIG_RFC3465 ! int incrs_applied = 0; ! #endif ! ! if (tp->snd_cwnd <= tp->snd_ssthresh) { /* In "safe" area, increase. */ ! #ifdef CONFIG_LIMITED_SLOW_START ! /* Slow Start */ ! if ( sysctl_tcp_ss_max_ssthresh <= 0 ! || tp->snd_cwnd <= sysctl_tcp_ss_max_ssthresh ) { ! /* this ensures that we only start counting after we leave ! standard slow start */ ! #ifdef CONFIG_RFC3465 ! if ( sysctl_tcp_abc && !tp->nonagle ) ! { TCP_ABC_COUNT( tp, sysctl_tcp_abc_L, mss_now, incrs_applied ); } ! else ! tp->snd_cwnd++; ! #else tp->snd_cwnd++; ! #endif ! } else { ! /* Floyd modified slow start */ ! if (tp->snd_lss_k == 0) ! tp->snd_lss_k = 2 * tp->snd_cwnd / sysctl_tcp_ss_max_ssthresh; ! if (tp->snd_ssthresh_cnt1++ >= tp->snd_lss_k ) { ! #ifdef CONFIG_RFC3465 ! if ( sysctl_tcp_abc && !tp->nonagle ) ! { TCP_ABC_COUNT( tp, sysctl_tcp_abc_L, mss_now, incrs_applied ); } ! else ! 
tp->snd_cwnd++; ! #else ! tp->snd_cwnd++; ! #endif ! tp->snd_ssthresh_cnt1 = 0; ! if (tp->snd_ssthresh_cnt2++ >= sysctl_tcp_ss_max_ssthresh) { ! tp->snd_ssthresh_cnt2 = 0; ! tp->snd_lss_k = 2 * tp->snd_cwnd / sysctl_tcp_ss_max_ssthresh; ! } ! } ! } ! ! #else // lss ! #ifdef CONFIG_RFC3465 ! if ( sysctl_tcp_abc && !tp->nonagle ) ! TCP_ABC_COUNT( tp, sysctl_tcp_abc_L, mss_now, incrs_applied ); ! else ! tp->snd_cwnd++; ! #else ! tp->snd_cwnd++; ! #endif // abc ! #endif // lss ! ! #ifdef CONFIG_RFC3465 ! if ( sysctl_tcp_abc && !tp->nonagle ) ! /* only apply per ack in slow start */ ! if (tp->snd_cwnd <= tp->snd_ssthresh) ! while(tp->bytes_acked > mss_now) ! tp->bytes_acked -= mss_now; ! #endif ! ! #ifdef CONFIG_ALTAIMD ! htcp_reset(tp); ! #endif ! ! WEB100_VAR_INC(tp, SlowStart); ! } else { ! ! #ifdef CONFIG_ALTAIMD ! /* HTCP variables */ ! __u32 diff=0; ! __u32 alpha=1; ! __u32 thresh=0; ! __u32 RTT_Scaling=1; ! __u32 addon; ! ! ! if ( sysctl_tcp_altAIMD == AIMD_HSTCP ) { ! ! /* do hstcp where cwnd -> cwnd + a/cwnd and a is variable ! * hstcp_table handles normal increment also */ ! tp->snd_cwnd_cnt += altAIMD_get_hstcp_entry(tp).a_val; ! ! if ( tp->snd_cwnd_cnt >= tp->snd_cwnd ) { ! ! #ifdef CONFIG_RFC3465 ! if ( sysctl_tcp_abc && !tp->nonagle ) { ! tp->snd_cwnd_cnt -= tp->snd_cwnd; ! TCP_ABC_COUNT( tp, sysctl_tcp_abc_L, mss_now, incrs_applied ); ! } else { ! tp->snd_cwnd_cnt -= tp->snd_cwnd; ! tp->snd_cwnd++; ! } ! #else ! tp->snd_cwnd_cnt -= tp->snd_cwnd; tp->snd_cwnd++; ! #endif ! } ! ! } else if ( sysctl_tcp_altAIMD == AIMD_SCALABLE ) { ! ! tp->snd_cwnd_cnt++; ! ! /* scalable does cwnd -> cwnd + 1/A where 1/A is sysctl_tcp_scalable_1_on_a */ ! /* do normal tcp behaviour */ ! if ( tp->snd_cwnd <= sysctl_tcp_scalable_highwin ) { ! ! if ( tp->snd_cwnd_cnt >= tp->snd_cwnd ) { ! #ifdef CONFIG_RFC3465 ! if ( sysctl_tcp_abc && !tp->nonagle ) { ! tp->snd_cwnd_cnt -= tp->snd_cwnd; ! TCP_ABC_COUNT( tp, sysctl_tcp_abc_L, mss_now, incrs_applied ); ! } else { ! 
tp->snd_cwnd_cnt -= tp->snd_cwnd; ! tp->snd_cwnd++; ! } ! #else ! tp->snd_cwnd_cnt -= tp->snd_cwnd; ! tp->snd_cwnd++; ! #endif ! /* set the AIMD parameter */ ! WEB100_VAR_SET(tp, WAD_AI, 1 ); ! WEB100_VAR_SET(tp, WAD_MD, 128 ); ! } ! /* do scalable */ ! } else { ! ! if ( tp->snd_cwnd_cnt >= sysctl_tcp_scalable_1_on_a ) { ! #ifdef CONFIG_RFC3465 ! if( sysctl_tcp_abc && !tp->nonagle ) { ! tp->snd_cwnd_cnt = 0; ! TCP_ABC_COUNT( tp, sysctl_tcp_abc_L, mss_now, incrs_applied ); ! } else { ! tp->snd_cwnd_cnt = 0; ! tp->snd_cwnd++; ! } ! #else ! tp->snd_cwnd_cnt = 0; ! tp->snd_cwnd++; ! #endif ! /* set the AIMD parameter */ ! WEB100_VAR_SET(tp, WAD_AI, 0 ); ! WEB100_VAR_SET(tp, WAD_MD, 32 ); ! } ! } ! ! ! } else if ( sysctl_tcp_altAIMD == AIMD_GRIDDT ) { ! ! /* In theory this is tp->snd_cwnd += a(RTT,MSS)/ tp->snd_cwnd ! * a(RTT,MSS) is evaluated by the function add_inc() */ ! if (tp->griddt_snd_cwnd_inc == 0) ! tp->griddt_snd_cwnd_inc = add_inc(tp); ! ! if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { ! ! if ( tp->griddt_snd_cwnd_add_cnt <= ! (tp->griddt_snd_cwnd_inc * sysctl_tcp_griddt_add_cnt_clamp) ) { ! ! if (tp->griddt_snd_cwnd_add_cnt == 0) ! tp->griddt_snd_cwnd_inc = add_inc(tp); ! ! tp->griddt_snd_cwnd_add_cnt++; ! ! } else ! tp->griddt_snd_cwnd_add_cnt=0; ! ! // if (tp->snd_cwnd < tp->snd_cwnd_clamp) ! #ifdef CONFIG_RFC3465 ! if ( sysctl_tcp_abc && !tp->nonagle ) ! { TCP_ABC_COUNT( tp, sysctl_tcp_abc_L, mss_now, incrs_applied ); } ! else ! tp->snd_cwnd++; ! #else ! tp->snd_cwnd++; ! #endif ! ! tp->snd_cwnd_cnt=0; ! ! } else { ! tp->snd_cwnd_cnt += tp->griddt_snd_cwnd_inc; ! } ! ! ! } else if ( sysctl_tcp_altAIMD == AIMD_HTCP ) { ! ! /* TODO: Add ABC to HTCP */ ! ! /* TO DO: proper ack accounting to allow for both ordinary acks and delayed acks */ ! ! /* keep track of number of round-trip times since last backoff event */ ! if (tp->snd_cwnd_cnt2 > tp->snd_cwnd) { ! tp->snd_ccount++; ! tp->snd_cwnd_cnt2=0; ! } else ! 
tp->snd_cwnd_cnt2 = tp->snd_cwnd_cnt2 + 2; /* add 2 to account for delayed acking */ ! ! /* keep track of minimum RTT seen so far */ ! if (tp->snd_minRTT <=1) ! tp->snd_minRTT=tp->srtt>>3; /* minRTT is initially zero */ ! tp->snd_minRTT=min(tp->snd_minRTT, tp->srtt>>3); ! if (tp->snd_minRTT <=1) ! tp->snd_minRTT = 1; /* safety net to avoid divide by zero */ ! ! thresh = HZ/tp->snd_minRTT; /* stay in low-speed mode for one second */ ! /* thresh=19; */ /* or 19 RTT's */ ! ! RTT_Scaling = max((HZ<<3)/(5*tp->snd_minRTT), (__u32)1<<3); ! RTT_Scaling = min(RTT_Scaling,(__u32)10<<3); ! ! // if (sysctl_tcp_htcp==1) { ! /* calculate increase rate - alpha is number of packets added to cwnd per RTT */ ! alpha = 1; ! if (tp->snd_ccount > thresh) { ! diff = ((tp->snd_ccount-thresh)<<3) / RTT_Scaling; ! alpha += diff+(diff/20)*diff/20; ! } ! alpha = (alpha<<3)/RTT_Scaling; ! alpha = alpha<<1; /* account for delayed acking */ ! if (alpha<1) alpha=1; ! alpha = alpha*2*((1<<7)-tp->snd_decreasenum); ! //} else { ! // alpha=1<<7; ! //} ! //printk("cong_avoid, alpha=%u/128, decreasenum=%u/128, RTTScaling=%u/8, thresh=%u\n", alpha, tp->snd_decreasenum,RTT_Scaling, thresh); ! ! /* experimental - tcp header only supports up to 3 sack blocks, so by incrementing cwnd in */ ! /* 3 bursts (moderated by maxburst) we get roughly 3 "holes" in flow when */ ! /* congestion occurs which may lead to more robust sack performance. */ ! /* if (alpha > 3<<7) { ! addon=(alpha/3)>>7; alpha=3<<7; ! } else ! addon=1; ! */ ! addon=1; ! tp->snd_cwnd_cnt++; ! if ((tp->snd_cwnd_cnt*alpha)>>7 > tp->snd_cwnd) { ! tp->snd_cwnd=tp->snd_cwnd+addon; ! tp->snd_cwnd_cnt=0; ! } ! ! /* Otherwise do standard vanilla tcp cwnd -> 1/cwnd per ack */ ! } else { ! tp->snd_cwnd_cnt++; ! if ( tp->snd_cwnd_cnt >= tp->snd_cwnd ) { ! #ifdef CONFIG_RFC3465 ! if( sysctl_tcp_abc && !tp->nonagle ) { ! tp->snd_cwnd_cnt -= tp->snd_cwnd; ! TCP_ABC_COUNT( tp, sysctl_tcp_abc_L, mss_now, incrs_applied ); ! } else { ! 
tp->snd_cwnd_cnt -= tp->snd_cwnd; ! tp->snd_cwnd++; ! } ! #else ! tp->snd_cwnd_cnt -= tp->snd_cwnd; ! tp->snd_cwnd++; ! #endif ! } ! ! /* HTCP */ ! alpha = 1<<7; ! ! /* set the AIMD parameter */ ! WEB100_VAR_SET(tp, WAD_AI, 1 ); ! WEB100_VAR_SET(tp, WAD_MD, 128 ); ! } ! ! #else // ALTAIMD ! /* In dangerous area, increase slowly. ! * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd ! */ ! if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { ! tp->snd_cwnd++; ! tp->snd_cwnd_cnt=0; ! } else ! tp->snd_cwnd_cnt++; ! ! /* set the AIMD parameter */ ! WEB100_VAR_SET(tp, WAD_AI, 1 ); ! WEB100_VAR_SET(tp, WAD_MD, 128 ); ! #endif ! ! WEB100_VAR_INC(tp, CongAvoid); ! } ! ! tp->snd_cwnd = min(tp->snd_cwnd, (__u32)tp->snd_cwnd_clamp); ! tp->snd_cwnd_stamp = tcp_time_stamp; } /* Restart timer after forward progress on connection. *************** static int tcp_clean_rtx_queue(struct so *** 1737,1742 **** --- 2753,2790 ---- __u32 now = tcp_time_stamp; int acked = 0; __s32 seq_rtt = -1; + #ifdef CONFIG_ALTAIMD + __u32 minB0; + #endif + #ifdef CONFIG_TCP_SACK + int clean_cnt = tp->packets_out; + + /* clear retrans queue hinting if necessary */ + hint_advance(tp->mark_head_lost_skb_hint, tp->snd_una); + hint_advance(tp->update_scoreboard_skb_hint, tp->snd_una); + hint_advance(tp->xmit_retransmit_queue_lost_skb_hint, tp->snd_una); + hint_advance(tp->xmit_retransmit_queue_forward_skb_hint, tp->snd_una); + hint_advance(tp->sackfastpath_skb_hint, tp->snd_una); + #endif + + #ifdef CONFIG_TCP_SACK2 + struct sacked_list_item *temp; + + /* clean up sacked list */ + struct sacked_list_item *sl = sacked_list_head(tp); + while (sl != NULL) { + if (TCP_SKB_CB(sl->skb) == NULL) { + // printk("clean_rtx_queue: error sl->skb=NULL\n"); + break; + } + if (after(TCP_SKB_CB(sl->skb)->end_seq, tp->snd_una)) + break; + + temp=sl->next; + free_sacked_list(sl, tp); + sl=temp; + } + #endif while((skb=skb_peek(&sk->write_queue)) && (skb != tp->send_head)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); *************** 
static int tcp_clean_rtx_queue(struct so *** 1758,1763 **** --- 2806,2814 ---- */ if(!(scb->flags & TCPCB_FLAG_SYN)) { acked |= FLAG_DATA_ACKED; + #ifdef CONFIG_ALTAIMD + tp->snd_packetcount++; + #endif } else { acked |= FLAG_SYN_ACKED; tp->retrans_stamp = 0; *************** static int tcp_clean_rtx_queue(struct so *** 1773,1780 **** --- 2824,2838 ---- seq_rtt = now - scb->when; if(sacked & TCPCB_SACKED_ACKED) tp->sacked_out--; + #ifndef CONFIG_TCP_SACK if(sacked & TCPCB_LOST) tp->lost_out--; + #else + if(sacked & TCPCB_LOST){ + tp->lost_out--; + tp->xmit_retransmit_queue_lost_cnt_hint--; + } + #endif if(sacked & TCPCB_URG) { if (tp->urg_mode && !before(scb->end_seq, tp->snd_up)) *************** static int tcp_clean_rtx_queue(struct so *** 1789,1799 **** tcp_free_skb(sk, skb); } if (acked&FLAG_ACKED) { ! tcp_ack_update_rtt(tp, acked, seq_rtt); tcp_ack_packets_out(sk, tp); } #if FASTRETRANS_DEBUG > 0 BUG_TRAP((int)tp->sacked_out >= 0); BUG_TRAP((int)tp->lost_out >= 0); --- 2847,2892 ---- tcp_free_skb(sk, skb); } + #ifdef CONFIG_TCP_SACK + /* keep retrans queue hinting valid */ + clean_cnt = clean_cnt - tp->packets_out; + tp->mark_head_lost_cnt_hint -= clean_cnt; + tp->xmit_retransmit_queue_forward_cnt_hint -= clean_cnt; + tp->sackfastpath_facket_cnt_hint -= clean_cnt; + #endif + if (acked&FLAG_ACKED) { ! 
tcp_ack_update_rtt(sk, acked, seq_rtt); tcp_ack_packets_out(sk, tp); } + #ifdef CONFIG_ALTAIMD + /* achieved throughput calculations */ + if (tp->snd_ccount<=3) { + tp->snd_packetcount = 0; + if (tp->snd_minRTT > 0) /* avoid divide by zero */ + minB0 = tp->snd_cwnd*HZ/tp->snd_minRTT; + else + minB0 = 0; + tp->snd_maxB = minB0/2; + tp->snd_Bi = minB0; + tp->snd_minB = minB0; //this isn't correct if queue doesn't empty + tp->snd_lasttime=now; + // printk("initialising minB etc, cwnd=%u, =%u, ccount=%u\n", tp->snd_cwnd,tp->snd_minB, tp->snd_ccount); + } else if (tp->snd_packetcount >= tp->snd_cwnd + && now - tp->snd_lasttime >= tp->snd_minRTT + && tp->snd_minRTT>0) { + tp->snd_Bi=(3*tp->snd_Bi+tp->snd_packetcount*HZ/(now-tp->snd_lasttime))/4; + if (tp->snd_Bi > tp->snd_maxB) tp->snd_maxB = tp->snd_Bi; + if (tp->snd_minB>tp->snd_maxB) tp->snd_minB=tp->snd_maxB; //sanity check needed when queue doesn't empty + tp->snd_packetcount = 0; + tp->snd_lasttime = now; + // printk("Bi=%u, minB=%u, maxB=%u, ccount=%u, cwnd=%u\n", tp->snd_Bi, tp->snd_minB, tp->snd_maxB,tp->snd_ccount, tp->snd_cwnd); + } + #endif + + + #if FASTRETRANS_DEBUG > 0 BUG_TRAP((int)tp->sacked_out >= 0); BUG_TRAP((int)tp->lost_out >= 0); *************** static int tcp_ack_update_window(struct *** 1884,1893 **** --- 2977,2988 ---- tp->max_window = nwin; tcp_sync_mss(sk, tp->pmtu_cookie); } + WEB100_UPDATE_FUNC(tp, web100_update_rwin_rcvd(tp)); } } tp->snd_una = ack; + WEB100_VAR_SET(tp, SndUna, ack); return flag; } *************** static int tcp_ack(struct sock *sk, stru *** 1902,1907 **** --- 2997,3007 ---- u32 prior_in_flight; int prior_packets; + #ifdef CONFIG_RFC3465 + /* Need mss for ABC in tcp_cong_avoid */ + int mss_now = tcp_current_mss(sk); + #endif + /* If the ack is newer than sent or older than previous acks * then we can probably ignore it. 
*/ *************** static int tcp_ack(struct sock *sk, stru *** 1911,1923 **** if (before(ack, prior_snd_una)) goto old_ack; if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. * Note, we use the fact that SND.UNA>=SND.WL2. */ tcp_update_wl(tp, ack, ack_seq); ! tp->snd_una = ack; flag |= FLAG_WIN_UPDATE; NET_INC_STATS_BH(TCPHPAcks); --- 3011,3031 ---- if (before(ack, prior_snd_una)) goto old_ack; + #ifdef CONFIG_RFC3465 + if ( sysctl_tcp_abc && tp->ca_state < TCP_CA_CWR ) { + tp->bytes_acked += ack - prior_snd_una; + } + #endif + if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { /* Window is constant, pure forward advance. * No more checks are required. * Note, we use the fact that SND.UNA>=SND.WL2. */ tcp_update_wl(tp, ack, ack_seq); ! tp->snd_una = ack; ! ! WEB100_VAR_SET(tp, SndUna, ack); flag |= FLAG_WIN_UPDATE; NET_INC_STATS_BH(TCPHPAcks); *************** static int tcp_ack(struct sock *sk, stru *** 1932,1939 **** if (TCP_SKB_CB(skb)->sacked) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); ! if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) flag |= FLAG_ECE; } /* We passed data and got it acked, remove any soft error --- 3040,3049 ---- if (TCP_SKB_CB(skb)->sacked) flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una); ! if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) { flag |= FLAG_ECE; + WEB100_VAR_INC(tp, ECERcvd); + } } /* We passed data and got it acked, remove any soft error *************** static int tcp_ack(struct sock *sk, stru *** 1953,1963 **** --- 3063,3081 ---- /* Advanve CWND, if state allows this. 
*/ if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd && tcp_may_raise_cwnd(tp, flag)) + #ifdef CONFIG_RFC3465 + tcp_cong_avoid(tp, mss_now); + #else tcp_cong_avoid(tp); + #endif tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); } else { if ((flag&FLAG_DATA_ACKED) && prior_in_flight >= tp->snd_cwnd) + #ifdef CONFIG_RFC3465 + tcp_cong_avoid(tp,mss_now); + #else tcp_cong_avoid(tp); + #endif } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) *************** no_queue: *** 1977,1986 **** --- 3095,3106 ---- return 1; old_ack: + /* WEB100_XXX */ if (TCP_SKB_CB(skb)->sacked) tcp_sacktag_write_queue(sk, skb, prior_snd_una); uninteresting_ack: + /* WEB100_XXX */ SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); return 0; } *************** static void tcp_send_dupack(struct sock *** 2333,2338 **** --- 3453,3460 ---- { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + WEB100_VAR_INC(tp, DupAcksOut); + if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(DelayedACKLost); *************** static inline int tcp_rmem_schedule(stru *** 2519,2524 **** --- 3641,3658 ---- static int tcp_prune_queue(struct sock *sk); + #ifdef CONFIG_WEB100_STATS + static inline int tcp_rmem_full(struct sock *sk, struct sk_buff *skb) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + return (tcp_win_from_space(atomic_read(&sk->rmem_alloc)) > + tp->rcv_hi_seq - tp->copied_seq && + tcp_prune_queue(sk) < 0) || + !tcp_rmem_schedule(sk, skb); + } + #endif + static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcphdr *th = skb->h.th; *************** static void tcp_data_queue(struct sock * *** 2565,2578 **** local_bh_disable(); } ! if (eaten <= 0) { queue_and_out: ! if (eaten < 0 && ! (atomic_read(&sk->rmem_alloc) > sk->rcvbuf || ! 
!tcp_rmem_schedule(sk, skb))) { if (tcp_prune_queue(sk) < 0 || !tcp_rmem_schedule(sk, skb)) goto drop; } tcp_set_owner_r(skb, sk); __skb_queue_tail(&sk->receive_queue, skb); } --- 3699,3719 ---- local_bh_disable(); } ! if (eaten < 0) { queue_and_out: ! #ifdef CONFIG_WEB100_STATS ! if (sysctl_web100_rbufmode == 1) { ! if (tcp_rmem_full(sk, skb)) ! goto drop; ! } else ! #endif ! if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf || ! !tcp_rmem_schedule(sk, skb)) { if (tcp_prune_queue(sk) < 0 || !tcp_rmem_schedule(sk, skb)) goto drop; } + } + if (eaten <= 0) { tcp_set_owner_r(skb, sk); __skb_queue_tail(&sk->receive_queue, skb); } *************** queue_and_out: *** 2597,2606 **** --- 3738,3756 ---- tcp_fast_path_check(sk, tp); + #ifdef CONFIG_WEB100_STATS + #if 0 + if (TCP_SKB_CB(skb)->end_seq == 0) + printk("a: end_seq == 0\n"); + #endif + tcp_good_seg_rcvd(sk, skb); + #endif + if (eaten > 0) { __kfree_skb(skb); } else if (!sk->dead) sk->data_ready(sk, 0); + return; } *************** drop: *** 2641,2646 **** --- 3791,3802 ---- TCP_ECN_check_ce(tp, skb); + #ifdef CONFIG_WEB100_STATS + if (sysctl_web100_rbufmode == 1) { + if (tcp_rmem_full(sk, skb)) + goto drop; + } else + #endif if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf || !tcp_rmem_schedule(sk, skb)) { if (tcp_prune_queue(sk) < 0 || !tcp_rmem_schedule(sk, skb)) *************** drop: *** 2656,2661 **** --- 3812,3825 ---- tcp_set_owner_r(skb, sk); + #ifdef CONFIG_WEB100_STATS + #if 0 + if (TCP_SKB_CB(skb)->end_seq == 0) + printk("b: end_seq == 0\n"); + #endif + tcp_good_seg_rcvd(sk, skb); + #endif + if (skb_peek(&tp->out_of_order_queue) == NULL) { /* Initial out of order segment, build 1 SACK. */ if(tp->sack_ok) { *************** tcp_collapse(struct sock *sk, struct sk_ *** 2790,2796 **** memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; __skb_insert(nskb, skb->prev, skb, skb->list); ! tcp_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. 
*/ while (copy > 0) { --- 3954,3965 ---- memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; __skb_insert(nskb, skb->prev, skb, skb->list); ! #ifdef COFNIG_WEB100_STATS /* NOTE(review): COFNIG looks like a misspelling of CONFIG, so this guard is never defined and tcp_set_owner_r() below always runs; confirm the intended behaviour (and that tp is in scope here) before correcting it */ ! if (tp->tcp_stats->wc_vars.X_RBufMode == WC_BUFMODE_WEB100) ! ; ! else ! #endif ! tcp_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ while (copy > 0) { *************** static int tcp_prune_queue(struct sock * *** 2875,2884 **** NET_INC_STATS_BH(PruneCalled); ! if (atomic_read(&sk->rmem_alloc) >= sk->rcvbuf) ! tcp_clamp_window(sk, tp); ! else if (tcp_memory_pressure) ! tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); tcp_collapse_ofo_queue(sk); tcp_collapse(sk, sk->receive_queue.next, --- 4044,4058 ---- NET_INC_STATS_BH(PruneCalled); ! #ifdef CONFIG_WEB100_STATS ! if (sysctl_web100_rbufmode != 1) ! #endif ! { ! if (atomic_read(&sk->rmem_alloc) >= sk->rcvbuf) ! tcp_clamp_window(sk, tp); ! else if (tcp_memory_pressure) ! tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); !
} tcp_collapse_ofo_queue(sk); tcp_collapse(sk, sk->receive_queue.next, *************** static int tcp_prune_queue(struct sock * *** 2886,2891 **** --- 4060,4069 ---- tp->copied_seq, tp->rcv_nxt); tcp_mem_reclaim(sk); + #ifdef CONFIG_WEB100_STATS + if (sysctl_web100_rbufmode == 1) + return 0; + #endif if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) return 0; *************** void tcp_cwnd_application_limited(struct *** 2937,2942 **** --- 4115,4122 ---- if (win_used < tp->snd_cwnd) { tp->snd_ssthresh = tcp_current_ssthresh(tp); tp->snd_cwnd = (tp->snd_cwnd+win_used)>>1; + WEB100_VAR_INC(tp, OtherReductions); + WEB100_VAR_INC(tp, X_OtherReductionsCV); } tp->snd_cwnd_used = 0; } *************** static void tcp_new_space(struct sock *s *** 2952,2957 **** --- 4132,4140 ---- { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + #ifdef CONFIG_WEB100_STATS + if (sysctl_web100_sbufmode != 1) + #endif if (tp->packets_out < tp->snd_cwnd && !(sk->userlocks&SOCK_SNDBUF_LOCK) && !tcp_memory_pressure && *************** int tcp_rcv_established(struct sock *sk, *** 3229,3234 **** --- 4412,4435 ---- tp->saw_tstamp = 0; + #ifdef CONFIG_TCP_PKTDROP + /* + * YTL: packet dropping feature. The idea is to drop every nth + * packet to simulate packet loss at the recv + */ + + if ( sysctl_tcp_pktdrop_rate > 1 ) { + + if ( tp->pktdrop_cnt >= sysctl_tcp_pktdrop_rate ) { + tp->pktdrop_cnt = 0; + goto csum_error; + } + + tp->pktdrop_cnt++; + } + #endif + + /* pred_flags is 0xS?10 << 16 + snd_wnd * if header_predition is to be made * 'S' will always be tp->tcp_header_len >> 2 *************** int tcp_rcv_established(struct sock *sk, *** 3284,3293 **** --- 4485,4498 ---- (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); + /* We know that such packets are checksummed * on entry. 
*/ tcp_ack(sk, skb, 0); + #ifdef CONFIG_WEB100_STATS + tcp_good_seg_rcvd(sk, skb); + #endif __kfree_skb(skb); tcp_data_snd_check(sk); return 0; *************** int tcp_rcv_established(struct sock *sk, *** 3297,3303 **** } } else { int eaten = 0; ! if (tp->ucopy.task == current && tp->copied_seq == tp->rcv_nxt && len - tcp_header_len <= tp->ucopy.len && --- 4502,4508 ---- } } else { int eaten = 0; ! if (tp->ucopy.task == current && tp->copied_seq == tp->rcv_nxt && len - tcp_header_len <= tp->ucopy.len && *************** int tcp_rcv_established(struct sock *sk, *** 3348,3353 **** --- 4553,4561 ---- tcp_event_data_recv(sk, tp, skb); + #ifdef CONFIG_WEB100_STATS + tcp_good_seg_rcvd(sk, skb); + #endif if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { /* Well, only one small jumplet in fast path... */ tcp_ack(sk, skb, FLAG_DATA); *************** static int tcp_rcv_synsent_state_process *** 3561,3566 **** --- 4769,4778 ---- tp->copied_seq = tp->rcv_nxt; mb(); tcp_set_state(sk, TCP_ESTABLISHED); + + #ifdef CONFIG_WEB100_STATS + web100_stats_establish(sk); + #endif if(!sk->dead) { sk->state_change(sk); *************** int tcp_rcv_state_process(struct sock *s *** 3780,3785 **** --- 4992,5000 ---- mb(); tcp_set_state(sk, TCP_ESTABLISHED); sk->state_change(sk); + #ifdef CONFIG_WEB100_STATS + web100_stats_establish(sk); + #endif /* Note, that this wakeup is only for marginal * crossed SYN case. Passively open sockets *************** int tcp_rcv_state_process(struct sock *s *** 3791,3797 **** } tp->snd_una = TCP_SKB_CB(skb)->ack_seq; ! tp->snd_wnd = ntohs(th->window) << tp->snd_wscale; tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); /* tcp_ack considers this ACK as duplicate --- 5006,5017 ---- } tp->snd_una = TCP_SKB_CB(skb)->ack_seq; ! WEB100_VAR_SET(tp, SndUna, tp->snd_una); ! /* RFC1323: The window in SYN & SYN/ACK segments is ! * never scaled (PSC/CMU patch {rreddy,mathis}@psc.edu). ! */ ! tp->snd_wnd = ntohs(th->window); ! 
WEB100_UPDATE_FUNC(tp, web100_update_rwin_rcvd(tp)); tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); /* tcp_ack considers this ACK as duplicate *************** int tcp_rcv_state_process(struct sock *s *** 3799,3805 **** * Fix it at least with timestamps. */ if (tp->saw_tstamp && tp->rcv_tsecr && !tp->srtt) ! tcp_ack_saw_tstamp(tp, 0); if (tp->tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; --- 5019,5025 ---- * Fix it at least with timestamps. */ if (tp->saw_tstamp && tp->rcv_tsecr && !tp->srtt) ! tcp_ack_saw_tstamp(sk, 0); if (tp->tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv4/tcp_ipv4.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/tcp_ipv4.c *** linux-2.4.20.orig/net/ipv4/tcp_ipv4.c Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/tcp_ipv4.c Mon Jan 19 17:41:51 2004 *************** int tcp_v4_connect(struct sock *sk, stru *** 835,840 **** --- 835,844 ---- if (!tp->write_seq) tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr, sk->sport, usin->sin_port); + WEB100_VAR_SET(tp, SndISS, tp->write_seq); + WEB100_VAR_SET(tp, SndMax, tp->write_seq); + WEB100_VAR_SET(tp, SndNxt, tp->write_seq); + WEB100_VAR_SET(tp, SndUna, tp->write_seq); sk->protinfo.af_inet.id = tp->write_seq^jiffies; *************** void tcp_v4_err(struct sk_buff *skb, u32 *** 1017,1024 **** /* This is deprecated, but if someone generated it, * we have no reasons to ignore it. */ ! if (sk->lock.users == 0) tcp_enter_cwr(tp); goto out; case ICMP_PARAMETERPROB: err = EPROTO; --- 1021,1034 ---- /* This is deprecated, but if someone generated it, * we have no reasons to ignore it. */ ! 
if (sk->lock.users == 0) { tcp_enter_cwr(tp); + WEB100_VAR_INC(tp, QuenchRcvd); + #if 0 + /* WEB100_XXX */ + WEB100_UPDATE_FUNC(tp, web100_update_cwnd(tp)); + #endif + } goto out; case ICMP_PARAMETERPROB: err = EPROTO; *************** struct sock * tcp_v4_syn_recv_sock(struc *** 1546,1551 **** --- 1556,1568 ---- newsk = tcp_create_openreq_child(sk, req, skb); if (!newsk) goto exit; + #ifdef CONFIG_WEB100_STATS + if (web100_stats_create(newsk)) { + sk_free(newsk); + goto exit; + } + newsk->tp_pinfo.af_tcp.tcp_stats->wc_vars.LocalAddressType = WC_ADDRTYPE_IPV4; + #endif newsk->dst_cache = dst; newsk->route_caps = dst->dev->features; *************** process: *** 1764,1775 **** --- 1781,1794 ---- skb->dev = NULL; bh_lock_sock(sk); + WEB100_UPDATE_FUNC(&sk->tp_pinfo.af_tcp, web100_update_segrecv(&sk->tp_pinfo.af_tcp, skb)); ret = 0; if (!sk->lock.users) { if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); } else sk_add_backlog(sk, skb); + WEB100_UPDATE_FUNC(&sk->tp_pinfo.af_tcp, web100_update_cwnd(&sk->tp_pinfo.af_tcp)); bh_unlock_sock(sk); sock_put(sk); *************** static int tcp_v4_init_sock(struct sock *** 2007,2012 **** --- 2026,2035 ---- tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); + #ifdef CONFIG_TCP_SACK2 + create_sacked_list(tp); + #endif + tp->rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; *************** static int tcp_v4_init_sock(struct sock *** 2017,2022 **** --- 2040,2049 ---- */ tp->snd_cwnd = 2; + #ifdef CONFIG_ALTAIMD + tp->hstcp_entry_index=0; + #endif + /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. 
*/ *************** static int tcp_v4_init_sock(struct sock *** 2035,2040 **** --- 2062,2077 ---- sk->sndbuf = sysctl_tcp_wmem[1]; sk->rcvbuf = sysctl_tcp_rmem[1]; + + #ifdef CONFIG_WEB100_STATS + { + int err; + if ((err = web100_stats_create(sk))) { + return err; + } + sk->tp_pinfo.af_tcp.tcp_stats->wc_vars.LocalAddressType = WC_ADDRTYPE_IPV4; + } + #endif atomic_inc(&tcp_sockets_allocated); *************** static int tcp_v4_destroy_sock(struct so *** 2050,2055 **** --- 2087,2096 ---- /* Cleanup up the write buffer. */ tcp_writequeue_purge(sk); + #ifdef CONFIG_TCP_SACK2 + purge_sacked_list(tp); + #endif + /* Cleans up our, hopefully empty, out_of_order_queue. */ __skb_queue_purge(&tp->out_of_order_queue); *************** static int tcp_v4_destroy_sock(struct so *** 2059,2064 **** --- 2100,2109 ---- /* Clean up a referenced TCP bind bucket. */ if(sk->prev != NULL) tcp_put_port(sk); + + #ifdef CONFIG_WEB100_STATS + web100_stats_destroy(sk->tp_pinfo.af_tcp.tcp_stats); + #endif /* If sendmsg cached page exists, toss it. */ if (tp->sndmsg_page != NULL) *************** out_no_bh: *** 2280,2285 **** --- 2325,2331 ---- return len; } + struct proto tcp_prot = { name: "TCP", close: tcp_close, diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv4/tcp_minisocks.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/tcp_minisocks.c *** linux-2.4.20.orig/net/ipv4/tcp_minisocks.c Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/tcp_minisocks.c Mon Jan 19 17:41:51 2004 *************** void tcp_time_wait(struct sock *sk, int *** 390,395 **** --- 390,398 ---- sizeof(struct in6_addr)); } #endif + + WEB100_VAR_SET(tp, State, WC_STATE_TIMEWAIT); + /* Linkage updates. 
*/ __tcp_tw_hashdance(sk, tw); *************** struct sock *tcp_create_openreq_child(st *** 715,720 **** --- 718,727 ---- newtp->snd_cwnd = 2; newtp->snd_cwnd_cnt = 0; + #ifdef CONFIG_RFC3465 + newtp->bytes_acked = 0; + #endif + newtp->ca_state = TCP_CA_Open; tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv4/tcp_output.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/tcp_output.c *** linux-2.4.20.orig/net/ipv4/tcp_output.c Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/tcp_output.c Mon Jan 19 17:41:51 2004 *************** *** 44,49 **** --- 44,57 ---- /* People can turn this off for buggy TCP's found in printers etc. */ int sysctl_tcp_retrans_collapse = 1; + #ifdef CONFIG_TCP_SACK + /* new reno response to partial acks */ + int sysctl_tcp_new_reno = 0; + #endif + + /* moderate the cwnd rather than half it if the transmission queue is full */ + int sysctl_tcp_moderate_on_txq = 0; + static __inline__ void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) { *************** void update_send_head(struct sock *sk, s *** 51,56 **** --- 59,65 ---- if (tp->send_head == (struct sk_buff *) &sk->write_queue) tp->send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + WEB100_UPDATE_FUNC(tp, web100_update_snd_nxt(tp)); if (tp->packets_out++ == 0) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); } *************** static void tcp_cwnd_restart(struct tcp_ *** 113,118 **** --- 122,133 ---- tp->snd_cwnd = max(cwnd, restart_cwnd); tp->snd_cwnd_stamp = tcp_time_stamp; tp->snd_cwnd_used = 0; + #ifdef CONFIG_ALTAIMD + htcp_reset(tp); + #endif + #ifdef CONFIG_TCP_SACK2 + purge_sacked_list(tp); + #endif } static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb) *************** static __inline__ u16 tcp_select_window( *** 161,168 **** --- 176,201 ---- */ new_win = cur_win; } + #if 0 + 
#ifdef CONFIG_WEB100_STATS + atomic_add(new_win - tp->rcv_wnd, &tcp_rwin_announced); + if (sysctl_web100_rbufmode == 1) { + if (new_win > 2*tp->rcv_wnd) + printk("tcp_select_window: large window %u\n", new_win); + if ((int)atomic_read(&tcp_rwin_announced) < 0) { + printk("tcp_select_window: BUG: tcp_rwin_announced < 0\n"); + } else if (new_win > tcp_win_from_space(PAGE_SIZE * sysctl_tcp_mem[0]) - + atomic_read(&tcp_rwin_announced)) { + printk("tcp_select_window: over bounds\n"); + atomic_sub(new_win - tp->rcv_wnd, &tcp_rwin_announced); + new_win = tp->rcv_wnd; + } + } + #endif + #endif tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; + WEB100_UPDATE_FUNC(tp, web100_update_rwin_sent(tp)); /* RFC1323 scaling applied */ new_win >>= tp->rcv_wscale; *************** static __inline__ u16 tcp_select_window( *** 174,179 **** --- 207,228 ---- return new_win; } + #ifdef CONFIG_WEB100_STATS + /* Start up a receiver-side window measurement using timestamps */ + static inline void tcp_rcv_tswin_start(struct sock *sk) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + + if (tp->rcv_prev_tstamp != tcp_time_stamp) { + tp->rcv_prev_tstamp = tcp_time_stamp; + if (!tp->rcv_tswin_pending) { + tp->rcv_tswin_pending = 1; + tp->rcv_tswin_tstamp = tcp_time_stamp; + tp->rcv_tswin_seq = tp->rcv_hi_seq; + } + } + } + #endif /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). 
This is used by both the initial *************** int tcp_transmit_skb(struct sock *sk, st *** 200,205 **** --- 249,258 ---- #define SYSCTL_FLAG_WSCALE 0x2 #define SYSCTL_FLAG_SACK 0x4 + /* XXX this is not correct --- we don't necessarily send the + segment yet */ + WEB100_UPDATE_FUNC(tp, web100_update_segsend(tp, skb)); + sysctl_flags = 0; if (tcb->flags & TCPCB_FLAG_SYN) { tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS; *************** int tcp_transmit_skb(struct sock *sk, st *** 267,272 **** --- 320,330 ---- } tp->af_specific->send_check(sk, th, skb->len, skb); + #ifdef CONFIG_WEB100_STATS + if (!(tcb->flags&(TCPCB_FLAG_SYN|TCPCB_FLAG_RST|TCPCB_FLAG_FIN))) + tcp_rcv_tswin_start(sk); + #endif + if (tcb->flags & TCPCB_FLAG_ACK) tcp_event_ack_sent(sk); *************** int tcp_transmit_skb(struct sock *sk, st *** 279,285 **** if (err <= 0) return err; ! tcp_enter_cwr(tp); /* NET_XMIT_CN is special. It does not guarantee, * that this packet is lost. It tells that device --- 337,354 ---- if (err <= 0) return err; ! #ifdef CONFIG_WEB100_NET100 ! if (!NET100_WAD(tp, WAD_IFQ, sysctl_WAD_IFQ)) { ! #endif ! if( !sysctl_tcp_moderate_on_txq ){ ! tcp_enter_cwr(tp); ! } else { ! tcp_moderate_cwnd(tp); ! } ! #ifdef CONFIG_WEB100_NET100 ! } ! #endif ! WEB100_VAR_INC(tp, SendStall); /* NET_XMIT_CN is special. It does not guarantee, * that this packet is lost. It tells that device *************** int tcp_transmit_skb(struct sock *sk, st *** 295,301 **** #undef SYSCTL_FLAG_SACK } - /* This is the main buffer sending routine. We queue the buffer * and decide whether to queue or transmit now. * --- 364,369 ---- *************** int tcp_transmit_skb(struct sock *sk, st *** 305,327 **** void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigned cur_mss) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); /* Advance write_seq and place onto the write_queue. 
*/ tp->write_seq = TCP_SKB_CB(skb)->end_seq; __skb_queue_tail(&sk->write_queue, skb); tcp_charge_skb(sk, skb); ! if (!force_queue && tp->send_head == NULL && tcp_snd_test(tp, skb, cur_mss, tp->nonagle)) { /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; tcp_minshall_update(tp, cur_mss, skb); if (tp->packets_out++ == 0) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); return; } } /* Queue it, remembering where we must start sending. */ if (tp->send_head == NULL) tp->send_head = skb; --- 373,406 ---- void tcp_send_skb(struct sock *sk, struct sk_buff *skb, int force_queue, unsigned cur_mss) { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int why = WC_SNDLIM_NONE; /* Advance write_seq and place onto the write_queue. */ tp->write_seq = TCP_SKB_CB(skb)->end_seq; __skb_queue_tail(&sk->write_queue, skb); tcp_charge_skb(sk, skb); ! if (!force_queue && tp->send_head == NULL && ! (why = tcp_snd_wait(tp, skb, cur_mss, tp->nonagle)) == WC_SNDLIM_NONE) { /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { + #ifdef CONFIG_WEB100_STATS + if (tp->tcp_stats->wc_vars.X_SBufMode == WC_BUFMODE_WEB100) + tcp_retx_charge_skb(sk, skb); + #endif tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + WEB100_UPDATE_FUNC(tp, web100_update_snd_nxt(tp)); tcp_minshall_update(tp, cur_mss, skb); if (tp->packets_out++ == 0) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); return; + } else { + why = WC_SNDLIM_SENDER; } } + if (why != WC_SNDLIM_NONE) + WEB100_UPDATE_FUNC(tp, web100_update_sndlim(tp, why)); /* Queue it, remembering where we must start sending. */ if (tp->send_head == NULL) tp->send_head = skb; *************** void tcp_push_one(struct sock *sk, unsig *** 334,351 **** { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct sk_buff *skb = tp->send_head; ! 
if (tcp_snd_test(tp, skb, cur_mss, 1)) { /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { tp->send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; if (tp->packets_out++ == 0) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); return; } } } /* Split fragmented skb to two parts at length len. */ --- 413,440 ---- { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct sk_buff *skb = tp->send_head; + int why; ! if ((why = tcp_snd_wait(tp, skb, cur_mss, 1)) == WC_SNDLIM_NONE) { /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb_clone(skb, sk->allocation)) == 0) { + #ifdef CONFIG_WEB100_STATS + if (tp->tcp_stats->wc_vars.X_SBufMode == WC_BUFMODE_WEB100) + tcp_retx_charge_skb(sk, skb); + #endif tp->send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + WEB100_UPDATE_FUNC(tp, web100_update_snd_nxt(tp)); if (tp->packets_out++ == 0) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); return; + } else { + why = WC_SNDLIM_SENDER; } } + if (why != WC_SNDLIM_NONE) + WEB100_UPDATE_FUNC(tp, web100_update_sndlim(tp, why)); } /* Split fragmented skb to two parts at length len. */ *************** static int tcp_fragment(struct sock *sk, *** 424,429 **** --- 513,522 ---- int nsize = skb->len - len; u16 flags; + #ifdef CONFIG_TCP_SACK + clear_all_retrans_hints(tp); + #endif + if (skb_cloned(skb) && skb_is_nonlinear(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) *************** int tcp_sync_mss(struct sock *sk, u32 pm *** 532,537 **** --- 625,631 ---- /* And store cached results */ tp->pmtu_cookie = pmtu; tp->mss_cache = mss_now; + WEB100_UPDATE_FUNC(tp, web100_update_mss(tp)); return mss_now; } *************** int tcp_write_xmit(struct sock *sk, int *** 547,552 **** --- 641,647 ---- { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); unsigned int mss_now; + int why = WC_SNDLIM_NONE; /* If we are closed, the bytes will have to remain here. 
* In time closedown will finish, we empty the write queue and all *************** int tcp_write_xmit(struct sock *sk, int *** 564,583 **** mss_now = tcp_current_mss(sk); while((skb = tp->send_head) && ! tcp_snd_test(tp, skb, mss_now, tcp_skb_is_last(sk, skb) ? nonagle : 1)) { if (skb->len > mss_now) { if (tcp_fragment(sk, skb, mss_now)) break; } TCP_SKB_CB(skb)->when = tcp_time_stamp; ! if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))) break; /* Advance the send_head. This one is sent out. */ update_send_head(sk, tp, skb); tcp_minshall_update(tp, mss_now, skb); sent_pkts = 1; } if (sent_pkts) { tcp_cwnd_validate(sk, tp); --- 659,689 ---- mss_now = tcp_current_mss(sk); while((skb = tp->send_head) && ! (why = tcp_snd_wait(tp, skb, mss_now, ! tcp_skb_is_last(sk, skb) ? nonagle : 1)) /* use the caller's nonagle argument, as the unpatched loop does */ ! == WC_SNDLIM_NONE) { if (skb->len > mss_now) { if (tcp_fragment(sk, skb, mss_now)) break; } TCP_SKB_CB(skb)->when = tcp_time_stamp; ! if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))) { ! why = WC_SNDLIM_SENDER; break; + } + #ifdef CONFIG_WEB100_STATS + if (tp->tcp_stats->wc_vars.X_SBufMode == WC_BUFMODE_WEB100) + tcp_retx_charge_skb(sk, skb); + #endif /* Advance the send_head. This one is sent out. */ update_send_head(sk, tp, skb); tcp_minshall_update(tp, mss_now, skb); sent_pkts = 1; } + if (why == WC_SNDLIM_NONE) + why = WC_SNDLIM_SENDER; + WEB100_UPDATE_FUNC(tp, web100_update_sndlim(tp, why)); if (sent_pkts) { tcp_cwnd_validate(sk, tp); *************** u32 __tcp_select_window(struct sock *sk) *** 654,660 **** int free_space = tcp_space(sk); int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); int window; ! if (mss > full_space) mss = full_space; --- 760,779 ---- int free_space = tcp_space(sk); int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); int window; ! ! #ifdef CONFIG_WEB100_STATS ! WEB100_VAR_SET(tp, X_dbg4, full_space); ! ! if (tp->tcp_stats->wc_vars.X_RBufMode == WC_BUFMODE_WEB100) { !
window = tp->rcv_space - max(tp->rcv_alloc - sk->rcvbuf, 0) + ! min_t(int, tp->rcv_hi_seq - tp->rcv_nxt, ! tp->rcv_space); ! ! window = max(window, 0); ! window = min_t(__u32, window, tp->tcp_stats->wc_vars.LimRwin); ! return (u32)window; ! } ! #endif if (mss > full_space) mss = full_space; *************** u32 __tcp_select_window(struct sock *sk) *** 683,688 **** --- 802,810 ---- if (window <= free_space - mss || window > free_space) window = (free_space/mss)*mss; + WEB100_VAR_SET(tp, X_dbg3, free_space); + WEB100_VAR_SET(tp, X_dbg2, mss); + WEB100_VAR_SET(tp, X_dbg1, window); return window; } *************** static void tcp_retrans_try_collapse(str *** 715,720 **** --- 837,847 ---- ((skb_size + next_skb_size) > mss_now)) return; + #ifdef CONFIG_TCP_SACK + /* changing transmit queue under us so clear hints */ + clear_all_retrans_hints(tp); + #endif + /* Ok. We will be able to collapse the packet. */ __skb_unlink(next_skb, next_skb->list); *************** void tcp_simple_retransmit(struct sock * *** 785,790 **** --- 912,921 ---- } } + #ifdef CONFIG_TCP_SACK + clear_all_retrans_hints(tp); + #endif + if (!lost) return; *************** int tcp_retransmit_skb(struct sock *sk, *** 900,905 **** --- 1031,1048 ---- return err; } + #ifdef CONFIG_TCP_SACK + void clear_all_retrans_hints(struct tcp_opt *tp){ + + tp->mark_head_lost_skb_hint = NULL; + tp->update_scoreboard_skb_hint = NULL; + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + tp->xmit_retransmit_queue_forward_skb_hint = NULL; + tp->sackfastpath_skb_hint = NULL; + NET_INC_STATS_BH(TCPHintClears); + } + #endif + /* This gets called after a retransmit timeout, and the initially * retransmitted data is acknowledged. 
It tries to continue * resending the rest of the retransmit queue, until either *************** void tcp_xmit_retransmit_queue(struct so *** 912,931 **** --- 1055,1134 ---- { struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); struct sk_buff *skb; + #ifndef CONFIG_TCP_SACK int packet_cnt = tp->lost_out; + #else + int packet_cnt; + + if( tp->xmit_retransmit_queue_lost_skb_hint != NULL ){ + skb = tp->xmit_retransmit_queue_lost_skb_hint; + packet_cnt = tp->xmit_retransmit_queue_lost_cnt_hint; + net_statistics[smp_processor_id()*2].TCPXmitRetranLostHintHits += tp->xmit_retransmit_queue_lost_cnt_hint; + /* NET_INC_STATS_BH(TCPXmitRetranLostHintHits); */ + }else{ + skb = sk->write_queue.next; + packet_cnt = 0; + } + #endif /* First pass: retransmit lost packets. */ + #ifdef CONFIG_TCP_SACK + if (tp->lost_out) { + for_retrans_queue_from(skb, skb, sk, tp) { + #else if (packet_cnt) { + #ifndef CONFIG_TCP_SACK2 for_retrans_queue(skb, sk, tp) { + #else + struct sacked_list_item *sl = sacked_list_head(tp); + struct sk_buff *skb; + if (sl == NULL) + skb = (sk)->write_queue.next; + else { + skb = sl->skb; + if (skb == NULL) { + // printk("tcp_xmit_retransmit(): skb==NULL\n"); + skb = (sk)->write_queue.next; + } + } + + while ((skb != (tp)->send_head) && (skb != (struct sk_buff *)&(sk)->write_queue)) { + #endif // Config_TCP_SACK2 + #endif // config_tcp_sack __u8 sacked = TCP_SKB_CB(skb)->sacked; + #ifdef CONFIG_TCP_SACK + /* we could do better than to assign each time */ + tp->xmit_retransmit_queue_lost_skb_hint = skb; + tp->xmit_retransmit_queue_lost_cnt_hint = packet_cnt; + + if (tcp_packets_in_flight(tp) >= tp->snd_cwnd){ + return; + } + #else if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) return; + #endif + #ifndef CONFIG_TCP_SACK2 if (sacked&TCPCB_LOST) { if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { + #ifdef CONFIG_TCP_SACK + if (tcp_retransmit_skb(sk, skb)){ + tp->xmit_retransmit_queue_lost_skb_hint = NULL; + return; + } + #else if (tcp_retransmit_skb(sk, skb)) 
return; + #endif + #else + if (sacked&(TCPCB_LOST)) { + if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) ) + if ( !before(TCP_SKB_CB(skb)->end_seq,tp->snd_una) ) { + if (tcp_retransmit_skb(sk, skb)) { + return; } + #endif // !CONFIG_TCP_SACK2 if (tp->ca_state != TCP_CA_Loss) NET_INC_STATS_BH(TCPFastRetrans); else *************** void tcp_xmit_retransmit_queue(struct so *** 935,942 **** --- 1138,1162 ---- tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); } + #ifdef CONFIG_TCP_SACK + if (++packet_cnt >= tp->lost_out) + #else if (--packet_cnt <= 0) + #endif break; + + #ifdef CONFIG_TCP_SACK2 + if (sl != NULL) sl = sl->next; + if (sl == NULL) + skb = skb->next; + else { + struct sk_buff *sl_skb = sl->skb; + /* Entry without an skb: keep walking the queue in order + * rather than dereferencing the NULL pointer. + */ + skb = sl_skb ? sl_skb : skb->next; + } + #endif // config_tcp-sack2 } } } *************** void tcp_xmit_retransmit_queue(struct so *** 948,954 **** --- 1168,1178 ---- return; /* No forward retransmissions in Reno are possible. */ + #ifdef CONFIG_TCP_SACK + if (!tp->sack_ok && !sysctl_tcp_new_reno) + #else if (!tp->sack_ok) + #endif return; /* Yeah, we have to make difficult choice between forward transmission *************** void tcp_xmit_retransmit_queue(struct so *** 961,969 **** --- 1185,1211 ---- if (tcp_may_send_now(sk, tp)) return; + #ifdef CONFIG_TCP_SACK + if ( tp->xmit_retransmit_queue_forward_skb_hint != NULL){ + skb = tp->xmit_retransmit_queue_forward_skb_hint; + packet_cnt = tp->xmit_retransmit_queue_forward_cnt_hint; + net_statistics[smp_processor_id()*2].TCPXmitRetranForwardHintHits += tp->xmit_retransmit_queue_forward_cnt_hint; + /*NET_INC_STATS_BH(TCPXmitRetranForwardHintHits);*/ + } else{ + skb = sk->write_queue.next; + packet_cnt = 0; + } + + for_retrans_queue_from(skb,skb, sk, tp) { + tp->xmit_retransmit_queue_forward_cnt_hint = packet_cnt; + tp->xmit_retransmit_queue_forward_skb_hint = skb; + #else packet_cnt = 0; + #endif + #ifndef CONFIG_TCP_SACK for_retrans_queue(skb, sk, tp) { + #endif
if(++packet_cnt > tp->fackets_out) break; *************** void tcp_xmit_retransmit_queue(struct so *** 974,981 **** --- 1216,1230 ---- continue; /* Ok, retransmit it. */ + #ifdef CONFIG_TCP_SACK + if(tcp_retransmit_skb(sk, skb)){ + tp->xmit_retransmit_queue_forward_skb_hint = NULL; + break; + } + #else if(tcp_retransmit_skb(sk, skb)) break; + #endif if (skb == skb_peek(&sk->write_queue)) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); *************** static inline void tcp_connect_init(stru *** 1196,1201 **** --- 1445,1451 ---- tp->snd_wnd = 0; tcp_init_wl(tp, tp->write_seq, 0); tp->snd_una = tp->write_seq; + WEB100_VAR_SET(tp, SndUna, tp->snd_una); tp->snd_sml = tp->write_seq; tp->rcv_nxt = 0; tp->rcv_wup = 0; *************** static inline void tcp_connect_init(stru *** 1204,1209 **** --- 1454,1465 ---- tp->rto = TCP_TIMEOUT_INIT; tp->retransmits = 0; tcp_clear_retrans(tp); + + #ifdef CONFIG_TCP_PKTDROP + /* YTL: TCP Packet Dropping */ + tp->pktdrop_cnt = 0; + #endif + } /* *************** int tcp_connect(struct sock *sk) *** 1230,1235 **** --- 1486,1492 ---- TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; tp->snd_nxt = tp->write_seq; + WEB100_UPDATE_FUNC(tp, web100_update_snd_nxt(tp)); tp->pushed_seq = tp->write_seq; /* Send it off. */ diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv4/tcp_timer.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/tcp_timer.c *** linux-2.4.20.orig/net/ipv4/tcp_timer.c Mon Oct 1 17:19:57 2001 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/tcp_timer.c Mon Jan 19 17:41:51 2004 *************** static void tcp_retransmit_timer(struct *** 371,376 **** --- 371,377 ---- NET_INC_STATS_BH(TCPTimeouts); } } + WEB100_UPDATE_FUNC(tp, web100_update_timeout(tp)); tcp_enter_loss(sk, 0); *************** static void tcp_retransmit_timer(struct *** 401,406 **** --- 402,408 ---- * the 120 second clamps though! 
*/ tp->backoff++; + WEB100_VAR_SET(tp, CurTimeoutCount, tp->backoff); tp->retransmits++; out_reset_timer: diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv4/web100_stats.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/web100_stats.c *** linux-2.4.20.orig/net/ipv4/web100_stats.c Thu Jan 1 01:00:00 1970 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv4/web100_stats.c Mon Jan 19 17:41:51 2004 *************** *** 0 **** --- 1,714 ---- + /* + * net/ipv4/web100_stats.c + * + * Copyright (C) 2001 Matt Mathis + * Copyright (C) 2001 John Heffner + * Copyright (C) 2000 Jeffrey Semke + * + * The Web 100 project. See http://www.web100.org + * + * Functions for creating, destroying, and updating the Web100 + * statistics structure. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ * + */ + + #include + + #include + #include + #include + #include + #include + #include + #include + + #define WC_INF32 0xffffffff + + #define WC_DEATH_SLOTS 8 + #define WC_PERSIST_TIME 60 + + /* BEWARE: The release process updates the version string */ + char *web100_version_string = "2.3.3 200312091116" + #ifdef CONFIG_WEB100_NET100 + " net100" + #endif + ; + + static void death_cleanup(unsigned long dummy); + + /* Global stats reader-writer lock */ + rwlock_t web100_linkage_lock = RW_LOCK_UNLOCKED; + + /* Data structures for tying together stats */ + static int web100stats_next_cid; + static int web100stats_conn_num; + static int web100stats_htsize; + struct web100stats **web100stats_ht; + struct web100stats *web100stats_first = NULL; + + static struct web100stats *death_slots[WC_DEATH_SLOTS]; + static int cur_death_slot; + static spinlock_t death_lock = SPIN_LOCK_UNLOCKED; + static struct timer_list stats_persist_timer = { function: death_cleanup }; + static int ndeaths; + + #ifdef CONFIG_WEB100_NETLINK + static struct sock *web100_nlsock; + #endif + + extern struct proc_dir_entry *proc_web100_dir; + + + /* + * Structural maintainance + */ + + static inline int web100stats_hash(int cid) + { + return cid % web100stats_htsize; + } + + struct web100stats *web100stats_lookup(int cid) + { + struct web100stats *stats; + + /* Let's ensure safety here. It's not too expensive and may change. */ + if (cid < 0 || cid >= WEB100_MAX_CONNS) + return NULL; + + stats = web100stats_ht[web100stats_hash(cid)]; + while (stats && stats->wc_cid != cid) + stats = stats->wc_hash_next; + return stats; + } + + /* This will get really slow as the cid space fills. This can be done + * better, but it's just not worth it right now. + * The caller must hold the lock. 
+ */ + static int get_next_cid(void) + { + int i; + + if (web100stats_conn_num >= WEB100_MAX_CONNS) + return -1; + + i = web100stats_next_cid; + do { + if (web100stats_lookup(i) == NULL) + break; + i = (i + 1) % WEB100_MAX_CONNS; + } while (i != web100stats_next_cid); + web100stats_next_cid = (i + 1) % WEB100_MAX_CONNS; + + return i; + } + + static void stats_link(struct web100stats *stats) + { + int hash; + + write_lock_bh(&web100_linkage_lock); + + if ((stats->wc_cid = get_next_cid()) < 0) { + write_unlock_bh(&web100_linkage_lock); + return; + } + + hash = web100stats_hash(stats->wc_cid); + stats->wc_hash_next = web100stats_ht[hash]; + stats->wc_hash_prev = NULL; + if (web100stats_ht[hash]) + web100stats_ht[hash]->wc_hash_prev = stats; + web100stats_ht[hash] = stats; + + stats->wc_next = web100stats_first; + stats->wc_prev = NULL; + if (web100stats_first) + web100stats_first->wc_prev = stats; + web100stats_first = stats; + + web100stats_conn_num++; + proc_web100_dir->nlink = web100stats_conn_num + 2; + + write_unlock_bh(&web100_linkage_lock); + } + + static void stats_unlink(struct web100stats *stats) + { + int hash; + + write_lock_bh(&web100_linkage_lock); + + hash = web100stats_hash(stats->wc_cid); + if (stats->wc_hash_next) + stats->wc_hash_next->wc_hash_prev = stats->wc_hash_prev; + if (stats->wc_hash_prev) + stats->wc_hash_prev->wc_hash_next = stats->wc_hash_next; + if (stats == web100stats_ht[hash]) + web100stats_ht[hash] = stats->wc_hash_next ? + stats->wc_hash_next : + stats->wc_hash_prev; + + if (stats->wc_next) + stats->wc_next->wc_prev = stats->wc_prev; + if (stats->wc_prev) + stats->wc_prev->wc_next = stats->wc_next; + if (stats == web100stats_first) + web100stats_first = stats->wc_next ? 
stats->wc_next : + stats->wc_prev; + + web100stats_conn_num--; + proc_web100_dir->nlink = web100stats_conn_num + 2; + + write_unlock_bh(&web100_linkage_lock); + } + + static void stats_persist(struct web100stats *stats) + { + spin_lock_bh(&death_lock); + + stats->wc_death_next = death_slots[cur_death_slot]; + death_slots[cur_death_slot] = stats; + if (ndeaths <= 0) { + stats_persist_timer.expires = jiffies + WC_PERSIST_TIME * HZ / WC_DEATH_SLOTS; + add_timer(&stats_persist_timer); + } + ndeaths++; + + spin_unlock_bh(&death_lock); + } + + static void death_cleanup(unsigned long dummy) + { + struct web100stats *stats, *next; + + spin_lock_bh(&death_lock); + + cur_death_slot = (cur_death_slot + 1) % WC_DEATH_SLOTS; + stats = death_slots[cur_death_slot]; + while (stats) { + stats->wc_dead = 1; + ndeaths--; + next = stats->wc_death_next; + web100_stats_unuse(stats); + stats = next; + } + death_slots[cur_death_slot] = NULL; + + if (ndeaths > 0) { + stats_persist_timer.expires = jiffies + WC_PERSIST_TIME * HZ / WC_DEATH_SLOTS; + add_timer(&stats_persist_timer); + } + + spin_unlock_bh(&death_lock); + } + + + /* Tom Dunigan's (slightly modified) netlink code. Notifies listening apps + * of Web100 events. 
+ * + * NOTE: we are currently squatting on netlink family 10 (NETLINK_WEB100) in + * include/linux/netlink.h + */ + + #ifdef CONFIG_WEB100_NETLINK + void web100_netlink_event(int type, int cid) + { + struct web100_netlink_msg *msg; + struct sk_buff *tmpskb; + + if (web100_nlsock == NULL) + return; + + if ((tmpskb = alloc_skb((sizeof (struct web100_netlink_msg)), GFP_ATOMIC)) == NULL) { + printk(KERN_INFO "web100_netlink_event: alloc_skb failure\n"); + return; + } + + skb_put(tmpskb, sizeof (struct web100_netlink_msg)); + msg = (struct web100_netlink_msg *)tmpskb->data; + msg->type = type; + msg->cid = cid; + netlink_broadcast(web100_nlsock, tmpskb, 0, ~0, GFP_ATOMIC); + } + #endif /* CONFIG_WEB100_NETLINK */ + + extern __u32 sysctl_wmem_default; + extern __u32 sysctl_rmem_default; + + /* Called whenever a TCP/IPv4 sock is created. + * net/ipv4/tcp_ipv4.c: tcp_v4_syn_recv_sock, + * tcp_v4_init_sock + * Allocates a stats structure and initializes values. + */ + int web100_stats_create(struct sock *sk) + { + struct web100stats *stats; + struct web100directs *vars; + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct timeval tv; + + if ((stats = kmalloc(sizeof (struct web100stats), gfp_any())) == NULL) + return -ENOMEM; + sk->tp_pinfo.af_tcp.tcp_stats = stats; + vars = &stats->wc_vars; + + memset(stats, 0, sizeof (struct web100stats)); + + stats->wc_cid = -1; + stats->wc_sk = sk; + atomic_set(&stats->wc_users, 0); + + stats->wc_limstate = WC_SNDLIM_STARTUP; + do_gettimeofday(&stats->wc_limstate_time); + + vars->NagleEnabled = !(sk->tp_pinfo.af_tcp.nonagle); + vars->ActiveOpen = !in_interrupt(); + + vars->SndUna = tp->snd_una; + vars->SndNxt = tp->snd_nxt; + vars->SndMax = tp->snd_nxt; + vars->SndISS = tp->snd_nxt; + + do_gettimeofday(&tv); + vars->StartTime = tv.tv_sec * 10 + tv.tv_usec / 100000; + vars->StartTimeSec = tv.tv_sec; + vars->StartTimeUsec = tv.tv_usec; + stats->wc_start_monotime = web100_mono_time(); + + vars->MinRTT = vars->MinRTO = vars->MinMSS = 
vars->MinRwinRcvd = + vars->MinRwinSent = vars->MinSsthresh = WC_INF32; + + if (sysctl_web100_sbufmode == WC_BUFMODE_OS) { + vars->X_SBufMode = WC_BUFMODE_OS; + } else { + vars->X_SBufMode = WC_BUFMODE_WEB100; + if (!(sk->userlocks & SOCK_SNDBUF_LOCK)) { + sk->userlocks |= SOCK_SNDBUF_LOCK; + sk->sndbuf = sysctl_wmem_default; + } + } + if (sysctl_web100_rbufmode == WC_BUFMODE_OS) { + vars->X_RBufMode = WC_BUFMODE_OS; + vars->LimRwin = tp->window_clamp; + } else { + vars->X_RBufMode = WC_BUFMODE_WEB100; + if (!(sk->userlocks & SOCK_RCVBUF_LOCK)) { + sk->userlocks |= SOCK_RCVBUF_LOCK; + sk->rcvbuf = sysctl_rmem_default; + } + vars->LimRwin = WC_INF32; + } + + #ifdef CONFIG_WEB100_NET100 + stats->wc_flindex = 1; + #endif + web100_stats_use(stats); + + return 0; + } + + void web100_stats_destroy(struct web100stats *stats) + { + atomic_sub(stats->wc_sk->tp_pinfo.af_tcp.rcv_wnd, &tcp_rwin_announced); + + /* Attribute final sndlim time. */ + web100_update_sndlim(&stats->wc_sk->tp_pinfo.af_tcp, stats->wc_limstate); + + if (stats->wc_cid >= 0) { + #ifdef CONFIG_WEB100_NETLINK + web100_netlink_event(WC_NL_TYPE_DISCONNECT, stats->wc_cid); + #endif + stats_persist(stats); + } else { + web100_stats_unuse(stats); + } + } + + /* Do not call directly. Called from web100_stats_unuse(). */ + void web100_stats_free(struct web100stats *stats) + { + if (stats->wc_cid >= 0) { + stats_unlink(stats); + } + kfree(stats); + } + + extern __u32 sysctl_wmem_default; + extern __u32 sysctl_rmem_default; + + /* Called when a connection enters the ESTABLISHED state, and has all its + * state initialized. + * net/ipv4/tcp_input.c: tcp_rcv_state_process, + * tcp_rcv_synsent_state_process + * Here we link the statistics structure in so it is visible in the /proc + * fs, and do some final init. 
+ */ + void web100_stats_establish(struct sock *sk) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct web100stats *stats = tp->tcp_stats; + struct web100directs *vars = &stats->wc_vars; + + tp->rcv_hi_seq = tp->rcv_nxt; + atomic_add(tp->rcv_wnd, &tcp_rwin_announced); + + if (stats == NULL) + return; + + /* Let's set these here, since they can't change once the + * connection is established. + */ + vars->LocalPort = sk->num; + vars->RemPort = ntohs(sk->dport); + + if (vars->LocalAddressType == WC_ADDRTYPE_IPV4) { + vars->LocalAddress.v4addr = sk->rcv_saddr; + vars->RemAddress.v4addr = sk->daddr; + } + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + else if (vars->LocalAddressType == WC_ADDRTYPE_IPV6) { + memcpy(&vars->LocalAddress.v6addr.addr, &sk->net_pinfo.af_inet6.saddr, 16); + memcpy(&vars->RemAddress.v6addr.addr, &sk->net_pinfo.af_inet6.daddr, 16); + } + #endif + else { + printk(KERN_ERR "Web100: LocalAddressType not valid.\n"); + } + vars->LocalAddress.v6addr.type = vars->RemAddress.v6addr.type = vars->LocalAddressType; + + vars->SACKEnabled = tp->sack_ok; + vars->TimestampsEnabled = tp->tstamp_ok; + #ifdef CONFIG_INET_ECN + vars->ECNEnabled = tp->ecn_flags & TCP_ECN_OK; + #endif + + if (tp->wscale_ok) { + vars->WinScaleRcvd = tp->snd_wscale; + vars->WinScaleSent = tp->rcv_wscale; + } else { + vars->WinScaleRcvd = -1; + vars->WinScaleSent = -1; + } + vars->SndWinScale = vars->WinScaleRcvd; + vars->RcvWinScale = vars->WinScaleSent; + + vars->CurCwnd = tp->snd_cwnd * tp->mss_cache; + vars->CurSsthresh = tp->snd_ssthresh * tp->mss_cache; + + vars->RecvISS = vars->RcvNxt = tp->rcv_nxt; + + vars->RetranThresh = tp->reordering; + + vars->LimRwin = min_t(__u32, vars->LimRwin, 65535U << tp->rcv_wscale); /* 65535: largest 16-bit window field value, scaled */ + + stats_link(stats); + + web100_update_sndlim(tp, WC_SNDLIM_SENDER); + + #ifdef CONFIG_WEB100_NETLINK + web100_netlink_event(WC_NL_TYPE_CONNECT, stats->wc_cid); + #endif + } + + /* + * Statistics update functions + */ + + void
web100_update_snd_nxt(struct tcp_opt *tp) + { + struct web100stats *stats = tp->tcp_stats; + + if (after(tp->snd_nxt, stats->wc_vars.SndMax)) { + if (before(stats->wc_vars.SndMax, stats->wc_vars.SndISS) && + after(tp->snd_nxt, stats->wc_vars.SndISS)) + stats->wc_vars.SendWraps++; + stats->wc_vars.ThruBytesAcked += (__u32) (tp->snd_nxt - stats->wc_vars.SndMax); /* XXX */ + stats->wc_vars.SndMax = tp->snd_nxt; + } + stats->wc_vars.SndNxt = tp->snd_nxt; + } + + void web100_update_rtt(struct tcp_opt *tp, unsigned long rtt_sample) + { + struct web100stats *stats = tp->tcp_stats; + unsigned long rtt_sample_msec = rtt_sample * 1000 / HZ; + __u32 rto; + + stats->wc_vars.SampleRTT = rtt_sample_msec; + + if (rtt_sample_msec > stats->wc_vars.MaxRTT) + stats->wc_vars.MaxRTT = rtt_sample_msec; + if (rtt_sample_msec < stats->wc_vars.MinRTT) + stats->wc_vars.MinRTT = rtt_sample_msec; + + stats->wc_vars.CountRTT++; + stats->wc_vars.SumRTT += rtt_sample_msec; + + if (stats->wc_vars.PreCongCountRTT != stats->wc_vars.PostCongCountRTT) { + stats->wc_vars.PostCongCountRTT++; + stats->wc_vars.PostCongSumRTT += rtt_sample_msec; + } + + /* srtt is stored as 8 * the smoothed estimate */ + stats->wc_vars.SmoothedRTT = + (tp->srtt >> 3) * 1000 / HZ; + + rto = tp->rto * 1000 / HZ; + if (rto > stats->wc_vars.MaxRTO) + stats->wc_vars.MaxRTO = rto; + if (rto < stats->wc_vars.MinRTO) + stats->wc_vars.MinRTO = rto; + stats->wc_vars.CurRTO = rto; + + stats->wc_vars.CurTimeoutCount = 0; + + stats->wc_vars.RTTVar = (tp->rttvar >> 2) * 1000 / HZ; + } + + void web100_update_timeout(struct tcp_opt *tp) { + struct web100stats *stats = tp->tcp_stats; + + stats->wc_vars.CurTimeoutCount++; + if (tp->backoff) + stats->wc_vars.SubsequentTimeouts++; + else + stats->wc_vars.Timeouts++; + if (tp->ca_state == TCP_CA_Open) + stats->wc_vars.AbruptTimeouts++; + } + + void web100_update_mss(struct tcp_opt *tp) + { + struct web100stats *stats = tp->tcp_stats; + int mss = tp->mss_cache; + + stats->wc_vars.CurMSS = mss; 
+ if (mss > stats->wc_vars.MaxMSS) + stats->wc_vars.MaxMSS = mss; + if (mss < stats->wc_vars.MinMSS) + stats->wc_vars.MinMSS = mss; + } + + void web100_update_cwnd(struct tcp_opt *tp) + { + struct web100stats *stats = tp->tcp_stats; + __u16 mss = tp->mss_cache; + __u32 cwnd; + __u32 ssthresh; + + if (mss == 0) { + printk("Web100: web100_update_cwnd: mss == 0\n"); + return; + } + + cwnd = min(WC_INF32 / mss, tp->snd_cwnd) * mss; + stats->wc_vars.CurCwnd = cwnd; + if (cwnd > stats->wc_vars.MaxCwnd) + stats->wc_vars.MaxCwnd = cwnd; + + ssthresh = min(WC_INF32 / mss, tp->snd_ssthresh) * mss; + stats->wc_vars.CurSsthresh = ssthresh; + + /* Discard initiail ssthresh set at infinity. */ + if (tp->snd_ssthresh >= 0x7ffffff) { + return; + } + if (ssthresh > stats->wc_vars.MaxSsthresh) + stats->wc_vars.MaxSsthresh = ssthresh; + if (ssthresh < stats->wc_vars.MinSsthresh) + stats->wc_vars.MinSsthresh = ssthresh; + } + + void web100_update_rwin_rcvd(struct tcp_opt *tp) + { + struct web100stats *stats = tp->tcp_stats; + __u32 win = tp->snd_wnd; + + stats->wc_vars.CurRwinRcvd = win; + if (win > stats->wc_vars.MaxRwinRcvd) + stats->wc_vars.MaxRwinRcvd = win; + if (win < stats->wc_vars.MinRwinRcvd) + stats->wc_vars.MinRwinRcvd = win; + } + + void web100_update_rwin_sent(struct tcp_opt *tp) + { + struct web100stats *stats = tp->tcp_stats; + __u32 win = tp->rcv_wnd; + + /* Update our advertised window. 
*/ + stats->wc_vars.CurRwinSent = win; + if (win > stats->wc_vars.MaxRwinSent) + stats->wc_vars.MaxRwinSent = win; + if (win < stats->wc_vars.MinRwinSent) + stats->wc_vars.MinRwinSent = win; + } + + + /* TODO: change this to a generic state machine instrument */ + static void web100_state_update(struct tcp_opt *tp, int why, __u64 bytes) + { + struct web100stats *stats = tp->tcp_stats; + struct timeval now; + + do_gettimeofday(&now); + stats->wc_vars.SndLimTime[stats->wc_limstate] += + (1000000*(now.tv_sec - stats->wc_limstate_time.tv_sec)) + + ((signed)(now.tv_usec) - stats->wc_limstate_time.tv_usec); + memcpy(&stats->wc_limstate_time, &now, sizeof (struct timeval)); + + stats->wc_vars.SndLimBytes[why] += bytes - stats->wc_limstate_bytes; + stats->wc_limstate_bytes = bytes; + + if (stats->wc_limstate != why) { + stats->wc_limstate = why; + stats->wc_vars.SndLimTrans[why]++; + } + } + + void web100_update_sndlim(struct tcp_opt *tp, int why) + { + struct web100stats *stats = tp->tcp_stats; + + if (why < 0) { + printk("web100_update_sndlim: BUG: why < 0\n"); + return; + } + + web100_state_update(tp, why, stats->wc_vars.DataBytesOut); + /* future instruments on other sender bottlenecks here... */ + /* if (!why) { why = ??? } */ + /* web100_state_update(tp, why, stats->wc_vars.DataBytesOut); */ + } + + void web100_update_congestion(struct tcp_opt *tp, int why_dummy) + { + struct web100stats *stats = tp->tcp_stats; + + stats->wc_vars.CongestionSignals++; + stats->wc_vars.PreCongSumCwnd += stats->wc_vars.CurCwnd; + + /* This may require more control flags */ + stats->wc_vars.PreCongCountRTT++; + stats->wc_vars.PreCongSumRTT += stats->wc_vars.SampleRTT; + } + + /* Called from tcp_transmit_skb, whenever we push a segment onto the wire. + * This must be called before the header is pushed onto the skb. + */ + void web100_update_segsend(struct tcp_opt *tp, struct sk_buff *skb) + { + struct web100stats *stats = tp->tcp_stats; + + /* We know we're sending a segment. 
*/ + stats->wc_vars.PktsOut++; + + /* We know the ack seq is rcv_nxt. web100_XXX bug compatible*/ + web100_update_rcv_nxt(tp); + + /* A pure ACK contains no data; everything else is data. */ + if (skb->len > 0) { + stats->wc_vars.DataPktsOut++; + stats->wc_vars.DataBytesOut += skb->len; + } else { + stats->wc_vars.AckPktsOut++; + } + + /* Check for retransmission. */ + if (before(TCP_SKB_CB(skb)->seq, stats->wc_vars.SndMax)) { + stats->wc_vars.PktsRetrans++; + stats->wc_vars.BytesRetrans += skb->len; + } + } + + void web100_update_segrecv(struct tcp_opt *tp, struct sk_buff *skb) + { + struct web100directs *vars = &tp->tcp_stats->wc_vars; + struct tcphdr *th = skb->h.th; + + vars->PktsIn++; + if (skb->len == th->doff*4) { + vars->AckPktsIn++; + if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una) + vars->DupAcksIn++; + } else { + vars->DataPktsIn++; + vars->DataBytesIn += skb->len - th->doff*4; + } + } + + void web100_update_rcv_nxt(struct tcp_opt *tp) + { + struct web100stats *stats = tp->tcp_stats; + + if (before(stats->wc_vars.RcvNxt, stats->wc_vars.RecvISS) && + after(tp->rcv_nxt, stats->wc_vars.RecvISS)) + stats->wc_vars.RecvWraps++; + stats->wc_vars.ThruBytesReceived += (__u32) (tp->rcv_nxt - stats->wc_vars.RcvNxt); /* XXX */ + stats->wc_vars.RcvNxt = tp->rcv_nxt; + } + + void web100_update_writeq(struct sock *sk) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct web100directs *vars = &tp->tcp_stats->wc_vars; + int len = tp->write_seq - vars->SndMax; + + vars->CurAppWQueue = len; + if (len > vars->MaxAppWQueue) + vars->MaxAppWQueue = len; + } + + void web100_update_recvq(struct sock *sk) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct web100directs *vars = &tp->tcp_stats->wc_vars; + int len = tp->rcv_nxt - tp->copied_seq; + + tp->rcv_alloc = len; + vars->CurAppRQueue = len; + if (vars->MaxAppRQueue < len) + vars->MaxAppRQueue = len; + } + + void web100_update_ofoq(struct sock *sk) + { + struct tcp_opt *tp = &sk->tp_pinfo.af_tcp; + struct 
web100directs *vars = &tp->tcp_stats->wc_vars; + int len = tp->rcv_hi_seq - tp->rcv_nxt; + + vars->CurReasmQueue = len; + if (vars->MaxReasmQueue < len) + vars->MaxReasmQueue = len; + } + + void __init web100_stats_init() + { + int order; + + memset(death_slots, 0, sizeof (death_slots)); + + web100stats_htsize = tcp_ehash_size; + for (order = 0; (1UL << order) * PAGE_SIZE < web100stats_htsize * + sizeof (struct web100stats *); order++) + ; + printk("Web100: initiailizing hash table of size %d (order %d)\n", + web100stats_htsize, order); + if ((web100stats_ht = (struct web100stats **)__get_free_pages(GFP_ATOMIC, order)) == NULL) + panic("Failed to allocate Web100 stats hash table.\n"); + memset(web100stats_ht, 0, web100stats_htsize * sizeof (struct web100stats *)); + + #ifdef CONFIG_WEB100_NETLINK + if ((web100_nlsock = netlink_kernel_create(NETLINK_WEB100, NULL)) == NULL) + printk(KERN_ERR "web100_stats_init(): cannot initialize netlink socket\n"); + #endif + + printk("Web100 %s: Initialization successful\n", web100_version_string); + } diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/ipv6/tcp_ipv6.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv6/tcp_ipv6.c *** linux-2.4.20.orig/net/ipv6/tcp_ipv6.c Thu Nov 28 23:53:15 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/ipv6/tcp_ipv6.c Mon Jan 19 17:41:51 2004 *************** static int tcp_v6_connect(struct sock *s *** 680,685 **** --- 680,690 ---- tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, np->daddr.s6_addr32, sk->sport, sk->dport); + WEB100_VAR_SET(tp, SndISS, tp->write_seq); + WEB100_VAR_SET(tp, SndMax, tp->write_seq); + WEB100_VAR_SET(tp, SndNxt, tp->write_seq); + WEB100_VAR_SET(tp, SndUna, tp->write_seq); + err = tcp_connect(sk); if (err) goto late_failure; *************** static struct sock * tcp_v6_syn_recv_soc *** 1302,1307 **** --- 1307,1319 ---- newsk = tcp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto out; + #ifdef CONFIG_WEB100_STATS 
+ if (web100_stats_create(newsk)) { + sk_free(newsk); + goto out; + } + newsk->tp_pinfo.af_tcp.tcp_stats->wc_vars.LocalAddressType = WC_ADDRTYPE_IPV6; + #endif /* Charge newly allocated IPv6 socket */ #ifdef INET_REFCNT_DEBUG *************** process: *** 1589,1600 **** --- 1601,1614 ---- skb->dev = NULL; bh_lock_sock(sk); + WEB100_UPDATE_FUNC(&sk->tp_pinfo.af_tcp, web100_update_segrecv(&sk->tp_pinfo.af_tcp, skb)); ret = 0; if (!sk->lock.users) { if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); } else sk_add_backlog(sk, skb); + WEB100_UPDATE_FUNC(&sk->tp_pinfo.af_tcp, web100_update_cwnd(&sk->tp_pinfo.af_tcp)); bh_unlock_sock(sk); sock_put(sk); *************** static int tcp_v6_init_sock(struct sock *** 1834,1839 **** --- 1848,1863 ---- sk->sndbuf = sysctl_tcp_wmem[1]; sk->rcvbuf = sysctl_tcp_rmem[1]; + #ifdef CONFIG_WEB100_STATS + { + int err; + if ((err = web100_stats_create(sk))) { + return err; + } + sk->tp_pinfo.af_tcp.tcp_stats->wc_vars.LocalAddressType = WC_ADDRTYPE_IPV6; + } + #endif + atomic_inc(&tcp_sockets_allocated); return 0; *************** static int tcp_v6_destroy_sock(struct so *** 1857,1863 **** /* Clean up a referenced TCP bind bucket. */ if(sk->prev != NULL) tcp_put_port(sk); ! /* If sendmsg cached page exists, toss it. */ if (tp->sndmsg_page != NULL) __free_page(tp->sndmsg_page); --- 1881,1895 ---- /* Clean up a referenced TCP bind bucket. */ if(sk->prev != NULL) tcp_put_port(sk); ! ! #ifdef CONFIG_WEB100_STATS ! #if 0 ! /* Do we have an ipv4 connection here? */ ! if (sk->tp_pinfo.af_tcp.tcp_stats) ! #endif ! web100_stats_destroy(sk->tp_pinfo.af_tcp.tcp_stats); ! #endif ! /* If sendmsg cached page exists, toss it. 
*/ if (tp->sndmsg_page != NULL) __free_page(tp->sndmsg_page); diff -I'$(I)d:(.)*$' -C3 -P -p -r --exclude='*~' linux-2.4.20.orig/net/netsyms.c linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/netsyms.c *** linux-2.4.20.orig/net/netsyms.c Thu Nov 28 23:53:16 2002 --- linux-2.4.20_altAIMD-0.3_web100-2.3.3_sacks/net/netsyms.c Mon Jan 19 17:41:51 2004 *************** EXPORT_SYMBOL(sysctl_tcp_tw_recycle); *** 401,406 **** --- 401,422 ---- EXPORT_SYMBOL(sysctl_max_syn_backlog); #endif + #if defined(CONFIG_WEB100_STATS) && defined(CONFIG_IPV6_MODULE) + EXPORT_SYMBOL(web100_stats_create); + EXPORT_SYMBOL(web100_stats_destroy); + EXPORT_SYMBOL(web100_update_segrecv); + EXPORT_SYMBOL(web100_update_cwnd); + EXPORT_SYMBOL(web100_update_writeq); + #endif + + #ifdef CONFIG_WEB100_STATS + EXPORT_SYMBOL(tcp_retx_mem); + #endif + + #ifdef CONFIG_TCP_PKTDROP + EXPORT_SYMBOL(sysctl_tcp_pktdrop_rate); + #endif + #if defined (CONFIG_IPV6_MODULE) EXPORT_SYMBOL(secure_tcpv6_sequence_number); EXPORT_SYMBOL(secure_ipv6_id); *************** EXPORT_SYMBOL(sysctl_wmem_max); *** 534,542 **** --- 550,565 ---- EXPORT_SYMBOL(sysctl_rmem_max); #ifdef CONFIG_INET EXPORT_SYMBOL(sysctl_ip_default_ttl); + #ifdef CONFIG_ALTAIMD + EXPORT_SYMBOL(sysctl_tcp_altAIMD); + #endif #endif #endif + #ifdef CONFIG_MODCWND + EXPORT_SYMBOL(sysctl_tcp_moderate_cwnd); + #endif + /* Packet scheduler modules want these. */ EXPORT_SYMBOL(qdisc_destroy); EXPORT_SYMBOL(qdisc_reset);