diff options
author | nbd <nbd@3c298f89-4303-0410-b956-a3cf2f4a3e73> | 2006-01-10 19:43:00 +0000 |
---|---|---|
committer | nbd <nbd@3c298f89-4303-0410-b956-a3cf2f4a3e73> | 2006-01-10 19:43:00 +0000 |
commit | 489f9455f265caba0edd3488fe9580e36418eb32 (patch) | |
tree | 227994a76fef5a3c7c651667756984dc69bddb2d /openwrt/target/linux/generic-2.6/patches | |
parent | 3224e1420fd9c58ce5bf3b67e871730be18f8fdd (diff) |
large target/linux cleanup
git-svn-id: svn://svn.openwrt.org/openwrt/trunk@2877 3c298f89-4303-0410-b956-a3cf2f4a3e73
Diffstat (limited to 'openwrt/target/linux/generic-2.6/patches')
11 files changed, 15586 insertions, 0 deletions
diff --git a/openwrt/target/linux/generic-2.6/patches/000-reenable_devfs.patch b/openwrt/target/linux/generic-2.6/patches/000-reenable_devfs.patch new file mode 100644 index 0000000000..ce98def5da --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/000-reenable_devfs.patch @@ -0,0 +1,219 @@ +diff -ur linux-2.6.15-rc5/drivers/mtd/mtd_blkdevs.c linux-2.6.15-rc5-openwrt/drivers/mtd/mtd_blkdevs.c +--- linux-2.6.15-rc5/drivers/mtd/mtd_blkdevs.c 2005-12-04 06:10:42.000000000 +0100 ++++ linux-2.6.15-rc5-openwrt/drivers/mtd/mtd_blkdevs.c 2005-12-15 07:53:20.000000000 +0100 +@@ -21,6 +21,9 @@ + #include <linux/init.h> + #include <asm/semaphore.h> + #include <asm/uaccess.h> ++#ifdef CONFIG_DEVFS_FS ++#include <linux/devfs_fs_kernel.h> ++#endif + + static LIST_HEAD(blktrans_majors); + +@@ -302,6 +305,11 @@ + snprintf(gd->disk_name, sizeof(gd->disk_name), + "%s%d", tr->name, new->devnum); + ++#ifdef CONFIG_DEVFS_FS ++ snprintf(gd->devfs_name, sizeof(gd->devfs_name), ++ "%s/%c", tr->name, (tr->part_bits?'a':'0') + new->devnum); ++#endif ++ + /* 2.5 has capacity in units of 512 bytes while still + having BLOCK_SIZE_BITS set to 10. Just to keep us amused. */ + set_capacity(gd, (new->size * new->blksize) >> 9); +@@ -418,6 +426,10 @@ + return ret; + } + ++#ifdef CONFIG_DEVFS_FS ++ devfs_mk_dir(tr->name); ++#endif ++ + INIT_LIST_HEAD(&tr->devs); + list_add(&tr->list, &blktrans_majors); + +@@ -450,6 +462,10 @@ + tr->remove_dev(dev); + } + ++#ifdef CONFIG_DEVFS_FS ++ devfs_remove(tr->name); ++#endif ++ + blk_cleanup_queue(tr->blkcore_priv->rq); + unregister_blkdev(tr->major, tr->name); + +diff -ur linux-2.6.15-rc5/drivers/mtd/mtdchar.c linux-2.6.15-rc5-openwrt/drivers/mtd/mtdchar.c +--- linux-2.6.15-rc5/drivers/mtd/mtdchar.c 2005-12-04 06:10:42.000000000 +0100 ++++ linux-2.6.15-rc5-openwrt/drivers/mtd/mtdchar.c 2005-12-15 07:49:15.000000000 +0100 +@@ -6,7 +6,6 @@ + */ + + #include <linux/config.h> +-#include <linux/device.h> + #include <linux/fs.h> + #include <linux/init.h> + #include <linux/kernel.h> +@@ -19,19 +18,33 @@ + + #include <asm/uaccess.h> + ++#ifdef CONFIG_DEVFS_FS ++#include <linux/devfs_fs_kernel.h> ++#else ++#include <linux/device.h> ++ + static struct class *mtd_class; ++#endif + + static void mtd_notify_add(struct mtd_info* mtd) + { + if (!mtd) + return; + ++#ifdef CONFIG_DEVFS_FS ++ devfs_mk_cdev(MKDEV(MTD_CHAR_MAJOR, mtd->index*2), ++ S_IFCHR | S_IRUGO | S_IWUGO, "mtd/%d", mtd->index); ++ ++ devfs_mk_cdev(MKDEV(MTD_CHAR_MAJOR, mtd->index*2+1), ++ S_IFCHR | S_IRUGO, "mtd/%dro", mtd->index); ++#else + class_device_create(mtd_class, NULL, MKDEV(MTD_CHAR_MAJOR, mtd->index*2), + NULL, "mtd%d", mtd->index); + + class_device_create(mtd_class, NULL, + MKDEV(MTD_CHAR_MAJOR, mtd->index*2+1), + NULL, "mtd%dro", mtd->index); ++#endif + } + + static void mtd_notify_remove(struct mtd_info* mtd) +@@ -39,8 +52,13 @@ + if (!mtd) + return; + ++#ifdef CONFIG_DEVFS_FS ++ devfs_remove("mtd/%d", mtd->index); ++ devfs_remove("mtd/%dro", mtd->index); ++#else + class_device_destroy(mtd_class, MKDEV(MTD_CHAR_MAJOR, mtd->index*2)); + class_device_destroy(mtd_class, MKDEV(MTD_CHAR_MAJOR, mtd->index*2+1)); ++#endif + } + + static struct mtd_notifier notifier = { +@@ -48,6 +66,22 @@ + .remove = mtd_notify_remove, + }; + ++#ifdef CONFIG_DEVFS_FS ++ static inline void mtdchar_devfs_init(void) ++ { ++ devfs_mk_dir("mtd"); ++ register_mtd_user(¬ifier); ++ } ++ static inline void mtdchar_devfs_exit(void) ++ { ++ unregister_mtd_user(¬ifier); ++ devfs_remove("mtd"); ++ } ++ #else /* !DEVFS */ ++ #define mtdchar_devfs_init() do { } while(0) ++ #define mtdchar_devfs_exit() do { } while(0) ++#endif ++ + /* + * We use file->private_data to store a pointer to the MTDdevice. + * Since alighment is at least 32 bits, we have 2 bits free for OTP +@@ -643,6 +677,9 @@ + return -EAGAIN; + } + ++#ifdef CONFIG_DEVFS_FS ++ mtdchar_devfs_init(); ++#else + mtd_class = class_create(THIS_MODULE, "mtd"); + + if (IS_ERR(mtd_class)) { +@@ -652,13 +689,19 @@ + } + + register_mtd_user(¬ifier); ++#endif + return 0; + } + + static void __exit cleanup_mtdchar(void) + { ++ ++#ifdef CONFIG_DEVFS_FS ++ mtdchar_devfs_exit(); ++#else + unregister_mtd_user(¬ifier); + class_destroy(mtd_class); ++#endif + unregister_chrdev(MTD_CHAR_MAJOR, "mtd"); + } + +diff -ur linux-2.6.15-rc5/fs/Kconfig linux-2.6.15-rc5-openwrt/fs/Kconfig +--- linux-2.6.15-rc5/fs/Kconfig 2005-12-04 06:10:42.000000000 +0100 ++++ linux-2.6.15-rc5-openwrt/fs/Kconfig 2005-12-15 07:44:01.000000000 +0100 +@@ -772,6 +772,56 @@ + help + Exports the dump image of crashed kernel in ELF format. + ++config DEVFS_FS ++ bool "/dev file system support (OBSOLETE)" ++ depends on EXPERIMENTAL ++ help ++ This is support for devfs, a virtual file system (like /proc) which ++ provides the file system interface to device drivers, normally found ++ in /dev. Devfs does not depend on major and minor number ++ allocations. Device drivers register entries in /dev which then ++ appear automatically, which means that the system administrator does ++ not have to create character and block special device files in the ++ /dev directory using the mknod command (or MAKEDEV script) anymore. ++ ++ This is work in progress. If you want to use this, you *must* read ++ the material in <file:Documentation/filesystems/devfs/>, especially ++ the file README there. ++ ++ Note that devfs no longer manages /dev/pts! If you are using UNIX98 ++ ptys, you will also need to mount the /dev/pts filesystem (devpts). ++ ++ Note that devfs has been obsoleted by udev, ++ <http://www.kernel.org/pub/linux/utils/kernel/hotplug/>. ++ It has been stripped down to a bare minimum and is only provided for ++ legacy installations that use its naming scheme which is ++ unfortunately different from the names normal Linux installations ++ use. ++ ++ If unsure, say N. ++ ++config DEVFS_MOUNT ++ bool "Automatically mount at boot" ++ depends on DEVFS_FS ++ help ++ This option appears if you have CONFIG_DEVFS_FS enabled. Setting ++ this to 'Y' will make the kernel automatically mount devfs onto /dev ++ when the system is booted, before the init thread is started. ++ You can override this with the "devfs=nomount" boot option. ++ ++ If unsure, say N. ++ ++config DEVFS_DEBUG ++ bool "Debug devfs" ++ depends on DEVFS_FS ++ help ++ If you say Y here, then the /dev file system code will generate ++ debugging messages. See the file ++ <file:Documentation/filesystems/devfs/boot-options> for more ++ details. ++ ++ If unsure, say N. ++ + config SYSFS + bool "sysfs file system support" if EMBEDDED + default y diff --git a/openwrt/target/linux/generic-2.6/patches/001-squashfs.patch b/openwrt/target/linux/generic-2.6/patches/001-squashfs.patch new file mode 100644 index 0000000000..f51beaac66 --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/001-squashfs.patch @@ -0,0 +1,2592 @@ +diff --new-file -urp linux-2.6.12/fs/Kconfig linux-2.6.12-squashfs2.2/fs/Kconfig +--- linux-2.6.12/fs/Kconfig 2005-06-17 20:48:29.000000000 +0100 ++++ linux-2.6.12-squashfs2.2/fs/Kconfig 2005-07-04 02:35:35.000000000 +0100 +@@ -1171,6 +1171,69 @@ config CRAMFS + + If unsure, say N. + ++config SQUASHFS ++ tristate "SquashFS 2.0 - Squashed file system support" ++ select ZLIB_INFLATE ++ help ++ Saying Y here includes support for SquashFs 2.0 (Compressed Read-Only File ++ System). Squashfs is a highly compressed read-only filesystem for Linux. ++ It uses zlib compression to compress both files, inodes and directories. ++ Inodes in the system are very small and all blocks are packed to minimise ++ data overhead. Block sizes greater than 4K are supported up to a maximum of 64K. ++ ++ Squashfs is intended for general read-only filesystem use, for archival ++ use (i.e. in cases where a .tar.gz file may be used), and in embedded ++ systems where low overhead is needed. Further information and filesystem tools ++ are available from http://squashfs.sourceforge.net. ++ ++ If you want to compile this as a module ( = code which can be ++ inserted in and removed from the running kernel whenever you want), ++ say M here and read <file:Documentation/modules.txt>. The module ++ will be called squashfs. Note that the root file system (the one ++ containing the directory /) cannot be compiled as a module. ++ ++ If unsure, say N. ++ ++config SQUASHFS_EMBEDDED ++ ++ bool "Additional options for memory-constrained systems" ++ depends on SQUASHFS ++ default n ++ help ++ Saying Y here allows you to specify cache sizes and how Squashfs ++ allocates memory. This is only intended for memory constrained ++ systems. ++ ++ If unsure, say N. ++ ++config SQUASHFS_FRAGMENT_CACHE_SIZE ++ int "Number of fragments cached" if SQUASHFS_EMBEDDED ++ depends on SQUASHFS ++ default "3" ++ help ++ By default SquashFS caches the last 3 fragments read from ++ the filesystem. Increasing this amount may mean SquashFS ++ has to re-read fragments less often from disk, at the expense ++ of extra system memory. Decreasing this amount will mean ++ SquashFS uses less memory at the expense of extra reads from disk. ++ ++ Note there must be at least one cached fragment. Anything ++ much more than three will probably not make much difference. ++ ++config SQUASHFS_VMALLOC ++ bool "Use Vmalloc rather than Kmalloc" if SQUASHFS_EMBEDDED ++ depends on SQUASHFS ++ default n ++ help ++ By default SquashFS uses kmalloc to obtain fragment cache memory. ++ Kmalloc memory is the standard kernel allocator, but it can fail ++ on memory constrained systems. Because of the way Vmalloc works, ++ Vmalloc can succeed when kmalloc fails. Specifying this option ++ will make SquashFS always use Vmalloc to allocate the ++ fragment cache memory. ++ ++ If unsure, say N. ++ + config VXFS_FS + tristate "FreeVxFS file system support (VERITAS VxFS(TM) compatible)" + help +diff --new-file -urp linux-2.6.12/fs/Makefile linux-2.6.12-squashfs2.2/fs/Makefile +--- linux-2.6.12/fs/Makefile 2005-06-17 20:48:29.000000000 +0100 ++++ linux-2.6.12-squashfs2.2/fs/Makefile 2005-07-04 02:35:35.000000000 +0100 +@@ -52,6 +52,7 @@ obj-$(CONFIG_EXT3_FS) += ext3/ # Before + obj-$(CONFIG_JBD) += jbd/ + obj-$(CONFIG_EXT2_FS) += ext2/ + obj-$(CONFIG_CRAMFS) += cramfs/ ++obj-$(CONFIG_SQUASHFS) += squashfs/ + obj-$(CONFIG_RAMFS) += ramfs/ + obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ + obj-$(CONFIG_CODA_FS) += coda/ +diff --new-file -urp linux-2.6.12/fs/squashfs/inode.c linux-2.6.12-squashfs2.2/fs/squashfs/inode.c +--- linux-2.6.12/fs/squashfs/inode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12-squashfs2.2/fs/squashfs/inode.c 2005-07-04 02:35:35.000000000 +0100 +@@ -0,0 +1,1803 @@ ++/* ++ * Squashfs - a compressed read only filesystem for Linux ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005 Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * inode.c ++ */ ++ ++#define SQUASHFS_1_0_COMPATIBILITY ++ ++#include <linux/types.h> ++#include <linux/squashfs_fs.h> ++#include <linux/module.h> ++#include <linux/errno.h> ++#include <linux/slab.h> ++#include <linux/fs.h> ++#include <linux/smp_lock.h> ++#include <linux/slab.h> ++#include <linux/squashfs_fs_sb.h> ++#include <linux/squashfs_fs_i.h> ++#include <linux/buffer_head.h> ++#include <linux/vfs.h> ++#include <linux/init.h> ++#include <linux/dcache.h> ++#include <asm/uaccess.h> ++#include <linux/wait.h> ++#include <asm/semaphore.h> ++#include <linux/zlib.h> ++#include <linux/blkdev.h> ++#include <linux/vmalloc.h> ++ ++#ifdef SQUASHFS_TRACE ++#define TRACE(s, args...) printk(KERN_NOTICE "SQUASHFS: "s, ## args) ++#else ++#define TRACE(s, args...) {} ++#endif ++ ++#define ERROR(s, args...) printk(KERN_ERR "SQUASHFS error: "s, ## args) ++ ++#define SERROR(s, args...) if(!silent) printk(KERN_ERR "SQUASHFS error: "s, ## args) ++#define WARNING(s, args...) printk(KERN_WARNING "SQUASHFS: "s, ## args) ++ ++static void squashfs_put_super(struct super_block *); ++static int squashfs_statfs(struct super_block *, struct kstatfs *); ++static int squashfs_symlink_readpage(struct file *file, struct page *page); ++static int squashfs_readpage(struct file *file, struct page *page); ++static int squashfs_readpage4K(struct file *file, struct page *page); ++static int squashfs_readdir(struct file *, void *, filldir_t); ++static struct dentry *squashfs_lookup(struct inode *, struct dentry *, struct nameidata *); ++static unsigned int read_data(struct super_block *s, char *buffer, ++ unsigned int index, unsigned int length, unsigned int *next_index); ++static int squashfs_get_cached_block(struct super_block *s, char *buffer, ++ unsigned int block, unsigned int offset, int length, ++ unsigned int *next_block, unsigned int *next_offset); ++static struct inode *squashfs_iget(struct super_block *s, squashfs_inode inode); ++static unsigned int read_blocklist(struct inode *inode, int index, int readahead_blks, ++ char *block_list, unsigned short **block_p, unsigned int *bsize); ++static void squashfs_put_super(struct super_block *s); ++static struct super_block *squashfs_get_sb(struct file_system_type *, int, const char *, void *); ++static struct inode *squashfs_alloc_inode(struct super_block *sb); ++static void squashfs_destroy_inode(struct inode *inode); ++static int init_inodecache(void); ++static void destroy_inodecache(void); ++ ++#ifdef SQUASHFS_1_0_COMPATIBILITY ++static int squashfs_readpage_lessthan4K(struct file *file, struct page *page); ++static struct inode *squashfs_iget_1(struct super_block *s, squashfs_inode inode); ++static unsigned int read_blocklist_1(struct inode *inode, int index, int readahead_blks, ++ char *block_list, unsigned short **block_p, unsigned int *bsize); ++#endif ++ ++DECLARE_MUTEX(read_data_mutex); ++ ++static z_stream stream; ++ ++static struct file_system_type squashfs_fs_type = { ++ .owner = THIS_MODULE, ++ .name = "squashfs", ++ .get_sb = squashfs_get_sb, ++ .kill_sb = kill_block_super, ++ .fs_flags = FS_REQUIRES_DEV ++ }; ++ ++static unsigned char squashfs_filetype_table[] = { ++ DT_UNKNOWN, DT_DIR, DT_REG, DT_LNK, DT_BLK, DT_CHR, DT_FIFO, DT_SOCK ++}; ++ ++static struct super_operations squashfs_ops = { ++ .alloc_inode = squashfs_alloc_inode, ++ .destroy_inode = squashfs_destroy_inode, ++ .statfs = squashfs_statfs, ++ .put_super = squashfs_put_super, ++}; ++ ++static struct address_space_operations squashfs_symlink_aops = { ++ .readpage = squashfs_symlink_readpage ++}; ++ ++static struct address_space_operations squashfs_aops = { ++ .readpage = squashfs_readpage ++}; ++ ++static struct address_space_operations squashfs_aops_4K = { ++ .readpage = squashfs_readpage4K ++}; ++ ++#ifdef SQUASHFS_1_0_COMPATIBILITY ++static struct address_space_operations squashfs_aops_lessthan4K = { ++ .readpage = squashfs_readpage_lessthan4K ++}; ++#endif ++ ++static struct file_operations squashfs_dir_ops = { ++ .read = generic_read_dir, ++ .readdir = squashfs_readdir ++}; ++ ++static struct inode_operations squashfs_dir_inode_ops = { ++ .lookup = squashfs_lookup ++}; ++ ++ ++static inline struct squashfs_inode_info *SQUASHFS_I(struct inode *inode) ++{ ++ return list_entry(inode, struct squashfs_inode_info, vfs_inode); ++} ++ ++ ++static struct buffer_head *get_block_length(struct super_block *s, ++ int *cur_index, int *offset, int *c_byte) ++{ ++ squashfs_sb_info *msblk = s->s_fs_info; ++ unsigned short temp; ++ struct buffer_head *bh; ++ ++ if (!(bh = sb_bread(s, *cur_index))) ++ goto out; ++ ++ if (msblk->devblksize - *offset == 1) { ++ if (msblk->swap) ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ else ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ brelse(bh); ++ if (!(bh = sb_bread(s, ++(*cur_index)))) ++ goto out; ++ if (msblk->swap) ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ bh->b_data); ++ else ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ bh->b_data); ++ *c_byte = temp; ++ *offset = 1; ++ } else { ++ if (msblk->swap) { ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ (bh->b_data + *offset + 1)); ++ } else { ++ ((unsigned char *) &temp)[0] = *((unsigned char *) ++ (bh->b_data + *offset)); ++ ((unsigned char *) &temp)[1] = *((unsigned char *) ++ (bh->b_data + *offset + 1)); ++ } ++ *c_byte = temp; ++ *offset += 2; ++ } ++ ++ if (SQUASHFS_CHECK_DATA(msblk->sBlk.flags)) { ++ if (*offset == msblk->devblksize) { ++ brelse(bh); ++ if (!(bh = sb_bread(s, ++(*cur_index)))) ++ goto out; ++ *offset = 0; ++ } ++ if (*((unsigned char *) (bh->b_data + *offset)) != ++ SQUASHFS_MARKER_BYTE) { ++ ERROR("Metadata block marker corrupt @ %x\n", ++ *cur_index); ++ brelse(bh); ++ goto out; ++ } ++ (*offset)++; ++ } ++ return bh; ++ ++out: ++ return NULL; ++} ++ ++ ++static unsigned int read_data(struct super_block *s, char *buffer, ++ unsigned int index, unsigned int length, unsigned int *next_index) ++{ ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)s->s_fs_info; ++ struct buffer_head *bh[((SQUASHFS_FILE_MAX_SIZE - 1) >> msBlk->devblksize_log2) + 2]; ++ unsigned int offset = index & ((1 << msBlk->devblksize_log2) - 1); ++ unsigned int cur_index = index >> msBlk->devblksize_log2; ++ int bytes, avail_bytes, b = 0, k; ++ char *c_buffer; ++ unsigned int compressed; ++ unsigned int c_byte = length; ++ ++ if(c_byte) { ++ bytes = msBlk->devblksize - offset; ++ compressed = SQUASHFS_COMPRESSED_BLOCK(c_byte); ++ c_buffer = compressed ? msBlk->read_data : buffer; ++ c_byte = SQUASHFS_COMPRESSED_SIZE_BLOCK(c_byte); ++ ++ TRACE("Block @ 0x%x, %scompressed size %d\n", index, compressed ? "" : "un", (unsigned int) c_byte); ++ ++ if(!(bh[0] = sb_getblk(s, cur_index))) ++ goto block_release; ++ for(b = 1; bytes < c_byte; b++) { ++ if(!(bh[b] = sb_getblk(s, ++cur_index))) ++ goto block_release; ++ bytes += msBlk->devblksize; ++ } ++ ll_rw_block(READ, b, bh); ++ } else { ++ if(!(bh[0] = get_block_length(s, &cur_index, &offset, &c_byte))) ++ goto read_failure; ++ ++ bytes = msBlk->devblksize - offset; ++ compressed = SQUASHFS_COMPRESSED(c_byte); ++ c_buffer = compressed ? msBlk->read_data : buffer; ++ c_byte = SQUASHFS_COMPRESSED_SIZE(c_byte); ++ ++ TRACE("Block @ 0x%x, %scompressed size %d\n", index, compressed ? "" : "un", (unsigned int) c_byte); ++ ++ for(b = 1; bytes < c_byte; b++) { ++ if(!(bh[b] = sb_getblk(s, ++cur_index))) ++ goto block_release; ++ bytes += msBlk->devblksize; ++ } ++ ll_rw_block(READ, b - 1, bh + 1); ++ } ++ ++ if(compressed) ++ down(&read_data_mutex); ++ ++ for(bytes = 0, k = 0; k < b; k++) { ++ avail_bytes = (c_byte - bytes) > (msBlk->devblksize - offset) ? msBlk->devblksize - offset : c_byte - bytes; ++ wait_on_buffer(bh[k]); ++ if (!buffer_uptodate(bh[k])) ++ goto block_release; ++ memcpy(c_buffer + bytes, bh[k]->b_data + offset, avail_bytes); ++ bytes += avail_bytes; ++ offset = 0; ++ brelse(bh[k]); ++ } ++ ++ /* ++ * uncompress block ++ */ ++ if(compressed) { ++ int zlib_err; ++ ++ stream.next_in = c_buffer; ++ stream.avail_in = c_byte; ++ stream.next_out = buffer; ++ stream.avail_out = msBlk->read_size; ++ if(((zlib_err = zlib_inflateInit(&stream)) != Z_OK) || ++ ((zlib_err = zlib_inflate(&stream, Z_FINISH)) != Z_STREAM_END) || ++ ((zlib_err = zlib_inflateEnd(&stream)) != Z_OK)) { ++ ERROR("zlib_fs returned unexpected result 0x%x\n", zlib_err); ++ bytes = 0; ++ } else ++ bytes = stream.total_out; ++ up(&read_data_mutex); ++ } ++ ++ if(next_index) ++ *next_index = index + c_byte + (length ? 0 : (SQUASHFS_CHECK_DATA(msBlk->sBlk.flags) ? 3 : 2)); ++ ++ return bytes; ++ ++block_release: ++ while(--b >= 0) brelse(bh[b]); ++ ++read_failure: ++ ERROR("sb_bread failed reading block 0x%x\n", cur_index); ++ return 0; ++} ++ ++ ++static int squashfs_get_cached_block(struct super_block *s, char *buffer, ++ unsigned int block, unsigned int offset, int length, ++ unsigned int *next_block, unsigned int *next_offset) ++{ ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)s->s_fs_info; ++ int n, i, bytes, return_length = length; ++ unsigned int next_index; ++ ++ TRACE("Entered squashfs_get_cached_block [%x:%x]\n", block, offset); ++ ++ for(;;) { ++ for(i = 0; i < SQUASHFS_CACHED_BLKS; i++) ++ if(msBlk->block_cache[i].block == block) ++ break; ++ ++ down(&msBlk->block_cache_mutex); ++ if(i == SQUASHFS_CACHED_BLKS) { ++ /* read inode header block */ ++ for(i = msBlk->next_cache, n = SQUASHFS_CACHED_BLKS; n ; n --, i = (i + 1) % SQUASHFS_CACHED_BLKS) ++ if(msBlk->block_cache[i].block != SQUASHFS_USED_BLK) ++ break; ++ if(n == 0) { ++ wait_queue_t wait; ++ ++ init_waitqueue_entry(&wait, current); ++ add_wait_queue(&msBlk->waitq, &wait); ++ up(&msBlk->block_cache_mutex); ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule(); ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&msBlk->waitq, &wait); ++ continue; ++ } ++ msBlk->next_cache = (i + 1) % SQUASHFS_CACHED_BLKS; ++ ++ if(msBlk->block_cache[i].block == SQUASHFS_INVALID_BLK) { ++ if(!(msBlk->block_cache[i].data = (unsigned char *) ++ kmalloc(SQUASHFS_METADATA_SIZE, GFP_KERNEL))) { ++ ERROR("Failed to allocate cache block\n"); ++ up(&msBlk->block_cache_mutex); ++ return 0; ++ } ++ } ++ ++ msBlk->block_cache[i].block = SQUASHFS_USED_BLK; ++ up(&msBlk->block_cache_mutex); ++ if(!(msBlk->block_cache[i].length = read_data(s, msBlk->block_cache[i].data, block, 0, ++ &next_index))) { ++ ERROR("Unable to read cache block [%x:%x]\n", block, offset); ++ return 0; ++ } ++ down(&msBlk->block_cache_mutex); ++ wake_up(&msBlk->waitq); ++ msBlk->block_cache[i].block = block; ++ msBlk->block_cache[i].next_index = next_index; ++ TRACE("Read cache block [%x:%x]\n", block, offset); ++ } ++ ++ if(msBlk->block_cache[i].block != block) { ++ up(&msBlk->block_cache_mutex); ++ continue; ++ } ++ ++ if((bytes = msBlk->block_cache[i].length - offset) >= length) { ++ if(buffer) ++ memcpy(buffer, msBlk->block_cache[i].data + offset, length); ++ if(msBlk->block_cache[i].length - offset == length) { ++ *next_block = msBlk->block_cache[i].next_index; ++ *next_offset = 0; ++ } else { ++ *next_block = block; ++ *next_offset = offset + length; ++ } ++ ++ up(&msBlk->block_cache_mutex); ++ return return_length; ++ } else { ++ if(buffer) { ++ memcpy(buffer, msBlk->block_cache[i].data + offset, bytes); ++ buffer += bytes; ++ } ++ block = msBlk->block_cache[i].next_index; ++ up(&msBlk->block_cache_mutex); ++ length -= bytes; ++ offset = 0; ++ } ++ } ++} ++ ++ ++static int get_fragment_location(struct super_block *s, unsigned int fragment, unsigned int *fragment_start_block, unsigned int *fragment_size) ++{ ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)s->s_fs_info; ++ unsigned int start_block = msBlk->fragment_index[SQUASHFS_FRAGMENT_INDEX(fragment)]; ++ int offset = SQUASHFS_FRAGMENT_INDEX_OFFSET(fragment); ++ squashfs_fragment_entry fragment_entry; ++ ++ if(msBlk->swap) { ++ squashfs_fragment_entry sfragment_entry; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sfragment_entry, start_block, offset, ++ sizeof(sfragment_entry), &start_block, &offset)) ++ return 0; ++ SQUASHFS_SWAP_FRAGMENT_ENTRY(&fragment_entry, &sfragment_entry); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &fragment_entry, start_block, offset, ++ sizeof(fragment_entry), &start_block, &offset)) ++ return 0; ++ ++ *fragment_start_block = fragment_entry.start_block; ++ *fragment_size = fragment_entry.size; ++ ++ return 1; ++} ++ ++ ++void release_cached_fragment(squashfs_sb_info *msBlk, struct squashfs_fragment_cache *fragment) ++{ ++ down(&msBlk->fragment_mutex); ++ fragment->locked --; ++ wake_up(&msBlk->fragment_wait_queue); ++ up(&msBlk->fragment_mutex); ++} ++ ++ ++struct squashfs_fragment_cache *get_cached_fragment(struct super_block *s, unsigned int start_block, int length) ++{ ++ int i, n; ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)s->s_fs_info; ++ ++ for(;;) { ++ down(&msBlk->fragment_mutex); ++ for(i = 0; i < SQUASHFS_CACHED_FRAGMENTS && msBlk->fragment[i].block != start_block; i++); ++ if(i == SQUASHFS_CACHED_FRAGMENTS) { ++ for(i = msBlk->next_fragment, n = SQUASHFS_CACHED_FRAGMENTS; ++ n && msBlk->fragment[i].locked; n--, i = (i + 1) % SQUASHFS_CACHED_FRAGMENTS); ++ ++ if(n == 0) { ++ wait_queue_t wait; ++ ++ init_waitqueue_entry(&wait, current); ++ add_wait_queue(&msBlk->fragment_wait_queue, &wait); ++ up(&msBlk->fragment_mutex); ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ schedule(); ++ set_current_state(TASK_RUNNING); ++ remove_wait_queue(&msBlk->fragment_wait_queue, &wait); ++ continue; ++ } ++ msBlk->next_fragment = (msBlk->next_fragment + 1) % SQUASHFS_CACHED_FRAGMENTS; ++ ++ if(msBlk->fragment[i].data == NULL) ++ if(!(msBlk->fragment[i].data = (unsigned char *) ++ SQUASHFS_ALLOC(SQUASHFS_FILE_MAX_SIZE))) { ++ ERROR("Failed to allocate fragment cache block\n"); ++ up(&msBlk->fragment_mutex); ++ return NULL; ++ } ++ ++ msBlk->fragment[i].block = SQUASHFS_INVALID_BLK; ++ msBlk->fragment[i].locked = 1; ++ up(&msBlk->fragment_mutex); ++ if(!(msBlk->fragment[i].length = read_data(s, msBlk->fragment[i].data, start_block, length, ++ NULL))) { ++ ERROR("Unable to read fragment cache block [%x]\n", start_block); ++ msBlk->fragment[i].locked = 0; ++ return NULL; ++ } ++ msBlk->fragment[i].block = start_block; ++ TRACE("New fragment %d, start block %d, locked %d\n", i, msBlk->fragment[i].block, msBlk->fragment[i].locked); ++ return &msBlk->fragment[i]; ++ } ++ ++ msBlk->fragment[i].locked ++; ++ up(&msBlk->fragment_mutex); ++ ++ TRACE("Got fragment %d, start block %d, locked %d\n", i, msBlk->fragment[i].block, msBlk->fragment[i].locked); ++ return &msBlk->fragment[i]; ++ } ++} ++ ++ ++#ifdef SQUASHFS_1_0_COMPATIBILITY ++static struct inode *squashfs_iget_1(struct super_block *s, squashfs_inode inode) ++{ ++ struct inode *i = new_inode(s); ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)s->s_fs_info; ++ squashfs_super_block *sBlk = &msBlk->sBlk; ++ unsigned int block = SQUASHFS_INODE_BLK(inode) + sBlk->inode_table_start; ++ unsigned int offset = SQUASHFS_INODE_OFFSET(inode); ++ unsigned int next_block, next_offset; ++ squashfs_base_inode_header_1 inodeb; ++ ++ TRACE("Entered squashfs_iget_1\n"); ++ ++ if(msBlk->swap) { ++ squashfs_base_inode_header_1 sinodeb; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodeb, block, offset, ++ sizeof(sinodeb), &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_BASE_INODE_HEADER_1(&inodeb, &sinodeb, sizeof(sinodeb)); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodeb, block, offset, ++ sizeof(inodeb), &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = 1; ++ ++ i->i_mtime.tv_sec = sBlk->mkfs_time; ++ i->i_atime.tv_sec = sBlk->mkfs_time; ++ i->i_ctime.tv_sec = sBlk->mkfs_time; ++ ++ if(inodeb.inode_type != SQUASHFS_IPC_TYPE) ++ i->i_uid = msBlk->uid[((inodeb.inode_type - 1) / SQUASHFS_TYPES) * 16 + inodeb.uid]; ++ i->i_ino = SQUASHFS_MK_VFS_INODE(block - sBlk->inode_table_start, offset); ++ ++ i->i_mode = inodeb.mode; ++ ++ switch(inodeb.inode_type == SQUASHFS_IPC_TYPE ? SQUASHFS_IPC_TYPE : (inodeb.inode_type - 1) % SQUASHFS_TYPES + 1) { ++ case SQUASHFS_FILE_TYPE: { ++ squashfs_reg_inode_header_1 inodep; ++ ++ if(msBlk->swap) { ++ squashfs_reg_inode_header_1 sinodep; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodep, block, offset, sizeof(sinodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_REG_INODE_HEADER_1(&inodep, &sinodep); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodep, block, offset, sizeof(inodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep.file_size; ++ i->i_fop = &generic_ro_fops; ++ if(sBlk->block_size > 4096) ++ i->i_data.a_ops = &squashfs_aops; ++ else if(sBlk->block_size == 4096) ++ i->i_data.a_ops = &squashfs_aops_4K; ++ else ++ i->i_data.a_ops = &squashfs_aops_lessthan4K; ++ i->i_mode |= S_IFREG; ++ i->i_mtime.tv_sec = inodep.mtime; ++ i->i_atime.tv_sec = inodep.mtime; ++ i->i_ctime.tv_sec = inodep.mtime; ++ i->i_blocks = ((i->i_size - 1) >> 9) + 1; ++ i->i_blksize = PAGE_CACHE_SIZE; ++ SQUASHFS_I(i)->u.s1.fragment_start_block = SQUASHFS_INVALID_BLK; ++ SQUASHFS_I(i)->u.s1.fragment_offset = 0; ++ SQUASHFS_I(i)->start_block = inodep.start_block; ++ SQUASHFS_I(i)->block_list_start = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ TRACE("File inode %x:%x, start_block %x, block_list_start %x, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, inodep.start_block, next_block, next_offset); ++ break; ++ } ++ case SQUASHFS_DIR_TYPE: { ++ squashfs_dir_inode_header_1 inodep; ++ ++ if(msBlk->swap) { ++ squashfs_dir_inode_header_1 sinodep; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodep, block, offset, sizeof(sinodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DIR_INODE_HEADER_1(&inodep, &sinodep); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodep, block, offset, sizeof(inodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep.file_size; ++ i->i_op = &squashfs_dir_inode_ops; ++ i->i_fop = &squashfs_dir_ops; ++ i->i_mode |= S_IFDIR; ++ i->i_mtime.tv_sec = inodep.mtime; ++ i->i_atime.tv_sec = inodep.mtime; ++ i->i_ctime.tv_sec = inodep.mtime; ++ SQUASHFS_I(i)->start_block = inodep.start_block; ++ SQUASHFS_I(i)->offset = inodep.offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = 0; ++ TRACE("Directory inode %x:%x, start_block %x, offset %x\n", SQUASHFS_INODE_BLK(inode), offset, ++ inodep.start_block, inodep.offset); ++ break; ++ } ++ case SQUASHFS_SYMLINK_TYPE: { ++ squashfs_symlink_inode_header_1 inodep; ++ ++ if(msBlk->swap) { ++ squashfs_symlink_inode_header_1 sinodep; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodep, block, offset, sizeof(sinodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_SYMLINK_INODE_HEADER_1(&inodep, &sinodep); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodep, block, offset, sizeof(inodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep.symlink_size; ++ i->i_op = &page_symlink_inode_operations; ++ i->i_data.a_ops = &squashfs_symlink_aops; ++ i->i_mode |= S_IFLNK; ++ SQUASHFS_I(i)->start_block = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ TRACE("Symbolic link inode %x:%x, start_block %x, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, next_block, next_offset); ++ break; ++ } ++ case SQUASHFS_BLKDEV_TYPE: ++ case SQUASHFS_CHRDEV_TYPE: { ++ squashfs_dev_inode_header_1 inodep; ++ ++ if(msBlk->swap) { ++ squashfs_dev_inode_header_1 sinodep; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodep, block, offset, sizeof(sinodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DEV_INODE_HEADER_1(&inodep, &sinodep); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodep, block, offset, sizeof(inodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_size = 0; ++ i->i_mode |= (inodeb.inode_type == SQUASHFS_CHRDEV_TYPE) ? S_IFCHR : S_IFBLK; ++ init_special_inode(i, i->i_mode, old_decode_dev(inodep.rdev)); ++ TRACE("Device inode %x:%x, rdev %x\n", SQUASHFS_INODE_BLK(inode), offset, inodep.rdev); ++ break; ++ } ++ case SQUASHFS_IPC_TYPE: { ++ squashfs_ipc_inode_header_1 inodep; ++ ++ if(msBlk->swap) { ++ squashfs_ipc_inode_header_1 sinodep; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodep, block, offset, sizeof(sinodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_IPC_INODE_HEADER_1(&inodep, &sinodep); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodep, block, offset, sizeof(inodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_size = 0; ++ i->i_mode |= (inodep.type == SQUASHFS_FIFO_TYPE) ? S_IFIFO : S_IFSOCK; ++ i->i_uid = msBlk->uid[inodep.offset * 16 + inodeb.uid]; ++ init_special_inode(i, i->i_mode, 0); ++ break; ++ } ++ default: ++ ERROR("Unknown inode type %d in squashfs_iget!\n", inodeb.inode_type); ++ goto failed_read1; ++ } ++ ++ if(inodeb.guid == 15) ++ i->i_gid = i->i_uid; ++ else ++ i->i_gid = msBlk->guid[inodeb.guid]; ++ ++ insert_inode_hash(i); ++ return i; ++ ++failed_read: ++ ERROR("Unable to read inode [%x:%x]\n", block, offset); ++ ++failed_read1: ++ return NULL; ++} ++#endif ++ ++ ++static struct inode *squashfs_iget(struct super_block *s, squashfs_inode inode) ++{ ++ struct inode *i = new_inode(s); ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)s->s_fs_info; ++ squashfs_super_block *sBlk = &msBlk->sBlk; ++ unsigned int block = SQUASHFS_INODE_BLK(inode) + sBlk->inode_table_start; ++ unsigned int offset = SQUASHFS_INODE_OFFSET(inode); ++ unsigned int next_block, next_offset; ++ squashfs_base_inode_header inodeb; ++ ++ TRACE("Entered squashfs_iget\n"); ++ ++ if(msBlk->swap) { ++ squashfs_base_inode_header sinodeb; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodeb, block, offset, ++ sizeof(sinodeb), &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_BASE_INODE_HEADER(&inodeb, &sinodeb, sizeof(sinodeb)); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodeb, block, offset, ++ sizeof(inodeb), &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_nlink = 1; ++ ++ i->i_mtime.tv_sec = sBlk->mkfs_time; ++ i->i_atime.tv_sec = sBlk->mkfs_time; ++ i->i_ctime.tv_sec = sBlk->mkfs_time; ++ ++ i->i_uid = msBlk->uid[inodeb.uid]; ++ i->i_ino = SQUASHFS_MK_VFS_INODE(block - sBlk->inode_table_start, offset); ++ ++ i->i_mode = inodeb.mode; ++ ++ switch(inodeb.inode_type) { ++ case SQUASHFS_FILE_TYPE: { ++ squashfs_reg_inode_header inodep; ++ ++ if(msBlk->swap) { ++ squashfs_reg_inode_header sinodep; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodep, block, offset, sizeof(sinodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_REG_INODE_HEADER(&inodep, &sinodep); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodep, block, offset, sizeof(inodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ SQUASHFS_I(i)->u.s1.fragment_start_block = SQUASHFS_INVALID_BLK; ++ if(inodep.fragment != SQUASHFS_INVALID_BLK && !get_fragment_location(s, inodep.fragment, ++ &SQUASHFS_I(i)->u.s1.fragment_start_block, &SQUASHFS_I(i)->u.s1.fragment_size)) ++ goto failed_read; ++ ++ SQUASHFS_I(i)->u.s1.fragment_offset = inodep.offset; ++ i->i_size = inodep.file_size; ++ i->i_fop = &generic_ro_fops; ++ if(sBlk->block_size > 4096) ++ i->i_data.a_ops = &squashfs_aops; ++ else ++ i->i_data.a_ops = &squashfs_aops_4K; ++ i->i_mode |= S_IFREG; ++ i->i_mtime.tv_sec = inodep.mtime; ++ i->i_atime.tv_sec = inodep.mtime; ++ i->i_ctime.tv_sec = inodep.mtime; ++ i->i_blocks = ((i->i_size - 1) >> 9) + 1; ++ i->i_blksize = PAGE_CACHE_SIZE; ++ SQUASHFS_I(i)->start_block = inodep.start_block; ++ SQUASHFS_I(i)->block_list_start = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ TRACE("File inode %x:%x, start_block %x, block_list_start %x, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, inodep.start_block, next_block, next_offset); ++ break; ++ } ++ case SQUASHFS_DIR_TYPE: { ++ squashfs_dir_inode_header inodep; ++ ++ if(msBlk->swap) { ++ squashfs_dir_inode_header sinodep; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodep, block, offset, sizeof(sinodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DIR_INODE_HEADER(&inodep, &sinodep); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodep, block, offset, sizeof(inodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep.file_size; ++ i->i_op = &squashfs_dir_inode_ops; ++ i->i_fop = &squashfs_dir_ops; ++ i->i_mode |= S_IFDIR; ++ i->i_mtime.tv_sec = inodep.mtime; ++ i->i_atime.tv_sec = inodep.mtime; ++ i->i_ctime.tv_sec = inodep.mtime; ++ SQUASHFS_I(i)->start_block = inodep.start_block; ++ SQUASHFS_I(i)->offset = inodep.offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = 0; ++ TRACE("Directory inode %x:%x, start_block %x, offset %x\n", SQUASHFS_INODE_BLK(inode), offset, ++ inodep.start_block, inodep.offset); ++ break; ++ } ++ case SQUASHFS_LDIR_TYPE: { ++ squashfs_ldir_inode_header inodep; ++ ++ if(msBlk->swap) { ++ squashfs_ldir_inode_header sinodep; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodep, block, offset, sizeof(sinodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_LDIR_INODE_HEADER(&inodep, &sinodep); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodep, block, offset, sizeof(inodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep.file_size; ++ i->i_op = &squashfs_dir_inode_ops; ++ i->i_fop = &squashfs_dir_ops; ++ i->i_mode |= S_IFDIR; ++ i->i_mtime.tv_sec = inodep.mtime; ++ i->i_atime.tv_sec = inodep.mtime; ++ i->i_ctime.tv_sec = inodep.mtime; ++ SQUASHFS_I(i)->start_block = inodep.start_block; ++ SQUASHFS_I(i)->offset = inodep.offset; ++ SQUASHFS_I(i)->u.s2.directory_index_start = next_block; ++ SQUASHFS_I(i)->u.s2.directory_index_offset = next_offset; ++ SQUASHFS_I(i)->u.s2.directory_index_count = inodep.i_count; ++ TRACE("Long directory inode %x:%x, start_block %x, offset %x\n", SQUASHFS_INODE_BLK(inode), offset, ++ inodep.start_block, inodep.offset); ++ break; ++ } ++ case SQUASHFS_SYMLINK_TYPE: { ++ squashfs_symlink_inode_header inodep; ++ ++ if(msBlk->swap) { ++ squashfs_symlink_inode_header sinodep; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodep, block, offset, sizeof(sinodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_SYMLINK_INODE_HEADER(&inodep, &sinodep); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodep, block, offset, sizeof(inodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_size = inodep.symlink_size; ++ i->i_op = &page_symlink_inode_operations; ++ i->i_data.a_ops = &squashfs_symlink_aops; ++ i->i_mode |= S_IFLNK; ++ SQUASHFS_I(i)->start_block = next_block; ++ SQUASHFS_I(i)->offset = next_offset; ++ TRACE("Symbolic link inode %x:%x, start_block %x, offset %x\n", ++ SQUASHFS_INODE_BLK(inode), offset, next_block, next_offset); ++ break; ++ } ++ case SQUASHFS_BLKDEV_TYPE: ++ case SQUASHFS_CHRDEV_TYPE: { ++ squashfs_dev_inode_header inodep; ++ ++ if(msBlk->swap) { ++ squashfs_dev_inode_header sinodep; ++ ++ if(!squashfs_get_cached_block(s, (char *) &sinodep, block, offset, sizeof(sinodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ SQUASHFS_SWAP_DEV_INODE_HEADER(&inodep, &sinodep); ++ } else ++ if(!squashfs_get_cached_block(s, (char *) &inodep, block, offset, sizeof(inodep), ++ &next_block, &next_offset)) ++ goto failed_read; ++ ++ i->i_size = 0; ++ i->i_mode |= (inodeb.inode_type == SQUASHFS_CHRDEV_TYPE) ? S_IFCHR : S_IFBLK; ++ init_special_inode(i, i->i_mode, old_decode_dev(inodep.rdev)); ++ TRACE("Device inode %x:%x, rdev %x\n", SQUASHFS_INODE_BLK(inode), offset, inodep.rdev); ++ break; ++ } ++ case SQUASHFS_FIFO_TYPE: ++ case SQUASHFS_SOCKET_TYPE: { ++ i->i_size = 0; ++ i->i_mode |= (inodeb.inode_type == SQUASHFS_FIFO_TYPE) ? S_IFIFO : S_IFSOCK; ++ init_special_inode(i, i->i_mode, 0); ++ break; ++ } ++ default: ++ ERROR("Unknown inode type %d in squashfs_iget!\n", inodeb.inode_type); ++ goto failed_read1; ++ } ++ ++ if(inodeb.guid == SQUASHFS_GUIDS) ++ i->i_gid = i->i_uid; ++ else ++ i->i_gid = msBlk->guid[inodeb.guid]; ++ ++ insert_inode_hash(i); ++ return i; ++ ++failed_read: ++ ERROR("Unable to read inode [%x:%x]\n", block, offset); ++ ++failed_read1: ++ return NULL; ++} ++ ++ ++static int squashfs_fill_super(struct super_block *s, ++ void *data, int silent) ++{ ++ squashfs_sb_info *msBlk; ++ squashfs_super_block *sBlk; ++ int i; ++ char b[BDEVNAME_SIZE]; ++ ++ TRACE("Entered squashfs_read_superblock\n"); ++ ++ if(!(s->s_fs_info = (void *) kmalloc(sizeof(squashfs_sb_info), GFP_KERNEL))) { ++ ERROR("Failed to allocate superblock\n"); ++ return -ENOMEM; ++ } ++ msBlk = (squashfs_sb_info *) s->s_fs_info; ++ sBlk = &msBlk->sBlk; ++ ++ msBlk->devblksize = sb_min_blocksize(s, BLOCK_SIZE); ++ msBlk->devblksize_log2 = ffz(~msBlk->devblksize); ++ ++ init_MUTEX(&msBlk->read_page_mutex); ++ init_MUTEX(&msBlk->block_cache_mutex); ++ init_MUTEX(&msBlk->fragment_mutex); ++ ++ init_waitqueue_head(&msBlk->waitq); ++ init_waitqueue_head(&msBlk->fragment_wait_queue); ++ ++ if(!read_data(s, (char *) sBlk, SQUASHFS_START, sizeof(squashfs_super_block) | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL)) { ++ SERROR("unable to read superblock\n"); ++ goto failed_mount; ++ } ++ ++ /* Check it is a SQUASHFS superblock */ ++ msBlk->swap = 0; ++ if((s->s_magic = sBlk->s_magic) != SQUASHFS_MAGIC) { ++ if(sBlk->s_magic == SQUASHFS_MAGIC_SWAP) { ++ squashfs_super_block sblk; ++ WARNING("Mounting a different endian SQUASHFS filesystem on %s\n", bdevname(s->s_bdev, b)); ++ SQUASHFS_SWAP_SUPER_BLOCK(&sblk, sBlk); ++ memcpy(sBlk, &sblk, sizeof(squashfs_super_block)); ++ msBlk->swap = 1; ++ } else { ++ SERROR("Can't find a SQUASHFS superblock on %s\n", bdevname(s->s_bdev, b)); ++ goto failed_mount; ++ } ++ } ++ ++ /* Check the MAJOR & MINOR versions */ ++#ifdef SQUASHFS_1_0_COMPATIBILITY ++ if((sBlk->s_major != 1) && (sBlk->s_major != 2 || sBlk->s_minor > SQUASHFS_MINOR)) { ++ SERROR("Major/Minor mismatch, filesystem is (%d:%d), I support (1 : x) or (2 : <= %d)\n", ++ sBlk->s_major, sBlk->s_minor, SQUASHFS_MINOR); ++ goto failed_mount; ++ } ++ if(sBlk->s_major == 1) ++ sBlk->block_size = sBlk->block_size_1; ++#else ++ if(sBlk->s_major != SQUASHFS_MAJOR || sBlk->s_minor > SQUASHFS_MINOR) { ++ SERROR("Major/Minor mismatch, filesystem is (%d:%d), I support (%d: <= %d)\n", ++ sBlk->s_major, sBlk->s_minor, SQUASHFS_MAJOR, SQUASHFS_MINOR); ++ goto failed_mount; ++ } ++#endif ++ ++ TRACE("Found valid superblock on %s\n", bdevname(s->s_bdev, b)); ++ TRACE("Inodes are %scompressed\n", SQUASHFS_UNCOMPRESSED_INODES(sBlk->flags) ? "un" : ""); ++ TRACE("Data is %scompressed\n", SQUASHFS_UNCOMPRESSED_DATA(sBlk->flags) ? "un" : ""); ++ TRACE("Check data is %s present in the filesystem\n", SQUASHFS_CHECK_DATA(sBlk->flags) ? "" : "not"); ++ TRACE("Filesystem size %d bytes\n", sBlk->bytes_used); ++ TRACE("Block size %d\n", sBlk->block_size); ++ TRACE("Number of inodes %d\n", sBlk->inodes); ++ if(sBlk->s_major > 1) ++ TRACE("Number of fragments %d\n", sBlk->fragments); ++ TRACE("Number of uids %d\n", sBlk->no_uids); ++ TRACE("Number of gids %d\n", sBlk->no_guids); ++ TRACE("sBlk->inode_table_start %x\n", sBlk->inode_table_start); ++ TRACE("sBlk->directory_table_start %x\n", sBlk->directory_table_start); ++ if(sBlk->s_major > 1) ++ TRACE("sBlk->fragment_table_start %x\n", sBlk->fragment_table_start); ++ TRACE("sBlk->uid_start %x\n", sBlk->uid_start); ++ ++ s->s_flags |= MS_RDONLY; ++ s->s_op = &squashfs_ops; ++ ++ /* Init inode_table block pointer array */ ++ if(!(msBlk->block_cache = (squashfs_cache *) kmalloc(sizeof(squashfs_cache) * SQUASHFS_CACHED_BLKS, GFP_KERNEL))) { ++ ERROR("Failed to allocate block cache\n"); ++ goto failed_mount; ++ } ++ ++ for(i = 0; i < SQUASHFS_CACHED_BLKS; i++) ++ msBlk->block_cache[i].block = SQUASHFS_INVALID_BLK; ++ ++ msBlk->next_cache = 0; ++ ++ /* Allocate read_data block */ ++ msBlk->read_size = (sBlk->block_size < SQUASHFS_METADATA_SIZE) ? SQUASHFS_METADATA_SIZE : sBlk->block_size; ++ if(!(msBlk->read_data = (char *) kmalloc(msBlk->read_size, GFP_KERNEL))) { ++ ERROR("Failed to allocate read_data block\n"); ++ goto failed_mount1; ++ } ++ ++ /* Allocate read_page block */ ++ if(sBlk->block_size > PAGE_CACHE_SIZE) { ++ if(!(msBlk->read_page = (char *) kmalloc(sBlk->block_size, GFP_KERNEL))) { ++ ERROR("Failed to allocate read_page block\n"); ++ goto failed_mount2; ++ } ++ } else ++ msBlk->read_page = NULL; ++ ++ /* Allocate uid and gid tables */ ++ if(!(msBlk->uid = (squashfs_uid *) kmalloc((sBlk->no_uids + ++ sBlk->no_guids) * sizeof(squashfs_uid), GFP_KERNEL))) { ++ ERROR("Failed to allocate uid/gid table\n"); ++ goto failed_mount3; ++ } ++ msBlk->guid = msBlk->uid + sBlk->no_uids; ++ ++ if(msBlk->swap) { ++ squashfs_uid suid[sBlk->no_uids + sBlk->no_guids]; ++ ++ if(!read_data(s, (char *) &suid, sBlk->uid_start, ((sBlk->no_uids + sBlk->no_guids) * ++ sizeof(squashfs_uid)) | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL)) { ++ SERROR("unable to read uid/gid table\n"); ++ goto failed_mount4; ++ } ++ SQUASHFS_SWAP_DATA(msBlk->uid, suid, (sBlk->no_uids + sBlk->no_guids), (sizeof(squashfs_uid) * 8)); ++ } else ++ if(!read_data(s, (char *) msBlk->uid, sBlk->uid_start, ((sBlk->no_uids + sBlk->no_guids) * ++ sizeof(squashfs_uid)) | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL)) { ++ SERROR("unable to read uid/gid table\n"); ++ goto failed_mount4; ++ } ++ ++ ++#ifdef SQUASHFS_1_0_COMPATIBILITY ++ if(sBlk->s_major == 1) { ++ msBlk->iget = squashfs_iget_1; ++ msBlk->read_blocklist = read_blocklist_1; ++ msBlk->fragment = NULL; ++ msBlk->fragment_index = NULL; ++ goto allocate_root; ++ } ++#endif ++ msBlk->iget = squashfs_iget; ++ msBlk->read_blocklist = read_blocklist; ++ ++ if(!(msBlk->fragment = (struct squashfs_fragment_cache *) kmalloc(sizeof(struct squashfs_fragment_cache) * SQUASHFS_CACHED_FRAGMENTS, GFP_KERNEL))) { ++ ERROR("Failed to allocate fragment block cache\n"); ++ goto failed_mount4; ++ } ++ ++ for(i = 0; i < SQUASHFS_CACHED_FRAGMENTS; i++) { ++ msBlk->fragment[i].locked = 0; ++ msBlk->fragment[i].block = SQUASHFS_INVALID_BLK; ++ msBlk->fragment[i].data = NULL; ++ } ++ ++ msBlk->next_fragment = 0; ++ ++ /* Allocate fragment index table */ ++ if(!(msBlk->fragment_index = (squashfs_fragment_index *) kmalloc(SQUASHFS_FRAGMENT_INDEX_BYTES(sBlk->fragments), GFP_KERNEL))) { ++ ERROR("Failed to allocate uid/gid table\n"); ++ goto failed_mount5; ++ } ++ ++ if(SQUASHFS_FRAGMENT_INDEX_BYTES(sBlk->fragments) && ++ !read_data(s, (char *) msBlk->fragment_index, sBlk->fragment_table_start, ++ SQUASHFS_FRAGMENT_INDEX_BYTES(sBlk->fragments) | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL)) { ++ SERROR("unable to read fragment index table\n"); ++ goto failed_mount6; ++ } ++ ++ if(msBlk->swap) { ++ int i; ++ squashfs_fragment_index fragment; ++ ++ for(i = 0; i < SQUASHFS_FRAGMENT_INDEXES(sBlk->fragments); i++) { ++ SQUASHFS_SWAP_FRAGMENT_INDEXES((&fragment), &msBlk->fragment_index[i], 1); ++ msBlk->fragment_index[i] = fragment; ++ } ++ } ++ ++#ifdef SQUASHFS_1_0_COMPATIBILITY ++allocate_root: ++#endif ++ if(!(s->s_root = d_alloc_root((msBlk->iget)(s, sBlk->root_inode)))) { ++ ERROR("Root inode create failed\n"); ++ goto failed_mount5; ++ } ++ ++ TRACE("Leaving squashfs_read_super\n"); ++ return 0; ++ ++failed_mount6: ++ kfree(msBlk->fragment_index); ++failed_mount5: ++ kfree(msBlk->fragment); ++failed_mount4: ++ kfree(msBlk->uid); ++failed_mount3: ++ kfree(msBlk->read_page); ++failed_mount2: ++ kfree(msBlk->read_data); ++failed_mount1: ++ kfree(msBlk->block_cache); ++failed_mount: ++ kfree(s->s_fs_info); ++ s->s_fs_info = NULL; ++ return -EINVAL; ++} ++ ++ ++static int squashfs_statfs(struct super_block *s, struct kstatfs *buf) ++{ ++ squashfs_super_block *sBlk = &((squashfs_sb_info *)s->s_fs_info)->sBlk; ++ ++ TRACE("Entered squashfs_statfs\n"); ++ buf->f_type = SQUASHFS_MAGIC; ++ buf->f_bsize = sBlk->block_size; ++ buf->f_blocks = ((sBlk->bytes_used - 1) >> sBlk->block_log) + 1; ++ buf->f_bfree = buf->f_bavail = 0; ++ buf->f_files = sBlk->inodes; ++ buf->f_ffree = 0; ++ buf->f_namelen = SQUASHFS_NAME_LEN; ++ return 0; ++} ++ ++ ++static int squashfs_symlink_readpage(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ int index = page->index << PAGE_CACHE_SHIFT, length, bytes; ++ unsigned int block = SQUASHFS_I(inode)->start_block; ++ int offset = SQUASHFS_I(inode)->offset; ++ void *pageaddr = kmap(page); ++ ++ TRACE("Entered squashfs_symlink_readpage, page index %d, start block %x, offset %x\n", ++ page->index, SQUASHFS_I(inode)->start_block, SQUASHFS_I(inode)->offset); ++ ++ for(length = 0; length < index; length += bytes) { ++ if(!(bytes = squashfs_get_cached_block(inode->i_sb, NULL, block, offset, ++ PAGE_CACHE_SIZE, &block, &offset))) { ++ ERROR("Unable to read symbolic link [%x:%x]\n", block, offset); ++ goto skip_read; ++ } ++ } ++ ++ if(length != index) { ++ ERROR("(squashfs_symlink_readpage) length != index\n"); ++ bytes = 0; ++ goto skip_read; ++ } ++ ++ bytes = (inode->i_size - length) > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : inode->i_size - length; ++ if(!(bytes = squashfs_get_cached_block(inode->i_sb, pageaddr, block, offset, bytes, &block, &offset))) ++ ERROR("Unable to read symbolic link [%x:%x]\n", block, offset); ++ ++skip_read: ++ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); ++ kunmap(page); ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ return 0; ++} ++ ++ ++#define SIZE 256 ++ ++#ifdef SQUASHFS_1_0_COMPATIBILITY ++static unsigned int read_blocklist_1(struct inode *inode, int index, int readahead_blks, ++ char *block_list, unsigned short **block_p, unsigned int *bsize) ++{ ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)inode->i_sb->s_fs_info; ++ unsigned short *block_listp; ++ int i = 0; ++ int block_ptr = SQUASHFS_I(inode)->block_list_start; ++ int offset = SQUASHFS_I(inode)->offset; ++ unsigned int block = SQUASHFS_I(inode)->start_block; ++ ++ for(;;) { ++ int blocks = (index + readahead_blks - i); ++ if(blocks > (SIZE >> 1)) { ++ if((index - i) <= (SIZE >> 1)) ++ blocks = index - i; ++ else ++ blocks = SIZE >> 1; ++ } ++ ++ if(msBlk->swap) { ++ unsigned char sblock_list[SIZE]; ++ if(!squashfs_get_cached_block(inode->i_sb, (char *) sblock_list, block_ptr, offset, blocks << 1, &block_ptr, &offset)) { ++ ERROR("Unable to read block list [%d:%x]\n", block_ptr, offset); ++ return 0; ++ } ++ SQUASHFS_SWAP_SHORTS(((unsigned short *)block_list), ((unsigned short *)sblock_list), blocks); ++ } else ++ if(!squashfs_get_cached_block(inode->i_sb, (char *) block_list, block_ptr, offset, blocks << 1, &block_ptr, &offset)) { ++ ERROR("Unable to read block list [%d:%x]\n", block_ptr, offset); ++ return 0; ++ } ++ for(block_listp = (unsigned short *) block_list; i < index && blocks; i ++, block_listp ++, blocks --) ++ block += SQUASHFS_COMPRESSED_SIZE(*block_listp); ++ if(blocks >= readahead_blks) ++ break; ++ } ++ ++ if(bsize) ++ *bsize = SQUASHFS_COMPRESSED_SIZE(*block_listp) | (!SQUASHFS_COMPRESSED(*block_listp) ? SQUASHFS_COMPRESSED_BIT_BLOCK : 0); ++ else ++ *block_p = block_listp; ++ return block; ++} ++#endif ++ ++ ++static unsigned int read_blocklist(struct inode *inode, int index, int readahead_blks, ++ char *block_list, unsigned short **block_p, unsigned int *bsize) ++{ ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)inode->i_sb->s_fs_info; ++ unsigned int *block_listp; ++ int i = 0; ++ int block_ptr = SQUASHFS_I(inode)->block_list_start; ++ int offset = SQUASHFS_I(inode)->offset; ++ unsigned int block = SQUASHFS_I(inode)->start_block; ++ ++ for(;;) { ++ int blocks = (index + readahead_blks - i); ++ if(blocks > (SIZE >> 2)) { ++ if((index - i) <= (SIZE >> 2)) ++ blocks = index - i; ++ else ++ blocks = SIZE >> 2; ++ } ++ ++ if(msBlk->swap) { ++ unsigned char sblock_list[SIZE]; ++ if(!squashfs_get_cached_block(inode->i_sb, (char *) sblock_list, block_ptr, offset, blocks << 2, &block_ptr, &offset)) { ++ ERROR("Unable to read block list [%d:%x]\n", block_ptr, offset); ++ return 0; ++ } ++ SQUASHFS_SWAP_INTS(((unsigned int *)block_list), ((unsigned int *)sblock_list), blocks); ++ } else ++ if(!squashfs_get_cached_block(inode->i_sb, (char *) block_list, block_ptr, offset, blocks << 2, &block_ptr, &offset)) { ++ ERROR("Unable to read block list [%d:%x]\n", block_ptr, offset); ++ return 0; ++ } ++ for(block_listp = (unsigned int *) block_list; i < index && blocks; i ++, block_listp ++, blocks --) ++ block += SQUASHFS_COMPRESSED_SIZE_BLOCK(*block_listp); ++ if(blocks >= readahead_blks) ++ break; ++ } ++ ++ *bsize = *block_listp; ++ return block; ++} ++ ++ ++static int squashfs_readpage(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)inode->i_sb->s_fs_info; ++ squashfs_super_block *sBlk = &msBlk->sBlk; ++ unsigned char block_list[SIZE]; ++ unsigned int bsize, block, i = 0, bytes = 0, byte_offset = 0; ++ int index = page->index >> (sBlk->block_log - PAGE_CACHE_SHIFT); ++ void *pageaddr = kmap(page); ++ struct squashfs_fragment_cache *fragment = NULL; ++ char *data_ptr = msBlk->read_page; ++ ++ int mask = (1 << (sBlk->block_log - PAGE_CACHE_SHIFT)) - 1; ++ int start_index = page->index & ~mask; ++ int end_index = start_index | mask; ++ ++ TRACE("Entered squashfs_readpage, page index %x, start block %x\n", (unsigned int) page->index, ++ SQUASHFS_I(inode)->start_block); ++ ++ if(page->index >= ((inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)) { ++ goto skip_read; ++ } ++ ++ if(SQUASHFS_I(inode)->u.s1.fragment_start_block == SQUASHFS_INVALID_BLK || index < (inode->i_size >> sBlk->block_log)) { ++ if((block = (msBlk->read_blocklist)(inode, index, 1, block_list, NULL, &bsize)) == 0) ++ goto skip_read; ++ ++ down(&msBlk->read_page_mutex); ++ if(!(bytes = read_data(inode->i_sb, msBlk->read_page, block, bsize, NULL))) { ++ ERROR("Unable to read page, block %x, size %x\n", block, bsize); ++ up(&msBlk->read_page_mutex); ++ goto skip_read; ++ } ++ } else { ++ if((fragment = get_cached_fragment(inode->i_sb, SQUASHFS_I(inode)->u.s1.fragment_start_block, SQUASHFS_I(inode)->u.s1.fragment_size)) == NULL) { ++ ERROR("Unable to read page, block %x, size %x\n", SQUASHFS_I(inode)->u.s1.fragment_start_block, (int) SQUASHFS_I(inode)->u.s1.fragment_size); ++ goto skip_read; ++ } ++ bytes = SQUASHFS_I(inode)->u.s1.fragment_offset + (inode->i_size & (sBlk->block_size - 1)); ++ byte_offset = SQUASHFS_I(inode)->u.s1.fragment_offset; ++ data_ptr = fragment->data; ++ } ++ ++ for(i = start_index; i <= end_index && byte_offset < bytes; i++, byte_offset += PAGE_CACHE_SIZE) { ++ struct page *push_page; ++ int available_bytes = (bytes - byte_offset) > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : bytes - byte_offset; ++ ++ TRACE("bytes %d, i %d, byte_offset %d, available_bytes %d\n", bytes, i, byte_offset, available_bytes); ++ ++ if(i == page->index) { ++ memcpy(pageaddr, data_ptr + byte_offset, available_bytes); ++ memset(pageaddr + available_bytes, 0, PAGE_CACHE_SIZE - available_bytes); ++ kunmap(page); ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ unlock_page(page); ++ } else if((push_page = grab_cache_page_nowait(page->mapping, i))) { ++ void *pageaddr = kmap(push_page); ++ memcpy(pageaddr, data_ptr + byte_offset, available_bytes); ++ memset(pageaddr + available_bytes, 0, PAGE_CACHE_SIZE - available_bytes); ++ kunmap(push_page); ++ flush_dcache_page(push_page); ++ SetPageUptodate(push_page); ++ unlock_page(push_page); ++ page_cache_release(push_page); ++ } ++ } ++ ++ if(SQUASHFS_I(inode)->u.s1.fragment_start_block == SQUASHFS_INVALID_BLK || index < (inode->i_size >> sBlk->block_log)) ++ up(&msBlk->read_page_mutex); ++ else ++ release_cached_fragment(msBlk, fragment); ++ ++ return 0; ++ ++skip_read: ++ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); ++ kunmap(page); ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ return 0; ++} ++ ++ ++static int squashfs_readpage4K(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)inode->i_sb->s_fs_info; ++ squashfs_super_block *sBlk = &msBlk->sBlk; ++ unsigned char block_list[SIZE]; ++ unsigned int bsize, block, bytes = 0; ++ void *pageaddr = kmap(page); ++ ++ TRACE("Entered squashfs_readpage4K, page index %x, start block %x\n", (unsigned int) page->index, ++ SQUASHFS_I(inode)->start_block); ++ ++ if(page->index >= ((inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT)) { ++ goto skip_read; ++ } ++ ++ if(SQUASHFS_I(inode)->u.s1.fragment_start_block == SQUASHFS_INVALID_BLK || page->index < (inode->i_size >> sBlk->block_log)) { ++ block = (msBlk->read_blocklist)(inode, page->index, 1, block_list, NULL, &bsize); ++ ++ if(!(bytes = read_data(inode->i_sb, pageaddr, block, bsize, NULL))) ++ ERROR("Unable to read page, block %x, size %x\n", block, bsize); ++ } else { ++ struct squashfs_fragment_cache *fragment; ++ ++ if((fragment = get_cached_fragment(inode->i_sb, SQUASHFS_I(inode)->u.s1.fragment_start_block, SQUASHFS_I(inode)->u.s1.fragment_size)) == NULL) ++ ERROR("Unable to read page, block %x, size %x\n", SQUASHFS_I(inode)->u.s1.fragment_start_block, (int) SQUASHFS_I(inode)->u.s1.fragment_size); ++ else { ++ bytes = inode->i_size & (sBlk->block_size - 1); ++ memcpy(pageaddr, fragment->data + SQUASHFS_I(inode)->u.s1.fragment_offset, bytes); ++ release_cached_fragment(msBlk, fragment); ++ } ++ } ++ ++skip_read: ++ memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); ++ kunmap(page); ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ return 0; ++} ++ ++ ++#ifdef SQUASHFS_1_0_COMPATIBILITY ++static int squashfs_readpage_lessthan4K(struct file *file, struct page *page) ++{ ++ struct inode *inode = page->mapping->host; ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)inode->i_sb->s_fs_info; ++ squashfs_super_block *sBlk = &msBlk->sBlk; ++ unsigned char block_list[SIZE]; ++ unsigned short *block_listp, block, bytes = 0; ++ int index = page->index << (PAGE_CACHE_SHIFT - sBlk->block_log); ++ int file_blocks = ((inode->i_size - 1) >> sBlk->block_log) + 1; ++ int readahead_blks = 1 << (PAGE_CACHE_SHIFT - sBlk->block_log); ++ void *pageaddr = kmap(page); ++ ++ int i_end = index + (1 << (PAGE_CACHE_SHIFT - sBlk->block_log)); ++ int byte; ++ ++ TRACE("Entered squashfs_readpage_lessthan4K, page index %x, start block %x\n", (unsigned int) page->index, ++ SQUASHFS_I(inode)->start_block); ++ ++ block = read_blocklist_1(inode, index, readahead_blks, block_list, &block_listp, NULL); ++ ++ if(i_end > file_blocks) ++ i_end = file_blocks; ++ ++ while(index < i_end) { ++ int c_byte = !SQUASHFS_COMPRESSED(*block_listp) ? SQUASHFS_COMPRESSED_SIZE(*block_listp) | SQUASHFS_COMPRESSED_BIT_BLOCK : *block_listp; ++ if(!(byte = read_data(inode->i_sb, pageaddr, block, c_byte, NULL))) { ++ ERROR("Unable to read page, block %x, size %x\n", block, *block_listp); ++ goto skip_read; ++ } ++ block += SQUASHFS_COMPRESSED_SIZE(*block_listp); ++ pageaddr += byte; ++ bytes += byte; ++ index ++; ++ block_listp ++; ++ } ++ ++skip_read: ++ memset(pageaddr, 0, PAGE_CACHE_SIZE - bytes); ++ kunmap(page); ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ unlock_page(page); ++ ++ return 0; ++} ++#endif ++ ++ ++static int get_dir_index_using_offset(struct super_block *s, unsigned int *next_block, ++ unsigned int *next_offset, unsigned int index_start, unsigned int index_offset, ++ int i_count, long long f_pos) ++{ ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)s->s_fs_info; ++ squashfs_super_block *sBlk = &msBlk->sBlk; ++ int i, length = 0; ++ squashfs_dir_index index; ++ ++ TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %d\n", i_count, (unsigned int) f_pos); ++ ++ if(f_pos == 0) ++ return 0; ++ ++ for(i = 0; i < i_count; i++) { ++ if(msBlk->swap) { ++ squashfs_dir_index sindex; ++ squashfs_get_cached_block(s, (char *) &sindex, index_start, index_offset, ++ sizeof(sindex), &index_start, &index_offset); ++ SQUASHFS_SWAP_DIR_INDEX(&index, &sindex); ++ } else ++ squashfs_get_cached_block(s, (char *) &index, index_start, index_offset, ++ sizeof(index), &index_start, &index_offset); ++ ++ if(index.index > f_pos) ++ break; ++ ++ squashfs_get_cached_block(s, NULL, index_start, index_offset, ++ index.size + 1, &index_start, &index_offset); ++ ++ length = index.index; ++ *next_block = index.start_block + sBlk->directory_table_start; ++ } ++ ++ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; ++ return length; ++} ++ ++ ++static int get_dir_index_using_name(struct super_block *s, unsigned int *next_block, ++ unsigned int *next_offset, unsigned int index_start, unsigned int index_offset, ++ int i_count, const char *name, int size) ++{ ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)s->s_fs_info; ++ squashfs_super_block *sBlk = &msBlk->sBlk; ++ int i, length = 0; ++ char buffer[sizeof(squashfs_dir_index) + SQUASHFS_NAME_LEN + 1]; ++ squashfs_dir_index *index = (squashfs_dir_index *) buffer; ++ char str[SQUASHFS_NAME_LEN + 1]; ++ ++ TRACE("Entered get_dir_index_using_name, i_count %d\n", i_count); ++ ++ strncpy(str, name, size); ++ str[size] = '\0'; ++ ++ for(i = 0; i < i_count; i++) { ++ if(msBlk->swap) { ++ squashfs_dir_index sindex; ++ squashfs_get_cached_block(s, (char *) &sindex, index_start, index_offset, ++ sizeof(sindex), &index_start, &index_offset); ++ SQUASHFS_SWAP_DIR_INDEX(index, &sindex); ++ } else ++ squashfs_get_cached_block(s, (char *) index, index_start, index_offset, ++ sizeof(squashfs_dir_index), &index_start, &index_offset); ++ ++ squashfs_get_cached_block(s, index->name, index_start, index_offset, ++ index->size + 1, &index_start, &index_offset); ++ ++ index->name[index->size + 1] = '\0'; ++ ++ if(strcmp(index->name, str) > 0) ++ break; ++ ++ length = index->index; ++ *next_block = index->start_block + sBlk->directory_table_start; ++ } ++ ++ *next_offset = (length + *next_offset) % SQUASHFS_METADATA_SIZE; ++ return length; ++} ++ ++ ++static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) ++{ ++ struct inode *i = file->f_dentry->d_inode; ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)i->i_sb->s_fs_info; ++ squashfs_super_block *sBlk = &msBlk->sBlk; ++ int next_block = SQUASHFS_I(i)->start_block + sBlk->directory_table_start, next_offset = ++ SQUASHFS_I(i)->offset, length = 0, dirs_read = 0, dir_count; ++ squashfs_dir_header dirh; ++ char buffer[sizeof(squashfs_dir_entry) + SQUASHFS_NAME_LEN + 1]; ++ squashfs_dir_entry *dire = (squashfs_dir_entry *) buffer; ++ ++ TRACE("Entered squashfs_readdir [%x:%x]\n", next_block, next_offset); ++ ++ lock_kernel(); ++ ++ length = get_dir_index_using_offset(i->i_sb, &next_block, &next_offset, SQUASHFS_I(i)->u.s2.directory_index_start, ++ SQUASHFS_I(i)->u.s2.directory_index_offset, SQUASHFS_I(i)->u.s2.directory_index_count, file->f_pos); ++ ++ while(length < i->i_size) { ++ /* read directory header */ ++ if(msBlk->swap) { ++ squashfs_dir_header sdirh; ++ if(!squashfs_get_cached_block(i->i_sb, (char *) &sdirh, next_block, ++ next_offset, sizeof(sdirh), &next_block, &next_offset)) ++ goto failed_read; ++ length += sizeof(sdirh); ++ SQUASHFS_SWAP_DIR_HEADER(&dirh, &sdirh); ++ } else { ++ if(!squashfs_get_cached_block(i->i_sb, (char *) &dirh, next_block, ++ next_offset, sizeof(dirh), &next_block, &next_offset)) ++ goto failed_read; ++ length += sizeof(dirh); ++ } ++ ++ dir_count = dirh.count + 1; ++ while(dir_count--) { ++ if(msBlk->swap) { ++ squashfs_dir_entry sdire; ++ if(!squashfs_get_cached_block(i->i_sb, (char *) &sdire, next_block, ++ next_offset, sizeof(sdire), &next_block, &next_offset)) ++ goto failed_read; ++ length += sizeof(sdire); ++ SQUASHFS_SWAP_DIR_ENTRY(dire, &sdire); ++ } else { ++ if(!squashfs_get_cached_block(i->i_sb, (char *) dire, next_block, ++ next_offset, sizeof(*dire), &next_block, &next_offset)) ++ goto failed_read; ++ length += sizeof(*dire); ++ } ++ ++ if(!squashfs_get_cached_block(i->i_sb, dire->name, next_block, ++ next_offset, dire->size + 1, &next_block, &next_offset)) ++ goto failed_read; ++ length += dire->size + 1; ++ ++ if(file->f_pos >= length) ++ continue; ++ ++ dire->name[dire->size + 1] = '\0'; ++ ++ TRACE("Calling filldir(%x, %s, %d, %d, %x:%x, %d)\n", (unsigned int) dirent, ++ dire->name, dire->size + 1, (int) file->f_pos, ++ dirh.start_block, dire->offset, squashfs_filetype_table[dire->type]); ++ ++ if(filldir(dirent, dire->name, dire->size + 1, file->f_pos, SQUASHFS_MK_VFS_INODE(dirh.start_block, ++ dire->offset), squashfs_filetype_table[dire->type]) < 0) { ++ TRACE("Filldir returned less than 0\n"); ++ unlock_kernel(); ++ return dirs_read; ++ } ++ ++ file->f_pos = length; ++ dirs_read ++; ++ } ++ } ++ ++ unlock_kernel(); ++ return dirs_read; ++ ++failed_read: ++ unlock_kernel(); ++ ERROR("Unable to read directory block [%x:%x]\n", next_block, next_offset); ++ return 0; ++} ++ ++ ++static struct dentry *squashfs_lookup(struct inode *i, struct dentry *dentry, struct nameidata *nd) ++{ ++ const unsigned char *name =dentry->d_name.name; ++ int len = dentry->d_name.len; ++ struct inode *inode = NULL; ++ squashfs_sb_info *msBlk = (squashfs_sb_info *)i->i_sb->s_fs_info; ++ squashfs_super_block *sBlk = &msBlk->sBlk; ++ int next_block = SQUASHFS_I(i)->start_block + sBlk->directory_table_start, next_offset = ++ SQUASHFS_I(i)->offset, length = 0, dir_count; ++ squashfs_dir_header dirh; ++ char buffer[sizeof(squashfs_dir_entry) + SQUASHFS_NAME_LEN]; ++ squashfs_dir_entry *dire = (squashfs_dir_entry *) buffer; ++ int squashfs_2_1 = sBlk->s_major == 2 && sBlk->s_minor == 1; ++ ++ TRACE("Entered squashfs_lookup [%x:%x]\n", next_block, next_offset); ++ ++ lock_kernel(); ++ ++ length = get_dir_index_using_name(i->i_sb, &next_block, &next_offset, SQUASHFS_I(i)->u.s2.directory_index_start, ++ SQUASHFS_I(i)->u.s2.directory_index_offset, SQUASHFS_I(i)->u.s2.directory_index_count, name, len); ++ ++ while(length < i->i_size) { ++ /* read directory header */ ++ if(msBlk->swap) { ++ squashfs_dir_header sdirh; ++ if(!squashfs_get_cached_block(i->i_sb, (char *) &sdirh, next_block, next_offset, ++ sizeof(sdirh), &next_block, &next_offset)) ++ goto failed_read; ++ length += sizeof(sdirh); ++ SQUASHFS_SWAP_DIR_HEADER(&dirh, &sdirh); ++ } else { ++ if(!squashfs_get_cached_block(i->i_sb, (char *) &dirh, next_block, next_offset, ++ sizeof(dirh), &next_block, &next_offset)) ++ goto failed_read; ++ length += sizeof(dirh); ++ } ++ ++ dir_count = dirh.count + 1; ++ while(dir_count--) { ++ if(msBlk->swap) { ++ squashfs_dir_entry sdire; ++ if(!squashfs_get_cached_block(i->i_sb, (char *) &sdire, ++ next_block,next_offset, sizeof(sdire), &next_block, &next_offset)) ++ goto failed_read; ++ length += sizeof(sdire); ++ SQUASHFS_SWAP_DIR_ENTRY(dire, &sdire); ++ } else { ++ if(!squashfs_get_cached_block(i->i_sb, (char *) dire, ++ next_block,next_offset, sizeof(*dire), &next_block, &next_offset)) ++ goto failed_read; ++ length += sizeof(*dire); ++ } ++ ++ if(!squashfs_get_cached_block(i->i_sb, dire->name, ++ next_block, next_offset, dire->size + 1, &next_block, &next_offset)) ++ goto failed_read; ++ length += dire->size + 1; ++ ++ if(squashfs_2_1 && name[0] < dire->name[0]) ++ goto exit_loop; ++ ++ if((len == dire->size + 1) && !strncmp(name, dire->name, len)) { ++ squashfs_inode ino = SQUASHFS_MKINODE(dirh.start_block, dire->offset); ++ ++ TRACE("calling squashfs_iget for directory entry %s, inode %x:%x\n", ++ name, dirh.start_block, dire->offset); ++ ++ inode = (msBlk->iget)(i->i_sb, ino); ++ ++ goto exit_loop; ++ } ++ } ++ } ++ ++exit_loop: ++ d_add(dentry, inode); ++ unlock_kernel(); ++ return ERR_PTR(0); ++ ++failed_read: ++ ERROR("Unable to read directory block [%x:%x]\n", next_block, next_offset); ++ goto exit_loop; ++} ++ ++ ++static void squashfs_put_super(struct super_block *s) ++{ ++ int i; ++ ++ if(s->s_fs_info) { ++ squashfs_sb_info *sbi = (squashfs_sb_info *) s->s_fs_info; ++ if(sbi->block_cache) { ++ for(i = 0; i < SQUASHFS_CACHED_BLKS; i++) ++ if(sbi->block_cache[i].block != SQUASHFS_INVALID_BLK) ++ kfree(sbi->block_cache[i].data); ++ kfree(sbi->block_cache); ++ } ++ if(sbi->read_data) kfree(sbi->read_data); ++ if(sbi->read_page) kfree(sbi->read_page); ++ if(sbi->uid) kfree(sbi->uid); ++ if(sbi->fragment) { ++ for(i = 0; i < SQUASHFS_CACHED_FRAGMENTS; i++) ++ if(sbi->fragment[i].data != NULL) ++ SQUASHFS_FREE(sbi->fragment[i].data); ++ kfree(sbi->fragment); ++ } ++ if(sbi->fragment_index) kfree(sbi->fragment_index); ++ kfree(s->s_fs_info); ++ s->s_fs_info = NULL; ++ } ++} ++ ++ ++static struct super_block *squashfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) ++{ ++ return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super); ++} ++ ++ ++static int __init init_squashfs_fs(void) ++{ ++ int err = init_inodecache(); ++ if(err) ++ return err; ++ ++ printk(KERN_INFO "Squashfs 2.2 (released 2005/07/03) (C) 2002-2005 Phillip Lougher\n"); ++ ++ if(!(stream.workspace = (char *) vmalloc(zlib_inflate_workspacesize()))) { ++ ERROR("Failed to allocate zlib workspace\n"); ++ destroy_inodecache(); ++ return -ENOMEM; ++ } ++ ++ if((err = register_filesystem(&squashfs_fs_type))) { ++ vfree(stream.workspace); ++ destroy_inodecache(); ++ } ++ ++ return err; ++} ++ ++ ++static void __exit exit_squashfs_fs(void) ++{ ++ vfree(stream.workspace); ++ unregister_filesystem(&squashfs_fs_type); ++ destroy_inodecache(); ++} ++ ++ ++static kmem_cache_t * squashfs_inode_cachep; ++ ++ ++static struct inode *squashfs_alloc_inode(struct super_block *sb) ++{ ++ struct squashfs_inode_info *ei; ++ ei = (struct squashfs_inode_info *)kmem_cache_alloc(squashfs_inode_cachep, SLAB_KERNEL); ++ if (!ei) ++ return NULL; ++ return &ei->vfs_inode; ++} ++ ++ ++static void squashfs_destroy_inode(struct inode *inode) ++{ ++ kmem_cache_free(squashfs_inode_cachep, SQUASHFS_I(inode)); ++} ++ ++ ++static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) ++{ ++ struct squashfs_inode_info *ei = (struct squashfs_inode_info *) foo; ++ ++ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == ++ SLAB_CTOR_CONSTRUCTOR) ++ inode_init_once(&ei->vfs_inode); ++} ++ ++ ++static int init_inodecache(void) ++{ ++ squashfs_inode_cachep = kmem_cache_create("squashfs_inode_cache", ++ sizeof(struct squashfs_inode_info), ++ 0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, ++ init_once, NULL); ++ if (squashfs_inode_cachep == NULL) ++ return -ENOMEM; ++ return 0; ++} ++ ++ ++static void destroy_inodecache(void) ++{ ++ if (kmem_cache_destroy(squashfs_inode_cachep)) ++ printk(KERN_INFO "squashfs_inode_cache: not all structures were freed\n"); ++} ++ ++ ++module_init(init_squashfs_fs); ++module_exit(exit_squashfs_fs); ++MODULE_DESCRIPTION("squashfs, a compressed read-only filesystem"); ++MODULE_AUTHOR("Phillip Lougher <phillip@lougher.demon.co.uk>"); ++MODULE_LICENSE("GPL"); +diff --new-file -urp linux-2.6.12/fs/squashfs/Makefile linux-2.6.12-squashfs2.2/fs/squashfs/Makefile +--- linux-2.6.12/fs/squashfs/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12-squashfs2.2/fs/squashfs/Makefile 2005-07-04 02:35:35.000000000 +0100 +@@ -0,0 +1,7 @@ ++# ++# Makefile for the linux squashfs routines. ++# ++ ++obj-$(CONFIG_SQUASHFS) += squashfs.o ++ ++squashfs-objs := inode.o +diff --new-file -urp linux-2.6.12/include/linux/squashfs_fs.h linux-2.6.12-squashfs2.2/include/linux/squashfs_fs.h +--- linux-2.6.12/include/linux/squashfs_fs.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12-squashfs2.2/include/linux/squashfs_fs.h 2005-07-04 02:35:35.000000000 +0100 +@@ -0,0 +1,519 @@ ++#ifndef SQUASHFS_FS ++#define SQUASHFS_FS ++/* ++ * Squashfs ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005 Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs_fs.h ++ */ ++ ++#ifdef CONFIG_SQUASHFS_VMALLOC ++#define SQUASHFS_ALLOC(a) vmalloc(a) ++#define SQUASHFS_FREE(a) vfree(a) ++#else ++#define SQUASHFS_ALLOC(a) kmalloc(a, GFP_KERNEL) ++#define SQUASHFS_FREE(a) kfree(a) ++#endif ++#define SQUASHFS_CACHED_FRAGMENTS CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE ++#define SQUASHFS_MAJOR 2 ++#define SQUASHFS_MINOR 1 ++#define SQUASHFS_MAGIC 0x73717368 ++#define SQUASHFS_MAGIC_SWAP 0x68737173 ++#define SQUASHFS_START 0 ++ ++/* size of metadata (inode and directory) blocks */ ++#define SQUASHFS_METADATA_SIZE 8192 ++#define SQUASHFS_METADATA_LOG 13 ++ ++/* default size of data blocks */ ++#define SQUASHFS_FILE_SIZE 65536 ++#define SQUASHFS_FILE_LOG 16 ++ ++#define SQUASHFS_FILE_MAX_SIZE 65536 ++ ++/* Max number of uids and gids */ ++#define SQUASHFS_UIDS 256 ++#define SQUASHFS_GUIDS 255 ++ ++/* Max length of filename (not 255) */ ++#define SQUASHFS_NAME_LEN 256 ++ ++#define SQUASHFS_INVALID ((long long) 0xffffffffffff) ++#define SQUASHFS_INVALID_BLK ((long long) 0xffffffff) ++#define SQUASHFS_USED_BLK ((long long) 0xfffffffe) ++ ++/* Filesystem flags */ ++#define SQUASHFS_NOI 0 ++#define SQUASHFS_NOD 1 ++#define SQUASHFS_CHECK 2 ++#define SQUASHFS_NOF 3 ++#define SQUASHFS_NO_FRAG 4 ++#define SQUASHFS_ALWAYS_FRAG 5 ++#define SQUASHFS_DUPLICATE 6 ++#define SQUASHFS_BIT(flag, bit) ((flag >> bit) & 1) ++#define SQUASHFS_UNCOMPRESSED_INODES(flags) SQUASHFS_BIT(flags, SQUASHFS_NOI) ++#define SQUASHFS_UNCOMPRESSED_DATA(flags) SQUASHFS_BIT(flags, SQUASHFS_NOD) ++#define SQUASHFS_UNCOMPRESSED_FRAGMENTS(flags) SQUASHFS_BIT(flags, SQUASHFS_NOF) ++#define SQUASHFS_NO_FRAGMENTS(flags) SQUASHFS_BIT(flags, SQUASHFS_NO_FRAG) ++#define SQUASHFS_ALWAYS_FRAGMENTS(flags) SQUASHFS_BIT(flags, SQUASHFS_ALWAYS_FRAG) ++#define SQUASHFS_DUPLICATES(flags) SQUASHFS_BIT(flags, SQUASHFS_DUPLICATE) ++#define SQUASHFS_CHECK_DATA(flags) SQUASHFS_BIT(flags, SQUASHFS_CHECK) ++#define SQUASHFS_MKFLAGS(noi, nod, check_data, nof, no_frag, always_frag, duplicate_checking) (noi | (nod << 1) | (check_data << 2) | (nof << 3) | (no_frag << 4) | (always_frag << 5) | (duplicate_checking << 6)) ++ ++/* Max number of types and file types */ ++#define SQUASHFS_DIR_TYPE 1 ++#define SQUASHFS_FILE_TYPE 2 ++#define SQUASHFS_SYMLINK_TYPE 3 ++#define SQUASHFS_BLKDEV_TYPE 4 ++#define SQUASHFS_CHRDEV_TYPE 5 ++#define SQUASHFS_FIFO_TYPE 6 ++#define SQUASHFS_SOCKET_TYPE 7 ++#define SQUASHFS_LDIR_TYPE 8 ++ ++/* 1.0 filesystem type definitions */ ++#define SQUASHFS_TYPES 5 ++#define SQUASHFS_IPC_TYPE 0 ++ ++/* Flag whether block is compressed or uncompressed, bit is set if block is uncompressed */ ++#define SQUASHFS_COMPRESSED_BIT (1 << 15) ++#define SQUASHFS_COMPRESSED_SIZE(B) (((B) & ~SQUASHFS_COMPRESSED_BIT) ? \ ++ (B) & ~SQUASHFS_COMPRESSED_BIT : SQUASHFS_COMPRESSED_BIT) ++ ++#define SQUASHFS_COMPRESSED(B) (!((B) & SQUASHFS_COMPRESSED_BIT)) ++ ++#define SQUASHFS_COMPRESSED_BIT_BLOCK (1 << 24) ++#define SQUASHFS_COMPRESSED_SIZE_BLOCK(B) (((B) & ~SQUASHFS_COMPRESSED_BIT_BLOCK) ? \ ++ (B) & ~SQUASHFS_COMPRESSED_BIT_BLOCK : SQUASHFS_COMPRESSED_BIT_BLOCK) ++ ++#define SQUASHFS_COMPRESSED_BLOCK(B) (!((B) & SQUASHFS_COMPRESSED_BIT_BLOCK)) ++ ++/* ++ * Inode number ops. Inodes consist of a compressed block number, and an uncompressed ++ * offset within that block ++ */ ++#define SQUASHFS_INODE_BLK(a) ((unsigned int) ((a) >> 16)) ++#define SQUASHFS_INODE_OFFSET(a) ((unsigned int) ((a) & 0xffff)) ++#define SQUASHFS_MKINODE(A, B) ((squashfs_inode)(((squashfs_inode) (A) << 16)\ ++ + (B))) ++ ++/* Compute 32 bit VFS inode number from squashfs inode number */ ++#define SQUASHFS_MK_VFS_INODE(a, b) ((unsigned int) (((a) << 8) + ((b) >> 2) + 1)) ++ ++/* Translate between VFS mode and squashfs mode */ ++#define SQUASHFS_MODE(a) ((a) & 0xfff) ++ ++/* fragment and fragment table defines */ ++typedef unsigned int squashfs_fragment_index; ++#define SQUASHFS_FRAGMENT_BYTES(A) (A * sizeof(squashfs_fragment_entry)) ++#define SQUASHFS_FRAGMENT_INDEX(A) (SQUASHFS_FRAGMENT_BYTES(A) / SQUASHFS_METADATA_SIZE) ++#define SQUASHFS_FRAGMENT_INDEX_OFFSET(A) (SQUASHFS_FRAGMENT_BYTES(A) % SQUASHFS_METADATA_SIZE) ++#define SQUASHFS_FRAGMENT_INDEXES(A) ((SQUASHFS_FRAGMENT_BYTES(A) + SQUASHFS_METADATA_SIZE - 1) / SQUASHFS_METADATA_SIZE) ++#define SQUASHFS_FRAGMENT_INDEX_BYTES(A) (SQUASHFS_FRAGMENT_INDEXES(A) * sizeof(squashfs_fragment_index)) ++ ++/* cached data constants for filesystem */ ++#define SQUASHFS_CACHED_BLKS 8 ++ ++#define SQUASHFS_MAX_FILE_SIZE_LOG 32 ++#define SQUASHFS_MAX_FILE_SIZE ((long long) 1 << (SQUASHFS_MAX_FILE_SIZE_LOG - 1)) ++ ++#define SQUASHFS_MARKER_BYTE 0xff ++ ++ ++/* ++ * definitions for structures on disk ++ */ ++ ++typedef unsigned int squashfs_block; ++typedef long long squashfs_inode; ++ ++typedef unsigned int squashfs_uid; ++ ++typedef struct squashfs_super_block { ++ unsigned int s_magic; ++ unsigned int inodes; ++ unsigned int bytes_used; ++ unsigned int uid_start; ++ unsigned int guid_start; ++ unsigned int inode_table_start; ++ unsigned int directory_table_start; ++ unsigned int s_major:16; ++ unsigned int s_minor:16; ++ unsigned int block_size_1:16; ++ unsigned int block_log:16; ++ unsigned int flags:8; ++ unsigned int no_uids:8; ++ unsigned int no_guids:8; ++ unsigned int mkfs_time /* time of filesystem creation */; ++ squashfs_inode root_inode; ++ unsigned int block_size; ++ unsigned int fragments; ++ unsigned int fragment_table_start; ++} __attribute__ ((packed)) squashfs_super_block; ++ ++typedef struct { ++ unsigned int index:27; ++ unsigned int start_block:29; ++ unsigned char size; ++ unsigned char name[0]; ++} __attribute__ ((packed)) squashfs_dir_index; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++} __attribute__ ((packed)) squashfs_base_inode_header; ++ ++typedef squashfs_base_inode_header squashfs_ipc_inode_header; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned short rdev; ++} __attribute__ ((packed)) squashfs_dev_inode_header; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned short symlink_size; ++ char symlink[0]; ++} __attribute__ ((packed)) squashfs_symlink_inode_header; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned int mtime; ++ squashfs_block start_block; ++ unsigned int fragment; ++ unsigned int offset; ++ unsigned int file_size:SQUASHFS_MAX_FILE_SIZE_LOG; ++ unsigned short block_list[0]; ++} __attribute__ ((packed)) squashfs_reg_inode_header; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned int file_size:19; ++ unsigned int offset:13; ++ unsigned int mtime; ++ unsigned int start_block:24; ++} __attribute__ ((packed)) squashfs_dir_inode_header; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:8; /* index into uid table */ ++ unsigned int guid:8; /* index into guid table */ ++ unsigned int file_size:27; ++ unsigned int offset:13; ++ unsigned int mtime; ++ unsigned int start_block:24; ++ unsigned int i_count:16; ++ squashfs_dir_index index[0]; ++} __attribute__ ((packed)) squashfs_ldir_inode_header; ++ ++typedef union { ++ squashfs_base_inode_header base; ++ squashfs_dev_inode_header dev; ++ squashfs_symlink_inode_header symlink; ++ squashfs_reg_inode_header reg; ++ squashfs_dir_inode_header dir; ++ squashfs_ldir_inode_header ldir; ++ squashfs_ipc_inode_header ipc; ++} squashfs_inode_header; ++ ++typedef struct { ++ unsigned int offset:13; ++ unsigned int type:3; ++ unsigned int size:8; ++ char name[0]; ++} __attribute__ ((packed)) squashfs_dir_entry; ++ ++typedef struct { ++ unsigned int count:8; ++ unsigned int start_block:24; ++} __attribute__ ((packed)) squashfs_dir_header; ++ ++typedef struct { ++ unsigned int start_block; ++ unsigned int size; ++} __attribute__ ((packed)) squashfs_fragment_entry; ++ ++extern int squashfs_uncompress_block(void *d, int dstlen, void *s, int srclen); ++extern int squashfs_uncompress_init(void); ++extern int squashfs_uncompress_exit(void); ++ ++/* ++ * macros to convert each packed bitfield structure from little endian to big ++ * endian and vice versa. These are needed when creating or using a filesystem on a ++ * machine with different byte ordering to the target architecture. ++ * ++ */ ++ ++#define SQUASHFS_SWAP_SUPER_BLOCK(s, d) {\ ++ SQUASHFS_MEMSET(s, d, sizeof(squashfs_super_block));\ ++ SQUASHFS_SWAP((s)->s_magic, d, 0, 32);\ ++ SQUASHFS_SWAP((s)->inodes, d, 32, 32);\ ++ SQUASHFS_SWAP((s)->bytes_used, d, 64, 32);\ ++ SQUASHFS_SWAP((s)->uid_start, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->guid_start, d, 128, 32);\ ++ SQUASHFS_SWAP((s)->inode_table_start, d, 160, 32);\ ++ SQUASHFS_SWAP((s)->directory_table_start, d, 192, 32);\ ++ SQUASHFS_SWAP((s)->s_major, d, 224, 16);\ ++ SQUASHFS_SWAP((s)->s_minor, d, 240, 16);\ ++ SQUASHFS_SWAP((s)->block_size_1, d, 256, 16);\ ++ SQUASHFS_SWAP((s)->block_log, d, 272, 16);\ ++ SQUASHFS_SWAP((s)->flags, d, 288, 8);\ ++ SQUASHFS_SWAP((s)->no_uids, d, 296, 8);\ ++ SQUASHFS_SWAP((s)->no_guids, d, 304, 8);\ ++ SQUASHFS_SWAP((s)->mkfs_time, d, 312, 32);\ ++ SQUASHFS_SWAP((s)->root_inode, d, 344, 64);\ ++ SQUASHFS_SWAP((s)->block_size, d, 408, 32);\ ++ SQUASHFS_SWAP((s)->fragments, d, 440, 32);\ ++ SQUASHFS_SWAP((s)->fragment_table_start, d, 472, 32);\ ++} ++ ++#define SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, n) {\ ++ SQUASHFS_MEMSET(s, d, n);\ ++ SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\ ++ SQUASHFS_SWAP((s)->mode, d, 4, 12);\ ++ SQUASHFS_SWAP((s)->uid, d, 16, 8);\ ++ SQUASHFS_SWAP((s)->guid, d, 24, 8);\ ++} ++ ++#define SQUASHFS_SWAP_IPC_INODE_HEADER(s, d) SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_ipc_inode_header)) ++ ++#define SQUASHFS_SWAP_DEV_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_dev_inode_header));\ ++ SQUASHFS_SWAP((s)->rdev, d, 32, 16);\ ++} ++ ++#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_symlink_inode_header));\ ++ SQUASHFS_SWAP((s)->symlink_size, d, 32, 16);\ ++} ++ ++#define SQUASHFS_SWAP_REG_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_reg_inode_header));\ ++ SQUASHFS_SWAP((s)->mtime, d, 32, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 64, 32);\ ++ SQUASHFS_SWAP((s)->fragment, d, 96, 32);\ ++ SQUASHFS_SWAP((s)->offset, d, 128, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 160, SQUASHFS_MAX_FILE_SIZE_LOG);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_dir_inode_header));\ ++ SQUASHFS_SWAP((s)->file_size, d, 32, 19);\ ++ SQUASHFS_SWAP((s)->offset, d, 51, 13);\ ++ SQUASHFS_SWAP((s)->mtime, d, 64, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 96, 24);\ ++} ++ ++#define SQUASHFS_SWAP_LDIR_INODE_HEADER(s, d) {\ ++ SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_ldir_inode_header));\ ++ SQUASHFS_SWAP((s)->file_size, d, 32, 27);\ ++ SQUASHFS_SWAP((s)->offset, d, 59, 13);\ ++ SQUASHFS_SWAP((s)->mtime, d, 72, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 104, 24);\ ++ SQUASHFS_SWAP((s)->i_count, d, 128, 16);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INDEX(s, d) {\ ++ SQUASHFS_MEMSET(s, d, sizeof(squashfs_dir_index));\ ++ SQUASHFS_SWAP((s)->index, d, 0, 27);\ ++ SQUASHFS_SWAP((s)->start_block, d, 27, 29);\ ++ SQUASHFS_SWAP((s)->size, d, 56, 8);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_HEADER(s, d) {\ ++ SQUASHFS_MEMSET(s, d, sizeof(squashfs_dir_header));\ ++ SQUASHFS_SWAP((s)->count, d, 0, 8);\ ++ SQUASHFS_SWAP((s)->start_block, d, 8, 24);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_ENTRY(s, d) {\ ++ SQUASHFS_MEMSET(s, d, sizeof(squashfs_dir_entry));\ ++ SQUASHFS_SWAP((s)->offset, d, 0, 13);\ ++ SQUASHFS_SWAP((s)->type, d, 13, 3);\ ++ SQUASHFS_SWAP((s)->size, d, 16, 8);\ ++} ++ ++#define SQUASHFS_SWAP_FRAGMENT_ENTRY(s, d) {\ ++ SQUASHFS_MEMSET(s, d, sizeof(squashfs_fragment_entry));\ ++ SQUASHFS_SWAP((s)->start_block, d, 0, 32);\ ++ SQUASHFS_SWAP((s)->size, d, 32, 32);\ ++} ++ ++#define SQUASHFS_SWAP_SHORTS(s, d, n) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_MEMSET(s, d, n * 2);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += 16)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, 16);\ ++} ++ ++#define SQUASHFS_SWAP_INTS(s, d, n) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_MEMSET(s, d, n * 4);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += 32)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, 32);\ ++} ++ ++#define SQUASHFS_SWAP_DATA(s, d, n, bits) {\ ++ int entry;\ ++ int bit_position;\ ++ SQUASHFS_MEMSET(s, d, n * bits / 8);\ ++ for(entry = 0, bit_position = 0; entry < n; entry++, bit_position += bits)\ ++ SQUASHFS_SWAP(s[entry], d, bit_position, bits);\ ++} ++ ++#define SQUASHFS_SWAP_FRAGMENT_INDEXES(s, d, n) SQUASHFS_SWAP_INTS(s, d, n) ++ ++#ifdef SQUASHFS_1_0_COMPATIBILITY ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++} __attribute__ ((packed)) squashfs_base_inode_header_1; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned int type:4; ++ unsigned int offset:4; ++} __attribute__ ((packed)) squashfs_ipc_inode_header_1; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned short rdev; ++} __attribute__ ((packed)) squashfs_dev_inode_header_1; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned short symlink_size; ++ char symlink[0]; ++} __attribute__ ((packed)) squashfs_symlink_inode_header_1; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned int mtime; ++ squashfs_block start_block; ++ unsigned int file_size:SQUASHFS_MAX_FILE_SIZE_LOG; ++ unsigned short block_list[0]; ++} __attribute__ ((packed)) squashfs_reg_inode_header_1; ++ ++typedef struct { ++ unsigned int inode_type:4; ++ unsigned int mode:12; /* protection */ ++ unsigned int uid:4; /* index into uid table */ ++ unsigned int guid:4; /* index into guid table */ ++ unsigned int file_size:19; ++ unsigned int offset:13; ++ unsigned int mtime; ++ unsigned int start_block:24; ++} __attribute__ ((packed)) squashfs_dir_inode_header_1; ++ ++#define SQUASHFS_SWAP_BASE_INODE_HEADER_1(s, d, n) {\ ++ SQUASHFS_MEMSET(s, d, n);\ ++ SQUASHFS_SWAP((s)->inode_type, d, 0, 4);\ ++ SQUASHFS_SWAP((s)->mode, d, 4, 12);\ ++ SQUASHFS_SWAP((s)->uid, d, 16, 4);\ ++ SQUASHFS_SWAP((s)->guid, d, 20, 4);\ ++} ++ ++#define SQUASHFS_SWAP_IPC_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_BASE_INODE_HEADER_1(s, d, sizeof(squashfs_ipc_inode_header_1));\ ++ SQUASHFS_SWAP((s)->type, d, 24, 4);\ ++ SQUASHFS_SWAP((s)->offset, d, 28, 4);\ ++} ++ ++#define SQUASHFS_SWAP_DEV_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_BASE_INODE_HEADER_1(s, d, sizeof(squashfs_dev_inode_header_1));\ ++ SQUASHFS_SWAP((s)->rdev, d, 24, 16);\ ++} ++ ++#define SQUASHFS_SWAP_SYMLINK_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_symlink_inode_header_1));\ ++ SQUASHFS_SWAP((s)->symlink_size, d, 24, 16);\ ++} ++ ++#define SQUASHFS_SWAP_REG_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_reg_inode_header_1));\ ++ SQUASHFS_SWAP((s)->mtime, d, 24, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 56, 32);\ ++ SQUASHFS_SWAP((s)->file_size, d, 88, SQUASHFS_MAX_FILE_SIZE_LOG);\ ++} ++ ++#define SQUASHFS_SWAP_DIR_INODE_HEADER_1(s, d) {\ ++ SQUASHFS_SWAP_BASE_INODE_HEADER(s, d, sizeof(squashfs_dir_inode_header_1));\ ++ SQUASHFS_SWAP((s)->file_size, d, 24, 19);\ ++ SQUASHFS_SWAP((s)->offset, d, 43, 13);\ ++ SQUASHFS_SWAP((s)->mtime, d, 56, 32);\ ++ SQUASHFS_SWAP((s)->start_block, d, 88, 24);\ ++} ++#endif ++ ++#ifdef __KERNEL__ ++/* ++ * macros used to swap each structure entry, taking into account ++ * bitfields and different bitfield placing conventions on differing architectures ++ */ ++#include <asm/byteorder.h> ++#ifdef __BIG_ENDIAN ++ /* convert from little endian to big endian */ ++#define SQUASHFS_SWAP(value, p, pos, tbits) _SQUASHFS_SWAP(value, p, pos, tbits, b_pos) ++#else ++ /* convert from big endian to little endian */ ++#define SQUASHFS_SWAP(value, p, pos, tbits) _SQUASHFS_SWAP(value, p, pos, tbits, 64 - tbits - b_pos) ++#endif ++ ++#define _SQUASHFS_SWAP(value, p, pos, tbits, SHIFT) {\ ++ int bits;\ ++ int b_pos = pos % 8;\ ++ unsigned long long val = 0;\ ++ unsigned char *s = (unsigned char *)p + (pos / 8);\ ++ unsigned char *d = ((unsigned char *) &val) + 7;\ ++ for(bits = 0; bits < (tbits + b_pos); bits += 8) \ ++ *d-- = *s++;\ ++ value = (val >> (SHIFT))/* & ((1 << tbits) - 1)*/;\ ++} ++#define SQUASHFS_MEMSET(s, d, n) memset(s, 0, n); ++#endif ++#endif +diff --new-file -urp linux-2.6.12/include/linux/squashfs_fs_i.h linux-2.6.12-squashfs2.2/include/linux/squashfs_fs_i.h +--- linux-2.6.12/include/linux/squashfs_fs_i.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12-squashfs2.2/include/linux/squashfs_fs_i.h 2005-07-04 02:35:35.000000000 +0100 +@@ -0,0 +1,43 @@ ++#ifndef SQUASHFS_FS_I ++#define SQUASHFS_FS_I ++/* ++ * Squashfs ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005 Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs_fs_i.h ++ */ ++ ++typedef struct squashfs_inode_info { ++ unsigned int start_block; ++ unsigned int block_list_start; ++ unsigned int offset; ++ union { ++ struct { ++ unsigned int fragment_start_block; ++ unsigned int fragment_size; ++ unsigned int fragment_offset; ++ } s1; ++ struct { ++ unsigned int directory_index_start; ++ unsigned int directory_index_offset; ++ unsigned int directory_index_count; ++ } s2; ++ } u; ++ struct inode vfs_inode; ++ } squashfs_inode_info; ++#endif +diff --new-file -urp linux-2.6.12/include/linux/squashfs_fs_sb.h linux-2.6.12-squashfs2.2/include/linux/squashfs_fs_sb.h +--- linux-2.6.12/include/linux/squashfs_fs_sb.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12-squashfs2.2/include/linux/squashfs_fs_sb.h 2005-07-04 02:35:35.000000000 +0100 +@@ -0,0 +1,65 @@ ++#ifndef SQUASHFS_FS_SB ++#define SQUASHFS_FS_SB ++/* ++ * Squashfs ++ * ++ * Copyright (c) 2002, 2003, 2004, 2005 Phillip Lougher <phillip@lougher.demon.co.uk> ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version 2, ++ * or (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * squashfs_fs_sb.h ++ */ ++ ++#include <linux/squashfs_fs.h> ++ ++typedef struct { ++ unsigned int block; ++ int length; ++ unsigned int next_index; ++ char *data; ++ } squashfs_cache; ++ ++struct squashfs_fragment_cache { ++ unsigned int block; ++ int length; ++ unsigned int locked; ++ char *data; ++ }; ++ ++typedef struct squashfs_sb_info { ++ squashfs_super_block sBlk; ++ int devblksize; ++ int devblksize_log2; ++ int swap; ++ squashfs_cache *block_cache; ++ struct squashfs_fragment_cache *fragment; ++ int next_cache; ++ int next_fragment; ++ squashfs_uid *uid; ++ squashfs_uid *guid; ++ squashfs_fragment_index *fragment_index; ++ unsigned int read_size; ++ char *read_data; ++ char *read_page; ++ struct semaphore read_page_mutex; ++ struct semaphore block_cache_mutex; ++ struct semaphore fragment_mutex; ++ wait_queue_head_t waitq; ++ wait_queue_head_t fragment_wait_queue; ++ struct inode *(*iget)(struct super_block *s, squashfs_inode inode); ++ unsigned int (*read_blocklist)(struct inode *inode, int index, int readahead_blks, ++ char *block_list, unsigned short **block_p, unsigned int *bsize); ++ } squashfs_sb_info; ++#endif +diff --new-file -urp linux-2.6.12/init/do_mounts_rd.c linux-2.6.12-squashfs2.2/init/do_mounts_rd.c +--- linux-2.6.12/init/do_mounts_rd.c 2005-06-17 20:48:29.000000000 +0100 ++++ linux-2.6.12-squashfs2.2/init/do_mounts_rd.c 2005-07-04 02:35:35.000000000 +0100 +@@ -5,6 +5,7 @@ + #include <linux/ext2_fs.h> + #include <linux/romfs_fs.h> + #include <linux/cramfs_fs.h> ++#include <linux/squashfs_fs.h> + #include <linux/initrd.h> + #include <linux/string.h> + +@@ -39,6 +40,7 @@ static int __init crd_load(int in_fd, in + * numbers could not be found. + * + * We currently check for the following magic numbers: ++ * squashfs + * minix + * ext2 + * romfs +@@ -53,6 +55,7 @@ identify_ramdisk_image(int fd, int start + struct ext2_super_block *ext2sb; + struct romfs_super_block *romfsb; + struct cramfs_super *cramfsb; ++ struct squashfs_super_block *squashfsb; + int nblocks = -1; + unsigned char *buf; + +@@ -64,6 +67,7 @@ identify_ramdisk_image(int fd, int start + ext2sb = (struct ext2_super_block *) buf; + romfsb = (struct romfs_super_block *) buf; + cramfsb = (struct cramfs_super *) buf; ++ squashfsb = (struct squashfs_super_block *) buf; + memset(buf, 0xe5, size); + + /* +@@ -101,6 +105,15 @@ identify_ramdisk_image(int fd, int start + goto done; + } + ++ /* squashfs is at block zero too */ ++ if (squashfsb->s_magic == SQUASHFS_MAGIC) { ++ printk(KERN_NOTICE ++ "RAMDISK: squashfs filesystem found at block %d\n", ++ start_block); ++ nblocks = (squashfsb->bytes_used+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS; ++ goto done; ++ } ++ + /* + * Read block 1 to test for minix and ext2 superblock + */ diff --git a/openwrt/target/linux/generic-2.6/patches/002-squashfs_lzma.patch b/openwrt/target/linux/generic-2.6/patches/002-squashfs_lzma.patch new file mode 100644 index 0000000000..be4c19bd7e --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/002-squashfs_lzma.patch @@ -0,0 +1,890 @@ +diff -Nur linux-2.6.12.5-brcm-squashfs/fs/squashfs/LzmaDecode.c linux-2.6.12.5-brcm-squashfs-lzma/fs/squashfs/LzmaDecode.c +--- linux-2.6.12.5-brcm-squashfs/fs/squashfs/LzmaDecode.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-brcm-squashfs-lzma/fs/squashfs/LzmaDecode.c 2005-08-29 00:02:44.099124176 +0200 +@@ -0,0 +1,663 @@ ++/* ++ LzmaDecode.c ++ LZMA Decoder ++ ++ LZMA SDK 4.05 Copyright (c) 1999-2004 Igor Pavlov (2004-08-25) ++ http://www.7-zip.org/ ++ ++ LZMA SDK is licensed under two licenses: ++ 1) GNU Lesser General Public License (GNU LGPL) ++ 2) Common Public License (CPL) ++ It means that you can select one of these two licenses and ++ follow rules of that license. ++ ++ SPECIAL EXCEPTION: ++ Igor Pavlov, as the author of this code, expressly permits you to ++ statically or dynamically link your code (or bind by name) to the ++ interfaces of this file without subjecting your linked code to the ++ terms of the CPL or GNU LGPL. Any modifications or additions ++ to this file, however, are subject to the LGPL or CPL terms. ++*/ ++ ++#include "LzmaDecode.h" ++ ++#ifndef Byte ++#define Byte unsigned char ++#endif ++ ++#define kNumTopBits 24 ++#define kTopValue ((UInt32)1 << kNumTopBits) ++ ++#define kNumBitModelTotalBits 11 ++#define kBitModelTotal (1 << kNumBitModelTotalBits) ++#define kNumMoveBits 5 ++ ++typedef struct _CRangeDecoder ++{ ++ Byte *Buffer; ++ Byte *BufferLim; ++ UInt32 Range; ++ UInt32 Code; ++ #ifdef _LZMA_IN_CB ++ ILzmaInCallback *InCallback; ++ int Result; ++ #endif ++ int ExtraBytes; ++} CRangeDecoder; ++ ++Byte RangeDecoderReadByte(CRangeDecoder *rd) ++{ ++ if (rd->Buffer == rd->BufferLim) ++ { ++ #ifdef _LZMA_IN_CB ++ UInt32 size; ++ rd->Result = rd->InCallback->Read(rd->InCallback, &rd->Buffer, &size); ++ rd->BufferLim = rd->Buffer + size; ++ if (size == 0) ++ #endif ++ { ++ rd->ExtraBytes = 1; ++ return 0xFF; ++ } ++ } ++ return (*rd->Buffer++); ++} ++ ++/* #define ReadByte (*rd->Buffer++) */ ++#define ReadByte (RangeDecoderReadByte(rd)) ++ ++void RangeDecoderInit(CRangeDecoder *rd, ++ #ifdef _LZMA_IN_CB ++ ILzmaInCallback *inCallback ++ #else ++ Byte *stream, UInt32 bufferSize ++ #endif ++ ) ++{ ++ int i; ++ #ifdef _LZMA_IN_CB ++ rd->InCallback = inCallback; ++ rd->Buffer = rd->BufferLim = 0; ++ #else ++ rd->Buffer = stream; ++ rd->BufferLim = stream + bufferSize; ++ #endif ++ rd->ExtraBytes = 0; ++ rd->Code = 0; ++ rd->Range = (0xFFFFFFFF); ++ for(i = 0; i < 5; i++) ++ rd->Code = (rd->Code << 8) | ReadByte; ++} ++ ++#define RC_INIT_VAR UInt32 range = rd->Range; UInt32 code = rd->Code; ++#define RC_FLUSH_VAR rd->Range = range; rd->Code = code; ++#define RC_NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | ReadByte; } ++ ++UInt32 RangeDecoderDecodeDirectBits(CRangeDecoder *rd, int numTotalBits) ++{ ++ RC_INIT_VAR ++ UInt32 result = 0; ++ int i; ++ for (i = numTotalBits; i > 0; i--) ++ { ++ /* UInt32 t; */ ++ range >>= 1; ++ ++ result <<= 1; ++ if (code >= range) ++ { ++ code -= range; ++ result |= 1; ++ } ++ /* ++ t = (code - range) >> 31; ++ t &= 1; ++ code -= range & (t - 1); ++ result = (result + result) | (1 - t); ++ */ ++ RC_NORMALIZE ++ } ++ RC_FLUSH_VAR ++ return result; ++} ++ ++int RangeDecoderBitDecode(CProb *prob, CRangeDecoder *rd) ++{ ++ UInt32 bound = (rd->Range >> kNumBitModelTotalBits) * *prob; ++ if (rd->Code < bound) ++ { ++ rd->Range = bound; ++ *prob += (kBitModelTotal - *prob) >> kNumMoveBits; ++ if (rd->Range < kTopValue) ++ { ++ rd->Code = (rd->Code << 8) | ReadByte; ++ rd->Range <<= 8; ++ } ++ return 0; ++ } ++ else ++ { ++ rd->Range -= bound; ++ rd->Code -= bound; ++ *prob -= (*prob) >> kNumMoveBits; ++ if (rd->Range < kTopValue) ++ { ++ rd->Code = (rd->Code << 8) | ReadByte; ++ rd->Range <<= 8; ++ } ++ return 1; ++ } ++} ++ ++#define RC_GET_BIT2(prob, mi, A0, A1) \ ++ UInt32 bound = (range >> kNumBitModelTotalBits) * *prob; \ ++ if (code < bound) \ ++ { A0; range = bound; *prob += (kBitModelTotal - *prob) >> kNumMoveBits; mi <<= 1; } \ ++ else \ ++ { A1; range -= bound; code -= bound; *prob -= (*prob) >> kNumMoveBits; mi = (mi + mi) + 1; } \ ++ RC_NORMALIZE ++ ++#define RC_GET_BIT(prob, mi) RC_GET_BIT2(prob, mi, ; , ;) ++ ++int RangeDecoderBitTreeDecode(CProb *probs, int numLevels, CRangeDecoder *rd) ++{ ++ int mi = 1; ++ int i; ++ #ifdef _LZMA_LOC_OPT ++ RC_INIT_VAR ++ #endif ++ for(i = numLevels; i > 0; i--) ++ { ++ #ifdef _LZMA_LOC_OPT ++ CProb *prob = probs + mi; ++ RC_GET_BIT(prob, mi) ++ #else ++ mi = (mi + mi) + RangeDecoderBitDecode(probs + mi, rd); ++ #endif ++ } ++ #ifdef _LZMA_LOC_OPT ++ RC_FLUSH_VAR ++ #endif ++ return mi - (1 << numLevels); ++} ++ ++int RangeDecoderReverseBitTreeDecode(CProb *probs, int numLevels, CRangeDecoder *rd) ++{ ++ int mi = 1; ++ int i; ++ int symbol = 0; ++ #ifdef _LZMA_LOC_OPT ++ RC_INIT_VAR ++ #endif ++ for(i = 0; i < numLevels; i++) ++ { ++ #ifdef _LZMA_LOC_OPT ++ CProb *prob = probs + mi; ++ RC_GET_BIT2(prob, mi, ; , symbol |= (1 << i)) ++ #else ++ int bit = RangeDecoderBitDecode(probs + mi, rd); ++ mi = mi + mi + bit; ++ symbol |= (bit << i); ++ #endif ++ } ++ #ifdef _LZMA_LOC_OPT ++ RC_FLUSH_VAR ++ #endif ++ return symbol; ++} ++ ++Byte LzmaLiteralDecode(CProb *probs, CRangeDecoder *rd) ++{ ++ int symbol = 1; ++ #ifdef _LZMA_LOC_OPT ++ RC_INIT_VAR ++ #endif ++ do ++ { ++ #ifdef _LZMA_LOC_OPT ++ CProb *prob = probs + symbol; ++ RC_GET_BIT(prob, symbol) ++ #else ++ symbol = (symbol + symbol) | RangeDecoderBitDecode(probs + symbol, rd); ++ #endif ++ } ++ while (symbol < 0x100); ++ #ifdef _LZMA_LOC_OPT ++ RC_FLUSH_VAR ++ #endif ++ return symbol; ++} ++ ++Byte LzmaLiteralDecodeMatch(CProb *probs, CRangeDecoder *rd, Byte matchByte) ++{ ++ int symbol = 1; ++ #ifdef _LZMA_LOC_OPT ++ RC_INIT_VAR ++ #endif ++ do ++ { ++ int bit; ++ int matchBit = (matchByte >> 7) & 1; ++ matchByte <<= 1; ++ #ifdef _LZMA_LOC_OPT ++ { ++ CProb *prob = probs + ((1 + matchBit) << 8) + symbol; ++ RC_GET_BIT2(prob, symbol, bit = 0, bit = 1) ++ } ++ #else ++ bit = RangeDecoderBitDecode(probs + ((1 + matchBit) << 8) + symbol, rd); ++ symbol = (symbol << 1) | bit; ++ #endif ++ if (matchBit != bit) ++ { ++ while (symbol < 0x100) ++ { ++ #ifdef _LZMA_LOC_OPT ++ CProb *prob = probs + symbol; ++ RC_GET_BIT(prob, symbol) ++ #else ++ symbol = (symbol + symbol) | RangeDecoderBitDecode(probs + symbol, rd); ++ #endif ++ } ++ break; ++ } ++ } ++ while (symbol < 0x100); ++ #ifdef _LZMA_LOC_OPT ++ RC_FLUSH_VAR ++ #endif ++ return symbol; ++} ++ ++#define kNumPosBitsMax 4 ++#define kNumPosStatesMax (1 << kNumPosBitsMax) ++ ++#define kLenNumLowBits 3 ++#define kLenNumLowSymbols (1 << kLenNumLowBits) ++#define kLenNumMidBits 3 ++#define kLenNumMidSymbols (1 << kLenNumMidBits) ++#define kLenNumHighBits 8 ++#define kLenNumHighSymbols (1 << kLenNumHighBits) ++ ++#define LenChoice 0 ++#define LenChoice2 (LenChoice + 1) ++#define LenLow (LenChoice2 + 1) ++#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) ++#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) ++#define kNumLenProbs (LenHigh + kLenNumHighSymbols) ++ ++int LzmaLenDecode(CProb *p, CRangeDecoder *rd, int posState) ++{ ++ if(RangeDecoderBitDecode(p + LenChoice, rd) == 0) ++ return RangeDecoderBitTreeDecode(p + LenLow + ++ (posState << kLenNumLowBits), kLenNumLowBits, rd); ++ if(RangeDecoderBitDecode(p + LenChoice2, rd) == 0) ++ return kLenNumLowSymbols + RangeDecoderBitTreeDecode(p + LenMid + ++ (posState << kLenNumMidBits), kLenNumMidBits, rd); ++ return kLenNumLowSymbols + kLenNumMidSymbols + ++ RangeDecoderBitTreeDecode(p + LenHigh, kLenNumHighBits, rd); ++} ++ ++#define kNumStates 12 ++ ++#define kStartPosModelIndex 4 ++#define kEndPosModelIndex 14 ++#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) ++ ++#define kNumPosSlotBits 6 ++#define kNumLenToPosStates 4 ++ ++#define kNumAlignBits 4 ++#define kAlignTableSize (1 << kNumAlignBits) ++ ++#define kMatchMinLen 2 ++ ++#define IsMatch 0 ++#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) ++#define IsRepG0 (IsRep + kNumStates) ++#define IsRepG1 (IsRepG0 + kNumStates) ++#define IsRepG2 (IsRepG1 + kNumStates) ++#define IsRep0Long (IsRepG2 + kNumStates) ++#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) ++#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) ++#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) ++#define LenCoder (Align + kAlignTableSize) ++#define RepLenCoder (LenCoder + kNumLenProbs) ++#define Literal (RepLenCoder + kNumLenProbs) ++ ++#if Literal != LZMA_BASE_SIZE ++StopCompilingDueBUG ++#endif ++ ++#ifdef _LZMA_OUT_READ ++ ++typedef struct _LzmaVarState ++{ ++ CRangeDecoder RangeDecoder; ++ Byte *Dictionary; ++ UInt32 DictionarySize; ++ UInt32 DictionaryPos; ++ UInt32 GlobalPos; ++ UInt32 Reps[4]; ++ int lc; ++ int lp; ++ int pb; ++ int State; ++ int PreviousIsMatch; ++ int RemainLen; ++} LzmaVarState; ++ ++int LzmaDecoderInit( ++ unsigned char *buffer, UInt32 bufferSize, ++ int lc, int lp, int pb, ++ unsigned char *dictionary, UInt32 dictionarySize, ++ #ifdef _LZMA_IN_CB ++ ILzmaInCallback *inCallback ++ #else ++ unsigned char *inStream, UInt32 inSize ++ #endif ++ ) ++{ ++ LzmaVarState *vs = (LzmaVarState *)buffer; ++ CProb *p = (CProb *)(buffer + sizeof(LzmaVarState)); ++ UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc + lp)); ++ UInt32 i; ++ if (bufferSize < numProbs * sizeof(CProb) + sizeof(LzmaVarState)) ++ return LZMA_RESULT_NOT_ENOUGH_MEM; ++ vs->Dictionary = dictionary; ++ vs->DictionarySize = dictionarySize; ++ vs->DictionaryPos = 0; ++ vs->GlobalPos = 0; ++ vs->Reps[0] = vs->Reps[1] = vs->Reps[2] = vs->Reps[3] = 1; ++ vs->lc = lc; ++ vs->lp = lp; ++ vs->pb = pb; ++ vs->State = 0; ++ vs->PreviousIsMatch = 0; ++ vs->RemainLen = 0; ++ dictionary[dictionarySize - 1] = 0; ++ for (i = 0; i < numProbs; i++) ++ p[i] = kBitModelTotal >> 1; ++ RangeDecoderInit(&vs->RangeDecoder, ++ #ifdef _LZMA_IN_CB ++ inCallback ++ #else ++ inStream, inSize ++ #endif ++ ); ++ return LZMA_RESULT_OK; ++} ++ ++int LzmaDecode(unsigned char *buffer, ++ unsigned char *outStream, UInt32 outSize, ++ UInt32 *outSizeProcessed) ++{ ++ LzmaVarState *vs = (LzmaVarState *)buffer; ++ CProb *p = (CProb *)(buffer + sizeof(LzmaVarState)); ++ CRangeDecoder rd = vs->RangeDecoder; ++ int state = vs->State; ++ int previousIsMatch = vs->PreviousIsMatch; ++ Byte previousByte; ++ UInt32 rep0 = vs->Reps[0], rep1 = vs->Reps[1], rep2 = vs->Reps[2], rep3 = vs->Reps[3]; ++ UInt32 nowPos = 0; ++ UInt32 posStateMask = (1 << (vs->pb)) - 1; ++ UInt32 literalPosMask = (1 << (vs->lp)) - 1; ++ int lc = vs->lc; ++ int len = vs->RemainLen; ++ UInt32 globalPos = vs->GlobalPos; ++ ++ Byte *dictionary = vs->Dictionary; ++ UInt32 dictionarySize = vs->DictionarySize; ++ UInt32 dictionaryPos = vs->DictionaryPos; ++ ++ if (len == -1) ++ { ++ *outSizeProcessed = 0; ++ return LZMA_RESULT_OK; ++ } ++ ++ while(len > 0 && nowPos < outSize) ++ { ++ UInt32 pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ outStream[nowPos++] = dictionary[dictionaryPos] = dictionary[pos]; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ len--; ++ } ++ if (dictionaryPos == 0) ++ previousByte = dictionary[dictionarySize - 1]; ++ else ++ previousByte = dictionary[dictionaryPos - 1]; ++#else ++ ++int LzmaDecode( ++ Byte *buffer, UInt32 bufferSize, ++ int lc, int lp, int pb, ++ #ifdef _LZMA_IN_CB ++ ILzmaInCallback *inCallback, ++ #else ++ unsigned char *inStream, UInt32 inSize, ++ #endif ++ unsigned char *outStream, UInt32 outSize, ++ UInt32 *outSizeProcessed) ++{ ++ UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (lc + lp)); ++ CProb *p = (CProb *)buffer; ++ CRangeDecoder rd; ++ UInt32 i; ++ int state = 0; ++ int previousIsMatch = 0; ++ Byte previousByte = 0; ++ UInt32 rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; ++ UInt32 nowPos = 0; ++ UInt32 posStateMask = (1 << pb) - 1; ++ UInt32 literalPosMask = (1 << lp) - 1; ++ int len = 0; ++ if (bufferSize < numProbs * sizeof(CProb)) ++ return LZMA_RESULT_NOT_ENOUGH_MEM; ++ for (i = 0; i < numProbs; i++) ++ p[i] = kBitModelTotal >> 1; ++ RangeDecoderInit(&rd, ++ #ifdef _LZMA_IN_CB ++ inCallback ++ #else ++ inStream, inSize ++ #endif ++ ); ++#endif ++ ++ *outSizeProcessed = 0; ++ while(nowPos < outSize) ++ { ++ int posState = (int)( ++ (nowPos ++ #ifdef _LZMA_OUT_READ ++ + globalPos ++ #endif ++ ) ++ & posStateMask); ++ #ifdef _LZMA_IN_CB ++ if (rd.Result != LZMA_RESULT_OK) ++ return rd.Result; ++ #endif ++ if (rd.ExtraBytes != 0) ++ return LZMA_RESULT_DATA_ERROR; ++ if (RangeDecoderBitDecode(p + IsMatch + (state << kNumPosBitsMax) + posState, &rd) == 0) ++ { ++ CProb *probs = p + Literal + (LZMA_LIT_SIZE * ++ ((( ++ (nowPos ++ #ifdef _LZMA_OUT_READ ++ + globalPos ++ #endif ++ ) ++ & literalPosMask) << lc) + (previousByte >> (8 - lc)))); ++ ++ if (state < 4) state = 0; ++ else if (state < 10) state -= 3; ++ else state -= 6; ++ if (previousIsMatch) ++ { ++ Byte matchByte; ++ #ifdef _LZMA_OUT_READ ++ UInt32 pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ matchByte = dictionary[pos]; ++ #else ++ matchByte = outStream[nowPos - rep0]; ++ #endif ++ previousByte = LzmaLiteralDecodeMatch(probs, &rd, matchByte); ++ previousIsMatch = 0; ++ } ++ else ++ previousByte = LzmaLiteralDecode(probs, &rd); ++ outStream[nowPos++] = previousByte; ++ #ifdef _LZMA_OUT_READ ++ dictionary[dictionaryPos] = previousByte; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ #endif ++ } ++ else ++ { ++ previousIsMatch = 1; ++ if (RangeDecoderBitDecode(p + IsRep + state, &rd) == 1) ++ { ++ if (RangeDecoderBitDecode(p + IsRepG0 + state, &rd) == 0) ++ { ++ if (RangeDecoderBitDecode(p + IsRep0Long + (state << kNumPosBitsMax) + posState, &rd) == 0) ++ { ++ #ifdef _LZMA_OUT_READ ++ UInt32 pos; ++ #endif ++ if ( ++ (nowPos ++ #ifdef _LZMA_OUT_READ ++ + globalPos ++ #endif ++ ) ++ == 0) ++ return LZMA_RESULT_DATA_ERROR; ++ state = state < 7 ? 9 : 11; ++ #ifdef _LZMA_OUT_READ ++ pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ previousByte = dictionary[pos]; ++ dictionary[dictionaryPos] = previousByte; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ #else ++ previousByte = outStream[nowPos - rep0]; ++ #endif ++ outStream[nowPos++] = previousByte; ++ continue; ++ } ++ } ++ else ++ { ++ UInt32 distance; ++ if(RangeDecoderBitDecode(p + IsRepG1 + state, &rd) == 0) ++ distance = rep1; ++ else ++ { ++ if(RangeDecoderBitDecode(p + IsRepG2 + state, &rd) == 0) ++ distance = rep2; ++ else ++ { ++ distance = rep3; ++ rep3 = rep2; ++ } ++ rep2 = rep1; ++ } ++ rep1 = rep0; ++ rep0 = distance; ++ } ++ len = LzmaLenDecode(p + RepLenCoder, &rd, posState); ++ state = state < 7 ? 8 : 11; ++ } ++ else ++ { ++ int posSlot; ++ rep3 = rep2; ++ rep2 = rep1; ++ rep1 = rep0; ++ state = state < 7 ? 7 : 10; ++ len = LzmaLenDecode(p + LenCoder, &rd, posState); ++ posSlot = RangeDecoderBitTreeDecode(p + PosSlot + ++ ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << ++ kNumPosSlotBits), kNumPosSlotBits, &rd); ++ if (posSlot >= kStartPosModelIndex) ++ { ++ int numDirectBits = ((posSlot >> 1) - 1); ++ rep0 = ((2 | ((UInt32)posSlot & 1)) << numDirectBits); ++ if (posSlot < kEndPosModelIndex) ++ { ++ rep0 += RangeDecoderReverseBitTreeDecode( ++ p + SpecPos + rep0 - posSlot - 1, numDirectBits, &rd); ++ } ++ else ++ { ++ rep0 += RangeDecoderDecodeDirectBits(&rd, ++ numDirectBits - kNumAlignBits) << kNumAlignBits; ++ rep0 += RangeDecoderReverseBitTreeDecode(p + Align, kNumAlignBits, &rd); ++ } ++ } ++ else ++ rep0 = posSlot; ++ rep0++; ++ } ++ if (rep0 == (UInt32)(0)) ++ { ++ /* it's for stream version */ ++ len = -1; ++ break; ++ } ++ if (rep0 > nowPos ++ #ifdef _LZMA_OUT_READ ++ + globalPos ++ #endif ++ ) ++ { ++ return LZMA_RESULT_DATA_ERROR; ++ } ++ len += kMatchMinLen; ++ do ++ { ++ #ifdef _LZMA_OUT_READ ++ UInt32 pos = dictionaryPos - rep0; ++ if (pos >= dictionarySize) ++ pos += dictionarySize; ++ previousByte = dictionary[pos]; ++ dictionary[dictionaryPos] = previousByte; ++ if (++dictionaryPos == dictionarySize) ++ dictionaryPos = 0; ++ #else ++ previousByte = outStream[nowPos - rep0]; ++ #endif ++ outStream[nowPos++] = previousByte; ++ len--; ++ } ++ while(len > 0 && nowPos < outSize); ++ } ++ } ++ ++ #ifdef _LZMA_OUT_READ ++ vs->RangeDecoder = rd; ++ vs->DictionaryPos = dictionaryPos; ++ vs->GlobalPos = globalPos + nowPos; ++ vs->Reps[0] = rep0; ++ vs->Reps[1] = rep1; ++ vs->Reps[2] = rep2; ++ vs->Reps[3] = rep3; ++ vs->State = state; ++ vs->PreviousIsMatch = previousIsMatch; ++ vs->RemainLen = len; ++ #endif ++ ++ *outSizeProcessed = nowPos; ++ return LZMA_RESULT_OK; ++} +diff -Nur linux-2.6.12.5-brcm-squashfs/fs/squashfs/LzmaDecode.h linux-2.6.12.5-brcm-squashfs-lzma/fs/squashfs/LzmaDecode.h +--- linux-2.6.12.5-brcm-squashfs/fs/squashfs/LzmaDecode.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-brcm-squashfs-lzma/fs/squashfs/LzmaDecode.h 2005-08-29 00:02:44.099124176 +0200 +@@ -0,0 +1,100 @@ ++/* ++ LzmaDecode.h ++ LZMA Decoder interface ++ ++ LZMA SDK 4.05 Copyright (c) 1999-2004 Igor Pavlov (2004-08-25) ++ http://www.7-zip.org/ ++ ++ LZMA SDK is licensed under two licenses: ++ 1) GNU Lesser General Public License (GNU LGPL) ++ 2) Common Public License (CPL) ++ It means that you can select one of these two licenses and ++ follow rules of that license. ++ ++ SPECIAL EXCEPTION: ++ Igor Pavlov, as the author of this code, expressly permits you to ++ statically or dynamically link your code (or bind by name) to the ++ interfaces of this file without subjecting your linked code to the ++ terms of the CPL or GNU LGPL. Any modifications or additions ++ to this file, however, are subject to the LGPL or CPL terms. ++*/ ++ ++#ifndef __LZMADECODE_H ++#define __LZMADECODE_H ++ ++/* #define _LZMA_IN_CB */ ++/* Use callback for input data */ ++ ++/* #define _LZMA_OUT_READ */ ++/* Use read function for output data */ ++ ++/* #define _LZMA_PROB32 */ ++/* It can increase speed on some 32-bit CPUs, ++ but memory usage will be doubled in that case */ ++ ++/* #define _LZMA_LOC_OPT */ ++/* Enable local speed optimizations inside code */ ++ ++#ifndef UInt32 ++#ifdef _LZMA_UINT32_IS_ULONG ++#define UInt32 unsigned long ++#else ++#define UInt32 unsigned int ++#endif ++#endif ++ ++#ifdef _LZMA_PROB32 ++#define CProb UInt32 ++#else ++#define CProb unsigned short ++#endif ++ ++#define LZMA_RESULT_OK 0 ++#define LZMA_RESULT_DATA_ERROR 1 ++#define LZMA_RESULT_NOT_ENOUGH_MEM 2 ++ ++#ifdef _LZMA_IN_CB ++typedef struct _ILzmaInCallback ++{ ++ int (*Read)(void *object, unsigned char **buffer, UInt32 *bufferSize); ++} ILzmaInCallback; ++#endif ++ ++#define LZMA_BASE_SIZE 1846 ++#define LZMA_LIT_SIZE 768 ++ ++/* ++bufferSize = (LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)))* sizeof(CProb) ++bufferSize += 100 in case of _LZMA_OUT_READ ++by default CProb is unsigned short, ++but if specify _LZMA_PROB_32, CProb will be UInt32(unsigned int) ++*/ ++ ++#ifdef _LZMA_OUT_READ ++int LzmaDecoderInit( ++ unsigned char *buffer, UInt32 bufferSize, ++ int lc, int lp, int pb, ++ unsigned char *dictionary, UInt32 dictionarySize, ++ #ifdef _LZMA_IN_CB ++ ILzmaInCallback *inCallback ++ #else ++ unsigned char *inStream, UInt32 inSize ++ #endif ++); ++#endif ++ ++int LzmaDecode( ++ unsigned char *buffer, ++ #ifndef _LZMA_OUT_READ ++ UInt32 bufferSize, ++ int lc, int lp, int pb, ++ #ifdef _LZMA_IN_CB ++ ILzmaInCallback *inCallback, ++ #else ++ unsigned char *inStream, UInt32 inSize, ++ #endif ++ #endif ++ unsigned char *outStream, UInt32 outSize, ++ UInt32 *outSizeProcessed); ++ ++#endif +diff -Nur linux-2.6.12.5-brcm-squashfs/fs/squashfs/Makefile linux-2.6.12.5-brcm-squashfs-lzma/fs/squashfs/Makefile +--- linux-2.6.12.5-brcm-squashfs/fs/squashfs/Makefile 2005-08-28 23:44:05.046246000 +0200 ++++ linux-2.6.12.5-brcm-squashfs-lzma/fs/squashfs/Makefile 2005-08-29 00:06:21.872017664 +0200 +@@ -4,4 +4,4 @@ + + obj-$(CONFIG_SQUASHFS) += squashfs.o + +-squashfs-objs := inode.o ++squashfs-objs := inode.o LzmaDecode.o +diff -Nur linux-2.6.12.5-brcm-squashfs/fs/squashfs/inode.c linux-2.6.12.5-brcm-squashfs-lzma/fs/squashfs/inode.c +--- linux-2.6.12.5-brcm-squashfs/fs/squashfs/inode.c 2005-08-28 23:44:05.045246000 +0200 ++++ linux-2.6.12.5-brcm-squashfs-lzma/fs/squashfs/inode.c 2005-08-29 00:19:48.473476904 +0200 +@@ -3,6 +3,9 @@ + * + * Copyright (c) 2002, 2003, 2004, 2005 Phillip Lougher <phillip@lougher.demon.co.uk> + * ++ * LZMA decompressor support added by Oleg I. Vdovikin ++ * Copyright (c) 2005 Oleg I.Vdovikin <oleg@cs.msu.su> ++ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2, +@@ -20,7 +23,11 @@ + * inode.c + */ + ++#define SQUASHFS_LZMA ++ ++#ifndef SQUASHFS_LZMA + #define SQUASHFS_1_0_COMPATIBILITY ++#endif + + #include <linux/types.h> + #include <linux/squashfs_fs.h> +@@ -43,6 +50,19 @@ + #include <linux/blkdev.h> + #include <linux/vmalloc.h> + ++#ifdef SQUASHFS_LZMA ++#include "LzmaDecode.h" ++ ++/* default LZMA settings, should be in sync with mksquashfs */ ++#define LZMA_LC 3 ++#define LZMA_LP 0 ++#define LZMA_PB 2 ++ ++#define LZMA_WORKSPACE_SIZE ((LZMA_BASE_SIZE + \ ++ (LZMA_LIT_SIZE << (LZMA_LC + LZMA_LP))) * sizeof(CProb)) ++ ++#endif ++ + #ifdef SQUASHFS_TRACE + #define TRACE(s, args...) printk(KERN_NOTICE "SQUASHFS: "s, ## args) + #else +@@ -85,7 +105,11 @@ + + DECLARE_MUTEX(read_data_mutex); + ++#ifdef SQUASHFS_LZMA ++static unsigned char lzma_workspace[LZMA_WORKSPACE_SIZE]; ++#else + static z_stream stream; ++#endif + + static struct file_system_type squashfs_fs_type = { + .owner = THIS_MODULE, +@@ -274,6 +298,15 @@ + if(compressed) { + int zlib_err; + ++#ifdef SQUASHFS_LZMA ++ if ((zlib_err = LzmaDecode(lzma_workspace, ++ LZMA_WORKSPACE_SIZE, LZMA_LC, LZMA_LP, LZMA_PB, ++ c_buffer, c_byte, buffer, msBlk->read_size, &bytes)) != LZMA_RESULT_OK) ++ { ++ ERROR("lzma returned unexpected result 0x%x\n", zlib_err); ++ bytes = 0; ++ } ++#else + stream.next_in = c_buffer; + stream.avail_in = c_byte; + stream.next_out = buffer; +@@ -285,6 +318,7 @@ + bytes = 0; + } else + bytes = stream.total_out; ++#endif + up(&read_data_mutex); + } + +@@ -1725,14 +1759,17 @@ + + printk(KERN_INFO "Squashfs 2.2 (released 2005/07/03) (C) 2002-2005 Phillip Lougher\n"); + ++#ifndef SQUASHFS_LZMA + if(!(stream.workspace = (char *) vmalloc(zlib_inflate_workspacesize()))) { + ERROR("Failed to allocate zlib workspace\n"); + destroy_inodecache(); + return -ENOMEM; + } +- ++#endif + if((err = register_filesystem(&squashfs_fs_type))) { ++#ifndef SQUASHFS_LZMA + vfree(stream.workspace); ++#endif + destroy_inodecache(); + } + +@@ -1742,7 +1779,9 @@ + + static void __exit exit_squashfs_fs(void) + { ++#ifndef SQUASHFS_LZMA + vfree(stream.workspace); ++#endif + unregister_filesystem(&squashfs_fs_type); + destroy_inodecache(); + } diff --git a/openwrt/target/linux/generic-2.6/patches/003-net-b44-1.patch b/openwrt/target/linux/generic-2.6/patches/003-net-b44-1.patch new file mode 100644 index 0000000000..726ea542a3 --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/003-net-b44-1.patch @@ -0,0 +1,807 @@ +diff -ur linux-2.6.15-rc5/drivers/net/b44.c linux-2.6.15-rc5-openwrt/drivers/net/b44.c +--- linux-2.6.15-rc5/drivers/net/b44.c 2005-12-04 06:10:42.000000000 +0100 ++++ linux-2.6.15-rc5-openwrt/drivers/net/b44.c 2005-08-15 02:20:18.000000000 +0200 +@@ -18,7 +18,7 @@ + #include <linux/pci.h> + #include <linux/delay.h> + #include <linux/init.h> +-#include <linux/dma-mapping.h> ++#include <linux/version.h> + + #include <asm/uaccess.h> + #include <asm/io.h> +@@ -28,8 +28,8 @@ + + #define DRV_MODULE_NAME "b44" + #define PFX DRV_MODULE_NAME ": " +-#define DRV_MODULE_VERSION "0.97" +-#define DRV_MODULE_RELDATE "Nov 30, 2005" ++#define DRV_MODULE_VERSION "0.95" ++#define DRV_MODULE_RELDATE "Aug 3, 2004" + + #define B44_DEF_MSG_ENABLE \ + (NETIF_MSG_DRV | \ +@@ -101,35 +101,10 @@ + static void b44_halt(struct b44 *); + static void b44_init_rings(struct b44 *); + static void b44_init_hw(struct b44 *); +- +-static int dma_desc_align_mask; +-static int dma_desc_sync_size; +- +-static const char b44_gstrings[][ETH_GSTRING_LEN] = { +-#define _B44(x...) # x, +-B44_STAT_REG_DECLARE +-#undef _B44 +-}; +- +-static inline void b44_sync_dma_desc_for_device(struct pci_dev *pdev, +- dma_addr_t dma_base, +- unsigned long offset, +- enum dma_data_direction dir) +-{ +- dma_sync_single_range_for_device(&pdev->dev, dma_base, +- offset & dma_desc_align_mask, +- dma_desc_sync_size, dir); +-} +- +-static inline void b44_sync_dma_desc_for_cpu(struct pci_dev *pdev, +- dma_addr_t dma_base, +- unsigned long offset, +- enum dma_data_direction dir) +-{ +- dma_sync_single_range_for_cpu(&pdev->dev, dma_base, +- offset & dma_desc_align_mask, +- dma_desc_sync_size, dir); +-} ++static int b44_poll(struct net_device *dev, int *budget); ++#ifdef CONFIG_NET_POLL_CONTROLLER ++static void b44_poll_controller(struct net_device *dev); ++#endif + + static inline unsigned long br32(const struct b44 *bp, unsigned long reg) + { +@@ -503,10 +478,7 @@ + for (reg = B44_TX_GOOD_O; reg <= B44_TX_PAUSE; reg += 4UL) { + *val++ += br32(bp, reg); + } +- +- /* Pad */ +- reg += 8*4UL; +- ++ val = &bp->hw_stats.rx_good_octets; + for (reg = B44_RX_GOOD_O; reg <= B44_RX_NPAUSE; reg += 4UL) { + *val++ += br32(bp, reg); + } +@@ -657,7 +629,7 @@ + + /* Hardware bug work-around, the chip is unable to do PCI DMA + to/from anything above 1GB :-( */ +- if (mapping + RX_PKT_BUF_SZ > B44_DMA_MASK) { ++ if(mapping+RX_PKT_BUF_SZ > B44_DMA_MASK) { + /* Sigh... */ + pci_unmap_single(bp->pdev, mapping, RX_PKT_BUF_SZ,PCI_DMA_FROMDEVICE); + dev_kfree_skb_any(skb); +@@ -667,7 +639,7 @@ + mapping = pci_map_single(bp->pdev, skb->data, + RX_PKT_BUF_SZ, + PCI_DMA_FROMDEVICE); +- if (mapping + RX_PKT_BUF_SZ > B44_DMA_MASK) { ++ if(mapping+RX_PKT_BUF_SZ > B44_DMA_MASK) { + pci_unmap_single(bp->pdev, mapping, RX_PKT_BUF_SZ,PCI_DMA_FROMDEVICE); + dev_kfree_skb_any(skb); + return -ENOMEM; +@@ -696,11 +668,6 @@ + dp->ctrl = cpu_to_le32(ctrl); + dp->addr = cpu_to_le32((u32) mapping + bp->rx_offset + bp->dma_offset); + +- if (bp->flags & B44_FLAG_RX_RING_HACK) +- b44_sync_dma_desc_for_device(bp->pdev, bp->rx_ring_dma, +- dest_idx * sizeof(dp), +- DMA_BIDIRECTIONAL); +- + return RX_PKT_BUF_SZ; + } + +@@ -725,11 +692,6 @@ + pci_unmap_addr_set(dest_map, mapping, + pci_unmap_addr(src_map, mapping)); + +- if (bp->flags & B44_FLAG_RX_RING_HACK) +- b44_sync_dma_desc_for_cpu(bp->pdev, bp->rx_ring_dma, +- src_idx * sizeof(src_desc), +- DMA_BIDIRECTIONAL); +- + ctrl = src_desc->ctrl; + if (dest_idx == (B44_RX_RING_SIZE - 1)) + ctrl |= cpu_to_le32(DESC_CTRL_EOT); +@@ -738,14 +700,8 @@ + + dest_desc->ctrl = ctrl; + dest_desc->addr = src_desc->addr; +- + src_map->skb = NULL; + +- if (bp->flags & B44_FLAG_RX_RING_HACK) +- b44_sync_dma_desc_for_device(bp->pdev, bp->rx_ring_dma, +- dest_idx * sizeof(dest_desc), +- DMA_BIDIRECTIONAL); +- + pci_dma_sync_single_for_device(bp->pdev, src_desc->addr, + RX_PKT_BUF_SZ, + PCI_DMA_FROMDEVICE); +@@ -894,10 +850,11 @@ + { + struct net_device *dev = dev_id; + struct b44 *bp = netdev_priv(dev); ++ unsigned long flags; + u32 istat, imask; + int handled = 0; + +- spin_lock(&bp->lock); ++ spin_lock_irqsave(&bp->lock, flags); + + istat = br32(bp, B44_ISTAT); + imask = br32(bp, B44_IMASK); +@@ -908,12 +865,6 @@ + istat &= imask; + if (istat) { + handled = 1; +- +- if (unlikely(!netif_running(dev))) { +- printk(KERN_INFO "%s: late interrupt.\n", dev->name); +- goto irq_ack; +- } +- + if (netif_rx_schedule_prep(dev)) { + /* NOTE: These writes are posted by the readback of + * the ISTAT register below. +@@ -926,11 +877,10 @@ + dev->name); + } + +-irq_ack: + bw32(bp, B44_ISTAT, istat); + br32(bp, B44_ISTAT); + } +- spin_unlock(&bp->lock); ++ spin_unlock_irqrestore(&bp->lock, flags); + return IRQ_RETVAL(handled); + } + +@@ -958,7 +908,6 @@ + { + struct b44 *bp = netdev_priv(dev); + struct sk_buff *bounce_skb; +- int rc = NETDEV_TX_OK; + dma_addr_t mapping; + u32 len, entry, ctrl; + +@@ -968,28 +917,29 @@ + /* This is a hard error, log it. */ + if (unlikely(TX_BUFFS_AVAIL(bp) < 1)) { + netif_stop_queue(dev); ++ spin_unlock_irq(&bp->lock); + printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n", + dev->name); +- goto err_out; ++ return 1; + } + + mapping = pci_map_single(bp->pdev, skb->data, len, PCI_DMA_TODEVICE); +- if (mapping + len > B44_DMA_MASK) { ++ if(mapping+len > B44_DMA_MASK) { + /* Chip can't handle DMA to/from >1GB, use bounce buffer */ + pci_unmap_single(bp->pdev, mapping, len, PCI_DMA_TODEVICE); + + bounce_skb = __dev_alloc_skb(TX_PKT_BUF_SZ, + GFP_ATOMIC|GFP_DMA); + if (!bounce_skb) +- goto err_out; ++ return NETDEV_TX_BUSY; + + mapping = pci_map_single(bp->pdev, bounce_skb->data, + len, PCI_DMA_TODEVICE); +- if (mapping + len > B44_DMA_MASK) { ++ if(mapping+len > B44_DMA_MASK) { + pci_unmap_single(bp->pdev, mapping, + len, PCI_DMA_TODEVICE); + dev_kfree_skb_any(bounce_skb); +- goto err_out; ++ return NETDEV_TX_BUSY; + } + + memcpy(skb_put(bounce_skb, len), skb->data, skb->len); +@@ -1009,11 +959,6 @@ + bp->tx_ring[entry].ctrl = cpu_to_le32(ctrl); + bp->tx_ring[entry].addr = cpu_to_le32((u32) mapping+bp->dma_offset); + +- if (bp->flags & B44_FLAG_TX_RING_HACK) +- b44_sync_dma_desc_for_device(bp->pdev, bp->tx_ring_dma, +- entry * sizeof(bp->tx_ring[0]), +- DMA_TO_DEVICE); +- + entry = NEXT_TX(entry); + + bp->tx_prod = entry; +@@ -1029,16 +974,11 @@ + if (TX_BUFFS_AVAIL(bp) < 1) + netif_stop_queue(dev); + +- dev->trans_start = jiffies; +- +-out_unlock: + spin_unlock_irq(&bp->lock); + +- return rc; ++ dev->trans_start = jiffies; + +-err_out: +- rc = NETDEV_TX_BUSY; +- goto out_unlock; ++ return 0; + } + + static int b44_change_mtu(struct net_device *dev, int new_mtu) +@@ -1112,7 +1052,8 @@ + * + * The chip has been shut down and the driver detached from + * the networking, so no interrupts or new tx packets will +- * end up in the driver. ++ * end up in the driver. bp->lock is not held and we are not ++ * in an interrupt context and thus may sleep. + */ + static void b44_init_rings(struct b44 *bp) + { +@@ -1123,16 +1064,6 @@ + memset(bp->rx_ring, 0, B44_RX_RING_BYTES); + memset(bp->tx_ring, 0, B44_TX_RING_BYTES); + +- if (bp->flags & B44_FLAG_RX_RING_HACK) +- dma_sync_single_for_device(&bp->pdev->dev, bp->rx_ring_dma, +- DMA_TABLE_BYTES, +- PCI_DMA_BIDIRECTIONAL); +- +- if (bp->flags & B44_FLAG_TX_RING_HACK) +- dma_sync_single_for_device(&bp->pdev->dev, bp->tx_ring_dma, +- DMA_TABLE_BYTES, +- PCI_DMA_TODEVICE); +- + for (i = 0; i < bp->rx_pending; i++) { + if (b44_alloc_rx_skb(bp, -1, i) < 0) + break; +@@ -1145,33 +1076,23 @@ + */ + static void b44_free_consistent(struct b44 *bp) + { +- kfree(bp->rx_buffers); +- bp->rx_buffers = NULL; +- kfree(bp->tx_buffers); +- bp->tx_buffers = NULL; ++ if (bp->rx_buffers) { ++ kfree(bp->rx_buffers); ++ bp->rx_buffers = NULL; ++ } ++ if (bp->tx_buffers) { ++ kfree(bp->tx_buffers); ++ bp->tx_buffers = NULL; ++ } + if (bp->rx_ring) { +- if (bp->flags & B44_FLAG_RX_RING_HACK) { +- dma_unmap_single(&bp->pdev->dev, bp->rx_ring_dma, +- DMA_TABLE_BYTES, +- DMA_BIDIRECTIONAL); +- kfree(bp->rx_ring); +- } else +- pci_free_consistent(bp->pdev, DMA_TABLE_BYTES, +- bp->rx_ring, bp->rx_ring_dma); ++ pci_free_consistent(bp->pdev, DMA_TABLE_BYTES, ++ bp->rx_ring, bp->rx_ring_dma); + bp->rx_ring = NULL; +- bp->flags &= ~B44_FLAG_RX_RING_HACK; + } + if (bp->tx_ring) { +- if (bp->flags & B44_FLAG_TX_RING_HACK) { +- dma_unmap_single(&bp->pdev->dev, bp->tx_ring_dma, +- DMA_TABLE_BYTES, +- DMA_TO_DEVICE); +- kfree(bp->tx_ring); +- } else +- pci_free_consistent(bp->pdev, DMA_TABLE_BYTES, +- bp->tx_ring, bp->tx_ring_dma); ++ pci_free_consistent(bp->pdev, DMA_TABLE_BYTES, ++ bp->tx_ring, bp->tx_ring_dma); + bp->tx_ring = NULL; +- bp->flags &= ~B44_FLAG_TX_RING_HACK; + } + } + +@@ -1184,67 +1105,25 @@ + int size; + + size = B44_RX_RING_SIZE * sizeof(struct ring_info); +- bp->rx_buffers = kzalloc(size, GFP_KERNEL); ++ bp->rx_buffers = kmalloc(size, GFP_KERNEL); + if (!bp->rx_buffers) + goto out_err; ++ memset(bp->rx_buffers, 0, size); + + size = B44_TX_RING_SIZE * sizeof(struct ring_info); +- bp->tx_buffers = kzalloc(size, GFP_KERNEL); ++ bp->tx_buffers = kmalloc(size, GFP_KERNEL); + if (!bp->tx_buffers) + goto out_err; ++ memset(bp->tx_buffers, 0, size); + + size = DMA_TABLE_BYTES; + bp->rx_ring = pci_alloc_consistent(bp->pdev, size, &bp->rx_ring_dma); +- if (!bp->rx_ring) { +- /* Allocation may have failed due to pci_alloc_consistent +- insisting on use of GFP_DMA, which is more restrictive +- than necessary... */ +- struct dma_desc *rx_ring; +- dma_addr_t rx_ring_dma; +- +- rx_ring = kzalloc(size, GFP_KERNEL); +- if (!rx_ring) +- goto out_err; +- +- rx_ring_dma = dma_map_single(&bp->pdev->dev, rx_ring, +- DMA_TABLE_BYTES, +- DMA_BIDIRECTIONAL); +- +- if (rx_ring_dma + size > B44_DMA_MASK) { +- kfree(rx_ring); +- goto out_err; +- } +- +- bp->rx_ring = rx_ring; +- bp->rx_ring_dma = rx_ring_dma; +- bp->flags |= B44_FLAG_RX_RING_HACK; +- } ++ if (!bp->rx_ring) ++ goto out_err; + + bp->tx_ring = pci_alloc_consistent(bp->pdev, size, &bp->tx_ring_dma); +- if (!bp->tx_ring) { +- /* Allocation may have failed due to pci_alloc_consistent +- insisting on use of GFP_DMA, which is more restrictive +- than necessary... */ +- struct dma_desc *tx_ring; +- dma_addr_t tx_ring_dma; +- +- tx_ring = kzalloc(size, GFP_KERNEL); +- if (!tx_ring) +- goto out_err; +- +- tx_ring_dma = dma_map_single(&bp->pdev->dev, tx_ring, +- DMA_TABLE_BYTES, +- DMA_TO_DEVICE); +- +- if (tx_ring_dma + size > B44_DMA_MASK) { +- kfree(tx_ring); +- goto out_err; +- } +- +- bp->tx_ring = tx_ring; +- bp->tx_ring_dma = tx_ring_dma; +- bp->flags |= B44_FLAG_TX_RING_HACK; +- } ++ if (!bp->tx_ring) ++ goto out_err; + + return 0; + +@@ -1394,21 +1273,19 @@ + + err = b44_alloc_consistent(bp); + if (err) +- goto out; ++ return err; ++ ++ err = request_irq(dev->irq, b44_interrupt, SA_SHIRQ, dev->name, dev); ++ if (err) ++ goto err_out_free; ++ ++ spin_lock_irq(&bp->lock); + + b44_init_rings(bp); + b44_init_hw(bp); ++ bp->flags |= B44_FLAG_INIT_COMPLETE; + +- netif_carrier_off(dev); +- b44_check_phy(bp); +- +- err = request_irq(dev->irq, b44_interrupt, SA_SHIRQ, dev->name, dev); +- if (unlikely(err < 0)) { +- b44_chip_reset(bp); +- b44_free_rings(bp); +- b44_free_consistent(bp); +- goto out; +- } ++ spin_unlock_irq(&bp->lock); + + init_timer(&bp->timer); + bp->timer.expires = jiffies + HZ; +@@ -1417,8 +1294,11 @@ + add_timer(&bp->timer); + + b44_enable_ints(bp); +- netif_start_queue(dev); +-out: ++ ++ return 0; ++ ++err_out_free: ++ b44_free_consistent(bp); + return err; + } + +@@ -1453,8 +1333,6 @@ + + netif_stop_queue(dev); + +- netif_poll_disable(dev); +- + del_timer_sync(&bp->timer); + + spin_lock_irq(&bp->lock); +@@ -1464,14 +1342,13 @@ + #endif + b44_halt(bp); + b44_free_rings(bp); ++ bp->flags &= ~B44_FLAG_INIT_COMPLETE; + netif_carrier_off(bp->dev); + + spin_unlock_irq(&bp->lock); + + free_irq(dev->irq, dev); + +- netif_poll_enable(dev); +- + b44_free_consistent(bp); + + return 0; +@@ -1536,6 +1413,8 @@ + { + struct b44 *bp = netdev_priv(dev); + u32 val; ++ int i=0; ++ unsigned char zero[6] = {0,0,0,0,0,0}; + + val = br32(bp, B44_RXCONFIG); + val &= ~(RXCONFIG_PROMISC | RXCONFIG_ALLMULTI); +@@ -1543,17 +1422,14 @@ + val |= RXCONFIG_PROMISC; + bw32(bp, B44_RXCONFIG, val); + } else { +- unsigned char zero[6] = {0, 0, 0, 0, 0, 0}; +- int i = 0; +- + __b44_set_mac_addr(bp); + + if (dev->flags & IFF_ALLMULTI) + val |= RXCONFIG_ALLMULTI; + else +- i = __b44_load_mcast(bp, dev); ++ i=__b44_load_mcast(bp, dev); + +- for (; i < 64; i++) { ++ for(;i<64;i++) { + __b44_cam_write(bp, zero, i); + } + bw32(bp, B44_RXCONFIG, val); +@@ -1617,7 +1493,7 @@ + { + struct b44 *bp = netdev_priv(dev); + +- if (!netif_running(dev)) ++ if (!(bp->flags & B44_FLAG_INIT_COMPLETE)) + return -EAGAIN; + cmd->supported = (SUPPORTED_Autoneg); + cmd->supported |= (SUPPORTED_100baseT_Half | +@@ -1628,14 +1504,14 @@ + + cmd->advertising = 0; + if (bp->flags & B44_FLAG_ADV_10HALF) +- cmd->advertising |= ADVERTISED_10baseT_Half; ++ cmd->advertising |= ADVERTISE_10HALF; + if (bp->flags & B44_FLAG_ADV_10FULL) +- cmd->advertising |= ADVERTISED_10baseT_Full; ++ cmd->advertising |= ADVERTISE_10FULL; + if (bp->flags & B44_FLAG_ADV_100HALF) +- cmd->advertising |= ADVERTISED_100baseT_Half; ++ cmd->advertising |= ADVERTISE_100HALF; + if (bp->flags & B44_FLAG_ADV_100FULL) +- cmd->advertising |= ADVERTISED_100baseT_Full; +- cmd->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; ++ cmd->advertising |= ADVERTISE_100FULL; ++ cmd->advertising |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; + cmd->speed = (bp->flags & B44_FLAG_100_BASE_T) ? + SPEED_100 : SPEED_10; + cmd->duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ? +@@ -1655,7 +1531,7 @@ + { + struct b44 *bp = netdev_priv(dev); + +- if (!netif_running(dev)) ++ if (!(bp->flags & B44_FLAG_INIT_COMPLETE)) + return -EAGAIN; + + /* We do not support gigabit. */ +@@ -1785,37 +1661,6 @@ + return 0; + } + +-static void b44_get_strings(struct net_device *dev, u32 stringset, u8 *data) +-{ +- switch(stringset) { +- case ETH_SS_STATS: +- memcpy(data, *b44_gstrings, sizeof(b44_gstrings)); +- break; +- } +-} +- +-static int b44_get_stats_count(struct net_device *dev) +-{ +- return ARRAY_SIZE(b44_gstrings); +-} +- +-static void b44_get_ethtool_stats(struct net_device *dev, +- struct ethtool_stats *stats, u64 *data) +-{ +- struct b44 *bp = netdev_priv(dev); +- u32 *val = &bp->hw_stats.tx_good_octets; +- u32 i; +- +- spin_lock_irq(&bp->lock); +- +- b44_stats_update(bp); +- +- for (i = 0; i < ARRAY_SIZE(b44_gstrings); i++) +- *data++ = *val++; +- +- spin_unlock_irq(&bp->lock); +-} +- + static struct ethtool_ops b44_ethtool_ops = { + .get_drvinfo = b44_get_drvinfo, + .get_settings = b44_get_settings, +@@ -1828,25 +1673,18 @@ + .set_pauseparam = b44_set_pauseparam, + .get_msglevel = b44_get_msglevel, + .set_msglevel = b44_set_msglevel, +- .get_strings = b44_get_strings, +- .get_stats_count = b44_get_stats_count, +- .get_ethtool_stats = b44_get_ethtool_stats, +- .get_perm_addr = ethtool_op_get_perm_addr, + }; + + static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) + { + struct mii_ioctl_data *data = if_mii(ifr); + struct b44 *bp = netdev_priv(dev); +- int err = -EINVAL; +- +- if (!netif_running(dev)) +- goto out; ++ int err; + + spin_lock_irq(&bp->lock); + err = generic_mii_ioctl(&bp->mii_if, data, cmd, NULL); + spin_unlock_irq(&bp->lock); +-out: ++ + return err; + } + +@@ -1877,7 +1715,6 @@ + bp->dev->dev_addr[3] = eeprom[80]; + bp->dev->dev_addr[4] = eeprom[83]; + bp->dev->dev_addr[5] = eeprom[82]; +- memcpy(bp->dev->perm_addr, bp->dev->dev_addr, bp->dev->addr_len); + + bp->phy_addr = eeprom[90] & 0x1f; + +@@ -1942,9 +1779,9 @@ + + err = pci_set_consistent_dma_mask(pdev, (u64) B44_DMA_MASK); + if (err) { +- printk(KERN_ERR PFX "No usable DMA configuration, " +- "aborting.\n"); +- goto err_out_free_res; ++ printk(KERN_ERR PFX "No usable DMA configuration, " ++ "aborting.\n"); ++ goto err_out_free_res; + } + + b44reg_base = pci_resource_start(pdev, 0); +@@ -1966,8 +1803,10 @@ + bp = netdev_priv(dev); + bp->pdev = pdev; + bp->dev = dev; +- +- bp->msg_enable = netif_msg_init(b44_debug, B44_DEF_MSG_ENABLE); ++ if (b44_debug >= 0) ++ bp->msg_enable = (1 << b44_debug) - 1; ++ else ++ bp->msg_enable = B44_DEF_MSG_ENABLE; + + spin_lock_init(&bp->lock); + +@@ -2057,14 +1896,17 @@ + static void __devexit b44_remove_one(struct pci_dev *pdev) + { + struct net_device *dev = pci_get_drvdata(pdev); +- struct b44 *bp = netdev_priv(dev); + +- unregister_netdev(dev); +- iounmap(bp->regs); +- free_netdev(dev); +- pci_release_regions(pdev); +- pci_disable_device(pdev); +- pci_set_drvdata(pdev, NULL); ++ if (dev) { ++ struct b44 *bp = netdev_priv(dev); ++ ++ unregister_netdev(dev); ++ iounmap(bp->regs); ++ free_netdev(dev); ++ pci_release_regions(pdev); ++ pci_disable_device(pdev); ++ pci_set_drvdata(pdev, NULL); ++ } + } + + static int b44_suspend(struct pci_dev *pdev, pm_message_t state) +@@ -2085,9 +1927,6 @@ + b44_free_rings(bp); + + spin_unlock_irq(&bp->lock); +- +- free_irq(dev->irq, dev); +- pci_disable_device(pdev); + return 0; + } + +@@ -2097,15 +1936,10 @@ + struct b44 *bp = netdev_priv(dev); + + pci_restore_state(pdev); +- pci_enable_device(pdev); +- pci_set_master(pdev); + + if (!netif_running(dev)) + return 0; + +- if (request_irq(dev->irq, b44_interrupt, SA_SHIRQ, dev->name, dev)) +- printk(KERN_ERR PFX "%s: request_irq failed\n", dev->name); +- + spin_lock_irq(&bp->lock); + + b44_init_rings(bp); +@@ -2117,7 +1951,6 @@ + add_timer(&bp->timer); + + b44_enable_ints(bp); +- netif_wake_queue(dev); + return 0; + } + +@@ -2132,12 +1965,6 @@ + + static int __init b44_init(void) + { +- unsigned int dma_desc_align_size = dma_get_cache_alignment(); +- +- /* Setup paramaters for syncing RX/TX DMA descriptors */ +- dma_desc_align_mask = ~(dma_desc_align_size - 1); +- dma_desc_sync_size = max(dma_desc_align_size, sizeof(struct dma_desc)); +- + return pci_module_init(&b44_driver); + } + +diff -ur linux-2.6.15-rc5/drivers/net/b44.h linux-2.6.15-rc5-openwrt/drivers/net/b44.h +--- linux-2.6.15-rc5/drivers/net/b44.h 2005-12-04 06:10:42.000000000 +0100 ++++ linux-2.6.15-rc5-openwrt/drivers/net/b44.h 2005-08-15 02:20:18.000000000 +0200 +@@ -346,63 +346,29 @@ + + #define B44_MCAST_TABLE_SIZE 32 + +-#define B44_STAT_REG_DECLARE \ +- _B44(tx_good_octets) \ +- _B44(tx_good_pkts) \ +- _B44(tx_octets) \ +- _B44(tx_pkts) \ +- _B44(tx_broadcast_pkts) \ +- _B44(tx_multicast_pkts) \ +- _B44(tx_len_64) \ +- _B44(tx_len_65_to_127) \ +- _B44(tx_len_128_to_255) \ +- _B44(tx_len_256_to_511) \ +- _B44(tx_len_512_to_1023) \ +- _B44(tx_len_1024_to_max) \ +- _B44(tx_jabber_pkts) \ +- _B44(tx_oversize_pkts) \ +- _B44(tx_fragment_pkts) \ +- _B44(tx_underruns) \ +- _B44(tx_total_cols) \ +- _B44(tx_single_cols) \ +- _B44(tx_multiple_cols) \ +- _B44(tx_excessive_cols) \ +- _B44(tx_late_cols) \ +- _B44(tx_defered) \ +- _B44(tx_carrier_lost) \ +- _B44(tx_pause_pkts) \ +- _B44(rx_good_octets) \ +- _B44(rx_good_pkts) \ +- _B44(rx_octets) \ +- _B44(rx_pkts) \ +- _B44(rx_broadcast_pkts) \ +- _B44(rx_multicast_pkts) \ +- _B44(rx_len_64) \ +- _B44(rx_len_65_to_127) \ +- _B44(rx_len_128_to_255) \ +- _B44(rx_len_256_to_511) \ +- _B44(rx_len_512_to_1023) \ +- _B44(rx_len_1024_to_max) \ +- _B44(rx_jabber_pkts) \ +- _B44(rx_oversize_pkts) \ +- _B44(rx_fragment_pkts) \ +- _B44(rx_missed_pkts) \ +- _B44(rx_crc_align_errs) \ +- _B44(rx_undersize) \ +- _B44(rx_crc_errs) \ +- _B44(rx_align_errs) \ +- _B44(rx_symbol_errs) \ +- _B44(rx_pause_pkts) \ +- _B44(rx_nonpause_pkts) +- + /* SW copy of device statistics, kept up to date by periodic timer +- * which probes HW values. Check b44_stats_update if you mess with +- * the layout ++ * which probes HW values. Must have same relative layout as HW ++ * register above, because b44_stats_update depends upon this. + */ + struct b44_hw_stats { +-#define _B44(x) u32 x; +-B44_STAT_REG_DECLARE +-#undef _B44 ++ u32 tx_good_octets, tx_good_pkts, tx_octets; ++ u32 tx_pkts, tx_broadcast_pkts, tx_multicast_pkts; ++ u32 tx_len_64, tx_len_65_to_127, tx_len_128_to_255; ++ u32 tx_len_256_to_511, tx_len_512_to_1023, tx_len_1024_to_max; ++ u32 tx_jabber_pkts, tx_oversize_pkts, tx_fragment_pkts; ++ u32 tx_underruns, tx_total_cols, tx_single_cols; ++ u32 tx_multiple_cols, tx_excessive_cols, tx_late_cols; ++ u32 tx_defered, tx_carrier_lost, tx_pause_pkts; ++ u32 __pad1[8]; ++ ++ u32 rx_good_octets, rx_good_pkts, rx_octets; ++ u32 rx_pkts, rx_broadcast_pkts, rx_multicast_pkts; ++ u32 rx_len_64, rx_len_65_to_127, rx_len_128_to_255; ++ u32 rx_len_256_to_511, rx_len_512_to_1023, rx_len_1024_to_max; ++ u32 rx_jabber_pkts, rx_oversize_pkts, rx_fragment_pkts; ++ u32 rx_missed_pkts, rx_crc_align_errs, rx_undersize; ++ u32 rx_crc_errs, rx_align_errs, rx_symbol_errs; ++ u32 rx_pause_pkts, rx_nonpause_pkts; + }; + + struct b44 { +@@ -420,6 +386,7 @@ + + u32 dma_offset; + u32 flags; ++#define B44_FLAG_INIT_COMPLETE 0x00000001 + #define B44_FLAG_BUGGY_TXPTR 0x00000002 + #define B44_FLAG_REORDER_BUG 0x00000004 + #define B44_FLAG_PAUSE_AUTO 0x00008000 +@@ -433,8 +400,6 @@ + #define B44_FLAG_ADV_100HALF 0x04000000 + #define B44_FLAG_ADV_100FULL 0x08000000 + #define B44_FLAG_INTERNAL_PHY 0x10000000 +-#define B44_FLAG_RX_RING_HACK 0x20000000 +-#define B44_FLAG_TX_RING_HACK 0x40000000 + + u32 rx_offset; + diff --git a/openwrt/target/linux/generic-2.6/patches/003-net-b44-2.patch b/openwrt/target/linux/generic-2.6/patches/003-net-b44-2.patch new file mode 100644 index 0000000000..8bfe429698 --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/003-net-b44-2.patch @@ -0,0 +1,1089 @@ +diff -ur linux-2.6.14.3/drivers/net/b44.c linux-2.6.14.3-openwrt/drivers/net/b44.c +--- linux-2.6.14.3/drivers/net/b44.c 2005-11-24 23:10:21.000000000 +0100 ++++ linux-2.6.14.3-openwrt/drivers/net/b44.c 2005-12-08 13:24:35.000000000 +0100 +@@ -1,7 +1,8 @@ +-/* b44.c: Broadcom 4400 device driver. ++/* b44.c: Broadcom 4400/47xx device driver. + * + * Copyright (C) 2002 David S. Miller (davem@redhat.com) +- * Fixed by Pekka Pietikainen (pp@ee.oulu.fi) ++ * Copyright (C) 2004 Pekka Pietikainen (pp@ee.oulu.fi) ++ * Copyright (C) 2004 Florian Schirmer (jolt@tuxbox.org) + * + * Distribute under GPL. + */ +@@ -78,7 +79,7 @@ + DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; + + MODULE_AUTHOR("Florian Schirmer, Pekka Pietikainen, David S. Miller"); +-MODULE_DESCRIPTION("Broadcom 4400 10/100 PCI ethernet driver"); ++MODULE_DESCRIPTION("Broadcom 4400/47xx 10/100 PCI ethernet driver"); + MODULE_LICENSE("GPL"); + MODULE_VERSION(DRV_MODULE_VERSION); + +@@ -93,6 +94,8 @@ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_BCM4401B1, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, ++ { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_BCM4713, ++ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { } /* terminate list with empty entry */ + }; + +@@ -106,24 +109,13 @@ + static void b44_poll_controller(struct net_device *dev); + #endif + +-static inline unsigned long br32(const struct b44 *bp, unsigned long reg) +-{ +- return readl(bp->regs + reg); +-} +- +-static inline void bw32(const struct b44 *bp, +- unsigned long reg, unsigned long val) +-{ +- writel(val, bp->regs + reg); +-} +- + static int b44_wait_bit(struct b44 *bp, unsigned long reg, + u32 bit, unsigned long timeout, const int clear) + { + unsigned long i; + + for (i = 0; i < timeout; i++) { +- u32 val = br32(bp, reg); ++ u32 val = br32(reg); + + if (clear && !(val & bit)) + break; +@@ -154,7 +146,7 @@ + + static u32 ssb_get_core_rev(struct b44 *bp) + { +- return (br32(bp, B44_SBIDHIGH) & SBIDHIGH_RC_MASK); ++ return (br32(B44_SBIDHIGH) & SBIDHIGH_RC_MASK); + } + + static u32 ssb_pci_setup(struct b44 *bp, u32 cores) +@@ -165,13 +157,13 @@ + pci_write_config_dword(bp->pdev, SSB_BAR0_WIN, BCM4400_PCI_CORE_ADDR); + pci_rev = ssb_get_core_rev(bp); + +- val = br32(bp, B44_SBINTVEC); ++ val = br32(B44_SBINTVEC); + val |= cores; +- bw32(bp, B44_SBINTVEC, val); ++ bw32(B44_SBINTVEC, val); + +- val = br32(bp, SSB_PCI_TRANS_2); ++ val = br32(SSB_PCI_TRANS_2); + val |= SSB_PCI_PREF | SSB_PCI_BURST; +- bw32(bp, SSB_PCI_TRANS_2, val); ++ bw32(SSB_PCI_TRANS_2, val); + + pci_write_config_dword(bp->pdev, SSB_BAR0_WIN, bar_orig); + +@@ -180,18 +172,18 @@ + + static void ssb_core_disable(struct b44 *bp) + { +- if (br32(bp, B44_SBTMSLOW) & SBTMSLOW_RESET) ++ if (br32(B44_SBTMSLOW) & SBTMSLOW_RESET) + return; + +- bw32(bp, B44_SBTMSLOW, (SBTMSLOW_REJECT | SBTMSLOW_CLOCK)); ++ bw32(B44_SBTMSLOW, (SBTMSLOW_REJECT | SBTMSLOW_CLOCK)); + b44_wait_bit(bp, B44_SBTMSLOW, SBTMSLOW_REJECT, 100000, 0); + b44_wait_bit(bp, B44_SBTMSHIGH, SBTMSHIGH_BUSY, 100000, 1); +- bw32(bp, B44_SBTMSLOW, (SBTMSLOW_FGC | SBTMSLOW_CLOCK | ++ bw32(B44_SBTMSLOW, (SBTMSLOW_FGC | SBTMSLOW_CLOCK | + SBTMSLOW_REJECT | SBTMSLOW_RESET)); +- br32(bp, B44_SBTMSLOW); ++ br32(B44_SBTMSLOW); + udelay(1); +- bw32(bp, B44_SBTMSLOW, (SBTMSLOW_REJECT | SBTMSLOW_RESET)); +- br32(bp, B44_SBTMSLOW); ++ bw32(B44_SBTMSLOW, (SBTMSLOW_REJECT | SBTMSLOW_RESET)); ++ br32(B44_SBTMSLOW); + udelay(1); + } + +@@ -200,58 +192,65 @@ + u32 val; + + ssb_core_disable(bp); +- bw32(bp, B44_SBTMSLOW, (SBTMSLOW_RESET | SBTMSLOW_CLOCK | SBTMSLOW_FGC)); +- br32(bp, B44_SBTMSLOW); ++ bw32(B44_SBTMSLOW, (SBTMSLOW_RESET | SBTMSLOW_CLOCK | SBTMSLOW_FGC)); ++ br32(B44_SBTMSLOW); + udelay(1); + + /* Clear SERR if set, this is a hw bug workaround. */ +- if (br32(bp, B44_SBTMSHIGH) & SBTMSHIGH_SERR) +- bw32(bp, B44_SBTMSHIGH, 0); ++ if (br32(B44_SBTMSHIGH) & SBTMSHIGH_SERR) ++ bw32(B44_SBTMSHIGH, 0); + +- val = br32(bp, B44_SBIMSTATE); ++ val = br32(B44_SBIMSTATE); + if (val & (SBIMSTATE_IBE | SBIMSTATE_TO)) +- bw32(bp, B44_SBIMSTATE, val & ~(SBIMSTATE_IBE | SBIMSTATE_TO)); ++ bw32(B44_SBIMSTATE, val & ~(SBIMSTATE_IBE | SBIMSTATE_TO)); + +- bw32(bp, B44_SBTMSLOW, (SBTMSLOW_CLOCK | SBTMSLOW_FGC)); +- br32(bp, B44_SBTMSLOW); ++ bw32(B44_SBTMSLOW, (SBTMSLOW_CLOCK | SBTMSLOW_FGC)); ++ br32(B44_SBTMSLOW); + udelay(1); + +- bw32(bp, B44_SBTMSLOW, (SBTMSLOW_CLOCK)); +- br32(bp, B44_SBTMSLOW); ++ bw32(B44_SBTMSLOW, (SBTMSLOW_CLOCK)); ++ br32(B44_SBTMSLOW); + udelay(1); + } + ++static int b44_4713_instance; ++ + static int ssb_core_unit(struct b44 *bp) + { +-#if 0 +- u32 val = br32(bp, B44_SBADMATCH0); +- u32 base; +- +- type = val & SBADMATCH0_TYPE_MASK; +- switch (type) { +- case 0: +- base = val & SBADMATCH0_BS0_MASK; +- break; +- +- case 1: +- base = val & SBADMATCH0_BS1_MASK; +- break; +- +- case 2: +- default: +- base = val & SBADMATCH0_BS2_MASK; +- break; +- }; +-#endif +- return 0; ++ if (bp->pdev->device == PCI_DEVICE_ID_BCM4713) ++ return b44_4713_instance++; ++ else ++ return 0; + } + + static int ssb_is_core_up(struct b44 *bp) + { +- return ((br32(bp, B44_SBTMSLOW) & (SBTMSLOW_RESET | SBTMSLOW_REJECT | SBTMSLOW_CLOCK)) ++ return ((br32(B44_SBTMSLOW) & (SBTMSLOW_RESET | SBTMSLOW_REJECT | SBTMSLOW_CLOCK)) + == SBTMSLOW_CLOCK); + } + ++static void __b44_cam_read(struct b44 *bp, unsigned char *data, int index) ++{ ++ u32 val; ++ ++ bw32(B44_CAM_CTRL, (CAM_CTRL_READ | ++ (index << CAM_CTRL_INDEX_SHIFT))); ++ ++ b44_wait_bit(bp, B44_CAM_CTRL, CAM_CTRL_BUSY, 100, 1); ++ ++ val = br32(B44_CAM_DATA_LO); ++ ++ data[2] = (val >> 24) & 0xFF; ++ data[3] = (val >> 16) & 0xFF; ++ data[4] = (val >> 8) & 0xFF; ++ data[5] = (val >> 0) & 0xFF; ++ ++ val = br32(B44_CAM_DATA_HI); ++ ++ data[0] = (val >> 8) & 0xFF; ++ data[1] = (val >> 0) & 0xFF; ++} ++ + static void __b44_cam_write(struct b44 *bp, unsigned char *data, int index) + { + u32 val; +@@ -260,19 +259,19 @@ + val |= ((u32) data[3]) << 16; + val |= ((u32) data[4]) << 8; + val |= ((u32) data[5]) << 0; +- bw32(bp, B44_CAM_DATA_LO, val); ++ bw32(B44_CAM_DATA_LO, val); + val = (CAM_DATA_HI_VALID | + (((u32) data[0]) << 8) | + (((u32) data[1]) << 0)); +- bw32(bp, B44_CAM_DATA_HI, val); +- bw32(bp, B44_CAM_CTRL, (CAM_CTRL_WRITE | ++ bw32(B44_CAM_DATA_HI, val); ++ bw32(B44_CAM_CTRL, (CAM_CTRL_WRITE | + (index << CAM_CTRL_INDEX_SHIFT))); + b44_wait_bit(bp, B44_CAM_CTRL, CAM_CTRL_BUSY, 100, 1); + } + + static inline void __b44_disable_ints(struct b44 *bp) + { +- bw32(bp, B44_IMASK, 0); ++ bw32(B44_IMASK, 0); + } + + static void b44_disable_ints(struct b44 *bp) +@@ -280,42 +279,59 @@ + __b44_disable_ints(bp); + + /* Flush posted writes. */ +- br32(bp, B44_IMASK); ++ br32(B44_IMASK); + } + + static void b44_enable_ints(struct b44 *bp) + { +- bw32(bp, B44_IMASK, bp->imask); ++ bw32(B44_IMASK, bp->imask); + } + +-static int b44_readphy(struct b44 *bp, int reg, u32 *val) ++static int __b44_readphy(struct b44 *bp, int phy_addr, int reg, u32 *val) + { + int err; + +- bw32(bp, B44_EMAC_ISTAT, EMAC_INT_MII); +- bw32(bp, B44_MDIO_DATA, (MDIO_DATA_SB_START | ++ bw32(B44_EMAC_ISTAT, EMAC_INT_MII); ++ bw32(B44_MDIO_DATA, (MDIO_DATA_SB_START | + (MDIO_OP_READ << MDIO_DATA_OP_SHIFT) | +- (bp->phy_addr << MDIO_DATA_PMD_SHIFT) | ++ (phy_addr << MDIO_DATA_PMD_SHIFT) | + (reg << MDIO_DATA_RA_SHIFT) | + (MDIO_TA_VALID << MDIO_DATA_TA_SHIFT))); + err = b44_wait_bit(bp, B44_EMAC_ISTAT, EMAC_INT_MII, 100, 0); +- *val = br32(bp, B44_MDIO_DATA) & MDIO_DATA_DATA; ++ *val = br32(B44_MDIO_DATA) & MDIO_DATA_DATA; + + return err; + } + +-static int b44_writephy(struct b44 *bp, int reg, u32 val) ++static int b44_readphy(struct b44 *bp, int reg, u32 *val) ++{ ++ if (bp->phy_addr == B44_PHY_ADDR_NO_PHY) ++ return 0; ++ ++ return __b44_readphy(bp, bp->phy_addr, reg, val); ++} ++ ++static int __b44_writephy(struct b44 *bp, int phy_addr, int reg, u32 val) + { +- bw32(bp, B44_EMAC_ISTAT, EMAC_INT_MII); +- bw32(bp, B44_MDIO_DATA, (MDIO_DATA_SB_START | ++ bw32(B44_EMAC_ISTAT, EMAC_INT_MII); ++ bw32(B44_MDIO_DATA, (MDIO_DATA_SB_START | + (MDIO_OP_WRITE << MDIO_DATA_OP_SHIFT) | +- (bp->phy_addr << MDIO_DATA_PMD_SHIFT) | ++ (phy_addr << MDIO_DATA_PMD_SHIFT) | + (reg << MDIO_DATA_RA_SHIFT) | + (MDIO_TA_VALID << MDIO_DATA_TA_SHIFT) | + (val & MDIO_DATA_DATA))); + return b44_wait_bit(bp, B44_EMAC_ISTAT, EMAC_INT_MII, 100, 0); + } + ++static int b44_writephy(struct b44 *bp, int reg, u32 val) ++{ ++ if (bp->phy_addr == B44_PHY_ADDR_NO_PHY) ++ return 0; ++ ++ return __b44_writephy(bp, bp->phy_addr, reg, val); ++} ++ ++ + /* miilib interface */ + /* FIXME FIXME: phy_id is ignored, bp->phy_addr use is unconditional + * due to code existing before miilib use was added to this driver. +@@ -344,6 +360,9 @@ + u32 val; + int err; + ++ if (bp->phy_addr == B44_PHY_ADDR_NO_PHY) ++ return 0; ++ + err = b44_writephy(bp, MII_BMCR, BMCR_RESET); + if (err) + return err; +@@ -367,20 +386,20 @@ + bp->flags &= ~(B44_FLAG_TX_PAUSE | B44_FLAG_RX_PAUSE); + bp->flags |= pause_flags; + +- val = br32(bp, B44_RXCONFIG); ++ val = br32(B44_RXCONFIG); + if (pause_flags & B44_FLAG_RX_PAUSE) + val |= RXCONFIG_FLOW; + else + val &= ~RXCONFIG_FLOW; +- bw32(bp, B44_RXCONFIG, val); ++ bw32(B44_RXCONFIG, val); + +- val = br32(bp, B44_MAC_FLOW); ++ val = br32(B44_MAC_FLOW); + if (pause_flags & B44_FLAG_TX_PAUSE) + val |= (MAC_FLOW_PAUSE_ENAB | + (0xc0 & MAC_FLOW_RX_HI_WATER)); + else + val &= ~MAC_FLOW_PAUSE_ENAB; +- bw32(bp, B44_MAC_FLOW, val); ++ bw32(B44_MAC_FLOW, val); + } + + static void b44_set_flow_ctrl(struct b44 *bp, u32 local, u32 remote) +@@ -414,6 +433,9 @@ + u32 val; + int err; + ++ if (bp->phy_addr == B44_PHY_ADDR_NO_PHY) ++ return 0; ++ + if ((err = b44_readphy(bp, B44_MII_ALEDCTRL, &val)) != 0) + goto out; + if ((err = b44_writephy(bp, B44_MII_ALEDCTRL, +@@ -476,11 +498,11 @@ + + val = &bp->hw_stats.tx_good_octets; + for (reg = B44_TX_GOOD_O; reg <= B44_TX_PAUSE; reg += 4UL) { +- *val++ += br32(bp, reg); ++ *val++ += br32(reg); + } + val = &bp->hw_stats.rx_good_octets; + for (reg = B44_RX_GOOD_O; reg <= B44_RX_NPAUSE; reg += 4UL) { +- *val++ += br32(bp, reg); ++ *val++ += br32(reg); + } + } + +@@ -506,6 +528,19 @@ + { + u32 bmsr, aux; + ++ if (bp->phy_addr == B44_PHY_ADDR_NO_PHY) { ++ bp->flags |= B44_FLAG_100_BASE_T; ++ bp->flags |= B44_FLAG_FULL_DUPLEX; ++ if (!netif_carrier_ok(bp->dev)) { ++ u32 val = br32(B44_TX_CTRL); ++ val |= TX_CTRL_DUPLEX; ++ bw32(B44_TX_CTRL, val); ++ netif_carrier_on(bp->dev); ++ b44_link_report(bp); ++ } ++ return; ++ } ++ + if (!b44_readphy(bp, MII_BMSR, &bmsr) && + !b44_readphy(bp, B44_MII_AUXCTRL, &aux) && + (bmsr != 0xffff)) { +@@ -520,14 +555,14 @@ + + if (!netif_carrier_ok(bp->dev) && + (bmsr & BMSR_LSTATUS)) { +- u32 val = br32(bp, B44_TX_CTRL); ++ u32 val = br32(B44_TX_CTRL); + u32 local_adv, remote_adv; + + if (bp->flags & B44_FLAG_FULL_DUPLEX) + val |= TX_CTRL_DUPLEX; + else + val &= ~TX_CTRL_DUPLEX; +- bw32(bp, B44_TX_CTRL, val); ++ bw32(B44_TX_CTRL, val); + + if (!(bp->flags & B44_FLAG_FORCE_LINK) && + !b44_readphy(bp, MII_ADVERTISE, &local_adv) && +@@ -572,7 +607,7 @@ + { + u32 cur, cons; + +- cur = br32(bp, B44_DMATX_STAT) & DMATX_STAT_CDMASK; ++ cur = br32(B44_DMATX_STAT) & DMATX_STAT_CDMASK; + cur /= sizeof(struct dma_desc); + + /* XXX needs updating when NETIF_F_SG is supported */ +@@ -596,7 +631,7 @@ + TX_BUFFS_AVAIL(bp) > B44_TX_WAKEUP_THRESH) + netif_wake_queue(bp->dev); + +- bw32(bp, B44_GPTIMER, 0); ++ bw32(B44_GPTIMER, 0); + } + + /* Works like this. This chip writes a 'struct rx_header" 30 bytes +@@ -713,7 +748,7 @@ + u32 cons, prod; + + received = 0; +- prod = br32(bp, B44_DMARX_STAT) & DMARX_STAT_CDMASK; ++ prod = br32(B44_DMARX_STAT) & DMARX_STAT_CDMASK; + prod /= sizeof(struct dma_desc); + cons = bp->rx_cons; + +@@ -792,7 +827,7 @@ + } + + bp->rx_cons = cons; +- bw32(bp, B44_DMARX_PTR, cons * sizeof(struct dma_desc)); ++ bw32(B44_DMARX_PTR, cons * sizeof(struct dma_desc)); + + return received; + } +@@ -856,8 +891,8 @@ + + spin_lock_irqsave(&bp->lock, flags); + +- istat = br32(bp, B44_ISTAT); +- imask = br32(bp, B44_IMASK); ++ istat = br32(B44_ISTAT); ++ imask = br32(B44_IMASK); + + /* ??? What the fuck is the purpose of the interrupt mask + * ??? register if we have to mask it out by hand anyways? +@@ -877,8 +912,8 @@ + dev->name); + } + +- bw32(bp, B44_ISTAT, istat); +- br32(bp, B44_ISTAT); ++ bw32(B44_ISTAT, istat); ++ br32(B44_ISTAT); + } + spin_unlock_irqrestore(&bp->lock, flags); + return IRQ_RETVAL(handled); +@@ -965,11 +1000,11 @@ + + wmb(); + +- bw32(bp, B44_DMATX_PTR, entry * sizeof(struct dma_desc)); ++ bw32(B44_DMATX_PTR, entry * sizeof(struct dma_desc)); + if (bp->flags & B44_FLAG_BUGGY_TXPTR) +- bw32(bp, B44_DMATX_PTR, entry * sizeof(struct dma_desc)); ++ bw32(B44_DMATX_PTR, entry * sizeof(struct dma_desc)); + if (bp->flags & B44_FLAG_REORDER_BUG) +- br32(bp, B44_DMATX_PTR); ++ br32(B44_DMATX_PTR); + + if (TX_BUFFS_AVAIL(bp) < 1) + netif_stop_queue(dev); +@@ -1137,32 +1172,35 @@ + { + unsigned long reg; + +- bw32(bp, B44_MIB_CTRL, MIB_CTRL_CLR_ON_READ); ++ bw32(B44_MIB_CTRL, MIB_CTRL_CLR_ON_READ); + for (reg = B44_TX_GOOD_O; reg <= B44_TX_PAUSE; reg += 4UL) +- br32(bp, reg); ++ br32(reg); + for (reg = B44_RX_GOOD_O; reg <= B44_RX_NPAUSE; reg += 4UL) +- br32(bp, reg); ++ br32(reg); + } + + /* bp->lock is held. */ + static void b44_chip_reset(struct b44 *bp) + { ++ unsigned int sb_clock; ++ + if (ssb_is_core_up(bp)) { +- bw32(bp, B44_RCV_LAZY, 0); +- bw32(bp, B44_ENET_CTRL, ENET_CTRL_DISABLE); ++ bw32(B44_RCV_LAZY, 0); ++ bw32(B44_ENET_CTRL, ENET_CTRL_DISABLE); + b44_wait_bit(bp, B44_ENET_CTRL, ENET_CTRL_DISABLE, 100, 1); +- bw32(bp, B44_DMATX_CTRL, 0); ++ bw32(B44_DMATX_CTRL, 0); + bp->tx_prod = bp->tx_cons = 0; +- if (br32(bp, B44_DMARX_STAT) & DMARX_STAT_EMASK) { ++ if (br32(B44_DMARX_STAT) & DMARX_STAT_EMASK) { + b44_wait_bit(bp, B44_DMARX_STAT, DMARX_STAT_SIDLE, + 100, 0); + } +- bw32(bp, B44_DMARX_CTRL, 0); ++ bw32(B44_DMARX_CTRL, 0); + bp->rx_prod = bp->rx_cons = 0; + } else { +- ssb_pci_setup(bp, (bp->core_unit == 0 ? +- SBINTVEC_ENET0 : +- SBINTVEC_ENET1)); ++ if (bp->pdev->device != PCI_DEVICE_ID_BCM4713) ++ ssb_pci_setup(bp, (bp->core_unit == 0 ? ++ SBINTVEC_ENET0 : ++ SBINTVEC_ENET1)); + } + + ssb_core_reset(bp); +@@ -1170,20 +1208,26 @@ + b44_clear_stats(bp); + + /* Make PHY accessible. */ +- bw32(bp, B44_MDIO_CTRL, (MDIO_CTRL_PREAMBLE | +- (0x0d & MDIO_CTRL_MAXF_MASK))); +- br32(bp, B44_MDIO_CTRL); +- +- if (!(br32(bp, B44_DEVCTRL) & DEVCTRL_IPP)) { +- bw32(bp, B44_ENET_CTRL, ENET_CTRL_EPSEL); +- br32(bp, B44_ENET_CTRL); ++ if (bp->pdev->device == PCI_DEVICE_ID_BCM4713) ++ sb_clock = 100000000; /* 100 MHz */ ++ else ++ sb_clock = 62500000; /* 62.5 MHz */ ++ ++ bw32(B44_MDIO_CTRL, (MDIO_CTRL_PREAMBLE | ++ (((sb_clock + (B44_MDC_RATIO / 2)) / B44_MDC_RATIO) ++ & MDIO_CTRL_MAXF_MASK))); ++ br32(B44_MDIO_CTRL); ++ ++ if (!(br32(B44_DEVCTRL) & DEVCTRL_IPP)) { ++ bw32(B44_ENET_CTRL, ENET_CTRL_EPSEL); ++ br32(B44_ENET_CTRL); + bp->flags &= ~B44_FLAG_INTERNAL_PHY; + } else { +- u32 val = br32(bp, B44_DEVCTRL); ++ u32 val = br32(B44_DEVCTRL); + + if (val & DEVCTRL_EPR) { +- bw32(bp, B44_DEVCTRL, (val & ~DEVCTRL_EPR)); +- br32(bp, B44_DEVCTRL); ++ bw32(B44_DEVCTRL, (val & ~DEVCTRL_EPR)); ++ br32(B44_DEVCTRL); + udelay(100); + } + bp->flags |= B44_FLAG_INTERNAL_PHY; +@@ -1200,13 +1244,13 @@ + /* bp->lock is held. */ + static void __b44_set_mac_addr(struct b44 *bp) + { +- bw32(bp, B44_CAM_CTRL, 0); ++ bw32(B44_CAM_CTRL, 0); + if (!(bp->dev->flags & IFF_PROMISC)) { + u32 val; + + __b44_cam_write(bp, bp->dev->dev_addr, 0); +- val = br32(bp, B44_CAM_CTRL); +- bw32(bp, B44_CAM_CTRL, val | CAM_CTRL_ENABLE); ++ val = br32(B44_CAM_CTRL); ++ bw32(B44_CAM_CTRL, val | CAM_CTRL_ENABLE); + } + } + +@@ -1240,30 +1284,30 @@ + b44_setup_phy(bp); + + /* Enable CRC32, set proper LED modes and power on PHY */ +- bw32(bp, B44_MAC_CTRL, MAC_CTRL_CRC32_ENAB | MAC_CTRL_PHY_LEDCTRL); +- bw32(bp, B44_RCV_LAZY, (1 << RCV_LAZY_FC_SHIFT)); ++ bw32(B44_MAC_CTRL, MAC_CTRL_CRC32_ENAB | MAC_CTRL_PHY_LEDCTRL); ++ bw32(B44_RCV_LAZY, (1 << RCV_LAZY_FC_SHIFT)); + + /* This sets the MAC address too. */ + __b44_set_rx_mode(bp->dev); + + /* MTU + eth header + possible VLAN tag + struct rx_header */ +- bw32(bp, B44_RXMAXLEN, bp->dev->mtu + ETH_HLEN + 8 + RX_HEADER_LEN); +- bw32(bp, B44_TXMAXLEN, bp->dev->mtu + ETH_HLEN + 8 + RX_HEADER_LEN); ++ bw32(B44_RXMAXLEN, bp->dev->mtu + ETH_HLEN + 8 + RX_HEADER_LEN); ++ bw32(B44_TXMAXLEN, bp->dev->mtu + ETH_HLEN + 8 + RX_HEADER_LEN); + +- bw32(bp, B44_TX_WMARK, 56); /* XXX magic */ +- bw32(bp, B44_DMATX_CTRL, DMATX_CTRL_ENABLE); +- bw32(bp, B44_DMATX_ADDR, bp->tx_ring_dma + bp->dma_offset); +- bw32(bp, B44_DMARX_CTRL, (DMARX_CTRL_ENABLE | ++ bw32(B44_TX_WMARK, 56); /* XXX magic */ ++ bw32(B44_DMATX_CTRL, DMATX_CTRL_ENABLE); ++ bw32(B44_DMATX_ADDR, bp->tx_ring_dma + bp->dma_offset); ++ bw32(B44_DMARX_CTRL, (DMARX_CTRL_ENABLE | + (bp->rx_offset << DMARX_CTRL_ROSHIFT))); +- bw32(bp, B44_DMARX_ADDR, bp->rx_ring_dma + bp->dma_offset); ++ bw32(B44_DMARX_ADDR, bp->rx_ring_dma + bp->dma_offset); + +- bw32(bp, B44_DMARX_PTR, bp->rx_pending); ++ bw32(B44_DMARX_PTR, bp->rx_pending); + bp->rx_prod = bp->rx_pending; + +- bw32(bp, B44_MIB_CTRL, MIB_CTRL_CLR_ON_READ); ++ bw32(B44_MIB_CTRL, MIB_CTRL_CLR_ON_READ); + +- val = br32(bp, B44_ENET_CTRL); +- bw32(bp, B44_ENET_CTRL, (val | ENET_CTRL_ENABLE)); ++ val = br32(B44_ENET_CTRL); ++ bw32(B44_ENET_CTRL, (val | ENET_CTRL_ENABLE)); + } + + static int b44_open(struct net_device *dev) +@@ -1419,11 +1463,11 @@ + int i=0; + unsigned char zero[6] = {0,0,0,0,0,0}; + +- val = br32(bp, B44_RXCONFIG); ++ val = br32(B44_RXCONFIG); + val &= ~(RXCONFIG_PROMISC | RXCONFIG_ALLMULTI); + if (dev->flags & IFF_PROMISC) { + val |= RXCONFIG_PROMISC; +- bw32(bp, B44_RXCONFIG, val); ++ bw32(B44_RXCONFIG, val); + } else { + __b44_set_mac_addr(bp); + +@@ -1435,9 +1479,9 @@ + for(;i<64;i++) { + __b44_cam_write(bp, zero, i); + } +- bw32(bp, B44_RXCONFIG, val); +- val = br32(bp, B44_CAM_CTRL); +- bw32(bp, B44_CAM_CTRL, val | CAM_CTRL_ENABLE); ++ bw32(B44_RXCONFIG, val); ++ val = br32(B44_CAM_CTRL); ++ bw32(B44_CAM_CTRL, val | CAM_CTRL_ENABLE); + } + } + +@@ -1678,17 +1722,288 @@ + .set_msglevel = b44_set_msglevel, + }; + ++static int b44_ethtool_ioctl (struct net_device *dev, void __user *useraddr) ++{ ++ struct b44 *bp = dev->priv; ++ struct pci_dev *pci_dev = bp->pdev; ++ u32 ethcmd; ++ ++ if (copy_from_user (ðcmd, useraddr, sizeof (ethcmd))) ++ return -EFAULT; ++ ++ switch (ethcmd) { ++ case ETHTOOL_GDRVINFO: { ++ struct ethtool_drvinfo info = { ETHTOOL_GDRVINFO }; ++ strcpy (info.driver, DRV_MODULE_NAME); ++ strcpy (info.version, DRV_MODULE_VERSION); ++ memset(&info.fw_version, 0, sizeof(info.fw_version)); ++ strcpy (info.bus_info, pci_name(pci_dev)); ++ info.eedump_len = 0; ++ info.regdump_len = 0; ++ if (copy_to_user (useraddr, &info, sizeof (info))) ++ return -EFAULT; ++ return 0; ++ } ++ ++ case ETHTOOL_GSET: { ++ struct ethtool_cmd cmd = { ETHTOOL_GSET }; ++ ++ if (!(bp->flags & B44_FLAG_INIT_COMPLETE)) ++ return -EAGAIN; ++ cmd.supported = (SUPPORTED_Autoneg); ++ cmd.supported |= (SUPPORTED_100baseT_Half | ++ SUPPORTED_100baseT_Full | ++ SUPPORTED_10baseT_Half | ++ SUPPORTED_10baseT_Full | ++ SUPPORTED_MII); ++ ++ cmd.advertising = 0; ++ if (bp->flags & B44_FLAG_ADV_10HALF) ++ cmd.advertising |= ADVERTISE_10HALF; ++ if (bp->flags & B44_FLAG_ADV_10FULL) ++ cmd.advertising |= ADVERTISE_10FULL; ++ if (bp->flags & B44_FLAG_ADV_100HALF) ++ cmd.advertising |= ADVERTISE_100HALF; ++ if (bp->flags & B44_FLAG_ADV_100FULL) ++ cmd.advertising |= ADVERTISE_100FULL; ++ cmd.advertising |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; ++ cmd.speed = (bp->flags & B44_FLAG_100_BASE_T) ? ++ SPEED_100 : SPEED_10; ++ cmd.duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ? ++ DUPLEX_FULL : DUPLEX_HALF; ++ cmd.port = 0; ++ cmd.phy_address = bp->phy_addr; ++ cmd.transceiver = (bp->flags & B44_FLAG_INTERNAL_PHY) ? ++ XCVR_INTERNAL : XCVR_EXTERNAL; ++ cmd.autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ? ++ AUTONEG_DISABLE : AUTONEG_ENABLE; ++ cmd.maxtxpkt = 0; ++ cmd.maxrxpkt = 0; ++ if (copy_to_user(useraddr, &cmd, sizeof(cmd))) ++ return -EFAULT; ++ return 0; ++ } ++ case ETHTOOL_SSET: { ++ struct ethtool_cmd cmd; ++ ++ if (!(bp->flags & B44_FLAG_INIT_COMPLETE)) ++ return -EAGAIN; ++ ++ if (copy_from_user(&cmd, useraddr, sizeof(cmd))) ++ return -EFAULT; ++ ++ /* We do not support gigabit. */ ++ if (cmd.autoneg == AUTONEG_ENABLE) { ++ if (cmd.advertising & ++ (ADVERTISED_1000baseT_Half | ++ ADVERTISED_1000baseT_Full)) ++ return -EINVAL; ++ } else if ((cmd.speed != SPEED_100 && ++ cmd.speed != SPEED_10) || ++ (cmd.duplex != DUPLEX_HALF && ++ cmd.duplex != DUPLEX_FULL)) { ++ return -EINVAL; ++ } ++ ++ spin_lock_irq(&bp->lock); ++ ++ if (cmd.autoneg == AUTONEG_ENABLE) { ++ bp->flags &= ~B44_FLAG_FORCE_LINK; ++ bp->flags &= ~(B44_FLAG_ADV_10HALF | ++ B44_FLAG_ADV_10FULL | ++ B44_FLAG_ADV_100HALF | ++ B44_FLAG_ADV_100FULL); ++ if (cmd.advertising & ADVERTISE_10HALF) ++ bp->flags |= B44_FLAG_ADV_10HALF; ++ if (cmd.advertising & ADVERTISE_10FULL) ++ bp->flags |= B44_FLAG_ADV_10FULL; ++ if (cmd.advertising & ADVERTISE_100HALF) ++ bp->flags |= B44_FLAG_ADV_100HALF; ++ if (cmd.advertising & ADVERTISE_100FULL) ++ bp->flags |= B44_FLAG_ADV_100FULL; ++ } else { ++ bp->flags |= B44_FLAG_FORCE_LINK; ++ if (cmd.speed == SPEED_100) ++ bp->flags |= B44_FLAG_100_BASE_T; ++ if (cmd.duplex == DUPLEX_FULL) ++ bp->flags |= B44_FLAG_FULL_DUPLEX; ++ } ++ ++ b44_setup_phy(bp); ++ ++ spin_unlock_irq(&bp->lock); ++ ++ return 0; ++ } ++ ++ case ETHTOOL_GMSGLVL: { ++ struct ethtool_value edata = { ETHTOOL_GMSGLVL }; ++ edata.data = bp->msg_enable; ++ if (copy_to_user(useraddr, &edata, sizeof(edata))) ++ return -EFAULT; ++ return 0; ++ } ++ case ETHTOOL_SMSGLVL: { ++ struct ethtool_value edata; ++ if (copy_from_user(&edata, useraddr, sizeof(edata))) ++ return -EFAULT; ++ bp->msg_enable = edata.data; ++ return 0; ++ } ++ case ETHTOOL_NWAY_RST: { ++ u32 bmcr; ++ int r; ++ ++ spin_lock_irq(&bp->lock); ++ b44_readphy(bp, MII_BMCR, &bmcr); ++ b44_readphy(bp, MII_BMCR, &bmcr); ++ r = -EINVAL; ++ if (bmcr & BMCR_ANENABLE) { ++ b44_writephy(bp, MII_BMCR, ++ bmcr | BMCR_ANRESTART); ++ r = 0; ++ } ++ spin_unlock_irq(&bp->lock); ++ ++ return r; ++ } ++ case ETHTOOL_GLINK: { ++ struct ethtool_value edata = { ETHTOOL_GLINK }; ++ edata.data = netif_carrier_ok(bp->dev) ? 1 : 0; ++ if (copy_to_user(useraddr, &edata, sizeof(edata))) ++ return -EFAULT; ++ return 0; ++ } ++ case ETHTOOL_GRINGPARAM: { ++ struct ethtool_ringparam ering = { ETHTOOL_GRINGPARAM }; ++ ++ ering.rx_max_pending = B44_RX_RING_SIZE - 1; ++ ering.rx_pending = bp->rx_pending; ++ ++ /* XXX ethtool lacks a tx_max_pending, oops... */ ++ ++ if (copy_to_user(useraddr, &ering, sizeof(ering))) ++ return -EFAULT; ++ return 0; ++ } ++ case ETHTOOL_SRINGPARAM: { ++ struct ethtool_ringparam ering; ++ ++ if (copy_from_user(&ering, useraddr, sizeof(ering))) ++ return -EFAULT; ++ ++ if ((ering.rx_pending > B44_RX_RING_SIZE - 1) || ++ (ering.rx_mini_pending != 0) || ++ (ering.rx_jumbo_pending != 0) || ++ (ering.tx_pending > B44_TX_RING_SIZE - 1)) ++ return -EINVAL; ++ ++ spin_lock_irq(&bp->lock); ++ ++ bp->rx_pending = ering.rx_pending; ++ bp->tx_pending = ering.tx_pending; ++ ++ b44_halt(bp); ++ b44_init_rings(bp); ++ b44_init_hw(bp); ++ netif_wake_queue(bp->dev); ++ spin_unlock_irq(&bp->lock); ++ ++ b44_enable_ints(bp); ++ ++ return 0; ++ } ++ case ETHTOOL_GPAUSEPARAM: { ++ struct ethtool_pauseparam epause = { ETHTOOL_GPAUSEPARAM }; ++ ++ epause.autoneg = ++ (bp->flags & B44_FLAG_PAUSE_AUTO) != 0; ++ epause.rx_pause = ++ (bp->flags & B44_FLAG_RX_PAUSE) != 0; ++ epause.tx_pause = ++ (bp->flags & B44_FLAG_TX_PAUSE) != 0; ++ if (copy_to_user(useraddr, &epause, sizeof(epause))) ++ return -EFAULT; ++ return 0; ++ } ++ case ETHTOOL_SPAUSEPARAM: { ++ struct ethtool_pauseparam epause; ++ ++ if (copy_from_user(&epause, useraddr, sizeof(epause))) ++ return -EFAULT; ++ ++ spin_lock_irq(&bp->lock); ++ if (epause.autoneg) ++ bp->flags |= B44_FLAG_PAUSE_AUTO; ++ else ++ bp->flags &= ~B44_FLAG_PAUSE_AUTO; ++ if (epause.rx_pause) ++ bp->flags |= B44_FLAG_RX_PAUSE; ++ else ++ bp->flags &= ~B44_FLAG_RX_PAUSE; ++ if (epause.tx_pause) ++ bp->flags |= B44_FLAG_TX_PAUSE; ++ else ++ bp->flags &= ~B44_FLAG_TX_PAUSE; ++ if (bp->flags & B44_FLAG_PAUSE_AUTO) { ++ b44_halt(bp); ++ b44_init_rings(bp); ++ b44_init_hw(bp); ++ } else { ++ __b44_set_flow_ctrl(bp, bp->flags); ++ } ++ spin_unlock_irq(&bp->lock); ++ ++ b44_enable_ints(bp); ++ ++ return 0; ++ } ++ }; ++ ++ return -EOPNOTSUPP; ++} ++ + static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) + { + struct mii_ioctl_data *data = if_mii(ifr); + struct b44 *bp = netdev_priv(dev); + int err; + +- spin_lock_irq(&bp->lock); +- err = generic_mii_ioctl(&bp->mii_if, data, cmd, NULL); +- spin_unlock_irq(&bp->lock); ++ switch (cmd) { ++ case SIOCETHTOOL: ++ return b44_ethtool_ioctl(dev, (void __user*) ifr->ifr_data); ++ ++ case SIOCGMIIPHY: ++ data->phy_id = bp->phy_addr; ++ ++ /* fallthru */ ++ case SIOCGMIIREG: { ++ u32 mii_regval; + +- return err; ++ spin_lock_irq(&bp->lock); ++ err = __b44_readphy(bp, data->phy_id & 0x1f, data->reg_num & 0x1f, &mii_regval); ++ spin_unlock_irq(&bp->lock); ++ ++ data->val_out = mii_regval; ++ ++ return err; ++ } ++ ++ case SIOCSMIIREG: ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ ++ spin_lock_irq(&bp->lock); ++ err = __b44_writephy(bp, data->phy_id & 0x1f, data->reg_num & 0x1f, data->val_in); ++ spin_unlock_irq(&bp->lock); ++ ++ return err; ++ ++ default: ++ /* do nothing */ ++ break; ++ }; ++ return -EOPNOTSUPP; + } + + /* Read 128-bytes of EEPROM. */ +@@ -1698,7 +2013,7 @@ + u16 *ptr = (u16 *) data; + + for (i = 0; i < 128; i += 2) +- ptr[i / 2] = readw(bp->regs + 4096 + i); ++ ptr[i / 2] = readw((void *)bp->regs + 4096 + i); + + return 0; + } +@@ -1707,19 +2022,41 @@ + { + u8 eeprom[128]; + int err; ++ unsigned long flags; + +- err = b44_read_eeprom(bp, &eeprom[0]); +- if (err) +- goto out; +- +- bp->dev->dev_addr[0] = eeprom[79]; +- bp->dev->dev_addr[1] = eeprom[78]; +- bp->dev->dev_addr[2] = eeprom[81]; +- bp->dev->dev_addr[3] = eeprom[80]; +- bp->dev->dev_addr[4] = eeprom[83]; +- bp->dev->dev_addr[5] = eeprom[82]; +- +- bp->phy_addr = eeprom[90] & 0x1f; ++ if (bp->pdev->device == PCI_DEVICE_ID_BCM4713) { ++ /* ++ * BCM47xx boards don't have a EEPROM. The MAC is stored in ++ * a NVRAM area somewhere in the flash memory. As we don't ++ * know the location and/or the format of the NVRAM area ++ * here, we simply rely on the bootloader to write the ++ * MAC into the CAM. ++ */ ++ spin_lock_irqsave(&bp->lock, flags); ++ __b44_cam_read(bp, bp->dev->dev_addr, 0); ++ spin_unlock_irqrestore(&bp->lock, flags); ++ ++ /* ++ * BCM47xx boards don't have a PHY. Usually there is a switch ++ * chip with multiple PHYs connected to the PHY port. ++ */ ++ bp->phy_addr = B44_PHY_ADDR_NO_PHY; ++ bp->dma_offset = 0; ++ } else { ++ err = b44_read_eeprom(bp, &eeprom[0]); ++ if (err) ++ return err; ++ ++ bp->dev->dev_addr[0] = eeprom[79]; ++ bp->dev->dev_addr[1] = eeprom[78]; ++ bp->dev->dev_addr[2] = eeprom[81]; ++ bp->dev->dev_addr[3] = eeprom[80]; ++ bp->dev->dev_addr[4] = eeprom[83]; ++ bp->dev->dev_addr[5] = eeprom[82]; ++ ++ bp->phy_addr = eeprom[90] & 0x1f; ++ bp->dma_offset = SB_PCI_DMA; ++ } + + /* With this, plus the rx_header prepended to the data by the + * hardware, we'll land the ethernet header on a 2-byte boundary. +@@ -1729,13 +2066,12 @@ + bp->imask = IMASK_DEF; + + bp->core_unit = ssb_core_unit(bp); +- bp->dma_offset = SB_PCI_DMA; + + /* XXX - really required? + bp->flags |= B44_FLAG_BUGGY_TXPTR; + */ +-out: +- return err; ++ ++ return 0; + } + + static int __devinit b44_init_one(struct pci_dev *pdev, +@@ -1813,7 +2149,7 @@ + + spin_lock_init(&bp->lock); + +- bp->regs = ioremap(b44reg_base, b44reg_len); ++ bp->regs = (unsigned long) ioremap(b44reg_base, b44reg_len); + if (bp->regs == 0UL) { + printk(KERN_ERR PFX "Cannot map device registers, " + "aborting.\n"); +@@ -1874,15 +2210,21 @@ + + pci_save_state(bp->pdev); + +- printk(KERN_INFO "%s: Broadcom 4400 10/100BaseT Ethernet ", dev->name); ++ printk(KERN_INFO "%s: Broadcom %s 10/100BaseT Ethernet ", dev->name, ++ (pdev->device == PCI_DEVICE_ID_BCM4713) ? "47xx" : "4400"); + for (i = 0; i < 6; i++) + printk("%2.2x%c", dev->dev_addr[i], + i == 5 ? '\n' : ':'); + ++ /* Initialize phy */ ++ spin_lock_irq(&bp->lock); ++ b44_chip_reset(bp); ++ spin_unlock_irq(&bp->lock); ++ + return 0; + + err_out_iounmap: +- iounmap(bp->regs); ++ iounmap((void *) bp->regs); + + err_out_free_dev: + free_netdev(dev); +@@ -1904,7 +2246,7 @@ + struct b44 *bp = netdev_priv(dev); + + unregister_netdev(dev); +- iounmap(bp->regs); ++ iounmap((void *) bp->regs); + free_netdev(dev); + pci_release_regions(pdev); + pci_disable_device(pdev); +diff -ur linux-2.6.14.3/drivers/net/b44.h linux-2.6.14.3-openwrt/drivers/net/b44.h +--- linux-2.6.14.3/drivers/net/b44.h 2005-11-24 23:10:21.000000000 +0100 ++++ linux-2.6.14.3-openwrt/drivers/net/b44.h 2005-12-08 13:24:35.000000000 +0100 +@@ -292,6 +292,9 @@ + #define SSB_PCI_MASK1 0xfc000000 + #define SSB_PCI_MASK2 0xc0000000 + ++#define br32(REG) readl((void *)bp->regs + (REG)) ++#define bw32(REG,VAL) writel((VAL), (void *)bp->regs + (REG)) ++ + /* 4400 PHY registers */ + #define B44_MII_AUXCTRL 24 /* Auxiliary Control */ + #define MII_AUXCTRL_DUPLEX 0x0001 /* Full Duplex */ +@@ -345,6 +348,8 @@ + }; + + #define B44_MCAST_TABLE_SIZE 32 ++#define B44_PHY_ADDR_NO_PHY 30 ++#define B44_MDC_RATIO 5000000 + + /* SW copy of device statistics, kept up to date by periodic timer + * which probes HW values. Must have same relative layout as HW +@@ -410,7 +415,7 @@ + struct net_device_stats stats; + struct b44_hw_stats hw_stats; + +- void __iomem *regs; ++ unsigned long regs; + struct pci_dev *pdev; + struct net_device *dev; + diff --git a/openwrt/target/linux/generic-2.6/patches/004-extra_optimization.patch b/openwrt/target/linux/generic-2.6/patches/004-extra_optimization.patch new file mode 100644 index 0000000000..805d2f0ea9 --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/004-extra_optimization.patch @@ -0,0 +1,12 @@ +--- linux-2.6.12.5/Makefile.old 2005-10-23 22:56:29.017270000 +0200 ++++ linux-2.6.12.5/Makefile 2005-10-23 22:57:23.226138500 +0200 +@@ -533,6 +533,9 @@ + NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) + CHECKFLAGS += $(NOSTDINC_FLAGS) + ++# improve gcc optimization ++CFLAGS += $(call cc-option,-funit-at-a-time,) ++ + # warn about C99 declaration after statement + CFLAGS += $(call cc-option,-Wdeclaration-after-statement,) + diff --git a/openwrt/target/linux/generic-2.6/patches/100-netfilter_layer7.patch b/openwrt/target/linux/generic-2.6/patches/100-netfilter_layer7.patch new file mode 100644 index 0000000000..0dd2ccf7cc --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/100-netfilter_layer7.patch @@ -0,0 +1,2036 @@ +--- linux-2.6.14/include/linux/netfilter_ipv4/ip_conntrack.h 2005-10-27 19:02:08.000000000 -0500 ++++ linux-2.6.14-layer7/include/linux/netfilter_ipv4/ip_conntrack.h 2005-11-12 17:31:34.000000000 -0600 +@@ -253,6 +253,15 @@ struct ip_conntrack + /* Traversed often, so hopefully in different cacheline to top */ + /* These are my tuples; original and reply */ + struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; ++ ++#if defined(CONFIG_IP_NF_MATCH_LAYER7) || defined(CONFIG_IP_NF_MATCH_LAYER7_MODULE) ++ struct { ++ char * app_proto; /* e.g. "http". NULL before decision. "unknown" after decision if no match */ ++ char * app_data; /* application layer data so far. NULL after match decision */ ++ unsigned int app_data_len; ++ } layer7; ++#endif ++ + }; + + struct ip_conntrack_expect +--- linux-2.6.14/include/linux/netfilter_ipv4/ipt_layer7.h 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.14-layer7/include/linux/netfilter_ipv4/ipt_layer7.h 2005-11-12 17:31:34.000000000 -0600 +@@ -0,0 +1,26 @@ ++/* ++ By Matthew Strait <quadong@users.sf.net>, Dec 2003. ++ http://l7-filter.sf.net ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License ++ as published by the Free Software Foundation; either version ++ 2 of the License, or (at your option) any later version. ++ http://www.gnu.org/licenses/gpl.txt ++*/ ++ ++#ifndef _IPT_LAYER7_H ++#define _IPT_LAYER7_H ++ ++#define MAX_PATTERN_LEN 8192 ++#define MAX_PROTOCOL_LEN 256 ++ ++typedef char *(*proc_ipt_search) (char *, char, char *); ++ ++struct ipt_layer7_info { ++ char protocol[MAX_PROTOCOL_LEN]; ++ char invert:1; ++ char pattern[MAX_PATTERN_LEN]; ++}; ++ ++#endif /* _IPT_LAYER7_H */ +--- linux-2.6.14/net/ipv4/netfilter/Kconfig 2005-10-27 19:02:08.000000000 -0500 ++++ linux-2.6.14-layer7/net/ipv4/netfilter/Kconfig 2005-11-12 17:31:34.000000000 -0600 +@@ -205,6 +205,24 @@ config IP_NF_MATCH_MAC + + To compile it as a module, choose M here. If unsure, say N. + ++config IP_NF_MATCH_LAYER7 ++ tristate "Layer 7 match support (EXPERIMENTAL)" ++ depends on IP_NF_IPTABLES && IP_NF_CT_ACCT && IP_NF_CONNTRACK && EXPERIMENTAL ++ help ++ Say Y if you want to be able to classify connections (and their ++ packets) based on regular expression matching of their application ++ layer data. This is one way to classify applications such as ++ peer-to-peer filesharing systems that do not always use the same ++ port. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config IP_NF_MATCH_LAYER7_DEBUG ++ bool "Layer 7 debugging output" ++ depends on IP_NF_MATCH_LAYER7 ++ help ++ Say Y to get lots of debugging output. ++ + config IP_NF_MATCH_PKTTYPE + tristate "Packet type match support" + depends on IP_NF_IPTABLES +--- linux-2.6.14/net/ipv4/netfilter/Makefile 2005-10-27 19:02:08.000000000 -0500 ++++ linux-2.6.14-layer7/net/ipv4/netfilter/Makefile 2005-11-12 17:31:34.000000000 -0600 +@@ -74,6 +74,8 @@ obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt + obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o + obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o + ++obj-$(CONFIG_IP_NF_MATCH_LAYER7) += ipt_layer7.o ++ + # targets + obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o + obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o +--- linux-2.6.14/net/ipv4/netfilter/ip_conntrack_core.c 2005-10-27 19:02:08.000000000 -0500 ++++ linux-2.6.14-layer7/net/ipv4/netfilter/ip_conntrack_core.c 2005-11-12 17:31:34.000000000 -0600 +@@ -335,6 +335,13 @@ destroy_conntrack(struct nf_conntrack *n + * too. */ + ip_ct_remove_expectations(ct); + ++ #if defined(CONFIG_IP_NF_MATCH_LAYER7) || defined(CONFIG_IP_NF_MATCH_LAYER7_MODULE) ++ if(ct->layer7.app_proto) ++ kfree(ct->layer7.app_proto); ++ if(ct->layer7.app_data) ++ kfree(ct->layer7.app_data); ++ #endif ++ + /* We overload first tuple to link into unconfirmed list. */ + if (!is_confirmed(ct)) { + BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list)); +--- linux-2.6.14/net/ipv4/netfilter/ip_conntrack_standalone.c 2005-10-27 19:02:08.000000000 -0500 ++++ linux-2.6.14-layer7/net/ipv4/netfilter/ip_conntrack_standalone.c 2005-11-12 17:31:34.000000000 -0600 +@@ -188,6 +188,12 @@ static int ct_seq_show(struct seq_file * + return -ENOSPC; + #endif + ++#if defined(CONFIG_IP_NF_MATCH_LAYER7) || defined(CONFIG_IP_NF_MATCH_LAYER7_MODULE) ++ if(conntrack->layer7.app_proto) ++ if (seq_printf(s, "l7proto=%s ",conntrack->layer7.app_proto)) ++ return 1; ++#endif ++ + if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) + return -ENOSPC; + +--- linux-2.6.14/net/ipv4/netfilter/ipt_layer7.c 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.14-layer7/net/ipv4/netfilter/ipt_layer7.c 2005-11-12 17:49:24.000000000 -0600 +@@ -0,0 +1,569 @@ ++/* ++ Kernel module to match application layer (OSI layer 7) ++ data in connections. ++ ++ http://l7-filter.sf.net ++ ++ By Matthew Strait and Ethan Sommer, 2003-2005. ++ ++ This program is free software; you can redistribute it and/or ++ modify it under the terms of the GNU General Public License ++ as published by the Free Software Foundation; either version ++ 2 of the License, or (at your option) any later version. ++ http://www.gnu.org/licenses/gpl.txt ++ ++ Based on ipt_string.c (C) 2000 Emmanuel Roger <winfield@freegates.be> ++ and cls_layer7.c (C) 2003 Matthew Strait, Ethan Sommer, Justin Levandoski ++*/ ++ ++#include <linux/module.h> ++#include <linux/skbuff.h> ++#include <linux/netfilter_ipv4/ip_conntrack.h> ++#include <linux/proc_fs.h> ++#include <linux/ctype.h> ++#include <net/ip.h> ++#include <net/tcp.h> ++#include <linux/spinlock.h> ++ ++#include "regexp/regexp.c" ++ ++#include <linux/netfilter_ipv4/ipt_layer7.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++ ++MODULE_AUTHOR("Matthew Strait <quadong@users.sf.net>, Ethan Sommer <sommere@users.sf.net>"); ++MODULE_LICENSE("GPL"); ++MODULE_DESCRIPTION("iptables application layer match module"); ++MODULE_VERSION("2.0"); ++ ++static int maxdatalen = 2048; // this is the default ++module_param(maxdatalen, int, 0444); ++MODULE_PARM_DESC(maxdatalen, "maximum bytes of data looked at by l7-filter"); ++ ++#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG ++ #define DPRINTK(format,args...) printk(format,##args) ++#else ++ #define DPRINTK(format,args...) ++#endif ++ ++#define TOTAL_PACKETS master_conntrack->counters[IP_CT_DIR_ORIGINAL].packets + \ ++ master_conntrack->counters[IP_CT_DIR_REPLY].packets ++ ++/* Number of packets whose data we look at. ++This can be modified through /proc/net/layer7_numpackets */ ++static int num_packets = 10; ++ ++static struct pattern_cache { ++ char * regex_string; ++ regexp * pattern; ++ struct pattern_cache * next; ++} * first_pattern_cache = NULL; ++ ++/* I'm new to locking. Here are my assumptions: ++ ++- No one will write to /proc/net/layer7_numpackets over and over very fast; ++ if they did, nothing awful would happen. ++ ++- This code will never be processing the same packet twice at the same time, ++ because iptables rules are traversed in order. ++ ++- It doesn't matter if two packets from different connections are in here at ++ the same time, because they don't share any data. ++ ++- It _does_ matter if two packets from the same connection are here at the same ++ time. In this case, we have to protect the conntracks and the list of ++ compiled patterns. ++*/ ++DEFINE_RWLOCK(ct_lock); ++DEFINE_SPINLOCK(list_lock); ++ ++#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG ++/* Converts an unfriendly string into a friendly one by ++replacing unprintables with periods and all whitespace with " ". */ ++static char * friendly_print(unsigned char * s) ++{ ++ char * f = kmalloc(strlen(s) + 1, GFP_ATOMIC); ++ int i; ++ ++ if(!f) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in friendly_print, bailing.\n"); ++ return NULL; ++ } ++ ++ for(i = 0; i < strlen(s); i++){ ++ if(isprint(s[i]) && s[i] < 128) f[i] = s[i]; ++ else if(isspace(s[i])) f[i] = ' '; ++ else f[i] = '.'; ++ } ++ f[i] = '\0'; ++ return f; ++} ++ ++static char dec2hex(int i) ++{ ++ switch (i) { ++ case 0 ... 9: ++ return (char)(i + '0'); ++ break; ++ case 10 ... 15: ++ return (char)(i - 10 + 'a'); ++ break; ++ default: ++ if (net_ratelimit()) ++ printk("Problem in dec2hex\n"); ++ return '\0'; ++ } ++} ++ ++static char * hex_print(unsigned char * s) ++{ ++ char * g = kmalloc(strlen(s)*3 + 1, GFP_ATOMIC); ++ int i; ++ ++ if(!g) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in hex_print, bailing.\n"); ++ return NULL; ++ } ++ ++ for(i = 0; i < strlen(s); i++) { ++ g[i*3 ] = dec2hex(s[i]/16); ++ g[i*3 + 1] = dec2hex(s[i]%16); ++ g[i*3 + 2] = ' '; ++ } ++ g[i*3] = '\0'; ++ ++ return g; ++} ++#endif // DEBUG ++ ++/* Use instead of regcomp. As we expect to be seeing the same regexps over and ++over again, it make sense to cache the results. */ ++static regexp * compile_and_cache(char * regex_string, char * protocol) ++{ ++ struct pattern_cache * node = first_pattern_cache; ++ struct pattern_cache * last_pattern_cache = first_pattern_cache; ++ struct pattern_cache * tmp; ++ unsigned int len; ++ ++ while (node != NULL) { ++ if (!strcmp(node->regex_string, regex_string)) ++ return node->pattern; ++ ++ last_pattern_cache = node;/* points at the last non-NULL node */ ++ node = node->next; ++ } ++ ++ /* If we reach the end of the list, then we have not yet cached ++ the pattern for this regex. Let's do that now. ++ Be paranoid about running out of memory to avoid list corruption. */ ++ tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC); ++ ++ if(!tmp) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in compile_and_cache, bailing.\n"); ++ return NULL; ++ } ++ ++ tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC); ++ tmp->pattern = kmalloc(sizeof(struct regexp), GFP_ATOMIC); ++ tmp->next = NULL; ++ ++ if(!tmp->regex_string || !tmp->pattern) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in compile_and_cache, bailing.\n"); ++ kfree(tmp->regex_string); ++ kfree(tmp->pattern); ++ kfree(tmp); ++ return NULL; ++ } ++ ++ /* Ok. The new node is all ready now. */ ++ node = tmp; ++ ++ if(first_pattern_cache == NULL) /* list is empty */ ++ first_pattern_cache = node; /* make node the beginning */ ++ else ++ last_pattern_cache->next = node; /* attach node to the end */ ++ ++ /* copy the string and compile the regex */ ++ len = strlen(regex_string); ++ DPRINTK("About to compile this: \"%s\"\n", regex_string); ++ node->pattern = regcomp(regex_string, &len); ++ if ( !node->pattern ) { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: Error compiling regexp \"%s\" (%s)\n", regex_string, protocol); ++ /* pattern is now cached as NULL, so we won't try again. */ ++ } ++ ++ strcpy(node->regex_string, regex_string); ++ return node->pattern; ++} ++ ++static int can_handle(const struct sk_buff *skb) ++{ ++ if(!skb->nh.iph) /* not IP */ ++ return 0; ++ if(skb->nh.iph->protocol != IPPROTO_TCP && ++ skb->nh.iph->protocol != IPPROTO_UDP && ++ skb->nh.iph->protocol != IPPROTO_ICMP) ++ return 0; ++ return 1; ++} ++ ++/* Returns offset the into the skb->data that the application data starts */ ++static int app_data_offset(const struct sk_buff *skb) ++{ ++ /* In case we are ported somewhere (ebtables?) where skb->nh.iph ++ isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */ ++ int ip_hl = 4*skb->nh.iph->ihl; ++ ++ if( skb->nh.iph->protocol == IPPROTO_TCP ) { ++ /* 12 == offset into TCP header for the header length field. ++ Can't get this with skb->h.th->doff because the tcphdr ++ struct doesn't get set when routing (this is confirmed to be ++ true in Netfilter as well as QoS.) */ ++ int tcp_hl = 4*(skb->data[ip_hl + 12] >> 4); ++ ++ return ip_hl + tcp_hl; ++ } else if( skb->nh.iph->protocol == IPPROTO_UDP ) { ++ return ip_hl + 8; /* UDP header is always 8 bytes */ ++ } else if( skb->nh.iph->protocol == IPPROTO_ICMP ) { ++ return ip_hl + 8; /* ICMP header is 8 bytes */ ++ } else { ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: tried to handle unknown protocol!\n"); ++ return ip_hl + 8; /* something reasonable */ ++ } ++} ++ ++/* handles whether there's a match when we aren't appending data anymore */ ++static int match_no_append(struct ip_conntrack * conntrack, struct ip_conntrack * master_conntrack, ++ enum ip_conntrack_info ctinfo, enum ip_conntrack_info master_ctinfo, ++ struct ipt_layer7_info * info) ++{ ++ /* If we're in here, throw the app data away */ ++ write_lock(&ct_lock); ++ if(master_conntrack->layer7.app_data != NULL) { ++ ++ #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG ++ if(!master_conntrack->layer7.app_proto) { ++ char * f = friendly_print(master_conntrack->layer7.app_data); ++ char * g = hex_print(master_conntrack->layer7.app_data); ++ DPRINTK("\nl7-filter gave up after %d bytes (%llu packets):\n%s\n", ++ strlen(f), ++ TOTAL_PACKETS, f); ++ kfree(f); ++ DPRINTK("In hex: %s\n", g); ++ kfree(g); ++ } ++ #endif ++ ++ kfree(master_conntrack->layer7.app_data); ++ master_conntrack->layer7.app_data = NULL; /* don't free again */ ++ } ++ write_unlock(&ct_lock); ++ ++ if(master_conntrack->layer7.app_proto){ ++ /* Here child connections set their .app_proto (for /proc/net/ip_conntrack) */ ++ write_lock(&ct_lock); ++ if(!conntrack->layer7.app_proto) { ++ conntrack->layer7.app_proto = kmalloc(strlen(master_conntrack->layer7.app_proto)+1, GFP_ATOMIC); ++ if(!conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in match_no_append, bailing.\n"); ++ write_unlock(&ct_lock); ++ return 1; ++ } ++ strcpy(conntrack->layer7.app_proto, master_conntrack->layer7.app_proto); ++ } ++ write_unlock(&ct_lock); ++ ++ return (!strcmp(master_conntrack->layer7.app_proto, info->protocol)); ++ } ++ else { ++ /* If not classified, set to "unknown" to distinguish from ++ connections that are still being tested. */ ++ write_lock(&ct_lock); ++ master_conntrack->layer7.app_proto = kmalloc(strlen("unknown")+1, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in match_no_append, bailing.\n"); ++ write_unlock(&ct_lock); ++ return 1; ++ } ++ strcpy(master_conntrack->layer7.app_proto, "unknown"); ++ write_unlock(&ct_lock); ++ return 0; ++ } ++} ++ ++/* add the new app data to the conntrack. Return number of bytes added. */ ++static int add_data(struct ip_conntrack * master_conntrack, ++ char * app_data, int appdatalen) ++{ ++ int length = 0, i; ++ int oldlength = master_conntrack->layer7.app_data_len; ++ ++ /* Strip nulls. Make everything lower case (our regex lib doesn't ++ do case insensitivity). Add it to the end of the current data. */ ++ for(i = 0; i < maxdatalen-oldlength-1 && ++ i < appdatalen; i++) { ++ if(app_data[i] != '\0') { ++ master_conntrack->layer7.app_data[length+oldlength] = ++ /* the kernel version of tolower mungs 'upper ascii' */ ++ isascii(app_data[i])? tolower(app_data[i]) : app_data[i]; ++ length++; ++ } ++ } ++ ++ master_conntrack->layer7.app_data[length+oldlength] = '\0'; ++ master_conntrack->layer7.app_data_len = length + oldlength; ++ ++ return length; ++} ++ ++/* Returns true on match and false otherwise. */ ++static int match(/* const */struct sk_buff *skb, const struct net_device *in, ++ const struct net_device *out, const void *matchinfo, ++ int offset, int *hotdrop) ++{ ++ struct ipt_layer7_info * info = (struct ipt_layer7_info *)matchinfo; ++ enum ip_conntrack_info master_ctinfo, ctinfo; ++ struct ip_conntrack *master_conntrack, *conntrack; ++ unsigned char * app_data; ++ unsigned int pattern_result, appdatalen; ++ regexp * comppattern; ++ ++ if(!can_handle(skb)){ ++ DPRINTK("layer7: This is some protocol I can't handle.\n"); ++ return info->invert; ++ } ++ ++ /* Treat parent & all its children together as one connection, except ++ for the purpose of setting conntrack->layer7.app_proto in the actual ++ connection. This makes /proc/net/ip_conntrack more satisfying. */ ++ if(!(conntrack = ip_conntrack_get((struct sk_buff *)skb, &ctinfo)) || ++ !(master_conntrack = ip_conntrack_get((struct sk_buff *)skb, &master_ctinfo))) { ++ //DPRINTK("layer7: packet is not from a known connection, giving up.\n"); ++ return info->invert; ++ } ++ ++ /* Try to get a master conntrack (and its master etc) for FTP, etc. */ ++ while (master_ct(master_conntrack) != NULL) ++ master_conntrack = master_ct(master_conntrack); ++ ++ /* if we've classified it or seen too many packets */ ++ if(TOTAL_PACKETS > num_packets || ++ master_conntrack->layer7.app_proto) { ++ ++ pattern_result = match_no_append(conntrack, master_conntrack, ctinfo, master_ctinfo, info); ++ ++ /* skb->cb[0] == seen. Avoid doing things twice if there are two l7 ++ rules. I'm not sure that using cb for this purpose is correct, although ++ it says "put your private variables there". But it doesn't look like it ++ is being used for anything else in the skbs that make it here. How can ++ I write to cb without making the compiler angry? */ ++ skb->cb[0] = 1; /* marking it seen here is probably irrelevant, but consistant */ ++ ++ return (pattern_result ^ info->invert); ++ } ++ ++ if(skb_is_nonlinear(skb)){ ++ if(skb_linearize(skb, GFP_ATOMIC) != 0){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: failed to linearize packet, bailing.\n"); ++ return info->invert; ++ } ++ } ++ ++ /* now that the skb is linearized, it's safe to set these. */ ++ app_data = skb->data + app_data_offset(skb); ++ appdatalen = skb->tail - app_data; ++ ++ spin_lock_bh(&list_lock); ++ /* the return value gets checked later, when we're ready to use it */ ++ comppattern = compile_and_cache(info->pattern, info->protocol); ++ spin_unlock_bh(&list_lock); ++ ++ /* On the first packet of a connection, allocate space for app data */ ++ write_lock(&ct_lock); ++ if(TOTAL_PACKETS == 1 && !skb->cb[0] && !master_conntrack->layer7.app_data) { ++ master_conntrack->layer7.app_data = kmalloc(maxdatalen, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_data){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in match, bailing.\n"); ++ write_unlock(&ct_lock); ++ return info->invert; ++ } ++ ++ master_conntrack->layer7.app_data[0] = '\0'; ++ } ++ write_unlock(&ct_lock); ++ ++ /* Can be here, but unallocated, if numpackets is increased near ++ the beginning of a connection */ ++ if(master_conntrack->layer7.app_data == NULL) ++ return (info->invert); /* unmatched */ ++ ++ if(!skb->cb[0]){ ++ int newbytes; ++ write_lock(&ct_lock); ++ newbytes = add_data(master_conntrack, app_data, appdatalen); ++ write_unlock(&ct_lock); ++ ++ if(newbytes == 0) { /* didn't add any data */ ++ skb->cb[0] = 1; ++ /* Didn't match before, not going to match now */ ++ return info->invert; ++ } ++ } ++ ++ /* If looking for "unknown", then never match. "Unknown" means that ++ we've given up; we're still trying with these packets. */ ++ if(!strcmp(info->protocol, "unknown")) { ++ pattern_result = 0; ++ /* If the regexp failed to compile, don't bother running it */ ++ } else if(comppattern && regexec(comppattern, master_conntrack->layer7.app_data)) { ++ DPRINTK("layer7: matched %s\n", info->protocol); ++ pattern_result = 1; ++ } else pattern_result = 0; ++ ++ if(pattern_result) { ++ write_lock(&ct_lock); ++ master_conntrack->layer7.app_proto = kmalloc(strlen(info->protocol)+1, GFP_ATOMIC); ++ if(!master_conntrack->layer7.app_proto){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory in match, bailing.\n"); ++ write_unlock(&ct_lock); ++ return (pattern_result ^ info->invert); ++ } ++ strcpy(master_conntrack->layer7.app_proto, info->protocol); ++ write_unlock(&ct_lock); ++ } ++ ++ /* mark the packet seen */ ++ skb->cb[0] = 1; ++ ++ return (pattern_result ^ info->invert); ++} ++ ++static int checkentry(const char *tablename, const struct ipt_ip *ip, ++ void *matchinfo, unsigned int matchsize, unsigned int hook_mask) ++{ ++ if (matchsize != IPT_ALIGN(sizeof(struct ipt_layer7_info))) ++ return 0; ++ return 1; ++} ++ ++static struct ipt_match layer7_match = { ++ .name = "layer7", ++ .match = &match, ++ .checkentry = &checkentry, ++ .me = THIS_MODULE ++}; ++ ++/* taken from drivers/video/modedb.c */ ++static int my_atoi(const char *s) ++{ ++ int val = 0; ++ ++ for (;; s++) { ++ switch (*s) { ++ case '0'...'9': ++ val = 10*val+(*s-'0'); ++ break; ++ default: ++ return val; ++ } ++ } ++} ++ ++/* write out num_packets to userland. */ ++static int layer7_read_proc(char* page, char ** start, off_t off, int count, ++ int* eof, void * data) ++{ ++ if(num_packets > 99 && net_ratelimit()) ++ printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n"); ++ ++ page[0] = num_packets/10 + '0'; ++ page[1] = num_packets%10 + '0'; ++ page[2] = '\n'; ++ page[3] = '\0'; ++ ++ *eof=1; ++ ++ return 3; ++} ++ ++/* Read in num_packets from userland */ ++static int layer7_write_proc(struct file* file, const char* buffer, ++ unsigned long count, void *data) ++{ ++ char * foo = kmalloc(count, GFP_ATOMIC); ++ ++ if(!foo){ ++ if (net_ratelimit()) ++ printk(KERN_ERR "layer7: out of memory, bailing. num_packets unchanged.\n"); ++ return count; ++ } ++ ++ if(copy_from_user(foo, buffer, count)) { ++ return -EFAULT; ++ } ++ ++ ++ num_packets = my_atoi(foo); ++ kfree (foo); ++ ++ /* This has an arbitrary limit to make the math easier. I'm lazy. ++ But anyway, 99 is a LOT! If you want more, you're doing it wrong! */ ++ if(num_packets > 99) { ++ printk(KERN_WARNING "layer7: num_packets can't be > 99.\n"); ++ num_packets = 99; ++ } else if(num_packets < 1) { ++ printk(KERN_WARNING "layer7: num_packets can't be < 1.\n"); ++ num_packets = 1; ++ } ++ ++ return count; ++} ++ ++/* register the proc file */ ++static void layer7_init_proc(void) ++{ ++ struct proc_dir_entry* entry; ++ entry = create_proc_entry("layer7_numpackets", 0644, proc_net); ++ entry->read_proc = layer7_read_proc; ++ entry->write_proc = layer7_write_proc; ++} ++ ++static void layer7_cleanup_proc(void) ++{ ++ remove_proc_entry("layer7_numpackets", proc_net); ++} ++ ++static int __init init(void) ++{ ++ layer7_init_proc(); ++ if(maxdatalen < 1) { ++ printk(KERN_WARNING "layer7: maxdatalen can't be < 1, using 1\n"); ++ maxdatalen = 1; ++ } ++ /* This is not a hard limit. It's just here to prevent people from ++ bringing their slow machines to a grinding halt. */ ++ else if(maxdatalen > 65536) { ++ printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, using 65536\n"); ++ maxdatalen = 65536; ++ } ++ return ipt_register_match(&layer7_match); ++} ++ ++static void __exit fini(void) ++{ ++ layer7_cleanup_proc(); ++ ipt_unregister_match(&layer7_match); ++} ++ ++module_init(init); ++module_exit(fini); +--- linux-2.6.14/net/ipv4/netfilter/regexp/regexp.c 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.14-layer7/net/ipv4/netfilter/regexp/regexp.c 2005-11-12 17:31:34.000000000 -0600 +@@ -0,0 +1,1195 @@ ++/* ++ * regcomp and regexec -- regsub and regerror are elsewhere ++ * @(#)regexp.c 1.3 of 18 April 87 ++ * ++ * Copyright (c) 1986 by University of Toronto. ++ * Written by Henry Spencer. Not derived from licensed software. ++ * ++ * Permission is granted to anyone to use this software for any ++ * purpose on any computer system, and to redistribute it freely, ++ * subject to the following restrictions: ++ * ++ * 1. The author is not responsible for the consequences of use of ++ * this software, no matter how awful, even if they arise ++ * from defects in it. ++ * ++ * 2. The origin of this software must not be misrepresented, either ++ * by explicit claim or by omission. ++ * ++ * 3. Altered versions must be plainly marked as such, and must not ++ * be misrepresented as being the original software. ++ * ++ * Beware that some of this code is subtly aware of the way operator ++ * precedence is structured in regular expressions. Serious changes in ++ * regular-expression syntax might require a total rethink. ++ * ++ * This code was modified by Ethan Sommer to work within the kernel ++ * (it now uses kmalloc etc..) ++ * ++ * Modified slightly by Matthew Strait to use more modern C. ++ */ ++ ++#include "regexp.h" ++#include "regmagic.h" ++ ++/* added by ethan and matt. Lets it work in both kernel and user space. ++(So iptables can use it, for instance.) Yea, it goes both ways... */ ++#if __KERNEL__ ++ #define malloc(foo) kmalloc(foo,GFP_ATOMIC) ++#else ++ #define printk(format,args...) printf(format,##args) ++#endif ++ ++void regerror(char * s) ++{ ++ printk("<3>Regexp: %s\n", s); ++ /* NOTREACHED */ ++} ++ ++/* ++ * The "internal use only" fields in regexp.h are present to pass info from ++ * compile to execute that permits the execute phase to run lots faster on ++ * simple cases. They are: ++ * ++ * regstart char that must begin a match; '\0' if none obvious ++ * reganch is the match anchored (at beginning-of-line only)? ++ * regmust string (pointer into program) that match must include, or NULL ++ * regmlen length of regmust string ++ * ++ * Regstart and reganch permit very fast decisions on suitable starting points ++ * for a match, cutting down the work a lot. Regmust permits fast rejection ++ * of lines that cannot possibly match. The regmust tests are costly enough ++ * that regcomp() supplies a regmust only if the r.e. contains something ++ * potentially expensive (at present, the only such thing detected is * or + ++ * at the start of the r.e., which can involve a lot of backup). Regmlen is ++ * supplied because the test in regexec() needs it and regcomp() is computing ++ * it anyway. ++ */ ++ ++/* ++ * Structure for regexp "program". This is essentially a linear encoding ++ * of a nondeterministic finite-state machine (aka syntax charts or ++ * "railroad normal form" in parsing technology). Each node is an opcode ++ * plus a "next" pointer, possibly plus an operand. "Next" pointers of ++ * all nodes except BRANCH implement concatenation; a "next" pointer with ++ * a BRANCH on both ends of it is connecting two alternatives. (Here we ++ * have one of the subtle syntax dependencies: an individual BRANCH (as ++ * opposed to a collection of them) is never concatenated with anything ++ * because of operator precedence.) The operand of some types of node is ++ * a literal string; for others, it is a node leading into a sub-FSM. In ++ * particular, the operand of a BRANCH node is the first node of the branch. ++ * (NB this is *not* a tree structure: the tail of the branch connects ++ * to the thing following the set of BRANCHes.) The opcodes are: ++ */ ++ ++/* definition number opnd? meaning */ ++#define END 0 /* no End of program. */ ++#define BOL 1 /* no Match "" at beginning of line. */ ++#define EOL 2 /* no Match "" at end of line. */ ++#define ANY 3 /* no Match any one character. */ ++#define ANYOF 4 /* str Match any character in this string. */ ++#define ANYBUT 5 /* str Match any character not in this string. */ ++#define BRANCH 6 /* node Match this alternative, or the next... */ ++#define BACK 7 /* no Match "", "next" ptr points backward. */ ++#define EXACTLY 8 /* str Match this string. */ ++#define NOTHING 9 /* no Match empty string. */ ++#define STAR 10 /* node Match this (simple) thing 0 or more times. */ ++#define PLUS 11 /* node Match this (simple) thing 1 or more times. */ ++#define OPEN 20 /* no Mark this point in input as start of #n. */ ++ /* OPEN+1 is number 1, etc. */ ++#define CLOSE 30 /* no Analogous to OPEN. */ ++ ++/* ++ * Opcode notes: ++ * ++ * BRANCH The set of branches constituting a single choice are hooked ++ * together with their "next" pointers, since precedence prevents ++ * anything being concatenated to any individual branch. The ++ * "next" pointer of the last BRANCH in a choice points to the ++ * thing following the whole choice. This is also where the ++ * final "next" pointer of each individual branch points; each ++ * branch starts with the operand node of a BRANCH node. ++ * ++ * BACK Normal "next" pointers all implicitly point forward; BACK ++ * exists to make loop structures possible. ++ * ++ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular ++ * BRANCH structures using BACK. Simple cases (one character ++ * per match) are implemented with STAR and PLUS for speed ++ * and to minimize recursive plunges. ++ * ++ * OPEN,CLOSE ...are numbered at compile time. ++ */ ++ ++/* ++ * A node is one char of opcode followed by two chars of "next" pointer. ++ * "Next" pointers are stored as two 8-bit pieces, high order first. The ++ * value is a positive offset from the opcode of the node containing it. ++ * An operand, if any, simply follows the node. (Note that much of the ++ * code generation knows about this implicit relationship.) ++ * ++ * Using two bytes for the "next" pointer is vast overkill for most things, ++ * but allows patterns to get big without disasters. ++ */ ++#define OP(p) (*(p)) ++#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) ++#define OPERAND(p) ((p) + 3) ++ ++/* ++ * See regmagic.h for one further detail of program structure. ++ */ ++ ++ ++/* ++ * Utility definitions. ++ */ ++#ifndef CHARBITS ++#define UCHARAT(p) ((int)*(unsigned char *)(p)) ++#else ++#define UCHARAT(p) ((int)*(p)&CHARBITS) ++#endif ++ ++#define FAIL(m) { regerror(m); return(NULL); } ++#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') ++#define META "^$.[()|?+*\\" ++ ++/* ++ * Flags to be passed up and down. ++ */ ++#define HASWIDTH 01 /* Known never to match null string. */ ++#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ ++#define SPSTART 04 /* Starts with * or +. */ ++#define WORST 0 /* Worst case. */ ++ ++/* ++ * Global work variables for regcomp(). ++ */ ++static char *regparse; /* Input-scan pointer. */ ++static int regnpar; /* () count. */ ++static char regdummy; ++static char *regcode; /* Code-emit pointer; ®dummy = don't. */ ++static long regsize; /* Code size. */ ++ ++/* ++ * Forward declarations for regcomp()'s friends. ++ */ ++#ifndef STATIC ++#define STATIC static ++#endif ++STATIC char *reg(int paren,int *flagp); ++STATIC char *regbranch(int *flagp); ++STATIC char *regpiece(int *flagp); ++STATIC char *regatom(int *flagp); ++STATIC char *regnode(char op); ++STATIC char *regnext(char *p); ++STATIC void regc(char b); ++STATIC void reginsert(char op, char *opnd); ++STATIC void regtail(char *p, char *val); ++STATIC void regoptail(char *p, char *val); ++ ++ ++__kernel_size_t my_strcspn(const char *s1,const char *s2) ++{ ++ char *scan1; ++ char *scan2; ++ int count; ++ ++ count = 0; ++ for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) { ++ for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */ ++ if (*scan1 == *scan2++) ++ return(count); ++ count++; ++ } ++ return(count); ++} ++ ++/* ++ - regcomp - compile a regular expression into internal code ++ * ++ * We can't allocate space until we know how big the compiled form will be, ++ * but we can't compile it (and thus know how big it is) until we've got a ++ * place to put the code. So we cheat: we compile it twice, once with code ++ * generation turned off and size counting turned on, and once "for real". ++ * This also means that we don't allocate space until we are sure that the ++ * thing really will compile successfully, and we never have to move the ++ * code and thus invalidate pointers into it. (Note that it has to be in ++ * one piece because free() must be able to free it all.) ++ * ++ * Beware that the optimization-preparation code in here knows about some ++ * of the structure of the compiled regexp. ++ */ ++regexp * ++regcomp(char *exp,int *patternsize) ++{ ++ register regexp *r; ++ register char *scan; ++ register char *longest; ++ register int len; ++ int flags; ++ /* commented out by ethan ++ extern char *malloc(); ++ */ ++ ++ if (exp == NULL) ++ FAIL("NULL argument"); ++ ++ /* First pass: determine size, legality. */ ++ regparse = exp; ++ regnpar = 1; ++ regsize = 0L; ++ regcode = ®dummy; ++ regc(MAGIC); ++ if (reg(0, &flags) == NULL) ++ return(NULL); ++ ++ /* Small enough for pointer-storage convention? */ ++ if (regsize >= 32767L) /* Probably could be 65535L. */ ++ FAIL("regexp too big"); ++ ++ /* Allocate space. */ ++ *patternsize=sizeof(regexp) + (unsigned)regsize; ++ r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize); ++ if (r == NULL) ++ FAIL("out of space"); ++ ++ /* Second pass: emit code. */ ++ regparse = exp; ++ regnpar = 1; ++ regcode = r->program; ++ regc(MAGIC); ++ if (reg(0, &flags) == NULL) ++ return(NULL); ++ ++ /* Dig out information for optimizations. */ ++ r->regstart = '\0'; /* Worst-case defaults. */ ++ r->reganch = 0; ++ r->regmust = NULL; ++ r->regmlen = 0; ++ scan = r->program+1; /* First BRANCH. */ ++ if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ ++ scan = OPERAND(scan); ++ ++ /* Starting-point info. */ ++ if (OP(scan) == EXACTLY) ++ r->regstart = *OPERAND(scan); ++ else if (OP(scan) == BOL) ++ r->reganch++; ++ ++ /* ++ * If there's something expensive in the r.e., find the ++ * longest literal string that must appear and make it the ++ * regmust. Resolve ties in favor of later strings, since ++ * the regstart check works with the beginning of the r.e. ++ * and avoiding duplication strengthens checking. Not a ++ * strong reason, but sufficient in the absence of others. ++ */ ++ if (flags&SPSTART) { ++ longest = NULL; ++ len = 0; ++ for (; scan != NULL; scan = regnext(scan)) ++ if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { ++ longest = OPERAND(scan); ++ len = strlen(OPERAND(scan)); ++ } ++ r->regmust = longest; ++ r->regmlen = len; ++ } ++ } ++ ++ return(r); ++} ++ ++/* ++ - reg - regular expression, i.e. main body or parenthesized thing ++ * ++ * Caller must absorb opening parenthesis. ++ * ++ * Combining parenthesis handling with the base level of regular expression ++ * is a trifle forced, but the need to tie the tails of the branches to what ++ * follows makes it hard to avoid. ++ */ ++static char * ++reg(int paren, int *flagp /* Parenthesized? */ ) ++{ ++ register char *ret; ++ register char *br; ++ register char *ender; ++ register int parno = 0; /* 0 makes gcc happy */ ++ int flags; ++ ++ *flagp = HASWIDTH; /* Tentatively. */ ++ ++ /* Make an OPEN node, if parenthesized. */ ++ if (paren) { ++ if (regnpar >= NSUBEXP) ++ FAIL("too many ()"); ++ parno = regnpar; ++ regnpar++; ++ ret = regnode(OPEN+parno); ++ } else ++ ret = NULL; ++ ++ /* Pick up the branches, linking them together. */ ++ br = regbranch(&flags); ++ if (br == NULL) ++ return(NULL); ++ if (ret != NULL) ++ regtail(ret, br); /* OPEN -> first. */ ++ else ++ ret = br; ++ if (!(flags&HASWIDTH)) ++ *flagp &= ~HASWIDTH; ++ *flagp |= flags&SPSTART; ++ while (*regparse == '|') { ++ regparse++; ++ br = regbranch(&flags); ++ if (br == NULL) ++ return(NULL); ++ regtail(ret, br); /* BRANCH -> BRANCH. */ ++ if (!(flags&HASWIDTH)) ++ *flagp &= ~HASWIDTH; ++ *flagp |= flags&SPSTART; ++ } ++ ++ /* Make a closing node, and hook it on the end. */ ++ ender = regnode((paren) ? CLOSE+parno : END); ++ regtail(ret, ender); ++ ++ /* Hook the tails of the branches to the closing node. */ ++ for (br = ret; br != NULL; br = regnext(br)) ++ regoptail(br, ender); ++ ++ /* Check for proper termination. */ ++ if (paren && *regparse++ != ')') { ++ FAIL("unmatched ()"); ++ } else if (!paren && *regparse != '\0') { ++ if (*regparse == ')') { ++ FAIL("unmatched ()"); ++ } else ++ FAIL("junk on end"); /* "Can't happen". */ ++ /* NOTREACHED */ ++ } ++ ++ return(ret); ++} ++ ++/* ++ - regbranch - one alternative of an | operator ++ * ++ * Implements the concatenation operator. ++ */ ++static char * ++regbranch(int *flagp) ++{ ++ register char *ret; ++ register char *chain; ++ register char *latest; ++ int flags; ++ ++ *flagp = WORST; /* Tentatively. */ ++ ++ ret = regnode(BRANCH); ++ chain = NULL; ++ while (*regparse != '\0' && *regparse != '|' && *regparse != ')') { ++ latest = regpiece(&flags); ++ if (latest == NULL) ++ return(NULL); ++ *flagp |= flags&HASWIDTH; ++ if (chain == NULL) /* First piece. */ ++ *flagp |= flags&SPSTART; ++ else ++ regtail(chain, latest); ++ chain = latest; ++ } ++ if (chain == NULL) /* Loop ran zero times. */ ++ (void) regnode(NOTHING); ++ ++ return(ret); ++} ++ ++/* ++ - regpiece - something followed by possible [*+?] ++ * ++ * Note that the branching code sequences used for ? and the general cases ++ * of * and + are somewhat optimized: they use the same NOTHING node as ++ * both the endmarker for their branch list and the body of the last branch. ++ * It might seem that this node could be dispensed with entirely, but the ++ * endmarker role is not redundant. ++ */ ++static char * ++regpiece(int *flagp) ++{ ++ register char *ret; ++ register char op; ++ register char *next; ++ int flags; ++ ++ ret = regatom(&flags); ++ if (ret == NULL) ++ return(NULL); ++ ++ op = *regparse; ++ if (!ISMULT(op)) { ++ *flagp = flags; ++ return(ret); ++ } ++ ++ if (!(flags&HASWIDTH) && op != '?') ++ FAIL("*+ operand could be empty"); ++ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); ++ ++ if (op == '*' && (flags&SIMPLE)) ++ reginsert(STAR, ret); ++ else if (op == '*') { ++ /* Emit x* as (x&|), where & means "self". */ ++ reginsert(BRANCH, ret); /* Either x */ ++ regoptail(ret, regnode(BACK)); /* and loop */ ++ regoptail(ret, ret); /* back */ ++ regtail(ret, regnode(BRANCH)); /* or */ ++ regtail(ret, regnode(NOTHING)); /* null. */ ++ } else if (op == '+' && (flags&SIMPLE)) ++ reginsert(PLUS, ret); ++ else if (op == '+') { ++ /* Emit x+ as x(&|), where & means "self". */ ++ next = regnode(BRANCH); /* Either */ ++ regtail(ret, next); ++ regtail(regnode(BACK), ret); /* loop back */ ++ regtail(next, regnode(BRANCH)); /* or */ ++ regtail(ret, regnode(NOTHING)); /* null. */ ++ } else if (op == '?') { ++ /* Emit x? as (x|) */ ++ reginsert(BRANCH, ret); /* Either x */ ++ regtail(ret, regnode(BRANCH)); /* or */ ++ next = regnode(NOTHING); /* null. */ ++ regtail(ret, next); ++ regoptail(ret, next); ++ } ++ regparse++; ++ if (ISMULT(*regparse)) ++ FAIL("nested *?+"); ++ ++ return(ret); ++} ++ ++/* ++ - regatom - the lowest level ++ * ++ * Optimization: gobbles an entire sequence of ordinary characters so that ++ * it can turn them into a single node, which is smaller to store and ++ * faster to run. Backslashed characters are exceptions, each becoming a ++ * separate node; the code is simpler that way and it's not worth fixing. ++ */ ++static char * ++regatom(int *flagp) ++{ ++ register char *ret; ++ int flags; ++ ++ *flagp = WORST; /* Tentatively. */ ++ ++ switch (*regparse++) { ++ case '^': ++ ret = regnode(BOL); ++ break; ++ case '$': ++ ret = regnode(EOL); ++ break; ++ case '.': ++ ret = regnode(ANY); ++ *flagp |= HASWIDTH|SIMPLE; ++ break; ++ case '[': { ++ register int class; ++ register int classend; ++ ++ if (*regparse == '^') { /* Complement of range. */ ++ ret = regnode(ANYBUT); ++ regparse++; ++ } else ++ ret = regnode(ANYOF); ++ if (*regparse == ']' || *regparse == '-') ++ regc(*regparse++); ++ while (*regparse != '\0' && *regparse != ']') { ++ if (*regparse == '-') { ++ regparse++; ++ if (*regparse == ']' || *regparse == '\0') ++ regc('-'); ++ else { ++ class = UCHARAT(regparse-2)+1; ++ classend = UCHARAT(regparse); ++ if (class > classend+1) ++ FAIL("invalid [] range"); ++ for (; class <= classend; class++) ++ regc(class); ++ regparse++; ++ } ++ } else ++ regc(*regparse++); ++ } ++ regc('\0'); ++ if (*regparse != ']') ++ FAIL("unmatched []"); ++ regparse++; ++ *flagp |= HASWIDTH|SIMPLE; ++ } ++ break; ++ case '(': ++ ret = reg(1, &flags); ++ if (ret == NULL) ++ return(NULL); ++ *flagp |= flags&(HASWIDTH|SPSTART); ++ break; ++ case '\0': ++ case '|': ++ case ')': ++ FAIL("internal urp"); /* Supposed to be caught earlier. */ ++ break; ++ case '?': ++ case '+': ++ case '*': ++ FAIL("?+* follows nothing"); ++ break; ++ case '\\': ++ if (*regparse == '\0') ++ FAIL("trailing \\"); ++ ret = regnode(EXACTLY); ++ regc(*regparse++); ++ regc('\0'); ++ *flagp |= HASWIDTH|SIMPLE; ++ break; ++ default: { ++ register int len; ++ register char ender; ++ ++ regparse--; ++ len = my_strcspn((const char *)regparse, (const char *)META); ++ if (len <= 0) ++ FAIL("internal disaster"); ++ ender = *(regparse+len); ++ if (len > 1 && ISMULT(ender)) ++ len--; /* Back off clear of ?+* operand. */ ++ *flagp |= HASWIDTH; ++ if (len == 1) ++ *flagp |= SIMPLE; ++ ret = regnode(EXACTLY); ++ while (len > 0) { ++ regc(*regparse++); ++ len--; ++ } ++ regc('\0'); ++ } ++ break; ++ } ++ ++ return(ret); ++} ++ ++/* ++ - regnode - emit a node ++ */ ++static char * /* Location. */ ++regnode(char op) ++{ ++ register char *ret; ++ register char *ptr; ++ ++ ret = regcode; ++ if (ret == ®dummy) { ++ regsize += 3; ++ return(ret); ++ } ++ ++ ptr = ret; ++ *ptr++ = op; ++ *ptr++ = '\0'; /* Null "next" pointer. */ ++ *ptr++ = '\0'; ++ regcode = ptr; ++ ++ return(ret); ++} ++ ++/* ++ - regc - emit (if appropriate) a byte of code ++ */ ++static void ++regc(char b) ++{ ++ if (regcode != ®dummy) ++ *regcode++ = b; ++ else ++ regsize++; ++} ++ ++/* ++ - reginsert - insert an operator in front of already-emitted operand ++ * ++ * Means relocating the operand. ++ */ ++static void ++reginsert(char op, char* opnd) ++{ ++ register char *src; ++ register char *dst; ++ register char *place; ++ ++ if (regcode == ®dummy) { ++ regsize += 3; ++ return; ++ } ++ ++ src = regcode; ++ regcode += 3; ++ dst = regcode; ++ while (src > opnd) ++ *--dst = *--src; ++ ++ place = opnd; /* Op node, where operand used to be. */ ++ *place++ = op; ++ *place++ = '\0'; ++ *place++ = '\0'; ++} ++ ++/* ++ - regtail - set the next-pointer at the end of a node chain ++ */ ++static void ++regtail(char *p, char *val) ++{ ++ register char *scan; ++ register char *temp; ++ register int offset; ++ ++ if (p == ®dummy) ++ return; ++ ++ /* Find last node. */ ++ scan = p; ++ for (;;) { ++ temp = regnext(scan); ++ if (temp == NULL) ++ break; ++ scan = temp; ++ } ++ ++ if (OP(scan) == BACK) ++ offset = scan - val; ++ else ++ offset = val - scan; ++ *(scan+1) = (offset>>8)&0377; ++ *(scan+2) = offset&0377; ++} ++ ++/* ++ - regoptail - regtail on operand of first argument; nop if operandless ++ */ ++static void ++regoptail(char *p, char *val) ++{ ++ /* "Operandless" and "op != BRANCH" are synonymous in practice. */ ++ if (p == NULL || p == ®dummy || OP(p) != BRANCH) ++ return; ++ regtail(OPERAND(p), val); ++} ++ ++/* ++ * regexec and friends ++ */ ++ ++/* ++ * Global work variables for regexec(). ++ */ ++static char *reginput; /* String-input pointer. */ ++static char *regbol; /* Beginning of input, for ^ check. */ ++static char **regstartp; /* Pointer to startp array. */ ++static char **regendp; /* Ditto for endp. */ ++ ++/* ++ * Forwards. ++ */ ++STATIC int regtry(regexp *prog, char *string); ++STATIC int regmatch(char *prog); ++STATIC int regrepeat(char *p); ++ ++#ifdef DEBUG ++int regnarrate = 0; ++void regdump(); ++STATIC char *regprop(char *op); ++#endif ++ ++/* ++ - regexec - match a regexp against a string ++ */ ++int ++regexec(regexp *prog, char *string) ++{ ++ register char *s; ++ ++ /* Be paranoid... */ ++ if (prog == NULL || string == NULL) { ++ printk("<3>Regexp: NULL parameter\n"); ++ return(0); ++ } ++ ++ /* Check validity of program. */ ++ if (UCHARAT(prog->program) != MAGIC) { ++ printk("<3>Regexp: corrupted program\n"); ++ return(0); ++ } ++ ++ /* If there is a "must appear" string, look for it. */ ++ if (prog->regmust != NULL) { ++ s = string; ++ while ((s = strchr(s, prog->regmust[0])) != NULL) { ++ if (strncmp(s, prog->regmust, prog->regmlen) == 0) ++ break; /* Found it. */ ++ s++; ++ } ++ if (s == NULL) /* Not present. */ ++ return(0); ++ } ++ ++ /* Mark beginning of line for ^ . */ ++ regbol = string; ++ ++ /* Simplest case: anchored match need be tried only once. */ ++ if (prog->reganch) ++ return(regtry(prog, string)); ++ ++ /* Messy cases: unanchored match. */ ++ s = string; ++ if (prog->regstart != '\0') ++ /* We know what char it must start with. */ ++ while ((s = strchr(s, prog->regstart)) != NULL) { ++ if (regtry(prog, s)) ++ return(1); ++ s++; ++ } ++ else ++ /* We don't -- general case. */ ++ do { ++ if (regtry(prog, s)) ++ return(1); ++ } while (*s++ != '\0'); ++ ++ /* Failure. */ ++ return(0); ++} ++ ++/* ++ - regtry - try match at specific point ++ */ ++static int /* 0 failure, 1 success */ ++regtry(regexp *prog, char *string) ++{ ++ register int i; ++ register char **sp; ++ register char **ep; ++ ++ reginput = string; ++ regstartp = prog->startp; ++ regendp = prog->endp; ++ ++ sp = prog->startp; ++ ep = prog->endp; ++ for (i = NSUBEXP; i > 0; i--) { ++ *sp++ = NULL; ++ *ep++ = NULL; ++ } ++ if (regmatch(prog->program + 1)) { ++ prog->startp[0] = string; ++ prog->endp[0] = reginput; ++ return(1); ++ } else ++ return(0); ++} ++ ++/* ++ - regmatch - main matching routine ++ * ++ * Conceptually the strategy is simple: check to see whether the current ++ * node matches, call self recursively to see whether the rest matches, ++ * and then act accordingly. In practice we make some effort to avoid ++ * recursion, in particular by going through "ordinary" nodes (that don't ++ * need to know whether the rest of the match failed) by a loop instead of ++ * by recursion. ++ */ ++static int /* 0 failure, 1 success */ ++regmatch(char *prog) ++{ ++ register char *scan = prog; /* Current node. */ ++ char *next; /* Next node. */ ++ ++#ifdef DEBUG ++ if (scan != NULL && regnarrate) ++ fprintf(stderr, "%s(\n", regprop(scan)); ++#endif ++ while (scan != NULL) { ++#ifdef DEBUG ++ if (regnarrate) ++ fprintf(stderr, "%s...\n", regprop(scan)); ++#endif ++ next = regnext(scan); ++ ++ switch (OP(scan)) { ++ case BOL: ++ if (reginput != regbol) ++ return(0); ++ break; ++ case EOL: ++ if (*reginput != '\0') ++ return(0); ++ break; ++ case ANY: ++ if (*reginput == '\0') ++ return(0); ++ reginput++; ++ break; ++ case EXACTLY: { ++ register int len; ++ register char *opnd; ++ ++ opnd = OPERAND(scan); ++ /* Inline the first character, for speed. */ ++ if (*opnd != *reginput) ++ return(0); ++ len = strlen(opnd); ++ if (len > 1 && strncmp(opnd, reginput, len) != 0) ++ return(0); ++ reginput += len; ++ } ++ break; ++ case ANYOF: ++ if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL) ++ return(0); ++ reginput++; ++ break; ++ case ANYBUT: ++ if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL) ++ return(0); ++ reginput++; ++ break; ++ case NOTHING: ++ case BACK: ++ break; ++ case OPEN+1: ++ case OPEN+2: ++ case OPEN+3: ++ case OPEN+4: ++ case OPEN+5: ++ case OPEN+6: ++ case OPEN+7: ++ case OPEN+8: ++ case OPEN+9: { ++ register int no; ++ register char *save; ++ ++ no = OP(scan) - OPEN; ++ save = reginput; ++ ++ if (regmatch(next)) { ++ /* ++ * Don't set startp if some later ++ * invocation of the same parentheses ++ * already has. ++ */ ++ if (regstartp[no] == NULL) ++ regstartp[no] = save; ++ return(1); ++ } else ++ return(0); ++ } ++ break; ++ case CLOSE+1: ++ case CLOSE+2: ++ case CLOSE+3: ++ case CLOSE+4: ++ case CLOSE+5: ++ case CLOSE+6: ++ case CLOSE+7: ++ case CLOSE+8: ++ case CLOSE+9: ++ { ++ register int no; ++ register char *save; ++ ++ no = OP(scan) - CLOSE; ++ save = reginput; ++ ++ if (regmatch(next)) { ++ /* ++ * Don't set endp if some later ++ * invocation of the same parentheses ++ * already has. ++ */ ++ if (regendp[no] == NULL) ++ regendp[no] = save; ++ return(1); ++ } else ++ return(0); ++ } ++ break; ++ case BRANCH: { ++ register char *save; ++ ++ if (OP(next) != BRANCH) /* No choice. */ ++ next = OPERAND(scan); /* Avoid recursion. */ ++ else { ++ do { ++ save = reginput; ++ if (regmatch(OPERAND(scan))) ++ return(1); ++ reginput = save; ++ scan = regnext(scan); ++ } while (scan != NULL && OP(scan) == BRANCH); ++ return(0); ++ /* NOTREACHED */ ++ } ++ } ++ break; ++ case STAR: ++ case PLUS: { ++ register char nextch; ++ register int no; ++ register char *save; ++ register int min; ++ ++ /* ++ * Lookahead to avoid useless match attempts ++ * when we know what character comes next. ++ */ ++ nextch = '\0'; ++ if (OP(next) == EXACTLY) ++ nextch = *OPERAND(next); ++ min = (OP(scan) == STAR) ? 0 : 1; ++ save = reginput; ++ no = regrepeat(OPERAND(scan)); ++ while (no >= min) { ++ /* If it could work, try it. */ ++ if (nextch == '\0' || *reginput == nextch) ++ if (regmatch(next)) ++ return(1); ++ /* Couldn't or didn't -- back up. */ ++ no--; ++ reginput = save + no; ++ } ++ return(0); ++ } ++ break; ++ case END: ++ return(1); /* Success! */ ++ break; ++ default: ++ printk("<3>Regexp: memory corruption\n"); ++ return(0); ++ break; ++ } ++ ++ scan = next; ++ } ++ ++ /* ++ * We get here only if there's trouble -- normally "case END" is ++ * the terminating point. ++ */ ++ printk("<3>Regexp: corrupted pointers\n"); ++ return(0); ++} ++ ++/* ++ - regrepeat - repeatedly match something simple, report how many ++ */ ++static int ++regrepeat(char *p) ++{ ++ register int count = 0; ++ register char *scan; ++ register char *opnd; ++ ++ scan = reginput; ++ opnd = OPERAND(p); ++ switch (OP(p)) { ++ case ANY: ++ count = strlen(scan); ++ scan += count; ++ break; ++ case EXACTLY: ++ while (*opnd == *scan) { ++ count++; ++ scan++; ++ } ++ break; ++ case ANYOF: ++ while (*scan != '\0' && strchr(opnd, *scan) != NULL) { ++ count++; ++ scan++; ++ } ++ break; ++ case ANYBUT: ++ while (*scan != '\0' && strchr(opnd, *scan) == NULL) { ++ count++; ++ scan++; ++ } ++ break; ++ default: /* Oh dear. Called inappropriately. */ ++ printk("<3>Regexp: internal foulup\n"); ++ count = 0; /* Best compromise. */ ++ break; ++ } ++ reginput = scan; ++ ++ return(count); ++} ++ ++/* ++ - regnext - dig the "next" pointer out of a node ++ */ ++static char* ++regnext(char *p) ++{ ++ register int offset; ++ ++ if (p == ®dummy) ++ return(NULL); ++ ++ offset = NEXT(p); ++ if (offset == 0) ++ return(NULL); ++ ++ if (OP(p) == BACK) ++ return(p-offset); ++ else ++ return(p+offset); ++} ++ ++#ifdef DEBUG ++ ++STATIC char *regprop(); ++ ++/* ++ - regdump - dump a regexp onto stdout in vaguely comprehensible form ++ */ ++void ++regdump(regexp *r) ++{ ++ register char *s; ++ register char op = EXACTLY; /* Arbitrary non-END op. */ ++ register char *next; ++ /* extern char *strchr(); */ ++ ++ ++ s = r->program + 1; ++ while (op != END) { /* While that wasn't END last time... */ ++ op = OP(s); ++ printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ ++ next = regnext(s); ++ if (next == NULL) /* Next ptr. */ ++ printf("(0)"); ++ else ++ printf("(%d)", (s-r->program)+(next-s)); ++ s += 3; ++ if (op == ANYOF || op == ANYBUT || op == EXACTLY) { ++ /* Literal string, where present. */ ++ while (*s != '\0') { ++ putchar(*s); ++ s++; ++ } ++ s++; ++ } ++ putchar('\n'); ++ } ++ ++ /* Header fields of interest. */ ++ if (r->regstart != '\0') ++ printf("start `%c' ", r->regstart); ++ if (r->reganch) ++ printf("anchored "); ++ if (r->regmust != NULL) ++ printf("must have \"%s\"", r->regmust); ++ printf("\n"); ++} ++ ++/* ++ - regprop - printable representation of opcode ++ */ ++static char * ++regprop(char *op) ++{ ++#define BUFLEN 50 ++ register char *p; ++ static char buf[BUFLEN]; ++ ++ strcpy(buf, ":"); ++ ++ switch (OP(op)) { ++ case BOL: ++ p = "BOL"; ++ break; ++ case EOL: ++ p = "EOL"; ++ break; ++ case ANY: ++ p = "ANY"; ++ break; ++ case ANYOF: ++ p = "ANYOF"; ++ break; ++ case ANYBUT: ++ p = "ANYBUT"; ++ break; ++ case BRANCH: ++ p = "BRANCH"; ++ break; ++ case EXACTLY: ++ p = "EXACTLY"; ++ break; ++ case NOTHING: ++ p = "NOTHING"; ++ break; ++ case BACK: ++ p = "BACK"; ++ break; ++ case END: ++ p = "END"; ++ break; ++ case OPEN+1: ++ case OPEN+2: ++ case OPEN+3: ++ case OPEN+4: ++ case OPEN+5: ++ case OPEN+6: ++ case OPEN+7: ++ case OPEN+8: ++ case OPEN+9: ++ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN); ++ p = NULL; ++ break; ++ case CLOSE+1: ++ case CLOSE+2: ++ case CLOSE+3: ++ case CLOSE+4: ++ case CLOSE+5: ++ case CLOSE+6: ++ case CLOSE+7: ++ case CLOSE+8: ++ case CLOSE+9: ++ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE); ++ p = NULL; ++ break; ++ case STAR: ++ p = "STAR"; ++ break; ++ case PLUS: ++ p = "PLUS"; ++ break; ++ default: ++ printk("<3>Regexp: corrupted opcode\n"); ++ break; ++ } ++ if (p != NULL) ++ strncat(buf, p, BUFLEN-strlen(buf)); ++ return(buf); ++} ++#endif ++ ++ +--- linux-2.6.14/net/ipv4/netfilter/regexp/regexp.h 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.14-layer7/net/ipv4/netfilter/regexp/regexp.h 2005-11-12 17:31:34.000000000 -0600 +@@ -0,0 +1,41 @@ ++/* ++ * Definitions etc. for regexp(3) routines. ++ * ++ * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], ++ * not the System V one. ++ */ ++ ++#ifndef REGEXP_H ++#define REGEXP_H ++ ++ ++/* ++http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h , ++which contains a version of this library, says: ++ ++ * ++ * NSUBEXP must be at least 10, and no greater than 117 or the parser ++ * will not work properly. ++ * ++ ++However, it looks rather like this library is limited to 10. If you think ++otherwise, let us know. ++*/ ++ ++#define NSUBEXP 10 ++typedef struct regexp { ++ char *startp[NSUBEXP]; ++ char *endp[NSUBEXP]; ++ char regstart; /* Internal use only. */ ++ char reganch; /* Internal use only. */ ++ char *regmust; /* Internal use only. */ ++ int regmlen; /* Internal use only. */ ++ char program[1]; /* Unwarranted chumminess with compiler. */ ++} regexp; ++ ++regexp * regcomp(char *exp, int *patternsize); ++int regexec(regexp *prog, char *string); ++void regsub(regexp *prog, char *source, char *dest); ++void regerror(char *s); ++ ++#endif +--- linux-2.6.14/net/ipv4/netfilter/regexp/regmagic.h 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.14-layer7/net/ipv4/netfilter/regexp/regmagic.h 2005-11-12 17:31:34.000000000 -0600 +@@ -0,0 +1,5 @@ ++/* ++ * The first byte of the regexp internal "program" is actually this magic ++ * number; the start node begins in the second byte. ++ */ ++#define MAGIC 0234 +--- linux-2.6.14/net/ipv4/netfilter/regexp/regsub.c 1969-12-31 18:00:00.000000000 -0600 ++++ linux-2.6.14-layer7/net/ipv4/netfilter/regexp/regsub.c 2005-11-12 17:31:34.000000000 -0600 +@@ -0,0 +1,95 @@ ++/* ++ * regsub ++ * @(#)regsub.c 1.3 of 2 April 86 ++ * ++ * Copyright (c) 1986 by University of Toronto. ++ * Written by Henry Spencer. Not derived from licensed software. ++ * ++ * Permission is granted to anyone to use this software for any ++ * purpose on any computer system, and to redistribute it freely, ++ * subject to the following restrictions: ++ * ++ * 1. The author is not responsible for the consequences of use of ++ * this software, no matter how awful, even if they arise ++ * from defects in it. ++ * ++ * 2. The origin of this software must not be misrepresented, either ++ * by explicit claim or by omission. ++ * ++ * 3. Altered versions must be plainly marked as such, and must not ++ * be misrepresented as being the original software. ++ * ++ * ++ * This code was modified by Ethan Sommer to work within the kernel ++ * (it now uses kmalloc etc..) ++ * ++ */ ++#include "regexp.h" ++#include "regmagic.h" ++#include <linux/string.h> ++ ++ ++#ifndef CHARBITS ++#define UCHARAT(p) ((int)*(unsigned char *)(p)) ++#else ++#define UCHARAT(p) ((int)*(p)&CHARBITS) ++#endif ++ ++#if 0 ++//void regerror(char * s) ++//{ ++// printk("regexp(3): %s", s); ++// /* NOTREACHED */ ++//} ++#endif ++ ++/* ++ - regsub - perform substitutions after a regexp match ++ */ ++void ++regsub(regexp * prog, char * source, char * dest) ++{ ++ register char *src; ++ register char *dst; ++ register char c; ++ register int no; ++ register int len; ++ ++ /* Not necessary and gcc doesn't like it -MLS */ ++ /*extern char *strncpy();*/ ++ ++ if (prog == NULL || source == NULL || dest == NULL) { ++ regerror("NULL parm to regsub"); ++ return; ++ } ++ if (UCHARAT(prog->program) != MAGIC) { ++ regerror("damaged regexp fed to regsub"); ++ return; ++ } ++ ++ src = source; ++ dst = dest; ++ while ((c = *src++) != '\0') { ++ if (c == '&') ++ no = 0; ++ else if (c == '\\' && '0' <= *src && *src <= '9') ++ no = *src++ - '0'; ++ else ++ no = -1; ++ ++ if (no < 0) { /* Ordinary character. */ ++ if (c == '\\' && (*src == '\\' || *src == '&')) ++ c = *src++; ++ *dst++ = c; ++ } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { ++ len = prog->endp[no] - prog->startp[no]; ++ (void) strncpy(dst, prog->startp[no], len); ++ dst += len; ++ if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */ ++ regerror("damaged match string"); ++ return; ++ } ++ } ++ } ++ *dst++ = '\0'; ++} diff --git a/openwrt/target/linux/generic-2.6/patches/102-openswan-2.4.0.kernel-2.6-natt.patch b/openwrt/target/linux/generic-2.6/patches/102-openswan-2.4.0.kernel-2.6-natt.patch new file mode 100644 index 0000000000..ab09e3c356 --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/102-openswan-2.4.0.kernel-2.6-natt.patch @@ -0,0 +1,129 @@ +packaging/utils/nattpatch 2.6 +--- /dev/null Tue Mar 11 13:02:56 2003 ++++ nat-t/include/net/xfrmudp.h Mon Feb 9 13:51:03 2004 +@@ -0,0 +1,10 @@ ++/* ++ * pointer to function for type that xfrm4_input wants, to permit ++ * decoupling of XFRM from udp.c ++ */ ++#define HAVE_XFRM4_UDP_REGISTER ++ ++typedef int (*xfrm4_rcv_encap_t)(struct sk_buff *skb, __u16 encap_type); ++extern int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func ++ , xfrm4_rcv_encap_t *oldfunc); ++extern int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func); +--- /distros/kernel/linux-2.6.11.2/net/ipv4/Kconfig 2005-03-09 03:12:33.000000000 -0500 ++++ swan26/net/ipv4/Kconfig 2005-04-04 18:46:13.000000000 -0400 +@@ -351,2 +351,8 @@ + ++config IPSEC_NAT_TRAVERSAL ++ bool "IPSEC NAT-Traversal (KLIPS compatible)" ++ depends on INET ++ ---help--- ++ Includes support for RFC3947/RFC3948 NAT-Traversal of ESP over UDP. ++ + config IP_TCPDIAG +--- /distros/kernel/linux-2.6.11.2/./net/ipv4/udp.c 2005-03-09 03:11:09.000000000 -0500 ++++ ./net/ipv4/udp.c 2005-04-09 20:46:46.000000000 -0400 +@@ -109,2 +109,3 @@ + #include <net/xfrm.h> ++#include <net/xfrmudp.h> + +@@ -114,2 +115,4 @@ + ++static xfrm4_rcv_encap_t xfrm4_rcv_encap_func; ++ + DEFINE_SNMP_STAT(struct udp_mib, udp_statistics); +@@ -894,2 +897,38 @@ + ++#if defined(CONFIG_XFRM) || defined(CONFIG_IPSEC_NAT_TRAVERSAL) ++ ++/* if XFRM isn't a module, then register it directly. */ ++#if 0 && !defined(CONFIG_XFRM_MODULE) && !defined(CONFIG_IPSEC_NAT_TRAVERSAL) ++static xfrm4_rcv_encap_t xfrm4_rcv_encap_func = xfrm4_rcv_encap; ++#else ++static xfrm4_rcv_encap_t xfrm4_rcv_encap_func = NULL; ++#endif ++ ++int udp4_register_esp_rcvencap(xfrm4_rcv_encap_t func ++ , xfrm4_rcv_encap_t *oldfunc) ++{ ++ if(oldfunc != NULL) { ++ *oldfunc = xfrm4_rcv_encap_func; ++ } ++ ++#if 0 ++ if(xfrm4_rcv_encap_func != NULL) ++ return -1; ++#endif ++ ++ xfrm4_rcv_encap_func = func; ++ return 0; ++} ++ ++int udp4_unregister_esp_rcvencap(xfrm4_rcv_encap_t func) ++{ ++ if(xfrm4_rcv_encap_func != func) ++ return -1; ++ ++ xfrm4_rcv_encap_func = NULL; ++ return 0; ++} ++#endif /* CONFIG_XFRM_MODULE || CONFIG_IPSEC_NAT_TRAVERSAL */ ++ ++ + /* return: +@@ -901,5 +940,5 @@ + { +-#ifndef CONFIG_XFRM ++#if !defined(CONFIG_XFRM) && !defined(CONFIG_IPSEC_NAT_TRAVERSAL) + return 1; +-#else ++#else /* either CONFIG_XFRM or CONFIG_IPSEC_NAT_TRAVERSAL */ + struct udp_sock *up = udp_sk(sk); +@@ -915,3 +954,3 @@ + if (udpdata > skb->tail) +- return 1; ++ return 2; + +@@ -919,3 +958,3 @@ + if (!encap_type) +- return 1; ++ return 3; + +@@ -934,3 +973,3 @@ + /* Must be an IKE packet.. pass it through */ +- return 1; ++ return 4; + break; +@@ -947,3 +986,3 @@ + /* Must be an IKE packet.. pass it through */ +- return 1; ++ return 5; + break; +@@ -956,2 +995,4 @@ + */ ++ if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) ++ return 0; + +@@ -1019,5 +1060,9 @@ + if (ret < 0) { +- /* process the ESP packet */ +- ret = xfrm4_rcv_encap(skb, up->encap_type); +- UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); ++ if(xfrm4_rcv_encap_func != NULL) { ++ ret = (*xfrm4_rcv_encap_func)(skb, up->encap_type); ++ UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS); ++ } else { ++ UDP_INC_STATS_BH(UDP_MIB_INERRORS); ++ ret = 1; ++ } + return -ret; +@@ -1574 +1619,7 @@ + #endif ++ ++#if defined(CONFIG_IPSEC_NAT_TRAVERSAL) ++EXPORT_SYMBOL(udp4_register_esp_rcvencap); ++EXPORT_SYMBOL(udp4_unregister_esp_rcvencap); ++#endif ++ diff --git a/openwrt/target/linux/generic-2.6/patches/103-netfilter-ipset.patch b/openwrt/target/linux/generic-2.6/patches/103-netfilter-ipset.patch new file mode 100644 index 0000000000..54e2828a16 --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/103-netfilter-ipset.patch @@ -0,0 +1,5738 @@ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set.h 2005-09-20 13:11:38.782384000 +0200 +@@ -0,0 +1,489 @@ ++#ifndef _IP_SET_H ++#define _IP_SET_H ++ ++/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> ++ * Patrick Schaaf <bof@bof.de> ++ * Martin Josefsson <gandalf@wlug.westbo.se> ++ * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++/* ++ * A sockopt of such quality has hardly ever been seen before on the open ++ * market! This little beauty, hardly ever used: above 64, so it's ++ * traditionally used for firewalling, not touched (even once!) by the ++ * 2.0, 2.2 and 2.4 kernels! ++ * ++ * Comes with its own certificate of authenticity, valid anywhere in the ++ * Free world! ++ * ++ * Rusty, 19.4.2000 ++ */ ++#define SO_IP_SET 83 ++ ++/* ++ * Heavily modify by Joakim Axelsson 08.03.2002 ++ * - Made it more modulebased ++ * ++ * Additional heavy modifications by Jozsef Kadlecsik 22.02.2004 ++ * - bindings added ++ * - in order to "deal with" backward compatibility, renamed to ipset ++ */ ++ ++/* ++ * Used so that the kernel module and ipset-binary can match their versions ++ */ ++#define IP_SET_PROTOCOL_VERSION 2 ++ ++#define IP_SET_MAXNAMELEN 32 /* set names and set typenames */ ++ ++/* Lets work with our own typedef for representing an IP address. ++ * We hope to make the code more portable, possibly to IPv6... ++ * ++ * The representation works in HOST byte order, because most set types ++ * will perform arithmetic operations and compare operations. ++ * ++ * For now the type is an uint32_t. ++ * ++ * Make sure to ONLY use the functions when translating and parsing ++ * in order to keep the host byte order and make it more portable: ++ * parse_ip() ++ * parse_mask() ++ * parse_ipandmask() ++ * ip_tostring() ++ * (Joakim: where are they???) ++ */ ++ ++typedef uint32_t ip_set_ip_t; ++ ++/* Sets are identified by an id in kernel space. Tweak with ip_set_id_t ++ * and IP_SET_INVALID_ID if you want to increase the max number of sets. ++ */ ++typedef uint16_t ip_set_id_t; ++ ++#define IP_SET_INVALID_ID 65535 ++ ++/* How deep we follow bindings */ ++#define IP_SET_MAX_BINDINGS 6 ++ ++/* ++ * Option flags for kernel operations (ipt_set_info) ++ */ ++#define IPSET_SRC 0x01 /* Source match/add */ ++#define IPSET_DST 0x02 /* Destination match/add */ ++#define IPSET_MATCH_INV 0x04 /* Inverse matching */ ++ ++/* ++ * Set types (flavours) ++ */ ++#define IPSET_TYPE_IP 0 /* IP address type of set */ ++#define IPSET_TYPE_PORT 1 /* Port type of set */ ++ ++/* Reserved keywords */ ++#define IPSET_TOKEN_DEFAULT ":default:" ++#define IPSET_TOKEN_ALL ":all:" ++ ++/* SO_IP_SET operation constants, and their request struct types. ++ * ++ * Operation ids: ++ * 0-99: commands with version checking ++ * 100-199: add/del/test/bind/unbind ++ * 200-299: list, save, restore ++ */ ++ ++/* Single shot operations: ++ * version, create, destroy, flush, rename and swap ++ * ++ * Sets are identified by name. ++ */ ++ ++#define IP_SET_REQ_STD \ ++ unsigned op; \ ++ unsigned version; \ ++ char name[IP_SET_MAXNAMELEN] ++ ++#define IP_SET_OP_CREATE 0x00000001 /* Create a new (empty) set */ ++struct ip_set_req_create { ++ IP_SET_REQ_STD; ++ char typename[IP_SET_MAXNAMELEN]; ++}; ++ ++#define IP_SET_OP_DESTROY 0x00000002 /* Remove a (empty) set */ ++struct ip_set_req_std { ++ IP_SET_REQ_STD; ++}; ++ ++#define IP_SET_OP_FLUSH 0x00000003 /* Remove all IPs in a set */ ++/* Uses ip_set_req_std */ ++ ++#define IP_SET_OP_RENAME 0x00000004 /* Rename a set */ ++/* Uses ip_set_req_create */ ++ ++#define IP_SET_OP_SWAP 0x00000005 /* Swap two sets */ ++/* Uses ip_set_req_create */ ++ ++union ip_set_name_index { ++ char name[IP_SET_MAXNAMELEN]; ++ ip_set_id_t index; ++}; ++ ++#define IP_SET_OP_GET_BYNAME 0x00000006 /* Get set index by name */ ++struct ip_set_req_get_set { ++ unsigned op; ++ unsigned version; ++ union ip_set_name_index set; ++}; ++ ++#define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */ ++/* Uses ip_set_req_get_set */ ++ ++#define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */ ++struct ip_set_req_version { ++ unsigned op; ++ unsigned version; ++}; ++ ++/* Double shots operations: ++ * add, del, test, bind and unbind. ++ * ++ * First we query the kernel to get the index and type of the target set, ++ * then issue the command. Validity of IP is checked in kernel in order ++ * to minimalize sockopt operations. ++ */ ++ ++/* Get minimal set data for add/del/test/bind/unbind IP */ ++#define IP_SET_OP_ADT_GET 0x00000010 /* Get set and type */ ++struct ip_set_req_adt_get { ++ unsigned op; ++ unsigned version; ++ union ip_set_name_index set; ++ char typename[IP_SET_MAXNAMELEN]; ++}; ++ ++#define IP_SET_REQ_BYINDEX \ ++ unsigned op; \ ++ ip_set_id_t index; ++ ++struct ip_set_req_adt { ++ IP_SET_REQ_BYINDEX; ++}; ++ ++#define IP_SET_OP_ADD_IP 0x00000101 /* Add an IP to a set */ ++/* Uses ip_set_req_adt, with type specific addage */ ++ ++#define IP_SET_OP_DEL_IP 0x00000102 /* Remove an IP from a set */ ++/* Uses ip_set_req_adt, with type specific addage */ ++ ++#define IP_SET_OP_TEST_IP 0x00000103 /* Test an IP in a set */ ++/* Uses ip_set_req_adt, with type specific addage */ ++ ++#define IP_SET_OP_BIND_SET 0x00000104 /* Bind an IP to a set */ ++/* Uses ip_set_req_bind, with type specific addage */ ++struct ip_set_req_bind { ++ IP_SET_REQ_BYINDEX; ++ char binding[IP_SET_MAXNAMELEN]; ++}; ++ ++#define IP_SET_OP_UNBIND_SET 0x00000105 /* Unbind an IP from a set */ ++/* Uses ip_set_req_bind, with type speficic addage ++ * index = 0 means unbinding for all sets */ ++ ++#define IP_SET_OP_TEST_BIND_SET 0x00000106 /* Test binding an IP to a set */ ++/* Uses ip_set_req_bind, with type specific addage */ ++ ++/* Multiple shots operations: list, save, restore. ++ * ++ * - check kernel version and query the max number of sets ++ * - get the basic information on all sets ++ * and size required for the next step ++ * - get actual set data: header, data, bindings ++ */ ++ ++/* Get max_sets and the index of a queried set ++ */ ++#define IP_SET_OP_MAX_SETS 0x00000020 ++struct ip_set_req_max_sets { ++ unsigned op; ++ unsigned version; ++ ip_set_id_t max_sets; /* max_sets */ ++ ip_set_id_t sets; /* real number of sets */ ++ union ip_set_name_index set; /* index of set if name used */ ++}; ++ ++/* Get the id and name of the sets plus size for next step */ ++#define IP_SET_OP_LIST_SIZE 0x00000201 ++#define IP_SET_OP_SAVE_SIZE 0x00000202 ++struct ip_set_req_setnames { ++ unsigned op; ++ ip_set_id_t index; /* set to list/save */ ++ size_t size; /* size to get setdata/bindings */ ++ /* followed by sets number of struct ip_set_name_list */ ++}; ++ ++struct ip_set_name_list { ++ char name[IP_SET_MAXNAMELEN]; ++ char typename[IP_SET_MAXNAMELEN]; ++ ip_set_id_t index; ++ ip_set_id_t id; ++}; ++ ++/* The actual list operation */ ++#define IP_SET_OP_LIST 0x00000203 ++struct ip_set_req_list { ++ IP_SET_REQ_BYINDEX; ++ /* sets number of struct ip_set_list in reply */ ++}; ++ ++struct ip_set_list { ++ ip_set_id_t index; ++ ip_set_id_t binding; ++ u_int32_t ref; ++ size_t header_size; /* Set header data of header_size */ ++ size_t members_size; /* Set members data of members_size */ ++ size_t bindings_size; /* Set bindings data of bindings_size */ ++}; ++ ++struct ip_set_hash_list { ++ ip_set_ip_t ip; ++ ip_set_id_t binding; ++}; ++ ++/* The save operation */ ++#define IP_SET_OP_SAVE 0x00000204 ++/* Uses ip_set_req_list, in the reply replaced by ++ * sets number of struct ip_set_save plus a marker ++ * ip_set_save followed by ip_set_hash_save structures. ++ */ ++struct ip_set_save { ++ ip_set_id_t index; ++ ip_set_id_t binding; ++ size_t header_size; /* Set header data of header_size */ ++ size_t members_size; /* Set members data of members_size */ ++}; ++ ++/* At restoring, ip == 0 means default binding for the given set: */ ++struct ip_set_hash_save { ++ ip_set_ip_t ip; ++ ip_set_id_t id; ++ ip_set_id_t binding; ++}; ++ ++/* The restore operation */ ++#define IP_SET_OP_RESTORE 0x00000205 ++/* Uses ip_set_req_setnames followed by ip_set_restore structures ++ * plus a marker ip_set_restore, followed by ip_set_hash_save ++ * structures. ++ */ ++struct ip_set_restore { ++ char name[IP_SET_MAXNAMELEN]; ++ char typename[IP_SET_MAXNAMELEN]; ++ ip_set_id_t index; ++ size_t header_size; /* Create data of header_size */ ++ size_t members_size; /* Set members data of members_size */ ++}; ++ ++static inline int bitmap_bytes(ip_set_ip_t a, ip_set_ip_t b) ++{ ++ return 4 * ((((b - a + 8) / 8) + 3) / 4); ++} ++ ++#ifdef __KERNEL__ ++ ++#define ip_set_printk(format, args...) \ ++ do { \ ++ printk("%s: %s: ", __FILE__, __FUNCTION__); \ ++ printk(format "\n" , ## args); \ ++ } while (0) ++ ++#if defined(IP_SET_DEBUG) ++#define DP(format, args...) \ ++ do { \ ++ printk("%s: %s (DBG): ", __FILE__, __FUNCTION__);\ ++ printk(format "\n" , ## args); \ ++ } while (0) ++#define IP_SET_ASSERT(x) \ ++ do { \ ++ if (!(x)) \ ++ printk("IP_SET_ASSERT: %s:%i(%s)\n", \ ++ __FILE__, __LINE__, __FUNCTION__); \ ++ } while (0) ++#else ++#define DP(format, args...) ++#define IP_SET_ASSERT(x) ++#endif ++ ++struct ip_set; ++ ++/* ++ * The ip_set_type definition - one per set type, e.g. "ipmap". ++ * ++ * Each individual set has a pointer, set->type, going to one ++ * of these structures. Function pointers inside the structure implement ++ * the real behaviour of the sets. ++ * ++ * If not mentioned differently, the implementation behind the function ++ * pointers of a set_type, is expected to return 0 if ok, and a negative ++ * errno (e.g. -EINVAL) on error. ++ */ ++struct ip_set_type { ++ struct list_head list; /* next in list of set types */ ++ ++ /* test for IP in set (kernel: iptables -m set src|dst) ++ * return 0 if not in set, 1 if in set. ++ */ ++ int (*testip_kernel) (struct ip_set *set, ++ const struct sk_buff * skb, ++ u_int32_t flags, ++ ip_set_ip_t *ip); ++ ++ /* test for IP in set (userspace: ipset -T set IP) ++ * return 0 if not in set, 1 if in set. ++ */ ++ int (*testip) (struct ip_set *set, ++ const void *data, size_t size, ++ ip_set_ip_t *ip); ++ ++ /* ++ * Size of the data structure passed by when ++ * adding/deletin/testing an entry. ++ */ ++ size_t reqsize; ++ ++ /* Add IP into set (userspace: ipset -A set IP) ++ * Return -EEXIST if the address is already in the set, ++ * and -ERANGE if the address lies outside the set bounds. ++ * If the address was not already in the set, 0 is returned. ++ */ ++ int (*addip) (struct ip_set *set, ++ const void *data, size_t size, ++ ip_set_ip_t *ip); ++ ++ /* Add IP into set (kernel: iptables ... -j SET set src|dst) ++ * Return -EEXIST if the address is already in the set, ++ * and -ERANGE if the address lies outside the set bounds. ++ * If the address was not already in the set, 0 is returned. ++ */ ++ int (*addip_kernel) (struct ip_set *set, ++ const struct sk_buff * skb, ++ u_int32_t flags, ++ ip_set_ip_t *ip); ++ ++ /* remove IP from set (userspace: ipset -D set --entry x) ++ * Return -EEXIST if the address is NOT in the set, ++ * and -ERANGE if the address lies outside the set bounds. ++ * If the address really was in the set, 0 is returned. ++ */ ++ int (*delip) (struct ip_set *set, ++ const void *data, size_t size, ++ ip_set_ip_t *ip); ++ ++ /* remove IP from set (kernel: iptables ... -j SET --entry x) ++ * Return -EEXIST if the address is NOT in the set, ++ * and -ERANGE if the address lies outside the set bounds. ++ * If the address really was in the set, 0 is returned. ++ */ ++ int (*delip_kernel) (struct ip_set *set, ++ const struct sk_buff * skb, ++ u_int32_t flags, ++ ip_set_ip_t *ip); ++ ++ /* new set creation - allocated type specific items ++ */ ++ int (*create) (struct ip_set *set, ++ const void *data, size_t size); ++ ++ /* retry the operation after successfully tweaking the set ++ */ ++ int (*retry) (struct ip_set *set); ++ ++ /* set destruction - free type specific items ++ * There is no return value. ++ * Can be called only when child sets are destroyed. ++ */ ++ void (*destroy) (struct ip_set *set); ++ ++ /* set flushing - reset all bits in the set, or something similar. ++ * There is no return value. ++ */ ++ void (*flush) (struct ip_set *set); ++ ++ /* Listing: size needed for header ++ */ ++ size_t header_size; ++ ++ /* Listing: Get the header ++ * ++ * Fill in the information in "data". ++ * This function is always run after list_header_size() under a ++ * writelock on the set. Therefor is the length of "data" always ++ * correct. ++ */ ++ void (*list_header) (const struct ip_set *set, ++ void *data); ++ ++ /* Listing: Get the size for the set members ++ */ ++ int (*list_members_size) (const struct ip_set *set); ++ ++ /* Listing: Get the set members ++ * ++ * Fill in the information in "data". ++ * This function is always run after list_member_size() under a ++ * writelock on the set. Therefor is the length of "data" always ++ * correct. ++ */ ++ void (*list_members) (const struct ip_set *set, ++ void *data); ++ ++ char typename[IP_SET_MAXNAMELEN]; ++ char typecode; ++ int protocol_version; ++ ++ /* Set this to THIS_MODULE if you are a module, otherwise NULL */ ++ struct module *me; ++}; ++ ++extern int ip_set_register_set_type(struct ip_set_type *set_type); ++extern void ip_set_unregister_set_type(struct ip_set_type *set_type); ++ ++/* A generic ipset */ ++struct ip_set { ++ char name[IP_SET_MAXNAMELEN]; /* the name of the set */ ++ rwlock_t lock; /* lock for concurrency control */ ++ ip_set_id_t id; /* set id for swapping */ ++ ip_set_id_t binding; /* default binding for the set */ ++ atomic_t ref; /* in kernel and in hash references */ ++ struct ip_set_type *type; /* the set types */ ++ void *data; /* pooltype specific data */ ++}; ++ ++/* Structure to bind set elements to sets */ ++struct ip_set_hash { ++ struct list_head list; /* list of clashing entries in hash */ ++ ip_set_ip_t ip; /* ip from set */ ++ ip_set_id_t id; /* set id */ ++ ip_set_id_t binding; /* set we bind the element to */ ++}; ++ ++/* register and unregister set references */ ++extern ip_set_id_t ip_set_get_byname(const char name[IP_SET_MAXNAMELEN]); ++extern ip_set_id_t ip_set_get_byindex(ip_set_id_t id); ++extern void ip_set_put(ip_set_id_t id); ++ ++/* API for iptables set match, and SET target */ ++extern void ip_set_addip_kernel(ip_set_id_t id, ++ const struct sk_buff *skb, ++ const u_int32_t *flags); ++extern void ip_set_delip_kernel(ip_set_id_t id, ++ const struct sk_buff *skb, ++ const u_int32_t *flags); ++extern int ip_set_testip_kernel(ip_set_id_t id, ++ const struct sk_buff *skb, ++ const u_int32_t *flags); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /*_IP_SET_H*/ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_iphash.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_iphash.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_iphash.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_iphash.h 2005-09-20 13:11:38.782384000 +0200 +@@ -0,0 +1,30 @@ ++#ifndef __IP_SET_IPHASH_H ++#define __IP_SET_IPHASH_H ++ ++#include <linux/netfilter_ipv4/ip_set.h> ++ ++#define SETTYPE_NAME "iphash" ++#define MAX_RANGE 0x0000FFFF ++ ++struct ip_set_iphash { ++ ip_set_ip_t *members; /* the iphash proper */ ++ uint32_t initval; /* initval for jhash_1word */ ++ uint32_t prime; /* prime for double hashing */ ++ uint32_t hashsize; /* hash size */ ++ uint16_t probes; /* max number of probes */ ++ uint16_t resize; /* resize factor in percent */ ++ ip_set_ip_t netmask; /* netmask */ ++}; ++ ++struct ip_set_req_iphash_create { ++ uint32_t hashsize; ++ uint16_t probes; ++ uint16_t resize; ++ ip_set_ip_t netmask; ++}; ++ ++struct ip_set_req_iphash { ++ ip_set_ip_t ip; ++}; ++ ++#endif /* __IP_SET_IPHASH_H */ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_ipmap.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_ipmap.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_ipmap.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_ipmap.h 2005-09-20 13:11:38.782384000 +0200 +@@ -0,0 +1,56 @@ ++#ifndef __IP_SET_IPMAP_H ++#define __IP_SET_IPMAP_H ++ ++#include <linux/netfilter_ipv4/ip_set.h> ++ ++#define SETTYPE_NAME "ipmap" ++#define MAX_RANGE 0x0000FFFF ++ ++struct ip_set_ipmap { ++ void *members; /* the ipmap proper */ ++ ip_set_ip_t first_ip; /* host byte order, included in range */ ++ ip_set_ip_t last_ip; /* host byte order, included in range */ ++ ip_set_ip_t netmask; /* subnet netmask */ ++ ip_set_ip_t sizeid; /* size of set in IPs */ ++ u_int16_t hosts; /* number of hosts in a subnet */ ++}; ++ ++struct ip_set_req_ipmap_create { ++ ip_set_ip_t from; ++ ip_set_ip_t to; ++ ip_set_ip_t netmask; ++}; ++ ++struct ip_set_req_ipmap { ++ ip_set_ip_t ip; ++}; ++ ++unsigned int ++mask_to_bits(ip_set_ip_t mask) ++{ ++ unsigned int bits = 32; ++ ip_set_ip_t maskaddr; ++ ++ if (mask == 0xFFFFFFFF) ++ return bits; ++ ++ maskaddr = 0xFFFFFFFE; ++ while (--bits >= 0 && maskaddr != mask) ++ maskaddr <<= 1; ++ ++ return bits; ++} ++ ++ip_set_ip_t ++range_to_mask(ip_set_ip_t from, ip_set_ip_t to, unsigned int *bits) ++{ ++ ip_set_ip_t mask = 0xFFFFFFFE; ++ ++ *bits = 32; ++ while (--(*bits) >= 0 && mask && (to & mask) != from) ++ mask <<= 1; ++ ++ return mask; ++} ++ ++#endif /* __IP_SET_IPMAP_H */ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_iptree.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_iptree.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_iptree.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_iptree.h 2005-09-20 13:11:38.782384000 +0200 +@@ -0,0 +1,39 @@ ++#ifndef __IP_SET_IPTREE_H ++#define __IP_SET_IPTREE_H ++ ++#include <linux/netfilter_ipv4/ip_set.h> ++ ++#define SETTYPE_NAME "iptree" ++#define MAX_RANGE 0x0000FFFF ++ ++struct ip_set_iptreed { ++ unsigned long expires[255]; /* x.x.x.ADDR */ ++}; ++ ++struct ip_set_iptreec { ++ struct ip_set_iptreed *tree[255]; /* x.x.ADDR.* */ ++}; ++ ++struct ip_set_iptreeb { ++ struct ip_set_iptreec *tree[255]; /* x.ADDR.*.* */ ++}; ++ ++struct ip_set_iptree { ++ unsigned int timeout; ++ unsigned int gc_interval; ++#ifdef __KERNEL__ ++ struct timer_list gc; ++ struct ip_set_iptreeb *tree[255]; /* ADDR.*.*.* */ ++#endif ++}; ++ ++struct ip_set_req_iptree_create { ++ unsigned int timeout; ++}; ++ ++struct ip_set_req_iptree { ++ ip_set_ip_t ip; ++ unsigned int timeout; ++}; ++ ++#endif /* __IP_SET_IPTREE_H */ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_jhash.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_jhash.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_jhash.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_jhash.h 2005-09-20 13:11:38.782384000 +0200 +@@ -0,0 +1,148 @@ ++#ifndef _LINUX_IPSET_JHASH_H ++#define _LINUX_IPSET_JHASH_H ++ ++/* This is a copy of linux/jhash.h but the types u32/u8 are changed ++ * to __u32/__u8 so that the header file can be included into ++ * userspace code as well. Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) ++ */ ++ ++/* jhash.h: Jenkins hash support. ++ * ++ * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net) ++ * ++ * http://burtleburtle.net/bob/hash/ ++ * ++ * These are the credits from Bob's sources: ++ * ++ * lookup2.c, by Bob Jenkins, December 1996, Public Domain. ++ * hash(), hash2(), hash3, and mix() are externally useful functions. ++ * Routines to test the hash are included if SELF_TEST is defined. ++ * You can use this free for any purpose. It has no warranty. ++ * ++ * Copyright (C) 2003 David S. Miller (davem@redhat.com) ++ * ++ * I've modified Bob's hash to be useful in the Linux kernel, and ++ * any bugs present are surely my fault. -DaveM ++ */ ++ ++/* NOTE: Arguments are modified. */ ++#define __jhash_mix(a, b, c) \ ++{ \ ++ a -= b; a -= c; a ^= (c>>13); \ ++ b -= c; b -= a; b ^= (a<<8); \ ++ c -= a; c -= b; c ^= (b>>13); \ ++ a -= b; a -= c; a ^= (c>>12); \ ++ b -= c; b -= a; b ^= (a<<16); \ ++ c -= a; c -= b; c ^= (b>>5); \ ++ a -= b; a -= c; a ^= (c>>3); \ ++ b -= c; b -= a; b ^= (a<<10); \ ++ c -= a; c -= b; c ^= (b>>15); \ ++} ++ ++/* The golden ration: an arbitrary value */ ++#define JHASH_GOLDEN_RATIO 0x9e3779b9 ++ ++/* The most generic version, hashes an arbitrary sequence ++ * of bytes. No alignment or length assumptions are made about ++ * the input key. ++ */ ++static inline __u32 jhash(void *key, __u32 length, __u32 initval) ++{ ++ __u32 a, b, c, len; ++ __u8 *k = key; ++ ++ len = length; ++ a = b = JHASH_GOLDEN_RATIO; ++ c = initval; ++ ++ while (len >= 12) { ++ a += (k[0] +((__u32)k[1]<<8) +((__u32)k[2]<<16) +((__u32)k[3]<<24)); ++ b += (k[4] +((__u32)k[5]<<8) +((__u32)k[6]<<16) +((__u32)k[7]<<24)); ++ c += (k[8] +((__u32)k[9]<<8) +((__u32)k[10]<<16)+((__u32)k[11]<<24)); ++ ++ __jhash_mix(a,b,c); ++ ++ k += 12; ++ len -= 12; ++ } ++ ++ c += length; ++ switch (len) { ++ case 11: c += ((__u32)k[10]<<24); ++ case 10: c += ((__u32)k[9]<<16); ++ case 9 : c += ((__u32)k[8]<<8); ++ case 8 : b += ((__u32)k[7]<<24); ++ case 7 : b += ((__u32)k[6]<<16); ++ case 6 : b += ((__u32)k[5]<<8); ++ case 5 : b += k[4]; ++ case 4 : a += ((__u32)k[3]<<24); ++ case 3 : a += ((__u32)k[2]<<16); ++ case 2 : a += ((__u32)k[1]<<8); ++ case 1 : a += k[0]; ++ }; ++ ++ __jhash_mix(a,b,c); ++ ++ return c; ++} ++ ++/* A special optimized version that handles 1 or more of __u32s. ++ * The length parameter here is the number of __u32s in the key. ++ */ ++static inline __u32 jhash2(__u32 *k, __u32 length, __u32 initval) ++{ ++ __u32 a, b, c, len; ++ ++ a = b = JHASH_GOLDEN_RATIO; ++ c = initval; ++ len = length; ++ ++ while (len >= 3) { ++ a += k[0]; ++ b += k[1]; ++ c += k[2]; ++ __jhash_mix(a, b, c); ++ k += 3; len -= 3; ++ } ++ ++ c += length * 4; ++ ++ switch (len) { ++ case 2 : b += k[1]; ++ case 1 : a += k[0]; ++ }; ++ ++ __jhash_mix(a,b,c); ++ ++ return c; ++} ++ ++ ++/* A special ultra-optimized versions that knows they are hashing exactly ++ * 3, 2 or 1 word(s). ++ * ++ * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally ++ * done at the end is not done here. ++ */ ++static inline __u32 jhash_3words(__u32 a, __u32 b, __u32 c, __u32 initval) ++{ ++ a += JHASH_GOLDEN_RATIO; ++ b += JHASH_GOLDEN_RATIO; ++ c += initval; ++ ++ __jhash_mix(a, b, c); ++ ++ return c; ++} ++ ++static inline __u32 jhash_2words(__u32 a, __u32 b, __u32 initval) ++{ ++ return jhash_3words(a, b, 0, initval); ++} ++ ++static inline __u32 jhash_1word(__u32 a, __u32 initval) ++{ ++ return jhash_3words(a, 0, 0, initval); ++} ++ ++#endif /* _LINUX_IPSET_JHASH_H */ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_macipmap.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_macipmap.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_macipmap.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_macipmap.h 2005-09-20 13:11:38.782384000 +0200 +@@ -0,0 +1,38 @@ ++#ifndef __IP_SET_MACIPMAP_H ++#define __IP_SET_MACIPMAP_H ++ ++#include <linux/netfilter_ipv4/ip_set.h> ++ ++#define SETTYPE_NAME "macipmap" ++#define MAX_RANGE 0x0000FFFF ++ ++/* general flags */ ++#define IPSET_MACIP_MATCHUNSET 1 ++ ++/* per ip flags */ ++#define IPSET_MACIP_ISSET 1 ++ ++struct ip_set_macipmap { ++ void *members; /* the macipmap proper */ ++ ip_set_ip_t first_ip; /* host byte order, included in range */ ++ ip_set_ip_t last_ip; /* host byte order, included in range */ ++ u_int32_t flags; ++}; ++ ++struct ip_set_req_macipmap_create { ++ ip_set_ip_t from; ++ ip_set_ip_t to; ++ u_int32_t flags; ++}; ++ ++struct ip_set_req_macipmap { ++ ip_set_ip_t ip; ++ unsigned char ethernet[ETH_ALEN]; ++}; ++ ++struct ip_set_macip { ++ unsigned short flags; ++ unsigned char ethernet[ETH_ALEN]; ++}; ++ ++#endif /* __IP_SET_MACIPMAP_H */ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_malloc.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_malloc.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_malloc.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_malloc.h 2005-09-20 13:11:38.782384000 +0200 +@@ -0,0 +1,42 @@ ++#ifndef _IP_SET_MALLOC_H ++#define _IP_SET_MALLOC_H ++ ++#ifdef __KERNEL__ ++ ++/* Memory allocation and deallocation */ ++static size_t max_malloc_size = 0; ++ ++static inline void init_max_malloc_size(void) ++{ ++#define CACHE(x) max_malloc_size = x; ++#include <linux/kmalloc_sizes.h> ++#undef CACHE ++} ++ ++static inline void * ip_set_malloc_atomic(size_t bytes) ++{ ++ if (bytes > max_malloc_size) ++ return __vmalloc(bytes, GFP_ATOMIC, PAGE_KERNEL); ++ else ++ return kmalloc(bytes, GFP_ATOMIC); ++} ++ ++static inline void * ip_set_malloc(size_t bytes) ++{ ++ if (bytes > max_malloc_size) ++ return vmalloc(bytes); ++ else ++ return kmalloc(bytes, GFP_KERNEL); ++} ++ ++static inline void ip_set_free(void * data, size_t bytes) ++{ ++ if (bytes > max_malloc_size) ++ vfree(data); ++ else ++ kfree(data); ++} ++ ++#endif /* __KERNEL__ */ ++ ++#endif /*_IP_SET_MALLOC_H*/ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_nethash.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_nethash.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_nethash.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_nethash.h 2005-09-20 13:11:38.782384000 +0200 +@@ -0,0 +1,55 @@ ++#ifndef __IP_SET_NETHASH_H ++#define __IP_SET_NETHASH_H ++ ++#include <linux/netfilter_ipv4/ip_set.h> ++ ++#define SETTYPE_NAME "nethash" ++#define MAX_RANGE 0x0000FFFF ++ ++struct ip_set_nethash { ++ ip_set_ip_t *members; /* the nethash proper */ ++ uint32_t initval; /* initval for jhash_1word */ ++ uint32_t prime; /* prime for double hashing */ ++ uint32_t hashsize; /* hash size */ ++ uint16_t probes; /* max number of probes */ ++ uint16_t resize; /* resize factor in percent */ ++ unsigned char cidr[30]; /* CIDR sizes */ ++}; ++ ++struct ip_set_req_nethash_create { ++ uint32_t hashsize; ++ uint16_t probes; ++ uint16_t resize; ++}; ++ ++struct ip_set_req_nethash { ++ ip_set_ip_t ip; ++ unsigned char cidr; ++}; ++ ++static unsigned char shifts[] = {255, 253, 249, 241, 225, 193, 129, 1}; ++ ++static inline ip_set_ip_t ++pack(ip_set_ip_t ip, unsigned char cidr) ++{ ++ ip_set_ip_t addr, *paddr = &addr; ++ unsigned char n, t, *a; ++ ++ addr = htonl(ip & (0xFFFFFFFF << (32 - (cidr)))); ++#ifdef __KERNEL__ ++ DP("ip:%u.%u.%u.%u/%u", NIPQUAD(addr), cidr); ++#endif ++ n = cidr / 8; ++ t = cidr % 8; ++ a = &((unsigned char *)paddr)[n]; ++ *a = *a /(1 << (8 - t)) + shifts[t]; ++#ifdef __KERNEL__ ++ DP("n: %u, t: %u, a: %u", n, t, *a); ++ DP("ip:%u.%u.%u.%u/%u, %u.%u.%u.%u", ++ HIPQUAD(ip), cidr, NIPQUAD(addr)); ++#endif ++ ++ return ntohl(addr); ++} ++ ++#endif /* __IP_SET_NETHASH_H */ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_portmap.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_portmap.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_portmap.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_portmap.h 2005-09-20 13:11:38.782384000 +0200 +@@ -0,0 +1,25 @@ ++#ifndef __IP_SET_PORTMAP_H ++#define __IP_SET_PORTMAP_H ++ ++#include <linux/netfilter_ipv4/ip_set.h> ++ ++#define SETTYPE_NAME "portmap" ++#define MAX_RANGE 0x0000FFFF ++#define INVALID_PORT (MAX_RANGE + 1) ++ ++struct ip_set_portmap { ++ void *members; /* the portmap proper */ ++ ip_set_ip_t first_port; /* host byte order, included in range */ ++ ip_set_ip_t last_port; /* host byte order, included in range */ ++}; ++ ++struct ip_set_req_portmap_create { ++ ip_set_ip_t from; ++ ip_set_ip_t to; ++}; ++ ++struct ip_set_req_portmap { ++ ip_set_ip_t port; ++}; ++ ++#endif /* __IP_SET_PORTMAP_H */ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_prime.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_prime.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ip_set_prime.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ip_set_prime.h 2005-09-20 13:11:38.782384000 +0200 +@@ -0,0 +1,34 @@ ++#ifndef __IP_SET_PRIME_H ++#define __IP_SET_PRIME_H ++ ++static inline unsigned make_prime_bound(unsigned nr) ++{ ++ unsigned long long nr64 = nr; ++ unsigned long long x = 1; ++ nr = 1; ++ while (x <= nr64) { x <<= 2; nr <<= 1; } ++ return nr; ++} ++ ++static inline int make_prime_check(unsigned nr) ++{ ++ unsigned x = 3; ++ unsigned b = make_prime_bound(nr); ++ while (x <= b) { ++ if (0 == (nr % x)) return 0; ++ x += 2; ++ } ++ return 1; ++} ++ ++static unsigned make_prime(unsigned nr) ++{ ++ if (0 == (nr & 1)) nr--; ++ while (nr > 1) { ++ if (make_prime_check(nr)) return nr; ++ nr -= 2; ++ } ++ return 2; ++} ++ ++#endif /* __IP_SET_PRIME_H */ +diff -Nur linux-2.6.12.5/include/linux/netfilter_ipv4/ipt_set.h linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ipt_set.h +--- linux-2.6.12.5/include/linux/netfilter_ipv4/ipt_set.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/include/linux/netfilter_ipv4/ipt_set.h 2005-09-20 13:11:38.786384250 +0200 +@@ -0,0 +1,21 @@ ++#ifndef _IPT_SET_H ++#define _IPT_SET_H ++ ++#include <linux/netfilter_ipv4/ip_set.h> ++ ++struct ipt_set_info { ++ ip_set_id_t index; ++ u_int32_t flags[IP_SET_MAX_BINDINGS + 1]; ++}; ++ ++/* match info */ ++struct ipt_set_info_match { ++ struct ipt_set_info match_set; ++}; ++ ++struct ipt_set_info_target { ++ struct ipt_set_info add_set; ++ struct ipt_set_info del_set; ++}; ++ ++#endif /*_IPT_SET_H*/ +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/ip_set.c linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set.c +--- linux-2.6.12.5/net/ipv4/netfilter/ip_set.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set.c 2005-09-20 13:11:38.786384250 +0200 +@@ -0,0 +1,1989 @@ ++/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> ++ * Patrick Schaaf <bof@bof.de> ++ * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++/* Kernel module for IP set management */ ++ ++#include <linux/config.h> ++#include <linux/module.h> ++#include <linux/moduleparam.h> ++#include <linux/kmod.h> ++#include <linux/ip.h> ++#include <linux/skbuff.h> ++#include <linux/random.h> ++#include <linux/jhash.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/errno.h> ++#include <asm/uaccess.h> ++#include <asm/bitops.h> ++#include <asm/semaphore.h> ++#include <linux/spinlock.h> ++#include <linux/vmalloc.h> ++ ++#define ASSERT_READ_LOCK(x) /* dont use that */ ++#define ASSERT_WRITE_LOCK(x) ++#include <linux/netfilter_ipv4/listhelp.h> ++#include <linux/netfilter_ipv4/ip_set.h> ++ ++static struct list_head set_type_list; /* all registered sets */ ++static struct ip_set **ip_set_list; /* all individual sets */ ++static DEFINE_RWLOCK(ip_set_lock); /* protects the lists and the hash */ ++static DECLARE_MUTEX(ip_set_app_mutex); /* serializes user access */ ++static ip_set_id_t ip_set_max = CONFIG_IP_NF_SET_MAX; ++static ip_set_id_t ip_set_bindings_hash_size = CONFIG_IP_NF_SET_HASHSIZE; ++static struct list_head *ip_set_hash; /* hash of bindings */ ++static unsigned int ip_set_hash_random; /* random seed */ ++ ++/* ++ * Sets are identified either by the index in ip_set_list or by id. ++ * The id never changes and is used to find a key in the hash. ++ * The index may change by swapping and used at all other places ++ * (set/SET netfilter modules, binding value, etc.) ++ * ++ * Userspace requests are serialized by ip_set_mutex and sets can ++ * be deleted only from userspace. Therefore ip_set_list locking ++ * must obey the following rules: ++ * ++ * - kernel requests: read and write locking mandatory ++ * - user requests: read locking optional, write locking mandatory ++ */ ++ ++static inline void ++__ip_set_get(ip_set_id_t index) ++{ ++ atomic_inc(&ip_set_list[index]->ref); ++} ++ ++static inline void ++__ip_set_put(ip_set_id_t index) ++{ ++ atomic_dec(&ip_set_list[index]->ref); ++} ++ ++/* ++ * Binding routines ++ */ ++ ++static inline int ++ip_hash_cmp(const struct ip_set_hash *set_hash, ++ ip_set_id_t id, ip_set_ip_t ip) ++{ ++ return set_hash->id == id && set_hash->ip == ip; ++} ++ ++static ip_set_id_t ++ip_set_find_in_hash(ip_set_id_t id, ip_set_ip_t ip) ++{ ++ u_int32_t key = jhash_2words(id, ip, ip_set_hash_random) ++ % ip_set_bindings_hash_size; ++ struct ip_set_hash *set_hash; ++ ++ ASSERT_READ_LOCK(&ip_set_lock); ++ IP_SET_ASSERT(ip_set_list[id]); ++ DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip)); ++ ++ set_hash = LIST_FIND(&ip_set_hash[key], ip_hash_cmp, ++ struct ip_set_hash *, id, ip); ++ ++ DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, ++ HIPQUAD(ip), ++ set_hash != NULL ? ip_set_list[set_hash->binding]->name : ""); ++ ++ return (set_hash != NULL ? set_hash->binding : IP_SET_INVALID_ID); ++} ++ ++static inline void ++__set_hash_del(struct ip_set_hash *set_hash) ++{ ++ ASSERT_WRITE_LOCK(&ip_set_lock); ++ IP_SET_ASSERT(ip_set_list[set_hash->binding]); ++ ++ __ip_set_put(set_hash->binding); ++ list_del(&set_hash->list); ++ kfree(set_hash); ++} ++ ++static int ++ip_set_hash_del(ip_set_id_t id, ip_set_ip_t ip) ++{ ++ u_int32_t key = jhash_2words(id, ip, ip_set_hash_random) ++ % ip_set_bindings_hash_size; ++ struct ip_set_hash *set_hash; ++ ++ IP_SET_ASSERT(ip_set_list[id]); ++ DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip)); ++ write_lock_bh(&ip_set_lock); ++ set_hash = LIST_FIND(&ip_set_hash[key], ip_hash_cmp, ++ struct ip_set_hash *, id, ip); ++ DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, ++ HIPQUAD(ip), ++ set_hash != NULL ? ip_set_list[set_hash->binding]->name : ""); ++ ++ if (set_hash != NULL) ++ __set_hash_del(set_hash); ++ write_unlock_bh(&ip_set_lock); ++ return 0; ++} ++ ++static int ++ip_set_hash_add(ip_set_id_t id, ip_set_ip_t ip, ip_set_id_t binding) ++{ ++ u_int32_t key = jhash_2words(id, ip, ip_set_hash_random) ++ % ip_set_bindings_hash_size; ++ struct ip_set_hash *set_hash; ++ int ret = 0; ++ ++ IP_SET_ASSERT(ip_set_list[id]); ++ IP_SET_ASSERT(ip_set_list[binding]); ++ DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, ++ HIPQUAD(ip), ip_set_list[binding]->name); ++ write_lock_bh(&ip_set_lock); ++ set_hash = LIST_FIND(&ip_set_hash[key], ip_hash_cmp, ++ struct ip_set_hash *, id, ip); ++ if (!set_hash) { ++ set_hash = kmalloc(sizeof(struct ip_set_hash), GFP_KERNEL); ++ if (!set_hash) { ++ ret = -ENOMEM; ++ goto unlock; ++ } ++ INIT_LIST_HEAD(&set_hash->list); ++ set_hash->id = id; ++ set_hash->ip = ip; ++ list_add(&ip_set_hash[key], &set_hash->list); ++ } else { ++ IP_SET_ASSERT(ip_set_list[set_hash->binding]); ++ DP("overwrite binding: %s", ++ ip_set_list[set_hash->binding]->name); ++ __ip_set_put(set_hash->binding); ++ } ++ set_hash->binding = binding; ++ __ip_set_get(set_hash->binding); ++ unlock: ++ write_unlock_bh(&ip_set_lock); ++ return ret; ++} ++ ++#define FOREACH_HASH_DO(fn, args...) \ ++({ \ ++ ip_set_id_t __key; \ ++ struct ip_set_hash *__set_hash; \ ++ \ ++ for (__key = 0; __key < ip_set_bindings_hash_size; __key++) { \ ++ list_for_each_entry(__set_hash, &ip_set_hash[__key], list) \ ++ fn(__set_hash , ## args); \ ++ } \ ++}) ++ ++#define FOREACH_HASH_RW_DO(fn, args...) \ ++({ \ ++ ip_set_id_t __key; \ ++ struct ip_set_hash *__set_hash, *__n; \ ++ \ ++ ASSERT_WRITE_LOCK(&ip_set_lock); \ ++ for (__key = 0; __key < ip_set_bindings_hash_size; __key++) { \ ++ list_for_each_entry_safe(__set_hash, __n, &ip_set_hash[__key], list)\ ++ fn(__set_hash , ## args); \ ++ } \ ++}) ++ ++/* Add, del and test set entries from kernel */ ++ ++#define follow_bindings(index, set, ip) \ ++((index = ip_set_find_in_hash((set)->id, ip)) != IP_SET_INVALID_ID \ ++ || (index = (set)->binding) != IP_SET_INVALID_ID) ++ ++int ++ip_set_testip_kernel(ip_set_id_t index, ++ const struct sk_buff *skb, ++ const u_int32_t *flags) ++{ ++ struct ip_set *set; ++ ip_set_ip_t ip; ++ int res, i = 0; ++ ++ IP_SET_ASSERT(flags[i]); ++ read_lock_bh(&ip_set_lock); ++ do { ++ set = ip_set_list[index]; ++ IP_SET_ASSERT(set); ++ DP("set %s, index %u", set->name, index); ++ read_lock_bh(&set->lock); ++ res = set->type->testip_kernel(set, skb, flags[i], &ip); ++ read_unlock_bh(&set->lock); ++ } while (res > 0 ++ && flags[++i] ++ && follow_bindings(index, set, ip)); ++ read_unlock_bh(&ip_set_lock); ++ ++ return res; ++} ++ ++void ++ip_set_addip_kernel(ip_set_id_t index, ++ const struct sk_buff *skb, ++ const u_int32_t *flags) ++{ ++ struct ip_set *set; ++ ip_set_ip_t ip; ++ int res, i= 0; ++ ++ IP_SET_ASSERT(flags[i]); ++ retry: ++ read_lock_bh(&ip_set_lock); ++ do { ++ set = ip_set_list[index]; ++ IP_SET_ASSERT(set); ++ DP("set %s, index %u", set->name, index); ++ write_lock_bh(&set->lock); ++ res = set->type->addip_kernel(set, skb, flags[i], &ip); ++ write_unlock_bh(&set->lock); ++ } while ((res == 0 || res == -EEXIST) ++ && flags[++i] ++ && follow_bindings(index, set, ip)); ++ read_unlock_bh(&ip_set_lock); ++ ++ if (res == -EAGAIN ++ && set->type->retry ++ && (res = set->type->retry(set)) == 0) ++ goto retry; ++} ++ ++void ++ip_set_delip_kernel(ip_set_id_t index, ++ const struct sk_buff *skb, ++ const u_int32_t *flags) ++{ ++ struct ip_set *set; ++ ip_set_ip_t ip; ++ int res, i = 0; ++ ++ IP_SET_ASSERT(flags[i]); ++ read_lock_bh(&ip_set_lock); ++ do { ++ set = ip_set_list[index]; ++ IP_SET_ASSERT(set); ++ DP("set %s, index %u", set->name, index); ++ write_lock_bh(&set->lock); ++ res = set->type->delip_kernel(set, skb, flags[i], &ip); ++ write_unlock_bh(&set->lock); ++ } while ((res == 0 || res == -EEXIST) ++ && flags[++i] ++ && follow_bindings(index, set, ip)); ++ read_unlock_bh(&ip_set_lock); ++} ++ ++/* Register and deregister settype */ ++ ++static inline int ++set_type_equal(const struct ip_set_type *set_type, const char *str2) ++{ ++ return !strncmp(set_type->typename, str2, IP_SET_MAXNAMELEN - 1); ++} ++ ++static inline struct ip_set_type * ++find_set_type(const char *name) ++{ ++ return LIST_FIND(&set_type_list, ++ set_type_equal, ++ struct ip_set_type *, ++ name); ++} ++ ++int ++ip_set_register_set_type(struct ip_set_type *set_type) ++{ ++ int ret = 0; ++ ++ if (set_type->protocol_version != IP_SET_PROTOCOL_VERSION) { ++ ip_set_printk("'%s' uses wrong protocol version %u (want %u)", ++ set_type->typename, ++ set_type->protocol_version, ++ IP_SET_PROTOCOL_VERSION); ++ return -EINVAL; ++ } ++ ++ write_lock_bh(&ip_set_lock); ++ if (find_set_type(set_type->typename)) { ++ /* Duplicate! */ ++ ip_set_printk("'%s' already registered!", ++ set_type->typename); ++ ret = -EINVAL; ++ goto unlock; ++ } ++ if (!try_module_get(THIS_MODULE)) { ++ ret = -EFAULT; ++ goto unlock; ++ } ++ list_append(&set_type_list, set_type); ++ DP("'%s' registered.", set_type->typename); ++ unlock: ++ write_unlock_bh(&ip_set_lock); ++ return ret; ++} ++ ++void ++ip_set_unregister_set_type(struct ip_set_type *set_type) ++{ ++ write_lock_bh(&ip_set_lock); ++ if (!find_set_type(set_type->typename)) { ++ ip_set_printk("'%s' not registered?", ++ set_type->typename); ++ goto unlock; ++ } ++ LIST_DELETE(&set_type_list, set_type); ++ module_put(THIS_MODULE); ++ DP("'%s' unregistered.", set_type->typename); ++ unlock: ++ write_unlock_bh(&ip_set_lock); ++ ++} ++ ++/* ++ * Userspace routines ++ */ ++ ++/* ++ * Find set by name, reference it once. The reference makes sure the ++ * thing pointed to, does not go away under our feet. Drop the reference ++ * later, using ip_set_put(). ++ */ ++ip_set_id_t ++ip_set_get_byname(const char *name) ++{ ++ ip_set_id_t i, index = IP_SET_INVALID_ID; ++ ++ down(&ip_set_app_mutex); ++ for (i = 0; i < ip_set_max; i++) { ++ if (ip_set_list[i] != NULL ++ && strcmp(ip_set_list[i]->name, name) == 0) { ++ __ip_set_get(i); ++ index = i; ++ break; ++ } ++ } ++ up(&ip_set_app_mutex); ++ return index; ++} ++ ++/* ++ * Find set by index, reference it once. The reference makes sure the ++ * thing pointed to, does not go away under our feet. Drop the reference ++ * later, using ip_set_put(). ++ */ ++ip_set_id_t ++ip_set_get_byindex(ip_set_id_t index) ++{ ++ down(&ip_set_app_mutex); ++ ++ if (index >= ip_set_max) ++ return IP_SET_INVALID_ID; ++ ++ if (ip_set_list[index]) ++ __ip_set_get(index); ++ else ++ index = IP_SET_INVALID_ID; ++ ++ up(&ip_set_app_mutex); ++ return index; ++} ++ ++/* ++ * If the given set pointer points to a valid set, decrement ++ * reference count by 1. The caller shall not assume the index ++ * to be valid, after calling this function. ++ */ ++void ip_set_put(ip_set_id_t index) ++{ ++ down(&ip_set_app_mutex); ++ if (ip_set_list[index]) ++ __ip_set_put(index); ++ up(&ip_set_app_mutex); ++} ++ ++/* Find a set by name or index */ ++static ip_set_id_t ++ip_set_find_byname(const char *name) ++{ ++ ip_set_id_t i, index = IP_SET_INVALID_ID; ++ ++ for (i = 0; i < ip_set_max; i++) { ++ if (ip_set_list[i] != NULL ++ && strcmp(ip_set_list[i]->name, name) == 0) { ++ index = i; ++ break; ++ } ++ } ++ return index; ++} ++ ++static ip_set_id_t ++ip_set_find_byindex(ip_set_id_t index) ++{ ++ if (index >= ip_set_max || ip_set_list[index] == NULL) ++ index = IP_SET_INVALID_ID; ++ ++ return index; ++} ++ ++/* ++ * Add, del, test, bind and unbind ++ */ ++ ++static inline int ++__ip_set_testip(struct ip_set *set, ++ const void *data, ++ size_t size, ++ ip_set_ip_t *ip) ++{ ++ int res; ++ ++ read_lock_bh(&set->lock); ++ res = set->type->testip(set, data, size, ip); ++ read_unlock_bh(&set->lock); ++ ++ return res; ++} ++ ++static int ++__ip_set_addip(ip_set_id_t index, ++ const void *data, ++ size_t size) ++{ ++ struct ip_set *set = ip_set_list[index]; ++ ip_set_ip_t ip; ++ int res; ++ ++ IP_SET_ASSERT(set); ++ do { ++ write_lock_bh(&set->lock); ++ res = set->type->addip(set, data, size, &ip); ++ write_unlock_bh(&set->lock); ++ } while (res == -EAGAIN ++ && set->type->retry ++ && (res = set->type->retry(set)) == 0); ++ ++ return res; ++} ++ ++static int ++ip_set_addip(ip_set_id_t index, ++ const void *data, ++ size_t size) ++{ ++ ++ return __ip_set_addip(index, ++ data + sizeof(struct ip_set_req_adt), ++ size - sizeof(struct ip_set_req_adt)); ++} ++ ++static int ++ip_set_delip(ip_set_id_t index, ++ const void *data, ++ size_t size) ++{ ++ struct ip_set *set = ip_set_list[index]; ++ ip_set_ip_t ip; ++ int res; ++ ++ IP_SET_ASSERT(set); ++ write_lock_bh(&set->lock); ++ res = set->type->delip(set, ++ data + sizeof(struct ip_set_req_adt), ++ size - sizeof(struct ip_set_req_adt), ++ &ip); ++ write_unlock_bh(&set->lock); ++ ++ return res; ++} ++ ++static int ++ip_set_testip(ip_set_id_t index, ++ const void *data, ++ size_t size) ++{ ++ struct ip_set *set = ip_set_list[index]; ++ ip_set_ip_t ip; ++ int res; ++ ++ IP_SET_ASSERT(set); ++ res = __ip_set_testip(set, ++ data + sizeof(struct ip_set_req_adt), ++ size - sizeof(struct ip_set_req_adt), ++ &ip); ++ ++ return (res > 0 ? -EEXIST : res); ++} ++ ++static int ++ip_set_bindip(ip_set_id_t index, ++ const void *data, ++ size_t size) ++{ ++ struct ip_set *set = ip_set_list[index]; ++ struct ip_set_req_bind *req_bind; ++ ip_set_id_t binding; ++ ip_set_ip_t ip; ++ int res; ++ ++ IP_SET_ASSERT(set); ++ if (size < sizeof(struct ip_set_req_bind)) ++ return -EINVAL; ++ ++ req_bind = (struct ip_set_req_bind *) data; ++ req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0'; ++ ++ if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) { ++ /* Default binding of a set */ ++ char *binding_name; ++ ++ if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN) ++ return -EINVAL; ++ ++ binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); ++ binding_name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ ++ binding = ip_set_find_byname(binding_name); ++ if (binding == IP_SET_INVALID_ID) ++ return -ENOENT; ++ ++ write_lock_bh(&ip_set_lock); ++ /* Sets as binding values are referenced */ ++ if (set->binding != IP_SET_INVALID_ID) ++ __ip_set_put(set->binding); ++ set->binding = binding; ++ __ip_set_get(set->binding); ++ write_unlock_bh(&ip_set_lock); ++ ++ return 0; ++ } ++ binding = ip_set_find_byname(req_bind->binding); ++ if (binding == IP_SET_INVALID_ID) ++ return -ENOENT; ++ ++ res = __ip_set_testip(set, ++ data + sizeof(struct ip_set_req_bind), ++ size - sizeof(struct ip_set_req_bind), ++ &ip); ++ DP("set %s, ip: %u.%u.%u.%u, binding %s", ++ set->name, HIPQUAD(ip), ip_set_list[binding]->name); ++ ++ if (res >= 0) ++ res = ip_set_hash_add(set->id, ip, binding); ++ ++ return res; ++} ++ ++#define FOREACH_SET_DO(fn, args...) \ ++({ \ ++ ip_set_id_t __i; \ ++ struct ip_set *__set; \ ++ \ ++ for (__i = 0; __i < ip_set_max; __i++) { \ ++ __set = ip_set_list[__i]; \ ++ if (__set != NULL) \ ++ fn(__set , ##args); \ ++ } \ ++}) ++ ++static inline void ++__set_hash_del_byid(struct ip_set_hash *set_hash, ip_set_id_t id) ++{ ++ if (set_hash->id == id) ++ __set_hash_del(set_hash); ++} ++ ++static inline void ++__unbind_default(struct ip_set *set) ++{ ++ if (set->binding != IP_SET_INVALID_ID) { ++ /* Sets as binding values are referenced */ ++ __ip_set_put(set->binding); ++ set->binding = IP_SET_INVALID_ID; ++ } ++} ++ ++static int ++ip_set_unbindip(ip_set_id_t index, ++ const void *data, ++ size_t size) ++{ ++ struct ip_set *set; ++ struct ip_set_req_bind *req_bind; ++ ip_set_ip_t ip; ++ int res; ++ ++ DP(""); ++ if (size < sizeof(struct ip_set_req_bind)) ++ return -EINVAL; ++ ++ req_bind = (struct ip_set_req_bind *) data; ++ req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0'; ++ ++ DP("%u %s", index, req_bind->binding); ++ if (index == IP_SET_INVALID_ID) { ++ /* unbind :all: */ ++ if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) { ++ /* Default binding of sets */ ++ write_lock_bh(&ip_set_lock); ++ FOREACH_SET_DO(__unbind_default); ++ write_unlock_bh(&ip_set_lock); ++ return 0; ++ } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) { ++ /* Flush all bindings of all sets*/ ++ write_lock_bh(&ip_set_lock); ++ FOREACH_HASH_RW_DO(__set_hash_del); ++ write_unlock_bh(&ip_set_lock); ++ return 0; ++ } ++ DP("unreachable reached!"); ++ return -EINVAL; ++ } ++ ++ set = ip_set_list[index]; ++ IP_SET_ASSERT(set); ++ if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) { ++ /* Default binding of set */ ++ ip_set_id_t binding = ip_set_find_byindex(set->binding); ++ ++ if (binding == IP_SET_INVALID_ID) ++ return -ENOENT; ++ ++ write_lock_bh(&ip_set_lock); ++ /* Sets in hash values are referenced */ ++ __ip_set_put(set->binding); ++ set->binding = IP_SET_INVALID_ID; ++ write_unlock_bh(&ip_set_lock); ++ ++ return 0; ++ } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) { ++ /* Flush all bindings */ ++ ++ write_lock_bh(&ip_set_lock); ++ FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id); ++ write_unlock_bh(&ip_set_lock); ++ return 0; ++ } ++ ++ res = __ip_set_testip(set, ++ data + sizeof(struct ip_set_req_bind), ++ size - sizeof(struct ip_set_req_bind), ++ &ip); ++ ++ DP("set %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip)); ++ if (res >= 0) ++ res = ip_set_hash_del(set->id, ip); ++ ++ return res; ++} ++ ++static int ++ip_set_testbind(ip_set_id_t index, ++ const void *data, ++ size_t size) ++{ ++ struct ip_set *set = ip_set_list[index]; ++ struct ip_set_req_bind *req_bind; ++ ip_set_id_t binding; ++ ip_set_ip_t ip; ++ int res; ++ ++ IP_SET_ASSERT(set); ++ if (size < sizeof(struct ip_set_req_bind)) ++ return -EINVAL; ++ ++ req_bind = (struct ip_set_req_bind *) data; ++ req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0'; ++ ++ if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) { ++ /* Default binding of set */ ++ char *binding_name; ++ ++ if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN) ++ return -EINVAL; ++ ++ binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); ++ binding_name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ ++ binding = ip_set_find_byname(binding_name); ++ if (binding == IP_SET_INVALID_ID) ++ return -ENOENT; ++ ++ res = (set->binding == binding) ? -EEXIST : 0; ++ ++ return res; ++ } ++ binding = ip_set_find_byname(req_bind->binding); ++ if (binding == IP_SET_INVALID_ID) ++ return -ENOENT; ++ ++ ++ res = __ip_set_testip(set, ++ data + sizeof(struct ip_set_req_bind), ++ size - sizeof(struct ip_set_req_bind), ++ &ip); ++ DP("set %s, ip: %u.%u.%u.%u, binding %s", ++ set->name, HIPQUAD(ip), ip_set_list[binding]->name); ++ ++ if (res >= 0) ++ res = (ip_set_find_in_hash(set->id, ip) == binding) ++ ? -EEXIST : 0; ++ ++ return res; ++} ++ ++static struct ip_set_type * ++find_set_type_rlock(const char *typename) ++{ ++ struct ip_set_type *type; ++ ++ read_lock_bh(&ip_set_lock); ++ type = find_set_type(typename); ++ if (type == NULL) ++ read_unlock_bh(&ip_set_lock); ++ ++ return type; ++} ++ ++static int ++find_free_id(const char *name, ++ ip_set_id_t *index, ++ ip_set_id_t *id) ++{ ++ ip_set_id_t i; ++ ++ *id = IP_SET_INVALID_ID; ++ for (i = 0; i < ip_set_max; i++) { ++ if (ip_set_list[i] == NULL) { ++ if (*id == IP_SET_INVALID_ID) ++ *id = *index = i; ++ } else if (strcmp(name, ip_set_list[i]->name) == 0) ++ /* Name clash */ ++ return -EEXIST; ++ } ++ if (*id == IP_SET_INVALID_ID) ++ /* No free slot remained */ ++ return -ERANGE; ++ /* Check that index is usable as id (swapping) */ ++ check: ++ for (i = 0; i < ip_set_max; i++) { ++ if (ip_set_list[i] != NULL ++ && ip_set_list[i]->id == *id) { ++ *id = i; ++ goto check; ++ } ++ } ++ return 0; ++} ++ ++/* ++ * Create a set ++ */ ++static int ++ip_set_create(const char *name, ++ const char *typename, ++ ip_set_id_t restore, ++ const void *data, ++ size_t size) ++{ ++ struct ip_set *set; ++ ip_set_id_t index, id; ++ int res = 0; ++ ++ DP("setname: %s, typename: %s, id: %u", name, typename, restore); ++ /* ++ * First, and without any locks, allocate and initialize ++ * a normal base set structure. ++ */ ++ set = kmalloc(sizeof(struct ip_set), GFP_KERNEL); ++ if (!set) ++ return -ENOMEM; ++ set->lock = RW_LOCK_UNLOCKED; ++ strncpy(set->name, name, IP_SET_MAXNAMELEN); ++ set->binding = IP_SET_INVALID_ID; ++ atomic_set(&set->ref, 0); ++ ++ /* ++ * Next, take the &ip_set_lock, check that we know the type, ++ * and take a reference on the type, to make sure it ++ * stays available while constructing our new set. ++ * ++ * After referencing the type, we drop the &ip_set_lock, ++ * and let the new set construction run without locks. ++ */ ++ set->type = find_set_type_rlock(typename); ++ if (set->type == NULL) { ++ /* Try loading the module */ ++ char modulename[IP_SET_MAXNAMELEN + strlen("ip_set_") + 1]; ++ strcpy(modulename, "ip_set_"); ++ strcat(modulename, typename); ++ DP("try to load %s", modulename); ++ request_module(modulename); ++ set->type = find_set_type_rlock(typename); ++ } ++ if (set->type == NULL) { ++ ip_set_printk("no set type '%s', set '%s' not created", ++ typename, name); ++ res = -ENOENT; ++ goto out; ++ } ++ if (!try_module_get(set->type->me)) { ++ read_unlock_bh(&ip_set_lock); ++ res = -EFAULT; ++ goto out; ++ } ++ read_unlock_bh(&ip_set_lock); ++ ++ /* ++ * Without holding any locks, create private part. ++ */ ++ res = set->type->create(set, data, size); ++ if (res != 0) ++ goto put_out; ++ ++ /* BTW, res==0 here. */ ++ ++ /* ++ * Here, we have a valid, constructed set. &ip_set_lock again, ++ * find free id/index and check that it is not already in ++ * ip_set_list. ++ */ ++ write_lock_bh(&ip_set_lock); ++ if ((res = find_free_id(set->name, &index, &id)) != 0) { ++ DP("no free id!"); ++ goto cleanup; ++ } ++ ++ /* Make sure restore gets the same index */ ++ if (restore != IP_SET_INVALID_ID && index != restore) { ++ DP("Can't restore, sets are screwed up"); ++ res = -ERANGE; ++ goto cleanup; ++ } ++ ++ /* ++ * Finally! Add our shiny new set to the list, and be done. ++ */ ++ DP("create: '%s' created with index %u, id %u!", set->name, index, id); ++ set->id = id; ++ ip_set_list[index] = set; ++ write_unlock_bh(&ip_set_lock); ++ return res; ++ ++ cleanup: ++ write_unlock_bh(&ip_set_lock); ++ set->type->destroy(set); ++ put_out: ++ module_put(set->type->me); ++ out: ++ kfree(set); ++ return res; ++} ++ ++/* ++ * Destroy a given existing set ++ */ ++static void ++ip_set_destroy_set(ip_set_id_t index) ++{ ++ struct ip_set *set = ip_set_list[index]; ++ ++ IP_SET_ASSERT(set); ++ DP("set: %s", set->name); ++ write_lock_bh(&ip_set_lock); ++ FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id); ++ if (set->binding != IP_SET_INVALID_ID) ++ __ip_set_put(set->binding); ++ ip_set_list[index] = NULL; ++ write_unlock_bh(&ip_set_lock); ++ ++ /* Must call it without holding any lock */ ++ set->type->destroy(set); ++ module_put(set->type->me); ++ kfree(set); ++} ++ ++/* ++ * Destroy a set - or all sets ++ * Sets must not be referenced/used. ++ */ ++static int ++ip_set_destroy(ip_set_id_t index) ++{ ++ ip_set_id_t i; ++ ++ /* ref modification always protected by the mutex */ ++ if (index != IP_SET_INVALID_ID) { ++ if (atomic_read(&ip_set_list[index]->ref)) ++ return -EBUSY; ++ ip_set_destroy_set(index); ++ } else { ++ for (i = 0; i < ip_set_max; i++) { ++ if (ip_set_list[i] != NULL ++ && (atomic_read(&ip_set_list[i]->ref))) ++ return -EBUSY; ++ } ++ ++ for (i = 0; i < ip_set_max; i++) { ++ if (ip_set_list[i] != NULL) ++ ip_set_destroy_set(i); ++ } ++ } ++ return 0; ++} ++ ++static void ++ip_set_flush_set(struct ip_set *set) ++{ ++ DP("set: %s %u", set->name, set->id); ++ ++ write_lock_bh(&set->lock); ++ set->type->flush(set); ++ write_unlock_bh(&set->lock); ++} ++ ++/* ++ * Flush data in a set - or in all sets ++ */ ++static int ++ip_set_flush(ip_set_id_t index) ++{ ++ if (index != IP_SET_INVALID_ID) { ++ IP_SET_ASSERT(ip_set_list[index]); ++ ip_set_flush_set(ip_set_list[index]); ++ } else ++ FOREACH_SET_DO(ip_set_flush_set); ++ ++ return 0; ++} ++ ++/* Rename a set */ ++static int ++ip_set_rename(ip_set_id_t index, const char *name) ++{ ++ struct ip_set *set = ip_set_list[index]; ++ ip_set_id_t i; ++ int res = 0; ++ ++ DP("set: %s to %s", set->name, name); ++ write_lock_bh(&ip_set_lock); ++ for (i = 0; i < ip_set_max; i++) { ++ if (ip_set_list[i] != NULL ++ && strncmp(ip_set_list[i]->name, ++ name, ++ IP_SET_MAXNAMELEN - 1) == 0) { ++ res = -EEXIST; ++ goto unlock; ++ } ++ } ++ strncpy(set->name, name, IP_SET_MAXNAMELEN); ++ unlock: ++ write_unlock_bh(&ip_set_lock); ++ return res; ++} ++ ++/* ++ * Swap two sets so that name/index points to the other. ++ * References are also swapped. ++ */ ++static int ++ip_set_swap(ip_set_id_t from_index, ip_set_id_t to_index) ++{ ++ struct ip_set *from = ip_set_list[from_index]; ++ struct ip_set *to = ip_set_list[to_index]; ++ char from_name[IP_SET_MAXNAMELEN]; ++ u_int32_t from_ref; ++ ++ DP("set: %s to %s", from->name, to->name); ++ /* Type can't be changed. Artifical restriction. */ ++ if (from->type->typecode != to->type->typecode) ++ return -ENOEXEC; ++ ++ /* No magic here: ref munging protected by the mutex */ ++ write_lock_bh(&ip_set_lock); ++ strncpy(from_name, from->name, IP_SET_MAXNAMELEN); ++ from_ref = atomic_read(&from->ref); ++ ++ strncpy(from->name, to->name, IP_SET_MAXNAMELEN); ++ atomic_set(&from->ref, atomic_read(&to->ref)); ++ strncpy(to->name, from_name, IP_SET_MAXNAMELEN); ++ atomic_set(&to->ref, from_ref); ++ ++ ip_set_list[from_index] = to; ++ ip_set_list[to_index] = from; ++ ++ write_unlock_bh(&ip_set_lock); ++ return 0; ++} ++ ++/* ++ * List set data ++ */ ++ ++static inline void ++__set_hash_bindings_size_list(struct ip_set_hash *set_hash, ++ ip_set_id_t id, size_t *size) ++{ ++ if (set_hash->id == id) ++ *size += sizeof(struct ip_set_hash_list); ++} ++ ++static inline void ++__set_hash_bindings_size_save(struct ip_set_hash *set_hash, ++ ip_set_id_t id, size_t *size) ++{ ++ if (set_hash->id == id) ++ *size += sizeof(struct ip_set_hash_save); ++} ++ ++static inline void ++__set_hash_bindings(struct ip_set_hash *set_hash, ++ ip_set_id_t id, void *data, int *used) ++{ ++ if (set_hash->id == id) { ++ struct ip_set_hash_list *hash_list = ++ (struct ip_set_hash_list *)(data + *used); ++ ++ hash_list->ip = set_hash->ip; ++ hash_list->binding = set_hash->binding; ++ *used += sizeof(struct ip_set_hash_list); ++ } ++} ++ ++static int ip_set_list_set(ip_set_id_t index, ++ void *data, ++ int *used, ++ int len) ++{ ++ struct ip_set *set = ip_set_list[index]; ++ struct ip_set_list *set_list; ++ ++ /* Pointer to our header */ ++ set_list = (struct ip_set_list *) (data + *used); ++ ++ DP("set: %s, used: %d %p %p", set->name, *used, data, data + *used); ++ ++ /* Get and ensure header size */ ++ if (*used + sizeof(struct ip_set_list) > len) ++ goto not_enough_mem; ++ *used += sizeof(struct ip_set_list); ++ ++ read_lock_bh(&set->lock); ++ /* Get and ensure set specific header size */ ++ set_list->header_size = set->type->header_size; ++ if (*used + set_list->header_size > len) ++ goto unlock_set; ++ ++ /* Fill in the header */ ++ set_list->index = index; ++ set_list->binding = set->binding; ++ set_list->ref = atomic_read(&set->ref); ++ ++ /* Fill in set spefific header data */ ++ set->type->list_header(set, data + *used); ++ *used += set_list->header_size; ++ ++ /* Get and ensure set specific members size */ ++ set_list->members_size = set->type->list_members_size(set); ++ if (*used + set_list->members_size > len) ++ goto unlock_set; ++ ++ /* Fill in set spefific members data */ ++ set->type->list_members(set, data + *used); ++ *used += set_list->members_size; ++ read_unlock_bh(&set->lock); ++ ++ /* Bindings */ ++ ++ /* Get and ensure set specific bindings size */ ++ set_list->bindings_size = 0; ++ FOREACH_HASH_DO(__set_hash_bindings_size_list, ++ set->id, &set_list->bindings_size); ++ if (*used + set_list->bindings_size > len) ++ goto not_enough_mem; ++ ++ /* Fill in set spefific bindings data */ ++ FOREACH_HASH_DO(__set_hash_bindings, set->id, data, used); ++ ++ return 0; ++ ++ unlock_set: ++ read_unlock_bh(&set->lock); ++ not_enough_mem: ++ DP("not enough mem, try again"); ++ return -EAGAIN; ++} ++ ++/* ++ * Save sets ++ */ ++static int ip_set_save_set(ip_set_id_t index, ++ void *data, ++ int *used, ++ int len) ++{ ++ struct ip_set *set; ++ struct ip_set_save *set_save; ++ ++ /* Pointer to our header */ ++ set_save = (struct ip_set_save *) (data + *used); ++ ++ /* Get and ensure header size */ ++ if (*used + sizeof(struct ip_set_save) > len) ++ goto not_enough_mem; ++ *used += sizeof(struct ip_set_save); ++ ++ set = ip_set_list[index]; ++ DP("set: %s, used: %u(%u) %p %p", set->name, *used, len, ++ data, data + *used); ++ ++ read_lock_bh(&set->lock); ++ /* Get and ensure set specific header size */ ++ set_save->header_size = set->type->header_size; ++ if (*used + set_save->header_size > len) ++ goto unlock_set; ++ ++ /* Fill in the header */ ++ set_save->index = index; ++ set_save->binding = set->binding; ++ ++ /* Fill in set spefific header data */ ++ set->type->list_header(set, data + *used); ++ *used += set_save->header_size; ++ ++ DP("set header filled: %s, used: %u %p %p", set->name, *used, ++ data, data + *used); ++ /* Get and ensure set specific members size */ ++ set_save->members_size = set->type->list_members_size(set); ++ if (*used + set_save->members_size > len) ++ goto unlock_set; ++ ++ /* Fill in set spefific members data */ ++ set->type->list_members(set, data + *used); ++ *used += set_save->members_size; ++ read_unlock_bh(&set->lock); ++ DP("set members filled: %s, used: %u %p %p", set->name, *used, ++ data, data + *used); ++ return 0; ++ ++ unlock_set: ++ read_unlock_bh(&set->lock); ++ not_enough_mem: ++ DP("not enough mem, try again"); ++ return -EAGAIN; ++} ++ ++static inline void ++__set_hash_save_bindings(struct ip_set_hash *set_hash, ++ ip_set_id_t id, ++ void *data, ++ int *used, ++ int len, ++ int *res) ++{ ++ if (*res == 0 ++ && (id == IP_SET_INVALID_ID || set_hash->id == id)) { ++ struct ip_set_hash_save *hash_save = ++ (struct ip_set_hash_save *)(data + *used); ++ /* Ensure bindings size */ ++ if (*used + sizeof(struct ip_set_hash_save) > len) { ++ *res = -ENOMEM; ++ return; ++ } ++ hash_save->id = set_hash->id; ++ hash_save->ip = set_hash->ip; ++ hash_save->binding = set_hash->binding; ++ *used += sizeof(struct ip_set_hash_save); ++ } ++} ++ ++static int ip_set_save_bindings(ip_set_id_t index, ++ void *data, ++ int *used, ++ int len) ++{ ++ int res = 0; ++ struct ip_set_save *set_save; ++ ++ DP("used %u, len %u", *used, len); ++ /* Get and ensure header size */ ++ if (*used + sizeof(struct ip_set_save) > len) ++ return -ENOMEM; ++ ++ /* Marker */ ++ set_save = (struct ip_set_save *) (data + *used); ++ set_save->index = IP_SET_INVALID_ID; ++ *used += sizeof(struct ip_set_save); ++ ++ DP("marker added used %u, len %u", *used, len); ++ /* Fill in bindings data */ ++ if (index != IP_SET_INVALID_ID) ++ /* Sets are identified by id in hash */ ++ index = ip_set_list[index]->id; ++ FOREACH_HASH_DO(__set_hash_save_bindings, index, data, used, len, &res); ++ ++ return res; ++} ++ ++/* ++ * Restore sets ++ */ ++static int ip_set_restore(void *data, ++ int len) ++{ ++ int res = 0; ++ int line = 0, used = 0, members_size; ++ struct ip_set *set; ++ struct ip_set_hash_save *hash_save; ++ struct ip_set_restore *set_restore; ++ ip_set_id_t index; ++ ++ /* Loop to restore sets */ ++ while (1) { ++ line++; ++ ++ DP("%u %u %u", used, sizeof(struct ip_set_restore), len); ++ /* Get and ensure header size */ ++ if (used + sizeof(struct ip_set_restore) > len) ++ return line; ++ set_restore = (struct ip_set_restore *) (data + used); ++ used += sizeof(struct ip_set_restore); ++ ++ /* Ensure data size */ ++ if (used ++ + set_restore->header_size ++ + set_restore->members_size > len) ++ return line; ++ ++ /* Check marker */ ++ if (set_restore->index == IP_SET_INVALID_ID) { ++ line--; ++ goto bindings; ++ } ++ ++ /* Try to create the set */ ++ DP("restore %s %s", set_restore->name, set_restore->typename); ++ res = ip_set_create(set_restore->name, ++ set_restore->typename, ++ set_restore->index, ++ data + used, ++ set_restore->header_size); ++ ++ if (res != 0) ++ return line; ++ used += set_restore->header_size; ++ ++ index = ip_set_find_byindex(set_restore->index); ++ DP("index %u, restore_index %u", index, set_restore->index); ++ if (index != set_restore->index) ++ return line; ++ /* Try to restore members data */ ++ set = ip_set_list[index]; ++ members_size = 0; ++ DP("members_size %u reqsize %u", ++ set_restore->members_size, set->type->reqsize); ++ while (members_size + set->type->reqsize <= ++ set_restore->members_size) { ++ line++; ++ DP("members: %u, line %u", members_size, line); ++ res = __ip_set_addip(index, ++ data + used + members_size, ++ set->type->reqsize); ++ if (!(res == 0 || res == -EEXIST)) ++ return line; ++ members_size += set->type->reqsize; ++ } ++ ++ DP("members_size %u %u", ++ set_restore->members_size, members_size); ++ if (members_size != set_restore->members_size) ++ return line++; ++ used += set_restore->members_size; ++ } ++ ++ bindings: ++ /* Loop to restore bindings */ ++ while (used < len) { ++ line++; ++ ++ DP("restore binding, line %u", line); ++ /* Get and ensure size */ ++ if (used + sizeof(struct ip_set_hash_save) > len) ++ return line; ++ hash_save = (struct ip_set_hash_save *) (data + used); ++ used += sizeof(struct ip_set_hash_save); ++ ++ /* hash_save->id is used to store the index */ ++ index = ip_set_find_byindex(hash_save->id); ++ DP("restore binding index %u, id %u, %u -> %u", ++ index, hash_save->id, hash_save->ip, hash_save->binding); ++ if (index != hash_save->id) ++ return line; ++ ++ set = ip_set_list[hash_save->id]; ++ /* Null valued IP means default binding */ ++ if (hash_save->ip) ++ res = ip_set_hash_add(set->id, ++ hash_save->ip, ++ hash_save->binding); ++ else { ++ IP_SET_ASSERT(set->binding == IP_SET_INVALID_ID); ++ write_lock_bh(&ip_set_lock); ++ set->binding = hash_save->binding; ++ __ip_set_get(set->binding); ++ write_unlock_bh(&ip_set_lock); ++ DP("default binding: %u", set->binding); ++ } ++ if (res != 0) ++ return line; ++ } ++ if (used != len) ++ return line; ++ ++ return 0; ++} ++ ++static int ++ip_set_sockfn_set(struct sock *sk, int optval, void *user, unsigned int len) ++{ ++ void *data; ++ int res = 0; /* Assume OK */ ++ unsigned *op; ++ struct ip_set_req_adt *req_adt; ++ ip_set_id_t index = IP_SET_INVALID_ID; ++ int (*adtfn)(ip_set_id_t index, ++ const void *data, size_t size); ++ struct fn_table { ++ int (*fn)(ip_set_id_t index, ++ const void *data, size_t size); ++ } adtfn_table[] = ++ { { ip_set_addip }, { ip_set_delip }, { ip_set_testip}, ++ { ip_set_bindip}, { ip_set_unbindip }, { ip_set_testbind }, ++ }; ++ ++ DP("optval=%d, user=%p, len=%d", optval, user, len); ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ if (optval != SO_IP_SET) ++ return -EBADF; ++ if (len <= sizeof(unsigned)) { ++ ip_set_printk("short userdata (want >%zu, got %u)", ++ sizeof(unsigned), len); ++ return -EINVAL; ++ } ++ data = vmalloc(len); ++ if (!data) { ++ DP("out of mem for %u bytes", len); ++ return -ENOMEM; ++ } ++ if (copy_from_user(data, user, len) != 0) { ++ res = -EFAULT; ++ goto done; ++ } ++ if (down_interruptible(&ip_set_app_mutex)) { ++ res = -EINTR; ++ goto done; ++ } ++ ++ op = (unsigned *)data; ++ DP("op=%x", *op); ++ ++ if (*op < IP_SET_OP_VERSION) { ++ /* Check the version at the beginning of operations */ ++ struct ip_set_req_version *req_version = ++ (struct ip_set_req_version *) data; ++ if (req_version->version != IP_SET_PROTOCOL_VERSION) { ++ res = -EPROTO; ++ goto done; ++ } ++ } ++ ++ switch (*op) { ++ case IP_SET_OP_CREATE:{ ++ struct ip_set_req_create *req_create ++ = (struct ip_set_req_create *) data; ++ ++ if (len <= sizeof(struct ip_set_req_create)) { ++ ip_set_printk("short CREATE data (want >%zu, got %u)", ++ sizeof(struct ip_set_req_create), len); ++ res = -EINVAL; ++ goto done; ++ } ++ req_create->name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ req_create->typename[IP_SET_MAXNAMELEN - 1] = '\0'; ++ res = ip_set_create(req_create->name, ++ req_create->typename, ++ IP_SET_INVALID_ID, ++ data + sizeof(struct ip_set_req_create), ++ len - sizeof(struct ip_set_req_create)); ++ goto done; ++ } ++ case IP_SET_OP_DESTROY:{ ++ struct ip_set_req_std *req_destroy ++ = (struct ip_set_req_std *) data; ++ ++ if (len != sizeof(struct ip_set_req_std)) { ++ ip_set_printk("invalid DESTROY data (want %zu, got %u)", ++ sizeof(struct ip_set_req_std), len); ++ res = -EINVAL; ++ goto done; ++ } ++ if (strcmp(req_destroy->name, IPSET_TOKEN_ALL) == 0) { ++ /* Destroy all sets */ ++ index = IP_SET_INVALID_ID; ++ } else { ++ req_destroy->name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ index = ip_set_find_byname(req_destroy->name); ++ ++ if (index == IP_SET_INVALID_ID) { ++ res = -ENOENT; ++ goto done; ++ } ++ } ++ ++ res = ip_set_destroy(index); ++ goto done; ++ } ++ case IP_SET_OP_FLUSH:{ ++ struct ip_set_req_std *req_flush = ++ (struct ip_set_req_std *) data; ++ ++ if (len != sizeof(struct ip_set_req_std)) { ++ ip_set_printk("invalid FLUSH data (want %zu, got %u)", ++ sizeof(struct ip_set_req_std), len); ++ res = -EINVAL; ++ goto done; ++ } ++ if (strcmp(req_flush->name, IPSET_TOKEN_ALL) == 0) { ++ /* Flush all sets */ ++ index = IP_SET_INVALID_ID; ++ } else { ++ req_flush->name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ index = ip_set_find_byname(req_flush->name); ++ ++ if (index == IP_SET_INVALID_ID) { ++ res = -ENOENT; ++ goto done; ++ } ++ } ++ res = ip_set_flush(index); ++ goto done; ++ } ++ case IP_SET_OP_RENAME:{ ++ struct ip_set_req_create *req_rename ++ = (struct ip_set_req_create *) data; ++ ++ if (len != sizeof(struct ip_set_req_create)) { ++ ip_set_printk("invalid RENAME data (want %zu, got %u)", ++ sizeof(struct ip_set_req_create), len); ++ res = -EINVAL; ++ goto done; ++ } ++ ++ req_rename->name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ req_rename->typename[IP_SET_MAXNAMELEN - 1] = '\0'; ++ ++ index = ip_set_find_byname(req_rename->name); ++ if (index == IP_SET_INVALID_ID) { ++ res = -ENOENT; ++ goto done; ++ } ++ res = ip_set_rename(index, req_rename->typename); ++ goto done; ++ } ++ case IP_SET_OP_SWAP:{ ++ struct ip_set_req_create *req_swap ++ = (struct ip_set_req_create *) data; ++ ip_set_id_t to_index; ++ ++ if (len != sizeof(struct ip_set_req_create)) { ++ ip_set_printk("invalid SWAP data (want %zu, got %u)", ++ sizeof(struct ip_set_req_create), len); ++ res = -EINVAL; ++ goto done; ++ } ++ ++ req_swap->name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ req_swap->typename[IP_SET_MAXNAMELEN - 1] = '\0'; ++ ++ index = ip_set_find_byname(req_swap->name); ++ if (index == IP_SET_INVALID_ID) { ++ res = -ENOENT; ++ goto done; ++ } ++ to_index = ip_set_find_byname(req_swap->typename); ++ if (to_index == IP_SET_INVALID_ID) { ++ res = -ENOENT; ++ goto done; ++ } ++ res = ip_set_swap(index, to_index); ++ goto done; ++ } ++ default: ++ break; /* Set identified by id */ ++ } ++ ++ /* There we may have add/del/test/bind/unbind/test_bind operations */ ++ if (*op < IP_SET_OP_ADD_IP || *op > IP_SET_OP_TEST_BIND_SET) { ++ res = -EBADMSG; ++ goto done; ++ } ++ adtfn = adtfn_table[*op - IP_SET_OP_ADD_IP].fn; ++ ++ if (len < sizeof(struct ip_set_req_adt)) { ++ ip_set_printk("short data in adt request (want >=%zu, got %u)", ++ sizeof(struct ip_set_req_adt), len); ++ res = -EINVAL; ++ goto done; ++ } ++ req_adt = (struct ip_set_req_adt *) data; ++ ++ /* -U :all: :all:|:default: uses IP_SET_INVALID_ID */ ++ if (!(*op == IP_SET_OP_UNBIND_SET ++ && req_adt->index == IP_SET_INVALID_ID)) { ++ index = ip_set_find_byindex(req_adt->index); ++ if (index == IP_SET_INVALID_ID) { ++ res = -ENOENT; ++ goto done; ++ } ++ } ++ res = adtfn(index, data, len); ++ ++ done: ++ up(&ip_set_app_mutex); ++ vfree(data); ++ if (res > 0) ++ res = 0; ++ DP("final result %d", res); ++ return res; ++} ++ ++static int ++ip_set_sockfn_get(struct sock *sk, int optval, void *user, int *len) ++{ ++ int res = 0; ++ unsigned *op; ++ ip_set_id_t index = IP_SET_INVALID_ID; ++ void *data; ++ int copylen = *len; ++ ++ DP("optval=%d, user=%p, len=%d", optval, user, *len); ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ if (optval != SO_IP_SET) ++ return -EBADF; ++ if (*len < sizeof(unsigned)) { ++ ip_set_printk("short userdata (want >=%zu, got %d)", ++ sizeof(unsigned), *len); ++ return -EINVAL; ++ } ++ data = vmalloc(*len); ++ if (!data) { ++ DP("out of mem for %d bytes", *len); ++ return -ENOMEM; ++ } ++ if (copy_from_user(data, user, *len) != 0) { ++ res = -EFAULT; ++ goto done; ++ } ++ if (down_interruptible(&ip_set_app_mutex)) { ++ res = -EINTR; ++ goto done; ++ } ++ ++ op = (unsigned *) data; ++ DP("op=%x", *op); ++ ++ if (*op < IP_SET_OP_VERSION) { ++ /* Check the version at the beginning of operations */ ++ struct ip_set_req_version *req_version = ++ (struct ip_set_req_version *) data; ++ if (req_version->version != IP_SET_PROTOCOL_VERSION) { ++ res = -EPROTO; ++ goto done; ++ } ++ } ++ ++ switch (*op) { ++ case IP_SET_OP_VERSION: { ++ struct ip_set_req_version *req_version = ++ (struct ip_set_req_version *) data; ++ ++ if (*len != sizeof(struct ip_set_req_version)) { ++ ip_set_printk("invalid VERSION (want %zu, got %d)", ++ sizeof(struct ip_set_req_version), ++ *len); ++ res = -EINVAL; ++ goto done; ++ } ++ ++ req_version->version = IP_SET_PROTOCOL_VERSION; ++ res = copy_to_user(user, req_version, ++ sizeof(struct ip_set_req_version)); ++ goto done; ++ } ++ case IP_SET_OP_GET_BYNAME: { ++ struct ip_set_req_get_set *req_get ++ = (struct ip_set_req_get_set *) data; ++ ++ if (*len != sizeof(struct ip_set_req_get_set)) { ++ ip_set_printk("invalid GET_BYNAME (want %zu, got %d)", ++ sizeof(struct ip_set_req_get_set), *len); ++ res = -EINVAL; ++ goto done; ++ } ++ req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ index = ip_set_find_byname(req_get->set.name); ++ req_get->set.index = index; ++ goto copy; ++ } ++ case IP_SET_OP_GET_BYINDEX: { ++ struct ip_set_req_get_set *req_get ++ = (struct ip_set_req_get_set *) data; ++ ++ if (*len != sizeof(struct ip_set_req_get_set)) { ++ ip_set_printk("invalid GET_BYINDEX (want %zu, got %d)", ++ sizeof(struct ip_set_req_get_set), *len); ++ res = -EINVAL; ++ goto done; ++ } ++ req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ index = ip_set_find_byindex(req_get->set.index); ++ strncpy(req_get->set.name, ++ index == IP_SET_INVALID_ID ? "" ++ : ip_set_list[index]->name, IP_SET_MAXNAMELEN); ++ goto copy; ++ } ++ case IP_SET_OP_ADT_GET: { ++ struct ip_set_req_adt_get *req_get ++ = (struct ip_set_req_adt_get *) data; ++ ++ if (*len != sizeof(struct ip_set_req_adt_get)) { ++ ip_set_printk("invalid ADT_GET (want %zu, got %d)", ++ sizeof(struct ip_set_req_adt_get), *len); ++ res = -EINVAL; ++ goto done; ++ } ++ req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ index = ip_set_find_byname(req_get->set.name); ++ if (index != IP_SET_INVALID_ID) { ++ req_get->set.index = index; ++ strncpy(req_get->typename, ++ ip_set_list[index]->type->typename, ++ IP_SET_MAXNAMELEN - 1); ++ } else { ++ res = -ENOENT; ++ goto done; ++ } ++ goto copy; ++ } ++ case IP_SET_OP_MAX_SETS: { ++ struct ip_set_req_max_sets *req_max_sets ++ = (struct ip_set_req_max_sets *) data; ++ ip_set_id_t i; ++ ++ if (*len != sizeof(struct ip_set_req_max_sets)) { ++ ip_set_printk("invalid MAX_SETS (want %zu, got %d)", ++ sizeof(struct ip_set_req_max_sets), *len); ++ res = -EINVAL; ++ goto done; ++ } ++ ++ if (strcmp(req_max_sets->set.name, IPSET_TOKEN_ALL) == 0) { ++ req_max_sets->set.index = IP_SET_INVALID_ID; ++ } else { ++ req_max_sets->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; ++ req_max_sets->set.index = ++ ip_set_find_byname(req_max_sets->set.name); ++ if (req_max_sets->set.index == IP_SET_INVALID_ID) { ++ res = -ENOENT; ++ goto done; ++ } ++ } ++ req_max_sets->max_sets = ip_set_max; ++ req_max_sets->sets = 0; ++ for (i = 0; i < ip_set_max; i++) { ++ if (ip_set_list[i] != NULL) ++ req_max_sets->sets++; ++ } ++ goto copy; ++ } ++ case IP_SET_OP_LIST_SIZE: ++ case IP_SET_OP_SAVE_SIZE: { ++ struct ip_set_req_setnames *req_setnames ++ = (struct ip_set_req_setnames *) data; ++ struct ip_set_name_list *name_list; ++ struct ip_set *set; ++ ip_set_id_t i; ++ int used; ++ ++ if (*len < sizeof(struct ip_set_req_setnames)) { ++ ip_set_printk("short LIST_SIZE (want >=%zu, got %d)", ++ sizeof(struct ip_set_req_setnames), *len); ++ res = -EINVAL; ++ goto done; ++ } ++ ++ req_setnames->size = 0; ++ used = sizeof(struct ip_set_req_setnames); ++ for (i = 0; i < ip_set_max; i++) { ++ if (ip_set_list[i] == NULL) ++ continue; ++ name_list = (struct ip_set_name_list *) ++ (data + used); ++ used += sizeof(struct ip_set_name_list); ++ if (used > copylen) { ++ res = -EAGAIN; ++ goto done; ++ } ++ set = ip_set_list[i]; ++ /* Fill in index, name, etc. */ ++ name_list->index = i; ++ name_list->id = set->id; ++ strncpy(name_list->name, ++ set->name, ++ IP_SET_MAXNAMELEN - 1); ++ strncpy(name_list->typename, ++ set->type->typename, ++ IP_SET_MAXNAMELEN - 1); ++ DP("filled %s of type %s, index %u\n", ++ name_list->name, name_list->typename, ++ name_list->index); ++ if (!(req_setnames->index == IP_SET_INVALID_ID ++ || req_setnames->index == i)) ++ continue; ++ /* Update size */ ++ switch (*op) { ++ case IP_SET_OP_LIST_SIZE: { ++ req_setnames->size += sizeof(struct ip_set_list) ++ + set->type->header_size ++ + set->type->list_members_size(set); ++ FOREACH_HASH_DO(__set_hash_bindings_size_list, ++ i, &req_setnames->size); ++ break; ++ } ++ case IP_SET_OP_SAVE_SIZE: { ++ req_setnames->size += sizeof(struct ip_set_save) ++ + set->type->header_size ++ + set->type->list_members_size(set); ++ FOREACH_HASH_DO(__set_hash_bindings_size_save, ++ i, &req_setnames->size); ++ break; ++ } ++ default: ++ break; ++ } ++ } ++ if (copylen != used) { ++ res = -EAGAIN; ++ goto done; ++ } ++ goto copy; ++ } ++ case IP_SET_OP_LIST: { ++ struct ip_set_req_list *req_list ++ = (struct ip_set_req_list *) data; ++ ip_set_id_t i; ++ int used; ++ ++ if (*len < sizeof(struct ip_set_req_list)) { ++ ip_set_printk("short LIST (want >=%zu, got %d)", ++ sizeof(struct ip_set_req_list), *len); ++ res = -EINVAL; ++ goto done; ++ } ++ index = req_list->index; ++ if (index != IP_SET_INVALID_ID ++ && ip_set_find_byindex(index) != index) { ++ res = -ENOENT; ++ goto done; ++ } ++ used = 0; ++ if (index == IP_SET_INVALID_ID) { ++ /* List all sets */ ++ for (i = 0; i < ip_set_max && res == 0; i++) { ++ if (ip_set_list[i] != NULL) ++ res = ip_set_list_set(i, data, &used, *len); ++ } ++ } else { ++ /* List an individual set */ ++ res = ip_set_list_set(index, data, &used, *len); ++ } ++ if (res != 0) ++ goto done; ++ else if (copylen != used) { ++ res = -EAGAIN; ++ goto done; ++ } ++ goto copy; ++ } ++ case IP_SET_OP_SAVE: { ++ struct ip_set_req_list *req_save ++ = (struct ip_set_req_list *) data; ++ ip_set_id_t i; ++ int used; ++ ++ if (*len < sizeof(struct ip_set_req_list)) { ++ ip_set_printk("short SAVE (want >=%zu, got %d)", ++ sizeof(struct ip_set_req_list), *len); ++ res = -EINVAL; ++ goto done; ++ } ++ index = req_save->index; ++ if (index != IP_SET_INVALID_ID ++ && ip_set_find_byindex(index) != index) { ++ res = -ENOENT; ++ goto done; ++ } ++ used = 0; ++ if (index == IP_SET_INVALID_ID) { ++ /* Save all sets */ ++ for (i = 0; i < ip_set_max && res == 0; i++) { ++ if (ip_set_list[i] != NULL) ++ res = ip_set_save_set(i, data, &used, *len); ++ } ++ } else { ++ /* Save an individual set */ ++ res = ip_set_save_set(index, data, &used, *len); ++ } ++ if (res == 0) ++ res = ip_set_save_bindings(index, data, &used, *len); ++ ++ if (res != 0) ++ goto done; ++ else if (copylen != used) { ++ res = -EAGAIN; ++ goto done; ++ } ++ goto copy; ++ } ++ case IP_SET_OP_RESTORE: { ++ struct ip_set_req_setnames *req_restore ++ = (struct ip_set_req_setnames *) data; ++ int line; ++ ++ if (*len < sizeof(struct ip_set_req_setnames) ++ || *len != req_restore->size) { ++ ip_set_printk("invalid RESTORE (want =%zu, got %d)", ++ req_restore->size, *len); ++ res = -EINVAL; ++ goto done; ++ } ++ line = ip_set_restore(data + sizeof(struct ip_set_req_setnames), ++ req_restore->size - sizeof(struct ip_set_req_setnames)); ++ DP("ip_set_restore: %u", line); ++ if (line != 0) { ++ res = -EAGAIN; ++ req_restore->size = line; ++ copylen = sizeof(struct ip_set_req_setnames); ++ goto copy; ++ } ++ goto done; ++ } ++ default: ++ res = -EBADMSG; ++ goto done; ++ } /* end of switch(op) */ ++ ++ copy: ++ DP("set %s, copylen %u", index != IP_SET_INVALID_ID ++ && ip_set_list[index] ++ ? ip_set_list[index]->name ++ : ":all:", copylen); ++ if (res == 0) ++ res = copy_to_user(user, data, copylen); ++ else ++ copy_to_user(user, data, copylen); ++ ++ done: ++ up(&ip_set_app_mutex); ++ vfree(data); ++ if (res > 0) ++ res = 0; ++ DP("final result %d", res); ++ return res; ++} ++ ++static struct nf_sockopt_ops so_set = { ++ .pf = PF_INET, ++ .set_optmin = SO_IP_SET, ++ .set_optmax = SO_IP_SET + 1, ++ .set = &ip_set_sockfn_set, ++ .get_optmin = SO_IP_SET, ++ .get_optmax = SO_IP_SET + 1, ++ .get = &ip_set_sockfn_get, ++ .use = 0 ++}; ++ ++static int max_sets, hash_size; ++module_param(max_sets, int, 0600); ++MODULE_PARM_DESC(max_sets, "maximal number of sets"); ++module_param(hash_size, int, 0600); ++MODULE_PARM_DESC(hash_size, "hash size for bindings"); ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); ++MODULE_DESCRIPTION("module implementing core IP set support"); ++ ++static int __init init(void) ++{ ++ int res; ++ ip_set_id_t i; ++ ++ get_random_bytes(&ip_set_hash_random, 4); ++ if (max_sets) ++ ip_set_max = max_sets; ++ ip_set_list = vmalloc(sizeof(struct ip_set *) * ip_set_max); ++ if (!ip_set_list) { ++ printk(KERN_ERR "Unable to create ip_set_list\n"); ++ return -ENOMEM; ++ } ++ memset(ip_set_list, 0, sizeof(struct ip_set *) * ip_set_max); ++ if (hash_size) ++ ip_set_bindings_hash_size = hash_size; ++ ip_set_hash = vmalloc(sizeof(struct list_head) * ip_set_bindings_hash_size); ++ if (!ip_set_hash) { ++ printk(KERN_ERR "Unable to create ip_set_hash\n"); ++ vfree(ip_set_list); ++ return -ENOMEM; ++ } ++ for (i = 0; i < ip_set_bindings_hash_size; i++) ++ INIT_LIST_HEAD(&ip_set_hash[i]); ++ ++ INIT_LIST_HEAD(&set_type_list); ++ ++ res = nf_register_sockopt(&so_set); ++ if (res != 0) { ++ ip_set_printk("SO_SET registry failed: %d", res); ++ vfree(ip_set_list); ++ vfree(ip_set_hash); ++ return res; ++ } ++ return 0; ++} ++ ++static void __exit fini(void) ++{ ++ /* There can't be any existing set or binding */ ++ nf_unregister_sockopt(&so_set); ++ vfree(ip_set_list); ++ vfree(ip_set_hash); ++ DP("these are the famous last words"); ++} ++ ++EXPORT_SYMBOL(ip_set_register_set_type); ++EXPORT_SYMBOL(ip_set_unregister_set_type); ++ ++EXPORT_SYMBOL(ip_set_get_byname); ++EXPORT_SYMBOL(ip_set_get_byindex); ++EXPORT_SYMBOL(ip_set_put); ++ ++EXPORT_SYMBOL(ip_set_addip_kernel); ++EXPORT_SYMBOL(ip_set_delip_kernel); ++EXPORT_SYMBOL(ip_set_testip_kernel); ++ ++module_init(init); ++module_exit(fini); +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/ip_set_iphash.c linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_iphash.c +--- linux-2.6.12.5/net/ipv4/netfilter/ip_set_iphash.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_iphash.c 2005-09-20 13:11:38.786384250 +0200 +@@ -0,0 +1,379 @@ ++/* Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++/* Kernel module implementing an ip hash set */ ++ ++#include <linux/module.h> ++#include <linux/ip.h> ++#include <linux/skbuff.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/netfilter_ipv4/ip_set.h> ++#include <linux/errno.h> ++#include <asm/uaccess.h> ++#include <asm/bitops.h> ++#include <linux/spinlock.h> ++#include <linux/vmalloc.h> ++#include <linux/random.h> ++ ++#include <net/ip.h> ++ ++#include <linux/netfilter_ipv4/ip_set_malloc.h> ++#include <linux/netfilter_ipv4/ip_set_iphash.h> ++#include <linux/netfilter_ipv4/ip_set_jhash.h> ++#include <linux/netfilter_ipv4/ip_set_prime.h> ++ ++static inline __u32 ++jhash_ip(const struct ip_set_iphash *map, ip_set_ip_t ip) ++{ ++ return jhash_1word(ip, map->initval); ++} ++ ++static inline __u32 ++randhash_ip(const struct ip_set_iphash *map, ip_set_ip_t ip) ++{ ++ return (1 + ip % map->prime); ++} ++ ++static inline __u32 ++hash_id(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; ++ __u32 jhash, randhash, id; ++ u_int16_t i; ++ ++ *hash_ip = ip & map->netmask; ++ jhash = jhash_ip(map, *hash_ip); ++ randhash = randhash_ip(map, *hash_ip); ++ DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u, %u.%u.%u.%u", ++ set->name, HIPQUAD(ip), HIPQUAD(*hash_ip), HIPQUAD(map->netmask)); ++ ++ for (i = 0; i < map->probes; i++) { ++ id = (jhash + i * randhash) % map->hashsize; ++ DP("hash key: %u", id); ++ if (map->members[id] == *hash_ip) ++ return id; ++ /* No shortcut at testing - there can be deleted ++ * entries. */ ++ } ++ return UINT_MAX; ++} ++ ++static inline int ++__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ return (hash_id(set, ip, hash_ip) != UINT_MAX); ++} ++ ++static int ++testip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_iphash *req = ++ (struct ip_set_req_iphash *) data; ++ ++ if (size != sizeof(struct ip_set_req_iphash)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_iphash), ++ size); ++ return -EINVAL; ++ } ++ return __testip(set, req->ip, hash_ip); ++} ++ ++static int ++testip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ return __testip(set, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ hash_ip); ++} ++ ++static inline int ++__addip(struct ip_set_iphash *map, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ __u32 jhash, randhash, probe; ++ u_int16_t i; ++ ++ *hash_ip = ip & map->netmask; ++ jhash = jhash_ip(map, *hash_ip); ++ randhash = randhash_ip(map, *hash_ip); ++ ++ for (i = 0; i < map->probes; i++) { ++ probe = (jhash + i * randhash) % map->hashsize; ++ if (map->members[probe] == *hash_ip) ++ return -EEXIST; ++ if (!map->members[probe]) { ++ map->members[probe] = *hash_ip; ++ return 0; ++ } ++ } ++ /* Trigger rehashing */ ++ return -EAGAIN; ++} ++ ++static int ++addip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_iphash *req = ++ (struct ip_set_req_iphash *) data; ++ ++ if (size != sizeof(struct ip_set_req_iphash)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_iphash), ++ size); ++ return -EINVAL; ++ } ++ return __addip((struct ip_set_iphash *) set->data, req->ip, hash_ip); ++} ++ ++static int ++addip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ return __addip((struct ip_set_iphash *) set->data, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ hash_ip); ++} ++ ++static int retry(struct ip_set *set) ++{ ++ struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; ++ ip_set_ip_t hash_ip, *members; ++ u_int32_t i, hashsize; ++ unsigned newbytes; ++ int res; ++ struct ip_set_iphash tmp = { ++ .hashsize = map->hashsize, ++ .probes = map->probes, ++ .resize = map->resize, ++ .netmask = map->netmask, ++ }; ++ ++ if (map->resize == 0) ++ return -ERANGE; ++ ++ again: ++ res = 0; ++ ++ /* Calculate new parameters */ ++ get_random_bytes(&tmp.initval, 4); ++ hashsize = tmp.hashsize + (tmp.hashsize * map->resize)/100; ++ if (hashsize == tmp.hashsize) ++ hashsize++; ++ tmp.prime = make_prime(hashsize); ++ ++ ip_set_printk("rehashing of set %s triggered: " ++ "hashsize grows from %u to %u", ++ set->name, tmp.hashsize, hashsize); ++ tmp.hashsize = hashsize; ++ ++ newbytes = hashsize * sizeof(ip_set_ip_t); ++ tmp.members = ip_set_malloc_atomic(newbytes); ++ if (!tmp.members) { ++ DP("out of memory for %d bytes", newbytes); ++ return -ENOMEM; ++ } ++ memset(tmp.members, 0, newbytes); ++ ++ write_lock_bh(&set->lock); ++ map = (struct ip_set_iphash *) set->data; /* Play safe */ ++ for (i = 0; i < map->hashsize && res == 0; i++) { ++ if (map->members[i]) ++ res = __addip(&tmp, map->members[i], &hash_ip); ++ } ++ if (res) { ++ /* Failure, try again */ ++ write_unlock_bh(&set->lock); ++ ip_set_free(tmp.members, newbytes); ++ goto again; ++ } ++ ++ /* Success at resizing! */ ++ members = map->members; ++ hashsize = map->hashsize; ++ ++ map->initval = tmp.initval; ++ map->prime = tmp.prime; ++ map->hashsize = tmp.hashsize; ++ map->members = tmp.members; ++ write_unlock_bh(&set->lock); ++ ++ ip_set_free(members, hashsize * sizeof(ip_set_ip_t)); ++ ++ return 0; ++} ++ ++static inline int ++__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; ++ ip_set_ip_t id = hash_id(set, ip, hash_ip); ++ ++ if (id == UINT_MAX) ++ return -EEXIST; ++ ++ map->members[id] = 0; ++ return 0; ++} ++ ++static int ++delip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_iphash *req = ++ (struct ip_set_req_iphash *) data; ++ ++ if (size != sizeof(struct ip_set_req_iphash)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_iphash), ++ size); ++ return -EINVAL; ++ } ++ return __delip(set, req->ip, hash_ip); ++} ++ ++static int ++delip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ return __delip(set, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ hash_ip); ++} ++ ++static int create(struct ip_set *set, const void *data, size_t size) ++{ ++ unsigned newbytes; ++ struct ip_set_req_iphash_create *req = ++ (struct ip_set_req_iphash_create *) data; ++ struct ip_set_iphash *map; ++ ++ if (size != sizeof(struct ip_set_req_iphash_create)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_iphash_create), ++ size); ++ return -EINVAL; ++ } ++ ++ if (req->hashsize < 1) { ++ ip_set_printk("hashsize too small"); ++ return -ENOEXEC; ++ } ++ ++ map = kmalloc(sizeof(struct ip_set_iphash), GFP_KERNEL); ++ if (!map) { ++ DP("out of memory for %d bytes", ++ sizeof(struct ip_set_iphash)); ++ return -ENOMEM; ++ } ++ get_random_bytes(&map->initval, 4); ++ map->prime = make_prime(req->hashsize); ++ map->hashsize = req->hashsize; ++ map->probes = req->probes; ++ map->resize = req->resize; ++ map->netmask = req->netmask; ++ newbytes = map->hashsize * sizeof(ip_set_ip_t); ++ map->members = ip_set_malloc(newbytes); ++ if (!map->members) { ++ DP("out of memory for %d bytes", newbytes); ++ kfree(map); ++ return -ENOMEM; ++ } ++ memset(map->members, 0, newbytes); ++ ++ set->data = map; ++ return 0; ++} ++ ++static void destroy(struct ip_set *set) ++{ ++ struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; ++ ++ ip_set_free(map->members, map->hashsize * sizeof(ip_set_ip_t)); ++ kfree(map); ++ ++ set->data = NULL; ++} ++ ++static void flush(struct ip_set *set) ++{ ++ struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; ++ memset(map->members, 0, map->hashsize * sizeof(ip_set_ip_t)); ++} ++ ++static void list_header(const struct ip_set *set, void *data) ++{ ++ struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; ++ struct ip_set_req_iphash_create *header = ++ (struct ip_set_req_iphash_create *) data; ++ ++ header->hashsize = map->hashsize; ++ header->probes = map->probes; ++ header->resize = map->resize; ++ header->netmask = map->netmask; ++} ++ ++static int list_members_size(const struct ip_set *set) ++{ ++ struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; ++ ++ return (map->hashsize * sizeof(ip_set_ip_t)); ++} ++ ++static void list_members(const struct ip_set *set, void *data) ++{ ++ struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; ++ int bytes = map->hashsize * sizeof(ip_set_ip_t); ++ ++ memcpy(data, map->members, bytes); ++} ++ ++static struct ip_set_type ip_set_iphash = { ++ .typename = SETTYPE_NAME, ++ .typecode = IPSET_TYPE_IP, ++ .protocol_version = IP_SET_PROTOCOL_VERSION, ++ .create = &create, ++ .destroy = &destroy, ++ .flush = &flush, ++ .reqsize = sizeof(struct ip_set_req_iphash), ++ .addip = &addip, ++ .addip_kernel = &addip_kernel, ++ .retry = &retry, ++ .delip = &delip, ++ .delip_kernel = &delip_kernel, ++ .testip = &testip, ++ .testip_kernel = &testip_kernel, ++ .header_size = sizeof(struct ip_set_req_iphash_create), ++ .list_header = &list_header, ++ .list_members_size = &list_members_size, ++ .list_members = &list_members, ++ .me = THIS_MODULE, ++}; ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); ++MODULE_DESCRIPTION("iphash type of IP sets"); ++ ++static int __init init(void) ++{ ++ init_max_malloc_size(); ++ return ip_set_register_set_type(&ip_set_iphash); ++} ++ ++static void __exit fini(void) ++{ ++ /* FIXME: possible race with ip_set_create() */ ++ ip_set_unregister_set_type(&ip_set_iphash); ++} ++ ++module_init(init); ++module_exit(fini); +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/ip_set_ipmap.c linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_ipmap.c +--- linux-2.6.12.5/net/ipv4/netfilter/ip_set_ipmap.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_ipmap.c 2005-09-20 13:11:38.802385250 +0200 +@@ -0,0 +1,313 @@ ++/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> ++ * Patrick Schaaf <bof@bof.de> ++ * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++/* Kernel module implementing an IP set type: the single bitmap type */ ++ ++#include <linux/module.h> ++#include <linux/ip.h> ++#include <linux/skbuff.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/netfilter_ipv4/ip_set.h> ++#include <linux/errno.h> ++#include <asm/uaccess.h> ++#include <asm/bitops.h> ++#include <linux/spinlock.h> ++ ++#include <linux/netfilter_ipv4/ip_set_ipmap.h> ++ ++static inline ip_set_ip_t ++ip_to_id(const struct ip_set_ipmap *map, ip_set_ip_t ip) ++{ ++ return (ip - map->first_ip)/map->hosts; ++} ++ ++static inline int ++__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; ++ ++ if (ip < map->first_ip || ip > map->last_ip) ++ return -ERANGE; ++ ++ *hash_ip = ip & map->netmask; ++ DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u", ++ set->name, HIPQUAD(ip), HIPQUAD(*hash_ip)); ++ return !!test_bit(ip_to_id(map, *hash_ip), map->members); ++} ++ ++static int ++testip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_ipmap *req = ++ (struct ip_set_req_ipmap *) data; ++ ++ if (size != sizeof(struct ip_set_req_ipmap)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_ipmap), ++ size); ++ return -EINVAL; ++ } ++ return __testip(set, req->ip, hash_ip); ++} ++ ++static int ++testip_kernel(struct ip_set *set, ++ const struct sk_buff *skb, ++ u_int32_t flags, ++ ip_set_ip_t *hash_ip) ++{ ++ int res; ++ ++ DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u", ++ flags & IPSET_SRC ? "SRC" : "DST", ++ NIPQUAD(skb->nh.iph->saddr), ++ NIPQUAD(skb->nh.iph->daddr)); ++ ++ res = __testip(set, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ hash_ip); ++ return (res < 0 ? 0 : res); ++} ++ ++static inline int ++__addip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; ++ ++ if (ip < map->first_ip || ip > map->last_ip) ++ return -ERANGE; ++ ++ *hash_ip = ip & map->netmask; ++ DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip)); ++ if (test_and_set_bit(ip_to_id(map, *hash_ip), map->members)) ++ return -EEXIST; ++ ++ return 0; ++} ++ ++static int ++addip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_ipmap *req = ++ (struct ip_set_req_ipmap *) data; ++ ++ if (size != sizeof(struct ip_set_req_ipmap)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_ipmap), ++ size); ++ return -EINVAL; ++ } ++ DP("%u.%u.%u.%u", HIPQUAD(req->ip)); ++ return __addip(set, req->ip, hash_ip); ++} ++ ++static int ++addip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ return __addip(set, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ hash_ip); ++} ++ ++static inline int ++__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; ++ ++ if (ip < map->first_ip || ip > map->last_ip) ++ return -ERANGE; ++ ++ *hash_ip = ip & map->netmask; ++ DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip)); ++ if (!test_and_clear_bit(ip_to_id(map, *hash_ip), map->members)) ++ return -EEXIST; ++ ++ return 0; ++} ++ ++static int ++delip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_ipmap *req = ++ (struct ip_set_req_ipmap *) data; ++ ++ if (size != sizeof(struct ip_set_req_ipmap)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_ipmap), ++ size); ++ return -EINVAL; ++ } ++ return __delip(set, req->ip, hash_ip); ++} ++ ++static int ++delip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ return __delip(set, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ hash_ip); ++} ++ ++static int create(struct ip_set *set, const void *data, size_t size) ++{ ++ int newbytes; ++ struct ip_set_req_ipmap_create *req = ++ (struct ip_set_req_ipmap_create *) data; ++ struct ip_set_ipmap *map; ++ ++ if (size != sizeof(struct ip_set_req_ipmap_create)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_ipmap_create), ++ size); ++ return -EINVAL; ++ } ++ ++ DP("from %u.%u.%u.%u to %u.%u.%u.%u", ++ HIPQUAD(req->from), HIPQUAD(req->to)); ++ ++ if (req->from > req->to) { ++ DP("bad ip range"); ++ return -ENOEXEC; ++ } ++ ++ if (req->to - req->from > MAX_RANGE) { ++ ip_set_printk("range too big (max %d addresses)", ++ MAX_RANGE); ++ return -ENOEXEC; ++ } ++ ++ map = kmalloc(sizeof(struct ip_set_ipmap), GFP_KERNEL); ++ if (!map) { ++ DP("out of memory for %d bytes", ++ sizeof(struct ip_set_ipmap)); ++ return -ENOMEM; ++ } ++ map->first_ip = req->from; ++ map->last_ip = req->to; ++ map->netmask = req->netmask; ++ ++ if (req->netmask == 0xFFFFFFFF) { ++ map->hosts = 1; ++ map->sizeid = map->last_ip - map->first_ip + 1; ++ } else { ++ unsigned int mask_bits, netmask_bits; ++ ip_set_ip_t mask; ++ ++ map->first_ip &= map->netmask; /* Should we better bark? */ ++ ++ mask = range_to_mask(map->first_ip, map->last_ip, &mask_bits); ++ netmask_bits = mask_to_bits(map->netmask); ++ ++ if (!mask || netmask_bits <= mask_bits) ++ return -ENOEXEC; ++ ++ map->hosts = 2 << (32 - netmask_bits - 1); ++ map->sizeid = 2 << (netmask_bits - mask_bits - 1); ++ } ++ newbytes = bitmap_bytes(0, map->sizeid - 1); ++ map->members = kmalloc(newbytes, GFP_KERNEL); ++ if (!map->members) { ++ DP("out of memory for %d bytes", newbytes); ++ kfree(map); ++ return -ENOMEM; ++ } ++ memset(map->members, 0, newbytes); ++ ++ set->data = map; ++ return 0; ++} ++ ++static void destroy(struct ip_set *set) ++{ ++ struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; ++ ++ kfree(map->members); ++ kfree(map); ++ ++ set->data = NULL; ++} ++ ++static void flush(struct ip_set *set) ++{ ++ struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; ++ memset(map->members, 0, bitmap_bytes(0, map->sizeid - 1)); ++} ++ ++static void list_header(const struct ip_set *set, void *data) ++{ ++ struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; ++ struct ip_set_req_ipmap_create *header = ++ (struct ip_set_req_ipmap_create *) data; ++ ++ header->from = map->first_ip; ++ header->to = map->last_ip; ++ header->netmask = map->netmask; ++} ++ ++static int list_members_size(const struct ip_set *set) ++{ ++ struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; ++ ++ return bitmap_bytes(0, map->sizeid - 1); ++} ++ ++static void list_members(const struct ip_set *set, void *data) ++{ ++ struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; ++ int bytes = bitmap_bytes(0, map->sizeid - 1); ++ ++ memcpy(data, map->members, bytes); ++} ++ ++static struct ip_set_type ip_set_ipmap = { ++ .typename = SETTYPE_NAME, ++ .typecode = IPSET_TYPE_IP, ++ .protocol_version = IP_SET_PROTOCOL_VERSION, ++ .create = &create, ++ .destroy = &destroy, ++ .flush = &flush, ++ .reqsize = sizeof(struct ip_set_req_ipmap), ++ .addip = &addip, ++ .addip_kernel = &addip_kernel, ++ .delip = &delip, ++ .delip_kernel = &delip_kernel, ++ .testip = &testip, ++ .testip_kernel = &testip_kernel, ++ .header_size = sizeof(struct ip_set_req_ipmap_create), ++ .list_header = &list_header, ++ .list_members_size = &list_members_size, ++ .list_members = &list_members, ++ .me = THIS_MODULE, ++}; ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); ++MODULE_DESCRIPTION("ipmap type of IP sets"); ++ ++static int __init init(void) ++{ ++ return ip_set_register_set_type(&ip_set_ipmap); ++} ++ ++static void __exit fini(void) ++{ ++ /* FIXME: possible race with ip_set_create() */ ++ ip_set_unregister_set_type(&ip_set_ipmap); ++} ++ ++module_init(init); ++module_exit(fini); +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/ip_set_iptree.c linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_iptree.c +--- linux-2.6.12.5/net/ipv4/netfilter/ip_set_iptree.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_iptree.c 2005-09-20 13:11:38.802385250 +0200 +@@ -0,0 +1,510 @@ ++/* Copyright (C) 2005 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++/* Kernel module implementing an IP set type: the iptree type */ ++ ++#include <linux/module.h> ++#include <linux/ip.h> ++#include <linux/skbuff.h> ++#include <linux/slab.h> ++#include <linux/delay.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/netfilter_ipv4/ip_set.h> ++#include <linux/errno.h> ++#include <asm/uaccess.h> ++#include <asm/bitops.h> ++#include <linux/spinlock.h> ++ ++#include <linux/netfilter_ipv4/ip_set_iptree.h> ++ ++/* Garbage collection interval in seconds: */ ++#define IPTREE_GC_TIME 5*60 ++/* Sleep so many milliseconds before trying again ++ * to delete the gc timer at destroying a set */ ++#define IPTREE_DESTROY_SLEEP 100 ++ ++static kmem_cache_t *branch_cachep; ++static kmem_cache_t *leaf_cachep; ++ ++#define ABCD(a,b,c,d,addrp) do { \ ++ a = ((unsigned char *)addrp)[3]; \ ++ b = ((unsigned char *)addrp)[2]; \ ++ c = ((unsigned char *)addrp)[1]; \ ++ d = ((unsigned char *)addrp)[0]; \ ++} while (0) ++ ++#define TESTIP_WALK(map, elem, branch) do { \ ++ if ((map)->tree[elem]) { \ ++ branch = (map)->tree[elem]; \ ++ } else \ ++ return 0; \ ++} while (0) ++ ++static inline int ++__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ struct ip_set_iptreeb *btree; ++ struct ip_set_iptreec *ctree; ++ struct ip_set_iptreed *dtree; ++ unsigned char a,b,c,d; ++ ++ *hash_ip = ip; ++ ABCD(a, b, c, d, hash_ip); ++ DP("%u %u %u %u timeout %u", a, b, c, d, map->timeout); ++ TESTIP_WALK(map, a, btree); ++ TESTIP_WALK(btree, b, ctree); ++ TESTIP_WALK(ctree, c, dtree); ++ DP("%lu %lu", dtree->expires[d], jiffies); ++ return !!(map->timeout ? (time_after(dtree->expires[d], jiffies)) ++ : dtree->expires[d]); ++} ++ ++static int ++testip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_iptree *req = ++ (struct ip_set_req_iptree *) data; ++ ++ if (size != sizeof(struct ip_set_req_iptree)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_iptree), ++ size); ++ return -EINVAL; ++ } ++ return __testip(set, req->ip, hash_ip); ++} ++ ++static int ++testip_kernel(struct ip_set *set, ++ const struct sk_buff *skb, ++ u_int32_t flags, ++ ip_set_ip_t *hash_ip) ++{ ++ int res; ++ ++ DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u", ++ flags & IPSET_SRC ? "SRC" : "DST", ++ NIPQUAD(skb->nh.iph->saddr), ++ NIPQUAD(skb->nh.iph->daddr)); ++ ++ res = __testip(set, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ hash_ip); ++ return (res < 0 ? 0 : res); ++} ++ ++#define ADDIP_WALK(map, elem, branch, type, cachep) do { \ ++ if ((map)->tree[elem]) { \ ++ DP("found %u", elem); \ ++ branch = (map)->tree[elem]; \ ++ } else { \ ++ branch = (type *) \ ++ kmem_cache_alloc(cachep, GFP_KERNEL); \ ++ if (branch == NULL) \ ++ return -ENOMEM; \ ++ memset(branch, 0, sizeof(*branch)); \ ++ (map)->tree[elem] = branch; \ ++ DP("alloc %u", elem); \ ++ } \ ++} while (0) ++ ++static inline int ++__addip(struct ip_set *set, ip_set_ip_t ip, unsigned int timeout, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ struct ip_set_iptreeb *btree; ++ struct ip_set_iptreec *ctree; ++ struct ip_set_iptreed *dtree; ++ unsigned char a,b,c,d; ++ int ret = 0; ++ ++ *hash_ip = ip; ++ ABCD(a, b, c, d, hash_ip); ++ DP("%u %u %u %u timeout %u", a, b, c, d, timeout); ++ ADDIP_WALK(map, a, btree, struct ip_set_iptreeb, branch_cachep); ++ ADDIP_WALK(btree, b, ctree, struct ip_set_iptreec, branch_cachep); ++ ADDIP_WALK(ctree, c, dtree, struct ip_set_iptreed, leaf_cachep); ++ if (dtree->expires[d] ++ && (!map->timeout || time_after(dtree->expires[d], jiffies))) ++ ret = -EEXIST; ++ dtree->expires[d] = map->timeout ? (timeout * HZ + jiffies) : 1; ++ DP("%u %lu", d, dtree->expires[d]); ++ return ret; ++} ++ ++static int ++addip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ struct ip_set_req_iptree *req = ++ (struct ip_set_req_iptree *) data; ++ ++ if (size != sizeof(struct ip_set_req_iptree)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_iptree), ++ size); ++ return -EINVAL; ++ } ++ DP("%u.%u.%u.%u %u", HIPQUAD(req->ip), req->timeout); ++ return __addip(set, req->ip, ++ req->timeout ? req->timeout : map->timeout, ++ hash_ip); ++} ++ ++static int ++addip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ ++ return __addip(set, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ map->timeout, ++ hash_ip); ++} ++ ++#define DELIP_WALK(map, elem, branch) do { \ ++ if ((map)->tree[elem]) { \ ++ branch = (map)->tree[elem]; \ ++ } else \ ++ return -EEXIST; \ ++} while (0) ++ ++static inline int ++__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ struct ip_set_iptreeb *btree; ++ struct ip_set_iptreec *ctree; ++ struct ip_set_iptreed *dtree; ++ unsigned char a,b,c,d; ++ ++ *hash_ip = ip; ++ ABCD(a, b, c, d, hash_ip); ++ DELIP_WALK(map, a, btree); ++ DELIP_WALK(btree, b, ctree); ++ DELIP_WALK(ctree, c, dtree); ++ ++ if (dtree->expires[d]) { ++ dtree->expires[d] = 0; ++ return 0; ++ } ++ return -EEXIST; ++} ++ ++static int ++delip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_iptree *req = ++ (struct ip_set_req_iptree *) data; ++ ++ if (size != sizeof(struct ip_set_req_iptree)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_iptree), ++ size); ++ return -EINVAL; ++ } ++ return __delip(set, req->ip, hash_ip); ++} ++ ++static int ++delip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ return __delip(set, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ hash_ip); ++} ++ ++#define LOOP_WALK_BEGIN(map, i, branch) \ ++ for (i = 0; i < 255; i++) { \ ++ if (!(map)->tree[i]) \ ++ continue; \ ++ branch = (map)->tree[i] ++ ++#define LOOP_WALK_END } ++ ++static void ip_tree_gc(unsigned long ul_set) ++{ ++ struct ip_set *set = (void *) ul_set; ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ struct ip_set_iptreeb *btree; ++ struct ip_set_iptreec *ctree; ++ struct ip_set_iptreed *dtree; ++ unsigned char a,b,c,d; ++ unsigned char i,j,k; ++ ++ i = j = k = 0; ++ DP("gc: %s", set->name); ++ write_lock_bh(&set->lock); ++ LOOP_WALK_BEGIN(map, a, btree); ++ LOOP_WALK_BEGIN(btree, b, ctree); ++ LOOP_WALK_BEGIN(ctree, c, dtree); ++ for (d = 0; d < 255; d++) { ++ if (dtree->expires[d]) { ++ DP("gc: %u %u %u %u: expires %lu jiffies %lu", ++ a, b, c, d, ++ dtree->expires[d], jiffies); ++ if (map->timeout ++ && time_before(dtree->expires[d], jiffies)) ++ dtree->expires[d] = 0; ++ else ++ k = 1; ++ } ++ } ++ if (k == 0) { ++ DP("gc: %s: leaf %u %u %u empty", ++ set->name, a, b, c); ++ kmem_cache_free(leaf_cachep, dtree); ++ ctree->tree[c] = NULL; ++ } else { ++ DP("gc: %s: leaf %u %u %u not empty", ++ set->name, a, b, c); ++ j = 1; ++ k = 0; ++ } ++ LOOP_WALK_END; ++ if (j == 0) { ++ DP("gc: %s: branch %u %u empty", ++ set->name, a, b); ++ kmem_cache_free(branch_cachep, ctree); ++ btree->tree[b] = NULL; ++ } else { ++ DP("gc: %s: branch %u %u not empty", ++ set->name, a, b); ++ i = 1; ++ j = k = 0; ++ } ++ LOOP_WALK_END; ++ if (i == 0) { ++ DP("gc: %s: branch %u empty", ++ set->name, a); ++ kmem_cache_free(branch_cachep, btree); ++ map->tree[a] = NULL; ++ } else { ++ DP("gc: %s: branch %u not empty", ++ set->name, a); ++ i = j = k = 0; ++ } ++ LOOP_WALK_END; ++ write_unlock_bh(&set->lock); ++ ++ map->gc.expires = jiffies + map->gc_interval * HZ; ++ add_timer(&map->gc); ++} ++ ++static int create(struct ip_set *set, const void *data, size_t size) ++{ ++ struct ip_set_req_iptree_create *req = ++ (struct ip_set_req_iptree_create *) data; ++ struct ip_set_iptree *map; ++ ++ if (size != sizeof(struct ip_set_req_iptree_create)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_iptree_create), ++ size); ++ return -EINVAL; ++ } ++ ++ map = kmalloc(sizeof(struct ip_set_iptree), GFP_KERNEL); ++ if (!map) { ++ DP("out of memory for %d bytes", ++ sizeof(struct ip_set_iptree)); ++ return -ENOMEM; ++ } ++ memset(map, 0, sizeof(*map)); ++ map->timeout = req->timeout; ++ set->data = map; ++ ++ /* If there is no timeout for the entries, ++ * we still have to call gc because delete ++ * do not clean up empty branches */ ++ map->gc_interval = IPTREE_GC_TIME; ++ init_timer(&map->gc); ++ map->gc.data = (unsigned long) set; ++ map->gc.function = ip_tree_gc; ++ map->gc.expires = jiffies + map->gc_interval * HZ; ++ add_timer(&map->gc); ++ ++ return 0; ++} ++ ++static void __flush(struct ip_set_iptree *map) ++{ ++ struct ip_set_iptreeb *btree; ++ struct ip_set_iptreec *ctree; ++ struct ip_set_iptreed *dtree; ++ unsigned int a,b,c; ++ ++ LOOP_WALK_BEGIN(map, a, btree); ++ LOOP_WALK_BEGIN(btree, b, ctree); ++ LOOP_WALK_BEGIN(ctree, c, dtree); ++ kmem_cache_free(leaf_cachep, dtree); ++ LOOP_WALK_END; ++ kmem_cache_free(branch_cachep, ctree); ++ LOOP_WALK_END; ++ kmem_cache_free(branch_cachep, btree); ++ LOOP_WALK_END; ++} ++ ++static void destroy(struct ip_set *set) ++{ ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ ++ while (!del_timer(&map->gc)) ++ msleep(IPTREE_DESTROY_SLEEP); ++ __flush(map); ++ kfree(map); ++ set->data = NULL; ++} ++ ++static void flush(struct ip_set *set) ++{ ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ unsigned int timeout = map->timeout; ++ ++ __flush(map); ++ memset(map, 0, sizeof(*map)); ++ map->timeout = timeout; ++} ++ ++static void list_header(const struct ip_set *set, void *data) ++{ ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ struct ip_set_req_iptree_create *header = ++ (struct ip_set_req_iptree_create *) data; ++ ++ header->timeout = map->timeout; ++} ++ ++static int list_members_size(const struct ip_set *set) ++{ ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ struct ip_set_iptreeb *btree; ++ struct ip_set_iptreec *ctree; ++ struct ip_set_iptreed *dtree; ++ unsigned char a,b,c,d; ++ unsigned int count = 0; ++ ++ LOOP_WALK_BEGIN(map, a, btree); ++ LOOP_WALK_BEGIN(btree, b, ctree); ++ LOOP_WALK_BEGIN(ctree, c, dtree); ++ for (d = 0; d < 255; d++) { ++ if (dtree->expires[d] ++ && (!map->timeout || time_after(dtree->expires[d], jiffies))) ++ count++; ++ } ++ LOOP_WALK_END; ++ LOOP_WALK_END; ++ LOOP_WALK_END; ++ ++ DP("members %u", count); ++ return (count * sizeof(struct ip_set_req_iptree)); ++} ++ ++static void list_members(const struct ip_set *set, void *data) ++{ ++ struct ip_set_iptree *map = (struct ip_set_iptree *) set->data; ++ struct ip_set_iptreeb *btree; ++ struct ip_set_iptreec *ctree; ++ struct ip_set_iptreed *dtree; ++ unsigned char a,b,c,d; ++ size_t offset = 0; ++ struct ip_set_req_iptree *entry; ++ ++ LOOP_WALK_BEGIN(map, a, btree); ++ LOOP_WALK_BEGIN(btree, b, ctree); ++ LOOP_WALK_BEGIN(ctree, c, dtree); ++ for (d = 0; d < 255; d++) { ++ if (dtree->expires[d] ++ && (!map->timeout || time_after(dtree->expires[d], jiffies))) { ++ entry = (struct ip_set_req_iptree *)(data + offset); ++ entry->ip = ((a << 24) | (b << 16) | (c << 8) | d); ++ entry->timeout = !map->timeout ? 0 ++ : (dtree->expires[d] - jiffies)/HZ; ++ offset += sizeof(struct ip_set_req_iptree); ++ } ++ } ++ LOOP_WALK_END; ++ LOOP_WALK_END; ++ LOOP_WALK_END; ++} ++ ++static struct ip_set_type ip_set_iptree = { ++ .typename = SETTYPE_NAME, ++ .typecode = IPSET_TYPE_IP, ++ .protocol_version = IP_SET_PROTOCOL_VERSION, ++ .create = &create, ++ .destroy = &destroy, ++ .flush = &flush, ++ .reqsize = sizeof(struct ip_set_req_iptree), ++ .addip = &addip, ++ .addip_kernel = &addip_kernel, ++ .delip = &delip, ++ .delip_kernel = &delip_kernel, ++ .testip = &testip, ++ .testip_kernel = &testip_kernel, ++ .header_size = sizeof(struct ip_set_req_iptree_create), ++ .list_header = &list_header, ++ .list_members_size = &list_members_size, ++ .list_members = &list_members, ++ .me = THIS_MODULE, ++}; ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); ++MODULE_DESCRIPTION("iptree type of IP sets"); ++ ++static int __init init(void) ++{ ++ int ret; ++ ++ branch_cachep = kmem_cache_create("ip_set_iptreeb", ++ sizeof(struct ip_set_iptreeb), ++ 0, 0, NULL, NULL); ++ if (!branch_cachep) { ++ printk(KERN_ERR "Unable to create ip_set_iptreeb slab cache\n"); ++ ret = -ENOMEM; ++ goto out; ++ } ++ leaf_cachep = kmem_cache_create("ip_set_iptreed", ++ sizeof(struct ip_set_iptreed), ++ 0, 0, NULL, NULL); ++ if (!leaf_cachep) { ++ printk(KERN_ERR "Unable to create ip_set_iptreed slab cache\n"); ++ ret = -ENOMEM; ++ goto free_branch; ++ } ++ ret = ip_set_register_set_type(&ip_set_iptree); ++ if (ret == 0) ++ goto out; ++ ++ kmem_cache_destroy(leaf_cachep); ++ free_branch: ++ kmem_cache_destroy(branch_cachep); ++ out: ++ return ret; ++} ++ ++static void __exit fini(void) ++{ ++ /* FIXME: possible race with ip_set_create() */ ++ ip_set_unregister_set_type(&ip_set_iptree); ++ kmem_cache_destroy(leaf_cachep); ++ kmem_cache_destroy(branch_cachep); ++} ++ ++module_init(init); ++module_exit(fini); +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/ip_set_macipmap.c linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_macipmap.c +--- linux-2.6.12.5/net/ipv4/netfilter/ip_set_macipmap.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_macipmap.c 2005-09-20 13:11:38.802385250 +0200 +@@ -0,0 +1,338 @@ ++/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> ++ * Patrick Schaaf <bof@bof.de> ++ * Martin Josefsson <gandalf@wlug.westbo.se> ++ * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++/* Kernel module implementing an IP set type: the macipmap type */ ++ ++#include <linux/module.h> ++#include <linux/ip.h> ++#include <linux/skbuff.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/netfilter_ipv4/ip_set.h> ++#include <linux/errno.h> ++#include <asm/uaccess.h> ++#include <asm/bitops.h> ++#include <linux/spinlock.h> ++#include <linux/if_ether.h> ++#include <linux/vmalloc.h> ++ ++#include <linux/netfilter_ipv4/ip_set_malloc.h> ++#include <linux/netfilter_ipv4/ip_set_macipmap.h> ++ ++static int ++testip(struct ip_set *set, const void *data, size_t size, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_macipmap *map = (struct ip_set_macipmap *) set->data; ++ struct ip_set_macip *table = (struct ip_set_macip *) map->members; ++ struct ip_set_req_macipmap *req = (struct ip_set_req_macipmap *) data; ++ ++ if (size != sizeof(struct ip_set_req_macipmap)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_macipmap), ++ size); ++ return -EINVAL; ++ } ++ ++ if (req->ip < map->first_ip || req->ip > map->last_ip) ++ return -ERANGE; ++ ++ *hash_ip = req->ip; ++ DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u", ++ set->name, HIPQUAD(req->ip), HIPQUAD(*hash_ip)); ++ if (test_bit(IPSET_MACIP_ISSET, ++ (void *) &table[req->ip - map->first_ip].flags)) { ++ return (memcmp(req->ethernet, ++ &table[req->ip - map->first_ip].ethernet, ++ ETH_ALEN) == 0); ++ } else { ++ return (map->flags & IPSET_MACIP_MATCHUNSET ? 1 : 0); ++ } ++} ++ ++static int ++testip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_macipmap *map = ++ (struct ip_set_macipmap *) set->data; ++ struct ip_set_macip *table = ++ (struct ip_set_macip *) map->members; ++ ip_set_ip_t ip; ++ ++ ip = ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr); ++ DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u", ++ flags & IPSET_SRC ? "SRC" : "DST", ++ NIPQUAD(skb->nh.iph->saddr), ++ NIPQUAD(skb->nh.iph->daddr)); ++ ++ if (ip < map->first_ip || ip > map->last_ip) ++ return 0; ++ ++ *hash_ip = ip; ++ DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u", ++ set->name, HIPQUAD(ip), HIPQUAD(*hash_ip)); ++ if (test_bit(IPSET_MACIP_ISSET, ++ (void *) &table[ip - map->first_ip].flags)) { ++ /* Is mac pointer valid? ++ * If so, compare... */ ++ return (skb->mac.raw >= skb->head ++ && (skb->mac.raw + ETH_HLEN) <= skb->data ++ && (memcmp(eth_hdr(skb)->h_source, ++ &table[ip - map->first_ip].ethernet, ++ ETH_ALEN) == 0)); ++ } else { ++ return (map->flags & IPSET_MACIP_MATCHUNSET ? 1 : 0); ++ } ++} ++ ++/* returns 0 on success */ ++static inline int ++__addip(struct ip_set *set, ++ ip_set_ip_t ip, unsigned char *ethernet, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_macipmap *map = ++ (struct ip_set_macipmap *) set->data; ++ struct ip_set_macip *table = ++ (struct ip_set_macip *) map->members; ++ ++ if (ip < map->first_ip || ip > map->last_ip) ++ return -ERANGE; ++ if (test_and_set_bit(IPSET_MACIP_ISSET, ++ (void *) &table[ip - map->first_ip].flags)) ++ return -EEXIST; ++ ++ *hash_ip = ip; ++ DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip)); ++ memcpy(&table[ip - map->first_ip].ethernet, ethernet, ETH_ALEN); ++ return 0; ++} ++ ++static int ++addip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_macipmap *req = ++ (struct ip_set_req_macipmap *) data; ++ ++ if (size != sizeof(struct ip_set_req_macipmap)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_macipmap), ++ size); ++ return -EINVAL; ++ } ++ return __addip(set, req->ip, req->ethernet, hash_ip); ++} ++ ++static int ++addip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ ip_set_ip_t ip; ++ ++ ip = ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr); ++ ++ if (!(skb->mac.raw >= skb->head ++ && (skb->mac.raw + ETH_HLEN) <= skb->data)) ++ return -EINVAL; ++ ++ return __addip(set, ip, eth_hdr(skb)->h_source, hash_ip); ++} ++ ++static inline int ++__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_macipmap *map = ++ (struct ip_set_macipmap *) set->data; ++ struct ip_set_macip *table = ++ (struct ip_set_macip *) map->members; ++ ++ if (ip < map->first_ip || ip > map->last_ip) ++ return -ERANGE; ++ if (!test_and_clear_bit(IPSET_MACIP_ISSET, ++ (void *)&table[ip - map->first_ip].flags)) ++ return -EEXIST; ++ ++ *hash_ip = ip; ++ DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip)); ++ return 0; ++} ++ ++static int ++delip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_macipmap *req = ++ (struct ip_set_req_macipmap *) data; ++ ++ if (size != sizeof(struct ip_set_req_macipmap)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_macipmap), ++ size); ++ return -EINVAL; ++ } ++ return __delip(set, req->ip, hash_ip); ++} ++ ++static int ++delip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ return __delip(set, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ hash_ip); ++} ++ ++static inline size_t members_size(ip_set_id_t from, ip_set_id_t to) ++{ ++ return (size_t)((to - from + 1) * sizeof(struct ip_set_macip)); ++} ++ ++static int create(struct ip_set *set, const void *data, size_t size) ++{ ++ int newbytes; ++ struct ip_set_req_macipmap_create *req = ++ (struct ip_set_req_macipmap_create *) data; ++ struct ip_set_macipmap *map; ++ ++ if (size != sizeof(struct ip_set_req_macipmap_create)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_macipmap_create), ++ size); ++ return -EINVAL; ++ } ++ ++ DP("from %u.%u.%u.%u to %u.%u.%u.%u", ++ HIPQUAD(req->from), HIPQUAD(req->to)); ++ ++ if (req->from > req->to) { ++ DP("bad ip range"); ++ return -ENOEXEC; ++ } ++ ++ if (req->to - req->from > MAX_RANGE) { ++ ip_set_printk("range too big (max %d addresses)", ++ MAX_RANGE); ++ return -ENOEXEC; ++ } ++ ++ map = kmalloc(sizeof(struct ip_set_macipmap), GFP_KERNEL); ++ if (!map) { ++ DP("out of memory for %d bytes", ++ sizeof(struct ip_set_macipmap)); ++ return -ENOMEM; ++ } ++ map->flags = req->flags; ++ map->first_ip = req->from; ++ map->last_ip = req->to; ++ newbytes = members_size(map->first_ip, map->last_ip); ++ map->members = ip_set_malloc(newbytes); ++ if (!map->members) { ++ DP("out of memory for %d bytes", newbytes); ++ kfree(map); ++ return -ENOMEM; ++ } ++ memset(map->members, 0, newbytes); ++ ++ set->data = map; ++ return 0; ++} ++ ++static void destroy(struct ip_set *set) ++{ ++ struct ip_set_macipmap *map = ++ (struct ip_set_macipmap *) set->data; ++ ++ ip_set_free(map->members, members_size(map->first_ip, map->last_ip)); ++ kfree(map); ++ ++ set->data = NULL; ++} ++ ++static void flush(struct ip_set *set) ++{ ++ struct ip_set_macipmap *map = ++ (struct ip_set_macipmap *) set->data; ++ memset(map->members, 0, members_size(map->first_ip, map->last_ip)); ++} ++ ++static void list_header(const struct ip_set *set, void *data) ++{ ++ struct ip_set_macipmap *map = ++ (struct ip_set_macipmap *) set->data; ++ struct ip_set_req_macipmap_create *header = ++ (struct ip_set_req_macipmap_create *) data; ++ ++ DP("list_header %x %x %u", map->first_ip, map->last_ip, ++ map->flags); ++ ++ header->from = map->first_ip; ++ header->to = map->last_ip; ++ header->flags = map->flags; ++} ++ ++static int list_members_size(const struct ip_set *set) ++{ ++ struct ip_set_macipmap *map = ++ (struct ip_set_macipmap *) set->data; ++ ++ return members_size(map->first_ip, map->last_ip); ++} ++ ++static void list_members(const struct ip_set *set, void *data) ++{ ++ struct ip_set_macipmap *map = ++ (struct ip_set_macipmap *) set->data; ++ ++ int bytes = members_size(map->first_ip, map->last_ip); ++ ++ memcpy(data, map->members, bytes); ++} ++ ++static struct ip_set_type ip_set_macipmap = { ++ .typename = SETTYPE_NAME, ++ .typecode = IPSET_TYPE_IP, ++ .protocol_version = IP_SET_PROTOCOL_VERSION, ++ .create = &create, ++ .destroy = &destroy, ++ .flush = &flush, ++ .reqsize = sizeof(struct ip_set_req_macipmap), ++ .addip = &addip, ++ .addip_kernel = &addip_kernel, ++ .delip = &delip, ++ .delip_kernel = &delip_kernel, ++ .testip = &testip, ++ .testip_kernel = &testip_kernel, ++ .header_size = sizeof(struct ip_set_req_macipmap_create), ++ .list_header = &list_header, ++ .list_members_size = &list_members_size, ++ .list_members = &list_members, ++ .me = THIS_MODULE, ++}; ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); ++MODULE_DESCRIPTION("macipmap type of IP sets"); ++ ++static int __init init(void) ++{ ++ init_max_malloc_size(); ++ return ip_set_register_set_type(&ip_set_macipmap); ++} ++ ++static void __exit fini(void) ++{ ++ /* FIXME: possible race with ip_set_create() */ ++ ip_set_unregister_set_type(&ip_set_macipmap); ++} ++ ++module_init(init); ++module_exit(fini); +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/ip_set_nethash.c linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_nethash.c +--- linux-2.6.12.5/net/ipv4/netfilter/ip_set_nethash.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_nethash.c 2005-09-20 13:11:38.802385250 +0200 +@@ -0,0 +1,449 @@ ++/* Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++/* Kernel module implementing a cidr nethash set */ ++ ++#include <linux/module.h> ++#include <linux/ip.h> ++#include <linux/skbuff.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/netfilter_ipv4/ip_set.h> ++#include <linux/errno.h> ++#include <asm/uaccess.h> ++#include <asm/bitops.h> ++#include <linux/spinlock.h> ++#include <linux/vmalloc.h> ++#include <linux/random.h> ++ ++#include <net/ip.h> ++ ++#include <linux/netfilter_ipv4/ip_set_malloc.h> ++#include <linux/netfilter_ipv4/ip_set_nethash.h> ++#include <linux/netfilter_ipv4/ip_set_jhash.h> ++#include <linux/netfilter_ipv4/ip_set_prime.h> ++ ++static inline __u32 ++jhash_ip(const struct ip_set_nethash *map, ip_set_ip_t ip) ++{ ++ return jhash_1word(ip, map->initval); ++} ++ ++static inline __u32 ++randhash_ip(const struct ip_set_nethash *map, ip_set_ip_t ip) ++{ ++ return (1 + ip % map->prime); ++} ++ ++static inline __u32 ++hash_id_cidr(struct ip_set_nethash *map, ++ ip_set_ip_t ip, ++ unsigned char cidr, ++ ip_set_ip_t *hash_ip) ++{ ++ __u32 jhash, randhash, id; ++ u_int16_t i; ++ ++ *hash_ip = pack(ip, cidr); ++ jhash = jhash_ip(map, *hash_ip); ++ randhash = randhash_ip(map, *hash_ip); ++ ++ for (i = 0; i < map->probes; i++) { ++ id = (jhash + i * randhash) % map->hashsize; ++ DP("hash key: %u", id); ++ if (map->members[id] == *hash_ip) ++ return id; ++ } ++ return UINT_MAX; ++} ++ ++static inline __u32 ++hash_id(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; ++ __u32 id = UINT_MAX; ++ int i; ++ ++ for (i = 0; i < 30 && map->cidr[i]; i++) { ++ id = hash_id_cidr(map, ip, map->cidr[i], hash_ip); ++ if (id != UINT_MAX) ++ break; ++ } ++ return id; ++} ++ ++static inline int ++__testip_cidr(struct ip_set *set, ip_set_ip_t ip, unsigned char cidr, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; ++ ++ return (hash_id_cidr(map, ip, cidr, hash_ip) != UINT_MAX); ++} ++ ++static inline int ++__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) ++{ ++ return (hash_id(set, ip, hash_ip) != UINT_MAX); ++} ++ ++static int ++testip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_nethash *req = ++ (struct ip_set_req_nethash *) data; ++ ++ if (size != sizeof(struct ip_set_req_nethash)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_nethash), ++ size); ++ return -EINVAL; ++ } ++ return (req->cidr == 32 ? __testip(set, req->ip, hash_ip) ++ : __testip_cidr(set, req->ip, req->cidr, hash_ip)); ++} ++ ++static int ++testip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ return __testip(set, ++ ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr), ++ hash_ip); ++} ++ ++static inline int ++__addip_base(struct ip_set_nethash *map, ip_set_ip_t ip) ++{ ++ __u32 jhash, randhash, probe; ++ u_int16_t i; ++ ++ jhash = jhash_ip(map, ip); ++ randhash = randhash_ip(map, ip); ++ ++ for (i = 0; i < map->probes; i++) { ++ probe = (jhash + i * randhash) % map->hashsize; ++ if (map->members[probe] == ip) ++ return -EEXIST; ++ if (!map->members[probe]) { ++ map->members[probe] = ip; ++ return 0; ++ } ++ } ++ /* Trigger rehashing */ ++ return -EAGAIN; ++} ++ ++static inline int ++__addip(struct ip_set_nethash *map, ip_set_ip_t ip, unsigned char cidr, ++ ip_set_ip_t *hash_ip) ++{ ++ *hash_ip = pack(ip, cidr); ++ DP("%u.%u.%u.%u/%u, %u.%u.%u.%u", HIPQUAD(ip), cidr, HIPQUAD(*hash_ip)); ++ ++ return __addip_base(map, *hash_ip); ++} ++ ++static void ++update_cidr_sizes(struct ip_set_nethash *map, unsigned char cidr) ++{ ++ unsigned char next; ++ int i; ++ ++ for (i = 0; i < 30 && map->cidr[i]; i++) { ++ if (map->cidr[i] == cidr) { ++ return; ++ } else if (map->cidr[i] < cidr) { ++ next = map->cidr[i]; ++ map->cidr[i] = cidr; ++ cidr = next; ++ } ++ } ++ if (i < 30) ++ map->cidr[i] = cidr; ++} ++ ++static int ++addip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_nethash *req = ++ (struct ip_set_req_nethash *) data; ++ int ret; ++ ++ if (size != sizeof(struct ip_set_req_nethash)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_nethash), ++ size); ++ return -EINVAL; ++ } ++ ret = __addip((struct ip_set_nethash *) set->data, ++ req->ip, req->cidr, hash_ip); ++ ++ if (ret == 0) ++ update_cidr_sizes((struct ip_set_nethash *) set->data, ++ req->cidr); ++ ++ return ret; ++} ++ ++static int ++addip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; ++ int ret = -ERANGE; ++ ip_set_ip_t ip = ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr); ++ ++ if (map->cidr[0]) ++ ret = __addip(map, ip, map->cidr[0], hash_ip); ++ ++ return ret; ++} ++ ++static int retry(struct ip_set *set) ++{ ++ struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; ++ ip_set_ip_t *members; ++ u_int32_t i, hashsize; ++ unsigned newbytes; ++ int res; ++ struct ip_set_nethash tmp = { ++ .hashsize = map->hashsize, ++ .probes = map->probes, ++ .resize = map->resize ++ }; ++ ++ if (map->resize == 0) ++ return -ERANGE; ++ ++ memcpy(tmp.cidr, map->cidr, 30 * sizeof(unsigned char)); ++ again: ++ res = 0; ++ ++ /* Calculate new parameters */ ++ get_random_bytes(&tmp.initval, 4); ++ hashsize = tmp.hashsize + (tmp.hashsize * map->resize)/100; ++ if (hashsize == tmp.hashsize) ++ hashsize++; ++ tmp.prime = make_prime(hashsize); ++ ++ ip_set_printk("rehashing of set %s triggered: " ++ "hashsize grows from %u to %u", ++ set->name, tmp.hashsize, hashsize); ++ tmp.hashsize = hashsize; ++ ++ newbytes = hashsize * sizeof(ip_set_ip_t); ++ tmp.members = ip_set_malloc_atomic(newbytes); ++ if (!tmp.members) { ++ DP("out of memory for %d bytes", newbytes); ++ return -ENOMEM; ++ } ++ memset(tmp.members, 0, newbytes); ++ ++ write_lock_bh(&set->lock); ++ map = (struct ip_set_nethash *) set->data; /* Play safe */ ++ for (i = 0; i < map->hashsize && res == 0; i++) { ++ if (map->members[i]) ++ res = __addip_base(&tmp, map->members[i]); ++ } ++ if (res) { ++ /* Failure, try again */ ++ write_unlock_bh(&set->lock); ++ ip_set_free(tmp.members, newbytes); ++ goto again; ++ } ++ ++ /* Success at resizing! */ ++ members = map->members; ++ hashsize = map->hashsize; ++ ++ map->initval = tmp.initval; ++ map->prime = tmp.prime; ++ map->hashsize = tmp.hashsize; ++ map->members = tmp.members; ++ write_unlock_bh(&set->lock); ++ ++ ip_set_free(members, hashsize * sizeof(ip_set_ip_t)); ++ ++ return 0; ++} ++ ++static inline int ++__delip(struct ip_set_nethash *map, ip_set_ip_t ip, unsigned char cidr, ++ ip_set_ip_t *hash_ip) ++{ ++ ip_set_ip_t id = hash_id_cidr(map, ip, cidr, hash_ip); ++ ++ if (id == UINT_MAX) ++ return -EEXIST; ++ ++ map->members[id] = 0; ++ return 0; ++} ++ ++static int ++delip(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_req_nethash *req = ++ (struct ip_set_req_nethash *) data; ++ ++ if (size != sizeof(struct ip_set_req_nethash)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_nethash), ++ size); ++ return -EINVAL; ++ } ++ /* TODO: no garbage collection in map->cidr */ ++ return __delip((struct ip_set_nethash *) set->data, ++ req->ip, req->cidr, hash_ip); ++} ++ ++static int ++delip_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_ip) ++{ ++ struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; ++ int ret = -ERANGE; ++ ip_set_ip_t ip = ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr ++ : skb->nh.iph->daddr); ++ ++ if (map->cidr[0]) ++ ret = __delip(map, ip, map->cidr[0], hash_ip); ++ ++ return ret; ++} ++ ++static int create(struct ip_set *set, const void *data, size_t size) ++{ ++ unsigned newbytes; ++ struct ip_set_req_nethash_create *req = ++ (struct ip_set_req_nethash_create *) data; ++ struct ip_set_nethash *map; ++ ++ if (size != sizeof(struct ip_set_req_nethash_create)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_nethash_create), ++ size); ++ return -EINVAL; ++ } ++ ++ if (req->hashsize < 1) { ++ ip_set_printk("hashsize too small"); ++ return -ENOEXEC; ++ } ++ ++ map = kmalloc(sizeof(struct ip_set_nethash), GFP_KERNEL); ++ if (!map) { ++ DP("out of memory for %d bytes", ++ sizeof(struct ip_set_nethash)); ++ return -ENOMEM; ++ } ++ get_random_bytes(&map->initval, 4); ++ map->prime = make_prime(req->hashsize); ++ map->hashsize = req->hashsize; ++ map->probes = req->probes; ++ map->resize = req->resize; ++ memset(map->cidr, 0, 30 * sizeof(unsigned char)); ++ newbytes = map->hashsize * sizeof(ip_set_ip_t); ++ map->members = ip_set_malloc(newbytes); ++ if (!map->members) { ++ DP("out of memory for %d bytes", newbytes); ++ kfree(map); ++ return -ENOMEM; ++ } ++ memset(map->members, 0, newbytes); ++ ++ set->data = map; ++ return 0; ++} ++ ++static void destroy(struct ip_set *set) ++{ ++ struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; ++ ++ ip_set_free(map->members, map->hashsize * sizeof(ip_set_ip_t)); ++ kfree(map); ++ ++ set->data = NULL; ++} ++ ++static void flush(struct ip_set *set) ++{ ++ struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; ++ memset(map->members, 0, map->hashsize * sizeof(ip_set_ip_t)); ++ memset(map->cidr, 0, 30 * sizeof(unsigned char)); ++} ++ ++static void list_header(const struct ip_set *set, void *data) ++{ ++ struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; ++ struct ip_set_req_nethash_create *header = ++ (struct ip_set_req_nethash_create *) data; ++ ++ header->hashsize = map->hashsize; ++ header->probes = map->probes; ++ header->resize = map->resize; ++} ++ ++static int list_members_size(const struct ip_set *set) ++{ ++ struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; ++ ++ return (map->hashsize * sizeof(ip_set_ip_t)); ++} ++ ++static void list_members(const struct ip_set *set, void *data) ++{ ++ struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; ++ int bytes = map->hashsize * sizeof(ip_set_ip_t); ++ ++ memcpy(data, map->members, bytes); ++} ++ ++static struct ip_set_type ip_set_nethash = { ++ .typename = SETTYPE_NAME, ++ .typecode = IPSET_TYPE_IP, ++ .protocol_version = IP_SET_PROTOCOL_VERSION, ++ .create = &create, ++ .destroy = &destroy, ++ .flush = &flush, ++ .reqsize = sizeof(struct ip_set_req_nethash), ++ .addip = &addip, ++ .addip_kernel = &addip_kernel, ++ .retry = &retry, ++ .delip = &delip, ++ .delip_kernel = &delip_kernel, ++ .testip = &testip, ++ .testip_kernel = &testip_kernel, ++ .header_size = sizeof(struct ip_set_req_nethash_create), ++ .list_header = &list_header, ++ .list_members_size = &list_members_size, ++ .list_members = &list_members, ++ .me = THIS_MODULE, ++}; ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); ++MODULE_DESCRIPTION("nethash type of IP sets"); ++ ++static int __init init(void) ++{ ++ return ip_set_register_set_type(&ip_set_nethash); ++} ++ ++static void __exit fini(void) ++{ ++ /* FIXME: possible race with ip_set_create() */ ++ ip_set_unregister_set_type(&ip_set_nethash); ++} ++ ++module_init(init); ++module_exit(fini); +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/ip_set_portmap.c linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_portmap.c +--- linux-2.6.12.5/net/ipv4/netfilter/ip_set_portmap.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/ip_set_portmap.c 2005-09-20 13:11:38.802385250 +0200 +@@ -0,0 +1,325 @@ ++/* Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++/* Kernel module implementing a port set type as a bitmap */ ++ ++#include <linux/module.h> ++#include <linux/ip.h> ++#include <linux/tcp.h> ++#include <linux/udp.h> ++#include <linux/skbuff.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/netfilter_ipv4/ip_set.h> ++#include <linux/errno.h> ++#include <asm/uaccess.h> ++#include <asm/bitops.h> ++#include <linux/spinlock.h> ++ ++#include <net/ip.h> ++ ++#include <linux/netfilter_ipv4/ip_set_portmap.h> ++ ++/* We must handle non-linear skbs */ ++static inline ip_set_ip_t ++get_port(const struct sk_buff *skb, u_int32_t flags) ++{ ++ struct iphdr *iph = skb->nh.iph; ++ u_int16_t offset = ntohs(iph->frag_off) & IP_OFFSET; ++ ++ switch (iph->protocol) { ++ case IPPROTO_TCP: { ++ struct tcphdr tcph; ++ ++ /* See comments at tcp_match in ip_tables.c */ ++ if (offset) ++ return INVALID_PORT; ++ ++ if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) ++ /* No choice either */ ++ return INVALID_PORT; ++ ++ return ntohs(flags & IPSET_SRC ? ++ tcph.source : tcph.dest); ++ } ++ case IPPROTO_UDP: { ++ struct udphdr udph; ++ ++ if (offset) ++ return INVALID_PORT; ++ ++ if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) ++ /* No choice either */ ++ return INVALID_PORT; ++ ++ return ntohs(flags & IPSET_SRC ? ++ udph.source : udph.dest); ++ } ++ default: ++ return INVALID_PORT; ++ } ++} ++ ++static inline int ++__testport(struct ip_set *set, ip_set_ip_t port, ip_set_ip_t *hash_port) ++{ ++ struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; ++ ++ if (port < map->first_port || port > map->last_port) ++ return -ERANGE; ++ ++ *hash_port = port; ++ DP("set: %s, port:%u, %u", set->name, port, *hash_port); ++ return !!test_bit(port - map->first_port, map->members); ++} ++ ++static int ++testport(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_port) ++{ ++ struct ip_set_req_portmap *req = ++ (struct ip_set_req_portmap *) data; ++ ++ if (size != sizeof(struct ip_set_req_portmap)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_portmap), ++ size); ++ return -EINVAL; ++ } ++ return __testport(set, req->port, hash_port); ++} ++ ++static int ++testport_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_port) ++{ ++ int res; ++ ip_set_ip_t port = get_port(skb, flags); ++ ++ DP("flag %s port %u", flags & IPSET_SRC ? "SRC" : "DST", port); ++ if (port == INVALID_PORT) ++ return 0; ++ ++ res = __testport(set, port, hash_port); ++ ++ return (res < 0 ? 0 : res); ++} ++ ++static inline int ++__addport(struct ip_set *set, ip_set_ip_t port, ip_set_ip_t *hash_port) ++{ ++ struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; ++ ++ if (port < map->first_port || port > map->last_port) ++ return -ERANGE; ++ if (test_and_set_bit(port - map->first_port, map->members)) ++ return -EEXIST; ++ ++ *hash_port = port; ++ DP("port %u", port); ++ return 0; ++} ++ ++static int ++addport(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_port) ++{ ++ struct ip_set_req_portmap *req = ++ (struct ip_set_req_portmap *) data; ++ ++ if (size != sizeof(struct ip_set_req_portmap)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_portmap), ++ size); ++ return -EINVAL; ++ } ++ return __addport(set, req->port, hash_port); ++} ++ ++static int ++addport_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_port) ++{ ++ ip_set_ip_t port = get_port(skb, flags); ++ ++ if (port == INVALID_PORT) ++ return -EINVAL; ++ ++ return __addport(set, port, hash_port); ++} ++ ++static inline int ++__delport(struct ip_set *set, ip_set_ip_t port, ip_set_ip_t *hash_port) ++{ ++ struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; ++ ++ if (port < map->first_port || port > map->last_port) ++ return -ERANGE; ++ if (!test_and_clear_bit(port - map->first_port, map->members)) ++ return -EEXIST; ++ ++ *hash_port = port; ++ DP("port %u", port); ++ return 0; ++} ++ ++static int ++delport(struct ip_set *set, const void *data, size_t size, ++ ip_set_ip_t *hash_port) ++{ ++ struct ip_set_req_portmap *req = ++ (struct ip_set_req_portmap *) data; ++ ++ if (size != sizeof(struct ip_set_req_portmap)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_portmap), ++ size); ++ return -EINVAL; ++ } ++ return __delport(set, req->port, hash_port); ++} ++ ++static int ++delport_kernel(struct ip_set *set, const struct sk_buff *skb, ++ u_int32_t flags, ip_set_ip_t *hash_port) ++{ ++ ip_set_ip_t port = get_port(skb, flags); ++ ++ if (port == INVALID_PORT) ++ return -EINVAL; ++ ++ return __delport(set, port, hash_port); ++} ++ ++static int create(struct ip_set *set, const void *data, size_t size) ++{ ++ int newbytes; ++ struct ip_set_req_portmap_create *req = ++ (struct ip_set_req_portmap_create *) data; ++ struct ip_set_portmap *map; ++ ++ if (size != sizeof(struct ip_set_req_portmap_create)) { ++ ip_set_printk("data length wrong (want %zu, have %zu)", ++ sizeof(struct ip_set_req_portmap_create), ++ size); ++ return -EINVAL; ++ } ++ ++ DP("from %u to %u", req->from, req->to); ++ ++ if (req->from > req->to) { ++ DP("bad port range"); ++ return -ENOEXEC; ++ } ++ ++ if (req->to - req->from > MAX_RANGE) { ++ ip_set_printk("range too big (max %d ports)", ++ MAX_RANGE); ++ return -ENOEXEC; ++ } ++ ++ map = kmalloc(sizeof(struct ip_set_portmap), GFP_KERNEL); ++ if (!map) { ++ DP("out of memory for %d bytes", ++ sizeof(struct ip_set_portmap)); ++ return -ENOMEM; ++ } ++ map->first_port = req->from; ++ map->last_port = req->to; ++ newbytes = bitmap_bytes(req->from, req->to); ++ map->members = kmalloc(newbytes, GFP_KERNEL); ++ if (!map->members) { ++ DP("out of memory for %d bytes", newbytes); ++ kfree(map); ++ return -ENOMEM; ++ } ++ memset(map->members, 0, newbytes); ++ ++ set->data = map; ++ return 0; ++} ++ ++static void destroy(struct ip_set *set) ++{ ++ struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; ++ ++ kfree(map->members); ++ kfree(map); ++ ++ set->data = NULL; ++} ++ ++static void flush(struct ip_set *set) ++{ ++ struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; ++ memset(map->members, 0, bitmap_bytes(map->first_port, map->last_port)); ++} ++ ++static void list_header(const struct ip_set *set, void *data) ++{ ++ struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; ++ struct ip_set_req_portmap_create *header = ++ (struct ip_set_req_portmap_create *) data; ++ ++ DP("list_header %u %u", map->first_port, map->last_port); ++ ++ header->from = map->first_port; ++ header->to = map->last_port; ++} ++ ++static int list_members_size(const struct ip_set *set) ++{ ++ struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; ++ ++ return bitmap_bytes(map->first_port, map->last_port); ++} ++ ++static void list_members(const struct ip_set *set, void *data) ++{ ++ struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; ++ int bytes = bitmap_bytes(map->first_port, map->last_port); ++ ++ memcpy(data, map->members, bytes); ++} ++ ++static struct ip_set_type ip_set_portmap = { ++ .typename = SETTYPE_NAME, ++ .typecode = IPSET_TYPE_PORT, ++ .protocol_version = IP_SET_PROTOCOL_VERSION, ++ .create = &create, ++ .destroy = &destroy, ++ .flush = &flush, ++ .reqsize = sizeof(struct ip_set_req_portmap), ++ .addip = &addport, ++ .addip_kernel = &addport_kernel, ++ .delip = &delport, ++ .delip_kernel = &delport_kernel, ++ .testip = &testport, ++ .testip_kernel = &testport_kernel, ++ .header_size = sizeof(struct ip_set_req_portmap_create), ++ .list_header = &list_header, ++ .list_members_size = &list_members_size, ++ .list_members = &list_members, ++ .me = THIS_MODULE, ++}; ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); ++MODULE_DESCRIPTION("portmap type of IP sets"); ++ ++static int __init init(void) ++{ ++ return ip_set_register_set_type(&ip_set_portmap); ++} ++ ++static void __exit fini(void) ++{ ++ /* FIXME: possible race with ip_set_create() */ ++ ip_set_unregister_set_type(&ip_set_portmap); ++} ++ ++module_init(init); ++module_exit(fini); +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/ipt_set.c linux-2.6.12.5-ipset/net/ipv4/netfilter/ipt_set.c +--- linux-2.6.12.5/net/ipv4/netfilter/ipt_set.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/ipt_set.c 2005-09-20 13:11:38.802385250 +0200 +@@ -0,0 +1,112 @@ ++/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> ++ * Patrick Schaaf <bof@bof.de> ++ * Martin Josefsson <gandalf@wlug.westbo.se> ++ * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++/* Kernel module to match an IP set. */ ++ ++#include <linux/module.h> ++#include <linux/ip.h> ++#include <linux/skbuff.h> ++ ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/netfilter_ipv4/ip_set.h> ++#include <linux/netfilter_ipv4/ipt_set.h> ++ ++static inline int ++match_set(const struct ipt_set_info *info, ++ const struct sk_buff *skb, ++ int inv) ++{ ++ if (ip_set_testip_kernel(info->index, skb, info->flags)) ++ inv = !inv; ++ return inv; ++} ++ ++static int ++match(const struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ const void *matchinfo, ++ int offset, ++ int *hotdrop) ++{ ++ const struct ipt_set_info_match *info = matchinfo; ++ ++ return match_set(&info->match_set, ++ skb, ++ info->match_set.flags[0] & IPSET_MATCH_INV); ++} ++ ++static int ++checkentry(const char *tablename, ++ const struct ipt_ip *ip, ++ void *matchinfo, ++ unsigned int matchsize, ++ unsigned int hook_mask) ++{ ++ struct ipt_set_info_match *info = ++ (struct ipt_set_info_match *) matchinfo; ++ ip_set_id_t index; ++ ++ if (matchsize != IPT_ALIGN(sizeof(struct ipt_set_info_match))) { ++ ip_set_printk("invalid matchsize %d", matchsize); ++ return 0; ++ } ++ ++ index = ip_set_get_byindex(info->match_set.index); ++ ++ if (index == IP_SET_INVALID_ID) { ++ ip_set_printk("Cannot find set indentified by id %u to match", ++ info->match_set.index); ++ return 0; /* error */ ++ } ++ if (info->match_set.flags[IP_SET_MAX_BINDINGS] != 0) { ++ ip_set_printk("That's nasty!"); ++ return 0; /* error */ ++ } ++ ++ return 1; ++} ++ ++static void destroy(void *matchinfo, unsigned int matchsize) ++{ ++ struct ipt_set_info_match *info = matchinfo; ++ ++ if (matchsize != IPT_ALIGN(sizeof(struct ipt_set_info_match))) { ++ ip_set_printk("invalid matchsize %d", matchsize); ++ return; ++ } ++ ++ ip_set_put(info->match_set.index); ++} ++ ++static struct ipt_match set_match = { ++ .name = "set", ++ .match = &match, ++ .checkentry = &checkentry, ++ .destroy = &destroy, ++ .me = THIS_MODULE ++}; ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); ++MODULE_DESCRIPTION("iptables IP set match module"); ++ ++static int __init init(void) ++{ ++ return ipt_register_match(&set_match); ++} ++ ++static void __exit fini(void) ++{ ++ ipt_unregister_match(&set_match); ++} ++ ++module_init(init); ++module_exit(fini); +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/ipt_SET.c linux-2.6.12.5-ipset/net/ipv4/netfilter/ipt_SET.c +--- linux-2.6.12.5/net/ipv4/netfilter/ipt_SET.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/ipt_SET.c 2005-09-20 13:11:38.802385250 +0200 +@@ -0,0 +1,128 @@ ++/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu> ++ * Patrick Schaaf <bof@bof.de> ++ * Martin Josefsson <gandalf@wlug.westbo.se> ++ * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ */ ++ ++/* ipt_SET.c - netfilter target to manipulate IP sets */ ++ ++#include <linux/types.h> ++#include <linux/ip.h> ++#include <linux/timer.h> ++#include <linux/module.h> ++#include <linux/netfilter.h> ++#include <linux/netdevice.h> ++#include <linux/if.h> ++#include <linux/inetdevice.h> ++#include <net/protocol.h> ++#include <net/checksum.h> ++#include <linux/netfilter_ipv4.h> ++#include <linux/netfilter_ipv4/ip_nat_rule.h> ++#include <linux/netfilter_ipv4/ipt_set.h> ++ ++static unsigned int ++target(struct sk_buff **pskb, ++ const struct net_device *in, ++ const struct net_device *out, ++ unsigned int hooknum, ++ const void *targinfo, ++ void *userinfo) ++{ ++ const struct ipt_set_info_target *info = targinfo; ++ ++ if (info->add_set.index != IP_SET_INVALID_ID) ++ ip_set_addip_kernel(info->add_set.index, ++ *pskb, ++ info->add_set.flags); ++ if (info->del_set.index != IP_SET_INVALID_ID) ++ ip_set_delip_kernel(info->del_set.index, ++ *pskb, ++ info->del_set.flags); ++ ++ return IPT_CONTINUE; ++} ++ ++static int ++checkentry(const char *tablename, ++ const struct ipt_entry *e, ++ void *targinfo, ++ unsigned int targinfosize, unsigned int hook_mask) ++{ ++ struct ipt_set_info_target *info = ++ (struct ipt_set_info_target *) targinfo; ++ ip_set_id_t index; ++ ++ if (targinfosize != IPT_ALIGN(sizeof(*info))) { ++ DP("bad target info size %u", targinfosize); ++ return 0; ++ } ++ ++ if (info->add_set.index != IP_SET_INVALID_ID) { ++ index = ip_set_get_byindex(info->add_set.index); ++ if (index == IP_SET_INVALID_ID) { ++ ip_set_printk("cannot find add_set index %u as target", ++ info->add_set.index); ++ return 0; /* error */ ++ } ++ } ++ ++ if (info->del_set.index != IP_SET_INVALID_ID) { ++ index = ip_set_get_byindex(info->del_set.index); ++ if (index == IP_SET_INVALID_ID) { ++ ip_set_printk("cannot find del_set index %u as target", ++ info->del_set.index); ++ return 0; /* error */ ++ } ++ } ++ if (info->add_set.flags[IP_SET_MAX_BINDINGS] != 0 ++ || info->del_set.flags[IP_SET_MAX_BINDINGS] != 0) { ++ ip_set_printk("That's nasty!"); ++ return 0; /* error */ ++ } ++ ++ return 1; ++} ++ ++static void destroy(void *targetinfo, unsigned int targetsize) ++{ ++ struct ipt_set_info_target *info = targetinfo; ++ ++ if (targetsize != IPT_ALIGN(sizeof(struct ipt_set_info_target))) { ++ ip_set_printk("invalid targetsize %d", targetsize); ++ return; ++ } ++ ++ if (info->add_set.index != IP_SET_INVALID_ID) ++ ip_set_put(info->add_set.index); ++ if (info->del_set.index != IP_SET_INVALID_ID) ++ ip_set_put(info->del_set.index); ++} ++ ++static struct ipt_target SET_target = { ++ .name = "SET", ++ .target = target, ++ .checkentry = checkentry, ++ .destroy = destroy, ++ .me = THIS_MODULE ++}; ++ ++MODULE_LICENSE("GPL"); ++MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); ++MODULE_DESCRIPTION("iptables IP set target module"); ++ ++static int __init init(void) ++{ ++ return ipt_register_target(&SET_target); ++} ++ ++static void __exit fini(void) ++{ ++ ipt_unregister_target(&SET_target); ++} ++ ++module_init(init); ++module_exit(fini); +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/Kconfig linux-2.6.12.5-ipset/net/ipv4/netfilter/Kconfig +--- linux-2.6.12.5/net/ipv4/netfilter/Kconfig 2005-08-15 02:20:18.000000000 +0200 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/Kconfig 2005-09-20 13:11:38.802385250 +0200 +@@ -692,5 +692,106 @@ + Allows altering the ARP packet payload: source and destination + hardware and network addresses. + ++config IP_NF_SET ++ tristate "IP set support" ++ depends on INET && NETFILTER ++ help ++ This option adds IP set support to the kernel. ++ In order to define and use sets, you need the userspace utility ++ ipset(8). ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config IP_NF_SET_MAX ++ int "Maximum number of IP sets" ++ default 256 ++ range 2 65534 ++ depends on IP_NF_SET ++ help ++ You can define here default value of the maximum number ++ of IP sets for the kernel. ++ ++ The value can be overriden by the 'max_sets' module ++ parameter of the 'ip_set' module. ++ ++config IP_NF_SET_HASHSIZE ++ int "Hash size for bindings of IP sets" ++ default 1024 ++ depends on IP_NF_SET ++ help ++ You can define here default value of the hash size for ++ bindings of IP sets. ++ ++ The value can be overriden by the 'hash_size' module ++ parameter of the 'ip_set' module. ++ ++config IP_NF_SET_IPMAP ++ tristate "ipmap set support" ++ depends on IP_NF_SET ++ help ++ This option adds the ipmap set type support. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config IP_NF_SET_MACIPMAP ++ tristate "macipmap set support" ++ depends on IP_NF_SET ++ help ++ This option adds the macipmap set type support. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config IP_NF_SET_PORTMAP ++ tristate "portmap set support" ++ depends on IP_NF_SET ++ help ++ This option adds the portmap set type support. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config IP_NF_SET_IPHASH ++ tristate "iphash set support" ++ depends on IP_NF_SET ++ help ++ This option adds the iphash set type support. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config IP_NF_SET_NETHASH ++ tristate "nethash set support" ++ depends on IP_NF_SET ++ help ++ This option adds the nethash set type support. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config IP_NF_SET_IPTREE ++ tristate "iptree set support" ++ depends on IP_NF_SET ++ help ++ This option adds the iptree set type support. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config IP_NF_MATCH_SET ++ tristate "set match support" ++ depends on IP_NF_SET ++ help ++ Set matching matches against given IP sets. ++ You need the ipset utility to create and set up the sets. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++config IP_NF_TARGET_SET ++ tristate "SET target support" ++ depends on IP_NF_SET ++ help ++ The SET target makes possible to add/delete entries ++ in IP sets. ++ You need the ipset utility to create and set up the sets. ++ ++ To compile it as a module, choose M here. If unsure, say N. ++ ++ + endmenu + +diff -Nur linux-2.6.12.5/net/ipv4/netfilter/Makefile linux-2.6.12.5-ipset/net/ipv4/netfilter/Makefile +--- linux-2.6.12.5/net/ipv4/netfilter/Makefile 2005-08-15 02:20:18.000000000 +0200 ++++ linux-2.6.12.5-ipset/net/ipv4/netfilter/Makefile 2005-09-20 13:11:38.818386250 +0200 +@@ -39,6 +39,7 @@ + obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o + obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o + obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o ++obj-$(CONFIG_IP_NF_MATCH_SET) += ipt_set.o + obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o + obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o + obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o +@@ -77,6 +78,16 @@ + obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o + obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o + obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o ++obj-$(CONFIG_IP_NF_TARGET_SET) += ipt_SET.o ++ ++# sets ++obj-$(CONFIG_IP_NF_SET) += ip_set.o ++obj-$(CONFIG_IP_NF_SET_IPMAP) += ip_set_ipmap.o ++obj-$(CONFIG_IP_NF_SET_PORTMAP) += ip_set_portmap.o ++obj-$(CONFIG_IP_NF_SET_MACIPMAP) += ip_set_macipmap.o ++obj-$(CONFIG_IP_NF_SET_IPHASH) += ip_set_iphash.o ++obj-$(CONFIG_IP_NF_SET_NETHASH) += ip_set_nethash.o ++obj-$(CONFIG_IP_NF_SET_IPTREE) += ip_set_iptree.o + obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o + + # generic ARP tables diff --git a/openwrt/target/linux/generic-2.6/patches/104-pf_ring.patch b/openwrt/target/linux/generic-2.6/patches/104-pf_ring.patch new file mode 100644 index 0000000000..527e5d5337 --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/104-pf_ring.patch @@ -0,0 +1,1833 @@ +diff --unified --recursive --new-file linux-2.6.15-rc6/include/linux/ring.h linux-2.6.15-rc6-1-686-smp-ring3/include/linux/ring.h +--- linux-2.6.15-rc6/include/linux/ring.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.15-rc6-1-686-smp-ring3/include/linux/ring.h 2005-12-24 00:24:01.000000000 +0100 +@@ -0,0 +1,107 @@ ++/* ++ * Definitions for packet ring ++ * ++ * 2004 - Luca Deri <deri@ntop.org> ++ */ ++#ifndef __RING_H ++#define __RING_H ++ ++ ++#define INCLUDE_MAC_INFO ++ ++#ifdef INCLUDE_MAC_INFO ++#define SKB_DISPLACEMENT 14 /* Include MAC address information */ ++#else ++#define SKB_DISPLACEMENT 0 /* Do NOT include MAC address information */ ++#endif ++ ++#define RING_MAGIC ++#define RING_MAGIC_VALUE 0x88 ++#define RING_FLOWSLOT_VERSION 6 ++#define RING_VERSION "3.1" ++ ++#define SO_ADD_TO_CLUSTER 99 ++#define SO_REMOVE_FROM_CLUSTER 100 ++#define SO_SET_REFLECTOR 101 ++ ++/* *********************************** */ ++ ++#ifndef HAVE_PCAP ++struct pcap_pkthdr { ++ struct timeval ts; /* time stamp */ ++ u_int32_t caplen; /* length of portion present */ ++ u_int32_t len; /* length this packet (off wire) */ ++}; ++#endif ++ ++/* *********************************** */ ++ ++enum cluster_type { ++ cluster_per_flow = 0, ++ cluster_round_robin ++}; ++ ++/* *********************************** */ ++ ++#define RING_MIN_SLOT_SIZE (60+sizeof(struct pcap_pkthdr)) ++#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pcap_pkthdr)) ++ ++/* *********************************** */ ++ ++typedef struct flowSlotInfo { ++ u_int16_t version, sample_rate; ++ u_int32_t tot_slots, slot_len, data_len, tot_mem; ++ ++ u_int64_t tot_pkts, tot_lost; ++ u_int64_t tot_insert, tot_read; ++ u_int32_t insert_idx, remove_idx; ++} FlowSlotInfo; ++ ++/* *********************************** */ ++ ++typedef struct flowSlot { ++#ifdef RING_MAGIC ++ u_char magic; /* It must alwasy be zero */ ++#endif ++ u_char slot_state; /* 0=empty, 1=full */ ++ u_char bucket; /* bucket[bucketLen] */ ++} FlowSlot; ++ ++/* *********************************** */ ++ ++#ifdef __KERNEL__ ++ ++FlowSlotInfo* getRingPtr(void); ++int allocateRing(char *deviceName, u_int numSlots, ++ u_int bucketLen, u_int sampleRate); ++unsigned int pollRing(struct file *fp, struct poll_table_struct * wait); ++void deallocateRing(void); ++ ++/* ************************* */ ++ ++typedef int (*handle_ring_skb)(struct sk_buff *skb, ++ u_char recv_packet, u_char real_skb); ++extern handle_ring_skb get_skb_ring_handler(void); ++extern void set_skb_ring_handler(handle_ring_skb the_handler); ++extern void do_skb_ring_handler(struct sk_buff *skb, ++ u_char recv_packet, u_char real_skb); ++ ++typedef int (*handle_ring_buffer)(struct net_device *dev, ++ char *data, int len); ++extern handle_ring_buffer get_buffer_ring_handler(void); ++extern void set_buffer_ring_handler(handle_ring_buffer the_handler); ++extern int do_buffer_ring_handler(struct net_device *dev, ++ char *data, int len); ++#endif /* __KERNEL__ */ ++ ++/* *********************************** */ ++ ++#define PF_RING 27 /* Packet Ring */ ++#define SOCK_RING PF_RING ++ ++/* ioctl() */ ++#define SIORINGPOLL 0x8888 ++ ++/* *********************************** */ ++ ++#endif /* __RING_H */ +diff --unified --recursive --new-file linux-2.6.15-rc6/net/core/dev.c linux-2.6.15-rc6-1-686-smp-ring3/net/core/dev.c +--- linux-2.6.15-rc6/net/core/dev.c 2005-12-19 01:36:54.000000000 +0100 ++++ linux-2.6.15-rc6-1-686-smp-ring3/net/core/dev.c 2005-12-24 00:24:02.000000000 +0100 +@@ -115,6 +115,56 @@ + #endif /* CONFIG_NET_RADIO */ + #include <asm/current.h> + ++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) ++ ++/* #define RING_DEBUG */ ++ ++#include <linux/ring.h> ++#include <linux/version.h> ++ ++static handle_ring_skb ring_handler = NULL; ++ ++handle_ring_skb get_skb_ring_handler() { return(ring_handler); } ++ ++void set_skb_ring_handler(handle_ring_skb the_handler) { ++ ring_handler = the_handler; ++} ++ ++void do_skb_ring_handler(struct sk_buff *skb, ++ u_char recv_packet, u_char real_skb) { ++ if(ring_handler) ++ ring_handler(skb, recv_packet, real_skb); ++} ++ ++/* ******************* */ ++ ++static handle_ring_buffer buffer_ring_handler = NULL; ++ ++handle_ring_buffer get_buffer_ring_handler() { return(buffer_ring_handler); } ++ ++void set_buffer_ring_handler(handle_ring_buffer the_handler) { ++ buffer_ring_handler = the_handler; ++} ++ ++int do_buffer_ring_handler(struct net_device *dev, char *data, int len) { ++ if(buffer_ring_handler) { ++ buffer_ring_handler(dev, data, len); ++ return(1); ++ } else ++ return(0); ++} ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++EXPORT_SYMBOL(get_skb_ring_handler); ++EXPORT_SYMBOL(set_skb_ring_handler); ++EXPORT_SYMBOL(do_skb_ring_handler); ++ ++EXPORT_SYMBOL(get_buffer_ring_handler); ++EXPORT_SYMBOL(set_buffer_ring_handler); ++EXPORT_SYMBOL(do_buffer_ring_handler); ++#endif ++ ++#endif + /* + * The list of packet types we will receive (as opposed to discard) + * and the routines to invoke. +@@ -1296,6 +1346,10 @@ + skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); + #endif + if (q->enqueue) { ++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) ++ if(ring_handler) ring_handler(skb, 0, 1); ++#endif /* CONFIG_RING */ ++ + /* Grab device queue */ + spin_lock(&dev->queue_lock); + +@@ -1437,6 +1491,13 @@ + + preempt_disable(); + err = netif_rx(skb); ++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) ++ if(ring_handler && ring_handler(skb, 1, 1)) { ++ /* The packet has been copied into a ring */ ++ return(NET_RX_SUCCESS); ++ } ++#endif /* CONFIG_RING */ ++ + if (local_softirq_pending()) + do_softirq(); + preempt_enable(); +@@ -1582,6 +1643,13 @@ + struct net_device *orig_dev; + int ret = NET_RX_DROP; + unsigned short type; ++#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) ++ if(ring_handler && ring_handler(skb, 1, 1)) { ++ /* The packet has been copied into a ring */ ++ return(NET_RX_SUCCESS); ++ } ++#endif /* CONFIG_RING */ ++ + + /* if we've gotten here through NAPI, check netpoll */ + if (skb->dev->poll && netpoll_rx(skb)) +diff --unified --recursive --new-file linux-2.6.15-rc6/net/Kconfig linux-2.6.15-rc6-1-686-smp-ring3/net/Kconfig +--- linux-2.6.15-rc6/net/Kconfig 2005-12-19 01:36:54.000000000 +0100 ++++ linux-2.6.15-rc6-1-686-smp-ring3/net/Kconfig 2005-12-24 00:24:02.000000000 +0100 +@@ -31,6 +31,7 @@ + source "net/unix/Kconfig" + source "net/xfrm/Kconfig" + ++source "net/ring/Kconfig" + config INET + bool "TCP/IP networking" + ---help--- +diff --unified --recursive --new-file linux-2.6.15-rc6/net/Makefile linux-2.6.15-rc6-1-686-smp-ring3/net/Makefile +--- linux-2.6.15-rc6/net/Makefile 2005-12-19 01:36:54.000000000 +0100 ++++ linux-2.6.15-rc6-1-686-smp-ring3/net/Makefile 2005-12-24 00:24:02.000000000 +0100 +@@ -42,6 +42,7 @@ + obj-$(CONFIG_DECNET) += decnet/ + obj-$(CONFIG_ECONET) += econet/ + obj-$(CONFIG_VLAN_8021Q) += 8021q/ ++obj-$(CONFIG_RING) += ring/ + obj-$(CONFIG_IP_DCCP) += dccp/ + obj-$(CONFIG_IP_SCTP) += sctp/ + obj-$(CONFIG_IEEE80211) += ieee80211/ +diff --unified --recursive --new-file linux-2.6.15-rc6/net/ring/Kconfig linux-2.6.15-rc6-1-686-smp-ring3/net/ring/Kconfig +--- linux-2.6.15-rc6/net/ring/Kconfig 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.15-rc6-1-686-smp-ring3/net/ring/Kconfig 2005-12-24 00:24:02.000000000 +0100 +@@ -0,0 +1,14 @@ ++config RING ++ tristate "PF_RING sockets (EXPERIMENTAL)" ++ depends on EXPERIMENTAL ++ ---help--- ++ PF_RING socket family, optimized for packet capture. ++ If a PF_RING socket is bound to an adapter (via the bind() system ++ call), such adapter will be used in read-only mode until the socket ++ is destroyed. Whenever an incoming packet is received from the adapter ++ it will not passed to upper layers, but instead it is copied to a ring ++ buffer, which in turn is exported to user space applications via mmap. ++ Please refer to http://luca.ntop.org/Ring.pdf for more. ++ ++ Say N unless you know what you are doing. ++ +diff --unified --recursive --new-file linux-2.6.15-rc6/net/ring/Makefile linux-2.6.15-rc6-1-686-smp-ring3/net/ring/Makefile +--- linux-2.6.15-rc6/net/ring/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.15-rc6-1-686-smp-ring3/net/ring/Makefile 2005-12-24 00:24:02.000000000 +0100 +@@ -0,0 +1,7 @@ ++# ++# Makefile for the ring driver. ++# ++ ++obj-m += ring.o ++ ++ring-objs := ring_packet.o +diff --unified --recursive --new-file linux-2.6.15-rc6/net/ring/ring_packet.c linux-2.6.15-rc6-1-686-smp-ring3/net/ring/ring_packet.c +--- linux-2.6.15-rc6/net/ring/ring_packet.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.15-rc6-1-686-smp-ring3/net/ring/ring_packet.c 2005-12-24 00:24:02.000000000 +0100 +@@ -0,0 +1,1568 @@ ++/* ++ * ++ * (C) 2004-05 - Luca Deri <deri@ntop.org> ++ * ++ * This code includes contributions courtesy of ++ * - Jeff Randall <jrandall@nexvu.com> ++ * - Helmut Manck <helmut.manck@secunet.com> ++ * - Brad Doctor <brad@stillsecure.com> ++ * - Amit D. Chaudhary <amit_ml@rajgad.com> ++ * ++ */ ++ ++/* ++ TO DO: ++ add an entry inside the /proc filesystem ++*/ ++ ++#include <linux/version.h> ++#include <linux/config.h> ++#include <linux/module.h> ++#include <linux/kernel.h> ++#include <linux/socket.h> ++#include <linux/skbuff.h> ++#include <linux/rtnetlink.h> ++#include <linux/in.h> ++#include <linux/in6.h> ++#include <linux/init.h> ++#include <linux/filter.h> ++#include <linux/ring.h> ++#include <linux/ip.h> ++#include <linux/tcp.h> ++#include <linux/udp.h> ++#include <linux/list.h> ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++#include <net/xfrm.h> ++#else ++#include <linux/poll.h> ++#endif ++#include <net/sock.h> ++#include <asm/io.h> /* needed for virt_to_phys() */ ++ ++/* #define RING_DEBUG */ ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)) ++static inline int remap_page_range(struct vm_area_struct *vma, ++ unsigned long uvaddr, ++ unsigned long paddr, ++ unsigned long size, ++ pgprot_t prot) { ++ return(remap_pfn_range(vma, uvaddr, paddr >> PAGE_SHIFT, ++ size, prot)); ++} ++#endif ++ ++/* ************************************************* */ ++ ++#define CLUSTER_LEN 8 ++ ++struct ring_cluster { ++ u_short cluster_id; /* 0 = no cluster */ ++ u_short num_cluster_elements; ++ enum cluster_type hashing_mode; ++ u_short hashing_id; ++ struct sock *sk[CLUSTER_LEN]; ++ struct ring_cluster *next; /* NULL = last element of the cluster */ ++}; ++ ++/* ************************************************* */ ++ ++struct ring_element { ++ struct list_head list; ++ struct sock *sk; ++}; ++ ++/* ************************************************* */ ++ ++struct ring_opt { ++ struct net_device *ring_netdev; ++ ++ /* Cluster */ ++ u_short cluster_id; /* 0 = no cluster */ ++ ++ /* Reflector */ ++ struct net_device *reflector_dev; ++ ++ /* Packet buffers */ ++ unsigned long order; ++ ++ /* Ring Slots */ ++ unsigned long ring_memory; ++ FlowSlotInfo *slots_info; /* Basically it points to ring_memory */ ++ char *ring_slots; /* Basically it points to ring_memory ++ +sizeof(FlowSlotInfo) */ ++ ++ /* Packet Sampling */ ++ u_int pktToSample, sample_rate; ++ ++ /* BPF Filter */ ++ struct sk_filter *bpfFilter; ++ ++ /* Locks */ ++ atomic_t num_ring_slots_waiters; ++ wait_queue_head_t ring_slots_waitqueue; ++ rwlock_t ring_index_lock; ++ ++ /* Indexes (Internal) */ ++ u_int insert_page_id, insert_slot_id; ++}; ++ ++/* ************************************************* */ ++ ++/* List of all ring sockets. */ ++static struct list_head ring_table; ++ ++/* List of all clusters */ ++static struct ring_cluster *ring_cluster_list; ++ ++static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED; ++ ++/* ********************************** */ ++ ++/* Forward */ ++static struct proto_ops ring_ops; ++ ++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11)) ++static struct proto ring_proto; ++#endif ++ ++static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet, ++ u_char real_skb); ++static int buffer_ring_handler(struct net_device *dev, char *data, int len); ++static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr); ++ ++/* Extern */ ++ ++/* ********************************** */ ++ ++/* Defaults */ ++static u_int bucket_len = 128, num_slots = 4096, sample_rate = 1, ++ transparent_mode = 1, enable_tx_capture = 0; ++ ++MODULE_PARM(bucket_len, "i"); ++MODULE_PARM_DESC(bucket_len, "Number of ring buckets"); ++MODULE_PARM(num_slots, "i"); ++MODULE_PARM_DESC(num_slots, "Number of ring slots"); ++MODULE_PARM(sample_rate, "i"); ++MODULE_PARM_DESC(sample_rate, "Ring packet sample rate"); ++MODULE_PARM(transparent_mode, "i"); ++MODULE_PARM_DESC(transparent_mode, ++ "Set to 1 to set transparent mode " ++ "(slower but backwards compatible)"); ++MODULE_PARM(enable_tx_capture, "i"); ++MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets"); ++ ++/* ********************************** */ ++ ++#define MIN_QUEUED_PKTS 64 ++#define MAX_QUEUE_LOOPS 64 ++ ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk) ++#define ring_sk(__sk) ((__sk)->sk_protinfo) ++#else ++#define ring_sk_datatype(a) (a) ++#define ring_sk(__sk) ((__sk)->protinfo.pf_ring) ++#endif ++ ++#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; }) ++ ++/* ++ int dev_queue_xmit(struct sk_buff *skb) ++ skb->dev; ++ struct net_device *dev_get_by_name(const char *name) ++*/ ++ ++/* ********************************** */ ++ ++static void ring_sock_destruct(struct sock *sk) { ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++ skb_queue_purge(&sk->sk_receive_queue); ++ ++ if (!sock_flag(sk, SOCK_DEAD)) { ++#if defined(RING_DEBUG) ++ printk("Attempt to release alive ring socket: %p\n", sk); ++#endif ++ return; ++ } ++ ++ BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc)); ++ BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc)); ++#else ++ ++ BUG_TRAP(atomic_read(&sk->rmem_alloc)==0); ++ BUG_TRAP(atomic_read(&sk->wmem_alloc)==0); ++ ++ if (!sk->dead) { ++#if defined(RING_DEBUG) ++ printk("Attempt to release alive ring socket: %p\n", sk); ++#endif ++ return; ++ } ++#endif ++ ++ kfree(ring_sk(sk)); ++ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) ++ MOD_DEC_USE_COUNT; ++#endif ++} ++ ++/* ********************************** */ ++/* ++ * ring_insert() ++ * ++ * store the sk in a new element and add it ++ * to the head of the list. ++ */ ++static inline void ring_insert(struct sock *sk) { ++ struct ring_element *next; ++ ++#if defined(RING_DEBUG) ++ printk("RING: ring_insert()\n"); ++#endif ++ ++ next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC); ++ if(next != NULL) { ++ next->sk = sk; ++ write_lock_irq(&ring_mgmt_lock); ++ list_add(&next->list, &ring_table); ++ write_unlock_irq(&ring_mgmt_lock); ++ } else { ++ if (net_ratelimit()) ++ printk("RING: could not kmalloc slot!!\n"); ++ } ++} ++ ++/* ********************************** */ ++/* ++ * ring_remove() ++ * ++ * For each of the elements in the list: ++ * - check if this is the element we want to delete ++ * - if it is, remove it from the list, and free it. ++ * ++ * stop when we find the one we're looking for (break), ++ * or when we reach the end of the list. ++ */ ++static inline void ring_remove(struct sock *sk) { ++ struct list_head *ptr; ++ struct ring_element *entry; ++ ++ ++ for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) { ++ entry = list_entry(ptr, struct ring_element, list); ++ ++ if(entry->sk == sk) { ++ write_lock_irq(&ring_mgmt_lock); ++ list_del(ptr); ++ kfree(ptr); ++ write_unlock_irq(&ring_mgmt_lock); ++ break; ++ } ++ } ++ ++} ++ ++/* ********************************** */ ++ ++static u_int32_t num_queued_pkts(struct ring_opt *pfr) { ++ ++ if(pfr->ring_slots != NULL) { ++ ++ u_int32_t tot_insert = pfr->slots_info->insert_idx, ++#if defined(RING_DEBUG) ++ tot_read = pfr->slots_info->tot_read, tot_pkts; ++#else ++ tot_read = pfr->slots_info->tot_read; ++#endif ++ ++ if(tot_insert >= tot_read) { ++#if defined(RING_DEBUG) ++ tot_pkts = tot_insert-tot_read; ++#endif ++ return(tot_insert-tot_read); ++ } else { ++#if defined(RING_DEBUG) ++ tot_pkts = ((u_int32_t)-1)+tot_insert-tot_read; ++#endif ++ return(((u_int32_t)-1)+tot_insert-tot_read); ++ } ++ ++#if defined(RING_DEBUG) ++ printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n", ++ tot_pkts, tot_insert, tot_read); ++#endif ++ ++ } else ++ return(0); ++} ++ ++/* ********************************** */ ++ ++static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) { ++#if defined(RING_DEBUG) ++ printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx); ++#endif ++ ++ if(pfr->ring_slots != NULL) { ++ FlowSlot *slot = (FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx ++ *pfr->slots_info->slot_len]); ++ return(slot); ++ } else ++ return(NULL); ++} ++ ++/* ********************************** */ ++ ++static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) { ++#if defined(RING_DEBUG) ++ printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx); ++#endif ++ ++ if(pfr->ring_slots != NULL) ++ return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx* ++ pfr->slots_info->slot_len])); ++ else ++ return(NULL); ++} ++ ++/* ********************************** */ ++ ++static void add_skb_to_ring(struct sk_buff *skb, ++ struct ring_opt *pfr, ++ u_char recv_packet, ++ u_char real_skb /* 1=skb 0=faked skb */) { ++ FlowSlot *theSlot; ++ int idx, displ; ++ ++ if(recv_packet) { ++ /* Hack for identifying a packet received by the e1000 */ ++ if(real_skb) { ++ displ = SKB_DISPLACEMENT; ++ } else ++ displ = 0; /* Received by the e1000 wrapper */ ++ } else ++ displ = 0; ++ ++ write_lock(&pfr->ring_index_lock); ++ pfr->slots_info->tot_pkts++; ++ write_unlock(&pfr->ring_index_lock); ++ ++ /* BPF Filtering (from af_packet.c) */ ++ if(pfr->bpfFilter != NULL) { ++ unsigned res = 1, len; ++ ++ len = skb->len-skb->data_len; ++ ++ write_lock(&pfr->ring_index_lock); ++ skb->data -= displ; ++ res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len); ++ skb->data += displ; ++ write_unlock(&pfr->ring_index_lock); ++ ++ if(res == 0) { ++ /* Filter failed */ ++ ++#if defined(RING_DEBUG) ++ printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]" ++ "[insertIdx=%d][pkt_type=%d][cloned=%d]\n", ++ (int)skb->len, pfr->slots_info->tot_pkts, ++ pfr->slots_info->insert_idx, ++ skb->pkt_type, skb->cloned); ++#endif ++ ++ return; ++ } ++ } ++ ++ /* ************************** */ ++ ++ if(pfr->sample_rate > 1) { ++ if(pfr->pktToSample == 0) { ++ write_lock(&pfr->ring_index_lock); ++ pfr->pktToSample = pfr->sample_rate; ++ write_unlock(&pfr->ring_index_lock); ++ } else { ++ write_lock(&pfr->ring_index_lock); ++ pfr->pktToSample--; ++ write_unlock(&pfr->ring_index_lock); ++ ++#if defined(RING_DEBUG) ++ printk("add_skb_to_ring(skb): sampled packet [len=%d]" ++ "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n", ++ (int)skb->len, pfr->slots_info->tot_pkts, ++ pfr->slots_info->insert_idx, ++ skb->pkt_type, skb->cloned); ++#endif ++ return; ++ } ++ } ++ ++ /* ************************************* */ ++ ++ if((pfr->reflector_dev != NULL) ++ && (!netif_queue_stopped(pfr->reflector_dev))) { ++ int cpu = smp_processor_id(); ++ ++ /* increase reference counter so that this skb is not freed */ ++ atomic_inc(&skb->users); ++ ++ skb->data -= displ; ++ ++ /* send it */ ++ if (pfr->reflector_dev->xmit_lock_owner != cpu) { ++ spin_lock_bh(&pfr->reflector_dev->xmit_lock); ++ pfr->reflector_dev->xmit_lock_owner = cpu; ++ spin_unlock_bh(&pfr->reflector_dev->xmit_lock); ++ ++ if (pfr->reflector_dev->hard_start_xmit(skb, ++ pfr->reflector_dev) == 0) { ++ spin_lock_bh(&pfr->reflector_dev->xmit_lock); ++ pfr->reflector_dev->xmit_lock_owner = -1; ++ skb->data += displ; ++ spin_unlock_bh(&pfr->reflector_dev->xmit_lock); ++#if defined(RING_DEBUG) ++ printk("++ hard_start_xmit succeeded\n"); ++#endif ++ return; /* OK */ ++ } ++ ++ spin_lock_bh(&pfr->reflector_dev->xmit_lock); ++ pfr->reflector_dev->xmit_lock_owner = -1; ++ spin_unlock_bh(&pfr->reflector_dev->xmit_lock); ++ } ++ ++#if defined(RING_DEBUG) ++ printk("++ hard_start_xmit failed\n"); ++#endif ++ skb->data += displ; ++ return; /* -ENETDOWN */ ++ } ++ ++ /* ************************************* */ ++ ++#if defined(RING_DEBUG) ++ printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]" ++ "[pkt_type=%d][cloned=%d]\n", ++ (int)skb->len, pfr->slots_info->tot_pkts, ++ pfr->slots_info->insert_idx, ++ skb->pkt_type, skb->cloned); ++#endif ++ ++ idx = pfr->slots_info->insert_idx; ++ theSlot = get_insert_slot(pfr); ++ ++ if((theSlot != NULL) && (theSlot->slot_state == 0)) { ++ struct pcap_pkthdr *hdr; ++ char *bucket; ++ ++ /* Update Index */ ++ idx++; ++ ++ if(idx == pfr->slots_info->tot_slots) { ++ write_lock(&pfr->ring_index_lock); ++ pfr->slots_info->insert_idx = 0; ++ write_unlock(&pfr->ring_index_lock); ++ } else { ++ write_lock(&pfr->ring_index_lock); ++ pfr->slots_info->insert_idx = idx; ++ write_unlock(&pfr->ring_index_lock); ++ } ++ ++ bucket = &theSlot->bucket; ++ hdr = (struct pcap_pkthdr*)bucket; ++ ++ /* BD - API changed for time keeping */ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)) ++ if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp); ++ ++ hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec; ++#else ++ if(skb->tstamp.off_sec == 0) __net_timestamp(skb); ++ ++ hdr->ts.tv_sec = skb->tstamp.off_sec, hdr->ts.tv_usec = skb->tstamp.off_usec; ++#endif ++ ++ hdr->caplen = skb->len+displ; ++ ++ if(hdr->caplen > pfr->slots_info->data_len) ++ hdr->caplen = pfr->slots_info->data_len; ++ ++ hdr->len = skb->len+displ; ++ memcpy(&bucket[sizeof(struct pcap_pkthdr)], ++ skb->data-displ, hdr->caplen); ++ ++#if defined(RING_DEBUG) ++ { ++ static unsigned int lastLoss = 0; ++ ++ if(pfr->slots_info->tot_lost ++ && (lastLoss != pfr->slots_info->tot_lost)) { ++ printk("add_skb_to_ring(%d): [data_len=%d]" ++ "[hdr.caplen=%d][skb->len=%d]" ++ "[pcap_pkthdr=%d][removeIdx=%d]" ++ "[loss=%lu][page=%u][slot=%u]\n", ++ idx-1, pfr->slots_info->data_len, hdr->caplen, skb->len, ++ sizeof(struct pcap_pkthdr), ++ pfr->slots_info->remove_idx, ++ (long unsigned int)pfr->slots_info->tot_lost, ++ pfr->insert_page_id, pfr->insert_slot_id); ++ ++ lastLoss = pfr->slots_info->tot_lost; ++ } ++ } ++#endif ++ ++ write_lock(&pfr->ring_index_lock); ++ pfr->slots_info->tot_insert++; ++ theSlot->slot_state = 1; ++ write_unlock(&pfr->ring_index_lock); ++ } else { ++ write_lock(&pfr->ring_index_lock); ++ pfr->slots_info->tot_lost++; ++ write_unlock(&pfr->ring_index_lock); ++ ++#if defined(RING_DEBUG) ++ printk("add_skb_to_ring(skb): packet lost [loss=%lu]" ++ "[removeIdx=%u][insertIdx=%u]\n", ++ (long unsigned int)pfr->slots_info->tot_lost, ++ pfr->slots_info->remove_idx, pfr->slots_info->insert_idx); ++#endif ++ } ++ ++ /* wakeup in case of poll() */ ++ if(waitqueue_active(&pfr->ring_slots_waitqueue)) ++ wake_up_interruptible(&pfr->ring_slots_waitqueue); ++} ++ ++/* ********************************** */ ++ ++static u_int hash_skb(struct ring_cluster *cluster_ptr, ++ struct sk_buff *skb, u_char recv_packet) { ++ u_int idx; ++ int displ; ++ struct iphdr *ip; ++ ++ if(cluster_ptr->hashing_mode == cluster_round_robin) { ++ idx = cluster_ptr->hashing_id++; ++ } else { ++ /* Per-flow clustering */ ++ if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr)) { ++ if(recv_packet) ++ displ = 0; ++ else ++ displ = SKB_DISPLACEMENT; ++ ++ /* ++ skb->data+displ ++ ++ Always points to to the IP part of the packet ++ */ ++ ++ ip = (struct iphdr*)(skb->data+displ); ++ ++ idx = ip->saddr+ip->daddr+ip->protocol; ++ ++ if(ip->protocol == IPPROTO_TCP) { ++ struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ ++ +sizeof(struct iphdr)); ++ idx += tcp->source+tcp->dest; ++ } else if(ip->protocol == IPPROTO_UDP) { ++ struct udphdr *udp = (struct udphdr*)(skb->data+displ ++ +sizeof(struct iphdr)); ++ idx += udp->source+udp->dest; ++ } ++ } else ++ idx = skb->len; ++ } ++ ++ return(idx % cluster_ptr->num_cluster_elements); ++} ++ ++/* ********************************** */ ++ ++static int skb_ring_handler(struct sk_buff *skb, ++ u_char recv_packet, ++ u_char real_skb /* 1=skb 0=faked skb */) { ++ struct sock *skElement; ++ int rc = 0; ++ struct list_head *ptr; ++ struct ring_cluster *cluster_ptr; ++ ++#ifdef PROFILING ++ uint64_t rdt = _rdtsc(), rdt1, rdt2; ++#endif ++ ++ if((!skb) /* Invalid skb */ ++ || ((!enable_tx_capture) && (!recv_packet))) { ++ /* ++ An outgoing packet is about to be sent out ++ but we decided not to handle transmitted ++ packets. ++ */ ++ return(0); ++ } ++ ++#if defined(RING_DEBUG) ++ if(0) { ++ printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len, ++ skb->dev->name == NULL ? "<NULL>" : skb->dev->name); ++ } ++#endif ++ ++#ifdef PROFILING ++ rdt1 = _rdtsc(); ++#endif ++ ++ /* [1] Check unclustered sockets */ ++ for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) { ++ struct ring_opt *pfr; ++ struct ring_element *entry; ++ ++ entry = list_entry(ptr, struct ring_element, list); ++ ++ read_lock(&ring_mgmt_lock); ++ skElement = entry->sk; ++ pfr = ring_sk(skElement); ++ read_unlock(&ring_mgmt_lock); ++ ++ if((pfr != NULL) ++ && (pfr->cluster_id == 0 /* No cluster */) ++ && (pfr->ring_slots != NULL) ++ && (pfr->ring_netdev == skb->dev)) { ++ /* We've found the ring where the packet can be stored */ ++ read_lock(&ring_mgmt_lock); ++ add_skb_to_ring(skb, pfr, recv_packet, real_skb); ++ read_unlock(&ring_mgmt_lock); ++ ++ rc = 1; /* Ring found: we've done our job */ ++ } ++ } ++ ++ /* [2] Check socket clusters */ ++ cluster_ptr = ring_cluster_list; ++ ++ while(cluster_ptr != NULL) { ++ struct ring_opt *pfr; ++ ++ if(cluster_ptr->num_cluster_elements > 0) { ++ u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet); ++ ++ read_lock(&ring_mgmt_lock); ++ skElement = cluster_ptr->sk[skb_hash]; ++ read_unlock(&ring_mgmt_lock); ++ ++ if(skElement != NULL) { ++ pfr = ring_sk(skElement); ++ ++ if((pfr != NULL) ++ && (pfr->ring_slots != NULL) ++ && (pfr->ring_netdev == skb->dev)) { ++ /* We've found the ring where the packet can be stored */ ++ read_lock(&ring_mgmt_lock); ++ add_skb_to_ring(skb, pfr, recv_packet, real_skb); ++ read_unlock(&ring_mgmt_lock); ++ ++ rc = 1; /* Ring found: we've done our job */ ++ } ++ } ++ } ++ ++ cluster_ptr = cluster_ptr->next; ++ } ++ ++#ifdef PROFILING ++ rdt1 = _rdtsc()-rdt1; ++#endif ++ ++#ifdef PROFILING ++ rdt2 = _rdtsc(); ++#endif ++ ++ if(transparent_mode) rc = 0; ++ ++ if((rc != 0) && real_skb) ++ dev_kfree_skb(skb); /* Free the skb */ ++ ++#ifdef PROFILING ++ rdt2 = _rdtsc()-rdt2; ++ rdt = _rdtsc()-rdt; ++ ++#if defined(RING_DEBUG) ++ printk("# cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n", ++ (int)rdt, rdt-rdt1, ++ (int)((float)((rdt-rdt1)*100)/(float)rdt), ++ rdt2, ++ (int)((float)(rdt2*100)/(float)rdt)); ++#endif ++#endif ++ ++ return(rc); /* 0 = packet not handled */ ++} ++ ++/* ********************************** */ ++ ++struct sk_buff skb; ++ ++static int buffer_ring_handler(struct net_device *dev, ++ char *data, int len) { ++ ++#if defined(RING_DEBUG) ++ printk("buffer_ring_handler: [dev=%s][len=%d]\n", ++ dev->name == NULL ? "<NULL>" : dev->name, len); ++#endif ++ ++ /* BD - API changed for time keeping */ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)) ++ skb.dev = dev, skb.len = len, skb.data = data, ++ skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */ ++#else ++ skb.dev = dev, skb.len = len, skb.data = data, ++ skb.data_len = len, skb.tstamp.off_sec = 0; /* Calculate the time */ ++#endif ++ ++ skb_ring_handler(&skb, 1, 0 /* fake skb */); ++ ++ return(0); ++} ++ ++/* ********************************** */ ++ ++static int ring_create(struct socket *sock, int protocol) { ++ struct sock *sk; ++ struct ring_opt *pfr; ++ int err; ++ ++#if defined(RING_DEBUG) ++ printk("RING: ring_create()\n"); ++#endif ++ ++ /* Are you root, superuser or so ? */ ++ if(!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ ++ if(sock->type != SOCK_RAW) ++ return -ESOCKTNOSUPPORT; ++ ++ if(protocol != htons(ETH_P_ALL)) ++ return -EPROTONOSUPPORT; ++ ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) ++ MOD_INC_USE_COUNT; ++#endif ++ ++ err = -ENOMEM; ++ ++ // BD: -- broke this out to keep it more simple and clear as to what the ++ // options are. ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11)) ++ sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL); ++#else ++ // BD: API changed in 2.6.12, ref: ++ // http://svn.clkao.org/svnweb/linux/revision/?rev=28201 ++ sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1); ++#endif ++#else ++ /* Kernel 2.4 */ ++ sk = sk_alloc(PF_RING, GFP_KERNEL, 1); ++#endif ++ ++ if (sk == NULL) ++ goto out; ++ ++ sock->ops = &ring_ops; ++ sock_init_data(sock, sk); ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11)) ++ sk_set_owner(sk, THIS_MODULE); ++#endif ++#endif ++ ++ err = -ENOMEM; ++ ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL)); ++ ++ if (!(pfr = ring_sk(sk))) { ++ sk_free(sk); ++ goto out; ++ } ++ memset(pfr, 0, sizeof(*pfr)); ++ init_waitqueue_head(&pfr->ring_slots_waitqueue); ++ pfr->ring_index_lock = RW_LOCK_UNLOCKED; ++ atomic_set(&pfr->num_ring_slots_waiters, 0); ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++ sk->sk_family = PF_RING; ++ sk->sk_destruct = ring_sock_destruct; ++#else ++ sk->family = PF_RING; ++ sk->destruct = ring_sock_destruct; ++ sk->num = protocol; ++#endif ++ ++ ring_insert(sk); ++ ++#if defined(RING_DEBUG) ++ printk("RING: ring_create() - created\n"); ++#endif ++ ++ return(0); ++ out: ++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) ++ MOD_DEC_USE_COUNT; ++#endif ++ return err; ++} ++ ++/* *********************************************** */ ++ ++static int ring_release(struct socket *sock) ++{ ++ struct sock *sk = sock->sk; ++ struct ring_opt *pfr = ring_sk(sk); ++ ++ if(!sk) ++ return 0; ++ ++#if defined(RING_DEBUG) ++ printk("RING: called ring_release\n"); ++#endif ++ ++#if defined(RING_DEBUG) ++ printk("RING: ring_release entered\n"); ++#endif ++ ++ ring_remove(sk); ++ ++ sock_orphan(sk); ++ sock->sk = NULL; ++ ++ /* Free the ring buffer */ ++ if(pfr->ring_memory) { ++ struct page *page, *page_end; ++ ++ page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1); ++ for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++) ++ ClearPageReserved(page); ++ ++ free_pages(pfr->ring_memory, pfr->order); ++ } ++ ++ kfree(pfr); ++ ring_sk(sk) = NULL; ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++ skb_queue_purge(&sk->sk_write_queue); ++#endif ++ sock_put(sk); ++ ++#if defined(RING_DEBUG) ++ printk("RING: ring_release leaving\n"); ++#endif ++ ++ return 0; ++} ++ ++/* ********************************** */ ++/* ++ * We create a ring for this socket and bind it to the specified device ++ */ ++static int packet_ring_bind(struct sock *sk, struct net_device *dev) ++{ ++ u_int the_slot_len; ++ u_int32_t tot_mem; ++ struct ring_opt *pfr = ring_sk(sk); ++ struct page *page, *page_end; ++ ++ if(!dev) return(-1); ++ ++#if defined(RING_DEBUG) ++ printk("RING: packet_ring_bind(%s) called\n", dev->name); ++#endif ++ ++ /* ********************************************** ++ ++ ************************************* ++ * * ++ * FlowSlotInfo * ++ * * ++ ************************************* <-+ ++ * FlowSlot * | ++ ************************************* | ++ * FlowSlot * | ++ ************************************* +- num_slots ++ * FlowSlot * | ++ ************************************* | ++ * FlowSlot * | ++ ************************************* <-+ ++ ++ ********************************************** */ ++ ++ the_slot_len = sizeof(u_char) /* flowSlot.slot_state */ ++#ifdef RING_MAGIC ++ + sizeof(u_char) ++#endif ++ + sizeof(struct pcap_pkthdr) ++ + bucket_len /* flowSlot.bucket */; ++ ++ tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len; ++ ++ /* ++ Calculate the value of the order parameter used later. ++ See http://www.linuxjournal.com/article.php?sid=1133 ++ */ ++ for(pfr->order = 0;(PAGE_SIZE << pfr->order) < tot_mem; pfr->order++) ; ++ ++ /* ++ We now try to allocate the memory as required. If we fail ++ we try to allocate a smaller amount or memory (hence a ++ smaller ring). ++ */ ++ while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0) ++ if(pfr->order-- == 0) ++ break; ++ ++ if(pfr->order == 0) { ++ printk("RING: ERROR not enough memory for ring\n"); ++ return(-1); ++ } else { ++ printk("RING: succesfully allocated %lu KB [tot_mem=%d][order=%ld]\n", ++ PAGE_SIZE >> (10 - pfr->order), tot_mem, pfr->order); ++ } ++ ++ tot_mem = PAGE_SIZE << pfr->order; ++ memset((char*)pfr->ring_memory, 0, tot_mem); ++ ++ /* Now we need to reserve the pages */ ++ page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1); ++ for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++) ++ SetPageReserved(page); ++ ++ pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory; ++ pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo)); ++ ++ pfr->slots_info->version = RING_FLOWSLOT_VERSION; ++ pfr->slots_info->slot_len = the_slot_len; ++ pfr->slots_info->data_len = bucket_len; ++ pfr->slots_info->tot_slots = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len; ++ pfr->slots_info->tot_mem = tot_mem; ++ pfr->slots_info->sample_rate = sample_rate; ++ ++ printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n", ++ pfr->slots_info->tot_slots, pfr->slots_info->slot_len, ++ pfr->slots_info->tot_mem); ++ ++#ifdef RING_MAGIC ++ { ++ int i; ++ ++ for(i=0; i<pfr->slots_info->tot_slots; i++) { ++ unsigned long idx = i*pfr->slots_info->slot_len; ++ FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx]; ++ slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0; ++ } ++ } ++#endif ++ ++ pfr->insert_page_id = 1, pfr->insert_slot_id = 0; ++ ++ /* ++ IMPORTANT ++ Leave this statement here as last one. In fact when ++ the ring_netdev != NULL the socket is ready to be used. ++ */ ++ pfr->ring_netdev = dev; ++ ++ return(0); ++} ++ ++/* ************************************* */ ++ ++/* Bind to a device */ ++static int ring_bind(struct socket *sock, ++ struct sockaddr *sa, int addr_len) ++{ ++ struct sock *sk=sock->sk; ++ struct net_device *dev = NULL; ++ ++#if defined(RING_DEBUG) ++ printk("RING: ring_bind() called\n"); ++#endif ++ ++ /* ++ * Check legality ++ */ ++ if (addr_len != sizeof(struct sockaddr)) ++ return -EINVAL; ++ if (sa->sa_family != PF_RING) ++ return -EINVAL; ++ ++ /* Safety check: add trailing zero if missing */ ++ sa->sa_data[sizeof(sa->sa_data)-1] = '\0'; ++ ++#if defined(RING_DEBUG) ++ printk("RING: searching device %s\n", sa->sa_data); ++#endif ++ ++ if((dev = __dev_get_by_name(sa->sa_data)) == NULL) { ++#if defined(RING_DEBUG) ++ printk("RING: search failed\n"); ++#endif ++ return(-EINVAL); ++ } else ++ return(packet_ring_bind(sk, dev)); ++} ++ ++/* ************************************* */ ++ ++static int ring_mmap(struct file *file, ++ struct socket *sock, ++ struct vm_area_struct *vma) ++{ ++ struct sock *sk = sock->sk; ++ struct ring_opt *pfr = ring_sk(sk); ++ unsigned long size, start; ++ u_int pagesToMap; ++ char *ptr; ++ ++#if defined(RING_DEBUG) ++ printk("RING: ring_mmap() called\n"); ++#endif ++ ++ if(pfr->ring_memory == 0) { ++#if defined(RING_DEBUG) ++ printk("RING: ring_mmap() failed: mapping area to an unbound socket\n"); ++#endif ++ return -EINVAL; ++ } ++ ++ size = (unsigned long)(vma->vm_end-vma->vm_start); ++ ++ if(size % PAGE_SIZE) { ++#if defined(RING_DEBUG) ++ printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n"); ++#endif ++ return(-EINVAL); ++ } ++ ++ /* if userspace tries to mmap beyond end of our buffer, fail */ ++ if(size > pfr->slots_info->tot_mem) { ++#if defined(RING_DEBUG) ++ printk("proc_mmap() failed: area too large [%ld > %d]\n", size, pfr->slots_info->tot_mem); ++#endif ++ return(-EINVAL); ++ } ++ ++ pagesToMap = size/PAGE_SIZE; ++ ++#if defined(RING_DEBUG) ++ printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap); ++#endif ++ ++#if defined(RING_DEBUG) ++ printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n", ++ pfr->slots_info->slot_len, pfr->slots_info->tot_slots, ++ pfr->ring_netdev->name); ++#endif ++ ++ /* we do not want to have this area swapped out, lock it */ ++ vma->vm_flags |= VM_LOCKED; ++ start = vma->vm_start; ++ ++ /* Ring slots start from page 1 (page 0 is reserved for FlowSlotInfo) */ ++ ptr = (char*)(start+PAGE_SIZE); ++ ++ if(remap_page_range( ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++ vma, ++#endif ++ start, ++ __pa(pfr->ring_memory), ++ PAGE_SIZE*pagesToMap, vma->vm_page_prot)) { ++#if defined(RING_DEBUG) ++ printk("remap_page_range() failed\n"); ++#endif ++ return(-EAGAIN); ++ } ++ ++#if defined(RING_DEBUG) ++ printk("proc_mmap(pagesToMap=%d): success.\n", pagesToMap); ++#endif ++ ++ return 0; ++} ++ ++/* ************************************* */ ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++static int ring_recvmsg(struct kiocb *iocb, struct socket *sock, ++ struct msghdr *msg, size_t len, int flags) ++#else ++ static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len, ++ int flags, struct scm_cookie *scm) ++#endif ++{ ++ FlowSlot* slot; ++ struct ring_opt *pfr = ring_sk(sock->sk); ++ u_int32_t queued_pkts, num_loops = 0; ++ ++#if defined(RING_DEBUG) ++ printk("ring_recvmsg called\n"); ++#endif ++ ++ slot = get_remove_slot(pfr); ++ ++ while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) { ++ wait_event_interruptible(pfr->ring_slots_waitqueue, 1); ++ ++#if defined(RING_DEBUG) ++ printk("-> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n", ++ slot->slot_state, queued_pkts, num_loops); ++#endif ++ ++ if(queued_pkts > 0) { ++ if(num_loops++ > MAX_QUEUE_LOOPS) ++ break; ++ } ++ } ++ ++#if defined(RING_DEBUG) ++ if(slot != NULL) ++ printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n", ++ queued_pkts, num_loops); ++#endif ++ ++ return(queued_pkts); ++} ++ ++/* ************************************* */ ++ ++unsigned int ring_poll(struct file * file, ++ struct socket *sock, poll_table *wait) ++{ ++ FlowSlot* slot; ++ struct ring_opt *pfr = ring_sk(sock->sk); ++ ++#if defined(RING_DEBUG) ++ printk("poll called\n"); ++#endif ++ ++ slot = get_remove_slot(pfr); ++ ++ if((slot != NULL) && (slot->slot_state == 0)) ++ poll_wait(file, &pfr->ring_slots_waitqueue, wait); ++ ++#if defined(RING_DEBUG) ++ printk("poll returning %d\n", slot->slot_state); ++#endif ++ ++ if((slot != NULL) && (slot->slot_state == 1)) ++ return(POLLIN | POLLRDNORM); ++ else ++ return(0); ++} ++ ++/* ************************************* */ ++ ++int add_to_cluster_list(struct ring_cluster *el, ++ struct sock *sock) { ++ ++ if(el->num_cluster_elements == CLUSTER_LEN) ++ return(-1); /* Cluster full */ ++ ++ ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id; ++ el->sk[el->num_cluster_elements] = sock; ++ el->num_cluster_elements++; ++ return(0); ++} ++ ++/* ************************************* */ ++ ++int remove_from_cluster_list(struct ring_cluster *el, ++ struct sock *sock) { ++ int i, j; ++ ++ for(i=0; i<CLUSTER_LEN; i++) ++ if(el->sk[i] == sock) { ++ el->num_cluster_elements--; ++ ++ if(el->num_cluster_elements > 0) { ++ /* The cluster contains other elements */ ++ for(j=i; j<CLUSTER_LEN-1; j++) ++ el->sk[j] = el->sk[j+1]; ++ ++ el->sk[CLUSTER_LEN-1] = NULL; ++ } else { ++ /* Empty cluster */ ++ memset(el->sk, 0, sizeof(el->sk)); ++ } ++ ++ return(0); ++ } ++ ++ return(-1); /* Not found */ ++} ++ ++/* ************************************* */ ++ ++static int remove_from_cluster(struct sock *sock, ++ struct ring_opt *pfr) ++{ ++ struct ring_cluster *el; ++ ++#if defined(RING_DEBUG) ++ printk("--> remove_from_cluster(%d)\n", pfr->cluster_id); ++#endif ++ ++ if(pfr->cluster_id == 0 /* 0 = No Cluster */) ++ return(0); /* Noting to do */ ++ ++ el = ring_cluster_list; ++ ++ while(el != NULL) { ++ if(el->cluster_id == pfr->cluster_id) { ++ return(remove_from_cluster_list(el, sock)); ++ } else ++ el = el->next; ++ } ++ ++ return(-EINVAL); /* Not found */ ++} ++ ++/* ************************************* */ ++ ++static int add_to_cluster(struct sock *sock, ++ struct ring_opt *pfr, ++ u_short cluster_id) ++{ ++ struct ring_cluster *el; ++ ++#ifndef RING_DEBUG ++ printk("--> add_to_cluster(%d)\n", cluster_id); ++#endif ++ ++ if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL); ++ ++ if(pfr->cluster_id != 0) ++ remove_from_cluster(sock, pfr); ++ ++ el = ring_cluster_list; ++ ++ while(el != NULL) { ++ if(el->cluster_id == cluster_id) { ++ return(add_to_cluster_list(el, sock)); ++ } else ++ el = el->next; ++ } ++ ++ /* There's no existing cluster. We need to create one */ ++ if((el = kmalloc(sizeof(struct ring_cluster), GFP_KERNEL)) == NULL) ++ return(-ENOMEM); ++ ++ el->cluster_id = cluster_id; ++ el->num_cluster_elements = 1; ++ el->hashing_mode = cluster_per_flow; /* Default */ ++ el->hashing_id = 0; ++ ++ memset(el->sk, 0, sizeof(el->sk)); ++ el->sk[0] = sock; ++ el->next = ring_cluster_list; ++ ring_cluster_list = el; ++ pfr->cluster_id = cluster_id; ++ ++ return(0); /* 0 = OK */ ++} ++ ++/* ************************************* */ ++ ++/* Code taken/inspired from core/sock.c */ ++static int ring_setsockopt(struct socket *sock, ++ int level, int optname, ++ char *optval, int optlen) ++{ ++ struct ring_opt *pfr = ring_sk(sock->sk); ++ int val, found, ret = 0; ++ u_int cluster_id; ++ char devName[8]; ++ ++ if((optlen<sizeof(int)) || (pfr == NULL)) ++ return(-EINVAL); ++ ++ if (get_user(val, (int *)optval)) ++ return -EFAULT; ++ ++ found = 1; ++ ++ switch(optname) ++ { ++ case SO_ATTACH_FILTER: ++ ret = -EINVAL; ++ if (optlen == sizeof(struct sock_fprog)) { ++ unsigned int fsize; ++ struct sock_fprog fprog; ++ struct sk_filter *filter; ++ ++ ret = -EFAULT; ++ ++ /* ++ NOTE ++ ++ Do not call copy_from_user within a held ++ splinlock (e.g. ring_mgmt_lock) as this caused ++ problems when certain debugging was enabled under ++ 2.6.5 -- including hard lockups of the machine. ++ */ ++ if(copy_from_user(&fprog, optval, sizeof(fprog))) ++ break; ++ ++ fsize = sizeof(struct sock_filter) * fprog.len; ++ filter = kmalloc(fsize, GFP_KERNEL); ++ ++ if(filter == NULL) { ++ ret = -ENOMEM; ++ break; ++ } ++ ++ if(copy_from_user(filter->insns, fprog.filter, fsize)) ++ break; ++ ++ filter->len = fprog.len; ++ ++ if(sk_chk_filter(filter->insns, filter->len) != 0) { ++ /* Bad filter specified */ ++ kfree(filter); ++ pfr->bpfFilter = NULL; ++ break; ++ } ++ ++ /* get the lock, set the filter, release the lock */ ++ write_lock(&ring_mgmt_lock); ++ pfr->bpfFilter = filter; ++ write_unlock(&ring_mgmt_lock); ++ } ++ ret = 0; ++ break; ++ ++ case SO_DETACH_FILTER: ++ write_lock(&ring_mgmt_lock); ++ found = 1; ++ if(pfr->bpfFilter != NULL) { ++ kfree(pfr->bpfFilter); ++ pfr->bpfFilter = NULL; ++ write_unlock(&ring_mgmt_lock); ++ break; ++ } ++ ret = -ENONET; ++ break; ++ ++ case SO_ADD_TO_CLUSTER: ++ if (optlen!=sizeof(val)) ++ return -EINVAL; ++ ++ if (copy_from_user(&cluster_id, optval, sizeof(cluster_id))) ++ return -EFAULT; ++ ++ write_lock(&ring_mgmt_lock); ++ ret = add_to_cluster(sock->sk, pfr, cluster_id); ++ write_unlock(&ring_mgmt_lock); ++ break; ++ ++ case SO_REMOVE_FROM_CLUSTER: ++ write_lock(&ring_mgmt_lock); ++ ret = remove_from_cluster(sock->sk, pfr); ++ write_unlock(&ring_mgmt_lock); ++ break; ++ ++ case SO_SET_REFLECTOR: ++ if(optlen >= (sizeof(devName)-1)) ++ return -EINVAL; ++ ++ if(optlen > 0) { ++ if(copy_from_user(devName, optval, optlen)) ++ return -EFAULT; ++ } ++ ++ devName[optlen] = '\0'; ++ ++#if defined(RING_DEBUG) ++ printk("+++ SO_SET_REFLECTOR(%s)\n", devName); ++#endif ++ ++ write_lock(&ring_mgmt_lock); ++ pfr->reflector_dev = dev_get_by_name(devName); ++ write_unlock(&ring_mgmt_lock); ++ ++#if defined(RING_DEBUG) ++ if(pfr->reflector_dev != NULL) ++ printk("SO_SET_REFLECTOR(%s): succeded\n", devName); ++ else ++ printk("SO_SET_REFLECTOR(%s): device unknown\n", devName); ++#endif ++ break; ++ ++ default: ++ found = 0; ++ break; ++ } ++ ++ if(found) ++ return(ret); ++ else ++ return(sock_setsockopt(sock, level, optname, optval, optlen)); ++} ++ ++/* ************************************* */ ++ ++static int ring_ioctl(struct socket *sock, ++ unsigned int cmd, unsigned long arg) ++{ ++ switch(cmd) ++ { ++ case SIOCGIFFLAGS: ++ case SIOCSIFFLAGS: ++ case SIOCGIFCONF: ++ case SIOCGIFMETRIC: ++ case SIOCSIFMETRIC: ++ case SIOCGIFMEM: ++ case SIOCSIFMEM: ++ case SIOCGIFMTU: ++ case SIOCSIFMTU: ++ case SIOCSIFLINK: ++ case SIOCGIFHWADDR: ++ case SIOCSIFHWADDR: ++ case SIOCSIFMAP: ++ case SIOCGIFMAP: ++ case SIOCSIFSLAVE: ++ case SIOCGIFSLAVE: ++ case SIOCGIFINDEX: ++ case SIOCGIFNAME: ++ case SIOCGIFCOUNT: ++ case SIOCSIFHWBROADCAST: ++ return(dev_ioctl(cmd,(void *) arg)); ++ ++ default: ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} ++ ++/* ************************************* */ ++ ++static struct proto_ops ring_ops = { ++ .family = PF_RING, ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++ .owner = THIS_MODULE, ++#endif ++ ++ /* Operations that make no sense on ring sockets. */ ++ .connect = sock_no_connect, ++ .socketpair = sock_no_socketpair, ++ .accept = sock_no_accept, ++ .getname = sock_no_getname, ++ .listen = sock_no_listen, ++ .shutdown = sock_no_shutdown, ++ .sendpage = sock_no_sendpage, ++ .sendmsg = sock_no_sendmsg, ++ .getsockopt = sock_no_getsockopt, ++ ++ /* Now the operations that really occur. */ ++ .release = ring_release, ++ .bind = ring_bind, ++ .mmap = ring_mmap, ++ .poll = ring_poll, ++ .setsockopt = ring_setsockopt, ++ .ioctl = ring_ioctl, ++ .recvmsg = ring_recvmsg, ++}; ++ ++/* ************************************ */ ++ ++static struct net_proto_family ring_family_ops = { ++ .family = PF_RING, ++ .create = ring_create, ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++ .owner = THIS_MODULE, ++#endif ++}; ++ ++// BD: API changed in 2.6.12, ref: ++// http://svn.clkao.org/svnweb/linux/revision/?rev=28201 ++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11)) ++static struct proto ring_proto = { ++ .name = "PF_RING", ++ .owner = THIS_MODULE, ++ .obj_size = sizeof(struct sock), ++}; ++#endif ++ ++/* ************************************ */ ++ ++static void __exit ring_exit(void) ++{ ++ struct list_head *ptr; ++ struct ring_element *entry; ++ ++ for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) { ++ entry = list_entry(ptr, struct ring_element, list); ++ kfree(entry); ++ } ++ ++ while(ring_cluster_list != NULL) { ++ struct ring_cluster *next = ring_cluster_list->next; ++ kfree(ring_cluster_list); ++ ring_cluster_list = next; ++ } ++ ++ set_skb_ring_handler(NULL); ++ set_buffer_ring_handler(NULL); ++ sock_unregister(PF_RING); ++ ++ printk("PF_RING shut down.\n"); ++} ++ ++/* ************************************ */ ++ ++static int __init ring_init(void) ++{ ++ printk("Welcome to PF_RING %s\n(C) 2004-05 L.Deri <deri@ntop.org>\n", ++ RING_VERSION); ++ ++ INIT_LIST_HEAD(&ring_table); ++ ring_cluster_list = NULL; ++ ++ sock_register(&ring_family_ops); ++ ++ set_skb_ring_handler(skb_ring_handler); ++ set_buffer_ring_handler(buffer_ring_handler); ++ ++ if(get_buffer_ring_handler() != buffer_ring_handler) { ++ printk("PF_RING: set_buffer_ring_handler FAILED\n"); ++ ++ set_skb_ring_handler(NULL); ++ set_buffer_ring_handler(NULL); ++ sock_unregister(PF_RING); ++ return -1; ++ } else { ++ printk("PF_RING: bucket length %d bytes\n", bucket_len); ++ printk("PF_RING: ring slots %d\n", num_slots); ++ printk("PF_RING: sample rate %d [1=no sampling]\n", sample_rate); ++ printk("PF_RING: capture TX %s\n", ++ enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]"); ++ printk("PF_RING: transparent mode %s\n", ++ transparent_mode ? "Yes" : "No"); ++ ++ printk("PF_RING initialized correctly.\n"); ++ return 0; ++ } ++} ++ ++module_init(ring_init); ++module_exit(ring_exit); ++MODULE_LICENSE("GPL"); ++ ++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) ++MODULE_ALIAS_NETPROTO(PF_RING); ++#endif diff --git a/openwrt/target/linux/generic-2.6/patches/105-netfilter_time.patch b/openwrt/target/linux/generic-2.6/patches/105-netfilter_time.patch new file mode 100644 index 0000000000..34257ee93c --- /dev/null +++ b/openwrt/target/linux/generic-2.6/patches/105-netfilter_time.patch @@ -0,0 +1,241 @@ +diff -urN linux-2.6.15-rc6.orig/net/ipv4/netfilter/ipt_time.c linux-2.6.15-rc6/net/ipv4/netfilter/ipt_time.c +--- linux-2.6.15-rc6.orig/net/ipv4/netfilter/ipt_time.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.15-rc6/net/ipv4/netfilter/ipt_time.c 2006-01-07 13:02:59.000000000 +0100 +@@ -0,0 +1,178 @@ ++/* ++ This is a module which is used for time matching ++ It is using some modified code from dietlibc (localtime() function) ++ that you can find at http://www.fefe.de/dietlibc/ ++ This file is distributed under the terms of the GNU General Public ++ License (GPL). Copies of the GPL can be obtained from: ftp://prep.ai.mit.edu/pub/gnu/GPL ++ 2001-05-04 Fabrice MARIE <fabrice@netfilter.org> : initial development. ++ 2001-21-05 Fabrice MARIE <fabrice@netfilter.org> : bug fix in the match code, ++ thanks to "Zeng Yu" <zengy@capitel.com.cn> for bug report. ++ 2001-26-09 Fabrice MARIE <fabrice@netfilter.org> : force the match to be in LOCAL_IN or PRE_ROUTING only. ++ 2001-30-11 Fabrice : added the possibility to use the match in FORWARD/OUTPUT with a little hack, ++ added Nguyen Dang Phuoc Dong <dongnd@tlnet.com.vn> patch to support timezones. ++ 2004-05-02 Fabrice : added support for date matching, from an idea of Fabien COELHO. ++*/ ++ ++#include <linux/module.h> ++#include <linux/skbuff.h> ++#include <linux/netfilter_ipv4/ip_tables.h> ++#include <linux/netfilter_ipv4/ipt_time.h> ++#include <linux/time.h> ++ ++MODULE_AUTHOR("Fabrice MARIE <fabrice@netfilter.org>"); ++MODULE_DESCRIPTION("Match arrival timestamp/date"); ++MODULE_LICENSE("GPL"); ++ ++struct tm ++{ ++ int tm_sec; /* Seconds. [0-60] (1 leap second) */ ++ int tm_min; /* Minutes. [0-59] */ ++ int tm_hour; /* Hours. [0-23] */ ++ int tm_mday; /* Day. [1-31] */ ++ int tm_mon; /* Month. [0-11] */ ++ int tm_year; /* Year - 1900. */ ++ int tm_wday; /* Day of week. [0-6] */ ++ int tm_yday; /* Days in year.[0-365] */ ++ int tm_isdst; /* DST. [-1/0/1]*/ ++ ++ long int tm_gmtoff; /* we don't care, we count from GMT */ ++ const char *tm_zone; /* we don't care, we count from GMT */ ++}; ++ ++void ++localtime(const u32 time, struct tm *r); ++ ++static int ++match(const struct sk_buff *skb, ++ const struct net_device *in, ++ const struct net_device *out, ++ const void *matchinfo, ++ int offset, ++ int *hotdrop) ++{ ++ const struct ipt_time_info *info = matchinfo; /* match info for rule */ ++ struct tm currenttime; /* time human readable */ ++ u_int8_t days_of_week[7] = {64, 32, 16, 8, 4, 2, 1}; ++ u_int16_t packet_time; ++ ++ /* We might not have a timestamp, get one */ ++ if (skb->tstamp.off_sec == 0) ++ __net_timestamp((struct sk_buff *)skb); ++ ++ /* First we make sure we are in the date start-stop boundaries */ ++ if ((skb->tstamp.off_sec < info->date_start) || (skb->tstamp.off_sec > info->date_stop)) ++ return 0; /* We are outside the date boundaries */ ++ ++ /* Transform the timestamp of the packet, in a human readable form */ ++ localtime(skb->tstamp.off_sec, ¤ttime); ++ ++ /* check if we match this timestamp, we start by the days... */ ++ if ((days_of_week[currenttime.tm_wday] & info->days_match) != days_of_week[currenttime.tm_wday]) ++ return 0; /* the day doesn't match */ ++ ++ /* ... check the time now */ ++ packet_time = (currenttime.tm_hour * 60) + currenttime.tm_min; ++ if ((packet_time < info->time_start) || (packet_time > info->time_stop)) ++ return 0; ++ ++ /* here we match ! */ ++ return 1; ++} ++ ++static int ++checkentry(const char *tablename, ++ const struct ipt_ip *ip, ++ void *matchinfo, ++ unsigned int matchsize, ++ unsigned int hook_mask) ++{ ++ struct ipt_time_info *info = matchinfo; /* match info for rule */ ++ ++ /* First, check that we are in the correct hooks */ ++ if (hook_mask ++ & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))) ++ { ++ printk("ipt_time: error, only valid for PRE_ROUTING, LOCAL_IN, FORWARD and OUTPUT)\n"); ++ return 0; ++ } ++ ++ /* Check the size */ ++ if (matchsize != IPT_ALIGN(sizeof(struct ipt_time_info))) ++ return 0; ++ /* Now check the coherence of the data ... */ ++ if ((info->time_start > 1439) || /* 23*60+59 = 1439*/ ++ (info->time_stop > 1439)) ++ { ++ printk(KERN_WARNING "ipt_time: invalid argument\n"); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static struct ipt_match time_match = { ++ .name = "time", ++ .match = &match, ++ .checkentry = &checkentry, ++ .me = THIS_MODULE ++}; ++ ++static int __init init(void) ++{ ++ printk("ipt_time loading\n"); ++ return ipt_register_match(&time_match); ++} ++ ++static void __exit fini(void) ++{ ++ ipt_unregister_match(&time_match); ++ printk("ipt_time unloaded\n"); ++} ++ ++module_init(init); ++module_exit(fini); ++ ++ ++/* The part below is borowed and modified from dietlibc */ ++ ++/* seconds per day */ ++#define SPD 24*60*60 ++ ++void ++localtime(const u32 time, struct tm *r) { ++ u32 i, timep; ++ extern struct timezone sys_tz; ++ const unsigned int __spm[12] = ++ { 0, ++ (31), ++ (31+28), ++ (31+28+31), ++ (31+28+31+30), ++ (31+28+31+30+31), ++ (31+28+31+30+31+30), ++ (31+28+31+30+31+30+31), ++ (31+28+31+30+31+30+31+31), ++ (31+28+31+30+31+30+31+31+30), ++ (31+28+31+30+31+30+31+31+30+31), ++ (31+28+31+30+31+30+31+31+30+31+30), ++ }; ++ register u32 work; ++ ++ timep = time - (sys_tz.tz_minuteswest * 60); ++ work=timep%(SPD); ++ r->tm_sec=work%60; work/=60; ++ r->tm_min=work%60; r->tm_hour=work/60; ++ work=timep/(SPD); ++ r->tm_wday=(4+work)%7; ++ for (i=1970; ; ++i) { ++ register time_t k= (!(i%4) && ((i%100) || !(i%400)))?366:365; ++ if (work>k) ++ work-=k; ++ else ++ break; ++ } ++ r->tm_year=i-1900; ++ for (i=11; i && __spm[i]>work; --i) ; ++ r->tm_mon=i; ++ r->tm_mday=work-__spm[i]+1; ++} +diff -urN linux-2.6.15-rc6.orig/net/ipv4/netfilter/Kconfig linux-2.6.15-rc6/net/ipv4/netfilter/Kconfig +--- linux-2.6.15-rc6.orig/net/ipv4/netfilter/Kconfig 2006-01-07 13:00:21.000000000 +0100 ++++ linux-2.6.15-rc6/net/ipv4/netfilter/Kconfig 2006-01-07 13:05:07.000000000 +0100 +@@ -278,6 +278,22 @@ + + To compile it as a module, choose M here. If unsure, say N. + ++ ++config IP_NF_MATCH_TIME ++ tristate 'TIME match support' ++ depends on IP_NF_IPTABLES ++ help ++ This option adds a `time' match, which allows you ++ to match based on the packet arrival time/date ++ (arrival time/date at the machine which netfilter is running on) or ++ departure time/date (for locally generated packets). ++ ++ If you say Y here, try iptables -m time --help for more information. ++ If you want to compile it as a module, say M here and read ++ ++ Documentation/modules.txt. If unsure, say `N'. ++ ++ + config IP_NF_MATCH_RECENT + tristate "recent match support" + depends on IP_NF_IPTABLES +diff -urN linux-2.6.15-rc6.orig/net/ipv4/netfilter/Makefile linux-2.6.15-rc6/net/ipv4/netfilter/Makefile +--- linux-2.6.15-rc6.orig/net/ipv4/netfilter/Makefile 2006-01-07 13:00:21.000000000 +0100 ++++ linux-2.6.15-rc6/net/ipv4/netfilter/Makefile 2006-01-07 13:03:50.000000000 +0100 +@@ -58,6 +58,7 @@ + obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o + obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o + obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o ++obj-$(CONFIG_IP_NF_MATCH_TIME) += ipt_time.o + obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o + obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o + obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o +diff -urN linux-2.6.15-rc6.orig/include/linux/netfilter_ipv4/ipt_time.h linux-2.6.15-rc6/include/linux/netfilter_ipv4/ipt_time.h +--- linux-2.6.15-rc6.orig/include/linux/netfilter_ipv4/ipt_time.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.15-rc6/include/linux/netfilter_ipv4/ipt_time.h 2006-01-07 13:02:14.000000000 +0100 +@@ -0,0 +1,18 @@ ++#ifndef __ipt_time_h_included__ ++#define __ipt_time_h_included__ ++ ++ ++struct ipt_time_info { ++ u_int8_t days_match; /* 1 bit per day. -SMTWTFS */ ++ u_int16_t time_start; /* 0 < time_start < 23*60+59 = 1439 */ ++ u_int16_t time_stop; /* 0:0 < time_stat < 23:59 */ ++ ++ /* FIXME: Keep this one for userspace iptables binary compability: */ ++ u_int8_t kerneltime; /* ignore skb time (and use kerneltime) or not. */ ++ ++ time_t date_start; ++ time_t date_stop; ++}; ++ ++ ++#endif /* __ipt_time_h_included__ */ |