A Simple Block Driver for Linux Kernel 2.6.31

Programming Amazon Web Services
Linux Device Drivers, 3rd Edition

My current work involves writing my first Linux block device driver. Going to the web to find a sample, I discovered Jonathan Corbet’s Simple Block Driver article with its associated block driver example code. It’s a nice succinct implementation of a ramdisk – pretty much the simplest working block device. There’s only one problem, though: the article was written in 2003, when kernel 2.6.0 was the new kid on the block. Trying to build it on openSUSE 11.2 with kernel 2.6.31 just produced a slew of compile errors. A bit of research revealed that there were major changes to the kernel block device interface in 2.6.31, so I would have to port the example to get it working.

About a day and a half of poring through the kernel source and the excellent LDD3 (hardcopy) later, I had a running simple block driver for kernel 2.6.31. I’ve also tested it successfully on SUSE 11 SP1 Beta, which uses kernel 2.6.32. Here’s the code, followed by instructions for getting it working.

sbd.c

/*
 * A sample, extra-simple block driver. Updated for kernel 2.6.31.
 *
 * (C) 2003 Eklektix, Inc.
 * (C) 2010 Pat Patterson <pat at superpat dot com>
 * Redistributable under the terms of the GNU GPL.
 */

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>

#include <linux/kernel.h> /* printk() */
#include <linux/fs.h>     /* everything... */
#include <linux/errno.h>  /* error codes */
#include <linux/types.h>  /* size_t */
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>

MODULE_LICENSE("Dual BSD/GPL");

/* Driver revision string; nothing else in this file reads it. */
static char *Version = "1.4";

/*
 * Load-time tunables (e.g. "insmod sbd.ko nsectors=2048").
 * All three are registered with permission bits 0.
 */
static int major_num;			/* passed to register_blkdev() */
static int logical_block_size = 512;	/* block size advertised on the queue */
static int nsectors = 1024;		/* How big the drive is */

module_param(major_num, int, 0);
module_param(logical_block_size, int, 0);
module_param(nsectors, int, 0);

/*
 * We can tweak our hardware sector size, but the kernel talks to us
 * in terms of small sectors, always.  This is the size, in bytes, of
 * one of those kernel sectors; sbd_getgeo() uses it when converting
 * between the configurable logical block size and kernel sectors.
 */
#define KERNEL_SECTOR_SIZE 512

/*
 * Our request queue.  Created by blk_init_queue() in sbd_init() with
 * sbd_request() as its handler, attached to the gendisk, and released
 * by blk_cleanup_queue() in sbd_exit().
 */
static struct request_queue *Queue;

/*
 * The internal representation of our device: a ramdisk backed by a
 * single vmalloc'ed buffer.
 */
struct sbd_device {
	unsigned long size;	/* size of the backing store, in bytes */
	spinlock_t lock;	/* lock handed to blk_init_queue() */
	u8 *data;		/* backing store allocated in sbd_init() */
	struct gendisk *gd;	/* disk registered with add_disk() */
};

/* The one and only device instance managed by this module. */
static struct sbd_device Device;

/*
 * Handle an I/O request: copy data between the ramdisk backing store
 * and a request buffer.
 *
 * @dev:    device whose backing store (dev->data, dev->size bytes) is used
 * @sector: first sector of the transfer, in KERNEL_SECTOR_SIZE units
 * @nsect:  number of KERNEL_SECTOR_SIZE sectors to transfer
 * @buffer: kernel buffer to copy from (write) or into (read)
 * @write:  non-zero copies buffer -> device, zero copies device -> buffer
 *
 * A transfer that does not fit inside the backing store is logged and
 * dropped without touching any memory.
 */
static void sbd_transfer(struct sbd_device *dev, sector_t sector,
		unsigned long nsect, char *buffer, int write) {
	/*
	 * The kernel always talks to us in 512-byte sectors, whatever
	 * logical_block_size is set to, so positions and lengths must be
	 * scaled by KERNEL_SECTOR_SIZE.  Scaling by logical_block_size
	 * produced wrong offsets and over-long copies for any other size.
	 */
	unsigned long offset = sector * KERNEL_SECTOR_SIZE;
	unsigned long nbytes = nsect * KERNEL_SECTOR_SIZE;

	/* Written so that offset + nbytes cannot wrap around. */
	if (offset > dev->size || nbytes > dev->size - offset) {
		printk (KERN_NOTICE "sbd: Beyond-end write (%lu %lu)\n", offset, nbytes);
		return;
	}
	if (write)
		memcpy(dev->data + offset, buffer, nbytes);
	else
		memcpy(buffer, dev->data + offset, nbytes);
}

/*
 * Request function installed on the queue by blk_init_queue().
 *
 * Drains the queue: every filesystem request is served chunk by chunk
 * through sbd_transfer(); any other request type is failed with -EIO.
 *
 * @q: the request queue to service
 *
 * NOTE: readers report that kernels 3.15 and later no longer have
 * req->buffer and need bio_data(req->bio) in its place.
 */
static void sbd_request(struct request_queue *q) {
	struct request *req;

	req = blk_fetch_request(q);
	while (req != NULL) {
		/*
		 * blk_fs_request() was removed in 2.6.36 - many thanks to
		 * Christian Paro for the heads up and fix - so test the
		 * command type directly.
		 */
		if (req->cmd_type != REQ_TYPE_FS) {
			printk (KERN_NOTICE "Skip non-CMD request\n");
			__blk_end_request_all(req, -EIO);
			/*
			 * This request is finished; move on to the next one.
			 * Looping again with the same pointer would end the
			 * same request over and over.
			 */
			req = blk_fetch_request(q);
			continue;
		}
		sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
				req->buffer, rq_data_dir(req));
		/*
		 * Complete the chunk just transferred.  A zero return means
		 * nothing is left in this request, so fetch the next one;
		 * otherwise loop again on the same request.
		 */
		if ( ! __blk_end_request_cur(req, 0) ) {
			req = blk_fetch_request(q);
		}
	}
}

/*
 * The HDIO_GETGEO ioctl is handled in blkdev_ioctl(), which
 * calls this. We need to implement getgeo, since we can't
 * use tools such as fdisk to partition the drive otherwise.
 *
 * @block_device: the block device being queried (unused)
 * @geo:          geometry structure to fill in
 *
 * Always returns 0.
 */
int sbd_getgeo(struct block_device * block_device, struct hd_geometry * geo) {
	unsigned long size;

	/*
	 * We have no real geometry, of course, so make something up:
	 * 4 heads x 16 sectors gives 64 sectors per cylinder.
	 * Device.size is in bytes, so convert it to 512-byte sectors
	 * before deriving the cylinder count.
	 */
	size = Device.size / KERNEL_SECTOR_SIZE;
	geo->cylinders = size >> 6;
	geo->heads = 4;
	geo->sectors = 16;
	geo->start = 0;
	return 0;
}

/*
 * The device operations structure.  Only geometry queries are
 * implemented; it is installed as the gendisk's fops in sbd_init().
 */
static struct block_device_operations sbd_ops = {
	.getgeo = sbd_getgeo,
	.owner  = THIS_MODULE,
};

/*
 * Module entry point: allocate the ramdisk, create its request queue,
 * register the block major and publish the disk as "sbd0".
 *
 * Returns 0 on success, -EINVAL for unusable module parameters, or a
 * negative errno from the failing step.  On failure everything acquired
 * so far is released again.
 */
static int __init sbd_init(void) {
	int ret;

	/*
	 * Reject parameters that cannot describe a disk.  The block size
	 * must be a whole number of kernel sectors, and the total size must
	 * fit in an unsigned long.
	 */
	if (major_num < 0 || nsectors <= 0 ||
	    logical_block_size < KERNEL_SECTOR_SIZE ||
	    logical_block_size % KERNEL_SECTOR_SIZE != 0 ||
	    (unsigned long)nsectors > ULONG_MAX / logical_block_size) {
		printk(KERN_WARNING "sbd: invalid module parameters\n");
		return -EINVAL;
	}

	/*
	 * Set up our internal device.  The multiplication is done in
	 * unsigned long so large ramdisks do not overflow an int.
	 */
	Device.size = (unsigned long)nsectors * logical_block_size;
	spin_lock_init(&Device.lock);
	Device.data = vmalloc(Device.size);
	if (Device.data == NULL)
		return -ENOMEM;
	/* Do not expose stale kernel memory through the new disk. */
	memset(Device.data, 0, Device.size);
	/*
	 * Get a request queue.
	 */
	Queue = blk_init_queue(sbd_request, &Device.lock);
	if (Queue == NULL) {
		ret = -ENOMEM;
		goto out_free;
	}
	blk_queue_logical_block_size(Queue, logical_block_size);
	/*
	 * Get registered.  register_blkdev() hands back the allocated major
	 * only when asked for a dynamic one (major_num == 0).  For an
	 * explicitly requested major it returns 0 on success, so the
	 * requested value must be kept.
	 */
	ret = register_blkdev(major_num, "sbd");
	if (ret < 0) {
		printk(KERN_WARNING "sbd: unable to get major number\n");
		goto out_cleanup_queue;
	}
	if (major_num == 0)
		major_num = ret;
	/*
	 * And the gendisk structure.
	 */
	Device.gd = alloc_disk(16);
	if (!Device.gd) {
		ret = -ENOMEM;
		goto out_unregister;
	}
	Device.gd->major = major_num;
	Device.gd->first_minor = 0;
	Device.gd->fops = &sbd_ops;
	Device.gd->private_data = &Device;
	snprintf(Device.gd->disk_name, sizeof(Device.gd->disk_name), "sbd0");
	/* Capacity is expressed in 512-byte kernel sectors. */
	set_capacity(Device.gd,
			(sector_t)nsectors * (logical_block_size / KERNEL_SECTOR_SIZE));
	Device.gd->queue = Queue;
	add_disk(Device.gd);

	return 0;

	/* Error unwinding, in reverse order of acquisition. */
out_unregister:
	unregister_blkdev(major_num, "sbd");
out_cleanup_queue:
	blk_cleanup_queue(Queue);
out_free:
	vfree(Device.data);
	return ret;
}

/*
 * Module exit point: release everything sbd_init() set up - the
 * gendisk, the block major registration, the request queue and the
 * vmalloc'ed backing store.
 */
static void __exit sbd_exit(void)
{
	/* Take the disk down and drop our reference to it. */
	del_gendisk(Device.gd);
	put_disk(Device.gd);
	/* Give back the major number obtained from register_blkdev(). */
	unregister_blkdev(major_num, "sbd");
	/* Release the request queue created by blk_init_queue(). */
	blk_cleanup_queue(Queue);
	/* Finally free the ramdisk contents. */
	vfree(Device.data);
}

/* Register sbd_init()/sbd_exit() as the module's load and unload hooks. */
module_init(sbd_init);
module_exit(sbd_exit);

Makefile

# Out-of-tree kbuild makefile for the sbd module.
obj-m := sbd.o
KDIR := /lib/modules/$(shell uname -r)/build
PWD := $(shell pwd)

.PHONY: default clean

# M= names the external module directory; it replaces the legacy SUBDIRS=.
default:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean

There are two main areas of change compared with Jonathan’s original:

  • sbd_request() uses the blk_fetch_request(), blk_rq_pos(), blk_rq_cur_sectors() and __blk_end_request_cur() functions rather than elv_next_request(), req->sector, req->current_nr_sectors and end_request() respectively. The structure of the loop also changes so we handle each sector from the request individually. One outstanding task for me is to investigate whether req->buffer holds all of the data for the entire request, so I can handle it all in one shot, rather than sector-by-sector. My first attempt resulted in the (virtual) machine hanging when I installed the driver, so I clearly need to do some more work in this area!
  • The driver implements the getgeo operation (in sbd_getgeo), rather than ioctl, since blkdev_ioctl now handles HDIO_GETGEO by calling the driver’s getgeo function. This is a nice simplification since it moves a copy_to_user call out of each driver and into the kernel.

Before building, ensure you have the kernel source, headers, gcc, make etc – if you’ve read this far, you likely have all this and/or know how to get it, so I won’t spell it all out here. You’ll also need to go to the kernel source directory and do the following to prepare your build environment, if you have not already done so:

cd /usr/src/`uname -r`
make oldconfig && make prepare

Now, back in the directory with the sbd source, you can build it:

make -C /lib/modules/`uname -r`/build M=`pwd` modules

You’ll see a warning about ‘Version’ being defined, but not used, but don’t worry about that :-). Now we can load the module, partition the ramdisk, make a filesystem, mount it, and create a file:

opensuse:/home/pat/sbd # insmod sbd.ko
opensuse:/home/pat/sbd # fdisk /dev/sbd0
Device contains neither a valid DOS partition table, nor Sun, SGI or OSF disklabel
Building a new DOS disklabel with disk identifier 0x5f93978c.
Changes will remain in memory only, until you decide to write them.
After that, of course, the previous content won't be recoverable.

Warning: invalid flag 0x0000 of partition table 4 will be corrected by w(rite)

Command (m for help): n
Command action
   e   extended
   p   primary partition (1-4)
p
Partition number (1-4): 1
First cylinder (1-16, default 1):
Using default value 1
Last cylinder, +cylinders or +size{K,M,G} (1-16, default 16):
Using default value 16

Command (m for help): w
The partition table has been altered!

Calling ioctl() to re-read partition table.
Syncing disks.
opensuse:/home/pat/sbd # mkfs /dev/sbd0p1
mke2fs 1.41.9 (22-Aug-2009)
Filesystem label=
OS type: Linux
Block size=1024 (log=0)
Fragment size=1024 (log=0)
64 inodes, 504 blocks
25 blocks (4.96%) reserved for the super user
First data block=1
Maximum filesystem blocks=524288
1 block group
8192 blocks per group, 8192 fragments per group
64 inodes per group

Writing inode tables: done
Writing superblocks and filesystem accounting information: done

This filesystem will be automatically checked every 24 mounts or
180 days, whichever comes first.  Use tune2fs -c or -i to override.
opensuse:/home/pat/sbd # mount /dev/sbd0p1 /mnt
opensuse:/home/pat/sbd # echo Hi > /mnt/file1
opensuse:/home/pat/sbd # cat /mnt/file1
Hi
opensuse:/home/pat/sbd # ls -l /mnt
total 13
-rw-r--r-- 1 root root     3 2010-04-29 07:04 file1
drwx------ 2 root root 12288 2010-04-29 07:04 lost+found
opensuse:/home/pat/sbd # umount /mnt
opensuse:/home/pat/sbd # rmmod sbd

Hopefully this all works for you, and is as useful for you as it has been for me. Many thanks to Jonathan for the original version and the excellent LDD3. One final piece of housekeeping – although the comment at the top of sbd.c mentions only GPL, the MODULE_LICENSE macro specifies “Dual BSD/GPL”. I am interpreting the original code as being under the dual GPL/BSD license and this version is similarly dual licensed.

UPDATE (Feb 5 2011) See the comment by Michele regarding changes to logical_block_size!

UPDATE (Apr 23 2015) See the comment by Sarge regarding changes for kernel 3.15-rc2 and later

96 Replies to “A Simple Block Driver for Linux Kernel 2.6.31”

  1. This is a tremendous help for me while I am trying to work in 2.6.32 kernel and trying to port the driver code in the book (LDD3). BIG THANKS, you saved my time friend.

  2. Thanks, this code was a huge help. One important case it doesn’t handle though is a detach/rmmod while an I/O is in progress.

  3. Pat, I’m trying to build a simple char device driver based on LDD3 book.
    But I got a problem in compiling.
    When I call “make”, it produce nothing.
    I have Ubuntu 9.10 (Lucid Lynx) kernel 2.6.32(standard fresh installed).
    Could you give some advices what should I prepare(maybe kernel tree or linux header, etc) first?

    Thanks.

  4. Should my makefile look like

    obj-m := sbd.o
    KDIR := /lib/modules/$(shell uname -r)/build
    PWD := $(shell pwd)
    default:
    $(MAKE) -C $(KDIR) SUBDIRS=$(PWD) modules

  5. Hi Pat, I got this error when trying to do “make oldconfig && make prepare”

    root@kyu:/usr/src/linux-headers-2.6.32-21# make oldconfig && make prepare
    scripts/kconfig/conf -o arch/x86/Kconfig
    #
    # configuration written to .config
    #
    scripts/kconfig/conf -s arch/x86/Kconfig
    CHK include/linux/version.h
    CHK include/linux/utsrelease.h
    SYMLINK include/asm -> include/asm-x86
    make[1]: *** No rule to make target `kernel/bounds.c’, needed by `kernel/bounds.s’. Stop.
    make: *** [prepare0] Error 2
    root@kyu:/usr/src/linux-headers-2.6.32-21#

    Can you please point what is wrong with my work?

  6. Hi Pat,

    Thanks..that was a great starting point for me…Please suggest any enhancement that I can do on this code where I can go much deeper into device driver modules domain…

    Also, on command line we have M=`pwd` where M=current directory. What this command option informs?

    I am new to device driver programming. As per another member who commented above, it doesn’t handle though is a detach/rmmod while an I/O is in progress. What does it exactly mean? How do we test it?

    Please advice,

    Thanks

  7. Hi Veb – the M=`pwd` tells the Linux build system to build modules in the current directory. Handling detach/rmmod while an I/O is in progress is beyond the scope of a simple example. If you’re looking for more comprehensive examples, then /usr/src/linux/drivers is a good place to start.

  8. The linux documentation itself is not updated. Thanks a lot for the help. It is a good starting point for beginners like me. Now, I need to figure out, how to go ahead from here…

  9. First of all, thanks a lot for this updated version. Linux kernel seems to change at a dreadful speed.
    I had a few issues with the driver, in that it will not work if you change the logical_block_size. The main reason is that in the sbd_transfer function
    unsigned long offset = sector * logical_block_size;
    unsigned long nbytes = nsect * logical_block_size;
    should become:
    unsigned long offset = sector * KERNEL_SECTOR_SIZE;
    unsigned long nbytes = nsect * KERNEL_SECTOR_SIZE;

    Also the following line in sbd_getgeo is fishy:
    size = Device.size * (logical_block_size / KERNEL_SECTOR_SIZE);
    size is probably intended to be in 512 byte sectors, but Device.size is the size in bytes…
    I would change it with
    size = Device.size / KERNEL_SECTOR_SIZE;

    (I haven’t tested this last one.)

    Thanks again for the huge help,

    Michele

  10. Hi Michele – glad you found this useful, and thanks for the advice regarding logical_block_size – I no longer have everything set up to test this, but I’ve put a note at the end of the blog entry directing folks to your comment. Thanks for taking the time 🙂

  11. Thanks a lot, I was stuck using APIs from the older version of Linux and didnt even realize until i saw this page that elv_next_request was outdated.. u saved.. my college project..

  12. Thanks, Christian – I just updated the entry appropriately. Glad to see folks are still finding this useful!

  13. hi, pat.

    I’m trying to compile a block device driver and when i do the make it gives me these errors.

    root@ubuntu:/home/sergio12345/Desktop/practica# make
    make -C /lib/modules/2.6.30-020630-generic/build SUBDIRS=/home/sergio12345/Desktop/practica modules
    make[1]: Entering directory `/usr/src/linux-headers-2.6.30-020630-generic’
    CC [M] /home/sergio12345/Desktop/practica/bd.o

    /home/sergio12345/Desktop/practica/bd.c: In function ‘sbd_request’:
    /home/sergio12345/Desktop/practica/bd.c:74: error: implicit declaration of function ‘blk_fetch_request’
    /home/sergio12345/Desktop/practica/bd.c:74: warning: assignment makes pointer from integer without a cast
    /home/sergio12345/Desktop/practica/bd.c:81: error: implicit declaration of function ‘__blk_end_request_all’
    /home/sergio12345/Desktop/practica/bd.c:84: error: implicit declaration of function ‘blk_rq_pos’
    /home/sergio12345/Desktop/practica/bd.c:84: error: implicit declaration of function ‘blk_rq_cur_sectors’
    /home/sergio12345/Desktop/practica/bd.c:86: error: implicit declaration of function ‘__blk_end_request_cur’
    /home/sergio12345/Desktop/practica/bd.c:87: warning: assignment makes pointer from integer without a cast
    /home/sergio12345/Desktop/practica/bd.c: In function ‘sbd_init’:
    /home/sergio12345/Desktop/practica/bd.c:132: error: implicit declaration of function ‘blk_queue_logical_block_size’
    make[2]: *** [/home/sergio12345/Desktop/practica/bd.o] Error 1
    make[1]: *** [_module_/home/sergio12345/Desktop/practica] Error 2
    make[1]: Leaving directory `/usr/src/linux-headers-2.6.30-020630-generic’
    make: *** [default] Error 2
    root@ubuntu:/home/sergio12345/Desktop/practica#

    thanks for ur help.

  14. This has been a great help!! I am writing a block driver for a solid state disk on our computer boards. Was stuck with errors for elv_next_request and end_request. This saved me a lot of time!!! Thanks.

  15. I have built my driver and allowed the major number to be allocated automatically. My system allocates number 251 and it is displayed under /proc/devices. When I look under /dev it displays major number 259. Do you know why the difference?

  16. hi.. thanx for sharing code. i have a doubt that if i want to use make_request function for this, which will eliminate the io queuing. so is this happen in different way in 2.6.31.

    1. Hi Neeraj – Yes, you could use make_request if it is a better choice for what you’re doing. My purpose here was simply to port the sample to 2.6.31 rather than give a comprehensive account of block driver creation.

  17. Hi,

    Thanks for sharing your experience.

    1. I am trying out my first Block Device and followed your example and it is asking for a filesystem type on mounting the sbd0p1 device:
    yangcomputer:~/exp # mount /dev/sbd0p1 /mnt
    mount: you must specify the filesystem type

    2. This example applies to a ramdisk. Do you have any idea how I can implement a similar block device on a real hard disk? Which part of the code do I need to change?

    Thanks,

    Yan

    1. Hi Yan,

      1. Did you do fdisk and mkfs to create a partition and filesystem respectively? It sounds like you might have forgotten one or both steps.

      2. I don’t have any experience implementing a driver for a real hard disk – when I did this I was creating a network block device. You should probably look at the SCSI and IDE block drivers in the Linux source tree.

      Cheers,

      Pat

  18. Hi Pat,

    I did use fdisk to format and partititon /dev/sbd0 however it complained about:
    fdisk /dev/sbd0
    You must set cylinders.
    You can do this from the extra functions menu.

    Command (m for help):

    So I went into Extra menu to set the cylinders to 1.

    Am I doing it right?

    Thanks,

    Yan

  19. Hello Pat
    after going these(Command (m for help): w
    The partition table has been altered!

    Calling ioctl() to re-read partition table.
    Syncing disks.) command when i reach at

    opensuse:/home/pat/sbd # mkfs /dev/sbd0p1
    and when i put that path (actually my system path is-deepak@deepak-Satellite-C665:~/ddd/dp$ ls
    Makefile modules.order Module.symvers sbd.c sbd.ko sbd.mod.c sbd.mod.o sbd.o
    deepak@deepak-Satellite-C665:~/ddd/dp$ pwd
    /home/deepak/ddd/dp)
    it showing deepak@deepak-Satellite-C665:~$ /home/deepak/ddd/dp/sbd # mkfs /dev/sbd0p2
    bash: /home/deepak/ddd/dp/sbd: No such file or directory
    deepak@deepak-Satellite-C665:~$
    that error what i have to do..

  20. Hi,
    when i mount this using #sudo mount /dev/sbd0p2
    i get an error and it is
    180 days, whichever comes first. Use tune2fs -c or -i to override.
    deepak@deepak-Satellite-C665:~/ddd/dp$ sudo mount /dev/sbd0p2
    mount: can’t find /dev/sbd0p2 in /etc/fstab or /etc/mtab
    i need help

  21. Hi Deepak – I can’t see anything obviously wrong in what you’re doing. Go back over the instructions and double check that you’ve followed them exactly. What version of Linux are you using?

  22. Sir,
    I have take your code and cpmpile but I get error that ” fatal error: linux/module.h: No such file or directory
    compilation terminated.” So, what can I do?

  23. Hi Pat, looks like I found a bug 🙂 You should change line 140 like:

    major_num = register_blkdev(major_num, “sbd”);
    if (major_num < 0) { // use < instead of <=

    register_blkdev returns 0 when user requests a specific (non-zero) major number.

  24. Hi Pat,

    I am not understanding one thing that what difference it makes if we execute blk_fetch_request function inside while loop like LDD3.

    If I do so, fdisk just hang.

  25. Hey Pat,

    Perhaps I’m missing something but….

    static void sbd_request(struct request_queue *q) {
    struct request *req;

    req = blk_fetch_request(q);
    while (req != NULL) {
    /* Are you sure you want to continue in the following block */
    if (req == NULL || (req->cmd_type != REQ_TYPE_FS)) {
    printk (KERN_NOTICE “Skip non-CMD request\n”);
    __blk_end_request_all(req, -EIO);
    continue; // this will skip an iteration and spin surely?
    }
    sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
    req->buffer, rq_data_dir(req));
    if ( ! __blk_end_request_cur(req, 0) ) {
    req = blk_fetch_request(q);
    }
    }
    }

    Would wrapping it in an if/else not be better such as:

    static void sbd_request(struct request_queue *q) {
    struct request *req;

    req = blk_fetch_request(q);
    while (req != NULL) {
    /* Are you sure you want to continue in the following block */
    if (req == NULL || (req->cmd_type != REQ_TYPE_FS)) {
    printk (KERN_NOTICE “Skip non-CMD request\n”);
    __blk_end_request_all(req, -EIO);
    } else {
    sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
    req->buffer, rq_data_dir(req));
    }
    if ( ! __blk_end_request_cur(req, 0) ) {
    req = blk_fetch_request(q);
    }
    }
    }

    1. Nick – I think you’re right on the possibility of spinning, but you wouldn’t want to do __blk_end_request_all() then __blk_end_request_cur(). Since I’m no longer working on this stuff and have no way of testing it, are you able to confirm that the following works?

      static void sbd_request(struct request_queue *q) {
      	struct request *req;
      
      	req = blk_fetch_request(q);
      	while (req != NULL) {
      		// blk_fs_request() was removed in 2.6.36 - many thanks to
      		// Christian Paro for the heads up and fix...
      		//if (!blk_fs_request(req)) {
      		if (req == NULL || (req->cmd_type != REQ_TYPE_FS)) {
      			printk (KERN_NOTICE "Skip non-CMD request\n");
      			__blk_end_request_all(req, -EIO);
      			break;
      		}
      		sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
      				req->buffer, rq_data_dir(req));
      		if ( ! __blk_end_request_cur(req, 0) ) {
      			req = blk_fetch_request(q);
      		}
      	}
      }
      
  26. Thank you for this, great job and it works properly.
    I try to run this driver on two machines, to impliment something like NBD “network block device”
    the problem is that if I am in the method sbd_transfer, “if I try to open a udp socket the system crashes and blocks completely. If i do this another where there is no problem
    do you have any idea??

  27. Hi ,
    I am new to the block device driver . When I was doing the insmod ,I am getting a system hang . If I comment out add_disk () , I could see the driver is inserting . Why the add_Disk () is causing hang .I am usinh redhat 2.6.18

    Thanks
    Pradeep

  28. I’m using 2.6.24. I built built this by using the source from http://lwn.net/Articles/58720/. All went well except that fdisk must have changed because I have to use x instead of n to create a new partition (typing n just says to use x). But in the x menu there is no way to create a new partition. Then I checked “man fdisk” but that also does not say how to create the partition.

    Can someone give me additional information on how to create the necessary partition with fdisk from Linux 2.6.24?

  29. Yes, it is simply because I don’t use Linux much and I have a 2.6.24 already loaded. I am trying to get some understanding of, what I call, “driver interface” code … I have always only been at the hardware driver level. If I try to load a later version without any experience I will certainly run into problems which will cost time that I don’t have.

    The x command in fdisk means “extra functionality (experts only)”

  30. Eddy – as I mention in the article, the block driver interface changed significantly in 2.6.31, so any work you’re doing in 2.6.24 will be wasted. Grab a VMware or VirtualBox image of a recent Ubuntu – you’ll have it spun up and working in no time.

  31. Hello,

    Can this driver be used to simulate a swap backing store?

    What I am looking for is a bare-bone example of a block device that can simulate a backing store. I will be later on extend this driver to add my own stuff.

    Thanks.

    1. Hi Hebbo – this is really the inverse of a swap backing store – it’s implementing a block device in RAM. You want to map memory to disk. I don’t think this is very helpful for you.

  32. I tried this code in 2.6.32-38,after insmod it is invoking request function for 12 times to read ,after doing fdisk its not creating sbd0p1 what might be issue n solution

  33. Works great in 3.10.14 kernel. Had to do one fix for very large ramdisk:

    Device.size = (long)nsectors * (long)logical_block_size;

  34. As of kernel version 3.15-rc2 and later, due to commit b4f42e2831ff9b9fa19252265d7c8985d47eefb9 in branch Linux-3.14.y, this is needed in sbd_request:
    Instead of:
    sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
    req->buffer, rq_data_dir(req));

    .. this:
    sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
    bio_data(req->bio), rq_data_dir(req));

  35. with this fix, everything builds and then runs just fine on Ubuntu Utopic, latest kernel for that distribution. Thanks very much for this; great job porting to later kernels.

  36. Just FYI and for completeness: the commit number in linux-stable git repo that removes blk_fs_request and other macros is 33659ebbae262228eef4e0fe990f393d1f0ed941 on August 7, 2010, in 2.6.36-rc1.

  37. (trying once more):
    It’s good that you are still in the loop here. I think there is a bug in your sbd_request function.

    You have:
    struct request *req;

    req = blk_fetch_request(q);
    while (req != NULL) {
    // blk_fs_request() was removed in 2.6.36 – many thanks to
    // Christian Paro for the heads up and fix…
    //if (!blk_fs_request(req)) {
    if (req == NULL || (req->cmd_type != REQ_TYPE_FS)) {
    printk (KERN_NOTICE “Skip non-CMD request\n”);
    __blk_end_request_all(req, -EIO);
    continue;
    }
    sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
    bio_data(req->bio), rq_data_dir(req));
    if ( ! __blk_end_request_cur(req, 0) ) {
    req = blk_fetch_request(q);
    }
    }

    in:
    if (req == NULL || (req->cmd_type != REQ_TYPE_FS)) {
    printk (KERN_NOTICE “Skip non-CMD request\n”);
    __blk_end_request_all(req, -EIO);
    continue;
    }

    if req is _not_ NULL, then the request will be ended, but req still has an unmodfied non-NULL value so, after the continue, it will go to the top of the loop which will then come right back into this block and perhaps crash the kernel if trying to end an already ended req is not handled right in the kernel. Otherwise, this is an endless loop. Unsure what you want to do here, as this is non-standard code from all of the other drivers I have been able to look at. Please advise.

  38. Wow, Sarge – you’re absolutely right. I think a req = blk_fetch_request(q); just before the continue would fix it… What do you think? Unfortunately, I’ve moved on from this work and have no real way to test it…

  39. I don’t know.. it seems to me that the entire request has been ended (call to __blk_end_request_all), so there should be no more sections left to process, and executing “break” instead of “continue” is probably the solution; basically ending request processing for this particular full request. It may not be a good idea to call fetch_request again on an already null or finished request. But, I also see a problem where the top of the loop tests for req != NULL and then immediately in the next if statement, tests for req == NULL. This will never be. So, here, req must be non-NULL.

    In any case, here is my new version. I tried to remove the “__” in front of the blk_end* calls, but that caused multiple kernel panics when the module was loaded, so I don’t know what that is all about, as the kernel source I have (Ubuntu 3.19.0-15-generic) shows identical code for both functions. No time to track that one down, and this works as it is.

    This new version is tested and it works, not disturbing a healthy system. I also managed to remove the compiler warning for the Version number (which I bumped up). All licenses are the same: dual BSD/GPL. I didn’t change or add to the copyrights because I want to remain anonymous.

    (No way to attach files here on this blog as far as I can see, so I’ll just post the text. You’ll need to reformat as you wish, especially if you want to re-post this in the main article.)
    — code starts —

    /*
    * A sample, extra-simple block driver. Updated for kernel 2.6.31.
    *
    * (C) 2003 Eklektix, Inc.
    * (C) 2010 Pat Patterson
    * Redistributable under the terms of the GNU GPL.
    */

    #include
    #include
    #include

    #include /* printk() */
    #include /* everything… */
    #include /* error codes */
    #include /* size_t */
    #include
    #include
    #include
    #include

    #include

    MODULE_LICENSE(“Dual BSD/GPL”);
    static char *Version __attribute__((unused)) = “1.5”;

    static int major_num = 0;
    module_param(major_num, int, 0);
    static int logical_block_size = 512;
    module_param(logical_block_size, int, 0);
    static int nsectors = 1024; /* How big the drive is */
    module_param(nsectors, int, 0);

    /*
    * We can tweak our hardware sector size, but the kernel talks to us
    * in terms of small sectors, always.
    */
    #define KERNEL_SECTOR_SIZE 512

    /*
    * Our request queue.
    */
    static struct request_queue *Queue;

    /*
    * The internal representation of our device.
    */
    static struct sbd_device {
    unsigned long size;
    spinlock_t lock;
    u8 *data;
    struct gendisk *gd;
    } Device;

    /*
    * Handle an I/O request.
    */
    static void sbd_transfer(struct sbd_device *dev, sector_t sector,
    unsigned long nsect, char *buffer, int write) {
    unsigned long offset = sector * logical_block_size;
    unsigned long nbytes = nsect * logical_block_size;

    if ((offset + nbytes) > dev->size) {
    printk (KERN_NOTICE “sbd: Beyond-end write (%ld %ld)\n”, offset, nbytes);
    return;
    }
    if (write)
    memcpy(dev->data + offset, buffer, nbytes);
    else
    memcpy(buffer, dev->data + offset, nbytes);
    }

    static void sbd_request(struct request_queue *q) {
    struct request *req;

    req = blk_fetch_request(q);
    while (req != NULL) {
    // blk_fs_request() was removed in 2.6.36 – many thanks to
    // Christian Paro for the heads up and fix…
    //if (!blk_fs_request(req)) {
    if (req->cmd_type != REQ_TYPE_FS) {
    printk (KERN_NOTICE “Skip non-CMD request\n”);
    __blk_end_request_all(req, -EIO);
    req = blk_fetch_request(q);
    continue;
    }

    sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
    #if LINUX_VERSION_CODE buffer,
    #else
    bio_data(req->bio),
    #endif
    rq_data_dir(req));
    if ( ! __blk_end_request_cur(req, 0) ) {
    req = blk_fetch_request(q);
    }
    }
    }

    /*
    * The HDIO_GETGEO ioctl is handled in blkdev_ioctl(), which
    * calls this. We need to implement getgeo, since we can’t
    * use tools such as fdisk to partition the drive otherwise.
    */
    int sbd_getgeo(struct block_device * block_device, struct hd_geometry * geo) {
    long size;

    /* We have no real geometry, of course, so make something up. */
    size = Device.size * (logical_block_size / KERNEL_SECTOR_SIZE);
    geo->cylinders = (size & ~0x3f) >> 6;
    geo->heads = 4;
    geo->sectors = 16;
    geo->start = 0;
    return 0;
    }

    /*
    * The device operations structure.
    */
    static struct block_device_operations sbd_ops = {
    .owner = THIS_MODULE,
    .getgeo = sbd_getgeo
    };

    static int __init sbd_init(void) {
    /*
    * Set up our internal device.
    */
    Device.size = nsectors * logical_block_size;
    spin_lock_init(&Device.lock);
    Device.data = vmalloc(Device.size);
    if (Device.data == NULL)
    return -ENOMEM;
    /*
    * Get a request queue.
    */
    Queue = blk_init_queue(sbd_request, &Device.lock);
    if (Queue == NULL)
    goto out;
    blk_queue_logical_block_size(Queue, logical_block_size);
    /*
    * Get registered.
    */
    major_num = register_blkdev(major_num, “sbd”);
    if (major_num major = major_num;
    Device.gd->first_minor = 0;
    Device.gd->fops = &sbd_ops;
    Device.gd->private_data = &Device;
    strcpy(Device.gd->disk_name, “sbd0”);
    set_capacity(Device.gd, nsectors);
    Device.gd->queue = Queue;
    add_disk(Device.gd);

    return 0;

    out_unregister:
    unregister_blkdev(major_num, “sbd”);
    out:
    vfree(Device.data);
    return -ENOMEM;
    }

    static void __exit sbd_exit(void)
    {
    del_gendisk(Device.gd);
    put_disk(Device.gd);
    unregister_blkdev(major_num, “sbd”);
    blk_cleanup_queue(Queue);
    vfree(Device.data);
    }

    module_init(sbd_init);
    module_exit(sbd_exit);

    — code ends —

  40. ok, that messed up the formatting completely. Also, some sort of issue with open and closed angle brackets. All the include statements are the same, but linux/version.h needs to be added.

  41. Sarge, your code change worked fine for me. I just want to mention that your macro in sbd_transfer got pretty garbled in addition to your include statements when you posted.

    I’m assuming you were going for something like this…

    sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
    #if LINUX_VERSION_CODE < KERNEL_VERSION(3,15,0)
    buffer,
    #else
    bio_data(req->bio),
    #endif
    rq_data_dir(req));

  42. oops, didn’t fix all of it :/

    sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),
    #if LINUX_VERSION_CODE < KERNEL_VERSION(3,15,0)
    req->buffer,
    #else
    bio_data(req->bio),
    #endif
    rq_data_dir(req));

  43. Hello Sir,
    I have written the pseudo driver, Now for the read, write and open API, how am i going to use the device file. in read and write API in the function fopen i have passed the module name as “./module_name”. The driver contains the transfer and request functions how they work.

  44. I implement my first block device driver for one PCIe card that has utilizing the DMA of this card based on this example from http://opensourceforu.efytimes.com/2012/02/device-drivers-disk-on-ram-block-drivers/. I just replace the memcpy() function in ramdevice_write() and ramdevice_read() with my own DMA version of write()/read() function. My own own DMA version of write()/read() function work well for millions of times. They worked in interrupt mode. Linux crashes after several ‘dd’ commoands for 128KB data. rb_transfer() is similar to sbd_transfer(). Before I try this example, may I ask some questions? Is there any special requirement for the function sbd_transfer()?

    a) Can the sbd_transfer() function call spin_lock()?
    b) Can the sbd_transfer() function call wait_event_interruptible_timeout ()?
    c) Can the sbd_transfer() function call vmalloc()/vfree(), kmalloc()/kfree()?
    d) Is it OK when one interrupt is generated during the execution time of the sbd_transfer() function?
    e) Is it OK to call schedule_work() in ISR of the DMA during the execution time of the sbd_transfer() function?
    f) Is it OK to sleep during the execution time of the sbd_transfer() function?
    g) Sometimes irqs_disabled() returns true. Does it mean that no interrupt will be generated?
    h) Is any one example that can handle the request and end the request in different function call?

    1. Hi Hank – I haven’t worked on this stuff for over 5 years, so I’m afraid I can’t answer any of your questions – sorry!

  45. Hi There,

    Is there a way :

    1.I can copy this entire file from the RAM and store as a file, so that I dont loose it on shut down
    2.On reboot, I can load this previously stored file

    1. Sree – Not easily – you would have to add features to the driver. It’s really just a sample, showing how to use the kernel block driver interface.

  46. In kernel 4.10.0, you may get error like
    error: ‘struct request’ has no member named ‘buffer’
    sbd_transfer(&Device, blk_rq_pos(req), blk_rq_cur_sectors(req),req->buffer, rq_data_dir(req));
    since buffer is removed from struct request in this version.
    Use bio_data(rq->bio) in place of req->buffer

Leave a Reply

Your email address will not be published. Required fields are marked *