/*
 * Register device numbers dev..(dev+range-1)
 * range must be nonzero
 * The hash chain is sorted on range, so that subranges can override.
 */
void blk_register_region(dev_t devt, unsigned long range, struct module *module,
			 struct kobject *(*probe)(dev_t, int *, void *),
			 int (*lock)(dev_t, void *), void *data)
{
	kobj_map(bdev_map, devt, range, module, probe, lock, data);
}
/**
* add_disk - add partitioning information to kernel list
* @disk: per-device partitioning information
* This function registers the partitioning information in @disk
* with the kernel.
*///添加分区磁盘到分区系统void add_disk(struct gendisk *disk)
{
struct backing_dev_info *bdi;
dev_t devt;
int retval;
/* minors == 0 indicates to use ext devt from part0 and should
* be accompanied with EXT_DEVT flag. Make sure all
* parameters make sense.
*/
WARN_ON(disk->minors && !(disk->major || disk->first_minor));
WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT));
disk->flags |= GENHD_FL_UP;
/**
* blk_alloc_devt - allocate a dev_t for a partition
* 根据磁盘的主次设备号信息为磁盘分配设备号
* 这里要特别注意的是:blk_alloc_devt是阻塞的。。。
*/
retval = blk_alloc_devt(&disk->part0, &devt);
if (retval) {
WARN_ON(1);
return;
}
disk_to_dev(disk)->devt = devt;
/* ->major and ->first_minor aren't supposed to be
* dereferenced from here on, but set them just in case.
*/
disk->major = MAJOR(devt);
disk->first_minor = MINOR(devt);
//调用disk_alloc_events初始化磁盘的事件(alloc|add|del|release)处理机制。//在最开始磁盘事件会被设置为被阻塞的。
disk_alloc_events(disk);
//调用bdi_register_dev将磁盘注册到bdi
bdi = &disk->queue->backing_dev_info;
bdi_register_dev(bdi, disk_devt(disk));
//调用blk_register_region将磁盘添加到bdev_map中
blk_register_region(disk_devt(disk), disk->minors, NULL,
exact_match, exact_lock, disk);
/* 调用register_disk将磁盘添加到系统中。主要完成:
* 将主设备的分区(第0个分区)信息标记设置为分区无效
* 调用device_add将设备添加到系统中
* 在sys文件系统中为设备及其属性创建目录及文件
* 发出设备添加到系统的uevent事件(如果能获取分区的信息,则也为分区发送uevent事件)。
*/
register_disk(disk);
/*
调用blk_register_queue注册磁盘的请求队列。主要是为队列和队列的调度器在设备的
sys文件系统目录中创建相应的sys目录/文件,并且发出uevent事件。
*/
blk_register_queue(disk);
/*
* Take an extra ref on queue which will be put on disk_release()
* so that it sticks around as long as @disk is there.
*/
WARN_ON_ONCE(!blk_get_queue(disk->queue));
retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj,
"bdi");
WARN_ON(retval);
/*
调用__disk_unblock_events完成
在/sys文件系统的设备目录下创建磁盘的事件属性文件
将磁盘事件添加到全局链表disk_events中
解除对磁盘事件的阻塞。
*/
disk_add_events(disk);
}
/*
 * Open a block device special file: resolve the inode to its
 * block_device, propagate relevant open flags into f_mode, and
 * hand off to blkdev_get().
 */
static int blkdev_open(struct inode *inode, struct file *filp)
{
	struct block_device *bdev;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	/* O_ACCMODE == 3: the "neither read nor write, ioctl only" open */
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;

	return blkdev_get(bdev, filp->f_mode, filp);
}
/*
 * main unit of I/O for the block layer and lower layers (ie drivers and
 * stacking drivers)
 */
struct bio {
	struct bio		*bi_next;	/* request queue link */
	struct block_device	*bi_bdev;
	unsigned long		bi_flags;	/* status, command, etc */
	/* bottom bits READ/WRITE,
	 * top bits priority
	 */
	unsigned long		bi_rw;

	struct bvec_iter	bi_iter;

	/* Number of segments in this BIO after
	 * physical address coalescing is performed.
	 */
	unsigned int		bi_phys_segments;

	/*
	 * To keep track of the max segment size, we account for the
	 * sizes of the first and last mergeable segments in this bio.
	 */
	unsigned int		bi_seg_front_size;
	unsigned int		bi_seg_back_size;

	atomic_t		bi_remaining;

	bio_end_io_t		*bi_end_io;

	void			*bi_private;
#ifdef CONFIG_BLK_DEV_THROTTLING
	bio_throtl_end_io_t	*bi_throtl_end_io1;
	void			*bi_throtl_private1;
	bio_throtl_end_io_t	*bi_throtl_end_io2;
	void			*bi_throtl_private2;
#endif
#ifdef CONFIG_BLK_CGROUP
	/*
	 * Optional ioc and css associated with this bio.  Put on bio
	 * release.  Read comment on top of bio_associate_current().
	 */
	struct io_context	*bi_ioc;
	struct cgroup_subsys_state *bi_css;
#endif
	union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
		struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
	};

	unsigned short		bi_vcnt;	/* how many bio_vec's */

	/*
	 * When using direct-io (O_DIRECT), we can't get the inode from a bio
	 * by walking bio->bi_io_vec->bv_page->mapping->host
	 * since the page is anon.
	 */
	struct inode		*bi_dio_inode;

	/*
	 * Everything starting with bi_max_vecs will be preserved by bio_reset()
	 */
	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */

	atomic_t		bi_cnt;		/* pin count */

	struct bio_vec		*bi_io_vec;	/* the actual vec list */

	struct bio_set		*bi_pool;

	/*
	 * We can inline a number of vecs at the end of the bio, to avoid
	 * double allocations for a small number of bio_vecs. This member
	 * MUST obviously be kept at the very end of the bio.
	 */
	struct bio_vec		bi_inline_vecs[0];
};
/*
 * Request flags. For use in the cmd_flags field of struct request, and in
 * bi_rw of struct bio. Note that some flags are only valid in either one.
 */
enum rq_flag_bits {
/* common flags */
__REQ_WRITE, /* not set, read. set, write */
__REQ_FAILFAST_DEV, /* no driver retries of device errors */
__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
__REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */
__REQ_SYNC, /* request is sync (sync write or read) */
__REQ_META, /* metadata io request */
__REQ_PRIO, /* boost priority in cfq */
__REQ_DISCARD, /* request to discard sectors */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
__REQ_WRITE_SAME, /* write same block many times */
__REQ_NOIDLE, /* don't anticipate more IO after this one */
__REQ_INTEGRITY, /* I/O includes block integrity payload */
__REQ_FUA, /* forced unit access */
__REQ_FLUSH, /* request for cache flush */
__REQ_POST_FLUSH_BARRIER,/* cache barrier after a data req */
__REQ_BARRIER, /* marks flush req as barrier */
__REQ_BG, /* background activity */
__REQ_FG, /* foreground activity */
/* bio only flags */
__REQ_RAHEAD, /* read ahead, can fail anytime */
__REQ_THROTTLED, /* This bio has already been subjected to
 * throttling rules. Don't do it again. */
/* request only flags -- note: request-only bits reuse the bio-only
 * bit positions, starting over at __REQ_RAHEAD */
__REQ_SORTED = __REQ_RAHEAD, /* elevator knows about this request */
__REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
__REQ_NOMERGE, /* don't touch this for merging */
__REQ_STARTED, /* drive already may have started this one */
__REQ_DONTPREP, /* don't call prep for this one */
__REQ_QUEUED, /* uses queueing */
__REQ_ELVPRIV, /* elevator private data attached */
__REQ_FAILED, /* set if the request failed */
__REQ_QUIET, /* don't worry about errors */
__REQ_PREEMPT, /* set for "ide_preempt" requests and also
 for requests for which the SCSI "quiesce"
 state must be ignored. */
__REQ_ALLOCED, /* request came from our alloc pool */
__REQ_COPY_USER, /* contains copies of user pages */
__REQ_FLUSH_SEQ, /* request for flush sequence */
__REQ_IO_STAT, /* account I/O stat */
__REQ_MIXED_MERGE, /* merge of different types, fail separately */
__REQ_PM, /* runtime pm request */
__REQ_HASHED, /* on IO scheduler merge hash */
__REQ_MQ_INFLIGHT, /* track inflight for MQ */
__REQ_URGENT, /* urgent request */
__REQ_NR_BITS, /* stops here */
};
/**
* submit_bio - submit a bio to the block device layer for I/O
* @rw: whether to %READor%WRITE, or maybe to %READA (read ahead)
* @bio: The &struct bio which describes the I/O
*
* submit_bio() is very similar in purpose to generic_make_request(), and
* uses that function to do most of the work. Both are fairly rough
* interfaces; @bio must be presetup and ready for I/O.
*
*/
void submit_bio(int rw, struct bio *bio)
{
bio->bi_rw |= rw;
/*
* If it's a regular read/writeor a barrier with data attached,
* go through the normal accounting stuff before submission.
*/if (bio_has_data(bio)) {
unsigned int count;
//从request_queue队列中获取,或者直接获得sector的数据
if (unlikely(rw & REQ_WRITE_SAME))
count = bdev_logical_block_size(bio->bi_bdev) >> 9;
else
count = bio_sectors(bio);
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, count);
}
#ifdef DCHECK_ROOT_FORCE
check_wrt(rw, bio);
#endifif (unlikely(block_dump)) {
char b[BDEVNAME_SIZE];
struct task_struct *tsk;
tsk = get_dirty_task(bio);
printk(KERN_WARNING "%s(%d): %s block %Lu on %s (%u sectors)\n",
tsk->comm, task_pid_nr(tsk),
(rw & WRITE) ? "WRITE" : "READ",
(unsigned long long)bio->bi_iter.bi_sector,
bdevname(bio->bi_bdev, b),
count);
}
}
generic_make_request(bio);
}