一、文件系统类型
了解Android系统目录树的建立之前,有必要先了解文件系统类型。Linux内核将文件系统类型抽象为结构体struct file_system_type,其中name为文件系统名称,例如ext4、f2fs、rootfs等;mount()/mount2()是挂载文件系统时调用的接口,用于创建super_block,并返回根目录;kill_sb()在卸载文件系统时调用,做一些清理工作;next指向下一个文件系统类型。
/*
 * Describes one filesystem type known to the kernel (e.g. ext4, f2fs, rootfs).
 * This is an excerpt: the dotted lines stand for fields omitted by the article.
 */
struct file_system_type {
/* Filesystem name, e.g. "ext4", "f2fs", "rootfs". */
const char *name;
int fs_flags;
................................................
/* Mount hook: creates the super_block and returns the root dentry. */
struct dentry *(*mount) (struct file_system_type *, int,
const char *, void *);
/* Same role as mount, but additionally receives the vfsmount. */
struct dentry *(*mount2) (struct vfsmount *, struct file_system_type *, int,
const char *, void *);
void *(*alloc_mnt_data) (void);
/* Unmount hook: performs cleanup for the given super_block. */
void (*kill_sb) (struct super_block *);
struct module *owner;
/* Next entry in the singly linked list of registered filesystem types. */
struct file_system_type * next;
struct hlist_head fs_supers;
...............................................
};
所有注册到内核的文件系统类型,都放在以file_systems为表头的单链表中。register_filesystem()就是向该链表中加入新的元素;unregister_filesystem()就是将对应的文件系统类型从该链表中删除;get_fs_type()就是根据文件系统名称在链表中查找。
常见文件系统类型,都是在对应模块初始化时注册的,比如ext4在模块初始化时注册ext4_fs_type。
/*
 * ext4 module initialisation (excerpt). The part relevant here is the final
 * register_filesystem() call, which adds ext4_fs_type to the kernel's list of
 * filesystem types.
 */
static int __init ext4_init_fs(void)
{
int i, err;
/* Set up the rate limiter used for ext4 mount messages. */
ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
ext4_li_info = NULL;
mutex_init(&ext4_li_mtx);
/* Build-time check for flags consistency */
................................................
/* NOTE(review): presumably register ext3/ext2 aliases handled by ext4 - confirm. */
register_as_ext3();
register_as_ext2();
/* Register the ext4 filesystem type itself. */
err = register_filesystem(&ext4_fs_type);
................................................
}
可以通过 cat /proc/filesystems 查看系统中所有已注册的文件系统类型名称。
二、根目录的创建
进程的路径信息保存在task_struct成员fs_struct *fs指向的结构体中,其中root为根目录,pwd为当前目录。fs_struct *fs的数据来源于父进程,当clone_flags的CLONE_FS置位时,父子进程指向同一个fs_struct指针,否则创建一个fs_struct,并把父进程的信息拷贝过来。
/*
 * Per-process filesystem context (excerpt; leading fields omitted).
 */
struct fs_struct {
......................
/* root: the process's root directory; pwd: its current working directory. */
struct path root, pwd;
};
/*
 * copy_fs - set up the filesystem context (root/pwd) of a task being cloned.
 * @clone_flags: clone flags; CLONE_FS requests sharing the parent's fs_struct.
 * @tsk:         the new task.
 *
 * Returns 0 on success, -EAGAIN if the parent's fs_struct is marked in_exec
 * while sharing was requested, or -ENOMEM if a private copy cannot be made.
 */
static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
{
	struct fs_struct *parent_fs = current->fs;
	int ret = 0;

	if (!(clone_flags & CLONE_FS)) {
		/* Not shared: the child gets its own copy of the parent's data. */
		tsk->fs = copy_fs_struct(parent_fs);
		return tsk->fs ? 0 : -ENOMEM;
	}

	/* Shared: tsk->fs is already what we want, just take a user count. */
	spin_lock(&parent_fs->lock);
	if (parent_fs->in_exec)
		ret = -EAGAIN;
	else
		parent_fs->users++;
	spin_unlock(&parent_fs->lock);

	return ret;
}
init_task是所有进程的老祖宗,其它进程的fs_struct *fs都直接或间接来源于init_task的fs_struct *fs,该指针指向的结构体是在start_kernel()-->vfs_caches_init()-->mnt_init()-->init_mount_tree()中初始化的。
/*
 * init_mount_tree - mount rootfs and make it the root and working directory
 * of init_task.
 *
 * Any failure here is fatal and ends in panic().
 */
static void __init init_mount_tree(void)
{
	struct file_system_type *rootfs_type;
	struct vfsmount *root_mnt;
	struct mnt_namespace *init_ns;
	struct path root_path;

	rootfs_type = get_fs_type("rootfs");
	if (!rootfs_type)
		panic("Can't find rootfs type");

	/* Create the rootfs super_block and root dentry. */
	root_mnt = vfs_kern_mount(rootfs_type, 0, "rootfs", NULL);
	put_filesystem(rootfs_type);
	if (IS_ERR(root_mnt))
		panic("Can't create rootfs");

	init_ns = create_mnt_ns(root_mnt);
	if (IS_ERR(init_ns))
		panic("Can't allocate initial namespace");

	init_task.nsproxy->mnt_ns = init_ns;
	get_mnt_ns(init_ns);

	root_path.mnt = root_mnt;
	root_path.dentry = root_mnt->mnt_root;
	root_mnt->mnt_flags |= MNT_LOCKED;

	/* Use the rootfs root as init_task's current directory ... */
	set_fs_pwd(current->fs, &root_path);
	/* ... and as init_task's root directory. */
	set_fs_root(current->fs, &root_path);
}
vfs_kern_mount()首先调用alloc_vfsmnt()分配并初始化一个struct mount结构体,其成员mnt_devname初始化为rootfs,然后调用mount_fs()获取rootfs文件系统的根目录。
/*
 * vfs_kern_mount - create a mount for filesystem type @type (excerpt).
 *
 * Allocates a struct mount, asks the filesystem for its root dentry via
 * mount_fs(), and wires the new mount up as its own parent and mountpoint.
 * Returns the embedded vfsmount, or an ERR_PTR: -ENODEV when @type is NULL,
 * -ENOMEM when allocation fails, or the error reported by mount_fs().
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
struct mount *mnt;
struct dentry *root;
if (!type)
return ERR_PTR(-ENODEV);
/* Allocate the struct mount; @name becomes its device name. */
mnt = alloc_vfsmnt(name);
if (!mnt)
return ERR_PTR(-ENOMEM);
................................................................................
/* Let the filesystem build its super_block and return the root dentry. */
root = mount_fs(type, flags, name, &mnt->mnt, data);
if (IS_ERR(root)) {
mnt_free_id(mnt);
free_vfsmnt(mnt);
return ERR_CAST(root);
}
mnt->mnt.mnt_root = root;
mnt->mnt.mnt_sb = root->d_sb;
/* Not attached anywhere yet: mountpoint is its own root, parent is itself. */
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
/* Link this mount into the super_block's list of mounts. */
lock_mount_hash();
list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
unlock_mount_hash();
return &mnt->mnt;
}
mount_fs()调用对应文件系统类型的mount接口,来创建对应文件系统的super_block和根目录。这里的文件系统类型为rootfs,其对应的mount接口为rootfs_mount()。
/*
 * mount_fs - call the filesystem type's mount hook (excerpt).
 *
 * Prefers type->mount2 when it is set, otherwise falls back to type->mount.
 * The error path label (out_free_secdata) and the return are in the elided
 * parts of this listing.
 */
struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsmount *mnt, void *data)
{
struct dentry *root;
struct super_block *sb;
char *secdata = NULL;
int error = -ENOMEM;
.......................................................
/* mount2 additionally receives the vfsmount. */
if (type->mount2)
root = type->mount2(mnt, type, flags, name, data);
else
root = type->mount(type, flags, name, data);
if (IS_ERR(root)) {
error = PTR_ERR(root);
goto out_free_secdata;
}
/* A returned root dentry must belong to a super_block. */
sb = root->d_sb;
BUG_ON(!sb);
WARN_ON(!sb->s_bdi);
sb->s_flags |= MS_BORN;
.......................................................
}
/* Filesystem type descriptor for "rootfs". */
static struct file_system_type rootfs_fs_type = {
	.name    = "rootfs",
	.kill_sb = kill_litter_super,	/* unmount-time cleanup hook */
	.mount   = rootfs_mount,	/* mount hook */
};
/*
 * rootfs_mount - mount hook for rootfs.
 *
 * May succeed only once: every later call returns ERR_PTR(-ENODEV).
 * The super_block is filled by shmem_fill_super when CONFIG_TMPFS is enabled
 * and is_tmpfs is set, otherwise by ramfs_fill_super.
 */
static struct dentry *rootfs_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	static unsigned long once;
	void *fill;

	/* Bit 0 of 'once' records that rootfs has already been mounted. */
	if (test_and_set_bit(0, &once))
		return ERR_PTR(-ENODEV);

	fill = (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs) ? shmem_fill_super
						      : ramfs_fill_super;

	return mount_nodev(fs_type, flags, data, fill);
}
mount_nodev()调用sget()查找或创建一个super_block,再调用fill_super填充super_block数据,包括创建根目录并赋值给super_block->s_root。这里fill_super默认对应ramfs_fill_super;若内核启用了CONFIG_TMPFS且is_tmpfs为真,则对应shmem_fill_super。下面以ramfs_fill_super为例。
/*
 * mount_nodev - mount a filesystem that has no backing device.
 * @fs_type:    filesystem type being mounted.
 * @flags:      mount flags; MS_SILENT is forwarded to @fill_super as 1/0.
 * @data:       filesystem-specific mount data, passed through to @fill_super.
 * @fill_super: callback that populates the super_block, including s_root.
 *
 * Returns a new reference to the super_block's root dentry, or an ERR_PTR
 * if sget() or @fill_super fails.
 */
struct dentry *mount_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *sb;
	int silent = (flags & MS_SILENT) ? 1 : 0;
	int err;

	sb = sget(fs_type, NULL, set_anon_super, flags, NULL);
	if (IS_ERR(sb))
		return ERR_CAST(sb);

	err = fill_super(sb, data, silent);
	if (err != 0) {
		deactivate_locked_super(sb);
		return ERR_PTR(err);
	}

	sb->s_flags |= MS_ACTIVE;
	return dget(sb->s_root);
}
ramfs_fill_super()-->d_make_root()-->__d_alloc(struct super_block *sb, const struct qstr *name)的参数name为空时,将以"/"作为目录名,这就是根目录"/"的由来。
/*
 * ramfs_fill_super - populate a ramfs super_block (excerpt).
 *
 * Sets the basic super_block parameters, creates the root directory inode and
 * installs its dentry as sb->s_root. Returns 0 on success, -ENOMEM if no root
 * dentry could be created. The setup of 'fsi' is in the elided part.
 */
int ramfs_fill_super(struct super_block *sb, void *data, int silent)
{
struct ramfs_fs_info *fsi;
struct inode *inode;
int err;
.........................................................
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = RAMFS_MAGIC;
sb->s_op = &ramfs_ops;
sb->s_time_gran = 1;
/* Root inode: a directory whose permission bits come from the mount options. */
inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0);
/* d_make_root() returns NULL for a NULL inode, so one check covers both. */
sb->s_root = d_make_root(inode);
if (!sb->s_root)
return -ENOMEM;
return 0;
}
/*
 * d_make_root - allocate the root dentry for @root_inode (excerpt).
 *
 * Returns NULL when @root_inode is NULL. Otherwise allocates a dentry with a
 * NULL name, which __d_alloc() turns into "/".
 */
struct dentry *d_make_root(struct inode *root_inode)
{
struct dentry *res = NULL;
if (root_inode) {
/* NULL name => the dentry is named "/". */
res = __d_alloc(root_inode->i_sb, NULL);
......................................................
}
return res;
}
/*
 * __d_alloc - allocate a dentry on super_block @sb (excerpt).
 * @name: entry name, or NULL for an anonymous/root dentry named "/".
 *
 * Returns the new dentry, or NULL if the slab allocation fails.
 */
struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
struct dentry *dentry;
char *dname;
int err;
dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
if (!dentry)
return NULL;
...........................................................
/* Keep the inline name buffer NUL-terminated. */
dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
if (unlikely(!name)) {
/* No name supplied: use "/" - this is where the root directory gets its name. */
static const struct qstr anon = QSTR_INIT("/", 1);
name = &anon;
dname = dentry->d_iname;
} else if (name->len > DNAME_INLINE_LEN-1) {
/* Name does not fit in the inline buffer; handling elided in this excerpt. */
.......................................................
} else {
/* Short name: stored in the dentry's inline buffer. */
dname = dentry->d_iname;
}
...........................................................
return dentry;
}
根目录的结构体关系可以简化如下,init_task的成员fs指向结构体fs_struct,fs_struct成员保存了根目录路径struct path,struct path的成员dentry指向rootfs文件系统根目录,根目录的名称为"/",成员mnt指向vfsmount结构体,vfsmount的成员mnt_root指向根目录,vfsmount包含于结构体mount中。
三、子目录的创建
Android中目录初始化是在init进程中完成的。一部分是在init进程first_stage阶段创建目录并挂载文件系统,另一部分是解析fstab文件,根据文件配置完成分区挂载。
/* First-stage init: create the basic mount points. */
mkdir("/dev", 0755);
mkdir("/proc", 0755);
mkdir("/sys", 0755);
/* Mount a tmpfs on /dev first, so the directories below are created inside it. */
mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755");
mkdir("/dev/pts", 0755);
mkdir("/dev/socket", 0755);
/* Mount the pseudo filesystems on the directories created above. */
mount("devpts", "/dev/pts", "devpts", 0, NULL);
mount("proc", "/proc", "proc", 0, NULL);
mount("sysfs", "/sys", "sysfs", 0, NULL);
# init.rc "fs" trigger: mount the partitions listed in the fstab file.
on fs
write /proc/bootprof "INIT:Mount_START"
# Parse /fstab.mt6580 and mount every entry it configures.
mount_all /fstab.mt6580
这里先介绍子目录的创建,下一节介绍文件系统挂载。创建目录的系统调用是mkdir(),mkdir()调用到sys_mkdirat()。sys_mkdirat()首先调用 user_path_create(),该函数执行完毕后,父目录保存在参数struct path *path中,返回一个dentry指针,dentry->d_name保存了目录名以及对应的哈希值。
/*
 * mkdir(2): thin wrapper around mkdirat(2) that resolves a relative
 * pathname against the current working directory (AT_FDCWD).
 */
SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
{
	long ret = sys_mkdirat(AT_FDCWD, pathname, mode);

	return ret;
}
/*
 * mkdirat(2): create directory @pathname relative to directory fd @dfd.
 *
 * user_path_create() resolves the parent into 'parent' and returns the dentry
 * for the new entry. After the security hook approves, vfs_mkdir2() does the
 * creation. When retry_estale() asks for it, the whole sequence is repeated
 * with LOOKUP_REVAL set.
 */
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
{
	unsigned int lookup_flags = LOOKUP_DIRECTORY;
	struct path parent;
	struct dentry *child;
	int err;

	for (;;) {
		child = user_path_create(dfd, pathname, &parent, lookup_flags);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* Apply the umask unless the parent directory uses POSIX ACLs. */
		if (!IS_POSIXACL(parent.dentry->d_inode))
			mode &= ~current_umask();

		err = security_path_mkdir(&parent, child, mode);
		if (err == 0)
			err = vfs_mkdir2(parent.mnt, parent.dentry->d_inode,
					 child, mode);
		done_path_create(&parent, child);

		if (!retry_estale(err, lookup_flags))
			return err;

		/* Retry the lookup with revalidation forced. */
		lookup_flags |= LOOKUP_REVAL;
	}
}
user_path_create()-->filename_create()-->__lookup_hash()调用lookup_dcache()在hash表dentry_hashtable中查找,如果找到就返回。如果没有找到,就调用d_alloc()分配一个dentry,然后调用lookup_real()-->dir->i_op->lookup()在父目录数据块中查找是否有对应名字的目录,如果有会初始化dentry的d_inode成员。
/*
 * __lookup_hash - find or create the dentry for @name under @base.
 *
 * Tries the dentry cache first. On a miss, allocates a fresh dentry and asks
 * the filesystem to look it up. Returns the dentry, or ERR_PTR(-ENOMEM) if
 * the allocation fails (lookup_real() may return other ERR_PTR values).
 */
static struct dentry *__lookup_hash(const struct qstr *name,
	struct dentry *base, unsigned int flags)
{
	struct dentry *found = lookup_dcache(name, base, flags);

	if (!found) {
		/* Cache miss: allocate and let the filesystem fill it in. */
		found = d_alloc(base, name);
		if (unlikely(!found))
			return ERR_PTR(-ENOMEM);
		found = lookup_real(base->d_inode, found, flags);
	}

	return found;
}
/*
 * lookup_real - ask the filesystem to look up @dentry inside directory @dir.
 *
 * Returns @dentry itself when the filesystem's lookup hook returns NULL.
 * When the hook returns another dentry, @dentry is dropped and the hook's
 * result is returned instead. For a dead directory, @dentry is dropped and
 * ERR_PTR(-ENOENT) is returned.
 */
static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
	unsigned int flags)
{
	struct dentry *replacement;

	/* Never create a child dentry under a dead directory. */
	if (unlikely(IS_DEADDIR(dir))) {
		dput(dentry);
		return ERR_PTR(-ENOENT);
	}

	replacement = dir->i_op->lookup(dir, dentry, flags);
	if (likely(!replacement))
		return dentry;

	/* The filesystem handed back its own dentry: use that one. */
	dput(dentry);
	return replacement;
}
sys_mkdirat()再调用vfs_mkdir2(),该函数先通过may_create()确认目录是否已经存在(dentry的d_inode是否为空),如果已经存在则返回错误;如果不存在,再依次检查父目录是否支持mkdir操作、安全模块是否允许、父目录链接数是否超过上限,最后通过dir->i_op->mkdir(dir, dentry, mode)创建目录,创建成功后调用fsnotify_mkdir()发出通知。
/*
 * vfs_mkdir2 - create directory @dentry inside @dir.
 * @mnt:    mount that @dir lives on (passed to may_create()).
 * @dir:    inode of the parent directory.
 * @dentry: dentry describing the directory to create.
 * @mode:   requested mode; only permission bits and the sticky bit are kept.
 *
 * Returns 0 on success, the error from may_create() or the security hook,
 * -EPERM if the filesystem has no mkdir operation, -EMLINK if the parent's
 * link count has reached the super_block limit, or the filesystem's error.
 */
int vfs_mkdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int rc = may_create(mnt, dir, dentry);
	unsigned link_limit = dir->i_sb->s_max_links;

	if (rc != 0)
		return rc;

	if (dir->i_op->mkdir == NULL)
		return -EPERM;

	mode &= S_IRWXUGO | S_ISVTX;
	rc = security_inode_mkdir(dir, dentry, mode);
	if (rc != 0)
		return rc;

	/* A limit of 0 means the filesystem imposes no maximum. */
	if (link_limit != 0 && dir->i_nlink >= link_limit)
		return -EMLINK;

	rc = dir->i_op->mkdir(dir, dentry, mode);
	if (rc == 0)
		fsnotify_mkdir(dir, dentry);

	return rc;
}
结构体关系可以简化如下,子目录的dentry都通过d_child链入到父目录dentry的d_subdirs中。已经打开目录的dentry,会通过d_hash成员链入到hash表dentry_hashtable中,hash值由父目录指针和文件/目录名称构造而成。
四、挂载设备
挂载文件系统的系统调用是mount:int mount(const char *source, const char *target, const char *filesystemtype, unsigned long mountflags, const void *data)。参数source:将要挂载的文件系统,通常是一个设备名或者文件名;target:文件系统要挂载的目标目录;filesystemtype:文件系统的类型,例如"ext2"、"ext4"、"proc"等;mountflags:指定文件系统的读写访问标志,例如MS_RDONLY、MS_REMOUNT等;data:某些文件系统特有的参数。mount成功执行时返回0,失败时返回-1。
内核代码从SYSCALL_DEFINE5(mount)-->do_mount()-->do_new_mount()开始跟踪,vfs_kern_mount()上面已经分析过,是用于创建文件系统的super_block和根目录。
/*
 * do_new_mount - create a new mount of type @fstype and attach it at @path
 * (excerpt).
 *
 * Returns -EINVAL when @fstype is NULL, -ENODEV when the type is not
 * registered, otherwise the result of do_add_mount(). Checks between the
 * subtype handling and do_add_mount() are elided in this listing.
 */
static int do_new_mount(struct path *path, const char *fstype, int flags,
int mnt_flags, const char *name, void *data)
{
struct file_system_type *type;
struct vfsmount *mnt;
int err;
if (!fstype)
return -EINVAL;
/* Look the type up by name in the list of registered filesystems. */
type = get_fs_type(fstype);
if (!type)
return -ENODEV;
/* Create the filesystem's super_block and root dentry. */
mnt = vfs_kern_mount(type, flags, name, data);
if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
!mnt->mnt_sb->s_subtype)
mnt = fs_set_subtype(mnt, fstype);
..............................................................
/* Attach the new mount at the target path; drop it again on failure. */
err = do_add_mount(real_mount(mnt), path, mnt_flags);
if (err)
mntput(mnt);
return err;
}
do_add_mount()-->lock_mount()搜索挂载目标路径的mountpoint,如果目标目录没有被挂载过,直接用该目录创建mountpoint;如果目标目录被挂载过,甚至重复挂载,要一直查到最后一个被挂载的文件系统根目录,获得目录后再创建mountpoint。hash表mountpoint_hashtable,以dentry为键值存放dentry对应的mountpoint,创建mountpoint时先在该hash表中查找,如果没找到就创建一个。
/*
 * do_add_mount - attach @newmnt to the mount tree at @path (excerpt).
 *
 * lock_mount() resolves the mountpoint for @path, then graft_tree() attaches
 * @newmnt under the parent mount. The validity checks that jump to 'unlock'
 * are elided in this listing.
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
struct mountpoint *mp;
struct mount *parent;
int err;
/* Callers may not set internal-only mount flags. */
mnt_flags &= ~MNT_INTERNAL_FLAGS;
mp = lock_mount(path);
if (IS_ERR(mp))
return PTR_ERR(mp);
/* lock_mount() may have updated path->mnt; use the resulting mount as parent. */
parent = real_mount(path->mnt);
err = -EINVAL;
..........................................................................
newmnt->mnt.mnt_flags = mnt_flags;
err = graft_tree(newmnt, parent, mp);
unlock:
unlock_mount(mp);
return err;
}
/*
 * lock_mount - find and lock the mountpoint for @path (excerpt).
 *
 * If nothing is mounted on @path, a mountpoint for its dentry is obtained and
 * returned. If something is already mounted there, @path is moved to the root
 * of that mount and the search repeats, so the result refers to the topmost
 * mount stacked on the original location.
 * Returns ERR_PTR(-ENOENT) when the dentry cannot be mounted on.
 */
static struct mountpoint *lock_mount(struct path *path)
{
struct vfsmount *mnt;
struct dentry *dentry = path->dentry;
retry:
inode_lock(dentry->d_inode);
if (unlikely(cant_mount(dentry))) {
inode_unlock(dentry->d_inode);
return ERR_PTR(-ENOENT);
}
namespace_lock();
/* Is another filesystem already mounted on this path? */
mnt = lookup_mnt(path);
if (likely(!mnt)) {
/* No: look up or create the mountpoint for this dentry. */
struct mountpoint *mp = get_mountpoint(dentry);
...............................................................................
return mp;
}
/* Yes: drop the locks, step into that mount's root and try again. */
namespace_unlock();
inode_unlock(path->dentry->d_inode);
path_put(path);
path->mnt = mnt;
dentry = path->dentry = dget(mnt->mnt_root);
goto retry;
}
do_add_mount()-->graft_tree()-->attach_recursive_mnt()先调用mnt_set_mountpoint()建立起子mount与父mount之间的关系,再调用commit_tree()-->__attach_mnt()将子mount加入到hash表mount_hashtable中,该哈希表的键值由父mount和挂载目标目录组成。在路径搜索的过程中,follow_managed()会在mount_hashtable中查找当前目录是否有对应的子mount,如果有进入到子mount的根目录,从而实现了路径的跳转。
/*
 * attach_recursive_mnt - attach @source_mnt at (@dest_mnt, @dest_mp) (excerpt).
 * @parent_path: when non-NULL, @source_mnt is first detached from it and then
 *               re-attached at the destination; when NULL, @source_mnt is a
 *               new mount that is linked to its parent and committed.
 *
 * Returns 0 on success or the error from get_mountpoint(); further error
 * paths are elided in this listing.
 */
static int attach_recursive_mnt(struct mount *source_mnt,
struct mount *dest_mnt,
struct mountpoint *dest_mp,
struct path *parent_path)
{
HLIST_HEAD(tree_list);
struct mnt_namespace *ns = dest_mnt->mnt_ns;
struct mountpoint *smp;
struct mount *child, *p;
struct hlist_node *n;
int err;
.............................................................................
smp = get_mountpoint(source_mnt->mnt.mnt_root);
if (IS_ERR(smp))
return PTR_ERR(smp);
.............................................................................
if (parent_path) {
detach_mnt(source_mnt, parent_path);
attach_mnt(source_mnt, dest_mnt, dest_mp);
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
/* Record parent mount and mountpoint in the child ... */
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
/* ... then add the child to the mount hash table. */
commit_tree(source_mnt);
}
..............................................................................
return 0;
}
/*
 * follow_managed - during path lookup, cross into whatever is mounted on
 * @path (excerpt).
 *
 * While the current dentry is flagged as managed, and a child mount is found
 * for it, @path is switched to that mount's root. This is how a lookup jumps
 * from a mountpoint directory into the mounted filesystem. Other managed
 * dentry cases are elided in this listing.
 */
static int follow_managed(struct path *path, struct nameidata *nd)
{
struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
unsigned managed;
bool need_mntput = false;
int ret = 0;
..............................................................
/* Re-read d_flags on every pass; loop while any managed bit is set. */
while (managed = ACCESS_ONCE(path->dentry->d_flags),
managed &= DCACHE_MANAGED_DENTRY,
unlikely(managed != 0)) {
..........................................................
if (managed & DCACHE_MOUNTED) {
/* Look for a child mount on this (mount, dentry) pair. */
struct vfsmount *mounted = lookup_mnt(path);
if (mounted) {
dput(path->dentry);
/* Only drop mounts acquired in this loop, never the caller's. */
if (need_mntput)
mntput(path->mnt);
path->mnt = mounted;
path->dentry = dget(mounted->mnt_root);
need_mntput = true;
/* The new root may itself be a mountpoint: check again. */
continue;
}
.....................................................
}
........................................................
break;
}
........................................................
return ret;
}
以proc为例,mount后的结构体关系简化如下。子mount的mnt_mountpoint成员指向挂载目录,mnt_parent指向挂载目录所在的mount(即父mount)。子mount以父mount和挂载目录的dentry共同构成的键值存放在hash表mount_hashtable中。