Android系统目录树建立过程

2023-11-04

一、文件系统类型

在了解Android系统目录树的建立过程之前，有必要先了解文件系统类型。Linux内核将文件系统类型抽象为结构体struct file_system_type，其中name为文件系统名称，例如ext4、f2fs、rootfs等；mount()/mount2()是挂载文件系统时调用的接口，用于创建super_block，并返回根目录；kill_sb()在卸载文件系统时调用，做一些清理工作；next指向下一个文件系统类型。

/*
 * Describes one filesystem type known to the kernel (e.g. ext4, f2fs, rootfs).
 * Abridged excerpt: the dotted lines stand for members omitted by the article.
 */
struct file_system_type {
	/* Filesystem name, e.g. "ext4", "f2fs", "rootfs". */
	const char *name;
	int fs_flags;
    ................................................
	/* Called at mount time: creates the super_block and returns the root dentry. */
	struct dentry *(*mount) (struct file_system_type *, int,
		       const char *, void *);
	/* Same role as mount(), but additionally receives the vfsmount. */
	struct dentry *(*mount2) (struct vfsmount *, struct file_system_type *, int,
			       const char *, void *);
	void *(*alloc_mnt_data) (void);
	/* Called at unmount time to perform cleanup. */
	void (*kill_sb) (struct super_block *);
	struct module *owner;
	/* Next filesystem type in the singly linked list of registered types. */
	struct file_system_type * next;
	struct hlist_head fs_supers;
    ...............................................
};

所有注册到内核的文件系统类型，都放在以file_systems为表头的单链表中。register_filesystem()就是向该链表中加入新的元素；unregister_filesystem()就是将对应的文件系统类型从该链表中删除；get_fs_type()就是根据文件系统名称在链表中查找。

常见文件系统类型，都是在对应模块初始化时注册的，比如ext4在模块初始化时注册ext4_fs_type。

/*
 * ext4 module initialisation (abridged excerpt; dotted lines are omitted code).
 * The step relevant to this article is the register_filesystem() call, which
 * registers ext4_fs_type with the kernel.
 */
static int __init ext4_init_fs(void)
{
	int i, err;

	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
	ext4_li_info = NULL;
	mutex_init(&ext4_li_mtx);

	/* Build-time check for flags consistency */
	................................................
	register_as_ext3();
	register_as_ext2();
	/* Register the ext4 filesystem type itself. */
	err = register_filesystem(&ext4_fs_type);
	................................................
}

可以通过cat /proc/filesystems查看系统中所有已注册的文件系统类型名称。

二、根目录的创建

进程的路径信息保存在task_struct成员fs_struct *fs指向的结构体中，其中root为根目录，pwd为当前目录。fs_struct *fs的数据来源于父进程，当clone_flags的CLONE_FS置位时，父子进程指向同一个fs_struct指针，否则创建一个fs_struct，并把父进程的信息拷贝过来。

/*
 * Per-process path information (abridged excerpt; the dotted line stands for
 * omitted members).
 */
struct fs_struct {
    ......................
	/* root: the process's root directory; pwd: its current directory. */
	struct path root, pwd;
};
/*
 * Set up the fs_struct of a task that is being created.
 *
 * With CLONE_FS the new task keeps pointing at the parent's fs_struct, so only
 * the user count is incremented under fs->lock; this is refused with -EAGAIN
 * while in_exec is set. Without CLONE_FS the task gets its own copy made by
 * copy_fs_struct().
 *
 * Returns 0 on success, -EAGAIN or -ENOMEM on failure.
 */
static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
{
	struct fs_struct *parent_fs = current->fs;
	int ret = 0;

	if (!(clone_flags & CLONE_FS)) {
		/* Not shared: duplicate the parent's fs_struct for the new task. */
		tsk->fs = copy_fs_struct(parent_fs);
		return tsk->fs ? 0 : -ENOMEM;
	}

	/* Shared: tsk->fs is already what we want, just account for the new user. */
	spin_lock(&parent_fs->lock);
	if (parent_fs->in_exec)
		ret = -EAGAIN;
	else
		parent_fs->users++;
	spin_unlock(&parent_fs->lock);

	return ret;
}

init_task是所有进程的老祖宗，其它进程的fs_struct *fs都直接或间接来源于init_task的fs_struct *fs，该指针指向的结构体是在start_kernel()-->vfs_caches_init()-->mnt_init()-->init_mount_tree()中初始化的。

/*
 * Build the initial mount tree at boot: mount "rootfs", wrap it in the first
 * mount namespace, attach that namespace to init_task, and make the rootfs
 * root both the current directory and the root directory of the current
 * task's fs_struct. Any failure here panics.
 */
static void __init init_mount_tree(void)
{
	struct vfsmount *mnt;
	struct mnt_namespace *ns;
	struct path root;
	struct file_system_type *type;

	/* Look up the registered "rootfs" filesystem type by name. */
	type = get_fs_type("rootfs");
	if (!type)
		panic("Can't find rootfs type");
	/* Create the rootfs mount (super_block plus root dentry). */
	mnt = vfs_kern_mount(type, 0, "rootfs", NULL);
	put_filesystem(type);
	if (IS_ERR(mnt))
		panic("Can't create rootfs");

	ns = create_mnt_ns(mnt);
	if (IS_ERR(ns))
		panic("Can't allocate initial namespace");

	init_task.nsproxy->mnt_ns = ns;
	get_mnt_ns(ns);

	/* The path of "/" is the rootfs mount together with its root dentry. */
	root.mnt = mnt;
	root.dentry = mnt->mnt_root;
	mnt->mnt_flags |= MNT_LOCKED;

	set_fs_pwd(current->fs, &root);                //use it as init_task's current directory
	set_fs_root(current->fs, &root);                //use it as init_task's root directory
}

vfs_kern_mount()首先调用alloc_vfsmnt()分配并初始化一个struct mount结构体，其成员mnt_devname初始化为rootfs，然后调用mount_fs()获取rootfs文件系统的根目录。

/*
 * Create a new mount for the given filesystem type (abridged excerpt; the
 * dotted line is omitted code).
 *
 * Allocates a struct mount via alloc_vfsmnt(name), obtains the filesystem's
 * root dentry through mount_fs(), and wires the mount to that root and its
 * super_block.
 *
 * Returns the embedded vfsmount on success, or an ERR_PTR: -ENODEV when type
 * is NULL, -ENOMEM when allocation fails, or the error from mount_fs().
 */
struct vfsmount *
vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
{
	struct mount *mnt;
	struct dentry *root;

	if (!type)
		return ERR_PTR(-ENODEV);

	mnt = alloc_vfsmnt(name);
	if (!mnt)
		return ERR_PTR(-ENOMEM);

	................................................................................

	/* Ask the filesystem for its super_block and root dentry. */
	root = mount_fs(type, flags, name, &mnt->mnt, data);
	if (IS_ERR(root)) {
		mnt_free_id(mnt);
		free_vfsmnt(mnt);
		return ERR_CAST(root);
	}

	mnt->mnt.mnt_root = root;
	mnt->mnt.mnt_sb = root->d_sb;
	/* Not attached anywhere yet: mountpoint is its own root, parent is itself. */
	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
	mnt->mnt_parent = mnt;
	/* Link this mount into the super_block's s_mounts list. */
	lock_mount_hash();
	list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts);
	unlock_mount_hash();
	return &mnt->mnt;
}

mount_fs()调用对应文件系统类型的mount接口，来创建对应文件系统的super_block和根目录。这里的文件系统类型为rootfs，其对应的mount接口为rootfs_mount()。

/*
 * Invoke the filesystem type's mount callback to obtain the root dentry
 * (abridged excerpt; dotted lines are omitted code, including the
 * out_free_secdata label and the final return).
 */
struct dentry *
mount_fs(struct file_system_type *type, int flags, const char *name, struct vfsmount *mnt, void *data)
{
	struct dentry *root;
	struct super_block *sb;
	char *secdata = NULL;
	int error = -ENOMEM;
    .......................................................
	/* Prefer mount2() when the filesystem provides it; otherwise use mount(). */
	if (type->mount2)
		root = type->mount2(mnt, type, flags, name, data);
	else
		root = type->mount(type, flags, name, data);
	if (IS_ERR(root)) {
		error = PTR_ERR(root);
		goto out_free_secdata;
	}
	/* The callback must hand back a root dentry that belongs to a super_block. */
	sb = root->d_sb;
	BUG_ON(!sb);
	WARN_ON(!sb->s_bdi);
	sb->s_flags |= MS_BORN;
    .......................................................
}
/*
 * Filesystem type descriptor for "rootfs": mounting goes through
 * rootfs_mount(), unmounting through kill_litter_super().
 */
static struct file_system_type rootfs_fs_type = {
	.name		= "rootfs",
	.mount		= rootfs_mount,
	.kill_sb	= kill_litter_super,
};
/*
 * Mount callback for "rootfs".
 *
 * Succeeds only on the first call; every later call returns
 * ERR_PTR(-ENODEV). The super_block is filled by shmem_fill_super when
 * CONFIG_TMPFS is enabled and is_tmpfs is set, and by ramfs_fill_super
 * otherwise. dev_name is not used.
 */
static struct dentry *rootfs_mount(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data)
{
	static unsigned long once;
	void *fill;

	/* Bit 0 of 'once' records that rootfs has already been mounted. */
	if (test_and_set_bit(0, &once))
		return ERR_PTR(-ENODEV);

	if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs)
		fill = shmem_fill_super;
	else
		fill = ramfs_fill_super;

	return mount_nodev(fs_type, flags, data, fill);
}

mount_nodev()调用sget()查找或创建一个super_block，调用fill_super填充super_block数据，包括创建根目录赋值给super_block->s_root。这里的fill_super默认对应ramfs_fill_super；如果内核启用了CONFIG_TMPFS且is_tmpfs为真，则对应shmem_fill_super。下面以ramfs_fill_super为例。

/*
 * Mount a filesystem that has no backing device.
 *
 * Obtains a super_block from sget() using set_anon_super, lets the supplied
 * fill_super callback populate it (the third argument is 1 when MS_SILENT is
 * set in flags, else 0), marks it MS_ACTIVE and returns a new reference to
 * its root dentry.
 *
 * Returns an ERR_PTR when sget() fails or when fill_super reports an error;
 * in the latter case the super_block is released with
 * deactivate_locked_super().
 */
struct dentry *mount_nodev(struct file_system_type *fs_type,
	int flags, void *data,
	int (*fill_super)(struct super_block *, void *, int))
{
	struct super_block *sb;
	int silent = (flags & MS_SILENT) ? 1 : 0;
	int err;

	sb = sget(fs_type, NULL, set_anon_super, flags, NULL);
	if (IS_ERR(sb))
		return ERR_CAST(sb);

	err = fill_super(sb, data, silent);
	if (!err) {
		sb->s_flags |= MS_ACTIVE;
		return dget(sb->s_root);
	}

	deactivate_locked_super(sb);
	return ERR_PTR(err);
}

ramfs_fill_super()-->d_make_root()-->__d_alloc(struct super_block *sb, const struct qstr *name)的参数name为空时，将以"/"作为目录名，这就是根目录"/"的由来。

/*
 * Populate a ramfs super_block and create its root directory (abridged
 * excerpt; the dotted line is omitted code, which presumably sets up fsi
 * before it is read below).
 *
 * Returns 0 on success, -ENOMEM when the root dentry cannot be created.
 */
int ramfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct ramfs_fs_info *fsi;
	struct inode *inode;
	int err;
    .........................................................
	sb->s_maxbytes		= MAX_LFS_FILESIZE;
	sb->s_blocksize		= PAGE_SIZE;
	sb->s_blocksize_bits	= PAGE_SHIFT;
	sb->s_magic		= RAMFS_MAGIC;
	sb->s_op		= &ramfs_ops;
	sb->s_time_gran		= 1;

	/* Create the directory inode for the root and wrap it in the root dentry. */
	inode = ramfs_get_inode(sb, NULL, S_IFDIR | fsi->mount_opts.mode, 0);
	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;

	return 0;
}
/*
 * Allocate the root dentry for root_inode (abridged excerpt; the dotted line
 * is omitted code). Returns NULL when root_inode is NULL.
 */
struct dentry *d_make_root(struct inode *root_inode)
{
	struct dentry *res = NULL;

	if (root_inode) {
		/* A NULL name makes __d_alloc() name the dentry "/". */
		res = __d_alloc(root_inode->i_sb, NULL);
                 
        ......................................................
	}
	return res;
}
/*
 * Allocate a dentry from dentry_cache (abridged excerpt; dotted lines are
 * omitted code). Returns NULL when the allocation fails.
 */
struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
{
	struct dentry *dentry;
	char *dname;
	int err;

	dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
	if (!dentry)
		return NULL;
    ...........................................................
	/* Keep the inline name buffer NUL-terminated. */
	dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
	if (unlikely(!name)) {
		/* No name supplied: fall back to "/" — this is how the root directory gets its name. */
		static const struct qstr anon = QSTR_INIT("/", 1);
		name = &anon;
		dname = dentry->d_iname;
	} else if (name->len > DNAME_INLINE_LEN-1) {
        .......................................................
	} else  {
		/* The name fits in the inline buffer. */
		dname = dentry->d_iname;
	}	
    ...........................................................
	return dentry;
}

根目录的结构体关系可以简化如下，init_task的成员fs指向结构体fs_struct，fs_struct成员保存了根目录路径struct path，struct path的成员dentry指向rootfs文件系统根目录，根目录的名称为"/"，成员mnt指向vfsmount结构体，vfsmount的成员mnt_root指向根目录，vfsmount包含于结构体mount中。

三、子目录的创建

Android中目录初始化是在init进程中完成的。一部分是在init进程first_stage阶段创建目录并挂载文件系统，另一部分是解析fstab文件，根据文件配置完成分区挂载。

/* Create the mount-point directories in the root filesystem. */
mkdir("/dev", 0755);  
mkdir("/proc", 0755);  
mkdir("/sys", 0755);  
  
/* Mount a tmpfs on /dev, then create and populate its sub-mount points. */
mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755");  
mkdir("/dev/pts", 0755);  
mkdir("/dev/socket", 0755);  
mount("devpts", "/dev/pts", "devpts", 0, NULL);  
/* Mount the proc and sysfs pseudo filesystems. */
mount("proc", "/proc", "proc", 0, NULL);  
mount("sysfs", "/sys", "sysfs", 0, NULL); 

# init.rc "fs" trigger: mount every partition listed in the fstab file.
on fs  
    write /proc/bootprof "INIT:Mount_START"  
    mount_all /fstab.mt6580

这里先介绍子目录的创建，下一节介绍文件系统挂载。创建目录的系统调用是mkdir()，mkdir()调用到sys_mkdirat()。sys_mkdirat()首先调用 user_path_create()，该函数执行完毕后，父目录保存在参数struct path *path中，返回一个dentry指针，dentry->d_name保存了目录名以及对应的哈希值。

/* mkdir(2): forwards to mkdirat with AT_FDCWD as the directory fd. */
SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)  
{  
    return sys_mkdirat(AT_FDCWD, pathname, mode);  
}  

/*
 * mkdirat(2): create the directory named by pathname relative to dfd.
 *
 * user_path_create() leaves the parent directory in 'path' and returns the
 * dentry for the new name; vfs_mkdir2() then performs the creation.
 * Returns 0 on success or a negative error code.
 */
SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)  
{  
    struct dentry *dentry;  
    struct path path;  
    int error;  
    unsigned int lookup_flags = LOOKUP_DIRECTORY;  
  
retry:  
    dentry = user_path_create(dfd, pathname, &path, lookup_flags);  
    if (IS_ERR(dentry))  
        return PTR_ERR(dentry);  
  
    /* Apply the umask unless IS_POSIXACL() holds for the parent inode. */
    if (!IS_POSIXACL(path.dentry->d_inode))  
        mode &= ~current_umask();  
    error = security_path_mkdir(&path, dentry, mode);  
    if (!error)  
        error = vfs_mkdir2(path.mnt, path.dentry->d_inode, dentry, mode);  
    done_path_create(&path, dentry);  
    /* When retry_estale() asks for it, redo the whole operation with LOOKUP_REVAL set. */
    if (retry_estale(error, lookup_flags)) {  
        lookup_flags |= LOOKUP_REVAL;  
        goto retry;  
    }  
    return error;  
}

user_path_create()-->filename_create()-->__lookup_hash()调用lookup_dcache()在hash表dentry_hashtable中查找，如果找到就返回。如果没有找到，就调用d_alloc()分配一个dentry，然后调用lookup_real()-->dir->i_op->lookup()在父目录数据块中查找是否有对应名字的目录，如果有会初始化dentry的d_inode成员。

/*
 * Find the dentry called 'name' under directory 'base'.
 *
 * A hit from lookup_dcache() is returned directly. Otherwise a fresh dentry
 * is allocated with d_alloc() and handed to lookup_real(), which asks the
 * filesystem of base->d_inode to look the name up.
 *
 * Returns ERR_PTR(-ENOMEM) when the dentry cannot be allocated.
 */
static struct dentry *__lookup_hash(const struct qstr *name,
		struct dentry *base, unsigned int flags)
{
	struct dentry *cached;
	struct dentry *fresh;

	cached = lookup_dcache(name, base, flags);
	if (cached)
		return cached;

	fresh = d_alloc(base, name);
	if (unlikely(!fresh))
		return ERR_PTR(-ENOMEM);

	return lookup_real(base->d_inode, fresh, flags);
}
/*
 * Ask the filesystem to look up 'dentry' in directory inode 'dir'.
 *
 * When dir->i_op->lookup() returns a non-NULL dentry, that one is returned
 * and the reference to the caller's dentry is dropped; otherwise the caller's
 * dentry is returned. For a dead directory the dentry is dropped and
 * ERR_PTR(-ENOENT) is returned.
 */
static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
				  unsigned int flags)
{
	struct dentry *replacement;

	/* Don't create child dentry for a dead directory. */
	if (unlikely(IS_DEADDIR(dir))) {
		dput(dentry);
		return ERR_PTR(-ENOENT);
	}

	replacement = dir->i_op->lookup(dir, dentry, flags);
	if (unlikely(replacement)) {
		/* The filesystem supplied its own dentry; ours is no longer needed. */
		dput(dentry);
		return replacement;
	}

	return dentry;
}

sys_mkdirat()再调用vfs_mkdir2()，该函数中先确认目录是否已经创建(d_inode是否为空)，如果已经创建则返回。如果没有创建，则通过dir->i_op->mkdir(dir, dentry, mode)创建目录。

/*
 * Create directory 'dentry' inside directory inode 'dir'.
 *
 * Checks, in order: may_create(), presence of the filesystem's mkdir
 * operation (-EPERM if absent), security_inode_mkdir(), and the
 * super_block's s_max_links limit (-EMLINK when reached). The mode is
 * masked to S_IRWXUGO|S_ISVTX before use. On success fsnotify_mkdir() is
 * called.
 *
 * Returns 0 on success or a negative error code.
 */
int vfs_mkdir2(struct vfsmount *mnt, struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int err = may_create(mnt, dir, dentry);
	unsigned link_limit = dir->i_sb->s_max_links;

	if (err)
		return err;

	if (!dir->i_op->mkdir)
		return -EPERM;

	mode &= (S_IRWXUGO|S_ISVTX);
	err = security_inode_mkdir(dir, dentry, mode);
	if (err)
		return err;

	/* A limit of 0 means the filesystem imposes no link limit here. */
	if (link_limit && dir->i_nlink >= link_limit)
		return -EMLINK;

	err = dir->i_op->mkdir(dir, dentry, mode);
	if (err)
		return err;

	fsnotify_mkdir(dir, dentry);
	return 0;
}

结构体关系可以简化如下，子目录的dentry都通过d_child链入到父目录dentry的d_subdirs中。已经打开目录的dentry，会通过d_hash成员链入到hash表dentry_hashtable中，hash值由父目录指针和文件/目录名称构造而成。

四、挂载设备

挂载文件系统的调用是mount，int mount(const char *source, const char *target, const char *filesystemtype, unsigned long mountflags, const void *data)。参数source：将要挂载的文件系统，通常是一个设备名，或者文件名；target：文件系统要挂载的目标目录；filesystemtype：文件系统的类型，例如"ext2"、"ext4"、"proc"等；mountflags：指定文件系统的读写访问标志，例如MS_RDONLY、MS_REMOUNT等；data：某些文件系统特有的参数。mount成功执行时返回0，失败时返回-1。

内核代码从SYSCALL_DEFINE5(mount)-->do_mount()-->do_new_mount()开始跟踪，vfs_kern_mount()上面已经分析过，是用于创建文件系统的super_block和根目录。

/*
 * Create a new mount of filesystem type 'fstype' and add it at 'path'
 * (abridged excerpt; the dotted line is omitted code).
 *
 * Returns -EINVAL when fstype is NULL, -ENODEV when the type is not
 * registered, otherwise the result of do_add_mount().
 */
static int do_new_mount(struct path *path, const char *fstype, int flags,
			int mnt_flags, const char *name, void *data)
{
	struct file_system_type *type;
	struct vfsmount *mnt;
	int err;

	if (!fstype)
		return -EINVAL;

	type = get_fs_type(fstype);
	if (!type)
		return -ENODEV;

	/* Create the filesystem's super_block and root directory. */
	mnt = vfs_kern_mount(type, flags, name, data);
	if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) &&
	    !mnt->mnt_sb->s_subtype)
		mnt = fs_set_subtype(mnt, fstype);
    ..............................................................
	/* Attach the new mount at the target path; drop it again on failure. */
	err = do_add_mount(real_mount(mnt), path, mnt_flags);
	if (err)
		mntput(mnt);
	return err;
}

do_add_mount()-->lock_mount()搜索挂载目标路径的mountpoint，如果目标目录没有被挂载过，直接用该目录创建mountpoint；如果目标目录被挂载过，甚至重复挂载，要一直查到最后一个被挂载的文件系统根目录，获得目录后再创建mountpoint。hash表mountpoint_hashtable，以dentry为键值存放dentry对应的mountpoint，创建mountpoint时先在该hash表中查找，如果没找到就创建一个。

/*
 * Add 'newmnt' to the mount tree at 'path' (abridged excerpt; the dotted line
 * is omitted code, which presumably contains the checks that jump to the
 * unlock label).
 *
 * Returns 0 on success or a negative error code.
 */
static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
{
	struct mountpoint *mp;
	struct mount *parent;
	int err;

	mnt_flags &= ~MNT_INTERNAL_FLAGS;

	/* Resolve (or create) the mountpoint for the target; may update *path. */
	mp = lock_mount(path);
	if (IS_ERR(mp))
		return PTR_ERR(mp);

	/* The mount that contains the target directory becomes the parent. */
	parent = real_mount(path->mnt);
	err = -EINVAL;
    ..........................................................................
	newmnt->mnt.mnt_flags = mnt_flags;
	err = graft_tree(newmnt, parent, mp);

unlock:
	unlock_mount(mp);
	return err;
}
/*
 * Find the mountpoint for 'path' (abridged excerpt; the dotted line is
 * omitted code).
 *
 * If something is already mounted on the path, the path is advanced to the
 * root of that mount and the search is repeated, so the result refers to the
 * topmost mounted filesystem. Returns ERR_PTR(-ENOENT) when cant_mount()
 * rejects the dentry.
 */
static struct mountpoint *lock_mount(struct path *path)
{
	struct vfsmount *mnt;
	struct dentry *dentry = path->dentry;
retry:
	inode_lock(dentry->d_inode);
	if (unlikely(cant_mount(dentry))) {
		inode_unlock(dentry->d_inode);
		return ERR_PTR(-ENOENT);
	}
	namespace_lock();
	mnt = lookup_mnt(path);
	if (likely(!mnt)) {
		/* Nothing mounted here yet: get the mountpoint for this dentry. */
		struct mountpoint *mp = get_mountpoint(dentry);
	         
        ...............................................................................
		return mp;
	}
	/* Already mounted on: release the locks and step into the mounted filesystem's root. */
	namespace_unlock();
	inode_unlock(path->dentry->d_inode);
	path_put(path);
	path->mnt = mnt;
	dentry = path->dentry = dget(mnt->mnt_root);
	goto retry;
}

do_add_mount()-->graft_tree()-->attach_recursive_mnt()先调用mnt_set_mountpoint()建立起子mount与父mount之间的关系，再调用commit_tree()-->__attach_mnt()将子mount加入到hash表mount_hashtable中，该哈希表的键值由父mount和挂载目标目录组成。在路径搜索的过程中，follow_managed()会在mount_hashtable中查找当前目录是否有对应的子mount，如果有，则进入到子mount的根目录，从而实现了路径的跳转。

/*
 * Attach 'source_mnt' under 'dest_mnt' at mountpoint 'dest_mp' (abridged
 * excerpt; dotted lines are omitted code).
 *
 * With a non-NULL parent_path the source is first detached from its old
 * location and then attached; otherwise the child/parent relationship is set
 * up with mnt_set_mountpoint() and published with commit_tree().
 */
static int attach_recursive_mnt(struct mount *source_mnt,
			struct mount *dest_mnt,
			struct mountpoint *dest_mp,
			struct path *parent_path)
{
	HLIST_HEAD(tree_list);
	struct mnt_namespace *ns = dest_mnt->mnt_ns;
	struct mountpoint *smp;
	struct mount *child, *p;
	struct hlist_node *n;
	int err;
                 
    .............................................................................
	smp = get_mountpoint(source_mnt->mnt.mnt_root);
	if (IS_ERR(smp))
		return PTR_ERR(smp);
                 
    .............................................................................
	if (parent_path) {
		detach_mnt(source_mnt, parent_path);
		attach_mnt(source_mnt, dest_mnt, dest_mp);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		/* Link the child mount to its parent and mountpoint, then commit it. */
		mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt);
	}
                 
    ..............................................................................
	return 0;
}

/*
 * During path lookup, handle a dentry that has DCACHE_MANAGED_DENTRY flags
 * set (abridged excerpt; dotted lines are omitted code).
 *
 * The part shown handles DCACHE_MOUNTED: when lookup_mnt() finds a mount on
 * the current path, the path is switched to the root of that mount and the
 * loop repeats.
 */
static int follow_managed(struct path *path, struct nameidata *nd)
{
	struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
	unsigned managed;
	bool need_mntput = false;
	int ret = 0;
    ..............................................................
	while (managed = ACCESS_ONCE(path->dentry->d_flags),
	       managed &= DCACHE_MANAGED_DENTRY,
	       unlikely(managed != 0)) {
         
                                  
        ..........................................................
		if (managed & DCACHE_MOUNTED) {
			struct vfsmount *mounted = lookup_mnt(path);
			if (mounted) {
				/* Cross into the mounted filesystem: continue from its root. */
				dput(path->dentry);
				if (need_mntput)
					mntput(path->mnt);
				path->mnt = mounted;
				path->dentry = dget(mounted->mnt_root);
				need_mntput = true;
				continue;
			}
                                                     
            .....................................................
		}
                                   
        ........................................................
		break;
	}
    ........................................................
	return ret;
}

以proc为例，mount后的结构体关系简化如下。子mount的mnt_mountpoint成员指向挂载目录，mnt_parent指向挂载目录所在的mount。子mount以父mount和挂载目录的dentry为键值存放在hash表mount_hashtable中。

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)

Android