学习vfs,可以先从挂载点入手,了解mount相关数据结构。
我们查看mount通常是cat /proc/self/mounts。所以先从这块相关的代码开始。内核版本是6.12.1
======================================
fs/proc/base.c
有一个条目:
REG("mounts", S_IRUGO, proc_mounts_operations),
注册了mounts。
======================================
proc_mounts_operations定义在:fs/proc_namespace.c
/*
 * File operations registered for the "mounts" proc entry.
 * Opening goes through mounts_open(); reads and seeks are served by the
 * generic seq_file helpers (seq_read_iter / seq_lseek).
 */
const struct file_operations proc_mounts_operations = {
.open = mounts_open,
.read_iter = seq_read_iter,
.splice_read = copy_splice_read,
.llseek = seq_lseek,
.release = mounts_release,
.poll = mounts_poll,
};
/*
 * Open handler of proc_mounts_operations: delegates to
 * mounts_open_common(), passing show_vfsmnt as the per-mount show callback.
 */
static int mounts_open(struct inode *inode, struct file *file)
{
return mounts_open_common(inode, file, show_vfsmnt);
}
static int mounts_open_common(struct inode *inode, struct file *file,
int (*show)(struct seq_file *, struct vfsmount *))
{
// 得到proc条目对应的进程
struct task_struct *task = get_proc_task(inode);
// 得到对应的mount namespace
struct mnt_namespace *ns = task->nsp->mnt_ns;
// 得到进程的根目录
struct path root;
get_fs_root(task->fs, &root);
// 创建seq
seq_open_private(file, &mounts_op, sizeof(struct proc_mounts));
// 保存私有数据
struct seq_file *m = file->priate_data
struct proc_mounts *p = m->private;
p->ns = ns;
p->root = root;
p->show = show;
}
======================================
mounts_op定义在:fs/namespace.c
/*
 * seq_file iterator callbacks handed to seq_open_private() by
 * mounts_open_common(): m_start/m_next/m_stop step through the mounts and
 * m_show prints one of them.
 */
const struct seq_operations mounts_op = {
.start = m_start,
.next = m_next,
.stop = m_stop,
.show = m_show,
};
start/next/show,就是遍历ns->mounts.rb_node,这棵红黑树,然后调用show函数。
struct rb_node *node = ns->mounts.rb_node;
struct mount *m = node_to_mount(node);
/*
 * seq_file .start callback: takes namespace_sem for reading and returns
 * whatever mnt_find_id_at() yields for position *pos in the namespace
 * stored in the proc_mounts private data.
 * NOTE(review): the semaphore is not released here; presumably m_stop()
 * (not shown) drops it.
 */
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_mounts *p = m->private;
down_read(&namespace_sem);
return mnt_find_id_at(p->ns, *pos);
}
/*
 * seq_file .show callback: treats the iterator value v as a struct mount
 * and forwards its embedded vfsmount to the show callback saved in the
 * proc_mounts private data, returning that callback's result.
 */
static int m_show(struct seq_file *m, void *v)
{
struct proc_mounts *p = m->private;
struct mount *r = v;
return p->show(m, &r->mnt);
}
======================================
show_vfsmnt定义在:fs/proc_namespace.c
static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
它有一个重要的特性,是过滤掉root目录之外的挂载点。
/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
if (err)
goto out;
有几个重要的数据结构
struct mnt_namespace,定义在fs/mount.h。注意这个是fs下面的头文件,意味着只有内核能使用,external module无法直接使用。
这个结构体定义了一个mount namespace。
/* One mount namespace. */
struct mnt_namespace {
struct ns_common ns;
struct mount * root; /* root mount of this namespace */
struct rb_root mounts; /* Protected by namespace_sem */
struct user_namespace *user_ns;
struct ucounts *ucounts;
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
u64 event;
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout;
核心的成员:
root,这个命名空间的根挂载。
mounts,所有挂载点的红黑树。
mnt_ns_tree_node,所有的命名空间存在一个红黑树里。这个是本命名空间的节点。
所有命名空间红黑树的根定义在fs/namespace.c
static struct rb_root mnt_ns_tree = RB_ROOT;
struct mount表示一个挂载点,也定义在fs/mount.h头文件中。和mnt_namespace一样,external module不可见。
/*
 * One mount. Only a subset of the members is listed in this excerpt.
 */
struct mount {
struct hlist_node mnt_hash;
struct mount *mnt_parent; /* parent mount */
struct dentry *mnt_mountpoint; /* dentry inside the parent that this mount sits on */
struct vfsmount mnt; /* embedded, not a pointer */
struct mount *mnt_master; /* slave is on master->mnt_slave_list */
struct mnt_namespace *mnt_ns; /* containing namespace */
struct mountpoint *mnt_mp; /* where is it mounted */
int mnt_id; /* mount identifier, reused */
u64 mnt_id_unique; /* mount ID unique until reboot */
int mnt_group_id; /* peer group identifier */
int mnt_expiry_mark; /* true if marked for expiry */
struct hlist_head mnt_pins;
struct hlist_head mnt_stuck_children;
} __randomize_layout;
部分成员如上。mnt成员是vfsmount类型,内嵌在mount结构体中。
mnt_parent,是这个mount的父挂载点。mnt_mountpoint是父挂载点里面的路径。mount挂载到mnt_mountpoint上面。
比如
mount -t tmpfs none /tmp
/tmp是rootfs的一个目录。那么mnt_parent就是rootfs。mnt_mountpoint就是rootfs里面的/tmp
struct vfsmount这个结构体,定义在include/linux/mount.h中,是公开的内核头文件,对external module可见(注意不是导出给用户态)。
/* The part of a mount that is embedded in struct mount as its mnt member. */
struct vfsmount {
struct dentry *mnt_root; /* root of the mounted tree */
struct super_block *mnt_sb; /* pointer to superblock */
int mnt_flags;
struct mnt_idmap *mnt_idmap; /* NOTE(review): presumably the ID mapping applied to this mount - confirm */
} __randomize_layout;
mnt_root,是挂载的源目录。通常是文件系统的根目录。对于bind mount,比如
mount --bind /tmp/dir0 /web
那么mnt_root就是/tmp这个文件系统的/dir0目录。
mnt_idmap用来在多个user namespace做转换。
这个结构体定义在fs/mount.h
/*
 * Object that struct mount's mnt_mp member points to.
 * NOTE(review): field roles below are inferred from names and types only -
 * confirm against fs/namespace.c.
 */
struct mountpoint {
struct hlist_node m_hash; /* presumably a hash-table link */
struct dentry *m_dentry; /* dentry acting as the mountpoint */
struct hlist_head m_list; /* presumably the mounts attached here */
int m_count; /* presumably a use count */
};
struct mountpoint描述一个被用作挂载点的dentry,mount的mnt_mp成员指向这个结构体。
内核用它按dentry缓存挂载点:m_hash把它挂进挂载点哈希表,m_list串起所有挂在这个dentry上的mount,m_count是引用计数。通常m_dentry等于mnt_mountpoint。
这个结构体定义在include/linux/path.h,
/* A location expressed as a pair: a mount plus a dentry. */
struct path {
struct vfsmount *mnt; /* the mount */
struct dentry *dentry; /* the dentry */
} __randomize_layout;
看字面意思是一个路径。一个路径由挂载点,和挂载点内部的目录树组成。
fs/namei.c提供了一个接口,可以把路径字符串,转换成path结构体:
/*
 * Turn a path string into a struct path.
 *
 * @name:  path string to look up
 * @flags: lookup flags, passed through to filename_lookup()
 * @path:  filled in by filename_lookup()
 *
 * Wraps the string with getname_kernel(), performs the lookup relative to
 * AT_FDCWD, releases the filename again and returns the lookup's result.
 */
int kern_path(const char *name, unsigned int flags, struct path *path)
{
struct filename *filename = getname_kernel(name);
int ret = filename_lookup(AT_FDCWD, filename, flags, path, NULL);
putname(filename);
return ret;
}
EXPORT_SYMBOL(kern_path);
fs/d_path.c提供了接口将path转换成字符串路径:
char *d_path(const struct path *path, char *buf, int buflen)
之前的文章已经出现过这个结构体了,定义在include/linux/dcache.h中。
/*
 * Directory entry: ties a name (d_name) under a parent (d_parent) to an
 * inode (d_inode).
 */
struct dentry {
/* RCU lookup touched fields */
unsigned int d_flags; /* protected by d_lock */
seqcount_spinlock_t d_seq; /* per dentry seqlock */
struct hlist_bl_node d_hash; /* lookup hash list */
struct dentry *d_parent; /* parent directory */
struct qstr d_name;
struct inode *d_inode; /* Where the name belongs to - NULL is
* negative */
unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
/* --- cacheline 1 boundary (64 bytes) was 32 bytes ago --- */
/* Ref lookup also touches following */
const struct dentry_operations *d_op;
struct super_block *d_sb; /* The root of the dentry tree */
unsigned long d_time; /* used by d_revalidate */
void *d_fsdata; /* fs-specific data */
/* --- cacheline 2 boundary (128 bytes) --- */
struct lockref d_lockref; /* per-dentry lock and refcount
* keep separate from RCU lookup area if
* possible!
*/
union {
struct list_head d_lru; /* LRU list */
wait_queue_head_t *d_wait; /* in-lookup ones only */
};
struct hlist_node d_sib; /* child of parent list */
struct hlist_head d_children; /* our children */
/*
* d_alias and d_rcu can share memory
*/
union {
struct hlist_node d_alias; /* inode alias list */
struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */
struct rcu_head d_rcu;
} d_u;
};
char *dentry_path_raw(const struct dentry *dentry, char *buf, int buflen)
接口,可以计算一个dentry的路径字符串。
写一个内核模块,遍历当前命名空间的所有mount
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | #include <linux/init.h> #include <linux/module.h> #include <linux/mnt_namespace.h> #include <linux/nsproxy.h> #include <linux/fs_struct.h> #include <linux/dcache.h> #include "../fs/mount.h" static void dump_mount( struct mount *mount) { struct vfsmount *mnt = &mount->mnt; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; struct super_block *sb = mnt_path.dentry->d_sb; char buf[1024]; printk( "===\n" ); printk( "%s %s %s\n" , mount->mnt_devname ? : "none" , d_path(&mnt_path, buf, sizeof (buf)), sb->s_type->name ); printk( "mount id %d, sb %px, this %px parent %px\n" , mount->mnt_id, sb, mount, mount->mnt_parent); printk( "mnt_mountpoint: %px %s, sb %px\n" , mount->mnt_mountpoint, dentry_path_raw(mount->mnt_mountpoint, buf, sizeof (buf)), mount->mnt_mountpoint->d_sb); printk( "mnt_root: %px %s, sb %px\n" , mnt->mnt_root, dentry_path_raw(mnt->mnt_root, buf, sizeof (buf)), mnt->mnt_root->d_sb); if (mount->mnt_mp) printk( "mountpoint: %px %s, sb %px\n" , mount->mnt_mp->m_dentry, dentry_path_raw(mount->mnt_mp->m_dentry, buf, sizeof (buf)), mount->mnt_mp->m_dentry->d_sb); } static void dump_mnt_namespace( struct mnt_namespace *ns) { struct rb_node *node = rb_first(&ns->mounts); while (node) { struct mount *mnt = rb_entry(node, struct mount, mnt_node); dump_mount(mnt); node = rb_next(node); } } static int __init test_init( void ) { struct task_struct *task = current; struct nsproxy *nsp; struct mnt_namespace *ns = NULL; struct path root = { .mnt = NULL, .dentry = NULL }; task_lock(task); nsp = task->nsproxy; if (nsp && nsp->mnt_ns) { ns = nsp->mnt_ns; // put_mnt_ns is not exported, so use ns without hold a reference. 
// get_mnt_ns(ns); } if (task->fs) get_fs_root(task->fs, &root); task_unlock(task); if (ns) { printk( "=== start dump\n" ); dump_mnt_namespace(ns); } if (root.mnt) path_put(&root); return -EAGAIN; } static void __exit test_exit( void ) { } module_init(test_init); module_exit(test_exit); MODULE_LICENSE( "GPL" ); MODULE_AUTHOR( "Jianpeng Yuan" ); MODULE_DESCRIPTION( "test module" ); |
编译后运行:
# cd /tmp
# mkdir dir0/dir0_0 -p
# mount --bind dir0 /web
# insmod /lib/modules/6.12.1+/updates/test.ko
[ 23.247589] test: loading out-of-tree module taints kernel.
[ 23.252633] === start dump
[ 23.252813] ===
[ 23.252956] rootfs / rootfs
[ 23.253043] mount id 1, sb ffff000000864800, this ffff0000008a0180 parent ffff0000008a0180
[ 23.253364] mnt_mountpoint: ffff000000820240 /, sb ffff000000864800
[ 23.253523] mnt_root: ffff000000820240 /, sb ffff000000864800
[ 23.253680] ===
[ 23.253739] proc /proc proc
[ 23.253812] mount id 11, sb ffff000000989000, this ffff0000008a1080 parent ffff0000008a0180
[ 23.254005] mnt_mountpoint: ffff000000d2bf00 /proc, sb ffff000000864800
[ 23.254157] mnt_root: ffff0000015523c0 /, sb ffff000000989000
[ 23.254312] mountpoint: ffff000000d2bf00 /proc, sb ffff000000864800
[ 23.254470] ===
[ 23.254531] sysfs /sys sysfs
[ 23.254608] mount id 12, sb ffff000000989800, this ffff0000008a1200 parent ffff0000008a0180
[ 23.254798] mnt_mountpoint: ffff00000267f780 /sys, sb ffff000000864800
[ 23.254948] mnt_root: ffff000000b900c0 /, sb ffff000000989800
[ 23.255077] mountpoint: ffff00000267f780 /sys, sb ffff000000864800
[ 23.255208] ===
[ 23.255258] devtmpfs /dev devtmpfs
[ 23.255337] mount id 13, sb ffff0000008a5800, this ffff0000008a1380 parent ffff0000008a0180
[ 23.255510] mnt_mountpoint: ffff000000862f00 /dev, sb ffff000000864800
[ 23.255651] mnt_root: ffff00000082b9c0 /, sb ffff0000008a5800
[ 23.255767] mountpoint: ffff000000862f00 /dev, sb ffff000000864800
[ 23.256110] ===
[ 23.256178] tmpfs /ram tmpfs
[ 23.256247] mount id 14, sb ffff000002f42000, this ffff0000008a1500 parent ffff0000008a0180
[ 23.256417] mnt_mountpoint: ffff000000d2bc00 /ram, sb ffff000000864800
[ 23.256549] mnt_root: ffff0000025219c0 /, sb ffff000002f42000
[ 23.256675] mountpoint: ffff000000d2bc00 /ram, sb ffff000000864800
[ 23.256813] ===
[ 23.256863] tmpfs /tmp tmpfs
[ 23.256937] mount id 15, sb ffff000002f42800, this ffff0000008a1680 parent ffff0000008a0180
[ 23.257117] mnt_mountpoint: ffff00000267f900 /tmp, sb ffff000000864800
[ 23.257263] mnt_root: ffff000002a10540 /, sb ffff000002f42800
[ 23.257397] mountpoint: ffff00000267f900 /tmp, sb ffff000000864800
[ 23.257538] ===
[ 23.257591] tmpfs /run tmpfs
[ 23.257660] mount id 16, sb ffff000002f43000, this ffff0000008a1800 parent ffff0000008a0180
[ 23.257831] mnt_mountpoint: ffff000001310600 /run, sb ffff000000864800
[ 23.257970] mnt_root: ffff000002a10240 /, sb ffff000002f43000
[ 23.258097] mountpoint: ffff000001310600 /run, sb ffff000000864800
[ 23.258233] ===
[ 23.258309] tmpfs /web tmpfs
[ 23.258391] mount id 17, sb ffff000002f42800, this ffff0000008a1980 parent ffff0000008a0180
[ 23.258570] mnt_mountpoint: ffff000002b25900 /web, sb ffff000000864800
[ 23.258720] mnt_root: ffff000002a613c0 /dir0, sb ffff000002f42800
[ 23.258864] mountpoint: ffff000002b25900 /web, sb ffff000000864800
insmod: can't insert '/lib/modules/6.12.1+/updates/test.ko': Resource temporarily unavailable