概述
Linux文件系统由VFS(Virtual File System)抽象层统一管理,屏蔽不同文件系统的实现差异。本专题深入剖析VFS架构、核心数据结构、ext4日志机制、以及如何实现自定义文件系统。
一、VFS架构总览
1.1 层次结构
用户空间:open/read/write/close syscalls
↓
VFS层: super_block / inode / dentry / file
↓
文件系统: ext4 / btrfs / xfs / tmpfs / procfs ...
↓
页缓存: page cache(address_space)
↓
块设备层: bio / blk-mq
↓
驱动层: SCSI / NVMe / virtio-blk
1.2 核心数据结构关系
super_block ←→ inode ←→ dentry ←→ file
(文件系统) (文件元数据) (目录项/名字) (打开的文件)
一个super_block对应一个挂载的文件系统
一个inode对应一个文件(可被多个dentry引用,即硬链接)
一个dentry对应一个路径分量(目录项缓存)
一个file对应一个打开的文件描述符(包含位置信息)
# Inspect VFS state
cat /proc/mounts # mounted filesystems
cat /proc/filesystems # registered filesystem types
cat /proc/sys/fs/inode-nr # inode usage statistics
cat /proc/sys/fs/file-nr # file handle statistics
cat /proc/sys/fs/dentry-state # dentry cache statistics
# Show mounted filesystems as a tree
findmnt --tree
二、核心数据结构深度分析
2.1 super_block
// superblock_demo.c - 遍历已挂载的文件系统
#include<linux/module.h>
#include<linux/fs.h>
#include<linux/mount.h>
#include<linux/nsproxy.h>
#include<linux/mnt_namespace.h>
MODULE_LICENSE("GPL");
/*
 * dump_super_block - log the main fields of one superblock.
 * @sb: superblock to describe.
 *
 * Prints the filesystem type name, backing block device, block size,
 * maximum file size, magic number, flags and read-only state. When an
 * operations table is attached, the statfs and alloc_inode callbacks are
 * printed as well.
 */
static void dump_super_block(struct super_block *sb)
{
	pr_info("=== Superblock: %s ===\n", sb->s_type->name);
	/*
	 * s_bdev is passed through unchanged; the previous "x ? x : NULL"
	 * ternary evaluated to the same pointer in both branches.
	 */
	pr_info(" Device: %pg\n", sb->s_bdev);
	pr_info(" Block size: %lu bytes\n", sb->s_blocksize);
	pr_info(" Max filesize: %lld bytes\n", sb->s_maxbytes);
	pr_info(" Inode count: (see df -i)\n");
	pr_info(" Magic: 0x%lx\n", sb->s_magic);
	pr_info(" Flags: 0x%lx\n", sb->s_flags);
	pr_info(" Readonly: %d\n", sb_rdonly(sb));

	/* Filesystem operations table */
	if (sb->s_op) {
		pr_info(" Ops: statfs=%p alloc_inode=%p\n",
			sb->s_op->statfs,
			sb->s_op->alloc_inode);
	}
}
/*
 * sb_demo_init - module entry point: dump every active superblock.
 *
 * Walks the super_blocks list under sb_lock and calls dump_super_block()
 * for each entry whose s_active count is positive. Always returns 0.
 *
 * NOTE(review): sb_lock and super_blocks appear to be private to
 * fs/super.c on mainline kernels, in which case a loadable module cannot
 * reference them. Confirm against the target tree; an exported iterator
 * such as iterate_supers_type() may be needed instead.
 */
static int __init sb_demo_init(void)
{
	struct super_block *sb;

	/* Walk every superblock; report only those still in active use. */
	spin_lock(&sb_lock);
	list_for_each_entry(sb, &super_blocks, s_list) {
		if (atomic_read(&sb->s_active) > 0)
			dump_super_block(sb);
	}
	spin_unlock(&sb_lock);

	return 0;
}
/* Module exit hook: the demo holds no resources, so nothing is released. */
static void __exit sb_demo_exit(void)
{
}

module_init(sb_demo_init);
module_exit(sb_demo_exit);
2.2 inode深度分析
// inode_demo.c - inode结构分析
#include<linux/module.h>
#include<linux/fs.h>
#include<linux/namei.h>
#include<linux/stat.h>
MODULE_LICENSE("GPL");
staticvoiddump_inode(struct inode *inode, constchar *path)
{
pr_info("=== Inode: %s ===\n", path);
pr_info(" ino: %lu\n", inode->i_ino);
pr_info(" mode: %o\n", inode->i_mode);
pr_info(" uid: %u\n", i_uid_read(inode));
pr_info(" gid: %u\n", i_gid_read(inode));
pr_info(" size: %lld bytes\n", i_size_read(inode));
pr_info(" blocks: %llu\n", (u64)inode->i_blocks);
pr_info(" nlink: %u\n", inode->i_nlink);
pr_info(" blksize: %u\n", inode->i_blkbits);
/* 时间戳 */
pr_info(" atime: %lld\n", inode->i_atime.tv_sec);
pr_info(" mtime: %lld\n", inode->i_mtime.tv_sec);
pr_info(" ctime: %lld\n", inode->i_ctime.tv_sec);
/* 引用计数 */
pr_info(" i_count: %u\n", atomic_read(&inode->i_count));
/* 类型判断 */
pr_info(" type: %s\n",
S_ISREG(inode->i_mode) ? "regular file" :
S_ISDIR(inode->i_mode) ? "directory" :
S_ISLNK(inode->i_mode) ? "symlink" :
S_ISBLK(inode->i_mode) ? "block device" :
S_ISCHR(inode->i_mode) ? "char device" :
S_ISFIFO(inode->i_mode) ? "fifo" :
S_ISSOCK(inode->i_mode) ? "socket" : "unknown");
/* 操作表 */
if (inode->i_op)
pr_info(" i_op->lookup: %p\n", inode->i_op->lookup);
if (inode->i_fop)
pr_info(" i_fop->read: %p\n", inode->i_fop->read);
}
/*
 * inode_demo_init - module entry point: resolve a few paths and dump
 * their inodes.
 *
 * Each path is looked up with kern_path(); on success the inode is dumped
 * and the path reference dropped. A failed lookup is logged and skipped.
 * Always returns 0.
 */
static int __init inode_demo_init(void)
{
	static const char * const targets[] = { "/etc/passwd", "/proc" };
	struct path path;
	size_t i;
	int ret;

	for (i = 0; i < ARRAY_SIZE(targets); i++) {
		ret = kern_path(targets[i], LOOKUP_FOLLOW, &path);
		if (ret) {
			pr_warn("inode_demo: cannot resolve %s (err=%d)\n",
				targets[i], ret);
			continue;
		}
		dump_inode(d_inode(path.dentry), targets[i]);
		path_put(&path);
	}

	return 0;
}
/* Module exit hook: the demo holds no resources, so nothing is released. */
static void __exit inode_demo_exit(void)
{
}

module_init(inode_demo_init);
module_exit(inode_demo_exit);
2.3 dentry缓存
// dentry_demo.c - dentry缓存与路径查找
#include<linux/module.h>
#include<linux/fs.h>
#include<linux/dcache.h>
#include<linux/namei.h>
#include<linux/path.h>
MODULE_LICENSE("GPL");
/*
 * dump_dentry - log one dentry, indented by its depth.
 * @dentry: dentry to describe.
 * @depth:  indentation level; two spaces are printed per level.
 *
 * Depths outside 0..5 are ignored. The lower bound matters because a
 * negative depth would otherwise become a huge memset() length.
 */
static void dump_dentry(struct dentry *dentry, int depth)
{
	char indent[64];

	if (depth < 0 || depth > 5)
		return;

	memset(indent, ' ', depth * 2);
	indent[depth * 2] = '\0';

	pr_info("%s%s (ino=%lu, flags=0x%x, count=%d)\n",
		indent,
		dentry->d_name.name,
		dentry->d_inode ? dentry->d_inode->i_ino : 0UL,
		dentry->d_flags,
		(int)d_count(dentry));
}
/* 通过路径查找dentry */
staticvoidpath_lookup_demo(constchar *pathname)
{
structpathpath;
char buf[256];
char *full_path;
if (kern_path(pathname, LOOKUP_FOLLOW, &path) != 0) {
pr_err("Path not found: %s\n", pathname);
return;
}
/* 获取完整路径 */
full_path = d_path(&path, buf, sizeof(buf));
if (!IS_ERR(full_path)) {
pr_info("Resolved path: %s -> %s\n", pathname, full_path);
}
dump_dentry(path.dentry, 0);
/* 查看父目录dentry */
if (path.dentry->d_parent != path.dentry) {
pr_info("Parent: %s\n",
path.dentry->d_parent->d_name.name);
}
path_put(&path);
}
/*
 * dentry_demo_init - module entry point: run the lookup demo on three
 * fixed paths. Always returns 0.
 */
static int __init dentry_demo_init(void)
{
	pr_info("=== Dentry Demo ===\n");
	path_lookup_demo("/etc/passwd");
	path_lookup_demo("/proc/self");
	path_lookup_demo("/tmp");

	return 0;
}
/* Module exit hook: the demo holds no resources, so nothing is released. */
static void __exit dentry_demo_exit(void)
{
}

module_init(dentry_demo_init);
module_exit(dentry_demo_exit);
三、文件操作流程追踪
3.1 open()系统调用链
/*
* open("/etc/passwd", O_RDONLY) 内核调用链:
*
* sys_openat()
* → do_sys_openat2()
* → do_filp_open()
* → path_openat()
* → link_path_walk() ← 路径解析(逐分量查找dentry)
* → walk_component()
* → lookup_fast() ← dentry缓存查找
* → lookup_slow() ← 调用inode->i_op->lookup()
* → do_open()
* → vfs_open()
* → do_dentry_open()
* → inode->i_fop->open() ← 调用具体文件系统的open
*/
# Watch the openat/read/close syscalls issued while reading a file
strace -e trace=openat,read,close cat /etc/passwd 2>&1 | head -20
# Trace VFS function calls with ftrace
cd /sys/kernel/debug/tracing
echo 0 > tracing_on
echo function > current_tracer
echo 'vfs_*' > set_ftrace_filter
echo 1 > tracing_on
cat /etc/passwd > /dev/null
echo 0 > tracing_on
cat trace | head -30
# Restore the tracer and clear the filter
echo nop > current_tracer
echo > set_ftrace_filter
3.2 read()调用链与页缓存
/*
* read() 调用链(文件数据可能在页缓存中):
*
* sys_read()
* → vfs_read()
* → new_sync_read()
* → file->f_op->read_iter() ← 具体文件系统实现
* → generic_file_read_iter() ← 通用实现(走页缓存)
* → filemap_read()
* → find_get_pages_contig() ← 在页缓存中查找
* → page_cache_sync_readahead() ← 缓存未命中,触发读取
* → ext4_readpages() ← 从磁盘读取
*/
# Page cache statistics
grep -E "Cached|Buffers|Dirty|Writeback" /proc/meminfo
# Page cache usage of a specific file
# Requires pcstat: go install github.com/tobert/pcstat@latest
# pcstat /path/to/large/file
# Drop the page cache (testing only); sync first so dirty pages can be freed too
sync
echo 1 | sudo tee /proc/sys/vm/drop_caches
# Inspect and control the page cache with vmtouch
# sudo apt install vmtouch
vmtouch /etc/passwd # show cache residency
vmtouch -t /etc/passwd # touch the file into the cache
vmtouch -e /etc/passwd # evict the file from the cache
四、ext4文件系统深度解析
4.1 ext4磁盘布局
Block Group 0:
+-----------+--------+-----------+-------+--------+-------+
| Superblock| Group | Block | Inode | Inode | Data |
| |Descr. | Bitmap | Bitmap| Table | Blocks|
+-----------+--------+-----------+-------+--------+-------+
关键参数(典型4KB块大小):
- Block size: 4096 bytes
- Inodes per group: 8192
- Blocks per group: 32768(128MB per group)
- max file size: ~16TB
- max fs size: 1EB
# Show detailed ext4 filesystem information
sudo tune2fs -l /dev/sda1
# Show block group information
sudo dumpe2fs /dev/sda1 | head -80
# Inspect inode details
sudo debugfs /dev/sda1
(debugfs) stat /etc/passwd # show the inode
(debugfs) blocks /etc/passwd # show the data blocks
(debugfs) dump /etc/passwd /tmp/passwd_copy # export the file
(debugfs) quit
# Show the extent tree (ext4 uses extents instead of indirect blocks)
sudo debugfs -R "extents /etc/passwd" /dev/sda1
4.2 ext4 Extent树
/*
* ext4 Extent(区段)结构:
* 将连续的逻辑块映射到连续的物理块
* 比传统间接块(indirect block)更高效
*
* struct ext4_extent {
* __le32 ee_block; // 起始逻辑块号
* __le16 ee_len; // 长度(块数)
* __le16 ee_start_hi; // 起始物理块(高16位)
* __le32 ee_start_lo; // 起始物理块(低32位)
* };
*/
# Show the extent allocation of a file
sudo filefrag -v /path/to/file
# Defragment a fragmented file
sudo e4defrag /path/to/file
# Show how fragmented the filesystem's free space is
sudo e2freefrag /dev/sda1
# Preallocate disk space (reduces fragmentation)
fallocate -l 1G /tmp/bigfile
# Check the preallocated file
stat /tmp/bigfile
4.3 日志(Journal)机制
# Show the journal features (recorded in the journal superblock, printed by dumpe2fs -h)
sudo dumpe2fs -h /dev/sda1 | grep -i "journal features"
# Show the data journaling mode currently in effect
# NOTE(review): assumes the per-device ext4 options file is available; confirm on the target kernel
grep '^data=' /proc/fs/ext4/sda1/options
# ext4 journaling modes:
# journal: data and metadata are both journaled (safest, slowest)
# ordered: only metadata is journaled, data is written to disk first (default, balanced)
# writeback: only metadata is journaled, data may be written out of order (fastest, least safe)
# Change the journaling mode: ext4 refuses to change data= on a remount,
# so unmount and mount again with the new option
sudo umount /mount/point
sudo mount -o data=writeback /dev/sda1 /mount/point
# Show the journal size
sudo dumpe2fs -h /dev/sda1 | grep -i "journal size"
# Select the journaling mode at mount time (/etc/fstab)
# /dev/sda1 / ext4 defaults,data=ordered 0 1
五、实现自定义文件系统
5.1 最小内存文件系统(完整版)
// myfs.c - 完整的最小内存文件系统实现
#include<linux/module.h>
#include<linux/init.h>
#include<linux/fs.h>
#include<linux/pagemap.h>
#include<linux/slab.h>
#include<linux/stat.h>
#include<linux/string.h>
#include<linux/time.h>
#include<linux/uio.h>
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Minimal memory filesystem");
#define MYFS_MAGIC 0x4D594653 /* "MYFS" */
#define MYFS_DEFAULT_MODE 0755
/* ===== 文件操作 ===== */
/*
 * myfs_file_open - open callback for regular files.
 * @inode: inode being opened.
 * @file:  file object being set up.
 *
 * Logs the dentry name, then delegates to generic_file_open() and returns
 * its result.
 */
static int myfs_file_open(struct inode *inode, struct file *file)
{
	pr_info("myfs: open %s\n", file->f_path.dentry->d_name.name);
	return generic_file_open(inode, file);
}
/* 使用页缓存的通用读写操作 */
staticconststructfile_operationsmyfs_file_fops = {
.open = myfs_file_open,
.read_iter = generic_file_read_iter,
.write_iter = generic_file_write_iter,
.mmap = generic_file_mmap,
.fsync = noop_fsync,
.llseek = generic_file_llseek,
};
/* ===== 地址空间操作(页缓存) ===== */
staticconststructaddress_space_operationsmyfs_aops = {
.read_folio = simple_read_folio,
.write_begin = simple_write_begin,
.write_end = simple_write_end,
.dirty_folio = noop_dirty_folio,
};
/* ===== inode操作 ===== */
/* 创建文件 */
staticintmyfs_create(struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
umode_t mode, bool excl)
{
structinode *inode;
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
inode->i_ino = get_next_ino();
inode->i_mode = mode | S_IFREG;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_atime =
inode->i_mtime =
inode->i_ctime = current_time(inode);
inode->i_op = &simple_symlink_inode_operations;
inode->i_fop = &myfs_file_fops;
inode->i_mapping->a_ops = &myfs_aops;
d_instantiate(dentry, inode);
dget(dentry);
pr_info("myfs: created file %s\n", dentry->d_name.name);
return0;
}
/* 创建目录 */
staticintmyfs_mkdir(struct mnt_idmap *idmap,
struct inode *dir,
struct dentry *dentry,
umode_t mode)
{
structinode *inode;
inode = new_inode(dir->i_sb);
if (!inode)
return -ENOMEM;
inode->i_ino = get_next_ino();
inode->i_mode = mode | S_IFDIR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_atime =
inode->i_mtime =
inode->i_ctime = current_time(inode);
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
set_nlink(inode, 2);
inc_nlink(dir);
d_instantiate(dentry, inode);
dget(dentry);
pr_info("myfs: created dir %s\n", dentry->d_name.name);
return0;
}
staticconststructinode_operationsmyfs_dir_inode_ops = {
.create = myfs_create,
.lookup = simple_lookup,
.link = simple_link,
.unlink = simple_unlink,
.mkdir = myfs_mkdir,
.rmdir = simple_rmdir,
.rename = simple_rename,
};
/* ===== 超级块操作 ===== */
staticconststructsuper_operationsmyfs_super_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
};
/* 填充超级块 */
staticintmyfs_fill_super(struct super_block *sb, void *data, int silent)
{
structinode *root_inode;
structdentry *root_dentry;
/* 设置超级块参数 */
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_blocksize = PAGE_SIZE;
sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = MYFS_MAGIC;
sb->s_op = &myfs_super_ops;
sb->s_time_gran = 1;
/* 创建根inode */
root_inode = new_inode(sb);
if (!root_inode)
return -ENOMEM;
root_inode->i_ino = 1;
root_inode->i_mode = S_IFDIR | MYFS_DEFAULT_MODE;
root_inode->i_uid = GLOBAL_ROOT_UID;
root_inode->i_gid = GLOBAL_ROOT_GID;
root_inode->i_atime =
root_inode->i_mtime =
root_inode->i_ctime = current_time(root_inode);
root_inode->i_op = &myfs_dir_inode_ops;
root_inode->i_fop = &simple_dir_operations;
set_nlink(root_inode, 2);
/* 创建根dentry */
root_dentry = d_make_root(root_inode);
if (!root_dentry)
return -ENOMEM;
sb->s_root = root_dentry;
pr_info("myfs: filesystem mounted\n");
return0;
}
/* ===== 文件系统类型 ===== */
staticstruct dentry *myfs_mount(struct file_system_type *fs_type,
int flags,
constchar *dev_name,
void *data)
{
return mount_nodev(fs_type, flags, data, myfs_fill_super);
}
/*
 * myfs_kill_sb - tear down a superblock at unmount.
 * @sb: superblock being destroyed.
 *
 * Hands the superblock to kill_litter_super() and logs the unmount.
 */
static void myfs_kill_sb(struct super_block *sb)
{
	kill_litter_super(sb);
	pr_info("myfs: filesystem unmounted\n");
}
staticstructfile_system_typemyfs_type = {
.owner = THIS_MODULE,
.name = "myfs",
.mount = myfs_mount,
.kill_sb = myfs_kill_sb,
};
/*
 * myfs_init - module entry point: register the filesystem type.
 *
 * Returns the result of register_filesystem(); a usage hint is logged on
 * success.
 */
static int __init myfs_init(void)
{
	int ret = register_filesystem(&myfs_type);

	if (ret == 0)
		pr_info("myfs: registered. Usage: mount -t myfs none /mnt/myfs\n");

	return ret;
}
/* Module exit hook: unregister the filesystem type. */
static void __exit myfs_exit(void)
{
	unregister_filesystem(&myfs_type);
	pr_info("myfs: unregistered\n");
}

module_init(myfs_init);
module_exit(myfs_exit);
# Build and load the custom filesystem module
make -C /lib/modules/$(uname -r)/build M=$PWD modules
sudo insmod myfs.ko
# Mount (-p: do not fail if the mount point already exists)
sudo mkdir -p /mnt/myfs
sudo mount -t myfs none /mnt/myfs
# Exercise it
echo "Hello myfs" | sudo tee /mnt/myfs/test.txt
cat /mnt/myfs/test.txt
ls -la /mnt/myfs/
sudo mkdir /mnt/myfs/subdir
df -h /mnt/myfs
# Unmount and unload
sudo umount /mnt/myfs
sudo rmmod myfs
六、proc文件系统实现
6.1 使用seq_file创建proc接口
// proc_demo.c - proc文件系统完整示例
#include<linux/module.h>
#include<linux/proc_fs.h>
#include<linux/seq_file.h>
#include<linux/slab.h>
#include<linux/uaccess.h>
MODULE_LICENSE("GPL");
/* ===== 1. 简单只读proc文件 ===== */
/*
 * simple_show - seq_file show callback for the read-only entry.
 * @m: seq_file to write into.
 * @v: iterator token (unused).
 *
 * Emits a greeting, the current jiffies value and HZ. Always returns 0.
 */
static int simple_show(struct seq_file *m, void *v)
{
	seq_printf(m, "Hello from proc!\n");
	seq_printf(m, "jiffies: %lu\n", jiffies);
	seq_printf(m, "HZ: %d\n", HZ);
	return 0;
}
staticintsimple_open(struct inode *inode, struct file *file)
{
return single_open(file, simple_show, NULL);
}
staticconststructproc_opssimple_fops = {
.proc_open = simple_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
/* ===== 2. 可读写proc文件 ===== */
staticchar rw_buffer[256] = "default value\n";
staticsize_t rw_len = 15;
/*
 * rw_show - seq_file show callback for the read/write entry.
 * @m: seq_file to write into.
 * @v: iterator token (unused).
 *
 * Emits the first rw_len bytes of rw_buffer. Always returns 0.
 */
static int rw_show(struct seq_file *m, void *v)
{
	seq_write(m, rw_buffer, rw_len);
	return 0;
}
/*
 * rw_open - open callback for the read/write entry.
 * @inode: inode being opened (unused).
 * @file:  file object to attach the seq_file to.
 *
 * Returns the result of single_open().
 */
static int rw_open(struct inode *inode, struct file *file)
{
	return single_open(file, rw_show, NULL);
}
staticssize_trw_write(struct file *file,
constchar __user *ubuf,
size_t count, loff_t *ppos)
{
if (count >= sizeof(rw_buffer))
count = sizeof(rw_buffer) - 1;
if (copy_from_user(rw_buffer, ubuf, count))
return -EFAULT;
rw_buffer[count] = '\0';
rw_len = count;
pr_info("proc rw: wrote %zu bytes: %s", count, rw_buffer);
return count;
}
staticconststructproc_opsrw_fops = {
.proc_open = rw_open,
.proc_read = seq_read,
.proc_write = rw_write,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
/* ===== 3. 迭代器模式(适合多行输出)===== */
structmy_item {
int id;
char name[32];
structlist_headlist;
};
staticLIST_HEAD(my_list);
/* seq_file start callback: position the iterator at *pos within my_list. */
static void *my_seq_start(struct seq_file *m, loff_t *pos)
{
	return seq_list_start(&my_list, *pos);
}

/* seq_file next callback: advance from @v to the following list entry. */
static void *my_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
	return seq_list_next(v, &my_list, pos);
}

/* seq_file stop callback: start takes nothing, so nothing is undone here. */
static void my_seq_stop(struct seq_file *m, void *v)
{
}
staticintmy_seq_show(struct seq_file *m, void *v)
{
structmy_item *item = list_entry(v, struct my_item, list);
seq_printf(m, "%3d: %s\n", item->id, item->name);
return0;
}
staticconststructseq_operationsmy_seq_ops = {
.start = my_seq_start,
.next = my_seq_next,
.stop = my_seq_stop,
.show = my_seq_show,
};
/*
 * list_open - open callback for the list entry.
 * @inode: inode being opened (unused).
 * @file:  file object to attach the seq_file iterator to.
 *
 * Returns the result of seq_open().
 */
static int list_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &my_seq_ops);
}
staticconststructproc_opslist_fops = {
.proc_open = list_open,
.proc_read = seq_read,
.proc_lseek = seq_lseek,
.proc_release = seq_release,
};
/* ===== 初始化 ===== */
/* The "demo" directory and the three entries created beneath it. */
static struct proc_dir_entry *proc_dir;
static struct proc_dir_entry *proc_simple, *proc_rw, *proc_list;
/*
 * proc_demo_init - module entry point: build the demo list and create
 * /proc/demo/{info,value,list}.
 *
 * The list is populated before any proc entry is created so that a reader
 * cannot walk it while it is still being modified. Running out of memory
 * while populating is tolerated and leaves a shorter list.
 *
 * Returns 0 on success or -ENOMEM if the directory or any entry could not
 * be created; in that case everything set up so far is released.
 */
static int __init proc_demo_init(void)
{
	struct my_item *item, *tmp;
	int i;

	/* Fill the list first. */
	for (i = 0; i < 5; i++) {
		item = kmalloc(sizeof(*item), GFP_KERNEL);
		if (!item) {
			pr_warn("proc_demo: out of memory after %d items\n", i);
			break;
		}
		item->id = i;
		snprintf(item->name, sizeof(item->name), "item_%d", i);
		list_add_tail(&item->list, &my_list);
	}

	/* Create the proc directory and its entries. */
	proc_dir = proc_mkdir("demo", NULL);
	if (!proc_dir)
		goto err_free_list;

	proc_simple = proc_create("info", 0444, proc_dir, &simple_fops);
	proc_rw = proc_create("value", 0644, proc_dir, &rw_fops);
	proc_list = proc_create("list", 0444, proc_dir, &list_fops);
	if (!proc_simple || !proc_rw || !proc_list)
		goto err_remove_dir;

	pr_info("proc_demo: created /proc/demo/{info,value,list}\n");
	pr_info(" cat /proc/demo/info\n");
	pr_info(" echo 'hello' > /proc/demo/value\n");
	pr_info(" cat /proc/demo/list\n");
	return 0;

err_remove_dir:
	/* Same call the exit path uses to take down the directory and its entries. */
	proc_remove(proc_dir);
err_free_list:
	list_for_each_entry_safe(item, tmp, &my_list, list) {
		list_del(&item->list);
		kfree(item);
	}
	return -ENOMEM;
}
/*
 * proc_demo_exit - module exit hook.
 *
 * Removes the proc directory first, then frees every list element.
 */
static void __exit proc_demo_exit(void)
{
	struct my_item *item, *tmp;

	proc_remove(proc_dir);

	list_for_each_entry_safe(item, tmp, &my_list, list) {
		list_del(&item->list);
		kfree(item);
	}
}

module_init(proc_demo_init);
module_exit(proc_demo_exit);
七、FUSE(用户空间文件系统)
7.1 FUSE架构
用户程序 read("/mnt/fuse/file")
↓
VFS → FUSE内核模块(/dev/fuse)
↓ (通过/dev/fuse传递请求)
用户态FUSE守护进程(libfuse)
↓ (实现文件操作逻辑)
返回结果给内核
// hello_fuse.c - 最小FUSE文件系统(使用libfuse3)
// 编译:gcc hello_fuse.c -o hello_fuse $(pkg-config fuse3 --cflags --libs)
// 运行:./hello_fuse /mnt/fuse -f
// 测试:cat /mnt/fuse/hello
// 卸载:fusermount3 -u /mnt/fuse
#define FUSE_USE_VERSION 31
#include<fuse3/fuse.h>
#include<stdio.h>
#include<string.h>
#include<errno.h>
#include<fcntl.h>
#include<stddef.h>
#include<assert.h>
/* Path and contents of the single file this filesystem exposes. */
static const char *hello_path = "/hello";
static const char *hello_content = "Hello from FUSE filesystem!\n";
staticinthello_getattr(constchar *path, struct stat *stbuf,
struct fuse_file_info *fi)
{
memset(stbuf, 0, sizeof(*stbuf));
if (strcmp(path, "/") == 0) {
stbuf->st_mode = S_IFDIR | 0755;
stbuf->st_nlink = 2;
return0;
}
if (strcmp(path, hello_path) == 0) {
stbuf->st_mode = S_IFREG | 0444;
stbuf->st_nlink = 1;
stbuf->st_size = strlen(hello_content);
return0;
}
return -ENOENT;
}
/*
 * hello_readdir - list the root directory.
 * @path:   directory being listed; only "/" exists.
 * @buf:    opaque buffer handed back to @filler.
 * @filler: callback that appends one entry to @buf.
 * @offset: directory offset (unused).
 * @fi:     open file info (unused).
 * @flags:  readdir flags (unused).
 *
 * Emits ".", ".." and "hello". Returns 0, or -ENOENT for any other path.
 */
static int hello_readdir(const char *path, void *buf,
			 fuse_fill_dir_t filler,
			 off_t offset,
			 struct fuse_file_info *fi,
			 enum fuse_readdir_flags flags)
{
	if (strcmp(path, "/") != 0)
		return -ENOENT;

	filler(buf, ".", NULL, 0, 0);
	filler(buf, "..", NULL, 0, 0);
	filler(buf, "hello", NULL, 0, 0);
	return 0;
}
staticinthello_open(constchar *path, struct fuse_file_info *fi)
{
if (strcmp(path, hello_path) != 0)
return -ENOENT;
if ((fi->flags & O_ACCMODE) != O_RDONLY)
return -EACCES;
return0;
}
staticinthello_read(constchar *path, char *buf, size_t size,
off_t offset, struct fuse_file_info *fi)
{
size_t len = strlen(hello_content);
if (strcmp(path, hello_path) != 0)
return -ENOENT;
if (offset >= (off_t)len)
return0;
if (offset + size > len)
size = len - offset;
memcpy(buf, hello_content + offset, size);
return size;
}
staticconststructfuse_operationshello_oper = {
.getattr = hello_getattr,
.readdir = hello_readdir,
.open = hello_open,
.read = hello_read,
};
/* Entry point: hand the command line and the callback table to fuse_main(). */
int main(int argc, char *argv[])
{
	return fuse_main(argc, argv, &hello_oper, NULL);
}
八、文件系统调试与分析
8.1 常用调试命令
# Filesystem usage per mount point
df -h
df -i # inode usage
# Show the physical block allocation of a file
sudo filefrag -v /path/to/file
# Disk usage of a directory tree
du -sh /var/log/*
du --max-depth=1 /var
# Check the filesystem for errors (unmount it first)
sudo fsck.ext4 -n /dev/sda1 # -n: check only, do not repair
# Disk I/O statistics
iostat -x 1 5
# Monitor filesystem events (requires inotify-tools)
sudo apt install inotify-tools
inotifywait -m -r /tmp # watch every event under /tmp
8.2 inotify用户态监控程序
// inotify_demo.c - 使用inotify监控文件事件
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<sys/inotify.h>
#include<unistd.h>
#include<fcntl.h>
#include<errno.h>
#define EVENT_SIZE (sizeof(struct inotify_event))
#define BUF_LEN (1024 * (EVENT_SIZE + 16))
/*
 * Watch one directory with inotify and print each event as it arrives.
 *
 * argv[1] selects the directory; "/tmp" is used when it is absent. The
 * descriptor is non-blocking, so the loop sleeps 100 ms whenever no event
 * is pending. Returns 1 if inotify cannot be set up; otherwise the loop
 * only ends on a read error, after which 0 is returned.
 */
int main(int argc, char *argv[])
{
	int ifd, wd;
	/*
	 * The buffer is walked as a sequence of struct inotify_event records,
	 * so it must be aligned for that type (see inotify(7)).
	 */
	char buf[BUF_LEN]
		__attribute__((aligned(__alignof__(struct inotify_event))));
	const char *watchdir = argc > 1 ? argv[1] : "/tmp";

	ifd = inotify_init1(IN_NONBLOCK);
	if (ifd < 0) {
		perror("inotify_init1");
		return 1;
	}

	/* Watch the chosen directory. */
	wd = inotify_add_watch(ifd, watchdir,
			       IN_CREATE | IN_DELETE |
			       IN_MODIFY | IN_MOVED_FROM |
			       IN_MOVED_TO | IN_CLOSE_WRITE);
	if (wd < 0) {
		perror("inotify_add_watch");
		close(ifd);
		return 1;
	}

	printf("Watching: %s (press Ctrl+C to stop)\n", watchdir);

	while (1) {
		ssize_t len = read(ifd, buf, BUF_LEN);

		if (len < 0) {
			if (errno == EAGAIN) {
				usleep(100000); /* 100ms */
				continue;
			}
			/* An interrupted read is not an error; just retry. */
			if (errno == EINTR)
				continue;
			perror("read");
			break;
		}

		char *ptr = buf;

		while (ptr < buf + len) {
			struct inotify_event *event = (struct inotify_event *)ptr;
			const char *event_name = "";

			/* When several bits are set, the last matching name wins. */
			if (event->mask & IN_CREATE) event_name = "CREATE";
			if (event->mask & IN_DELETE) event_name = "DELETE";
			if (event->mask & IN_MODIFY) event_name = "MODIFY";
			if (event->mask & IN_MOVED_FROM) event_name = "MOVED_FROM";
			if (event->mask & IN_MOVED_TO) event_name = "MOVED_TO";
			if (event->mask & IN_CLOSE_WRITE) event_name = "CLOSE_WRITE";

			if (event->mask & IN_ISDIR)
				printf("[DIR] ");

			printf("%-12s %s/%s\n",
			       event_name,
			       watchdir,
			       event->len ? event->name : "");

			/* Records are variable length: header plus the name field. */
			ptr += EVENT_SIZE + event->len;
		}
	}

	inotify_rm_watch(ifd, wd);
	close(ifd);
	return 0;
}
实践检查清单
VFS基础
- [ ] 能描述 super_block、inode、dentry、file 四者的关系
- [ ] 理解 open() 系统调用中路径解析的完整流程
- [ ] 理解 dentry 缓存的作用和负向缓存(negative dentry)
- [ ] 能用 kern_path() 在内核中查找文件路径
ext4
- [ ] 理解 ext4 的 Block Group 结构
- [ ] 理解 Extent 树相比传统间接块的优势
- [ ] 理解 journal 三种模式(journal/ordered/writeback)的区别
- [ ] 能用 debugfs 检查 ext4 文件系统的内部结构
自定义文件系统
- [ ] 理解 mount_nodev、mount_bdev 的区别
- [ ] 能用 seq_file + proc_fs 创建可读写的 /proc 接口
- [ ] 能用 libfuse 实现用户空间文件系统
调试工具
- [ ] 用 ftrace 追踪 VFS 函数调用链
- [ ] 用 strace 分析文件操作的系统调用序列