我正在尝试实现一个程序,它使用 lseek(2) 及其参数 SEEK_DATA 和 SEEK_HOLE,打印普通稀疏文件中的所有空洞(hole)和数据段,输出类似于:
$ ./list_hole_and_data_segs sparse_file
This file has 100 bytes
[0, 10]: hole
[11, 99]: data(end)
实现
/*
* list_hole_and_data_segs.c
*/
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/* Kind of region that a file offset belongs to. */
enum Type {
    HOLE = 0, /* the offset lies inside a hole */
    DATA = 1, /* the offset lies inside a data segment */
};
/* Print the hole and data segments of the file open on descriptor `fd`. */
void find_all_holes(int fd);
/*
 * Entry point: open the file named by av[1] read-only and list its hole and
 * data segments with find_all_holes().
 *
 * Exits with EXIT_FAILURE when no file name is given or the file cannot be
 * opened; returns 0 otherwise.
 */
int main(int ac, char *av[])
{
    /* Without this check av[1] is NULL and would be handed to open(). */
    if (ac < 2) {
        fprintf(stderr, "Usage: %s <file>\n",
                (ac > 0 && av[0] != NULL) ? av[0] : "list_hole_and_data_segs");
        exit(EXIT_FAILURE);
    }

    int fd = open(av[1], O_RDONLY);
    if (fd == -1) {
        perror("open");
        exit(EXIT_FAILURE);
    }

    find_all_holes(fd);

    close(fd);
    return 0;
}
/*
 * Print every hole and data segment of the file open on `fd`, in file order.
 *
 * The size is taken from lseek(SEEK_END); the file is then walked by
 * alternating lseek(SEEK_DATA) and lseek(SEEK_HOLE).  Each segment is printed
 * as "[first, last]: hole|data" with inclusive byte offsets, and the segment
 * that reaches the end of the file is suffixed with "(end)".  A file that
 * consists of data only is reported on a single "data(then exit)" line.  For
 * an empty file only the size line is printed.
 *
 * The boundaries are the ones the filesystem reports.  lseek(2) allows a
 * filesystem to choose its own granularity, and allocation is normally
 * tracked in whole blocks, so boundaries are usually block-aligned: with
 * 4096-byte blocks, 5 bytes written at offset 10000 are reported as data
 * starting at 8192, not at 10000.  That is expected behaviour, not an error
 * in this function.
 *
 * fd: an open file descriptor; its file offset is moved by this function.
 *
 * Any lseek() failure other than the expected ENXIO (no more data after the
 * current offset) is reported with perror() and ends the process with
 * EXIT_FAILURE.
 */
void find_all_holes(int fd)
{
    off_t file_size = lseek(fd, 0, SEEK_END);
    if (file_size == -1) {
        perror("lseek(SEEK_END)");
        exit(EXIT_FAILURE);
    }

    /* off_t has no printf conversion of its own, so print it as long long. */
    printf("This file has %lld bytes\n", (long long)file_size);

    if (file_size == 0) {
        /* No bytes, hence no segments; SEEK_HOLE at offset 0 would fail. */
        return;
    }

    off_t index_of_last_byte = file_size - 1;
    off_t cur_offset = 0; /* first byte of the segment being examined */
    enum Type cur_type;   /* type of the segment starting at cur_offset */

    /* Classify byte 0: SEEK_HOLE returns 0 only if the file starts with a hole. */
    off_t res = lseek(fd, 0, SEEK_HOLE);
    if (res == -1) {
        perror("lseek(SEEK_HOLE)");
        exit(EXIT_FAILURE);
    }

    if (res == 0) {
        cur_type = HOLE;
    } else if (res == file_size) {
        /* The only "hole" is the implicit one at end of file. */
        printf("[0, %lld]: data(then exit)\n", (long long)index_of_last_byte);
        return;
    } else {
        /* Bytes [0, res - 1] are data and the byte at `res` starts a hole. */
        printf("[0, %lld]: data\n", (long long)(res - 1));
        cur_type = HOLE;
        cur_offset = res;
    }

    while (cur_offset <= index_of_last_byte) {
        const char *label = (cur_type == DATA) ? "data" : "hole";
        /* The current segment ends where the opposite kind begins. */
        off_t new_offset = lseek(fd, cur_offset,
                                 (cur_type == DATA) ? SEEK_HOLE : SEEK_DATA);
        int reaches_end;

        if (new_offset == -1) {
            /* ENXIO from SEEK_DATA means the hole runs to the end of file. */
            if (cur_type != HOLE || errno != ENXIO) {
                perror("lseek");
                exit(EXIT_FAILURE);
            }
            reaches_end = 1;
        } else {
            /* SEEK_HOLE returns the file size when data runs to the end. */
            reaches_end = (new_offset >= file_size);
        }

        if (reaches_end) {
            printf("[%lld, %lld]: %s(end)\n", (long long)cur_offset,
                   (long long)index_of_last_byte, label);
            break;
        }

        printf("[%lld, %lld]: %s\n", (long long)cur_offset,
               (long long)(new_offset - 1), label);
        cur_offset = new_offset;
        cur_type = (cur_type == DATA) ? HOLE : DATA;
    }
}
测试我的实现
我使用以下代码片段创建稀疏文件,为了简单起见,省略了错误处理:
/*
* create_sparse_file.c
*/
#include <fcntl.h>
#include <unistd.h>
/*
 * Create "sparse_file": seek 10000 bytes past the start of a freshly
 * truncated file and write "HELLO" there, leaving the skipped range unwritten.
 * Return values are deliberately not checked to keep the example short.
 */
int main(void)
{
    static const char payload[] = "HELLO";
    const off_t skipped_bytes = 10000;
    const int out_fd = open("sparse_file", O_CREAT | O_WRONLY | O_TRUNC, 0666);

    lseek(out_fd, skipped_bytes, SEEK_CUR);
    write(out_fd, payload, sizeof payload - 1); /* 5 bytes, no terminating NUL */
    close(out_fd);

    return 0;
}
$ gcc create_sparse_file.c -o create_sparse_file && ./create_sparse_file
$ stat sparse_file
File: sparse_file
Size: 10005 Blocks: 8 IO Block: 4096 regular file
Device: 803h/2051d Inode: 3556105 Links: 1
# create a normal file as a comparison
$ cp sparse_file not_sparse_file --sparse=never
$ stat not_sparse_file
File: not_sparse_file
Size: 10005 Blocks: 24 IO Block: 4096 regular file
Device: 803h/2051d Inode: 3557867 Links: 1
$ gcc list_hole_and_data_segs.c -o list_hole_and_data_segs
$ ./list_hole_and_data_segs sparse_file
This file has 10005 bytes
[0, 8191]: hole
[8192, 10004]: data(end)
问题
正如你所看到的,./list_hole_and_data_segs sparse_file 的输出是:
[0, 8191]: hole
[8192, 10004]: data(end)
而实际情况应该是:
[0, 9999]: hole
[10000, 10004]: data(end)
是什么导致 list_hole_and_data_segs 的输出与实际情况不符?该如何修正?
环境
$ uname -a
Linux pop-os 5.17.15-76051715-generic #202206141358~1655919116~22.04~1db9e34 SMP PREEMPT Wed Jun 22 19 x86_64 x86_64 x86_64 GNU/Linux
$ df -hT .
Filesystem Type Size Used Avail Use% Mounted on
/dev/sda3 ext4 103G 54G 44G 56% /
$ stat -f .
File: "."
ID: 4885eb446c106708 Namelen: 255 Type: ext2/ext3
Block size: 4096 Fundamental block size: 4096
Blocks: Total: 26819732 Free: 12805152 Available: 11431226
Inodes: Total: 6856704 Free: 6062138
$ gcc --version
gcc (Ubuntu 11.2.0-19ubuntu1) 11.2.0
$ ldd --version
ldd (Ubuntu GLIBC 2.35-0ubuntu3) 2.35