Unix 中缓冲区大小对文件 I/O 的影响

2024-6-6 • tag-icon

我正在尝试了解基于 Unix 的操作系统的内部工作原理。我正在阅读有关缓冲 I/O 的内容，以及缓冲区大小如何影响系统调用的次数，进而影响一个文件复制程序所花费的总时间。首先，这是我的程序：

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

long currentTimeMillis();

/*
 * Parses a decimal buffer size from `text`.
 * Returns the size (1..INT_MAX) on success, or -1 if the text is not a
 * complete positive decimal number that fits in an int.
 */
static int parse_bufsize(const char *text) {
    char *end = NULL;

    errno = 0;
    long value = strtol(text, &end, 10);
    if (errno != 0 || end == text || *end != '\0') {
        return -1;
    }
    if (value <= 0 || value > INT_MAX) {
        return -1;
    }
    return (int)value;
}

/*
 * Writes exactly `len` bytes from `data` to `fd`.
 * write() may transfer fewer bytes than requested, so keep calling it until
 * everything has been handed over.
 * Returns 0 on success, -1 if write() fails or makes no progress.
 */
static int write_all(int fd, const char *data, size_t len) {
    size_t done = 0;

    while (done < len) {
        ssize_t n = write(fd, data + done, len - done);
        if (n <= 0) {
            /* n == 0 is treated as failure so the loop cannot spin forever. */
            return -1;
        }
        done += (size_t)n;
    }
    return 0;
}

/*
 * Copies argv[1] to argv[2] using a buffer of argv[3] bytes and prints the
 * wall-clock milliseconds spent in total, in read() and in write().
 *
 * Returns 0 on success and -1 on any usage, allocation or I/O error.
 */
int main(int argc, char *argv[]) {
    int rc = -1;
    int fd_from = -1;
    int fd_to = -1;
    char *buf = NULL;
    long startTime = 0;
    long endTime = 0;
    long totalTimeForRead = 0;
    long totalTimeForWrite = 0;

    if (argc < 4) {
        printf("Usage: %s <source> <destination> <buffer size>\n",
               argc > 0 ? argv[0] : "copy");
        return -1;
    }

    int bufsize = parse_bufsize(argv[3]);
    if (bufsize <= 0) {
        printf("Invalid buffer size: %s\n", argv[3]);
        return -1;
    }
    printf("copying with buffer size %d\n", bufsize);

    /* Heap allocation: a user-chosen size must not be placed on the stack. */
    buf = malloc((size_t)bufsize);
    if (NULL == buf) {
        printf("Error allocating buffer\n");
        return -1;
    }

    /* The source is only read, so do not demand write permission on it. */
    fd_from = open(argv[1], O_RDONLY);
    if (-1 == fd_from) {
        printf("Error opening source file\n");
        goto cleanup;
    }

    /* O_TRUNC: an existing, longer destination must not keep its old tail. */
    fd_to = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
    if (-1 == fd_to) {
        printf("Error opening destination file\n");
        goto cleanup;
    }

    startTime = currentTimeMillis();

    for (;;) {
        long readStartTime = currentTimeMillis();
        ssize_t bytes_read = read(fd_from, buf, (size_t)bufsize);
        long readEndTime = currentTimeMillis();

        if (0 == bytes_read) {
            break; /* end of file */
        }
        if (-1 == bytes_read) {
            printf("Error occurred while reading source file\n");
            goto cleanup;
        }
        totalTimeForRead += readEndTime - readStartTime;

        /* Write only what was actually read; the last chunk is usually short. */
        long writeStartTime = currentTimeMillis();
        int write_rc = write_all(fd_to, buf, (size_t)bytes_read);
        long writeEndTime = currentTimeMillis();
        totalTimeForWrite += (writeEndTime - writeStartTime);

        if (-1 == write_rc) {
            printf("Some error occurred while writing file\n");
            goto cleanup;
        }
    }

    endTime = currentTimeMillis();
    printf("Total time to copy%ld\n", endTime - startTime);
    printf("Total time to write%ld\n", totalTimeForWrite);
    printf("Total time to read%ld\n", totalTimeForRead);
    rc = 0;

cleanup:
    /* close() on the destination can report a deferred write error. */
    if (fd_to != -1 && close(fd_to) == -1) {
        printf("Error closing destination file\n");
        rc = -1;
    }
    if (fd_from != -1) {
        close(fd_from);
    }
    free(buf);
    return rc;
}

/*
 * Returns the current time of day, as reported by gettimeofday(), converted
 * to milliseconds (sub-millisecond precision is discarded).
 */
long currentTimeMillis() {
    struct timeval now;

    gettimeofday(&now, NULL);

    /* microsecond part scaled down to ms, plus whole seconds scaled up to ms */
    return now.tv_usec / 1000 + now.tv_sec * 1000;
}

我使用的是配备 2.9GHz Intel i7、16GB 内存的 MacBook Pro（如果此信息有用的话）。源文件大小为 2.8GB。我有点惊讶地发现，read() 花费的总时间比 write() 少得多。这是我在缓冲区大小约为 16K 时的结果：

./a.out largefile dest 16382
copying with buffer size 16382
Total time to copy5987
Total time to write5330
Total time to read638

根据我的阅读，write() 在把数据从用户缓冲区复制到内核缓冲区之后就会立即返回。所以它所花费的时间就是这次复制的时间，加上发起系统调用本身的开销。read() 同样是从内核缓冲区读取到用户缓冲区，因此两者所花费的总时间应该相同（在这两种情况下都没有磁盘 I/O）。

那么为什么结果会有这么大的差异呢？我在 SO（Stack Overflow）上问过这个问题，但我想那里并不是提这个问题的合适地方。

相关内容