如何判断管道缓冲区是否已满？

Question 1

我会使用系统调用跟踪工具来跟踪您的 Perl 脚本：strace(Linux)、dtruss(OS X)、ktrace(FreeBSD)、truss(Solaris) 等。目标是查看您的 Perl 脚本花费多少时间等待从其读取stdin 以及其他程序等待写入其 stdout 所花费的时间。

在这里，我以 writer 作为瓶颈来测试这一点：

terminal 1$ gzip -c < /dev/urandom | cat > /dev/null

terminal 2$ ps auxw | egrep 'gzip|cat'
slamb    25311 96.0  0.0  2852  516 pts/0    R+   23:35   3:40 gzip -c
slamb    25312  0.8  0.0  2624  336 pts/0    S+   23:35   0:01 cat

terminal 2$ strace -p 25312 -s 0 -rT -e trace=read
Process 25312 attached - interrupt to quit
     0.000000 read(0, ""..., 4096) = 4096 <0.005207>
     0.005531 read(0, ""..., 4096) = 4096 <0.000051>

这里的第一个数字是自上一个系统调用开始以来的时间，最后一个数字是系统调用所花费的时间。所以我们可以用 Perl 进行一些后处理来聚合它...... [*]

terminal 2$ strace -p 25312 -s 0 -rT -e trace=read 2>&1 | perl -nle 'm{^\s*([\d.]+) read\(0, .*<([\d.]+)>} or next; $total_work += $1 - $last_wait; $total_wait += $2; $last_wait = $2; print "working for $total_work sec, waiting for $total_wait sec"; $last_wait = $2;'
working for 0 sec, waiting for 0.005592 sec
...
working for 0.305356 sec, waiting for 2.28624900000002 sec
...

terminal 2$ strace -p 25311 -s 0 -rT -e trace=write 2>&1 | perl -nle 'm{^\s*([\d.]+) write\(1, .*<([\d.]+)>} or next; $total_work += $1 - $last_wait; $total_wait += $2; $last_wait = $2; print "working for $total_work sec, waiting for $total_wait sec"; $last_wait = $2;'
...
working for 5.15862000000001 sec, waiting for 0.0555740000000007 sec
...

您可以更花哨地制作一个 SystemTap 或 DTrace 脚本，它可以同时跟踪双方，仅跟踪正确的文件描述符，并每秒左右打印一个很好的状态更新，其中双方等待对方的时间百分比。

[*] - 警告：如果在其他文件描述符上调用读/写，我的粗略聚合不太正确；在这种情况下，它会低估工作时间。

dtrace 版本实际上非常简洁。

terminal 1$ gzip -c < /dev/urandom | cat > /dev/null

terminal 2$ ps aux | egrep 'gzip| cat'
slamb    54189  95.8  0.0   591796    584 s006  R+   12:49AM  22:49.55 gzip -c
slamb    54190   0.4  0.0   599828    392 s006  S+   12:49AM   0:06.08 cat

terminal 2$ cat > pipe.d <<'EOF'
#!/usr/sbin/dtrace -qs

BEGIN
{
  start = timestamp;
  writer_blocked = 0;
  reader_blocked = 0;
}

tick-1s, END
{
  this->elapsed = timestamp - start;
  printf("since startup, writer blocked %3d%% of time, reader %3d%% of time\n",
         100 * writer_blocked / this->elapsed,
         100 * reader_blocked / this->elapsed);
}

syscall::write:entry
/pid == $1 && arg0 == 1/
{
  self->entry = timestamp;
}

syscall::write:return
/pid == $1 && self->entry != 0/
{
  writer_blocked += timestamp - self->entry;
  self->entry = 0;
}

syscall::read:entry
/pid == $2 && arg0 == 0/
{
  self->entry = timestamp;
}

syscall::read:return
/pid == $2 && self->entry != 0/
{
  reader_blocked += timestamp - self->entry;
  self->entry = 0;
}
EOF

terminal 2$ chmod u+x pipe.d
terminal 2$ sudo ./pipe.d 54189 54190
since startup, writer blocked   0% of time, reader  98% of time
since startup, writer blocked   0% of time, reader  99% of time
since startup, writer blocked   0% of time, reader  99% of time
since startup, writer blocked   0% of time, reader  99% of time
since startup, writer blocked   0% of time, reader  99% of time
^C
since startup, writer blocked   0% of time, reader  99% of time

SystemTap 版本：

terminal 1$ gzip -c /dev/urandom | cat > /dev/null

terminal 2$ ps auxw | egrep 'gzip| cat'
slamb     3405  109  0.0   4356   584 pts/1    R+   02:57   0:04 gzip -c /dev/urandom
slamb     3406  0.2  0.0  10848   588 pts/1    S+   02:57   0:00 cat

terminal 2$ cat > probes.stp <<'EOF'
#!/usr/bin/env stap

global start
global writer_pid
global writes
global reader_pid
global reads

probe begin {
  start = gettimeofday_us()
  writer_pid = strtol(argv[1], 10)
  reader_pid = strtol(argv[2], 10)
}

probe timer.s(1), end {
  elapsed = gettimeofday_us() - start
  printf("since startup, writer blocked %3d%% of time, reader %3d%% of time\n",
         100 * @sum(writes) / elapsed,
         100 * @sum(reads) / elapsed)
}

probe syscall.write.return {
  if (pid() == writer_pid && $fd == 1)
    writes <<< gettimeofday_us() - @entry(gettimeofday_us())
}

probe syscall.read.return {
  if (pid() == reader_pid && $fd == 0)
    reads <<< gettimeofday_us() - @entry(gettimeofday_us())
}
EOF

terminal 2$ chmod a+x probes.stp
terminal 2$ sudo ./pipe.stp 3405 3406
since startup, writer blocked   0% of time, reader  99% of time
...

Answer

我会使用系统调用跟踪工具来跟踪您的 Perl 脚本：strace(Linux)、dtruss(OS X)、ktrace(FreeBSD)、truss(Solaris) 等。目标是查看您的 Perl 脚本花费多少时间等待从其读取stdin 以及其他程序等待写入其 stdout 所花费的时间。

在这里，我以 writer 作为瓶颈来测试这一点：

terminal 1$ gzip -c < /dev/urandom | cat > /dev/null

terminal 2$ ps auxw | egrep 'gzip|cat'
slamb    25311 96.0  0.0  2852  516 pts/0    R+   23:35   3:40 gzip -c
slamb    25312  0.8  0.0  2624  336 pts/0    S+   23:35   0:01 cat

terminal 2$ strace -p 25312 -s 0 -rT -e trace=read
Process 25312 attached - interrupt to quit
     0.000000 read(0, ""..., 4096) = 4096 <0.005207>
     0.005531 read(0, ""..., 4096) = 4096 <0.000051>

这里的第一个数字是自上一个系统调用开始以来的时间，最后一个数字是系统调用所花费的时间。所以我们可以用 Perl 进行一些后处理来聚合它...... [*]

terminal 2$ strace -p 25312 -s 0 -rT -e trace=read 2>&1 | perl -nle 'm{^\s*([\d.]+) read\(0, .*<([\d.]+)>} or next; $total_work += $1 - $last_wait; $total_wait += $2; $last_wait = $2; print "working for $total_work sec, waiting for $total_wait sec"; $last_wait = $2;'
working for 0 sec, waiting for 0.005592 sec
...
working for 0.305356 sec, waiting for 2.28624900000002 sec
...

terminal 2$ strace -p 25311 -s 0 -rT -e trace=write 2>&1 | perl -nle 'm{^\s*([\d.]+) write\(1, .*<([\d.]+)>} or next; $total_work += $1 - $last_wait; $total_wait += $2; $last_wait = $2; print "working for $total_work sec, waiting for $total_wait sec"; $last_wait = $2;'
...
working for 5.15862000000001 sec, waiting for 0.0555740000000007 sec
...

您可以更花哨地制作一个 SystemTap 或 DTrace 脚本，它可以同时跟踪双方，仅跟踪正确的文件描述符，并每秒左右打印一个很好的状态更新，其中双方等待对方的时间百分比。

[*] - 警告：如果在其他文件描述符上调用读/写，我的粗略聚合不太正确；在这种情况下，它会低估工作时间。

dtrace 版本实际上非常简洁。

terminal 1$ gzip -c < /dev/urandom | cat > /dev/null

terminal 2$ ps aux | egrep 'gzip| cat'
slamb    54189  95.8  0.0   591796    584 s006  R+   12:49AM  22:49.55 gzip -c
slamb    54190   0.4  0.0   599828    392 s006  S+   12:49AM   0:06.08 cat

terminal 2$ cat > pipe.d <<'EOF'
#!/usr/sbin/dtrace -qs

BEGIN
{
  start = timestamp;
  writer_blocked = 0;
  reader_blocked = 0;
}

tick-1s, END
{
  this->elapsed = timestamp - start;
  printf("since startup, writer blocked %3d%% of time, reader %3d%% of time\n",
         100 * writer_blocked / this->elapsed,
         100 * reader_blocked / this->elapsed);
}

syscall::write:entry
/pid == $1 && arg0 == 1/
{
  self->entry = timestamp;
}

syscall::write:return
/pid == $1 && self->entry != 0/
{
  writer_blocked += timestamp - self->entry;
  self->entry = 0;
}

syscall::read:entry
/pid == $2 && arg0 == 0/
{
  self->entry = timestamp;
}

syscall::read:return
/pid == $2 && self->entry != 0/
{
  reader_blocked += timestamp - self->entry;
  self->entry = 0;
}
EOF

terminal 2$ chmod u+x pipe.d
terminal 2$ sudo ./pipe.d 54189 54190
since startup, writer blocked   0% of time, reader  98% of time
since startup, writer blocked   0% of time, reader  99% of time
since startup, writer blocked   0% of time, reader  99% of time
since startup, writer blocked   0% of time, reader  99% of time
since startup, writer blocked   0% of time, reader  99% of time
^C
since startup, writer blocked   0% of time, reader  99% of time

SystemTap 版本：

terminal 1$ gzip -c /dev/urandom | cat > /dev/null

terminal 2$ ps auxw | egrep 'gzip| cat'
slamb     3405  109  0.0   4356   584 pts/1    R+   02:57   0:04 gzip -c /dev/urandom
slamb     3406  0.2  0.0  10848   588 pts/1    S+   02:57   0:00 cat

terminal 2$ cat > probes.stp <<'EOF'
#!/usr/bin/env stap

global start
global writer_pid
global writes
global reader_pid
global reads

probe begin {
  start = gettimeofday_us()
  writer_pid = strtol(argv[1], 10)
  reader_pid = strtol(argv[2], 10)
}

probe timer.s(1), end {
  elapsed = gettimeofday_us() - start
  printf("since startup, writer blocked %3d%% of time, reader %3d%% of time\n",
         100 * @sum(writes) / elapsed,
         100 * @sum(reads) / elapsed)
}

probe syscall.write.return {
  if (pid() == writer_pid && $fd == 1)
    writes <<< gettimeofday_us() - @entry(gettimeofday_us())
}

probe syscall.read.return {
  if (pid() == reader_pid && $fd == 0)
    reads <<< gettimeofday_us() - @entry(gettimeofday_us())
}
EOF

terminal 2$ chmod a+x probes.stp
terminal 2$ sudo ./pipe.stp 3405 3406
since startup, writer blocked   0% of time, reader  99% of time
...

Question 2

您可以pv -TC在管道中插入命令：

cmd1 | pv -TC | cmd2

pv使用自己的缓冲区并-T使其报告 1 秒周期内的平均填充情况（默认情况下）。

如果它始终是 100%，则意味着生产产出的速度比消耗产出的cmd1速度更快。cmd2如果不是，那就反过来了。请注意，管道本身可以容纳 64kB。

另请参阅-B指定pv的缓冲区大小。您可以使用多个pvs，如下所示：

$ cmd1 | pv -cCTrbN 'cmd1 -> cmd2' | cmd2 | pv -cCTrbN 'cmd2 -> cmd3' | cmd3
cmd1 -> cmd2: 1.92GiB { 53%} [ 387MiB/s]
cmd2 -> cmd3: 1.92GiB {  0%} [ 387MiB/s]

Answer

您可以pv -TC在管道中插入命令：

cmd1 | pv -TC | cmd2

pv使用自己的缓冲区并-T使其报告 1 秒周期内的平均填充情况（默认情况下）。

如果它始终是 100%，则意味着生产产出的速度比消耗产出的cmd1速度更快。cmd2如果不是，那就反过来了。请注意，管道本身可以容纳 64kB。

另请参阅-B指定pv的缓冲区大小。您可以使用多个pvs，如下所示：

$ cmd1 | pv -cCTrbN 'cmd1 -> cmd2' | cmd2 | pv -cCTrbN 'cmd2 -> cmd3' | cmd3
cmd1 -> cmd2: 1.92GiB { 53%} [ 387MiB/s]
cmd2 -> cmd3: 1.92GiB {  0%} [ 387MiB/s]

如何判断管道缓冲区是否已满？

答案1

答案2

相关内容