是否有用于缓存进程输出的 shell 命令或实用程序?

是否有用于缓存进程输出的 shell 命令或实用程序?

希望这是正确的提问地点。

有没有一个与此类似的 UNIX 工具?

# First call: actually runs echo, stores the result in the cache (the command plus its arguments is the key), prints "hi"
cache 10 echo hi 

# wait 2 seconds (still inside the 10-second lifetime given as the first argument)
sleep 2

# Second call: echo is not run; "hi" is served from the cache
cache 10 echo hi 

# wait 10 more seconds, so the entry is now older than its 10-second lifetime
sleep 10

# Third call: the cached entry has expired, so echo is run again and prints "hi"
cache 10 echo hi 

显然 echo 不是真正的用例。

基本上缓存给定命令+参数的 STDOUT、STDERR 和状态,因此下次调用同一进程时不必重新运行它。

我可以自己编写一个脚本来实现这一点,但我想知道 UNIX 工具集中是否已经有我不知道的现成工具。

答案1

我刚刚为此写了一个相当完整的脚本;最新版本位于https://gist.github.com/akorn/51ee2fe7d36fa139723c851d87e56096

#!/bin/zsh
#
# Purpose: run the specified command with the specified arguments and cache the result. If the cache is fresh enough, don't run the command again but return the cached output.
# Also cache exit status and stderr.
# License: GPLv3

# Configuration defaults. The names are deliberately long and prefixed so
# that they cannot collide with variables the invoked program relies on.
# Values already present in the environment are kept.
: "${RUNCACHED_MAX_AGE:=300}"
: "${RUNCACHED_IGNORE_ENV:=0}"
: "${RUNCACHED_IGNORE_PWD:=0}"

# Cache location: an explicit setting wins; otherwise use a per-user
# directory when $HOME is known, and a system-wide one when it is not.
if [[ -z "$RUNCACHED_CACHE_DIR" ]]; then
    if [[ -n "$HOME" ]]; then
        RUNCACHED_CACHE_DIR=$HOME/.runcached
    else
        RUNCACHED_CACHE_DIR=/var/cache/runcached
    fi
fi

# Print the help text on stdout and terminate the script with status 0.
# The text embeds the current values of RUNCACHED_CACHE_DIR and
# RUNCACHED_MAX_AGE, so it reflects any options parsed before --help.
function usage() {
    cat <<EOF
Usage: runcached [--ttl <max cache age>] [--cache-dir <cache directory>]
       [--ignore-env] [--ignore-pwd] [--help] [--prune-cache]
       [--] command [arg1 [arg2 ...]]

Run 'command' with the specified args and cache stdout, stderr and exit
status. If you run the same command again and the cache is fresh, cached
data is returned and the command is not actually run.

Normally, all exported environment variables as well as the current working
directory are included in the cache key. The --ignore options disable this.
The OLDPWD variable is always ignored.

--prune-cache deletes all cache entries older than the maximum age. There is
no other mechanism to prevent the cache growing without bounds.

The default cache directory is ${RUNCACHED_CACHE_DIR}.
Maximum cache age defaults to ${RUNCACHED_MAX_AGE}.

CAVEATS:

Side effects of 'command' are obviously not cached.

There is no cache invalidation logic except cache age (specified in seconds).

If the cache can't be created, the command is run uncached.

This script is always silent; any output comes from the invoked command. You
may thus not notice errors creating the cache and such.

stdout and stderr streams are saved separately. When both are written to a
terminal from cache, they will almost certainly be interleaved differently
than originally. Ordering of messages within the two streams is preserved.
EOF
    exit 0
}

# Parse runcached's own options. Parsing stops at "--" or at the first word
# that is not a recognised option (including an empty first word); whatever
# is left in "$@" afterwards is the command to run.
while (( $# > 0 )); do
    case "$1" in
        --ttl|--cache-dir)
            # Both options take a value. Without this check a missing value
            # makes 'shift 2' fail and leave $1 untouched, so the loop would
            # never terminate.
            if (( $# < 2 )); then
                # usage() exits 0 on its own; run it in a subshell so the help
                # text can go to stderr and we can report a usage error.
                (usage) >&2
                exit 2
            fi
            if [[ "$1" == --ttl ]]; then
                RUNCACHED_MAX_AGE="$2"
            else
                RUNCACHED_CACHE_DIR="$2"
            fi
            shift 2;;
        --ignore-env)   RUNCACHED_IGNORE_ENV=1; shift;;
        --ignore-pwd)   RUNCACHED_IGNORE_PWD=1; shift;;
        --prune-cache)  RUNCACHED_PRUNE=1; shift;;
        --help)     usage;;
        --)     shift; break;;
        *)      break;;
    esac
done

# zsh modules used by this script: $EPOCHSECONDS and the zstat builtin.
zmodload zsh/datetime
zmodload zsh/stat

# Make sure the cache directory exists. The check-then-create sequence is
# racy, but harmlessly so: in the worst case the cache is unusable and the
# program simply runs uncached. Checking first avoids spawning mkdir(1) in
# the common case, which could matter when runcached is called in a tight
# loop. All output is discarded so that runcached never mixes messages of
# its own into the output of the invoked program.
if [[ ! -d "$RUNCACHED_CACHE_DIR/." ]]; then
    mkdir -p "$RUNCACHED_CACHE_DIR" >/dev/null 2>/dev/null
fi

# --prune-cache: delete every regular file directly inside the cache
# directory whose mtime is not newer than (now - maximum age).
if ((RUNCACHED_PRUNE)); then
    find "$RUNCACHED_CACHE_DIR/." -maxdepth 1 -type f \! -newermt @$((EPOCHSECONDS-RUNCACHED_MAX_AGE)) -delete 2>/dev/null
fi

# if no command specified, exit silently
if ! [[ -n "$@" ]]; then
    exit 0
fi

# Derive the cache key: the MD5 digest of the exported environment (unless
# --ignore-env) followed by the command line.
#
# The arguments are written NUL-terminated. Joining them with spaces (as
# 'echo "$@"' does) would give 'cmd "a b"' and 'cmd a b' the same key, and
# echo would additionally swallow a leading argument such as -n as one of
# its own options.
RUNCACHED_CACHE_KEY=$(
    (
        # Almost(?) nothing uses OLDPWD, but taking it into account potentially
        # reduces cache efficiency, so it never contributes to the key.
        unset OLDPWD
        ((RUNCACHED_IGNORE_PWD)) && unset PWD
        ((RUNCACHED_IGNORE_ENV)) || env
        printf '%s\0' "$@"
    ) | md5sum
)
# md5sum prints "<digest>  -"; keep only the digest.
RUNCACHED_CACHE_KEY=${RUNCACHED_CACHE_KEY%% *}

# Open (creating it if necessary) the per-key lock file on a descriptor
# allocated by the shell and stored in RUNCACHED_LOCK_FD.
# While doing so, stderr is parked on a scratch descriptor and pointed at
# /dev/null, so that failing to open the lock file (e.g. because the cache
# directory does not exist) produces no error message. Afterwards stderr is
# restored and the scratch descriptor is closed.
# Note: "{name}" must be written directly against the redirection operator;
# with whitespace in between, exec treats "{name}" as a command to execute.
exec {RUNCACHED_temp_stderr}>&2
exec 2>/dev/null
exec {RUNCACHED_LOCK_FD}>>"$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.lock"
exec 2>&$RUNCACHED_temp_stderr
exec {RUNCACHED_temp_stderr}>&-

# Serve the command from the cache, or run it and populate the cache.
#
# If we can't obtain the lock, we want to run uncached; otherwise
# 'runcached' wouldn't be transparent because it would prevent
# parallel execution of several instances of the same command.
# Locking is necessary to avoid races between the mv(1) command
# below replacing stderr with a newer version and another instance
# of runcached using a newer stdout with the older stderr.
if flock -n $RUNCACHED_LOCK_FD 2>/dev/null; then
    if [[ -f "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.stdout" ]]; then
        # The age of the cache entry is the age of its stdout file.
        if [[ $((EPOCHSECONDS - $(zstat +mtime "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.stdout"))) -le $RUNCACHED_MAX_AGE ]]; then
            # Fresh: replay both streams concurrently, then exit with the
            # recorded exit status.
            cat "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.stdout" &
            cat "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.stderr" >&2 &
            wait
            exit $(<"$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.exitstatus")
        else
            # Stale: discard the old entry before re-running the command.
            rm -f "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY".{stdout,stderr,exitstatus} 2>/dev/null
        fi
    fi

    # only reached if cache didn't exist or was too old
    if [[ -d "$RUNCACHED_CACHE_DIR/." ]]; then
        RUNCACHED_tempdir=$(mktemp -d 2>/dev/null)
        # The -n test matters: if mktemp failed the variable is empty, and
        # "/." would pass a bare -d test, sending the output files to "/".
        if [[ -n "$RUNCACHED_tempdir" && -d "$RUNCACHED_tempdir/." ]]; then
            # Each stream goes both to its original destination and to a file
            # in the temporary directory. "$@" is quoted so that empty
            # arguments reach the command unchanged.
            "$@" >&1 >"$RUNCACHED_tempdir/$RUNCACHED_CACHE_KEY.stdout" 2>&2 2>"$RUNCACHED_tempdir/$RUNCACHED_CACHE_KEY.stderr"
            RUNCACHED_ret=$?
            echo $RUNCACHED_ret >"$RUNCACHED_tempdir/$RUNCACHED_CACHE_KEY.exitstatus" 2>/dev/null
            mv "$RUNCACHED_tempdir/$RUNCACHED_CACHE_KEY".{stdout,stderr,exitstatus} "$RUNCACHED_CACHE_DIR/" 2>/dev/null
            # Remove the temporary directory even if mv left files behind.
            rm -rf -- "$RUNCACHED_tempdir" 2>/dev/null
            exit $RUNCACHED_ret
        fi
    fi
fi

# only reached if cache not created successfully or lock couldn't be obtained
exec "$@"

答案2

编辑:另请参阅我关于 bkt 的另一个答案,它是一个独立的子进程缓存实用程序。

我创建了 bash-cache,这是一个用于 Bash 的记忆化(memoization)库,它的工作方式与您所描述的完全一样。它是专门为缓存 Bash 函数而设计的,但显然您可以把对其他命令的调用包装在函数中。

它处理许多更简单的缓存机制所忽略的边缘情况行为。它报告原始调用的退出代码,单独保留 stdout 和 stderr,并保留输出中的任何尾随空格($()命令替换将截断尾随空格)。

演示:

# Define function normally, then decorate it with bc::cache
$ maybe_sleep() {
  sleep "$@"
  echo "Did I sleep?"
} && bc::cache maybe_sleep

# Initial call invokes the function
$ time maybe_sleep 1
Did I sleep?

real    0m1.047s
user    0m0.000s
sys     0m0.020s

# Subsequent call uses the cache
$ time maybe_sleep 1
Did I sleep?

real    0m0.044s
user    0m0.000s
sys     0m0.010s

# Invocations with different arguments are cached separately
$ time maybe_sleep 2
Did I sleep?

real    0m2.049s
user    0m0.000s
sys     0m0.020s

还有一个基准函数可以显示缓存的开销:

$ bc::benchmark maybe_sleep 1
Original:       1.007
Cold Cache:     1.052
Warm Cache:     0.044

所以你可以看到,读/写开销(在我的机器上,缓存使用的是临时文件系统 tmpfs)大约为 1/20 秒。这个基准测试工具可以帮助您判断某个调用是否值得缓存。

答案3

您可以将结果放入文件中,然后从该文件中读回......

# Private scratch directory for the saved results. mktemp -d creates it
# atomically under an unpredictable name, unlike a fixed /tmp/<pid> path.
tmpDir=$(mktemp -d) || { echo "mktemp failed" >&2; exit 1; }

# Run the command ("echo cmd1" stands in for the real one), saving stdout,
# stderr and the exit status in separate files.
echo cmd1 > "$tmpDir"/cmd1_stdout 2> "$tmpDir"/cmd1_stderr
echo $? > "$tmpDir"/cmd1_exitcode

# Retrieving output of cmd1: replay both streams, then finish the subshell
# with the saved exit status.
( cat "$tmpDir"/cmd1_stdout ; cat "$tmpDir"/cmd1_stderr 1>&2; exit "$(cat "$tmpDir"/cmd1_exitcode)" )

由此我们可以定义一个“缓存”函数。这个版本需要一个我们永远不会用作参数的字符。例如逗号“,”。您可以在“IFS=,”行更改它

# Scratch directory that holds the cached results. mktemp -d creates it
# atomically under an unpredictable name.
tmpDir=$(mktemp -d) || { echo "cache: mktemp failed" >&2; exit 1; }

# cache COMMAND [ARG...]
#
# Run COMMAND once and remember its stdout, stderr and exit status; later
# calls with the same command line replay the saved results instead of
# running it again.
#
# The cache key is the command line joined with commas, used as a file name
# prefix inside $tmpDir. Arguments must therefore not contain a comma, and
# arguments containing "/" cannot be cached because the key would no longer
# be a plain file name.
cache() {
  local cmd= arg e

  # Join the arguments with "," without touching IFS, so the caller's (and
  # the invoked command's) word splitting is left alone.
  for arg in "$@"; do
    cmd="$cmd,$arg"
  done
  cmd=${cmd#,}

  if [ -f "$tmpDir/$cmd"_exitcode ]; then
    cat "$tmpDir/$cmd"_stdout
    cat "$tmpDir/$cmd"_stderr 1>&2
    # Return the status saved for *this* command line.
    return "$(cat "$tmpDir/$cmd"_exitcode)"
  fi

  # This line is bash-only: each stream is passed through tee so that it is
  # both shown immediately and saved.
  "$@" 2> >(tee "$tmpDir/$cmd"_stderr 1>&2) > >(tee "$tmpDir/$cmd"_stdout)
  e=$?
  echo "$e" > "$tmpDir/$cmd"_exitcode

  return "$e"
}

超时可以通过“date +%s”和“stat -c %Y”来实现:

# Scratch directory that holds the cached results. mktemp -d creates it
# atomically under an unpredictable name.
tmpDir=$(mktemp -d) || { echo "cache: mktemp failed" >&2; exit 1; }

# cache TIMEOUT COMMAND [ARG...]
#
# Like a plain result cache, but an entry is only replayed while it is at
# most TIMEOUT seconds old (measured on the mtime of its exit-code file);
# otherwise COMMAND is run again and the entry is rewritten.
#
# The cache key is the command line joined with commas, used as a file name
# prefix inside $tmpDir. Arguments must therefore not contain a comma, and
# arguments containing "/" cannot be cached.
cache() {
  local timeout=$1
  shift

  local cmd= arg now fdate e

  # Join the arguments with "," without touching IFS, so the caller's (and
  # the invoked command's) word splitting is left alone.
  for arg in "$@"; do
    cmd="$cmd,$arg"
  done
  cmd=${cmd#,}

  if [ -f "$tmpDir/$cmd"_exitcode ]; then
    now=$(date +%s)
    # GNU stat first; fall back to the BSD/macOS spelling.
    fdate=$(stat -c %Y "$tmpDir/$cmd"_exitcode 2>/dev/null) \
      || fdate=$(stat -f %m "$tmpDir/$cmd"_exitcode 2>/dev/null)

    # If the mtime could not be read, treat the entry as expired.
    if [ -n "$fdate" ] && [ "$((now - fdate))" -le "$timeout" ]; then
      cat "$tmpDir/$cmd"_stdout
      cat "$tmpDir/$cmd"_stderr 1>&2
      return "$(cat "$tmpDir/$cmd"_exitcode)"
    fi
  fi

  # This line is bash-only: each stream is passed through tee so that it is
  # both shown immediately and saved.
  "$@" 2> >(tee "$tmpDir/$cmd"_stderr 1>&2) > >(tee "$tmpDir/$cmd"_stdout)
  e=$?
  echo "$e" > "$tmpDir/$cmd"_exitcode

  return "$e"
}

“仅 bash”行可以替换为:

  # Portable variant: send both streams straight into the cache files ...
  "$@" 2> "$tmpDir/$cmd"_stderr > "$tmpDir/$cmd"_stdout
  local e=$?
  # ... and replay them afterwards. Output therefore appears only once the
  # command has finished, stdout first and then stderr.
  cat "$tmpDir/$cmd"_stdout
  cat "$tmpDir/$cmd"_stderr 1>&2

答案4

在 shell 脚本中缓存命令输出的最常见做法是把它赋值给一个变量。这可以通过子 shell(命令替换)轻松完成。它不会像传统缓存那样过期,但编写 shell 脚本的人通常认为这是可以接受的。下面是用子 shell 和变量改写的上述脚本:

# Run the command once and keep its output in a variable.
HI=$(echo hi)
# Quote the expansion: unquoted, the saved output would be word-split and
# glob-expanded, so multi-line or specially spaced output would be mangled.
echo "$HI"
sleep 2
echo "$HI"
sleep 10
echo "$HI"

另一种选择是在 shell 脚本中定义一个缓存函数,例如:

# cache EXPIRY COMMAND [ARG...]
#
# Print COMMAND's stdout, reusing a saved copy when one exists that is less
# than EXPIRY seconds old; otherwise run COMMAND and save its stdout.
# Only stdout is cached. On a cache miss the function returns COMMAND's exit
# status, and the output of a failed command is not kept.
# Requires GNU find (-newermt).
cache() {
  local expiry=$1
  shift
  local key cache_file status

  # Key the cache file on a checksum of the NUL-separated command line, so
  # arguments containing spaces or "/" still give a valid file name.
  key=$(printf '%s\0' "$@" | cksum)
  cache_file=${TMPDIR:-/tmp}/${key// /_}_cache

  # find prints the path only if the file exists and was modified within the
  # last EXPIRY seconds.
  if [ -n "$(find "$cache_file" -newermt "-${expiry} seconds" 2>/dev/null)" ]; then
    cat "$cache_file"
  else
    "$@" | tee "$cache_file"
    status=${PIPESTATUS[0]}
    # Do not let a failed run be replayed later as if it had succeeded.
    [ "$status" -eq 0 ] || rm -f -- "$cache_file"
    return "$status"
  fi
}

相关内容