如何使用多种模式查找文件

Question 1

如果您想避免为每个模式一次又一次地解压缩文件，您可以这样做：

PATTERNS='foo
bar
baz' find . -mtime -"$a" -type f ! -name "*.bak*" -exec awk -v q=\' '
  # Return s wrapped in single quotes so that the shell run by getline
  # sees it as one literal word; each embedded single quote becomes:
  # quote, backslash, quote, quote.
  function shquote(s) {
    gsub(q, q "\\" q q, s)
    return q s q
  }
  BEGIN {
    # One pattern per line of the PATTERNS environment variable.
    n = split(ENVIRON["PATTERNS"], pats, "\n")
    for (arg = 1; arg < ARGC; arg++) {
      file = ARGV[arg]
      # The file is decompressed once; every pattern is tried on each line.
      cmd = "gzip -dcf < " shquote(file)
      # Start each file with a fresh set of patterns not seen yet. Count
      # distinct patterns only: a repeated pattern is a single array key,
      # so counting it twice would keep "left" from ever reaching zero.
      split("", notfound)
      left = 0
      for (i = 1; i <= n; i++) {
        if (!(pats[i] in notfound)) {
          notfound[pats[i]]
          left++
        }
      }
      # Stop reading as soon as every pattern has matched some line.
      while (left && (cmd | getline line) > 0) {
        for (pat in notfound) {
          if (line ~ pat) {
            if (!--left) {
              print file
              break
            }
            delete notfound[pat]
          }
        }
      }
      close(cmd)
    }
    # All the work is done in BEGIN; exit so that awk does not go on to
    # read the file operands as input itself.
    exit
  }' {} +

请注意，模式被视为 awk 模式，类似于 grep -E/egrep 支持的扩展正则表达式。对于不区分大小写的匹配，如果使用 GNU awk，可以添加 -v IGNORECASE=1，或者可移植地改为：

PATTERNS='foo
bar
baz' find . -mtime -"$a" -type f ! -name "*.bak*" -exec awk -v q=\' '
  # Return s wrapped in single quotes so that the shell run by getline
  # sees it as one literal word; each embedded single quote becomes:
  # quote, backslash, quote, quote.
  function shquote(s) {
    gsub(q, q "\\" q q, s)
    return q s q
  }
  BEGIN {
    # One pattern per line of PATTERNS. Patterns and input lines are both
    # lower-cased, which gives portable case-insensitive matching.
    n = split(tolower(ENVIRON["PATTERNS"]), pats, "\n")
    for (arg = 1; arg < ARGC; arg++) {
      file = ARGV[arg]
      # The file is decompressed once; every pattern is tried on each line.
      cmd = "gzip -dcf < " shquote(file)
      # Start each file with a fresh set of patterns not seen yet. Count
      # distinct patterns only: patterns that become equal after
      # lower-casing share one array key, so counting them separately
      # would keep "left" from ever reaching zero.
      split("", notfound)
      left = 0
      for (i = 1; i <= n; i++) {
        if (!(pats[i] in notfound)) {
          notfound[pats[i]]
          left++
        }
      }
      # Stop reading as soon as every pattern has matched some line.
      while (left && (cmd | getline line) > 0) {
        line = tolower(line)
        for (pat in notfound) {
          if (line ~ pat) {
            if (!--left) {
              print file
              break
            }
            delete notfound[pat]
          }
        }
      }
      close(cmd)
    }
    # All the work is done in BEGIN; exit so that awk does not go on to
    # read the file operands as input itself.
    exit
  }' {} +

（这假设模式中不含 \S 之类的非标准 ERE 扩展，否则 tolower 会把它转换成 \s）。

您可以将该 awk 命令放入一个 zgrep-many 脚本中，使其更易于使用。例如：

#! /bin/sh -

# Print the usage summary on stderr and exit with status 1.
usage() {
  cat >&2 << EOF
Usage: $0 [-e <pattern>] [-f <file>] [-i] [pattern] files

List the files for which all the given patterns are matched.
EOF
  exit 1
}

# Empty means case-sensitive matching; -i sets it to 1.
ignorecase=
# Newline-separated list of patterns, handed to awk through the environment.
PATTERNS=
export PATTERNS
NL='
'
sep=

while getopts e:f:i opt; do
  case $opt in
    (e) PATTERNS=$PATTERNS$sep$OPTARG; sep=$NL;;
    (f) PATTERNS=$PATTERNS$sep$(cat < "$OPTARG") || exit; sep=$NL;;
    (i) ignorecase=1;;
    (*) usage;;
  esac
done
shift "$((OPTIND - 1))"
# Without -e/-f, the first operand is the pattern. It must be stored in the
# exported PATTERNS variable, which is the one the awk program reads.
if [ -z "$PATTERNS" ]; then
  [ "$#" -gt 0 ] || usage
  PATTERNS=$1; shift
fi

# No file operands: nothing to search.
[ "$#" -eq 0 ] && exit

exec awk -v q=\' -v icase="$ignorecase" '
  # Return s wrapped in single quotes so that the shell run by getline
  # sees it as one literal word; each embedded single quote becomes:
  # quote, backslash, quote, quote.
  function shquote(s) {
    gsub(q, q "\\" q q, s)
    return q s q
  }
  BEGIN {
    patterns = ENVIRON["PATTERNS"]
    # Case-insensitive mode lower-cases both the patterns and every line.
    if (icase) patterns = tolower(patterns)
    n = split(patterns, pats, "\n")
    for (arg = 1; arg < ARGC; arg++) {
      file = ARGV[arg]
      # The file is decompressed once; every pattern is tried on each line.
      cmd = "gzip -dcf < " shquote(file)
      # Start each file with a fresh set of patterns not seen yet. Count
      # distinct patterns only: a repeated pattern is a single array key,
      # so counting it twice would keep "left" from ever reaching zero.
      split("", notfound)
      left = 0
      for (i = 1; i <= n; i++) {
        if (!(pats[i] in notfound)) {
          notfound[pats[i]]
          left++
        }
      }
      # Stop reading as soon as every pattern has matched some line.
      while (left && (cmd | getline line) > 0) {
        if (icase) line = tolower(line)
        for (pat in notfound) {
          if (line ~ pat) {
            if (!--left) {
              print file
              break
            }
            delete notfound[pat]
          }
        }
      }
      close(cmd)
    }
    # All the work is done in BEGIN; exit so that awk does not go on to
    # read the file operands as input itself.
    exit
  }' "$@"

用作：

find ... -exec zgrep-many -ie foo -e bar -e baz {} +

例如。

Answer

如果您想避免为每个模式一次又一次地解压缩文件，您可以这样做：

PATTERNS='foo
bar
baz' find . -mtime -"$a" -type f ! -name "*.bak*" -exec awk -v q=\' '
  # Return s wrapped in single quotes so that the shell run by getline
  # sees it as one literal word; each embedded single quote becomes:
  # quote, backslash, quote, quote.
  function shquote(s) {
    gsub(q, q "\\" q q, s)
    return q s q
  }
  BEGIN {
    # One pattern per line of the PATTERNS environment variable.
    n = split(ENVIRON["PATTERNS"], pats, "\n")
    for (arg = 1; arg < ARGC; arg++) {
      file = ARGV[arg]
      # The file is decompressed once; every pattern is tried on each line.
      cmd = "gzip -dcf < " shquote(file)
      # Start each file with a fresh set of patterns not seen yet. Count
      # distinct patterns only: a repeated pattern is a single array key,
      # so counting it twice would keep "left" from ever reaching zero.
      split("", notfound)
      left = 0
      for (i = 1; i <= n; i++) {
        if (!(pats[i] in notfound)) {
          notfound[pats[i]]
          left++
        }
      }
      # Stop reading as soon as every pattern has matched some line.
      while (left && (cmd | getline line) > 0) {
        for (pat in notfound) {
          if (line ~ pat) {
            if (!--left) {
              print file
              break
            }
            delete notfound[pat]
          }
        }
      }
      close(cmd)
    }
    # All the work is done in BEGIN; exit so that awk does not go on to
    # read the file operands as input itself.
    exit
  }' {} +

请注意，模式被视为 awk 模式，类似于 grep -E/egrep 支持的扩展正则表达式。对于不区分大小写的匹配，如果使用 GNU awk，可以添加 -v IGNORECASE=1，或者可移植地改为：

PATTERNS='foo
bar
baz' find . -mtime -"$a" -type f ! -name "*.bak*" -exec awk -v q=\' '
  # Return s wrapped in single quotes so that the shell run by getline
  # sees it as one literal word; each embedded single quote becomes:
  # quote, backslash, quote, quote.
  function shquote(s) {
    gsub(q, q "\\" q q, s)
    return q s q
  }
  BEGIN {
    # One pattern per line of PATTERNS. Patterns and input lines are both
    # lower-cased, which gives portable case-insensitive matching.
    n = split(tolower(ENVIRON["PATTERNS"]), pats, "\n")
    for (arg = 1; arg < ARGC; arg++) {
      file = ARGV[arg]
      # The file is decompressed once; every pattern is tried on each line.
      cmd = "gzip -dcf < " shquote(file)
      # Start each file with a fresh set of patterns not seen yet. Count
      # distinct patterns only: patterns that become equal after
      # lower-casing share one array key, so counting them separately
      # would keep "left" from ever reaching zero.
      split("", notfound)
      left = 0
      for (i = 1; i <= n; i++) {
        if (!(pats[i] in notfound)) {
          notfound[pats[i]]
          left++
        }
      }
      # Stop reading as soon as every pattern has matched some line.
      while (left && (cmd | getline line) > 0) {
        line = tolower(line)
        for (pat in notfound) {
          if (line ~ pat) {
            if (!--left) {
              print file
              break
            }
            delete notfound[pat]
          }
        }
      }
      close(cmd)
    }
    # All the work is done in BEGIN; exit so that awk does not go on to
    # read the file operands as input itself.
    exit
  }' {} +

（这假设模式中不含 \S 之类的非标准 ERE 扩展，否则 tolower 会把它转换成 \s）。

您可以将该 awk 命令放入一个 zgrep-many 脚本中，使其更易于使用。例如：

#! /bin/sh -

# Print the usage summary on stderr and exit with status 1.
usage() {
  cat >&2 << EOF
Usage: $0 [-e <pattern>] [-f <file>] [-i] [pattern] files

List the files for which all the given patterns are matched.
EOF
  exit 1
}

# Empty means case-sensitive matching; -i sets it to 1.
ignorecase=
# Newline-separated list of patterns, handed to awk through the environment.
PATTERNS=
export PATTERNS
NL='
'
sep=

while getopts e:f:i opt; do
  case $opt in
    (e) PATTERNS=$PATTERNS$sep$OPTARG; sep=$NL;;
    (f) PATTERNS=$PATTERNS$sep$(cat < "$OPTARG") || exit; sep=$NL;;
    (i) ignorecase=1;;
    (*) usage;;
  esac
done
shift "$((OPTIND - 1))"
# Without -e/-f, the first operand is the pattern. It must be stored in the
# exported PATTERNS variable, which is the one the awk program reads.
if [ -z "$PATTERNS" ]; then
  [ "$#" -gt 0 ] || usage
  PATTERNS=$1; shift
fi

# No file operands: nothing to search.
[ "$#" -eq 0 ] && exit

exec awk -v q=\' -v icase="$ignorecase" '
  # Return s wrapped in single quotes so that the shell run by getline
  # sees it as one literal word; each embedded single quote becomes:
  # quote, backslash, quote, quote.
  function shquote(s) {
    gsub(q, q "\\" q q, s)
    return q s q
  }
  BEGIN {
    patterns = ENVIRON["PATTERNS"]
    # Case-insensitive mode lower-cases both the patterns and every line.
    if (icase) patterns = tolower(patterns)
    n = split(patterns, pats, "\n")
    for (arg = 1; arg < ARGC; arg++) {
      file = ARGV[arg]
      # The file is decompressed once; every pattern is tried on each line.
      cmd = "gzip -dcf < " shquote(file)
      # Start each file with a fresh set of patterns not seen yet. Count
      # distinct patterns only: a repeated pattern is a single array key,
      # so counting it twice would keep "left" from ever reaching zero.
      split("", notfound)
      left = 0
      for (i = 1; i <= n; i++) {
        if (!(pats[i] in notfound)) {
          notfound[pats[i]]
          left++
        }
      }
      # Stop reading as soon as every pattern has matched some line.
      while (left && (cmd | getline line) > 0) {
        if (icase) line = tolower(line)
        for (pat in notfound) {
          if (line ~ pat) {
            if (!--left) {
              print file
              break
            }
            delete notfound[pat]
          }
        }
      }
      close(cmd)
    }
    # All the work is done in BEGIN; exit so that awk does not go on to
    # read the file operands as input itself.
    exit
  }' "$@"

用作：

find ... -exec zgrep-many -ie foo -e bar -e baz {} +

例如。

Question 2

grep 没有用于同时匹配多个模式的 AND 选项，但您可以使用 | 以 OR 方式匹配模式。如果使用扩展语法，则可以把多个模式按所有可能的排列顺序组合起来：

a.*b.*c|a.*c.*b|b.*a.*c|b.*c.*a|c.*a.*b|c.*b.*a

但如果你有两个以上的模式，这可能不是一个好主意，因为组合的数量会迅速增加。

您可以使用 -exec 组合多个 zgrep 命令。除最后一个之外，每个 zgrep 都使用安静选项 -q（最后一个 zgrep 在它和之前所有 grep 都找到匹配时打印文件名）。

# A file is listed only if all three zgrep calls succeed: the first two
# just test quietly (-q), the last one prints the matching file name (-l).
find . -mtime -"$a" -type f ! -name "*.bak*"  \
        -exec zgrep -iq "$b" {} \;            \
        -exec zgrep -iq "$c" {} \;            \
        -exec zgrep -il "$d" {} \; | sort

Answer

grep 没有用于同时匹配多个模式的 AND 选项，但您可以使用 | 以 OR 方式匹配模式。如果使用扩展语法，则可以把多个模式按所有可能的排列顺序组合起来：

a.*b.*c|a.*c.*b|b.*a.*c|b.*c.*a|c.*a.*b|c.*b.*a

但如果你有两个以上的模式，这可能不是一个好主意，因为组合的数量会迅速增加。

您可以使用 -exec 组合多个 zgrep 命令。除最后一个之外，每个 zgrep 都使用安静选项 -q（最后一个 zgrep 在它和之前所有 grep 都找到匹配时打印文件名）。

# A file is listed only if all three zgrep calls succeed: the first two
# just test quietly (-q), the last one prints the matching file name (-l).
find . -mtime -"$a" -type f ! -name "*.bak*"  \
        -exec zgrep -iq "$b" {} \;            \
        -exec zgrep -iq "$c" {} \;            \
        -exec zgrep -il "$d" {} \; | sort

Question 3

您可以使用 find 运行一个包含三个 zgrep 的命令，例如

  # Chain three quiet zgrep tests (pattern first, then the file); -print is
  # reached, and the name written for sort, only when all three matched.
  find . -mtime -"$a" -type f ! -name "*.bak*"  \
       -exec zgrep -q "$b" {} \; \
       -a   -exec zgrep -q "$c" {} \; \
       -a   -exec zgrep -q "$d" {} \; \
       -print \
    | sort

您还可以先收集文件名，然后再对它们执行 grep，例如

 # Save the candidate file names, one per line, for a later loop.
 find . -mtime -"$a" -type f ! -name "*.bak*" > /tmp/file-list

（假设你的文件名很好，没有空格）

然后循环处理 /tmp/file-list 中的每一行

最后，您可以用另一种语言（awk、Python……）编写脚本

为了避免重复输入，您可以定义一个 shell 函数。

Answer