如何使用 awk 重新创建 grep -f

Question 1

如果您自己的这条命令：

# Copy every blank-line-separated record from standard input that mentions an
# interface, an address and one of the two wanted IPs to outputfile, each
# followed by an empty line.
awk '
  # Paragraph mode: records are separated by blank lines, one line per field.
  BEGIN { FS = "\n"; RS = ""; OFS = "\n" }

  # The dots are escaped so that they only match a literal "."; a bare "."
  # is the regex wildcard and would also accept text such as "1.101.1".
  /intf/ && /addr/ && ( /1\.1\.1\.1/ || /3\.3\.3\.3/ ) { print $0 "\n" }
' > outputfile

确实能满足您的需求，那么可以把这些 IP 地址放进一个“match.list”文件（每行一个 IP 地址），然后用下面的方式实现同样的效果：

gawk '
  # Phase 1: match.list, the first file operand (ARGV[1]).
  # FILENAME is tested instead of NR==FNR so that an empty match.list does
  # not cause the records of the second input to be consumed as patterns.
  ( FILENAME == ARGV[1] ) {
      ipreg = $0                                       # one ip per line
      gsub(/^[[:space:]]+|[[:space:]]+$/, "", ipreg)   # drop stray blanks / CR
      if ( ipreg == "" ) next                          # a blank line is not a pattern

      # Make every "." literal. The first argument has to be a regex that
      # matches only a dot: a plain "." would match (and replace) every
      # character of the address.
      gsub(/[.]/, "[.]", ipreg)

      # Word anchors (gawk extension) so that 1.2.3.4 does NOT match 11.2.3.42.
      # The backslashes are doubled because this is a string constant: a
      # single backslash before "<" or ">" would be dropped by gawk.
      ipreg = "\\<" ipreg "\\>"

      # Append to the alternation; sep is still empty for the first element.
      ipsreg = ipsreg sep ipreg; sep = "|"
      next                                             # nothing else to do for this file
  }

  # Phase 2: the records of the second input. Print (default action) those
  # that mention intf and addr and contain one of the listed ips. ORS is
  # "\n\n" by now, so each printed record is followed by an empty line.
  # With no usable pattern nothing is printed; an empty regex would
  # otherwise match every record.
  ( ipsreg != "" && /intf/ && /addr/ && ( $0 ~ ipsreg ) )
 ' match.list FS="\n" RS="" OFS="\n" ORS="\n\n" - > outputfile
   # Operands containing "=" are variable assignments, not file names; they take
   # effect when awk reaches them, i.e. after match.list has been read.
   #  - : is STDIN, read as the second input in paragraph mode (RS="").

Answer

如果您自己的这条命令：

# Copy every blank-line-separated record from standard input that mentions an
# interface, an address and one of the two wanted IPs to outputfile, each
# followed by an empty line.
awk '
  # Paragraph mode: records are separated by blank lines, one line per field.
  BEGIN { FS = "\n"; RS = ""; OFS = "\n" }

  # The dots are escaped so that they only match a literal "."; a bare "."
  # is the regex wildcard and would also accept text such as "1.101.1".
  /intf/ && /addr/ && ( /1\.1\.1\.1/ || /3\.3\.3\.3/ ) { print $0 "\n" }
' > outputfile

确实能满足您的需求，那么可以把这些 IP 地址放进一个“match.list”文件（每行一个 IP 地址），然后用下面的方式实现同样的效果：

gawk '
  # Phase 1: match.list, the first file operand (ARGV[1]).
  # FILENAME is tested instead of NR==FNR so that an empty match.list does
  # not cause the records of the second input to be consumed as patterns.
  ( FILENAME == ARGV[1] ) {
      ipreg = $0                                       # one ip per line
      gsub(/^[[:space:]]+|[[:space:]]+$/, "", ipreg)   # drop stray blanks / CR
      if ( ipreg == "" ) next                          # a blank line is not a pattern

      # Make every "." literal. The first argument has to be a regex that
      # matches only a dot: a plain "." would match (and replace) every
      # character of the address.
      gsub(/[.]/, "[.]", ipreg)

      # Word anchors (gawk extension) so that 1.2.3.4 does NOT match 11.2.3.42.
      # The backslashes are doubled because this is a string constant: a
      # single backslash before "<" or ">" would be dropped by gawk.
      ipreg = "\\<" ipreg "\\>"

      # Append to the alternation; sep is still empty for the first element.
      ipsreg = ipsreg sep ipreg; sep = "|"
      next                                             # nothing else to do for this file
  }

  # Phase 2: the records of the second input. Print (default action) those
  # that mention intf and addr and contain one of the listed ips. ORS is
  # "\n\n" by now, so each printed record is followed by an empty line.
  # With no usable pattern nothing is printed; an empty regex would
  # otherwise match every record.
  ( ipsreg != "" && /intf/ && /addr/ && ( $0 ~ ipsreg ) )
 ' match.list FS="\n" RS="" OFS="\n" ORS="\n\n" - > outputfile
   # Operands containing "=" are variable assignments, not file names; they take
   # effect when awk reaches them, i.e. after match.list has been read.
   #  - : is STDIN, read as the second input in paragraph mode (RS="").

Question 2

FWIW，我不会为此使用正则表达式；我会创建一个数组（下面的 v[]），将标签（例如 srcaddr）映射到它们的值（例如 "1.1.1.1" 或 "all"），然后就可以对数组索引进行哈希查找，以确定当前块中存在哪些标签，以及您感兴趣的任何标签的值。例如，使用任何 POSIX awk：

$ cat tst.awk
# First input file (the filter list): remember every line, wrapped in double
# quotes so that it compares equal to a quoted value such as "1.1.1.1".
NR == FNR {
    ips["\"" $0 "\""]
    next
}

# An "edit" line opens a new block: start numbering its lines at 1.
$1 == "edit" {
    pos = 1
}

# Inside a block (pos is non-zero) every line is recorded.
pos {
    # Lines with more than two fields are keyed by their 2nd field,
    # shorter lines by their 1st field.
    if ( NF > 2 ) {
        keyFld = 2
    }
    else {
        keyFld = 1
    }
    tag = $keyFld

    # Split the line in two: the head runs up to and including the key field
    # and the white space after it, the rest of the line is the value.
    match($0, "^([[:space:]]*[^[:space:]]+){" keyFld "}[[:space:]]*")
    headLen    = RLENGTH
    heads[tag] = substr($0, 1, headLen)
    v[tag]     = substr($0, headLen + 1)
    order[pos] = tag
    pos++

    # A "next" line closes the block: decide whether to print it, then reset.
    if ( $1 == "next" ) {
        keep = 0
        if ( ("srcintf" in v) && (v["srcaddr"] in ips) ) {
            keep = 1
        }
        else if ( ("dstintf" in v) && (v["dstaddr"] in ips) ) {
            keep = 1
        }

        if ( keep ) {
            # Re-emit the recorded lines in their original order.
            n = 1
            while ( n < pos ) {
                tag = order[n]
                print heads[tag] v[tag]
                n++
            }
            print ""
        }

        delete v
        pos = 0
    }
}

$ awk -f tst.awk filterfile textfile
edit 114
set uuid 6cb43
set action accept
set srcintf "Port-ch40.1657"
set dstintf "any"
set srcaddr "1.1.1.1"
set dstaddr "all"
set schedule "always"
set service "ALL_ICMP" "icmp-echo-reply" "icmp-source-quench" "icmp-time-exceeded" "icmp-unreachable"
set logtraffic all
next

使用该结构，您可以轻松测试或修改任意标签的任意值，并针对每个块中每个标签的内容编写更精确的测试，而不是仅仅对整个块做正则表达式比较。例如，如果您想查找/输出 uuid 为 6cb43、schedule 为 always 且 service 包含 "icmp-time-exceeded" 的块，可以把这一段：

        if (    (("srcintf" in v) && (v["srcaddr"] in ips)) \
             || (("dstintf" in v) && (v["dstaddr"] in ips)) \

改为：

        if (    (v["uuid"] == "6cb43") \
             && (v["schedule"] == "always") \
             && (v["service"] ~ /"icmp-time-exceeded"/) \

如果您想在打印之前把某个标签改成其他值，只需在打印循环之前给 v[] 中对应的元素赋值：

$ cat tst.awk
# First input file (the filter list): remember every line, wrapped in double
# quotes so that it compares equal to a quoted value such as "1.1.1.1".
NR == FNR {
    ips["\"" $0 "\""]
    next
}

# An "edit" line opens a new block: start numbering its lines at 1.
$1 == "edit" {
    pos = 1
}

# Inside a block (pos is non-zero) every line is recorded.
pos {
    # Lines with more than two fields are keyed by their 2nd field,
    # shorter lines by their 1st field.
    if ( NF > 2 ) {
        keyFld = 2
    }
    else {
        keyFld = 1
    }
    tag = $keyFld

    # Split the line in two: the head runs up to and including the key field
    # and the white space after it, the rest of the line is the value.
    match($0, "^([[:space:]]*[^[:space:]]+){" keyFld "}[[:space:]]*")
    headLen    = RLENGTH
    heads[tag] = substr($0, 1, headLen)
    v[tag]     = substr($0, headLen + 1)
    order[pos] = tag
    pos++

    # A "next" line closes the block: decide whether to print it, then reset.
    if ( $1 == "next" ) {
        keep = 0
        if ( ("srcintf" in v) && (v["srcaddr"] in ips) ) {
            keep = 1
        }
        else if ( ("dstintf" in v) && (v["dstaddr"] in ips) ) {
            keep = 1
        }

        if ( keep ) {
            # Override the values of selected tags before printing.
            v["action"] = "reject"
            v["dstaddr"] = "\"127.0.0.1\""

            # Re-emit the recorded lines in their original order.
            n = 1
            while ( n < pos ) {
                tag = order[n]
                print heads[tag] v[tag]
                n++
            }
            print ""
        }

        delete v
        pos = 0
    }
}

$ awk -f tst.awk filterfile textfile
edit 114
set uuid 6cb43
set action reject
set srcintf "Port-ch40.1657"
set dstintf "any"
set srcaddr "1.1.1.1"
set dstaddr "127.0.0.1"
set schedule "always"
set service "ALL_ICMP" "icmp-echo-reply" "icmp-source-quench" "icmp-time-exceeded" "icmp-unreachable"
set logtraffic all
next

Answer

FWIW，我不会为此使用正则表达式；我会创建一个数组（下面的 v[]），将标签（例如 srcaddr）映射到它们的值（例如 "1.1.1.1" 或 "all"），然后就可以对数组索引进行哈希查找，以确定当前块中存在哪些标签，以及您感兴趣的任何标签的值。例如，使用任何 POSIX awk：

$ cat tst.awk
# First input file (the filter list): remember every line, wrapped in double
# quotes so that it compares equal to a quoted value such as "1.1.1.1".
NR == FNR {
    ips["\"" $0 "\""]
    next
}

# An "edit" line opens a new block: start numbering its lines at 1.
$1 == "edit" {
    pos = 1
}

# Inside a block (pos is non-zero) every line is recorded.
pos {
    # Lines with more than two fields are keyed by their 2nd field,
    # shorter lines by their 1st field.
    if ( NF > 2 ) {
        keyFld = 2
    }
    else {
        keyFld = 1
    }
    tag = $keyFld

    # Split the line in two: the head runs up to and including the key field
    # and the white space after it, the rest of the line is the value.
    match($0, "^([[:space:]]*[^[:space:]]+){" keyFld "}[[:space:]]*")
    headLen    = RLENGTH
    heads[tag] = substr($0, 1, headLen)
    v[tag]     = substr($0, headLen + 1)
    order[pos] = tag
    pos++

    # A "next" line closes the block: decide whether to print it, then reset.
    if ( $1 == "next" ) {
        keep = 0
        if ( ("srcintf" in v) && (v["srcaddr"] in ips) ) {
            keep = 1
        }
        else if ( ("dstintf" in v) && (v["dstaddr"] in ips) ) {
            keep = 1
        }

        if ( keep ) {
            # Re-emit the recorded lines in their original order.
            n = 1
            while ( n < pos ) {
                tag = order[n]
                print heads[tag] v[tag]
                n++
            }
            print ""
        }

        delete v
        pos = 0
    }
}

$ awk -f tst.awk filterfile textfile
edit 114
set uuid 6cb43
set action accept
set srcintf "Port-ch40.1657"
set dstintf "any"
set srcaddr "1.1.1.1"
set dstaddr "all"
set schedule "always"
set service "ALL_ICMP" "icmp-echo-reply" "icmp-source-quench" "icmp-time-exceeded" "icmp-unreachable"
set logtraffic all
next

使用该结构，您可以轻松测试或修改任意标签的任意值，并针对每个块中每个标签的内容编写更精确的测试，而不是仅仅对整个块做正则表达式比较。例如，如果您想查找/输出 uuid 为 6cb43、schedule 为 always 且 service 包含 "icmp-time-exceeded" 的块，可以把这一段：

        if (    (("srcintf" in v) && (v["srcaddr"] in ips)) \
             || (("dstintf" in v) && (v["dstaddr"] in ips)) \

改为：

        if (    (v["uuid"] == "6cb43") \
             && (v["schedule"] == "always") \
             && (v["service"] ~ /"icmp-time-exceeded"/) \

如果您想在打印之前把某个标签改成其他值，只需在打印循环之前给 v[] 中对应的元素赋值：

$ cat tst.awk
# First input file (the filter list): remember every line, wrapped in double
# quotes so that it compares equal to a quoted value such as "1.1.1.1".
NR == FNR {
    ips["\"" $0 "\""]
    next
}

# An "edit" line opens a new block: start numbering its lines at 1.
$1 == "edit" {
    pos = 1
}

# Inside a block (pos is non-zero) every line is recorded.
pos {
    # Lines with more than two fields are keyed by their 2nd field,
    # shorter lines by their 1st field.
    if ( NF > 2 ) {
        keyFld = 2
    }
    else {
        keyFld = 1
    }
    tag = $keyFld

    # Split the line in two: the head runs up to and including the key field
    # and the white space after it, the rest of the line is the value.
    match($0, "^([[:space:]]*[^[:space:]]+){" keyFld "}[[:space:]]*")
    headLen    = RLENGTH
    heads[tag] = substr($0, 1, headLen)
    v[tag]     = substr($0, headLen + 1)
    order[pos] = tag
    pos++

    # A "next" line closes the block: decide whether to print it, then reset.
    if ( $1 == "next" ) {
        keep = 0
        if ( ("srcintf" in v) && (v["srcaddr"] in ips) ) {
            keep = 1
        }
        else if ( ("dstintf" in v) && (v["dstaddr"] in ips) ) {
            keep = 1
        }

        if ( keep ) {
            # Override the values of selected tags before printing.
            v["action"] = "reject"
            v["dstaddr"] = "\"127.0.0.1\""

            # Re-emit the recorded lines in their original order.
            n = 1
            while ( n < pos ) {
                tag = order[n]
                print heads[tag] v[tag]
                n++
            }
            print ""
        }

        delete v
        pos = 0
    }
}

$ awk -f tst.awk filterfile textfile
edit 114
set uuid 6cb43
set action reject
set srcintf "Port-ch40.1657"
set dstintf "any"
set srcaddr "1.1.1.1"
set dstaddr "127.0.0.1"
set schedule "always"
set service "ALL_ICMP" "icmp-echo-reply" "icmp-source-quench" "icmp-time-exceeded" "icmp-unreachable"
set logtraffic all
next

Question 3

仅回答“awk 能否处理来自文件的值”这个具体问题：可以，您可以用 getline 配合“< 文件名”重定向来读取文件。在 BEGIN 块的末尾添加：

# Load the patterns from filterfile into $0.
# NOTE(review): this relies on the surrounding BEGIN block having already set
# RS = "" so that one getline returns all the lines - confirm.
# The result is checked: on failure $0 would stay empty, and an empty regex
# matches every record.
if ( (getline < "filterfile") <= 0 ) {
    print "cannot read any pattern from filterfile" > "/dev/stderr"
    exit 1
}
close("filterfile")
fromfilter = $0;
# The entries are IP addresses: make each "." match a literal dot only
# instead of acting as the regex wildcard.
gsub(/[.]/, "[.]", fromfilter);
# Turn the line breaks into the regex alternation operator.
gsub("\n","|",fromfilter);

由于您已经设置了 FS 和 RS，getline 会把整个文件读入 $0，因此只需把行分隔符替换为正则表达式的“或”运算符 | 即可。然后在 match 中使用得到的变量：

# Print the record followed by an extra newline when it mentions intf and
# addr and also matches the regex held in fromfilter.
if ( /intf/ && /addr/ ) {
    if ( match($0, fromfilter) ) {
        print $0 "\n"
    }
}

Answer

仅回答“awk 能否处理来自文件的值”这个具体问题：可以，您可以用 getline 配合“< 文件名”重定向来读取文件。在 BEGIN 块的末尾添加：

# Load the patterns from filterfile into $0.
# NOTE(review): this relies on the surrounding BEGIN block having already set
# RS = "" so that one getline returns all the lines - confirm.
# The result is checked: on failure $0 would stay empty, and an empty regex
# matches every record.
if ( (getline < "filterfile") <= 0 ) {
    print "cannot read any pattern from filterfile" > "/dev/stderr"
    exit 1
}
close("filterfile")
fromfilter = $0;
# The entries are IP addresses: make each "." match a literal dot only
# instead of acting as the regex wildcard.
gsub(/[.]/, "[.]", fromfilter);
# Turn the line breaks into the regex alternation operator.
gsub("\n","|",fromfilter);

由于您已经设置了 FS 和 RS，getline 会把整个文件读入 $0，因此只需把行分隔符替换为正则表达式的“或”运算符 | 即可。然后在 match 中使用得到的变量：

# Print the record followed by an extra newline when it mentions intf and
# addr and also matches the regex held in fromfilter.
if ( /intf/ && /addr/ ) {
    if ( match($0, fromfilter) ) {
        print $0 "\n"
    }
}

如何使用 awk 重新创建 grep -f

答案1

答案2

答案3

相关内容