需要格式化文本文件并根据键值格式进行更新

Question 1

一个awk执行此操作的程序，假设这些LIB_TRGT_行总是出现后这些引用的其他行：

BEGIN {
        FS = ":[[:blank:]]*"
        OFS = ": "
}

{ print }

$1 !~ /^LIB_TRGT_/ {
        # "list" is an associative array that uses the keys "calv",
        # "anot", and "secd", and for each of these stores the relevant
        # servers as a SUBSEP-delimited string.

        list[$2] = (list[$2] == "" ? $1 : list[$2] SUBSEP $1)
        next
}

{
        path = $2

        # Pick out the comma-delimited list of keys after "TRGT_", and
        # split these up into elements of the array "k".

        sub("^.*TRGT_", "", $1)
        nk = split($1, k, ",")

        $0 = ""
        $2 = path

        # For each key, get the corresponding list from "list" and
        # output the path for each of its values.

        for (i = 1; i <= nk; ++i) {
                nv = split(list[k[i]], v, SUBSEP)
                for (j = 1; j <= nv; ++j) {
                        $1 = "LIB_" v[j]
                        print
                }
        }
}

使用文件中的上述脚本script以及您的数据进行测试Unix 文本文件称为file：

$ awk -f script file
server1: calv
server2: anot
log: /u/log/1
server3: calv
server4: anot
server5: secd
server6: calv
LIB_TRGT_calv,anot: /tmp/hello.txt
LIB_server1: /tmp/hello.txt
LIB_server3: /tmp/hello.txt
LIB_server6: /tmp/hello.txt
LIB_server2: /tmp/hello.txt
LIB_server4: /tmp/hello.txt
LIB_TRGT_secd: /var/del.tmp
LIB_server5: /var/del.tmp

顺序与您在问题中显示的内容略有不同，因为LIB_TRGT_它们一出现就对其做出反应，而不是保存它们并在最后处理它们。

Answer

一个awk执行此操作的程序，假设这些LIB_TRGT_行总是出现后这些引用的其他行：

BEGIN {
        FS = ":[[:blank:]]*"
        OFS = ": "
}

{ print }

$1 !~ /^LIB_TRGT_/ {
        # "list" is an associative array that uses the keys "calv",
        # "anot", and "secd", and for each of these stores the relevant
        # servers as a SUBSEP-delimited string.

        list[$2] = (list[$2] == "" ? $1 : list[$2] SUBSEP $1)
        next
}

{
        path = $2

        # Pick out the comma-delimited list of keys after "TRGT_", and
        # split these up into elements of the array "k".

        sub("^.*TRGT_", "", $1)
        nk = split($1, k, ",")

        $0 = ""
        $2 = path

        # For each key, get the corresponding list from "list" and
        # output the path for each of its values.

        for (i = 1; i <= nk; ++i) {
                nv = split(list[k[i]], v, SUBSEP)
                for (j = 1; j <= nv; ++j) {
                        $1 = "LIB_" v[j]
                        print
                }
        }
}

使用文件中的上述脚本script以及您的数据进行测试Unix 文本文件称为file：

$ awk -f script file
server1: calv
server2: anot
log: /u/log/1
server3: calv
server4: anot
server5: secd
server6: calv
LIB_TRGT_calv,anot: /tmp/hello.txt
LIB_server1: /tmp/hello.txt
LIB_server3: /tmp/hello.txt
LIB_server6: /tmp/hello.txt
LIB_server2: /tmp/hello.txt
LIB_server4: /tmp/hello.txt
LIB_TRGT_secd: /var/del.tmp
LIB_server5: /var/del.tmp

顺序与您在问题中显示的内容略有不同，因为LIB_TRGT_它们一出现就对其做出反应，而不是保存它们并在最后处理它们。

Question 2

我们可以通过从数据生成代码然后将生成的代码应用到数据本身来解决这个问题。

使用扩展正则表达式模式的 GNU sed。

inputf='./data.txt'

< "$inputf" \
sed -E '
  s/^\s+|\s+$//g
  s/\s+/\t/g
' |
sed -E '
  /_TRGT_[^:\t]/{
    s/^(([^_]+_)+TRGT_)([^,:]+)[,:](.*(\t\S+))$/\3\5\n\1\4/
    h;s/\n.*//;s/.*\t//
    s:[\/&]:\\&:g;G
    s/(.*)\n(.*\t).*(\n.*)/\2\1\3/
    P
  }
  D
' |
sed -En '
  1i\
p

  s#(.*)\t(.*)#/:\\t\1$/{s/^/LIB_/;s/:.*/:\\t\2/;H;ba;}#p

  $a\
:a\
$!d;g;s/.//
' |
sed -Ef - "$inputf"

server1:    calv
server2:    anot
log:    /u/log/1
server3:    calv
server4:    anot
server5:    secd
server6:    calv
LIB_TRGT_calv,anot: /tmp/hello.txt
LIB_TRGT_secd:  /var/del.tmp
LIB_server1:    /tmp/hello.txt
LIB_server2:    /tmp/hello.txt
LIB_server3:    /tmp/hello.txt
LIB_server4:    /tmp/hello.txt
LIB_server5:    /var/del.tmp
LIB_server6:    /tmp/hello.txt

步骤0，通过删除前导尾随空格来清理数据。等宽字体到 TAB。
第一步，我们隔离键值对，将它们放在每行一个中。
Step2 有了这些 kv 元组，我们生成 sed 代码。
Step3，我们将代码应用到数据上。

Answer

我们可以通过从数据生成代码然后将生成的代码应用到数据本身来解决这个问题。

使用扩展正则表达式模式的 GNU sed。

inputf='./data.txt'

< "$inputf" \
sed -E '
  s/^\s+|\s+$//g
  s/\s+/\t/g
' |
sed -E '
  /_TRGT_[^:\t]/{
    s/^(([^_]+_)+TRGT_)([^,:]+)[,:](.*(\t\S+))$/\3\5\n\1\4/
    h;s/\n.*//;s/.*\t//
    s:[\/&]:\\&:g;G
    s/(.*)\n(.*\t).*(\n.*)/\2\1\3/
    P
  }
  D
' |
sed -En '
  1i\
p

  s#(.*)\t(.*)#/:\\t\1$/{s/^/LIB_/;s/:.*/:\\t\2/;H;ba;}#p

  $a\
:a\
$!d;g;s/.//
' |
sed -Ef - "$inputf"

server1:    calv
server2:    anot
log:    /u/log/1
server3:    calv
server4:    anot
server5:    secd
server6:    calv
LIB_TRGT_calv,anot: /tmp/hello.txt
LIB_TRGT_secd:  /var/del.tmp
LIB_server1:    /tmp/hello.txt
LIB_server2:    /tmp/hello.txt
LIB_server3:    /tmp/hello.txt
LIB_server4:    /tmp/hello.txt
LIB_server5:    /var/del.tmp
LIB_server6:    /tmp/hello.txt

步骤0，通过删除前导尾随空格来清理数据。等宽字体到 TAB。
第一步，我们隔离键值对，将它们放在每行一个中。
Step2 有了这些 kv 元组，我们生成 sed 代码。
Step3，我们将代码应用到数据上。

Question 3

下面是针对这两种情况的统一解决方案，即第二个字段是否被引用时。

awk -v q=\' '1
NF<2{next}
!/^([^_]+_)+TRGT_/ {
  a[FNR] = $2
  b[FNR,$2] = $1
  if (index($2,q) == 1) quoted++
  next
}
{
  n = split($1, temp, /[_,:]/)
  s=0
  for (i=1; i<n; i++) {
    t = temp[i]
    s += length(t)+1
    ch = substr($1, s-1, 1)
    if (ch == "_") continue
    key = (!quoted) ? t : q t q
    c[key] = $2
  }
  split("", temp, ":")
}
BEGIN { OFS="\t" }
END {
  for (i=1; i in a; i++)
    if ( (a[i] in c) && ((i SUBSEP a[i]) in b))
      print "LIB_"b[i,a[i]], c[a[i]]
}' data.txt

server1:    'calv'
server2:    'anot'
log:    '/u/log/1'
server3:    'calv'
server4:    'anot'
server5:    'secd'
server6:    'calv'
LIB_TRGT_calv,anot: '/tmp/hello.txt'
LIB_TRGT_secd:  '/var/del.tmp'
LIB_server1:    '/tmp/hello.txt'
LIB_server2:    '/tmp/hello.txt'
LIB_server3:    '/tmp/hello.txt'
LIB_server4:    '/tmp/hello.txt'
LIB_server5:    '/var/del.tmp'
LIB_server6:    '/tmp/hello.txt'

Answer

下面是针对这两种情况的统一解决方案，即第二个字段是否被引用时。

awk -v q=\' '1
NF<2{next}
!/^([^_]+_)+TRGT_/ {
  a[FNR] = $2
  b[FNR,$2] = $1
  if (index($2,q) == 1) quoted++
  next
}
{
  n = split($1, temp, /[_,:]/)
  s=0
  for (i=1; i<n; i++) {
    t = temp[i]
    s += length(t)+1
    ch = substr($1, s-1, 1)
    if (ch == "_") continue
    key = (!quoted) ? t : q t q
    c[key] = $2
  }
  split("", temp, ":")
}
BEGIN { OFS="\t" }
END {
  for (i=1; i in a; i++)
    if ( (a[i] in c) && ((i SUBSEP a[i]) in b))
      print "LIB_"b[i,a[i]], c[a[i]]
}' data.txt

server1:    'calv'
server2:    'anot'
log:    '/u/log/1'
server3:    'calv'
server4:    'anot'
server5:    'secd'
server6:    'calv'
LIB_TRGT_calv,anot: '/tmp/hello.txt'
LIB_TRGT_secd:  '/var/del.tmp'
LIB_server1:    '/tmp/hello.txt'
LIB_server2:    '/tmp/hello.txt'
LIB_server3:    '/tmp/hello.txt'
LIB_server4:    '/tmp/hello.txt'
LIB_server5:    '/var/del.tmp'
LIB_server6:    '/tmp/hello.txt'

需要格式化文本文件并根据键值格式进行更新

答案1

答案2

答案3

相关内容