输入文件:is2.txt
10.39.5.41,A1,B1
10.39.5.41,A2,B2
10.39.5.41,A3,B3
10.39.5.41,A4,B4
10.39.5.41,A5,B5
10.39.5.41,A6,B6
脚本 :
#!/bin/bash
# Reads comma-separated rows from is2.txt, groups them by their first field
# and prints one ";"-joined line per distinct first-field value.
second_column="OOOOOOO" # OOOOOOO will be added to every second column
third_column="XXXXXXXX" # XXXXXXXX will be added to every third column
# The two shell values are passed into awk as the awk variables "second" and "third".
awk -v second="$second_column" -v third="$third_column" 'BEGIN { FS="," } # split input on commas
{
if(a[$1]) # true once a string has been stored for this first-field value
{a[$1]=a[$1]";second"$2";third"$3} # later rows: append ";second", $2, ";third", $3
else
{a[$1]=a[$1]second$2";"third$3}} # first row for a key: store second $2 ";" third $3
# After the last row: print each key, a ";" and the string accumulated for it.
END{for (i in a)print i";"a[i];}' < is2.txt
output:
[root@testgfs2 test]# ./testawk.awk
10.39.5.41;OOOOOOOA1;XXXXXXXXB1;secondA2;thirdB2;secondA3;thirdB3;secondA4;thirdB4;secondA5;thirdB5;secondA6;thirdB6
为什么 shell 变量(second_column、third_column)的值只出现在输出的第一组字段中,而没有出现在其余部分?问题出在哪里?
预期输出:
10.39.5.41;OOOOOOOA1;XXXXXXXXB1;OOOOOOOA2;XXXXXXXXB2;OOOOOOOA3;XXXXXXXXB3;OOOOOOOA4;XXXXXXXXB4;OOOOOOOA5;XXXXXXXXB5;OOOOOOOA6;XXXXXXXXB6
此外,有什么方法可以将其推广到 n 列(例如 n = 100)?
答案1
下面是按推荐的间距和缩进重新排版后的 awk 代码,您能看出问题吗?
# Groups comma-separated rows by their first field and prints one line per key.
BEGIN { FS = "," }
{
# if branch (key already stored): ";second" and ";third" are string literals,
# so those words are appended verbatim.
# else branch (new key): second and third are used as variables.
if(a[$1])
a[$1] = a[$1] ";second" $2 ";third" $3
else
a[$1] = a[$1]second $2 ";" third$3
}
# After all input: print each key, a ";" and the string accumulated for it.
END {
for (i in a)
print i ";" a[i]
}
您在 if 分支中把 second 和 third 放进了引号里,因此它们成了字符串字面量,而不是变量引用。
根据您的预期输出猜测,您可以这样做:
# Filter: reads comma-separated rows on stdin and writes them as one
# ";"-separated line, prefixing column 2 with c2 and column 3 with c3.
awk -F ',' -v OFS=';' -v ORS=';' -v c2='OOOOOOO' -v c3='XXXXXXXX' '
  # Emit the key (first field) once, before anything else is printed.
  NR == 1 { printf "%s", $1 }

  {
    # Blank the key so it is not repeated, then prefix the two data columns.
    $1 = ""
    $2 = c2 $2
    $3 = c3 $3
    # Assigning to fields rebuilt the record with OFS; print it followed by ORS.
    print
  }
' | sed -e 's/;;/;/g' -e 's/;$//'  # collapse doubled ";" and drop the trailing one
输出:
10.39.5.41;OOOOOOOA1;XXXXXXXXB1;OOOOOOOA2;XXXXXXXXB2;OOOOOOOA3;XXXXXXXXB3;OOOOOOOA4;XXXXXXXXB4;OOOOOOOA5;XXXXXXXXB5;OOOOOOOA6;XXXXXXXXB6
为了推广这种方法,您可以把要添加到各列前面的前缀作为一个字符串传入,并在 awk 中将其拆分为数组。然后使用 for 循环代替逐列写出的显式变量:
# Filter: reads comma-separated rows on stdin and writes them as one
# ";"-separated line. The space-separated words in "prepends" are the
# prefixes for columns 2, 3, ... in that order.
awk -F ',' -v OFS=';' -v ORS=';' -v prepends='OOOOOOO XXXXXXXX' '
  # prefix[1] belongs to column 2, prefix[2] to column 3, and so on.
  BEGIN { split(prepends, prefix, / +/) }

  # Emit the key (first field) once, before anything else is printed.
  NR == 1 { printf "%s", $1 }

  {
    # Blank the key so it is not repeated on every row.
    $1 = ""
    # Columns without a matching prefix are left as they are.
    for (col = 2; col <= NF; col++)
      $col = prefix[col - 1] $col
    print
  }
' | sed -e 's/;;/;/g' -e 's/;$//'  # collapse doubled ";" and drop the trailing one