我需要读取一个文件,找到每一行中的最大值,然后打印该行的 ID、最大值所在的列(sno#)以及对应关联列(lc#)中的值。
如果最大值出现多次,我需要包括所有匹配项。例如:
input.txt(制表符分隔)
Id sno1 lc1 sno2 lc2 sno3 lc3 sno4 lc4
RM1 98 ss1 88 ms1 78 gs1 45 rs1
RM2 23 ss2 44 ms2 98 gs2 15 rs2
RM3 45 ss3 100 ms3 33 gs3 10 rs3
RM4 45 ss4 45 ms4 12 gs4 11 rs4
output.txt
RM1 ss1 sno1
RM2 gs2 sno3
RM3 ms3 sno2
RM4 ss4,ms4 sno1,sno2
答案1
输入
$ cat input.txt
Id sno1 lc1 sno2 lc2 sno3 lc3 sno4 lc4
RM1 98 ss1 88 ms1 78 gs1 45 rs1
RM2 23 ss2 44 ms2 98 gs2 15 rs2
RM3 45 ss3 100 ms3 33 gs3 10 rs3
RM4 45 ss4 45 ms4 12 gs4 11 rs4
awk 脚本
$ cat row_max.awk
# Header row: remember every column name by field position, print nothing.
NR == 1 {
    for (col = 1; col <= NF; col++)
        headers[col] = $col
    next
}

# Data rows: field 1 is the row id; the sno# values sit in the even fields
# (2, 4, ...), each immediately followed by its lc# value.
{
    # Pass 1: largest sno# value in this row, seeded with the first one.
    best = $2
    for (col = 4; col <= NF; col += 2) {
        if ($col > best)
            best = $col
    }

    # Pass 2: for every sno# column tied at the maximum, collect the lc#
    # value that follows it and the header name of the column itself.
    # The first entry of each list is introduced by OFS, later ones by ",".
    lc_list = ""
    name_list = ""
    glue = OFS
    for (col = 2; col <= NF; col += 2) {
        if ($col != best)
            continue
        lc_list = lc_list glue $(col + 1)
        name_list = name_list glue headers[col]
        glue = ","
    }

    # Emit: row id, then the lc# list, then the column-name list.
    printf "%s%s%s\n", $1, lc_list, name_list
}
输出
$ awk -f row_max.awk input.txt
RM1 ss1 sno1
RM2 gs2 sno3
RM3 ms3 sno2
RM4 ss4,ms4 sno1,sno2
答案2
awk '
# Consume the header row before the main rule runs, keeping each column
# name by field position; the main rule then only sees data rows.
BEGIN {
    getline
    for (c = 1; c <= NF; c++) hdr[c] = $c
    max = -1
}
# Data rows: even fields hold the values to compare, and the field right
# after each one is the label reported alongside it.
{
    for (c = 2; c <= NF; c += 2) {
        if ($c > max) {
            # new maximum: restart both lists from this column
            max = $c
            lc = $(c + 1)
            sn = hdr[c]
        } else if ($c == max) {
            # tie: extend both lists with a comma
            lc = lc "," $(c + 1)
            sn = sn "," hdr[c]
        }
    }
    print $1 " " lc " " sn
    # back to the sentinel so the next row starts fresh
    max = -1
}'
下面是带注释的版本:
awk 'BEGIN{
    # read line 1 (the column headers) into an array indexed by field number
    getline; for(i=1;i<=NF;i++) hdr[i]=$i
    # start max below zero; this sentinel assumes the compared values
    # are never negative
    max=-1
}
{   # processes lines 2 -> last
    # check each "sn.." field (the even columns); the matching "lc.." value
    # is the field right after it
    for(i=2; i<=NF; i+=2)
        if($i==max) { # tie with current max; cannot occur on 1st iteration per line
            # append "lc" field value and "sn" header
            # to previous "lc" and "sn", using ","
            lc=lc","$(i+1); sn=sn","hdr[i]
        }
        else if($i>max) { # new max; must occur on 1st iteration per line
            # save "lc" field value and "sn" header,
            # discarding whatever was collected for a smaller max
            max=$i; lc=$(i+1); sn=hdr[i]
        }
    # print output for the current line
    print $1" "lc" "sn
    # reset max to less than zero, ready for the next line; without this
    # reset a row whose largest value is below an earlier maximum would
    # never update lc/sn and would print the stale values again
    max=-1
}'
输出
RM1 ss1 sno1
RM2 gs2 sno3
RM3 ms3 sno2
RM4 ss4,ms4 sno1,sno2