AWK-查找一行中的最大值并带标题打印

AWK-查找一行中的最大值并带标题打印

我需要读取一个文件,找出每一行中的最大值,然后打印该行的 ID、最大值所在列的列名(sno#)以及与之关联的 lc# 列中的值。如果最大值在同一行中出现多次,需要列出所有匹配项。例如:

input.txt(制表符分隔)

Id  sno1  lc1  sno2  lc2    sno3  lc3  sno4 lc4
RM1  98   ss1   88   ms1    78    gs1   45  rs1
RM2  23   ss2   44   ms2    98    gs2   15  rs2
RM3  45   ss3   100  ms3    33    gs3   10  rs3
RM4  45   ss4   45   ms4    12    gs4   11  rs4

output.txt

RM1 ss1 sno1
RM2 gs2 sno3
RM3 ms3 sno2
RM4 ss4,ms4 sno1,sno2   

答案1

输入

$ cat input.txt
Id  sno1  lc1  sno2  lc2    sno3  lc3  sno4 lc4
RM1  98   ss1   88   ms1    78    gs1   45  rs1
RM2  23   ss2   44   ms2    98    gs2   15  rs2
RM3  45   ss3   100  ms3    33    gs3   10  rs3
RM4  45   ss4   45   ms4    12    gs4   11  rs4

awk 脚本

$ cat row_max.awk
# Header line: remember every column name by its field position.
NR == 1 {
        for (col = 1; col <= NF; col++)
                headers[col] = $col
        next
}

# Data lines: report the row id, the lc# value(s) that sit right after the
# largest sno# value, and the header(s) of the sno# column(s) holding it.
# Ties are all reported, joined with commas.
{
        # The sno# values live in the even-numbered fields; start from the
        # first one and scan the rest for anything larger.
        peak = $2
        col = 4
        while (col <= NF) {
                if ($col > peak)
                        peak = $col
                col += 2
        }

        # Single pass over the sno# fields, building both output lists at
        # once. The first hit is introduced by OFS, later hits by a comma.
        values = ""
        names = ""
        hits = 0
        for (col = 2; col <= NF; col += 2) {
                if ($col != peak)
                        continue
                delim = (hits++ ? "," : OFS)
                values = values delim $(col + 1)
                names = names delim headers[col]
        }

        printf "%s%s%s\n", $1, values, names
}

输出

$ awk -f row_max.awk input.txt
RM1 ss1 sno1
RM2 gs2 sno3
RM3 ms3 sno2
RM4 ss4,ms4 sno1,sno2

答案2

# For every data row print its ID, the lc value(s) next to the largest sno
# value, and the matching sno header(s); ties are joined with commas.
# Reads whitespace-separated text on stdin (or append a file name).
awk 'BEGIN{ getline; for(i=1;i<=NF;i++) hdr[i]=$i }   # line 1 holds the headers
   { # seed from the first sno column rather than a -1 sentinel, so rows whose
     # values are all negative (or start at -1) never reuse stale lc/sn
     max=$2; lc=$3; sn=hdr[2]
     for(i=4; i<=NF; i+=2)
        if($i==max) { lc=lc","$(i+1); sn=sn","hdr[i] }
        else if($i>max) { max=$i; lc=$(i+1); sn=hdr[i] }
     print $1" "lc" "sn }'

下面是带注释的版本:

# For every data row print its ID, the lc value(s) next to the largest sno
# value, and the matching sno header(s); ties are joined with commas.
# Reads whitespace-separated text on stdin (or append a file name).
awk 'BEGIN{
       # read line 1 and keep the column headers in an array, by position
       getline; for(i=1;i<=NF;i++) hdr[i]=$i
     }
     { # processes lines 2 -> last
       # Start every row afresh from its first "sno" field (field 2) and the
       # "lc" field after it. Seeding per row is what keeps one row from
       # leaking its maximum into the next, and it also works when the
       # values are negative.
       max=$2; lc=$3; sn=hdr[2]
       # check each remaining "sno" field
       for(i=4; i<=NF; i+=2)
           if($i==max) { # tie with the current maximum
                # append "lc" field value and "sno" header
                #  to the previous "lc" and "sn", using ","
                lc=lc","$(i+1); sn=sn","hdr[i]
            }
            else if($i>max) { # new maximum: discard what was collected
                # save "lc" field value and "sno" header
                max=$i; lc=$(i+1); sn=hdr[i]
            }
       # print output for the current line
       print $1" "lc" "sn
     }'

输出

RM1 ss1 sno1
RM2 gs2 sno3
RM3 ms3 sno2
RM4 ss4,ms4 sno1,sno2

相关内容