添加从文件中查找的成对值

Question

awk '# Report, per fruit pair and level, the summed values and the missing fruit.
     #   "pairs_file": <fruit1> <fruit2> per line.
     #   "main_file" : <fruit> <level> <$3> <$4> <$5> per line.
     # A pair that never shows up in "main_file" is printed once as
     # "Na Na Na Na both".

     # Process "pairs_file". Build the "pair" array, indexed 1..pz.
     # Include a leading and trailing space to unambiguously search for each
     # field value via index().
     #   - FILENAME==ARGV[1] (rather than NR==FNR) keeps "main_file" records
     #     from being mistaken for pairs when "pairs_file" is empty.
     #   - "pz" (size of "pair" array) is counted here, so it is still correct
     #     when "main_file" is empty and every pair must be reported as N/A.
     #   - Lines with fewer than 2 fields (e.g. blank lines) are not pairs.
     FILENAME==ARGV[1]{ if( NF>=2 ) pair[++pz]=" "$1" "$2" "; next }

     # Process "main_file"
     #   For each record in "main_file",
     #   check if "$1" is found in any of the "pair" elements
     { for( pi=1; pi<=pz; pi++ ){
           p = pair[pi]                  # Note that "p" has a leading and trailing space
           pix = index( p, " "$1" " )    # Char index (1-based position) of " "$1" " in "pair" element

           # When $1 is found, pix > 0
           if( pix-- ){                  # "pix--" decrements pix after the test
               # "pl" is the pair-plus-level key; the bare reference to
               # plevel[pl] creates that key without assigning a value.
               pl=p$2" "; plevel[pl]
               # Build a "data" array for
               #   presence of 1st and 2nd fruit, and for
               #   the input fields $3 and $4 and $5
               # Use "pl" plus a corresponding numeric suffix for "data" keys
               if( pix ){ data[pl 2]=1 } #  pix != 0 so it must be the 2nd fruit of the pair
               else     { data[pl 1]=1 } #  pix == 0 so it must be the 1st fruit of the pair
               for( v=3; v<=5; v++ ) data[pl v]+=$v
           }
       }
     }
     END{# process by pair
         for(pi=1; pi<=pz; pi++){
             p=pair[pi]
             na=1  # flag for when pair is N/A

             # Process by level (for-in order is unspecified)
             for( plk in plevel ){
                 if( index( plk, p ) == 1 ){   # key belongs to this pair
                   na=0
                   printf "%s", substr(plk,2)  # drop the leading space
                   for( v=3; v<=5; v++ ) printf "%s ", data[plk v]
                   # Presence flags differ when exactly one fruit was seen:
                   # print the fruit that was NOT seen at this level.
                   if( data[plk 1] != data[plk 2] ){
                     split(p,u)
                     if ( data[plk 1] ) printf "%s", u[2]
                     else               printf "%s", u[1]
                   }
                   else { printf "None" }
                   print ""
                 }
             }
             if( na ) print substr(p,2) "Na Na Na Na both"
         }
     }' pairs_file main_file | column -t -s' '

如果您想按“级别”对“对”组进行排序，请替换以下 2 行

             # Process by level
             for( plk in plevel ){

用这些行代替：

             # Sort by level
             # Collect every "plevel" key that belongs to the current pair "p",
             # one key per line, into the string "plk".
             plk=""; for( pl in plevel ) 
                 if( index( pl, p ) == 1 ) plk = plk sprintf( "%s\n", pl )
             # Hand the keys to an external "sort -V" and read the sorted
             # lines back (the sample output shows L2 ordered before L11).
             # NOTE(review): the keys are pasted inside double quotes of a
             # shell command line, so a key containing a double quote, "$",
             # a backquote or a backslash would be interpreted by the shell.
             cmd = "printf \"%s\" \""plk"\" | sort -V"
             ppli=0; while( ( cmd | getline pplk) > 0 )
                 pplkeys[++ppli] = pplk
             close(cmd)  # close the pipe so the next pair can run its own sort command
             pplz=ppli  # "pplz" size of "pplkeys" array (stale entries beyond it are ignored)

             # Process by level, now in sorted order
             for(ppli=1; ppli<=pplz; ppli++){
                 plk = pplkeys[ppli]

如果您想要表格输出，请按如下方式进行管道传输：

 awk '...' pairs_file main_file | column -t -s' '

输出：在每个“对”组内按级别排序，并通过 column 整理成表格

mango123   squash378  L1   8   10  12  None
mango123   squash378  L2   4   5   6   squash378
mango123   squash378  L5   10  12  13  mango123
mango123   squash378  L11  11  12  23  squash378
mango123   squash378  L15  0   1   3   mango123
squash378  orange765  L1   18  9   12  None
squash378  orange765  L5   10  12  13  orange765
squash378  orange765  L15  0   1   3   orange765
pear546    lime436    Na   Na  Na  Na  both

Answer 1

awk '# Report, per fruit pair and level, the summed values and the missing fruit.
     #   "pairs_file": <fruit1> <fruit2> per line.
     #   "main_file" : <fruit> <level> <$3> <$4> <$5> per line.
     # A pair that never shows up in "main_file" is printed once as
     # "Na Na Na Na both".

     # Process "pairs_file". Build the "pair" array, indexed 1..pz.
     # Include a leading and trailing space to unambiguously search for each
     # field value via index().
     #   - FILENAME==ARGV[1] (rather than NR==FNR) keeps "main_file" records
     #     from being mistaken for pairs when "pairs_file" is empty.
     #   - "pz" (size of "pair" array) is counted here, so it is still correct
     #     when "main_file" is empty and every pair must be reported as N/A.
     #   - Lines with fewer than 2 fields (e.g. blank lines) are not pairs.
     FILENAME==ARGV[1]{ if( NF>=2 ) pair[++pz]=" "$1" "$2" "; next }

     # Process "main_file"
     #   For each record in "main_file",
     #   check if "$1" is found in any of the "pair" elements
     { for( pi=1; pi<=pz; pi++ ){
           p = pair[pi]                  # Note that "p" has a leading and trailing space
           pix = index( p, " "$1" " )    # Char index (1-based position) of " "$1" " in "pair" element

           # When $1 is found, pix > 0
           if( pix-- ){                  # "pix--" decrements pix after the test
               # "pl" is the pair-plus-level key; the bare reference to
               # plevel[pl] creates that key without assigning a value.
               pl=p$2" "; plevel[pl]
               # Build a "data" array for
               #   presence of 1st and 2nd fruit, and for
               #   the input fields $3 and $4 and $5
               # Use "pl" plus a corresponding numeric suffix for "data" keys
               if( pix ){ data[pl 2]=1 } #  pix != 0 so it must be the 2nd fruit of the pair
               else     { data[pl 1]=1 } #  pix == 0 so it must be the 1st fruit of the pair
               for( v=3; v<=5; v++ ) data[pl v]+=$v
           }
       }
     }
     END{# process by pair
         for(pi=1; pi<=pz; pi++){
             p=pair[pi]
             na=1  # flag for when pair is N/A

             # Process by level (for-in order is unspecified)
             for( plk in plevel ){
                 if( index( plk, p ) == 1 ){   # key belongs to this pair
                   na=0
                   printf "%s", substr(plk,2)  # drop the leading space
                   for( v=3; v<=5; v++ ) printf "%s ", data[plk v]
                   # Presence flags differ when exactly one fruit was seen:
                   # print the fruit that was NOT seen at this level.
                   if( data[plk 1] != data[plk 2] ){
                     split(p,u)
                     if ( data[plk 1] ) printf "%s", u[2]
                     else               printf "%s", u[1]
                   }
                   else { printf "None" }
                   print ""
                 }
             }
             if( na ) print substr(p,2) "Na Na Na Na both"
         }
     }' pairs_file main_file | column -t -s' '

如果您想按“级别”对“对”组进行排序，请替换以下 2 行

             # Process by level
             for( plk in plevel ){

用这些行代替：

             # Sort by level
             # Collect every "plevel" key that belongs to the current pair "p",
             # one key per line, into the string "plk".
             plk=""; for( pl in plevel ) 
                 if( index( pl, p ) == 1 ) plk = plk sprintf( "%s\n", pl )
             # Hand the keys to an external "sort -V" and read the sorted
             # lines back (the sample output shows L2 ordered before L11).
             # NOTE(review): the keys are pasted inside double quotes of a
             # shell command line, so a key containing a double quote, "$",
             # a backquote or a backslash would be interpreted by the shell.
             cmd = "printf \"%s\" \""plk"\" | sort -V"
             ppli=0; while( ( cmd | getline pplk) > 0 )
                 pplkeys[++ppli] = pplk
             close(cmd)  # close the pipe so the next pair can run its own sort command
             pplz=ppli  # "pplz" size of "pplkeys" array (stale entries beyond it are ignored)

             # Process by level, now in sorted order
             for(ppli=1; ppli<=pplz; ppli++){
                 plk = pplkeys[ppli]

如果您想要表格输出，请按如下方式进行管道传输：

 awk '...' pairs_file main_file | column -t -s' '

输出：在每个“对”组内按级别排序，并通过 column 整理成表格

mango123   squash378  L1   8   10  12  None
mango123   squash378  L2   4   5   6   squash378
mango123   squash378  L5   10  12  13  mango123
mango123   squash378  L11  11  12  23  squash378
mango123   squash378  L15  0   1   3   mango123
squash378  orange765  L1   18  9   12  None
squash378  orange765  L5   10  12  13  orange765
squash378  orange765  L15  0   1   3   orange765
pear546    lime436    Na   Na  Na  Na  both

添加从文件中查找的成对值

答案1

相关内容