在多个文件的匹配行中查找第 3 列数值较大的行

Question

优化后的 awk 解决方案大约快 27 倍。它先读取 file*，按第 1 列记录第 3 列最大的那一行；再读取 main，若 file* 中存在第 3 列更大的同键行则输出该行，否则输出 main 中的原行：

awk -F':' 'FILENAME != "main"{ 
               if (!($1 in a) || $3 > a[$1]) { a[$1] = $3; b[$1] = $0 } next; 
           }
           { 
               if (($1 in a) && (a[$1] > $3)){ print b[$1]; delete b[$1] } 
               else print; 
           }' file* main

输出：

test01:10957:8172:2472
test02:1401:6160:5894
test03:7245:8934:5725
test04:3737:10175:5219
test05:10769:10381:1102
test06:3605:3713:7695
test07:1445:2850:2755
test08:4707:9047:10578
test09:2913:5628:1305

执行时间比较：

$ time(awk -F: '{print $1,$3}' main |while read a b; do grep ^${a}: main file* | sort -t":" -rnk4 | awk -F':' -vb=$b '{if($4>b){print $0;next} else {print ($1=="main")? $0 : NULL}}' | head -1; done > /dev/null)

real    0m0.111s
user    0m0.004s
sys     0m0.012s

$ time(awk -F':' 'FILENAME != "main"{ if (!($1 in a) || $3 > a[$1]) { a[$1]=$3; b[$1]=$0 } next }{ if (($1 in a) && (a[$1] > $3)){ print b[$1]; delete b[$1] } else print  }' file* main > /dev/null)

real    0m0.004s
user    0m0.000s
sys     0m0.000s

Answer 1

优化后的 awk 解决方案大约快 27 倍。它先读取 file*，按第 1 列记录第 3 列最大的那一行；再读取 main，若 file* 中存在第 3 列更大的同键行则输出该行，否则输出 main 中的原行：

awk -F':' 'FILENAME != "main"{ 
               if (!($1 in a) || $3 > a[$1]) { a[$1] = $3; b[$1] = $0 } next; 
           }
           { 
               if (($1 in a) && (a[$1] > $3)){ print b[$1]; delete b[$1] } 
               else print; 
           }' file* main

输出：

test01:10957:8172:2472
test02:1401:6160:5894
test03:7245:8934:5725
test04:3737:10175:5219
test05:10769:10381:1102
test06:3605:3713:7695
test07:1445:2850:2755
test08:4707:9047:10578
test09:2913:5628:1305

执行时间比较：

$ time(awk -F: '{print $1,$3}' main |while read a b; do grep ^${a}: main file* | sort -t":" -rnk4 | awk -F':' -vb=$b '{if($4>b){print $0;next} else {print ($1=="main")? $0 : NULL}}' | head -1; done > /dev/null)

real    0m0.111s
user    0m0.004s
sys     0m0.012s

$ time(awk -F':' 'FILENAME != "main"{ if (!($1 in a) || $3 > a[$1]) { a[$1]=$3; b[$1]=$0 } next }{ if (($1 in a) && (a[$1] > $3)){ print b[$1]; delete b[$1] } else print  }' file* main > /dev/null)

real    0m0.004s
user    0m0.000s
sys     0m0.000s

在多个文件的匹配行中查找第 3 列数值较大的行

答案1

相关内容