awk 根据单元格值合并行

awk 根据单元格值合并行

我想使用 awk 合并第 1,2,3,4,5,6,7 列匹配的行,并合并其中第 8 列中的唯一条目。

输入是

AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0015L5;0017L5
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0017L5;0019L5
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0019L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0090L5;0094L5;0089L5;0091L5;
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0091L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0060L5

所需输出

AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5;0015L5;0017L5;0019L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5;0090L5;0094L5;0091L5;
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0078L5;0060L5

目前我使用下面的代码。

awk -F' *, *' '{a[$1","$2","$3","$4","$5","$6","$7]=a[$1","$2","$3","$4","$5","$6","$7] "," $8} END {for (k in a) {print k a[k] | "sort -u"}}'

但它似乎不能正常工作。任何人都可以提出任何建议吗?

答案1

与单呆呆地过程:

awk -F, 'BEGIN{ PROCINFO["sorted_in"]="@ind_str_asc" }
        { k=$1 FS $2 FS $3 FS $4 FS $5 FS $6 FS $7 }
        { a[k]=a[k]? a[k]";"$8 : $8 }
        END{ 
            for(i in a) { 
                split(a[i],b,";"); r=""; 
                for (j in b) { if(!c[b[j]]++) r=(r=="")? b[j]: r";"b[j] }
                delete c; print i,r 
            } 
        }' OFS=',' file
  • PROCINFO["sorted_in"]="@ind_str_asc"- 按数组键/索引进行比较/排序。我们可以将预定义数组设置PROCINFO为一组预定义值中的一个。这里描述的那些特殊值(文档):https://www.gnu.org/software/gawk/manual/gawk.html#Controlling-Scanning

  • k=$1 FS $2 FS $3 FS $4 FS $5 FS $6 FS $7- 构建唯一的密钥

  • a[k]=a[k]? a[k]";"$8 : $8- 累积每个字段的第 8 个字段值团体


输出:

AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5;0015L5;0017L5;0019L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5;0090L5;0094L5;0091L5;
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0078L5;0060L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5

答案2

让我们首先对其进行唯一sort -u处理,然后通过awk.

awk -F',' -v OFS=, '{cpyNF=$NF;NF--;a[$0]=a[$0]","cpyNF} 
    END{for (i in a) print i""a[i]}' <(sort -u infile)

答案3

第一个变体

sedawk并被datamash使用。如果您感兴趣并且代码可以满足您的要求,我可以添加解释。行不应包含:(冒号),因为我选择它作为 的分隔符datamash。当然可以改成其他的。

sed 's/,/:/7' input.txt |
datamash -t: -g 1 unique 2 | 
awk -F: '{
    printf "%s,", $1;

    size = split($2, arr, "[,;]");

    for(i = 1; i <= size; i++) {
        if(!unique_arr[arr[i]]) {
            printf "%s; ", arr[i];
            unique_arr[arr[i]] = 1;
        }
    }
    print "";
    delete unique_arr;
}' | sed 's/[; ]*$//g'

输出

AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0014L5; 0015L5; 0017L5; 0019L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0089L5; 0090L5; 0094L5; 0091L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0060L5; 0078L5

第二种变体 - 没有 datamash

sed 's/,/:/7; s/\s*;$//' input.txt |
awk -F: '
{
    size = split($2, arr_eight_field, ";"); 
    for(i = 1; i <= size; i++) {
        main_arr[$1][arr_eight_field[i]] = 1;
    }
}
END {
    for(seven_fields in main_arr) {
        eight = "";
        for(i in main_arr[seven_fields]) {
            eight = (eight) ? eight "; " i : i;
        }
        print seven_fields "," eight;
    }
}' | sort

输出

AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/c-3885516020-20151008-00,infinity ,0007L5
AB-DB1,No_IP_Found,10/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/cf_ABDB_02qj5qta_1_1_892529578,infinity ,0023L5
AB-DB1,No_IP_Found,11/01/2015,INFINITY,Adhoc-ab-db1-Sunset,DB_Backup/Archived/Offline,infinity ,No_Media_Used/Disk
AB-DB1,No_IP_Found,11/08/2015,INFINITY,Adhoc-ab-db1-Sunset,/avqln1ic_1_1,infinity ,0017L5; 0015L5; 0019L5; 0014L5
APPSRV,172.25.128.100,09/11/2017,12/13/2017,MG16-Genral-OS-LINUX-MONTHLY,/;/Monitoring/;/Monitoring_old/;/NetBackup_7.5_CLIENTS.tar;/bin/;/boot/;/data1/;/data2/;/dev/;/etc/;/home/;/lib/;/lib64/;/lost+found/;/media/;/misc/;/mnt/;/net/;/opt/;/proc/;/root/;/sbin/;/selinux/;/srv/;/sys/;/tftpboot/;/tmp/;/usr/;/var/,3 months ,0090L5; 0089L5; 0091L5; 0094L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,DB_Backup/Archived/Offline,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/etc/,1 month ,0078L5
BB-LITE,172.25.129.32,09/25/2017,10/26/2017,MG12-BB-LITE-AP-BROKER-DAILY,/home/bblite/,1 month ,0060L5; 0078L5

相关内容