我有一个文件,其中的条目为 key: value 格式,如下所示:
cat data.txt
name: 'tom'
tom_age: '31'
status_tom_mar: 'yes'
school: 'anne'
fd_year_anne: '1987'
name: 'hmz'
hmz_age: '21'
status_hmz_mar: 'no'
school: 'svp'
fd_year_svp: '1982'
name: 'toli'
toli_age: '41'
以此类推...
我只需要找出键重复的那些 key: value 条目,
并把它们各自合并成单个条目打印出来。
下面的代码让我得到重复的键
# Print every key (first whitespace-delimited field) that occurs more than once.
awk '{ print $1 }' data.txt | sort | uniq -d
name:
school:
但是,我希望在输出中把重复键的各个值合并到同一行。
预期输出:
name: ['tom', 'hmz', 'toli']
school: ['anne', 'svp']
tom_age: '31'
status_tom_mar: 'yes'
fd_year_anne: '1987'
hmz_age: '21'
status_hmz_mar: 'no'
fd_year_svp: '1982'
toli_age: '41'
你能建议一下吗?
答案1
使用 awk:
$ awk -F': ' '
# Per line: bump the occurrence counter of the key and extend its
# comma-separated value list.
{
    if (seen[$1]++)
        vals[$1] = vals[$1] ", " $2
    else
        vals[$1] = $2
}
# At end of input: keys seen more than once get their list wrapped in [ ].
END {
    for (key in seen) {
        if (seen[key] > 1)
            printf "%s: [ %s ]\n", key, vals[key]
        else
            printf "%s: %s\n", key, vals[key]
    }
}' data.txt
hmz_age: '21'
tom_age: '31'
fd_year_anne: '1987'
school: [ 'anne', 'svp' ]
name: [ 'tom', 'hmz', 'toli' ]
toli_age: '41'
fd_year_svp: '1982'
status_hmz_mar: 'no'
status_tom_mar: 'yes'
Perl 方法:
$ perl -F: -lane '
    # Collect every value under its key, in input order.
    push @{ $k{$F[0]} }, $F[1];
    END {
        for $key (keys %k) {
            @vals = @{ $k{$key} };
            if (@vals > 1) {
                # Repeated key: join all of its values into one bracketed list.
                $data = "[" . join(",", @vals) . "]";
            }
            else {
                # Unique key: -F: leaves the blank after the colon on the value;
                # drop it so the line is not printed with two spaces.
                ($data = $vals[0]) =~ s/^\s+//;
            }
            print "$key: $data";
        }
    }' data.txt
status_tom_mar: 'yes'
fd_year_anne: '1987'
tom_age: '31'
toli_age: '41'
fd_year_svp: '1982'
hmz_age: '21'
school: [ 'anne', 'svp']
name: [ 'tom', 'hmz', 'toli']
status_hmz_mar: 'no'
或者,也许更容易理解一点:
perl -F: -lane '
    # -a with -F: splits each line at ":" into @F; file every value under its key.
    @fields = @F;
    push @{ $key_hash{$fields[0]} }, $fields[1];
    END {
        for $key (keys %key_hash) {
            @key_data = @{ $key_hash{$key} };
            if (scalar(@key_data) > 1) {
                # Repeated key: print all values as one bracketed list.
                $data = "[" . join(",", @key_data) . "]";
            }
            else {
                # The split leaves the blank that followed the colon at the
                # front of the value; strip it so only one space is printed.
                ($data = $key_data[0]) =~ s/^\s+//;
            }
            print "$key: $data";
        }
    }' data.txt
答案2
一个简短的 awk 程序就可以实现这一目标
awk -F': ' '
# Runs for every input line; fields are split at colon+space
{
    # A non-empty list already exists for this key: add the separator first
    if (joined[$1] != "")
        joined[$1] = joined[$1] ", "
    # Then add the new value and count one more occurrence of the key
    joined[$1] = joined[$1] $2
    seen[$1] += 1
}
# After the last line
END {
    # Visit every key collected above; repeated keys get square brackets
    for (key in joined) {
        fmt = (seen[key] > 1) ? "[%s]" : "%s"
        printf "%s: " fmt "\n", key, joined[key]
    }
}
' data.txt
输出
hmz_age: ['21', '41']
tom_age: '31'
fd_year_anne: ['1987', '1982']
school: ['anne', 'svp']
name: ['tom', 'hmz', 'toli']
status_hmz_mar: 'no'
status_tom_mar: 'yes'
答案3
在 awk 中:
awk '
  # Store each value under (key, running index); cnt[key] is the number of
  # values seen for that key. Plain multi-subscript arrays are used so the
  # program does not depend on gawk arrays of arrays.
  { vals[$1, ++cnt[$1]] = $2 }
  END {
    for (key in cnt) {
      # Data always goes through "%s": a literal % in the input must not be
      # interpreted as a printf format directive.
      printf "%s", key
      if (cnt[key] > 1) {
        # Walk the values by index so they come out in input order.
        sep = " ["
        for (j = 1; j <= cnt[key]; j++) {
          printf "%s%s", sep, vals[key, j]
          sep = ","
        }
        print "]"
      } else {
        print vals[key, 1]
      }
    }
  }
' data.txt
输出:
hmz_age:'21'
fd_year_svp:'1982'
fd_year_anne:'1987'
name: ['tom','hmz','toli']
school: ['anne','svp']
status_tom_mar:'yes'
tom_age:'31'
toli_age:'41'
status_hmz_mar:'no'
答案4
步骤1
# Step 1: write one "key: [v1, v2, ...]" line per repeated key to output.txt.
# A single awk pass replaces the loop of per-key pipelines: keys are never
# word-split by the shell or used as grep patterns, and the lines come out
# in the order in which the keys first appear in file.txt.
awk '
  {
    pos = index($0, ":")
    if (pos == 0) next                 # no colon: not a key/value line
    key = substr($0, 1, pos - 1)
    val = substr($0, pos + 1)          # everything after the first colon
    sub(/^[ \t]+/, "", val)            # drop the blank after the colon
    if (!(key in cnt)) order[++n] = key
    vals[key] = (cnt[key]++ ? vals[key] ", " : "") val
  }
  END {
    for (j = 1; j <= n; j++)
      if (cnt[order[j]] > 1)
        printf "%s: [%s]\n", order[j], vals[order[j]]
  }
' file.txt >output.txt
步骤2
# Step 2: append to output.txt the lines whose key occurs exactly once,
# in file order. file.txt is read twice: the first pass counts the keys,
# the second prints. Keys are compared exactly (not as regular expressions),
# so a unique key that is a substring of another key no longer drags that
# other line along, and the counts themselves are never used as patterns.
awk -F ':' 'NR == FNR { cnt[$1]++; next } cnt[$1] == 1' file.txt file.txt >>output.txt
输出
name: ['tom', 'hmz', 'toli']
school: ['anne', 'svp']
tom_age: '31'
status_tom_mar: 'yes'
fd_year_anne: '1987'
hmz_age: '21'
status_hmz_mar: 'no'
fd_year_svp: '1982'
toli_age: '41'