求两个文件的并集，并保证新文件中的记录唯一

Question 1

awk '
    # Purpose: print every record of the second operand (file1); when the
    # first operand (file2) holds a record with the same (column 1, column 2)
    # key whose column 3 lies within +/-2 of this record, add that record
    # column 4 to column 4 here.
    #
    # Pass 1 - first operand: index column 3 and column 4 by key. If a key
    # occurs more than once, the last occurrence wins.
    # FILENAME is compared with ARGV[1] rather than testing NR==FNR, because
    # NR==FNR is also true for every record of the second operand when the
    # first operand is empty, which would suppress all output.
    FILENAME == ARGV[1] {
        a[$1,$2] = $3
        val[$1,$2] = $4
        next
    }
    # Pass 2 - second operand: only records whose key was indexed above.
    ($1,$2) in a {
        # Step n over $3-2 .. $3+2 and stop at the first value equal to the
        # stored column 3.
        for (n=$3-2; n<=$3+2; n++)
            if (a[$1,$2] == n) {
                # Assigning to $4 rebuilds the record, so a changed line is
                # printed with its fields joined by single spaces (OFS).
                $4+=val[$1,$2]
                break
            }
    }
    # Every record of the second operand is printed, changed or not.
    {print}
' file2 file1

AT1 AT22 24 2 ATAGATA ATTATAT
AT2  AT24  22  0 ATAGATA ATTATAT
AT3 AT23 263 4 ATAGATA ATTATAT
AT4 AT28 252 7 ATAGATA ATTATAT

Answer

awk '
    # Purpose: print every record of the second operand (file1); when the
    # first operand (file2) holds a record with the same (column 1, column 2)
    # key whose column 3 lies within +/-2 of this record, add that record
    # column 4 to column 4 here.
    #
    # Pass 1 - first operand: index column 3 and column 4 by key. If a key
    # occurs more than once, the last occurrence wins.
    # FILENAME is compared with ARGV[1] rather than testing NR==FNR, because
    # NR==FNR is also true for every record of the second operand when the
    # first operand is empty, which would suppress all output.
    FILENAME == ARGV[1] {
        a[$1,$2] = $3
        val[$1,$2] = $4
        next
    }
    # Pass 2 - second operand: only records whose key was indexed above.
    ($1,$2) in a {
        # Step n over $3-2 .. $3+2 and stop at the first value equal to the
        # stored column 3.
        for (n=$3-2; n<=$3+2; n++)
            if (a[$1,$2] == n) {
                # Assigning to $4 rebuilds the record, so a changed line is
                # printed with its fields joined by single spaces (OFS).
                $4+=val[$1,$2]
                break
            }
    }
    # Every record of the second operand is printed, changed or not.
    {print}
' file2 file1

AT1 AT22 24 2 ATAGATA ATTATAT
AT2  AT24  22  0 ATAGATA ATTATAT
AT3 AT23 263 4 ATAGATA ATTATAT
AT4 AT28 252 7 ATAGATA ATTATAT

Question 2

这应该满足您的要求（就我的测试用例而言）

#!/bin/bash
#
# Print the union of file1 and file2 (both read from the current directory)
# with near-duplicate records collapsed.
#
# Records are whitespace-separated: column 1, column 2, column 3 (integer),
# column 4 (integer), then the rest of the line. Two neighbouring records are
# merged when columns 1 and 2 are identical and their column-3 values differ
# by at most 2. The merged record keeps every column of the earlier record
# except column 4, which becomes the sum of both column-4 values.

# Print one record on a single line, fields joined by one space.
# The arguments are re-split on whitespace first, so empty fields are dropped
# and blank runs are squeezed; fields such as "*" or "-n" are emitted
# literally (no globbing, no option parsing).
print_record() {
  local -a words
  read -r -a words <<< "$*"
  printf '%s\n' "${words[*]}"
}

main() {
  local sorted a b c d e delta sum_col4
  local have_prev=0
  local -a prev=() cur=()

  # Group on columns 1 and 2, then order numerically on column 3 so that
  # close column-3 values become neighbours (a plain lexical sort places
  # 100 between 10 and 11). -b ignores padding blanks in the key columns and
  # LC_ALL=C makes the ordering byte-wise and reproducible.
  sorted=$(cat file1 file2 | LC_ALL=C sort -b -k1,1 -k2,2 -k3,3n)

  # -r keeps backslashes in the data intact.
  while read -r a b c d e; do
    # Blank lines (including the single empty line produced by empty input)
    # carry no record.
    [[ -n $a ]] || continue
    cur=( "$a" "$b" "$c" "$d" "$e" )

    # The first record only seeds the comparison.
    if (( ! have_prev )); then
      prev=( "${cur[@]}" )
      have_prev=1
      continue
    fi

    # expr is kept deliberately: it reads zero-padded fields as decimal
    # (shell arithmetic would treat them as octal) and prints nothing for a
    # non-integer field, which leaves delta empty and prevents a merge.
    delta=
    if [[ ${cur[0]} == "${prev[0]}" && ${cur[1]} == "${prev[1]}" ]]; then
      delta=$(expr "${cur[2]}" - "${prev[2]}")
    fi

    if [[ -n $delta ]] && (( delta >= -2 && delta <= 2 )); then
      # Same key and close enough: fold column 4 into the pending record and
      # keep waiting, since the next record may merge into it as well.
      sum_col4=$(expr "${cur[3]}" + "${prev[3]}")
      prev[3]=$sum_col4
    else
      # Different key or column 3 too far apart: the pending record is final.
      print_record "${prev[@]}"
      prev=( "${cur[@]}" )
    fi
  done <<< "$sorted"

  # Flush the last pending record, if there was any input at all.
  if (( have_prev )); then
    print_record "${prev[@]}"
  fi
}

main "$@"

Answer

这应该满足您的要求（就我的测试用例而言）

#!/bin/bash
#
# Print the union of file1 and file2 (both read from the current directory)
# with near-duplicate records collapsed.
#
# Records are whitespace-separated: column 1, column 2, column 3 (integer),
# column 4 (integer), then the rest of the line. Two neighbouring records are
# merged when columns 1 and 2 are identical and their column-3 values differ
# by at most 2. The merged record keeps every column of the earlier record
# except column 4, which becomes the sum of both column-4 values.

# Print one record on a single line, fields joined by one space.
# The arguments are re-split on whitespace first, so empty fields are dropped
# and blank runs are squeezed; fields such as "*" or "-n" are emitted
# literally (no globbing, no option parsing).
print_record() {
  local -a words
  read -r -a words <<< "$*"
  printf '%s\n' "${words[*]}"
}

main() {
  local sorted a b c d e delta sum_col4
  local have_prev=0
  local -a prev=() cur=()

  # Group on columns 1 and 2, then order numerically on column 3 so that
  # close column-3 values become neighbours (a plain lexical sort places
  # 100 between 10 and 11). -b ignores padding blanks in the key columns and
  # LC_ALL=C makes the ordering byte-wise and reproducible.
  sorted=$(cat file1 file2 | LC_ALL=C sort -b -k1,1 -k2,2 -k3,3n)

  # -r keeps backslashes in the data intact.
  while read -r a b c d e; do
    # Blank lines (including the single empty line produced by empty input)
    # carry no record.
    [[ -n $a ]] || continue
    cur=( "$a" "$b" "$c" "$d" "$e" )

    # The first record only seeds the comparison.
    if (( ! have_prev )); then
      prev=( "${cur[@]}" )
      have_prev=1
      continue
    fi

    # expr is kept deliberately: it reads zero-padded fields as decimal
    # (shell arithmetic would treat them as octal) and prints nothing for a
    # non-integer field, which leaves delta empty and prevents a merge.
    delta=
    if [[ ${cur[0]} == "${prev[0]}" && ${cur[1]} == "${prev[1]}" ]]; then
      delta=$(expr "${cur[2]}" - "${prev[2]}")
    fi

    if [[ -n $delta ]] && (( delta >= -2 && delta <= 2 )); then
      # Same key and close enough: fold column 4 into the pending record and
      # keep waiting, since the next record may merge into it as well.
      sum_col4=$(expr "${cur[3]}" + "${prev[3]}")
      prev[3]=$sum_col4
    else
      # Different key or column 3 too far apart: the pending record is final.
      print_record "${prev[@]}"
      prev=( "${cur[@]}" )
    fi
  done <<< "$sorted"

  # Flush the last pending record, if there was any input at all.
  if (( have_prev )); then
    print_record "${prev[@]}"
  fi
}

main "$@"

求两个文件的并集，并保证新文件中的记录唯一

答案1

答案2

相关内容