使用 awk 对 2 个文件进行算术运算并生成一系列新文件

Question 1

TL;DR：针对您的示例硬编码的紧凑 awk 脚本

# Pass 1 - first file operand (NR == FNR): turn each input line into a printf
# template and remember the original value of every selected field.
NR == FNR {
    # Double each literal % so that printf later emits it verbatim.
    gsub(/%/, "%%")
    # This regex selects the fields by the name found in the third field.
    if ($3 ~ /^(vox_la_max|knockout_max|shed_prob)[^[:alnum:]_]*$/) {
        vals[NR] = $1
        # Swap the leading number for the conversion spec held in OFMT.
        sub(/^[0-9]+(\.[0-9]+)?/, OFMT)
    }
    tmpl[NR] = $0
    nl++
    next
}

# Pass 2 - multipliers file: each of its lines yields one file, out<FNR>.txt.
{
    out = "out" FNR ".txt"
    # Create the file (or truncate a stale one) even when there is no template.
    printf "" > out
    m = 1
    for (l = 1; l <= nl; l++) {
        # A selected line consumes the next multiplier; other lines get a
        # dummy 0 argument.
        if (l in vals)
            printf tmpl[l] ORS, $(m++) * vals[l] >> out
        else
            printf tmpl[l] ORS, 0 >> out
    }
    close(out)
}

将其用作：

LC_NUMERIC=C awk -f script input.txt multipliers.txt

它会生成名为 outX.txt 的输出文件。

如果您的区域设置使用逗号而不是点作为浮点值的小数分隔符，则需要加上 LC_NUMERIC=C 这一部分。

为了简单起见，我做了一些看起来合理的假设：

所需的输入字段始终是单独的一个值，并带有相邻的注释；注释以单个单词指示字段名称，且该单词必须用空白（至少一个空格）与 /* 隔开
没有同名的字段
浮点值仅用数字和（可能）一个点表示，即没有指数或其他科学表示

与上面相同的脚本，但写得更详细、带有注释说明，并扩展为支持：

按行号任意指定所需字段
按名称任意指定所需字段，名称取自各字段所在输入行上的注释
输出文件自动以输入文件名命名；输入文件名可以带有一个扩展名（例如 .txt），其路径（如果有）不应包含点号；换句话说，最好从包含输入文件的目录运行脚本

# Preparations done once, before any input is read:
#   - derive the output file name prefix (outpfx) from the first file operand
#   - turn the optional "nums" variable (comma-separated line numbers, set
#     with -v) into the lookup array "rows"
BEGIN {
    # Output files are named after the input file: keep its directory part
    # as is and cut only the base name at its first dot. Splitting the whole
    # path on "." would instead truncate it at a dot in the directory part
    # (e.g. "./input.txt" would yield an empty prefix).
    dirlen = match(ARGV[1], /.*\//) ? RLENGTH : 0
    split(substr(ARGV[1], dirlen + 1), f, ".")
    outpfx = substr(ARGV[1], 1, dirlen) f[1]
    # remember wanted fields specified on command line as comma-separated line numbers
    if (nums) {
        # split variable "nums" on comma into helper array "r"
        n = split(nums, r, ",")
        # loop over helper array to build final array, thus indexed by wanted line numbers;
        # "+ 0" makes the subscript numeric so that entries with stray blanks
        # or leading zeros (e.g. nums="36, 038") still match NR later on
        while (n) rows[r[n--] + 0]
    }
}

# Multipliers pass: NR and FNR differ only after the first file operand has
# been read, so this rule handles the records of the multipliers file. Each
# record produces one output file.
NR != FNR {
    # name of the output file for this set of multipliers
    dest = outpfx FNR ".txt"
    # printing an empty string creates the file or truncates an existing one
    printf "" > dest
    # index of the next multiplier field to consume on this record
    next_mult = 1
    # walk the template lines collected from the input file, in order
    for (row = 1; row <= numlines; row++) {
        # each template line is used as the printf format string: a wanted
        # line receives (next multiplier * remembered value), any other line
        # receives a dummy 0 argument
        if (row in wanted_values)
            printf tmpl[row] ORS, $(next_mult++) * wanted_values[row] >> dest
        else
            printf tmpl[row] ORS, 0 >> dest
    }
    close(dest)
    next
}

# Template pass: runs for every record not claimed by an earlier rule and
# stores the line in tmpl as a printf format string.
{
    # printf gives % a special meaning, so double every literal % first
    gsub(/%/, "%%")
    # a line is selected by its line number (rows) or, when "names" is set,
    # by the field name matched against its third whitespace-separated field
    selected = (NR in rows)
    if (!selected && names)
        selected = match($3, "^("names")[^[:alnum:]_]*$")
    if (selected) {
        # keep the original value for the multiplication done later
        wanted_values[NR] = $1
        # put the conversion spec held in OFMT in place of the leading number;
        # printf fills it in while the multipliers file is processed
        sub(/^[0-9]+(\.[0-9]+)?/, OFMT)
    }
    # store the (possibly rewritten) line and count it
    tmpl[NR] = $0
    numlines++
}

像这样使用它：

# select fields by line number: pass a comma-separated list in "nums"
LC_NUMERIC=C awk -f script -v nums=36,38,39 input.txt multipliers.txt
# or select fields by name: "names" is inserted into a regexp, so separate the alternatives with |
LC_NUMERIC=C awk -f script -v names='vox_la_max|knockout_max|shed_prob' input.txt multipliers.txt
# or combine both selectors; a line is selected when either one matches it
LC_NUMERIC=C awk -f script -v nums=15,112,234,71,5 -v names='vox_la_max|numesp' input.txt multipliers.txt

如果指定的字段多于乘数，则超出的字段将变为0（乘以 0）。

如果您指定的字段少于乘数，则多余的乘数将被忽略。

在任何情况下，字段始终按照它们出现的行号的顺序消耗乘数，即输入文件中遇到的第一个字段消耗第一个乘数，无论您如何指定该字段。

Answer