awk 中的快速散列?

awk 中的快速散列?

我觉得有人用 AWK 编写 MD5 很酷:https://kaworu.ch/blog/2017/05/23/implementing-md5-in-AWK/

不幸的是,它的运行速度相当慢。我正在寻找一种可以在 AWK 中使用的快速哈希函数,用来计算单个字段的 16 位或 32 位哈希值。

它不必是加密安全的。

这样的功能存在吗?

我的替代方案是使用 Perl:

perl -MB -F, -ane 'print B::hash($F[0])'

因此,所求的解决方案应该比上面的 Perl 方案更快。

答案1

如果您需要 16 位哈希,可以使用 CRC16。下面是我找到的一段 CRC16 代码(其中用到的 and()、xor()、lshift()、rshift() 是 gawk 提供的位运算函数):

# Set up the globals shared by crc16() and crc16Notable():
#   cmpl    - 16-bit mask applied to the CRC register after each update
#   initial - starting value of the CRC register used by crc16()
#   TABLE   - 256-entry lookup table for the polynomial 0x1021
#   BYTE    - maps a one-character string to its numeric code (0..255)
BEGIN {
    cmpl = 0xffff;
    initial = 0x0000;

    crc16_init_tables(0x1021);
}

# crc16_init_tables(poly)
#   Fills the global TABLE[0..255] and BYTE[] arrays.
#
#   TABLE[n] is the 16-bit register obtained by placing n in the high
#   byte and clocking it through eight MSB-first shift/XOR steps with
#   `poly`.  For poly = 0x1021 this yields TABLE[1] = 0x1021,
#   TABLE[16] = 0x1231, ..., TABLE[255] = 0x1ef0, i.e. exactly the values
#   that used to be written out as 256 literal assignments.
#
#   BYTE[c] is the numeric code of the one-character string c, as produced
#   by sprintf("%c", n) for n in 0..255.
#   NOTE(review): in a multibyte locale gawk presumably treats %c and
#   split(s, a, "") as character-based rather than byte-based, so
#   characters above code 255 would have no BYTE entry - consider running
#   under LC_ALL=C or `gawk -b` for byte-wise hashing; confirm.
#
#   n, bit and reg are locals (extra parameters), so no loop counter leaks
#   into the global namespace.
function crc16_init_tables(poly,    n, bit, reg) {
    for (n = 0; n <= 255; n++) {
        reg = lshift(n, 8);
        for (bit = 0; bit < 8; bit++) {
            # When the top bit is about to be shifted out, fold in the polynomial.
            if (and(reg, 0x8000))
                reg = xor(lshift(reg, 1), poly);
            else
                reg = lshift(reg, 1);
            reg = and(reg, 0xffff);
        }
        TABLE[n] = reg;
        BYTE[sprintf("%c", n)] = n;
    }
}

# crc16Notable(input)
#   Table-free CRC16 of the string `input`, starting from a zero register.
#   Each step XORs the intermediate value into the register at shifts 0, 5
#   and 12, so it yields the same result as the table-driven crc16() with
#   polynomial 0x1021 and initial value 0.
#
#   Relies on the global BYTE[] map (character -> numeric code) that the
#   BEGIN block builds.  Returns a number in 0..65535; an empty string
#   returns 0.
#
#   Everything after `input` is a local variable (the usual awk idiom of
#   extra parameters); callers pass only `input`.  Keeping them local stops
#   the function from clobbering a caller's own `i`, `len`, `crc`, etc.
function crc16Notable(input,    crc, len, buf, i, byte, code) {
    crc = 0x0;
    # An empty separator splits the string into individual characters.
    len = split(input, buf, "");
    for(i = 1; i <= len; i++) {
        byte = BYTE[buf[i]];
        # code = (high byte of crc) XOR (input byte)
        code = and(rshift(crc, 8), 0xFF);
        code = xor(code, and(byte, 0xFF));
        code = xor(code, rshift(code, 4));
        # Shift the register left by one byte, then fold `code` in at
        # bit offsets 0, 5 and 12 (5 + 7).
        crc = and(lshift(crc, 8), 0xFFFF);
        crc = xor(crc, code);
        code = and(lshift(code, 5), 0xFFFF);
        crc = xor(crc, code);
        code = and(lshift(code, 7), 0xFFFF);
        crc = xor(crc, code);
    }
    return crc;
}

# crc16(input)
#   Table-driven CRC16 of the string `input`.
#
#   Uses globals set up by the BEGIN block:
#     initial - starting register value
#     cmpl    - mask that keeps the register to 16 bits
#     TABLE   - 256-entry lookup table
#     BYTE    - character -> numeric code map
#   Returns a number in 0..65535; an empty string returns `initial`.
#
#   Everything after `input` is a local variable (the usual awk idiom of
#   extra parameters); callers pass only `input`.  Keeping them local means
#   a caller can safely do `for (i = 1; i <= NF; i++) print crc16($i)`
#   without its loop counter being overwritten.
function crc16(input,    crc, len, buf, i, tbl_idx) {
    crc = initial;
    # An empty separator splits the string into individual characters.
    len = split(input, buf, "");
    for(i = 1; i <= len; i++) {
        # Index = (high byte of crc) XOR (input byte), kept to 8 bits.
        tbl_idx = and(xor(rshift(crc, 8), BYTE[buf[i]]), 0xff);
        # Shift the register left one byte and fold in the table entry.
        crc = and(xor(TABLE[tbl_idx], lshift(crc, 8)), cmpl);
    }
    return crc;
}

只需调用函数 crc16("input string") 即可得到结果。

答案2

只需使用更快的系统工具:

$ awk '{ system( "echo \""$2"\"| md5sum" ) }' input   # md5sum writes the digest itself; printing the return value of system() would add a stray exit-status line after each digest

与往常一样,使用 system() 时请务必小心,并确保输入已经过清理(例如,字段中的引号、反引号或 $(...) 都会被 shell 解释执行)。

相关内容