Skip to content

Commit 15d0a18

Browse files
committed
Eliminate BCE in mask algorithm
Thanks again to @renthraysk. This provides another significant speedup.

benchmark                        old MB/s     new MB/s     speedup
Benchmark_mask/2/fast-8          405.48       513.25       1.27x
Benchmark_mask/3/fast-8          518.93       661.92       1.28x
Benchmark_mask/4/fast-8          1207.10      1252.39      1.04x
Benchmark_mask/8/fast-8          1708.82      1655.63      0.97x
Benchmark_mask/16/fast-8         3418.58      3051.25      0.89x
Benchmark_mask/32/fast-8         5789.43      5813.31      1.00x
Benchmark_mask/128/fast-8        12819.53     14804.50     1.15x
Benchmark_mask/512/fast-8        18247.06     21659.50     1.19x
Benchmark_mask/4096/fast-8       19802.31     23885.68     1.21x
Benchmark_mask/16384/fast-8      20896.97     25081.11     1.20x
1 parent 3b6e614 commit 15d0a18

File tree

1 file changed

+52
-52
lines changed

1 file changed

+52
-52
lines changed

frame.go

+52-52
Original file line numberDiff line numberDiff line change
@@ -344,79 +344,79 @@ func mask(key uint32, b []byte) uint32 {
344344
for len(b) >= 128 {
345345
v := binary.LittleEndian.Uint64(b)
346346
binary.LittleEndian.PutUint64(b, v^key64)
347-
v = binary.LittleEndian.Uint64(b[8:])
348-
binary.LittleEndian.PutUint64(b[8:], v^key64)
349-
v = binary.LittleEndian.Uint64(b[16:])
350-
binary.LittleEndian.PutUint64(b[16:], v^key64)
351-
v = binary.LittleEndian.Uint64(b[24:])
352-
binary.LittleEndian.PutUint64(b[24:], v^key64)
353-
v = binary.LittleEndian.Uint64(b[32:])
354-
binary.LittleEndian.PutUint64(b[32:], v^key64)
355-
v = binary.LittleEndian.Uint64(b[40:])
356-
binary.LittleEndian.PutUint64(b[40:], v^key64)
357-
v = binary.LittleEndian.Uint64(b[48:])
358-
binary.LittleEndian.PutUint64(b[48:], v^key64)
359-
v = binary.LittleEndian.Uint64(b[56:])
360-
binary.LittleEndian.PutUint64(b[56:], v^key64)
361-
v = binary.LittleEndian.Uint64(b[64:])
362-
binary.LittleEndian.PutUint64(b[64:], v^key64)
363-
v = binary.LittleEndian.Uint64(b[72:])
364-
binary.LittleEndian.PutUint64(b[72:], v^key64)
365-
v = binary.LittleEndian.Uint64(b[80:])
366-
binary.LittleEndian.PutUint64(b[80:], v^key64)
367-
v = binary.LittleEndian.Uint64(b[88:])
368-
binary.LittleEndian.PutUint64(b[88:], v^key64)
369-
v = binary.LittleEndian.Uint64(b[96:])
370-
binary.LittleEndian.PutUint64(b[96:], v^key64)
371-
v = binary.LittleEndian.Uint64(b[104:])
372-
binary.LittleEndian.PutUint64(b[104:], v^key64)
373-
v = binary.LittleEndian.Uint64(b[112:])
374-
binary.LittleEndian.PutUint64(b[112:], v^key64)
375-
v = binary.LittleEndian.Uint64(b[120:])
376-
binary.LittleEndian.PutUint64(b[120:], v^key64)
347+
v = binary.LittleEndian.Uint64(b[8:16])
348+
binary.LittleEndian.PutUint64(b[8:16], v^key64)
349+
v = binary.LittleEndian.Uint64(b[16:24])
350+
binary.LittleEndian.PutUint64(b[16:24], v^key64)
351+
v = binary.LittleEndian.Uint64(b[24:32])
352+
binary.LittleEndian.PutUint64(b[24:32], v^key64)
353+
v = binary.LittleEndian.Uint64(b[32:40])
354+
binary.LittleEndian.PutUint64(b[32:40], v^key64)
355+
v = binary.LittleEndian.Uint64(b[40:48])
356+
binary.LittleEndian.PutUint64(b[40:48], v^key64)
357+
v = binary.LittleEndian.Uint64(b[48:56])
358+
binary.LittleEndian.PutUint64(b[48:56], v^key64)
359+
v = binary.LittleEndian.Uint64(b[56:64])
360+
binary.LittleEndian.PutUint64(b[56:64], v^key64)
361+
v = binary.LittleEndian.Uint64(b[64:72])
362+
binary.LittleEndian.PutUint64(b[64:72], v^key64)
363+
v = binary.LittleEndian.Uint64(b[72:80])
364+
binary.LittleEndian.PutUint64(b[72:80], v^key64)
365+
v = binary.LittleEndian.Uint64(b[80:88])
366+
binary.LittleEndian.PutUint64(b[80:88], v^key64)
367+
v = binary.LittleEndian.Uint64(b[88:96])
368+
binary.LittleEndian.PutUint64(b[88:96], v^key64)
369+
v = binary.LittleEndian.Uint64(b[96:104])
370+
binary.LittleEndian.PutUint64(b[96:104], v^key64)
371+
v = binary.LittleEndian.Uint64(b[104:112])
372+
binary.LittleEndian.PutUint64(b[104:112], v^key64)
373+
v = binary.LittleEndian.Uint64(b[112:120])
374+
binary.LittleEndian.PutUint64(b[112:120], v^key64)
375+
v = binary.LittleEndian.Uint64(b[120:128])
376+
binary.LittleEndian.PutUint64(b[120:128], v^key64)
377377
b = b[128:]
378378
}
379379

380380
// Then we xor until b is less than 64 bytes.
381381
for len(b) >= 64 {
382382
v := binary.LittleEndian.Uint64(b)
383383
binary.LittleEndian.PutUint64(b, v^key64)
384-
v = binary.LittleEndian.Uint64(b[8:])
385-
binary.LittleEndian.PutUint64(b[8:], v^key64)
386-
v = binary.LittleEndian.Uint64(b[16:])
387-
binary.LittleEndian.PutUint64(b[16:], v^key64)
388-
v = binary.LittleEndian.Uint64(b[24:])
389-
binary.LittleEndian.PutUint64(b[24:], v^key64)
390-
v = binary.LittleEndian.Uint64(b[32:])
391-
binary.LittleEndian.PutUint64(b[32:], v^key64)
392-
v = binary.LittleEndian.Uint64(b[40:])
393-
binary.LittleEndian.PutUint64(b[40:], v^key64)
394-
v = binary.LittleEndian.Uint64(b[48:])
395-
binary.LittleEndian.PutUint64(b[48:], v^key64)
396-
v = binary.LittleEndian.Uint64(b[56:])
397-
binary.LittleEndian.PutUint64(b[56:], v^key64)
384+
v = binary.LittleEndian.Uint64(b[8:16])
385+
binary.LittleEndian.PutUint64(b[8:16], v^key64)
386+
v = binary.LittleEndian.Uint64(b[16:24])
387+
binary.LittleEndian.PutUint64(b[16:24], v^key64)
388+
v = binary.LittleEndian.Uint64(b[24:32])
389+
binary.LittleEndian.PutUint64(b[24:32], v^key64)
390+
v = binary.LittleEndian.Uint64(b[32:40])
391+
binary.LittleEndian.PutUint64(b[32:40], v^key64)
392+
v = binary.LittleEndian.Uint64(b[40:48])
393+
binary.LittleEndian.PutUint64(b[40:48], v^key64)
394+
v = binary.LittleEndian.Uint64(b[48:56])
395+
binary.LittleEndian.PutUint64(b[48:56], v^key64)
396+
v = binary.LittleEndian.Uint64(b[56:64])
397+
binary.LittleEndian.PutUint64(b[56:64], v^key64)
398398
b = b[64:]
399399
}
400400

401401
// Then we xor until b is less than 32 bytes.
402402
for len(b) >= 32 {
403403
v := binary.LittleEndian.Uint64(b)
404404
binary.LittleEndian.PutUint64(b, v^key64)
405-
v = binary.LittleEndian.Uint64(b[8:])
406-
binary.LittleEndian.PutUint64(b[8:], v^key64)
407-
v = binary.LittleEndian.Uint64(b[16:])
408-
binary.LittleEndian.PutUint64(b[16:], v^key64)
409-
v = binary.LittleEndian.Uint64(b[24:])
410-
binary.LittleEndian.PutUint64(b[24:], v^key64)
405+
v = binary.LittleEndian.Uint64(b[8:16])
406+
binary.LittleEndian.PutUint64(b[8:16], v^key64)
407+
v = binary.LittleEndian.Uint64(b[16:24])
408+
binary.LittleEndian.PutUint64(b[16:24], v^key64)
409+
v = binary.LittleEndian.Uint64(b[24:32])
410+
binary.LittleEndian.PutUint64(b[24:32], v^key64)
411411
b = b[32:]
412412
}
413413

414414
// Then we xor until b is less than 16 bytes.
415415
for len(b) >= 16 {
416416
v := binary.LittleEndian.Uint64(b)
417417
binary.LittleEndian.PutUint64(b, v^key64)
418-
v = binary.LittleEndian.Uint64(b[8:])
419-
binary.LittleEndian.PutUint64(b[8:], v^key64)
418+
v = binary.LittleEndian.Uint64(b[8:16])
419+
binary.LittleEndian.PutUint64(b[8:16], v^key64)
420420
b = b[16:]
421421
}
422422

0 commit comments

Comments
 (0)