# 如何獲得stripped模式的校驗和

I have a 64 bit number (but only the 42 low order bits are used) and need to computer the sum of the 4 bits at n, n+m, n+m*2 and n+m*3 (note: anything that can produce a sum >4 is invalid) for some fixed m and every value of n that places all the bits in the number

0010 1011 0110 0001


2, 3, 1, 2, 3, 0, 3


v1 = In;
v2 = In<<3;
v3 = In<<6;
v4 = In<<9;

a1 = v1 ^ v2;
a2 = v1 & v2;
b1 = v3 ^ v4;
b2 = v3 & v4;
c2 = a1 & b1;
d2 = a2 ^ b2;

o1 = a1 ^ b1;
o2 = c2 ^ d2;
o4 = a2 & b2;


edit: as it happens I need the histogram of the sums so doing a bit-count of o4, o2&o1, o2 and o1 gives me what I want.

arr = [0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4];

for(int i = 0; i < N; i++)
{
out[i] = arr[(In & 0b1001001001) % 30];
In >>= 1;
}


## 最佳答案

unsigned short out;
const unsigned long long mask      = 0x0249024902490249ul;
const unsigned long long shiftmask = 0x0001000100010001ul;

unsigned long long t = (unsigned short)(in >> 38) | (unsigned long long)(unsigned short)(in >> 39) > 40) > 41) << 48;
*((unsigned long long*)(out + 38)) = (t & shiftmask) + (t >> 3 & shiftmask) + (t >> 6 & shiftmask) + (t >> 9 & shiftmask);

[... snipsnap ...]

t = (unsigned short)(in >> 2) | (unsigned long long)(unsigned short)(in >> 3) > 4) > 5) << 48;
*((unsigned long long*)(out + 2)) = (t & shiftmask) + (t >> 3 & shiftmask) + (t >> 6 & shiftmask) + (t >> 9 & shiftmask);

t = (unsigned short)in | (unsigned long long)(unsigned short)(in >> 1) << 16;
*((unsigned int*)out) = (unsigned int)((t & shiftmask) + (t >> 3 & shiftmask) + (t >> 6 & shiftmask) + (t >> 9 & shiftmask));


By reordering the computations, we can further reduce the execution time significantly, since it drastically reduces the amount of times that something is loaded into the QWORD. A few other optimizations are quite obvious and rather minor, but sum up to another interesting speedup.
unsigned short out;
const unsigned long long Xmask = 0x249024902490249ull;
const unsigned long long Ymask = 0x7000700070007u;

unsigned long long x = (in >> 14 & 0xFFFFu) | (in >> 20 & 0xFFFFu) > 26 & 0xFFFFu) > 32) << 48;
unsigned long long y;
y += y >> 6;
y += y >> 3;
out = (unsigned short)(y >> 48);
out = (unsigned short)(y >> 32);
out = (unsigned short)(y >> 16);
out = (unsigned short)(y      );

x >>= 1;
y += y >> 6;
y += y >> 3;
out = (unsigned short)(y >> 48);
out = (unsigned short)(y >> 32);
out = (unsigned short)(y >> 16);
out = (unsigned short)(y      );

[snisnap]

x >>= 1;
y += y >> 6;
y += y >> 3;
out = (unsigned short)(y >> 48);
out = (unsigned short)(y >> 32);
out = (unsigned short)(y >> 16);
out = (unsigned short)(y      );

x >>= 1;
x &= 0xFFFF000000000000ul;
x |= (in & 0xFFFFu) | (in >> 5 & 0xFFFFu) > 10 & 0xFFFFu) << 32;
y += y >> 6;
y += y >> 3;
out = (unsigned short)(y >> 48);
out = (unsigned short)(y >> 32);
out[ 5] = (unsigned short)(y >> 16);
out[ 0] = (unsigned short)(y      );

[snipsnap]

x >>= 1;
y += y >> 6;
y += y >> 3;