/*
 * Micro-benchmark: add two 100-word (32 bits per word) little-endian
 * multi-precision integers, z = x + y, once in portable C and once in
 * hand-written x86 assembly, and print the elapsed time-stamp-counter
 * ticks for each variant.
 *
 * Test vector: x = 1 and y = 2^3200 - 1, so every result word must be 0
 * and the carry out of the top word must be 1.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#if !defined(__i386__) && !defined(__x86_64__)
#error "this benchmark relies on x86 inline assembly (rdtsc, adc, jecxz)"
#endif

/*
 * Store the low 32 bits of the time-stamp counter in `low`.
 * RDTSC also writes EDX (high half), which is discarded, hence the clobber.
 * The "memory" clobber keeps the compiler from moving memory accesses of
 * the timed code across the timestamp read.
 */
#define rdtscl(low) \
    __asm__ __volatile__ ("rdtsc" : "=a" (low) : : "edx", "memory")

/* Number of 32-bit words in each operand. */
enum { WORDS = 100 };

int main(void)
{
    uint32_t x[WORDS];
    uint32_t y[WORDS];
    uint32_t z[WORDS];
    uint32_t t0, t1;          /* low 32 bits of the TSC before/after */
    uint32_t carry;           /* carry out of the assembly addition  */
    ptrdiff_t off;            /* negative byte offset from array end */
    unsigned long long l;     /* running sum / carry of the C loop   */
    size_t i;

    for (i = 0; i < WORDS; ++i) {
        x[i] = 0;
        y[i] = UINT32_MAX;
        z[i] = 0;
    }
    x[0] = 1;

    /* ---- Portable C: 64-bit accumulator, carry lives in bits 32+. ---- */
    rdtscl(t0);
    l = 0;
    for (i = 0; i < WORDS; ++i) {
        l += (unsigned long long)x[i] + y[i];
        z[i] = (uint32_t)l;
        l >>= 32;
    }
    rdtscl(t1);
    /* Unsigned subtraction is well defined even if the counter wrapped. */
    printf("C: %ld cycles\n", (long)(uint32_t)(t1 - t0));

    for (i = 0; i < WORDS; ++i)
        assert(z[i] == 0);
    assert(l == 1);

    /*
     * Poison the output and the carry so the checks after the assembly
     * pass verify what the assembly wrote, not leftovers of the C pass.
     */
    for (i = 0; i < WORDS; ++i)
        z[i] = 0xdeadbeefu;
    l = 0;

    /*
     * ---- x86 assembly: add / adc chain. ----
     * The arrays are addressed as (end pointer + negative offset); the
     * offset climbs from -sizeof(array) to 0 in steps of 4 bytes.
     * LEA and JECXZ are used for the loop bookkeeping because neither
     * modifies EFLAGS, so the carry flag survives from one ADC to the
     * next. JECXZ tests ECX, therefore the offset must live in (E/R)CX.
     * After the loop the final carry flag is materialised in the scratch
     * register (MOV does not touch flags) and returned to C.
     */
    off = -(ptrdiff_t)sizeof x;
    rdtscl(t0);
    __asm__ __volatile__ (
        "movl (%[x],%[off]),%[tmp]\n\t"
        "addl (%[y],%[off]),%[tmp]\n\t"
        "movl %[tmp],(%[z],%[off])\n"
        "1:\n\t"
        "lea 4(%[off]),%[off]\n\t"
        "jecxz 2f\n\t"
        "movl (%[x],%[off]),%[tmp]\n\t"
        "adcl (%[y],%[off]),%[tmp]\n\t"
        "movl %[tmp],(%[z],%[off])\n\t"
        "jmp 1b\n"
        "2:\n\t"
        "movl $0,%[tmp]\n\t"
        "adcl $0,%[tmp]\n"
        : [off] "+c" (off),           /* modified by the loop: in/out   */
          [tmp] "=&r" (carry)         /* scratch, then carry-out (0/1)  */
        : [x] "r" (x + WORDS),
          [y] "r" (y + WORDS),
          [z] "r" (z + WORDS)
        : "cc", "memory"              /* flags changed, arrays accessed */
    );
    rdtscl(t1);
    printf("asm: %ld cycles\n", (long)(uint32_t)(t1 - t0));

    l = carry;
    for (i = 0; i < WORDS; ++i)
        assert(z[i] == 0);
    assert(l == 1);

    return 0;
}