Comment #0 by bearophile_hugs — 2011-02-17T16:15:32Z
While solving some Project Euler problems with D and C, I have found about five fold performance difference. I have reduced the code to a small benchmark (that shows a smaller difference).
Probably the problem comes from %10 and /10 done on uint. DMD uses div, while GCC uses a known trick for small integer division/modulus. On the web there are pages that explain how to implement this small integer division optimization (example: http://libdivide.com/ ).
---------------------------------
Timings, n = 100_000_000:
GCC: 3.13 s
DMD: 11.69 s
dmd -O -release -inline testd.d
gcc -O3 -s testc.c -o testc.exe
32 bit
GCC 4.5.2
DMD 2.052beta
---------------------------------
// D2 code
import std.c.stdio: printf;
int main() {
uint total = 0;
uint i;
for (i = 0; i < 100000000; i++) {
uint n = i;
uint res = n % 10;
n /= 10;
while (n != 0) {
res = (n % 10) + (10 * res);
n /= 10;
}
total += res;
}
printf("%u\n", total); // 461784529
return 0;
}
---------------------------------
// C code
#include "stdio.h"
typedef unsigned long uint;
int main() {
uint total = 0;
uint i;
for (i = 0; i < 100000000; i++) {
uint n = i;
uint res = n % 10;
n /= 10;
while (n != 0) {
res = (n % 10) + (10 * res);
n /= 10;
}
total += res;
}
printf("%u\n", total); // 461784529
return 0;
}
---------------------------------
DMD inner loop:
L1C: lea EBX,[EBX*4][EBX]
mov EAX,ESI
mov ECX,0Ah
xor EDX,EDX
add EBX,EBX
div ECX
add EBX,EDX
test EAX,EAX
mov ESI,EAX
jne L1C
---------------------------------
GCC inner loop:
L7:
movl %ecx, %eax
mull %esi
leal (%ebx,%ebx,4), %ebx
shrl $3, %edx
leal (%edx,%edx,4), %eax
addl %eax, %eax
subl %eax, %ecx
testl %edx, %edx
leal (%ecx,%ebx,2), %ebx
movl %edx, %ecx
jne L7
Comment #1 by bearophile_hugs — 2011-06-24T18:24:48Z