Can't static array elements where the index is known at compile time be treated just like regular stack variables? GDC seems to treat them as such.
D source code is below. I'd expect these two implementations to compile to exactly the same ASM. With GDC the main loops do, though the pre-loop setup compiles differently for reasons I don't understand.
/**
 * Adds up `arr` four elements per iteration, keeping the four partial sums
 * in a static array (`int[4]`) that is only ever indexed with constants.
 *
 * Params:
 *   arr = slice of ints to add up
 * Returns: the total of the four partial sums.
 *
 * Only complete groups of four are visited: the loop advances by 4 and stops
 * once fewer than four elements remain, so a tail of `arr.length % 4`
 * elements is not included in the result.
 */
int sum1(const int[] arr) {
// One past the last position from which a full group of four can be read.
// NOTE(review): for arr.length < 3 this points before arr.ptr; confirm that
// callers never pass such a slice (a null/empty slice in particular).
auto end = arr.ptr + arr.length - 3;
// Partial sums; relies on D default-initializing the int elements to 0.
int[4] sums;
for(auto cur = arr.ptr; cur < end; cur += 4) {
// Lane i accumulates elements i, i+4, i+8, ...
sums[0] += cur[0];
sums[1] += cur[1];
sums[2] += cur[2];
sums[3] += cur[3];
}
// Fold the four lanes into the final result.
return sums[0] + sums[1] + sums[2] + sums[3];
}
/**
 * Adds up `arr` four elements per iteration, keeping the four partial sums
 * in four separate scalar locals instead of a static array.
 *
 * Params:
 *   arr = slice of ints to add up
 * Returns: the total of the four partial sums.
 *
 * Only complete groups of four are visited: the loop advances by 4 and stops
 * once fewer than four elements remain, so a tail of `arr.length % 4`
 * elements is not included in the result.
 */
int sum2(const int[] arr) {
// One past the last position from which a full group of four can be read.
// NOTE(review): for arr.length < 3 this points before arr.ptr; confirm that
// callers never pass such a slice (a null/empty slice in particular).
auto end = arr.ptr + arr.length - 3;
// Four independent accumulators, explicitly zeroed.
int sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
for(auto cur = arr.ptr; cur < end; cur += 4) {
// Accumulator i collects elements i, i+4, i+8, ...
sum0 += cur[0];
sum1 += cur[1];
sum2 += cur[2];
sum3 += cur[3];
}
// Fold the four accumulators into the final result.
return sum0 + sum1 + sum2 + sum3;
}
ASM:
; Listing for sum1 (the static-array version).
; As used below: RSI is dereferenced as arr.ptr and RDI is scaled by 4 as
; arr.length. The four partial sums are kept in the stack frame at
; -020h, -01Ch, -018h and -014h[RBP] and are updated in memory on every
; loop iteration. Result is left in EAX.
_D5test34sum1FxAiZi:
push RBP
mov RBP,RSP
sub RSP,020h
; RBX is saved because EBX is used as a scratch register in the loop.
push RBX
; Zero the 16 bytes of sums[0..3] with two 8-byte stores.
lea RAX,-020h[RBP]
xor RCX,RCX
mov [RAX],RCX
mov 8[RAX],RCX
; R8 = cur = arr.ptr
mov R8,RSI
mov RDX,RSI
; R9 = end = arr.ptr + arr.length*4 - 12 (0FFFFFFF4h is -12, i.e. -3 ints)
mov R9,RDI
lea R9,0FFFFFFF4h[R9*4][RDX]
; Skip the loop entirely when arr.ptr >= end (unsigned compare).
cmp RDX,R9
jae L51
; Loop body: each element is loaded into a register, then added to its
; partial sum in memory (a load plus a read-modify-write per element).
L2D: mov EBX,[R8]
add -020h[RBP],EBX
mov ESI,4[R8]
add -01Ch[RBP],ESI
mov EAX,8[R8]
add -018h[RBP],EAX
mov ECX,0Ch[R8]
add -014h[RBP],ECX
; cur += 4 ints (16 bytes); repeat while cur < end.
add R8,010h
cmp R8,R9
jb L2D
; EAX = sums[0] + sums[1] + sums[2] + sums[3], read back from the stack.
L51: mov EAX,-020h[RBP]
add EAX,-01Ch[RBP]
add EAX,-018h[RBP]
add EAX,-014h[RBP]
pop RBX
mov RSP,RBP
pop RBP
ret
; Presumably alignment padding between functions -- not reachable from the code above.
nop
; Listing for sum2 (the four-scalar version).
; As used below: RSI is dereferenced as arr.ptr and RDI is scaled by 4 as
; arr.length. The four partial sums stay in registers for the whole loop:
; R9D, R11D, EBX and R12D take cur[0], cur[1], cur[2] and cur[3]
; respectively. Result is left in EAX.
_D5test34sum2FxAiZi:
push RBP
mov RBP,RSP
; 16 bytes of frame space are reserved; no instruction in this listing accesses them.
sub RSP,010h
; RBX, R12 and R13 are saved because the function uses them below.
push RBX
push R12
push R13
; Zero the four accumulators.
xor R9D,R9D
xor R11D,R11D
xor EBX,EBX
xor R12D,R12D
; R8 = cur = arr.ptr
mov R8,RSI
mov RAX,RSI
; R13 = end = arr.ptr + arr.length*4 - 12 (0FFFFFFF4h is -12, i.e. -3 ints)
mov R13,RDI
lea R13,0FFFFFFF4h[R13*4][RAX]
; Skip the loop entirely when arr.ptr >= end (unsigned compare).
cmp RAX,R13
jae L46
; Loop body: one add-from-memory per element, accumulators never spill.
L2E: add R9D,[R8]
add R11D,4[R8]
add EBX,8[R8]
add R12D,0Ch[R8]
; cur += 4 ints (16 bytes); repeat while cur < end.
add R8,010h
cmp R8,R13
jb L2E
; EAX = R11D + R9D, then add the remaining two accumulators.
L46: lea EAX,[R11][R9]
add EAX,EBX
add EAX,R12D
pop R13
pop R12
pop RBX
mov RSP,RBP
pop RBP
ret
Comment #1 by bugzilla — 2013-04-25T12:41:56Z
The optimizer currently does not enregister variables that don't fit into registers. It could be enhanced to do it.
Comment #2 by timon.gehr — 2013-04-25T12:48:21Z
(In reply to comment #1)
> The optimizer currently does not enregister variables that don't fit into
> registers. It could be enhanced to do it.
ints definitely fit into registers.
Comment #3 by bugzilla — 2013-04-25T13:44:14Z
(In reply to comment #2)
> ints definitely fit into registers.
sums is one variable, not 4.
Comment #4 by bearophile_hugs — 2013-06-14T03:16:06Z
Comment #5 by dlang-bugzilla — 2017-07-18T13:21:55Z
FWIW, after https://github.com/dlang/dmd/pull/6176 (2.073.0) the generated assembly code changed; however, a simple benchmark shows that performance did not change noticeably.
Comment #6 by robert.schadek — 2024-12-13T18:06:23Z