Bug 2750 – Optimize slice copy with size known at compile time
Status
RESOLVED
Resolution
FIXED
Severity
normal
Priority
P2
Component
dmd
Product
D
Version
D1 (retired)
Platform
x86
OS
Windows
Creation time
2009-03-19T14:57:00Z
Last change time
2014-04-18T09:12:06Z
Keywords
patch, wrong-code
Assigned to
bugzilla
Creator
snake.scaly
Comments
Comment #0 by snake.scaly — 2009-03-19T14:57:45Z
It was discussed recently that the compiler intrinsic for slice copying was slower than CRT memcpy():
http://tinyurl.com/cfxmva
In that particular case it was generating rep movsb despite the fact that the slice size was known at compile time.
I'm proposing a patch which fixes this problem. Here is an example. This code:
void main() {
auto h = "hello\n";
char buf[16];
buf[0 .. h.length] = h;
}
is compiled with -O -release -inline by the current compiler (1.041) into:
__Dmain comdat
assume CS:__Dmain
sub ESP,020h
mov EDX,FLAT:_DATA[0Ch]
mov EAX,FLAT:_DATA[08h]
push EBX
push ESI
mov ESI,EDX
push EDI
lea EDI,0Ch[ESP]
movsd
movsb
movsb
lea ECX,01Ch[ESP]
mov EBX,0FFFFFFFFh
mov [ECX],EBX
mov EAX,6
lea ESI,0Ch[ESP]
mov 4[ECX],EBX
lea EDI,01Ch[ESP]
mov 8[ECX],EBX
mov 0Ch[ECX],EBX
mov ECX,EAX
rep
movsb
xor EAX,EAX
pop EDI
pop ESI
pop EBX
add ESP,020h
ret
__Dmain ends
and by a patched compiler:
__Dmain comdat
assume CS:__Dmain
sub ESP,020h
mov EDX,FLAT:_DATA[0Ch]
mov EAX,FLAT:_DATA[08h]
push EBX
push ESI
mov ESI,EDX
push EDI
lea EDI,0Ch[ESP]
movsd
movsb
movsb
lea ECX,01Ch[ESP]
mov EBX,0FFFFFFFFh
mov [ECX],EBX
xor EAX,EAX
mov 4[ECX],EBX
mov 8[ECX],EBX
mov 0Ch[ECX],EBX
pop EDI
pop ESI
pop EBX
add ESP,020h
ret
__Dmain ends
Here is the patch:
-------8<------------------------------
diff --git a/dmd/backend/cgelem.c b/dmd/backend/cgelem.c
index a2a4a1f..a80eefb 100644
--- a/dmd/backend/cgelem.c
+++ b/dmd/backend/cgelem.c
@@ -3773,6 +3773,16 @@ STATIC elem * el64_32(elem *e)
e->E1 = el_selecte1(e->E1);
}
break;
+
+ case OPpair:
+ e = el_selecte1(el_selecte1(e));
+ goto L1;
+ case OPrpair:
+ e = el_selecte2(el_selecte1(e));
+ goto L1;
+ L1:
+ e->Ety = ty;
+ break;
}
return e;
}
-------8<------------------------------
Comment #1 by snake.scaly — 2009-03-19T15:40:01Z
Sorry, bad example. The patched compiler simply optimized the copy away, which wasn't bad in itself, but it wasn't what I wanted to demonstrate either. Here's a better example:
void main() {
auto h = "hello\n";
auto buf = new char[16];
buf[0 .. h.length] = h;
}
Original 1.041:
__Dmain comdat
assume CS:__Dmain
L0: sub ESP,018h
mov EDX,FLAT:_DATA[0Ch]
mov EAX,FLAT:_DATA[08h]
push EBX
push ESI
mov ESI,EDX
push EDI
lea EDI,0Ch[ESP]
movsd
movsb
movsb
lea ESI,0Ch[ESP]
mov ECX,offset FLAT:_D11TypeInfo_Aa6__initZ
push 010h
push ECX
call near ptr __d_newarrayiT
mov EBX,6
mov ECX,EBX
mov EDI,EDX
rep
movsb
add ESP,8
xor EAX,EAX
pop EDI
pop ESI
pop EBX
add ESP,018h
ret
__Dmain ends
Assembly produced by a patched compiler:
__Dmain comdat
assume CS:__Dmain
L0: sub ESP,01Ch
mov EDX,FLAT:_DATA[0Ch]
mov EAX,FLAT:_DATA[08h]
push ESI
mov ESI,EDX
push EDI
lea EDI,0Ch[ESP]
movsd
movsb
movsb
mov ECX,offset FLAT:_D11TypeInfo_Aa6__initZ
push 010h
push ECX
call near ptr __d_newarrayiT
lea ESI,014h[ESP]
mov EDI,EDX
movsd
movsb
movsb
add ESP,8
xor EAX,EAX
pop EDI
pop ESI
add ESP,01Ch
ret
__Dmain ends
Comment #2 by bugzilla — 2009-03-20T00:12:06Z
Interesting, since I had already introduced an almost identical patch 3 days ago! Anyhow, this will obviously go out in the next update.