Comment #1 by thomas.bockman — 2016-01-17T00:52:48Z
Here's the complete output of a failed run on my system:
Performing "release" build using dmd for x86_64.
bug ~master: target for configuration "application" is up to date.
To force a rebuild of up-to-date targets, run again with --force.
Running ./bug
byte n = -128
byte m = -2
real theory = +64
int practice1 = +0
int practice2 = +64
intFlag = {}
FAILS: consistent
byte n = -127
byte m = -2
real theory = +63
int practice1 = +0
int practice2 = +63
intFlag = {}
FAILS: consistent
byte n = -65
byte m = -2
real theory = +32
int practice1 = +0
int practice2 = +32
intFlag = {}
FAILS: consistent
byte n = -64
byte m = -2
real theory = +32
int practice1 = +0
int practice2 = +32
intFlag = {}
FAILS: consistent
byte n = -63
byte m = -2
real theory = +31
int practice1 = +0
int practice2 = +31
intFlag = {}
FAILS: consistent
byte n = -33
byte m = -2
real theory = +16
int practice1 = +0
int practice2 = +16
intFlag = {}
FAILS: consistent
byte n = -32
byte m = -2
real theory = +16
int practice1 = +0
int practice2 = +16
intFlag = {}
FAILS: consistent
byte n = -31
byte m = -2
real theory = +15
int practice1 = +0
int practice2 = +15
intFlag = {}
FAILS: consistent
byte n = -30
byte m = -2
real theory = +15
int practice1 = +0
int practice2 = +15
intFlag = {}
FAILS: consistent
byte n = -29
byte m = -2
real theory = +14
int practice1 = +0
int practice2 = +14
intFlag = {}
FAILS: consistent
byte n = -28
byte m = -2
real theory = +14
int practice1 = +0
int practice2 = +14
intFlag = {}
FAILS: consistent
byte n = -27
byte m = -2
real theory = +13
int practice1 = +0
int practice2 = +13
intFlag = {}
FAILS: consistent
byte n = -26
byte m = -2
real theory = +13
int practice1 = +0
int practice2 = +13
intFlag = {}
FAILS: consistent
byte n = -25
byte m = -2
real theory = +12
int practice1 = +0
int practice2 = +12
intFlag = {}
FAILS: consistent
byte n = -24
byte m = -2
real theory = +12
int practice1 = +0
int practice2 = +12
intFlag = {}
FAILS: consistent
byte n = -23
byte m = -2
real theory = +11
int practice1 = +0
int practice2 = +11
intFlag = {}
FAILS: consistent
byte n = -22
byte m = -2
real theory = +11
int practice1 = +0
int practice2 = +11
intFlag = {}
FAILS: consistent
byte n = -21
byte m = -2
real theory = +10
int practice1 = +0
int practice2 = +10
intFlag = {}
FAILS: consistent
byte n = -20
byte m = -2
real theory = +10
int practice1 = +0
int practice2 = +10
intFlag = {}
FAILS: consistent
byte n = -19
byte m = -2
real theory = +9
int practice1 = +0
int practice2 = +9
intFlag = {}
FAILS: consistent
byte n = -18
byte m = -2
real theory = +9
int practice1 = +0
int practice2 = +9
intFlag = {}
FAILS: consistent
byte n = -17
byte m = -2
real theory = +8
int practice1 = +0
int practice2 = +8
intFlag = {}
FAILS: consistent
byte n = -16
byte m = -2
real theory = +8
int practice1 = +0
int practice2 = +8
intFlag = {}
FAILS: consistent
byte n = -15
byte m = -2
real theory = +7
int practice1 = +0
int practice2 = +7
intFlag = {}
FAILS: consistent
byte n = -14
byte m = -2
real theory = +7
int practice1 = +0
int practice2 = +7
intFlag = {}
FAILS: consistent
byte n = -13
byte m = -2
real theory = +6
int practice1 = +0
int practice2 = +6
intFlag = {}
FAILS: consistent
byte n = -12
byte m = -2
real theory = +6
int practice1 = +0
int practice2 = +6
intFlag = {}
FAILS: consistent
byte n = -11
byte m = -2
real theory = +5
int practice1 = +0
int practice2 = +5
intFlag = {}
FAILS: consistent
byte n = -10
byte m = -2
real theory = +5
int practice1 = +0
int practice2 = +5
intFlag = {}
FAILS: consistent
byte n = -9
byte m = -2
real theory = +4
int practice1 = +0
int practice2 = +4
intFlag = {}
FAILS: consistent
byte n = -8
byte m = -2
real theory = +4
int practice1 = +0
int practice2 = +4
intFlag = {}
FAILS: consistent
byte n = -7
byte m = -2
real theory = +3
int practice1 = +0
int practice2 = +3
intFlag = {}
FAILS: consistent
byte n = -6
byte m = -2
real theory = +3
int practice1 = +0
int practice2 = +3
intFlag = {}
FAILS: consistent
byte n = -5
byte m = -2
real theory = +2
int practice1 = +0
int practice2 = +2
intFlag = {}
FAILS: consistent
byte n = -4
byte m = -2
real theory = +2
int practice1 = +0
int practice2 = +2
intFlag = {}
FAILS: consistent
byte n = -3
byte m = -2
real theory = +1
int practice1 = +0
int practice2 = +1
intFlag = {}
FAILS: consistent
byte n = -2
byte m = -2
real theory = +1
int practice1 = +0
int practice2 = +1
intFlag = {}
FAILS: consistent
byte n = -1
byte m = -2
real theory = +0
int practice = +0
intFlag = {divide by zero}
FAILS: correct
byte n = +0
byte m = -2
real theory = -0
int practice = +0
intFlag = {divide by zero}
FAILS: correct
byte n = +1
byte m = -2
real theory = -0
int practice = +0
intFlag = {divide by zero}
FAILS: correct
byte n = +2
byte m = -2
real theory = -1
int practice1 = +0
int practice2 = -1
intFlag = {}
FAILS: consistent
byte n = +3
byte m = -2
real theory = -1
int practice1 = +0
int practice2 = -1
intFlag = {}
FAILS: consistent
byte n = +4
byte m = -2
real theory = -2
int practice1 = +0
int practice2 = -2
intFlag = {}
FAILS: consistent
byte n = +5
byte m = -2
real theory = -2
int practice1 = +0
int practice2 = -2
intFlag = {}
FAILS: consistent
byte n = +6
byte m = -2
real theory = -3
int practice1 = +0
int practice2 = -3
intFlag = {}
FAILS: consistent
byte n = +7
byte m = -2
real theory = -3
int practice1 = +0
int practice2 = -3
intFlag = {}
FAILS: consistent
byte n = +8
byte m = -2
real theory = -4
int practice1 = +0
int practice2 = -4
intFlag = {}
FAILS: consistent
byte n = +9
byte m = -2
real theory = -4
int practice1 = +0
int practice2 = -4
intFlag = {}
FAILS: consistent
byte n = +10
byte m = -2
real theory = -5
int practice1 = +0
int practice2 = -5
intFlag = {}
FAILS: consistent
byte n = +11
byte m = -2
real theory = -5
int practice1 = +0
int practice2 = -5
intFlag = {}
FAILS: consistent
byte n = +12
byte m = -2
real theory = -6
int practice1 = +0
int practice2 = -6
intFlag = {}
FAILS: consistent
byte n = +13
byte m = -2
real theory = -6
int practice1 = +0
int practice2 = -6
intFlag = {}
FAILS: consistent
byte n = +14
byte m = -2
real theory = -7
int practice1 = +0
int practice2 = -7
intFlag = {}
FAILS: consistent
byte n = +15
byte m = -2
real theory = -7
int practice1 = +0
int practice2 = -7
intFlag = {}
FAILS: consistent
byte n = +16
byte m = -2
real theory = -8
int practice1 = +0
int practice2 = -8
intFlag = {}
FAILS: consistent
byte n = +17
byte m = -2
real theory = -8
int practice1 = +0
int practice2 = -8
intFlag = {}
FAILS: consistent
byte n = +18
byte m = -2
real theory = -9
int practice1 = +0
int practice2 = -9
intFlag = {}
FAILS: consistent
byte n = +19
byte m = -2
real theory = -9
int practice1 = +0
int practice2 = -9
intFlag = {}
FAILS: consistent
byte n = +20
byte m = -2
real theory = -10
int practice1 = +0
int practice2 = -10
intFlag = {}
FAILS: consistent
byte n = +21
byte m = -2
real theory = -10
int practice1 = +0
int practice2 = -10
intFlag = {}
FAILS: consistent
byte n = +22
byte m = -2
real theory = -11
int practice1 = +0
int practice2 = -11
intFlag = {}
FAILS: consistent
byte n = +23
byte m = -2
real theory = -11
int practice1 = +0
int practice2 = -11
intFlag = {}
FAILS: consistent
byte n = +24
byte m = -2
real theory = -12
int practice1 = +0
int practice2 = -12
intFlag = {}
FAILS: consistent
byte n = +25
byte m = -2
real theory = -12
int practice1 = +0
int practice2 = -12
intFlag = {}
FAILS: consistent
byte n = +26
byte m = -2
real theory = -13
int practice1 = +0
int practice2 = -13
intFlag = {}
FAILS: consistent
byte n = +27
byte m = -2
real theory = -13
int practice1 = +0
int practice2 = -13
intFlag = {}
FAILS: consistent
byte n = +28
byte m = -2
real theory = -14
int practice1 = +0
int practice2 = -14
intFlag = {}
FAILS: consistent
byte n = +29
byte m = -2
real theory = -14
int practice1 = +0
int practice2 = -14
intFlag = {}
FAILS: consistent
byte n = +30
byte m = -2
real theory = -15
int practice1 = +0
int practice2 = -15
intFlag = {}
FAILS: consistent
byte n = +31
byte m = -2
real theory = -15
int practice1 = +0
int practice2 = -15
intFlag = {}
FAILS: consistent
byte n = +32
byte m = -2
real theory = -16
int practice1 = +0
int practice2 = -16
intFlag = {}
FAILS: consistent
byte n = +33
byte m = -2
real theory = -16
int practice1 = +0
int practice2 = -16
intFlag = {}
FAILS: consistent
byte n = +63
byte m = -2
real theory = -31
int practice1 = +0
int practice2 = -31
intFlag = {}
FAILS: consistent
byte n = +64
byte m = -2
real theory = -32
int practice1 = +0
int practice2 = -32
intFlag = {}
FAILS: consistent
byte n = +65
byte m = -2
real theory = -32
int practice1 = +0
int practice2 = -32
intFlag = {}
FAILS: consistent
byte n = +127
byte m = -2
real theory = -63
int practice1 = +0
int practice2 = -63
intFlag = {}
FAILS: consistent
Program exited with code -8
Comment #2 by thomas.bockman — 2016-01-19T22:11:38Z
I'm trying to keep working on the original program, but I'm now starting to see the same kind of problems in debug mode and on LDC, as well. Other error messages I've gotten recently:
Program exited with code -11
Program exited with code -8
Program exited with code -4
Illegal Instruction
Comment #3 by thomas.bockman — 2016-01-19T22:23:28Z
I have now reproduced this problem without using TLS. I have no idea what is going on at this point.
Comment #4 by hsteoh — 2016-01-19T23:43:40Z
Exit code -8 seems to be SIGFPE (floating point exception). Exit code -11 is your usual segmentation fault, usually the result of dereferencing an illegal pointer (usually null, but can be other things like random int values accidentally overwriting a pointer). Exit code -4 is, as it says, illegal instruction, usually the result of a corrupted function pointer. You can see the full list of signal numbers by running `kill -l` in a terminal.
Anyway, I've been trying to compile your code with various flags, but I could not reproduce any of the errors you're seeing. Do you know exactly what's the command line dub uses to invoke dmd (I don't have dub installed)? I tried various combinations like:
dmd -unittest test.d
dmd -release test.d
dmd -debug test.d
dmd -release -noboundscheck test.d
...
etc., and running the resulting executable afterwards, but none of them give any of the consistency errors or crashes that you're seeing. I'm also running on Linux/x86_64.
Based on the nature of your crashes, I'm suspecting that it has something to do with linker problems... perhaps dub is somehow picking up an incorrect version of a shared library (probably druntime or phobos), so the runtime functions used by the program aren't doing what they're expected to do, causing various memory corruption issues that lead to consistency errors and crashes. You might want to try a fresh installation of the dmd toolchain in a sandbox (in a VM or a chroot, perhaps) and see if that makes a difference.
Failing that, try looking at the output of 'dmd -v test.d' and carefully check the output to see if it's pulling in imports and shared libraries that are in the expected places.
Comment #5 by hsteoh — 2016-01-19T23:47:44Z
P.S. An example of an "unexpected place" would be dmd reading import files from /usr/src/d/phobos when dmd itself is installed in /usr/share/local/bin/ (i.e., it seems likely that the version of phobos it's picking up may not be the same version as the one dmd was shipped with). Another thing to watch out for is when it invokes cc or gcc (usually as a last step) with -L paths that point to somewhere that seems inconsistent with where dmd is installed.
Comment #6 by hsteoh — 2016-01-19T23:49:52Z
P.S.S. Oh, and another thought occurred to me: maybe try flushing dub's cache of previously-built targets? E.g., if you copy your source files to new, empty directory and build everything from scratch. Sometimes stale object files left in build caches can get wrongly linked to the newest version of the program, causing heisenbugs that vanish when you rebuild the code from fresh.
Comment #7 by thomas.bockman — 2016-01-19T23:59:54Z
Created attachment 1576
compiler output
This is the complete output from a run of DUB and DMD.
Comment #8 by thomas.bockman — 2016-01-20T00:04:14Z
Thanks for explaining the error codes. Here's what the DMD command looks like:
dmd -c -of.dub/build/application-release-linux.posix-x86_64-dmd_2069-CB9CA9A6812DFD37D33F5377E56B63D5/bug.o -release -inline -O -v -w -version=Have_bug -Isource/ source/app.d -vcolumns
I checked the import paths, and they looked right to me. You can check the attachment in my previous message if you want, though.
Note that I'm not currently using a standard DMD install; I'm basically running DMD HEAD out of /opt. However, I have reproduced this issue using the .deb package for DMD 2.069.2 and 2.068.2 as well.
LDC does not have a problem with the exact source code included above, but I've hit what seems to be the same issue in the latest version of the original pre-reduction code for my checkedint project.
Comment #9 by thomas.bockman — 2016-01-20T00:06:53Z
As far as DUB caching stuff goes - it's true I've had some issues with that, but I don't think that's the fundamental cause here. This problem persists even if I just move the source to a fresh project directory, or open up the hidden ".dub" folder in the project and delete the cache folders.
Comment #10 by hsteoh — 2016-01-20T00:11:57Z
Thanks for posting the dub output, I've managed to reproduce the problem now. I'll do some investigation to see what's going on here...
Comment #11 by thomas.bockman — 2016-01-20T00:13:25Z
> I've managed to reproduce the problem now.
Excellent. So I'm not crazy :-D
> I'll do some investigation to see what's going on here...
Many thanks. This has been driving me nuts.
Comment #12 by hsteoh — 2016-01-20T00:15:58Z
It seems to be triggered by the -inline -O flags to dmd. I managed to reduce the compiler call to the following:
dmd -inline -O source/app.d -ofapp && ./app
Removing either -inline or -O makes the problem go away. Going to dig deeper now...
Comment #13 by thomas.bockman — 2016-01-20T00:19:13Z
> Removing either -inline or -O makes the problem go away. Going to dig deeper now...
Just keep in mind that I have a newer version of the non-reduced code that seems to show the same problem across a much wider variety of permutations of compilers and options.
If you can't figure out what's going on from this one, let me know and I'll try to reduce the newer code. (But I'd rather not unless I have to, because this project takes a while to reduce.)
Comment #14 by hsteoh — 2016-01-20T00:21:00Z
Have you looked into DustMite? It can reduce your code while you work on something else.
Comment #16 by thomas.bockman — 2016-01-20T00:24:58Z
> Have you looked into DustMite? It can reduce your code while you work on something else.
I've heard of DustMite, and it sounds very cool.
I doubt it could effectively reduce my *original* code though, because it makes such heavy use of meta-programming that it would probably need to be able to solve the halting problem to do a good job.
Comment #17 by hsteoh — 2016-01-20T00:30:42Z
The way dustmite works is not by some clever solution to the halting problem (haha), it's simply by trial-and-error deleting parts of the syntax tree of the program until the original error no longer occurs. It recursively refines its reduction, so that by the time it's done, what's left is usually pretty close to minimal code.
Sometimes, it does get stuck, but a little human editing of the intermediate reduction and running dustmite again on the result often gets even closer to minimal. It beats hand-reducing the code through the entire process, if nothing else.
Comment #18 by thomas.bockman — 2016-01-20T00:38:24Z
I should definitely give it a try some time.
I guess what I'm saying, though, is that I'm pretty sure it would need a lot of intervention by me (at least at the beginning) in order to accomplish much with this particular project.
The reduction I submitted here is in no way simply a trimming of the syntax tree; I had to write shims, instantiate various templates, and mixin strings by hand in order to get the ball rolling.
The original code also compiles very slowly, as well, which would make a blind guess-and-check process rather painful until it got far enough along to eliminate the part that causes the slow compilation.
Comment #20 by thomas.bockman — 2016-01-20T00:45:52Z
Will DustMite automatically import dependencies like isNaN(), or did you do that?
Comment #21 by ag0aep6g — 2016-01-20T00:50:05Z
(In reply to thomas.bockman from comment #20)
> Will DustMite automatically import dependencies like isNaN(), or did you do
> that?
I haven't used Dustmite here at all. I don't know what it does with library code.
Comment #22 by hsteoh — 2016-01-20T01:00:59Z
Basically, dustmite does textual simplification, which often, though not always, corresponds with semantic simplification. For example, it does replace selective imports with non-selective imports -- it's a textual simplification but not necessarily a semantic one. It doesn't actually look into the imported code unless the imported module is also part of the source tree being reduced.
Anyway, dustmite didn't get me very far in reducing the code any further; here's how far it got:
------
import std.stdio;
enum IntFlag {NULL, error}
IntFlag intFlag;
int dummyVar;
int safeDiv(byte left, byte right)
{
const div0 = right == 0;
const posOver = left == int.min&& right ;
if(div0)
{
intFlag = posOver ? IntFlag.error : IntFlag.error;
return 0;
}
return left / right;
}
void main()
{
byte[] testValues = [-2];
foreach(m; testValues)
foreach(n; testValues)
{
const theory = cast(real)n / m;
const thrInval = theory.isNaN;
intFlag = IntFlag.NULL;
const practice1 = safeDiv(n, m);
writeln(practice1 == theory); /* false with -inline -O */
void require(bool success)
{
if(success) dummyVar = m;
}
require(thrInval);
}
}
bool isNaN(real x) {
enum EXPMASK = 0x7FFF;
enum EXPPOS_SHORT = 4;
const e = EXPMASK & (cast(ushort *)&x)[EXPPOS_SHORT];
const ps = *cast(ulong *)&x;
return e == EXPMASK &&
ps & 0x7FFF_FFFF_FFFF_FFFF;
}
------
(I left std.stdio in there and replaced the assert with a writeln, because dustmite needed a script that can tell the difference between a correct reduction and an incorrect reduction -- I have to compile it with and without the offending flags and grep for "false" and "true", respectively, so that dustmite doesn't barge ahead and just replace the assert with something that vacuously fails. :-P)
Looks like I will have to look into the generated asm next...
Comment #23 by hsteoh — 2016-01-20T02:42:03Z
There appears to be a codegen bug in dmd. Prior to the idiv instruction (integer division, at n/m), it needs to load EDX:EAX with the (extended) value of n. EAX is loaded correctly but EDX is not correctly zeroed (since we are dividing byte values, EDX should be zeroed). As a result, the idiv instruction will produce a garbage value.
In the code compiled without -O -inline, there is an "xor %edx,%edx" instruction prior to the idiv. Apparently, -O and -inline interacts together badly and this instruction somehow gets elided.
Here's the disassembly of main(), compiled with dmd -O -inline (with some embedded comments mapping sections to various source code snippets):
------
0000000000438bc8 <_Dmain>:
438bc8: 55 push %rbp
438bc9: 48 8b ec mov %rsp,%rbp
438bcc: 48 81 ec 80 00 00 00 sub $0x80,%rsp
438bd3: 48 89 5d 88 mov %rbx,-0x78(%rbp)
438bd7: 4c 89 65 90 mov %r12,-0x70(%rbp)
438bdb: 4c 89 6d 98 mov %r13,-0x68(%rbp)
438bdf: 4c 89 75 a0 mov %r14,-0x60(%rbp)
438be3: 4c 89 7d a8 mov %r15,-0x58(%rbp)
438be7: be 01 00 00 00 mov $0x1,%esi
438bec: bf 30 27 68 00 mov $0x682730,%edi
// testValues = [-2]
438bf1: e8 b2 21 00 00 callq 43ada8 <_d_arrayliteralTX>
438bf6: 49 89 c4 mov %rax,%r12
438bf9: 41 c6 04 24 fe movb $0xfe,(%r12)
438bfe: 4c 89 65 c8 mov %r12,-0x38(%rbp)
438c02: 48 c7 45 c0 01 00 00 movq $0x1,-0x40(%rbp)
438c09: 00
438c0a: 31 db xor %ebx,%ebx
// foreach(m; testValues)
438c0c: 48 83 fb 01 cmp $0x1,%rbx
438c10: 72 0a jb 438c1c <_Dmain+0x54>
438c12: bf 16 00 00 00 mov $0x16,%edi
// array bounds check
438c17: e8 94 07 00 00 callq 4393b0 <_D4test7__arrayZ>
438c1c: 48 8b 45 c8 mov -0x38(%rbp),%rax
// ecx := testValues[i]
438c20: 0f be 0c 03 movsbl (%rbx,%rax,1),%ecx
// -0x8(%rbp) := (byte) m
438c24: 88 4d f8 mov %cl,-0x8(%rbp)
438c27: 45 31 e4 xor %r12d,%r12d
438c2a: 48 89 5d f0 mov %rbx,-0x10(%rbp)
// foreach (n; testValues)
438c2e: 49 83 fc 01 cmp $0x1,%r12
438c32: 72 0a jb 438c3e <_Dmain+0x76>
438c34: bf 17 00 00 00 mov $0x17,%edi
// array bounds check
438c39: e8 72 07 00 00 callq 4393b0 <_D4test7__arrayZ>
438c3e: 48 8b 55 c8 mov -0x38(%rbp),%rdx
// ebx := testValues[i]
438c42: 41 0f be 1c 14 movsbl (%r12,%rdx,1),%ebx
// -0x7(%rbp) := (byte) n
438c47: 88 5d f9 mov %bl,-0x7(%rbp)
// extend to 32 bits
438c4a: 0f be db movsbl %bl,%ebx
438c4d: 89 5d b0 mov %ebx,-0x50(%rbp)
438c50: db 45 b0 fildl -0x50(%rbp)
438c53: 0f be 45 f8 movsbl -0x8(%rbp),%eax
438c57: 89 45 b0 mov %eax,-0x50(%rbp)
438c5a: db 45 b0 fildl -0x50(%rbp)
// theory = cast(real)n / m;
438c5d: de f9 fdivrp %st,%st(1)
438c5f: db 7d d0 fstpt -0x30(%rbp)
438c62: 66 c7 45 da 00 00 movw $0x0,-0x26(%rbp)
438c68: c7 45 dc 00 00 00 00 movl $0x0,-0x24(%rbp)
438c6f: f2 48 0f 10 45 d0 rex.W movsd -0x30(%rbp),%xmm0
438c75: f2 48 0f 11 45 e0 rex.W movsd %xmm0,-0x20(%rbp)
438c7b: f2 48 0f 10 4d d8 rex.W movsd -0x28(%rbp),%xmm1
438c81: f2 48 0f 11 4d e8 rex.W movsd %xmm1,-0x18(%rbp)
438c87: 0f b7 4d d8 movzwl -0x28(%rbp),%ecx
// (inlined isNan): EXPMASK & (cast(ushort*)&x)[EXPPOS_SHORT]
438c8b: 66 81 e1 ff 7f and $0x7fff,%cx
438c90: ba ff 7f 00 00 mov $0x7fff,%edx
// e == EXPMASK
438c95: 66 3b ca cmp %dx,%cx
438c98: 75 13 jne 438cad <_Dmain+0xe5>
// rax := -0x20(%rbp) == ps
438c9a: 48 8b 45 e0 mov -0x20(%rbp),%rax
// isNaN: ps & 0x7fff_ffff_ffff_ffff
438c9e: 48 bb ff ff ff ff ff movabs $0x7fffffffffffffff,%rbx
438ca5: ff ff 7f
438ca8: 48 85 c3 test %rax,%rbx
438cab: 75 05 jne 438cb2 <_Dmain+0xea>
// isNaN == false
438cad: 45 31 ed xor %r13d,%r13d
// r13d := return value of isNaN
438cb0: eb 06 jmp 438cb8 <_Dmain+0xf0>
// isNaN == true
438cb2: 41 bd 01 00 00 00 mov $0x1,%r13d
// r13d := return value of isNaN
// NOTE: r13d == thrInval (enregistered)
// esi := IntFlag.NULL
438cb8: 31 f6 xor %esi,%esi
438cba: 64 48 8b 0c 25 00 00 mov %fs:0x0,%rcx
438cc1: 00 00
438cc3: 48 8b 15 66 91 24 00 mov 0x249166(%rip),%rdx # 681e30 <_DYNAMIC+0x258>
438cca: 89 34 11 mov %esi,(%rcx,%rdx,1)
// (inlined safeDiv): right == 0
438ccd: 38 75 f8 cmp %dh,-0x8(%rbp)
// r14b := div0 = (right == 0)
438cd0: 41 0f 94 c6 sete %r14b
// eax := left == n
438cd4: 0f be 45 f9 movsbl -0x7(%rbp),%eax
// left == int.min
438cd8: 3d 00 00 00 80 cmp $0x80000000,%eax
438cdd: 75 06 jne 438ce5 <_Dmain+0x11d>
// cast(bool)right
438cdf: 40 38 75 f8 cmp %sil,-0x8(%rbp)
438ce3: 75 05 jne 438cea <_Dmain+0x122>
// r15d := posOver := false
438ce5: 45 31 ff xor %r15d,%r15d
438ce8: eb 06 jmp 438cf0 <_Dmain+0x128>
// r15d := posOver := true
438cea: 41 bf 01 00 00 00 mov $0x1,%r15d
// if (div0)
438cf0: 45 84 f6 test %r14b,%r14b
************* PROBLEM: EDX not zeroed here *********************
438cf3: 74 1e je 438d13 <_Dmain+0x14b>
438cf5: 41 80 ff 01 cmp $0x1,%r15b
438cf9: bb 01 00 00 00 mov $0x1,%ebx
438cfe: 64 48 8b 0c 25 00 00 mov %fs:0x0,%rcx
438d05: 00 00
438d07: 48 8b 15 22 91 24 00 mov 0x249122(%rip),%rdx # 681e30 <_DYNAMIC+0x258>
438d0e: 89 1c 11 mov %ebx,(%rcx,%rdx,1)
// return 0;
438d11: eb 0b jmp 438d1e <_Dmain+0x156>
// >>from: 438cf3 (if (!div0))
// return left / right
// edi := m
438d13: 40 0f be 7d f8 rex movsbl -0x8(%rbp),%edi
438d18: 99 cltd
// left / right
// (edi == right == m)
// idiv: EDX:EAX / edi <--- *** PROBLEM: EDX not loaded with correct value
// eax := quotient
// edx := remainder
438d19: f7 ff idiv %edi
438d1b: 48 89 c6 mov %rax,%rsi
// >>from: safeDiv: if (div0)
438d1e: 89 75 b0 mov %esi,-0x50(%rbp)
438d21: db 45 b0 fildl -0x50(%rbp)
438d24: db 6d d0 fldt -0x30(%rbp)
438d27: df e9 fucomip %st(1),%st
438d29: dd d8 fstp %st(0)
438d2b: bf 01 00 00 00 mov $0x1,%edi
438d30: 7a 02 jp 438d34 <_Dmain+0x16c>
438d32: 74 02 je 438d36 <_Dmain+0x16e>
438d34: 31 ff xor %edi,%edi
438d36: e8 b5 00 00 00 callq 438df0 <_D3std5stdio14__T7writelnTbZ7writelnFNfbZv>
438d3b: 45 84 ed test %r13b,%r13b
438d3e: 74 17 je 438d57 <_Dmain+0x18f>
438d40: 0f be 45 f8 movsbl -0x8(%rbp),%eax
438d44: 64 48 8b 0c 25 00 00 mov %fs:0x0,%rcx
438d4b: 00 00
438d4d: 48 8b 15 ec 90 24 00 mov 0x2490ec(%rip),%rdx # 681e40 <_DYNAMIC+0x268>
438d54: 89 04 11 mov %eax,(%rcx,%rdx,1)
438d57: 49 ff c4 inc %r12
438d5a: 49 83 fc 01 cmp $0x1,%r12
// end inner loop
438d5e: 0f 82 ca fe ff ff jb 438c2e <_Dmain+0x66>
438d64: 48 8b 5d f0 mov -0x10(%rbp),%rbx
438d68: 48 ff c3 inc %rbx
438d6b: 48 83 fb 01 cmp $0x1,%rbx
// end outer loop
438d6f: 0f 82 97 fe ff ff jb 438c0c <_Dmain+0x44>
438d75: 31 c0 xor %eax,%eax
438d77: 48 8b 5d 88 mov -0x78(%rbp),%rbx
438d7b: 4c 8b 65 90 mov -0x70(%rbp),%r12
438d7f: 4c 8b 6d 98 mov -0x68(%rbp),%r13
438d83: 4c 8b 75 a0 mov -0x60(%rbp),%r14
438d87: 4c 8b 7d a8 mov -0x58(%rbp),%r15
438d8b: 48 8b e5 mov %rbp,%rsp
438d8e: 5d pop %rbp
438d8f: c3 retq
------
Comment #25 by thomas.bockman — 2016-01-20T19:48:38Z
The similar crashes I was getting with LDC turn out to be unrelated to this issue, despite the similar symptoms:
https://issues.dlang.org/show_bug.cgi?id=15586
(It's a bug in Phobos, which I have already submitted a PR for.)
Comment #26 by yebblies — 2016-01-21T10:26:26Z
Are you sure that's where it's going wrong?
This line
438d18: 99 cltd
Is sign extending AX -> DX:AX which appears to be correct, no?
Comment #27 by hsteoh — 2016-01-21T22:58:26Z
Argh you're right, I missed that... that'll teach me to analyze assembly code without stepping through it in a debugger...
Comment #28 by yebblies — 2016-01-21T23:25:18Z
(In reply to hsteoh from comment #27)
> Argh you're right, I missed that... that'll teach me to analyze assembly
> code without stepping through it in a debugger...
Honestly I didn't know that instruction existed until I looked at this code. If you can find the actual error let me know.
Comment #29 by hsteoh — 2016-01-21T23:53:58Z
Alright, I think I found the real culprit now.
Still referencing the asm dump I posted, the problematic spot is here:
...
438cb8: 31 f6 xor %esi,%esi
438cba: 64 48 8b 0c 25 00 00 mov %fs:0x0,%rcx
438cc1: 00 00
438cc3: 48 8b 15 66 91 24 00 mov 0x249166(%rip),%rdx # 681e30 <_DYNAMIC+0x258>
438cca: 89 34 11 mov %esi,(%rcx,%rdx,1)
***** PROBLEM: rdx at this point contains -288 (0xfffffee0)
438ccd: 38 75 f8 cmp %dh,-0x8(%rbp)
438cd0: 41 0f 94 c6 sete %r14b
...
I'm not exactly sure what exactly is the load from 0x249166(%rip) for, or why the compiler would want to emit such a thing here, because this part of the code is computing the value of "(right == 0)" (from the top of the safeDiv function, here inlined). The corresponding part of the good (unoptimized) version of the code reads:
... (NOTE: from a different disassembly)
438bb1: 89 7d f0 mov %edi,-0x10(%rbp)
438bb4: 89 75 f8 mov %esi,-0x8(%rbp)
// right == 0
438bb7: 80 7d f0 00 cmpb $0x0,-0x10(%rbp)
438bbb: 0f 94 c0 sete %al
...
Here, %edi contains the parameter 'right', which is stored in the local stack variable -0x10(%rbp) and then compared against 0.
In the bad (optimized) version at the top, however, it's comparing the local variable `m` (which, from an earlier part of the dump, can be seen to be stored in -0x8(%rbp)) against %dh, which doesn't seem to make sense. Why does the compiler emit a comparison agains %dh instead of 0x0? Is it assuming that %dh is zero? Why would it make such as assumption?
After this point, the value of div0 is wrong, and the code proceeds down a different path than it should.
Comment #30 by yebblies — 2016-03-20T06:47:25Z
(In reply to hsteoh from comment #29)
>
> In the bad (optimized) version at the top, however, it's comparing the local
> variable `m` (which, from an earlier part of the dump, can be seen to be
> stored in -0x8(%rbp)) against %dh, which doesn't seem to make sense. Why
> does the compiler emit a comparison agains %dh instead of 0x0? Is it
> assuming that %dh is zero? Why would it make such as assumption?
>
> After this point, the value of div0 is wrong, and the code proceeds down a
> different path than it should.
Yeah, the compiler is incorrectly assuming that dh contains zero. In cdnot in cod2.c, there is the check
if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,0,®))
which uses the cmp mem, reg form if it finds a register that contains zero. I'm not sure why it returns true and sets reg to 6 though.