Comment #0 by bearophile_hugs — 2013-04-10T19:38:48Z
I suggest that the compiler inline vector operations as ordinary scalar code when the arrays involved have lengths that are statically known to be small.
The following two benchmarks compute the number of Pythagorean triples and show the performance difference between using a vector sum on ulong[2] and doing the same with a struct that defines opOpAssign("+").
The first version runs in about 5.8 seconds; the second version runs in about 0.7 seconds.
// First version - - - - - - - - - -
import std.stdio;
// Recursively counts Pythagorean triples whose perimeter a + b + c does not
// exceed lim, starting by default from the (3, 4, 5) triple.
//
// Returns a two-element static array:
//   [0] gains 1 for every triple (a, b, c) visited with perimeter <= lim;
//   [1] gains lim / perimeter (integer division) for every such triple.
// NOTE(review): presumably [0] is the count of primitive triples and [1] the
// count of all triples including multiples -- confirm.
ulong[2] tri(in ulong lim, in ulong a=3, in ulong b=4, in ulong c=5) pure {
immutable l = a + b + c; // perimeter of the current triple
if (l > lim)
return [0, 0]; // perimeter too large: contributes nothing and ends this branch
ulong[2] r = [1, lim / l]; // contribution of the current triple itself
// Add the results for three triples derived from (a, b, c). Each statement is
// an element-wise array operation between two ulong[2] values; this is the
// construct whose cost this benchmark measures.
// All arithmetic is on ulong, so intermediate terms such as a - 2*b or -a wrap
// modulo 2^64. NOTE(review): the final values are presumably non-negative, so
// the wrap-around cancels out -- confirm.
r[] += tri(lim, a - 2*b + 2*c, 2*a - b + 2*c, 2*a - 2*b + 3*c)[];
r[] += tri(lim, a + 2*b + 2*c, 2*a + b + 2*c, 2*a + 2*b + 3*c)[];
r[] += tri(lim, -a + 2*b + 2*c, -2*a + b + 2*c, -2*a + 2*b + 3*c)[];
return r;
}
// Benchmark entry point: evaluates tri for a limit of 10^8 and prints the result.
void main()
{
    immutable limit = 10 ^^ 8;
    writeln(tri(limit));
}
//- - - - - - - - - - - - - - -
// Second version - - - - - - - - - -
import std.stdio;
/// Pair of unsigned 64-bit counters that supports field-wise `+=`.
struct Pair {
    ulong x, y;

    /// Adds `p2` to this pair field by field.
    ///
    /// The template constraint restricts this overload to `+=`. Without it,
    /// any compound assignment (`-=`, `*=`, ...) would instantiate the
    /// template and silently perform an addition.
    void opOpAssign(string op = "+")(in Pair p2) pure nothrow
    if (op == "+")
    {
        x += p2.x;
        y += p2.y;
    }
}
// Recursively counts Pythagorean triples whose perimeter a + b + c does not
// exceed lim, starting by default from the (3, 4, 5) triple.
//
// Returns a Pair:
//   x gains 1 for every triple (a, b, c) visited with perimeter <= lim;
//   y gains lim / perimeter (integer division) for every such triple.
// NOTE(review): presumably x is the count of primitive triples and y the
// count of all triples including multiples -- confirm.
Pair tri(in ulong lim, in ulong a=3, in ulong b=4, in ulong c=5) pure {
immutable l = a + b + c; // perimeter of the current triple
if (l > lim)
return Pair(); // default-initialized pair: contributes nothing and ends this branch
auto r = Pair(1, lim / l); // contribution of the current triple itself
// Add the results for three triples derived from (a, b, c). Each += goes
// through Pair.opOpAssign, i.e. two scalar additions.
// All arithmetic is on ulong, so intermediate terms such as a - 2*b or -a wrap
// modulo 2^64. NOTE(review): the final values are presumably non-negative, so
// the wrap-around cancels out -- confirm.
r += tri(lim, a - 2*b + 2*c, 2*a - b + 2*c, 2*a - 2*b + 3*c);
r += tri(lim, a + 2*b + 2*c, 2*a + b + 2*c, 2*a + 2*b + 3*c);
r += tri(lim, -a + 2*b + 2*c, -2*a + b + 2*c, -2*a + 2*b + 3*c);
return r;
}
// Benchmark entry point: evaluates tri for a limit of 10^8 and prints the result.
void main()
{
    immutable limit = 10 ^^ 8;
    writeln(tri(limit));
}
//- - - - - - - - - - - - - - -
Comment #1 by robert.schadek — 2024-12-13T18:05:59Z