| leonardo ( @ 2009-02-09 14:01:00 |
| Entry tags: | benchmark, c++, d language, programming |
Struct/class benchmark with D and C++
I think the (close) future of D compilers is in LDC. I can accept the DMD backend to be not much efficient, but seeing how C programs compiled with LLVM-GCC are about as efficient as the ones compiled with GCC, I want D to run fast when compiled with LDC. So I've run similar (but not equal) benchmarks with LDC (D1 compiler, 32 bit, adapted to Tango, that has a GC partially different from the Phobos one, so memory allocation timings are different and usually better).
As usual benchmarks are very tricky things, so if you see things to fix, please tell me. I may redo the benchmarks.
---------------------
TIMINGS, N = 28, best of 3: fibs_c++: 0.27 s fibs_d: 0.38 s fib_c++: 3.06 s fib2_d: 8.50 s fib_d: 1.07 s (with scope) fib2_c++: 3.17 s (virtual method) With llvm-g++: fibs_c++: 0.31 s fib_c++: 2.73 s fib2_c++: 2.83 s (virtual method)
The results don't look much good for D.
At the bottom of this post you can see part of the ASM produced by LDC and GCC.
The slowness in the struct-based case of the struct may come from the fact that GCC seems to have unrolled lot of recursive calls, while LDC has not done it. Maybe (probably) there are ways to do the same with LLVM (and LDC) too.
The slowness of the class-based code may partially come from the virtual nature of the calls (absent in C++) and from time spent by the GC. I have tried to add the "final" keyword everywhere (in the D class code) with no difference in running time.
----------------------------------------
UPDATE Feb 11 2009:
I timed two more versions:
An alternative version of fibs_d, with "scope" where each struct is defined, like:
scope auto f1 = Fib(_value - 1);
But of course the timing is the same, it's useless, because scope acts where there's a memory allocation.
I have tried a C++ version that is more similar to the D code, using a virtual function:
virtual int value() {
The results (fi2_c++, code not shown) are a bit worse: 3.17 s instead of 3.06 s.
I have tried to make the method in fib_d final, so it's not virtual (as in C++):
final int value() {
but the running time is unchanged or a bit worse.
Then to show a more apple-to-apple comparison I have timed the C++ code using llvm-g++:
fibs_c++: 0.31 s
fib_c++: 2.73 s
fib2_c++: 2.83 s
The fibs timing is intermediate, while fib_c/fib2_c is now faster. It's a lot a matter of how the recursivity is unrolled, I think.
Benchmark details: CPU: Pentium3 500 Mhz, 256 MB RAM OS: Ubuntu 8.10 Timings done with the 'time' command, 'real' timings, best of 3 runs. Code compiled with: g++ -O3 -s fibs_cpp.cpp -o fibs_cpp ldc -O3 -release -inline fibs_d.d Compilers used: LLVM D Compiler rev.939, based on DMD v1.039 and llvm 2.4 (Wed Feb 4 23:09:12 2009) gcc version 4.3.2 (Ubuntu 4.3.2-1ubuntu12)
----------------------------------------
UPDATE Jul 3 2009:
I have done tests again on Pubuntu, with GCC 4.2.4 and LDC based on DMD v1.045 and llvm 2.6svn (Thu Jul 2 23:07:48 2009):
TIMINGS, N = 28, best of 3: fibs_c++: 0.02 fibs_d: 0.04 fib_c++: 0.54 fib2_d: 0.62 fib_d: 0.18 (with scope) fib2_c++: 0.56 (virtual method)Now timings are similar to the C++ code. LDC isn't able still to de-virtualize the value() method.
(A problem of LDC is that it's not a compiler, but a front-end for a compiler developed by other people, so when LDC doesn't perform certain optimizations it's sometimes not easy to understand where the problem is.)
----------------------------------------
// fibs_cpp.cpp
#include "stdio.h"
#define N 28
class Fib {
private:
int _value;
public:
Fib(int n) { _value = n; }
int value() {
if (_value <= 2)
return 1;
Fib f1 = Fib(_value - 1);
Fib f2 = Fib(_value - 2);
return f1.value() + f2.value();
}
};
int main() {
int tot = 0;
for (int i = 0; i < 10; i++) {
Fib x = Fib(N);
tot += x.value();
}
printf("tot: %d\n", tot);
return 0;
}
---------------------
// fibs_d.d
import tango.io.Stdout: Stdout;
const int N = 28;
struct Fib {
private int _value;
int value() {
if (_value <= 2)
return 1;
auto f1 = Fib(_value - 1);
auto f2 = Fib(_value - 2);
return f1.value + f2.value;
}
}
void main() {
int tot;
for (int i; i < 10; i++) {
auto f = Fib(N);
tot += f.value;
}
Stdout.formatln("tot: {}", tot);
}
---------------------
// fib_cpp.cpp
#include "stdio.h"
#define N 28
class Fib {
private:
int _value;
public:
Fib(int n) { _value = n; }
int value() {
if (_value <= 2)
return 1;
Fib *f1 = new Fib(_value - 1);
Fib *f2 = new Fib(_value - 2);
int n1 = f1->value();
int n2 = f2->value();
delete(f1);
delete(f2);
return n1 + n2;
}
};
int main() {
int tot = 0;
for (int i = 0; i < 10; i++) {
Fib *x = new Fib(N);
tot += x->value();
delete(x);
}
printf("tot: %d\n", tot);
return 0;
}
---------------------
// fib_d.d
import tango.io.Stdout: Stdout;
const int N = 28;
class Fib {
private int _value;
this(int n) { _value = n; }
int value() {
if (_value <= 2)
return 1;
scope f1 = new Fib(_value - 1);
scope f2 = new Fib(_value - 2);
return f1.value + f2.value;
}
}
void main() {
int tot;
for (int i; i < 10; i++) {
scope x = new Fib(N);
tot += x.value;
}
Stdout.formatln("tot: {}", tot);
}
---------------------
// fib2_d.d
import tango.io.Stdout: Stdout;
const int N = 28;
class Fib {
private int _value;
this(int n) { _value = n; }
int value() {
if (_value <= 2)
return 1;
auto f1 = new Fib(_value - 1);
auto f2 = new Fib(_value - 2);
return f1.value + f2.value;
}
}
void main() {
int tot;
for (int i; i < 10; i++) {
auto x = new Fib(N);
tot += x.value;
}
Stdout.formatln("tot: {}", tot);
}
---------------------
ASM of fib_d.d relative to the main and struct method:
.text
.align 16
.globl _Dmain
.type _Dmain,@function
_Dmain:
.Leh_func_begin1:
.Llabel1:
pushl %ebx
pushl %edi
pushl %esi
subl $48, %esp
xorl %esi, %esi
movl %esi, %edi
.align 16
.LBB1_1: # forbody
movl $27, 40(%esp)
movl $26, 32(%esp)
leal 40(%esp), %eax
call _D4fibs3Fib5valueMFZi
movl %eax, %ebx
leal 32(%esp), %eax
call _D4fibs3Fib5valueMFZi
addl %edi, %ebx
addl %eax, %ebx
incl %esi
cmpl $10, %esi
movl %ebx, %edi
jne .LBB1_1 # forbody
.LBB1_2: # endfor
movl _D5tango2io6Stdout6StdoutC5tango2io6stream6Format20__T12FormatOutputTaZ12FormatOutput, %eax
movl %ebx, 24(%esp)
leal 24(%esp), %edx
movl %edx, 8(%esp)
movl $.str1, 16(%esp)
movl $7, 12(%esp)
movl $._arguments.storage, 4(%esp)
movl $1, (%esp)
call _D5tango2io6stream6Format20__T12FormatOutputTaZ12FormatOutput8formatlnMFAaYC5tango2io6stream6Format20__T12FormatOutputTaZ12FormatOutput
subl $20, %esp
xorl %eax, %eax
addl $48, %esp
popl %esi
popl %edi
popl %ebx
ret $8
.size _Dmain, .-_Dmain
.Leh_func_end1:
.align 16
.globl _D4fibs3Fib5valueMFZi
.type _D4fibs3Fib5valueMFZi,@function
_D4fibs3Fib5valueMFZi:
pushl %esi
subl $16, %esp
movl (%eax), %eax
cmpl $2, %eax
jg .LBB2_3 # endif
.LBB2_1: # if
movl $1, %eax
.LBB2_2: # if
addl $16, %esp
popl %esi
ret
.LBB2_3: # endif
leal -1(%eax), %ecx
movl %ecx, 8(%esp)
addl $4294967294, %eax
movl %eax, (%esp)
leal 8(%esp), %eax
call _D4fibs3Fib5valueMFZi
movl %eax, %esi
leal (%esp), %eax
call _D4fibs3Fib5valueMFZi
addl %esi, %eax
jmp .LBB2_2 # if
.size _D4fibs3Fib5valueMFZi, .-_D4fibs3Fib5valueMFZi
---------------------
ASM of fibs_cpp.cpp relative to the main and struct method:
.section .text._ZN3Fib5valueEv,"axG",@progbits,_ZN3Fib5valueEv,comdat
.align 2
.p2align 4,,15
.weak _ZN3Fib5valueEv
.type _ZN3Fib5valueEv, @function
_ZN3Fib5valueEv:
.LFB36:
pushl %ebp
.LCFI0:
movl %esp, %ebp
.LCFI1:
subl $88, %esp
.LCFI2:
movl 8(%ebp), %eax
movl %ebx, -12(%ebp)
.LCFI3:
movl %esi, -8(%ebp)
.LCFI4:
movl %edi, -4(%ebp)
.LCFI5:
movl (%eax), %edx
movl $1, %eax
cmpl $2, %edx
jg .L46
.L3:
movl -12(%ebp), %ebx
movl -8(%ebp), %esi
movl -4(%ebp), %edi
movl %ebp, %esp
popl %ebp
ret
.p2align 4,,7
.p2align 3
.L46:
cmpl $3, %edx
leal -2(%edx), %esi
movl $1, -84(%ebp)
jne .L47
cmpl $2, %esi
movl $1, %eax
jg .L48
.L19:
addl -84(%ebp), %eax
jmp .L3
.p2align 4,,7
.p2align 3
.L47:
cmpl $2, %esi
leal -3(%edx), %edi
movl $1, -80(%ebp)
jg .L49
.L7:
cmpl $2, %edi
movl $1, %eax
jg .L50
.L13:
addl -80(%ebp), %eax
cmpl $2, %esi
movl %eax, -84(%ebp)
movl $1, %eax
jle .L19
.L48:
cmpl $3, %esi
leal -2(%esi), %edi
movl $1, -60(%ebp)
jne .L51
.L21:
cmpl $2, %edi
movl $1, %eax
jg .L52
.L31:
addl -60(%ebp), %eax
jmp .L19
.p2align 4,,7
.p2align 3
.L49:
leal -4(%edx), %eax
cmpl $2, %edi
movl %eax, -76(%ebp)
movl $1, -72(%ebp)
jle .L9
movl %eax, -16(%ebp)
leal -5(%edx), %eax
movl %eax, -20(%ebp)
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -20(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
leal (%eax,%ebx), %ebx
movl %ebx, -72(%ebp)
.L9:
cmpl $2, -76(%ebp)
movl $1, %eax
jle .L11
movl -76(%ebp), %eax
subl $1, %eax
movl %eax, -20(%ebp)
movl -76(%ebp), %eax
subl $2, %eax
movl %eax, -16(%ebp)
leal -20(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
addl %ebx, %eax
.L11:
addl -72(%ebp), %eax
movl %eax, -80(%ebp)
jmp .L7
.p2align 4,,7
.p2align 3
.L52:
cmpl $3, %edi
leal -2(%edi), %esi
movl $1, -44(%ebp)
jne .L53
.L33:
cmpl $2, %esi
movl $1, %eax
jg .L54
.L39:
addl -44(%ebp), %eax
jmp .L31
.p2align 4,,7
.p2align 3
.L51:
leal -3(%esi), %eax
cmpl $2, %edi
movl %eax, -56(%ebp)
movl $1, -52(%ebp)
jle .L23
movl %eax, -16(%ebp)
leal -4(%esi), %eax
movl %eax, -20(%ebp)
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -20(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
leal (%eax,%ebx), %ebx
movl %ebx, -52(%ebp)
.L23:
cmpl $2, -56(%ebp)
movl $1, %eax
jg .L55
.L25:
addl -52(%ebp), %eax
movl %eax, -60(%ebp)
jmp .L21
.p2align 4,,7
.p2align 3
.L50:
leal -2(%edi), %eax
cmpl $3, %edi
movl %eax, -68(%ebp)
movl $1, -64(%ebp)
je .L15
movl %eax, -16(%ebp)
leal -3(%edi), %eax
movl %eax, -20(%ebp)
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -20(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
leal (%eax,%ebx), %ebx
movl %ebx, -64(%ebp)
.L15:
cmpl $2, -68(%ebp)
movl $1, %eax
jle .L17
movl -68(%ebp), %eax
subl $1, %eax
movl %eax, -20(%ebp)
movl -68(%ebp), %eax
subl $2, %eax
movl %eax, -16(%ebp)
leal -20(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
addl %ebx, %eax
.L17:
addl -64(%ebp), %eax
jmp .L13
.p2align 4,,7
.p2align 3
.L55:
movl -56(%ebp), %esi
movl $1, -48(%ebp)
subl $2, %esi
cmpl $3, -56(%ebp)
je .L27
movl -56(%ebp), %eax
movl %esi, -20(%ebp)
subl $3, %eax
movl %eax, -16(%ebp)
leal -20(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
leal (%eax,%ebx), %ebx
movl %ebx, -48(%ebp)
.L27:
cmpl $2, %esi
movl $1, %eax
jle .L29
leal -1(%esi), %eax
movl %eax, -16(%ebp)
leal -2(%esi), %eax
movl %eax, -20(%ebp)
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -20(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
addl %ebx, %eax
.L29:
addl -48(%ebp), %eax
jmp .L25
.p2align 4,,7
.p2align 3
.L54:
cmpl $3, %esi
leal -2(%esi), %edi
movl $1, -32(%ebp)
je .L41
leal -3(%esi), %eax
movl %eax, -16(%ebp)
leal -20(%ebp), %eax
movl %edi, -20(%ebp)
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
leal (%eax,%ebx), %ebx
movl %ebx, -32(%ebp)
.L41:
cmpl $2, %edi
movl $1, %eax
jle .L43
leal -1(%edi), %eax
movl %eax, -16(%ebp)
leal -2(%edi), %eax
movl %eax, -20(%ebp)
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -20(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
addl %ebx, %eax
.L43:
addl -32(%ebp), %eax
jmp .L39
.p2align 4,,7
.p2align 3
.L53:
leal -3(%edi), %eax
cmpl $2, %esi
movl %eax, -40(%ebp)
movl $1, -36(%ebp)
jle .L35
movl %eax, -20(%ebp)
leal -4(%edi), %eax
movl %eax, -16(%ebp)
leal -20(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
leal (%eax,%ebx), %ebx
movl %ebx, -36(%ebp)
.L35:
cmpl $2, -40(%ebp)
movl $1, %eax
jle .L37
movl -40(%ebp), %eax
subl $1, %eax
movl %eax, -16(%ebp)
movl -40(%ebp), %eax
subl $2, %eax
movl %eax, -20(%ebp)
leal -16(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
movl %eax, %ebx
leal -20(%ebp), %eax
movl %eax, (%esp)
call _ZN3Fib5valueEv
addl %ebx, %eax
.L37:
addl -36(%ebp), %eax
movl %eax, -44(%ebp)
jmp .L33
.LFE36:
.size _ZN3Fib5valueEv, .-_ZN3Fib5valueEv
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "tot: %d\n"
.text
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB37:
leal 4(%esp), %ecx
.LCFI6:
andl $-16, %esp
pushl -4(%ecx)
.LCFI7:
pushl %ebp
.LCFI8:
movl %esp, %ebp
.LCFI9:
pushl %edi
.LCFI10:
pushl %esi
.LCFI11:
pushl %ebx
.LCFI12:
pushl %ecx
.LCFI13:
subl $104, %esp
.LCFI14:
leal -20(%ebp), %ebx
movl $27, -20(%ebp)
leal -24(%ebp), %esi
movl $26, -24(%ebp)
movl %ebx, (%esp)
call _ZN3Fib5valueEv
movl %eax, -96(%ebp)
movl %esi, (%esp)
call _ZN3Fib5valueEv
movl $27, -20(%ebp)
movl $26, -24(%ebp)
movl %eax, -92(%ebp)
movl %ebx, (%esp)
call _ZN3Fib5valueEv
movl %eax, -88(%ebp)
movl %esi, (%esp)
call _ZN3Fib5valueEv
movl $27, -20(%ebp)
movl $26, -24(%ebp)
movl %eax, -84(%ebp)
movl %ebx, (%esp)
call _ZN3Fib5valueEv
movl %eax, -80(%ebp)
movl %esi, (%esp)
call _ZN3Fib5valueEv
movl $27, -20(%ebp)
movl $26, -24(%ebp)
movl %eax, -76(%ebp)
movl %ebx, (%esp)
call _ZN3Fib5valueEv
movl %eax, -72(%ebp)
movl %esi, (%esp)
call _ZN3Fib5valueEv
movl $27, -20(%ebp)
movl $26, -24(%ebp)
movl %eax, -68(%ebp)
movl %ebx, (%esp)
call _ZN3Fib5valueEv
movl %eax, -64(%ebp)
movl %esi, (%esp)
call _ZN3Fib5valueEv
movl $27, -20(%ebp)
movl $26, -24(%ebp)
movl %eax, -60(%ebp)
movl %ebx, (%esp)
call _ZN3Fib5valueEv
movl %eax, -56(%ebp)
movl %esi, (%esp)
call _ZN3Fib5valueEv
movl $27, -20(%ebp)
movl $26, -24(%ebp)
movl %eax, -52(%ebp)
movl %ebx, (%esp)
call _ZN3Fib5valueEv
movl %eax, -48(%ebp)
movl %esi, (%esp)
call _ZN3Fib5valueEv
movl $27, -20(%ebp)
movl $26, -24(%ebp)
movl %eax, -44(%ebp)
movl %ebx, (%esp)
call _ZN3Fib5valueEv
movl %eax, -40(%ebp)
movl %esi, (%esp)
call _ZN3Fib5valueEv
movl $27, -20(%ebp)
movl $26, -24(%ebp)
movl %eax, -36(%ebp)
movl %ebx, (%esp)
call _ZN3Fib5valueEv
movl %esi, (%esp)
movl %eax, %edi
call _ZN3Fib5valueEv
movl -84(%ebp), %edx
addl -88(%ebp), %edx
addl -80(%ebp), %edx
addl -76(%ebp), %edx
addl -72(%ebp), %edx
addl -68(%ebp), %edx
addl -64(%ebp), %edx
addl -60(%ebp), %edx
addl -56(%ebp), %edx
addl -52(%ebp), %edx
addl -48(%ebp), %edx
addl -44(%ebp), %edx
addl -40(%ebp), %edx
addl -36(%ebp), %edx
movl $27, -20(%ebp)
movl $26, -24(%ebp)
addl %edi, %edx
addl %eax, %edx
movl -92(%ebp), %eax
addl -96(%ebp), %eax
movl %ebx, (%esp)
leal (%eax,%edx), %edi
call _ZN3Fib5valueEv
movl %esi, (%esp)
movl %eax, %ebx
call _ZN3Fib5valueEv
movl $.LC0, 4(%esp)
movl $1, (%esp)
addl %ebx, %eax
addl %edi, %eax
movl %eax, 8(%esp)
call __printf_chk
addl $104, %esp
xorl %eax, %eax
popl %ecx
popl %ebx
popl %esi
popl %edi
popl %ebp
leal -4(%ecx), %esp
ret
.LFE37:
.size main, .-main
.section .eh_frame,"a",@progbits
.Lframe1:
.long .LECIE1-.LSCIE1
.LSCIE1:
.long 0x0
.byte 0x1
.globl __gxx_personality_v0
.string "zP"
.uleb128 0x1
.sleb128 -4
.byte 0x8
.uleb128 0x5
.byte 0x0
.long __gxx_personality_v0
.byte 0xc
.uleb128 0x4
.uleb128 0x4
.byte 0x88
.uleb128 0x1
.align 4
.LECIE1:
.LSFDE1:
.long .LEFDE1-.LASFDE1
.LASFDE1:
.long .LASFDE1-.Lframe1
.long .LFB36
.long .LFE36-.LFB36
.uleb128 0x0
.byte 0x4
.long .LCFI0-.LFB36
.byte 0xe
.uleb128 0x8
.byte 0x85
.uleb128 0x2
.byte 0x4
.long .LCFI1-.LCFI0
.byte 0xd
.uleb128 0x5
.byte 0x4
.long .LCFI5-.LCFI1
.byte 0x87
.uleb128 0x3
.byte 0x86
.uleb128 0x4
.byte 0x83
.uleb128 0x5
.align 4
.LEFDE1:
.LSFDE3:
.long .LEFDE3-.LASFDE3
.LASFDE3:
.long .LASFDE3-.Lframe1
.long .LFB37
.long .LFE37-.LFB37
.uleb128 0x0
.byte 0x4
.long .LCFI6-.LFB37
.byte 0xc
.uleb128 0x1
.uleb128 0x0
.byte 0x9
.uleb128 0x4
.uleb128 0x1
.byte 0x4
.long .LCFI7-.LCFI6
.byte 0xc
.uleb128 0x4
.uleb128 0x4
.byte 0x4
.long .LCFI8-.LCFI7
.byte 0xe
.uleb128 0x8
.byte 0x85
.uleb128 0x2
.byte 0x4
.long .LCFI9-.LCFI8
.byte 0xd
.uleb128 0x5
.byte 0x4
.long .LCFI13-.LCFI9
.byte 0x84
.uleb128 0x6
.byte 0x83
.uleb128 0x5
.byte 0x86
.uleb128 0x4
.byte 0x87
.uleb128 0x3
.align 4
.LEFDE3:
.ident "GCC: (Ubuntu 4.3.2-1ubuntu12) 4.3.2"
.section .note.GNU-stack,"",@progbits