Fibonacci
Purpose
This is a small benchmark that serves more as a demonstration of the advantage compilers have over interpreters than as a rigorous measurement. The naive recursive implementation of Fibonacci used here is deliberately inefficient, but compilers can run it through multiple optimization passes and ultimately generate considerably more efficient code.
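(As an aside, a genuinely efficient version would drop the exponential recursion entirely; the C sketch below is illustrative only and not part of the benchmark, and computes the same values iteratively in linear time.)

```c
#include <stdio.h>

/* Illustration only, not part of the benchmark: an iterative Fibonacci
   that runs in O(n) time instead of the naive recursion's exponential time. */
unsigned long long fib_iter(unsigned int n) {
    unsigned long long a = 0, b = 1;   /* fib(0) and fib(1) */
    for (unsigned int i = 0; i < n; i++) {
        unsigned long long next = a + b;
        a = b;
        b = next;
    }
    return a;                          /* a now holds fib(n) */
}

int main(void) {
    printf("%llu\n", fib_iter(40));    /* prints 102334155 */
    return 0;
}
```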
Code
Gambol

    fun fib(n) {
      if n < 2 then return n end
      fib(n - 1) + fib(n - 2)
    }

    start = Time.checkpoint()
    result = fib(40)
    elapsed = Time.checkpoint() - start

    print(result)
    print(elapsed)
Gambol (optimized)

    #Gambol.function.nounwind
    #Gambol.function.callingconv.fast
    fun fib(n UInt32) -> UInt32 {
      if n < 2 then return n end
      fib(n - 1) + fib(n - 2)
    }

    start = Time.checkpoint()
    result = fib(40)
    elapsed = Time.checkpoint() - start

    print(result)
    print(elapsed)
Python

    from time import time

    def fib(n):
        if n < 2: return n
        return fib(n - 1) + fib(n - 2)

    start = time()
    result = fib(40)
    elapsed = time() - start

    print(result)
    print(elapsed)
Ruby

    def fib(n)
      return n if n < 2
      fib(n - 1) + fib(n - 2)
    end

    start = Time.now
    result = fib(40)
    elapsed = Time.now - start

    puts result
    puts elapsed
Julia

    using InteractiveUtils  # exports @code_llvm, which prints a function's LLVM IR

    function fib(n)
        if n < 2
            return n
        end
        return fib(n - 1) + fib(n - 2)
    end

    elapsed = @elapsed begin
        result = fib(40)
    end

    println(result)
    println("elapsed ", elapsed)
C

    #include <stdio.h>
    #include <time.h>

    unsigned int fib(unsigned int n) {
        if (n < 2) return n;
        return fib(n - 1) + fib(n - 2);
    }

    int main() {
        clock_t start = clock();
        unsigned int result = fib(40);
        clock_t elapsed = clock();
        double elapsed_ms = ((double)(elapsed - start)) / CLOCKS_PER_SEC * 1000;
        printf("%u\n", result);
        printf("elapsed: %.2fms\n", elapsed_ms);
        return 0;
    }
Java

    public class Fib {
        public static int fib(int n) {
            if (n < 2) return n;
            return fib(n - 1) + fib(n - 2);
        }

        public static void main(String[] args) {
            long start = System.currentTimeMillis();
            long result = fib(40);
            long elapsed = System.currentTimeMillis() - start;
            System.out.println(result);
            System.out.println("elapsed: " + elapsed);
        }
    }
JavaScript

    function fib(n) {
        if (n < 2) return n;
        return fib(n - 1) + fib(n - 2);
    }

    const start = performance.now();
    const result = fib(40);
    const elapsed = performance.now() - start;

    console.log(`${result}`);
    console.log(`elapsed: ${elapsed.toFixed(2)} ms`);
The Generated IR
Some of these languages, including Gambol, use LLVM as their intermediate representation, so we can take a peek into the inner workings of the compiler and see what the algorithm above is ultimately reduced to.
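Dumping this IR is straightforward for the C and Julia versions: clang can emit textual LLVM IR, and Julia's InteractiveUtils package provides the @code_llvm macro (how the Gambol compiler was asked for its IR is not shown here). A minimal sketch for C, with an assumed file name and flags:

```c
/* fib_ir.c -- a minimal file for inspecting clang's LLVM IR output.
   The file name and flags are illustrative, not the author's exact setup:
       clang -O2 -S -emit-llvm fib_ir.c -o fib_ir.ll
   then read fib_ir.ll. (In Julia, `@code_llvm fib(40)` prints the
   corresponding IR from the REPL.) */
unsigned int fib(unsigned int n) {
    if (n < 2) return n;
    return fib(n - 1) + fib(n - 2);
}
```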
Gambol (optimized)

    ; Function Attrs: nofree nosync nounwind memory(none)
    define fastcc i64 @"global.<fibo>.fib"(i64 noundef %0) #3 personality ptr @__gambol_exception_personality {
      %2 = icmp slt i64 %0, 2
      br i1 %2, label %common.ret, label %tailrecurse, !prof !3

    common.ret:                                      ; preds = %tailrecurse, %1
      %accumulator.tr.lcssa = phi i64 [ 0, %1 ], [ %6, %tailrecurse ]
      %.tr.lcssa = phi i64 [ %0, %1 ], [ %5, %tailrecurse ]
      %accumulator.ret.tr = add i64 %.tr.lcssa, %accumulator.tr.lcssa
      ret i64 %accumulator.ret.tr

    tailrecurse:                                     ; preds = %1, %tailrecurse
      %.tr52 = phi i64 [ %5, %tailrecurse ], [ %0, %1 ]
      %accumulator.tr51 = phi i64 [ %6, %tailrecurse ], [ 0, %1 ]
      %3 = add nsw i64 %.tr52, -1
      %4 = tail call fastcc i64 @"global.<fibo>.fib"(i64 %3)
      %5 = add nsw i64 %.tr52, -2
      %6 = add i64 %4, %accumulator.tr51
      %7 = icmp ult i64 %.tr52, 4
      br i1 %7, label %common.ret, label %tailrecurse, !prof !4
    }
C (clang)

    ; Function Attrs: nofree nosync nounwind memory(none) uwtable
    define dso_local i32 @fib(i32 noundef %0) local_unnamed_addr #0 {
      %2 = icmp ult i32 %0, 2
      br i1 %2, label %11, label %3

    3:                                               ; preds = %1, %3
      %4 = phi i32 [ %8, %3 ], [ %0, %1 ]
      %5 = phi i32 [ %9, %3 ], [ 0, %1 ]
      %6 = add i32 %4, -1
      %7 = tail call i32 @fib(i32 noundef %6)
      %8 = add i32 %4, -2
      %9 = add i32 %7, %5
      %10 = icmp ult i32 %8, 2
      br i1 %10, label %11, label %3

    11:                                              ; preds = %3, %1
      %12 = phi i32 [ 0, %1 ], [ %9, %3 ]
      %13 = phi i32 [ %0, %1 ], [ %8, %3 ]
      %14 = add i32 %13, %12
      ret i32 %14
    }
Julia

    ; Function Signature: fib(Int64)
    define i64 @julia_fib_957(i64 signext %"n::Int64") #0 {
    top:
    ; ┌ @ int.jl:83 within `<`
      %0 = icmp sgt i64 %"n::Int64", 1
    ; └
      br i1 %0, label %L4, label %common.ret

    common.ret:                                      ; preds = %L4, %top
      %common.ret.op = phi i64 [ %5, %L4 ], [ %"n::Int64", %top ]
      ret i64 %common.ret.op

    L4:                                              ; preds = %top
    ; ┌ @ int.jl:86 within `-`
      %1 = add nsw i64 %"n::Int64", -1
    ; └
      %2 = call i64 @julia_fib_957(i64 signext %1)
    ; ┌ @ int.jl:86 within `-`
      %3 = add nsw i64 %"n::Int64", -2
    ; └
      %4 = call i64 @julia_fib_957(i64 signext %3)
    ; ┌ @ int.jl:87 within `+`
      %5 = add i64 %4, %2
      br label %common.ret
    ; └
    }
Not the most efficient implementation to begin with, yet both the optimized Gambol and the C executables end up with only one recursive call to fib: LLVM's tail-recursion elimination rewrites the second call into a loop that carries an accumulator. Optimized Gambol and C appear to generate essentially the same code, while Julia's JIT compiler does not show the same level of optimization, which is also reflected in the results below.
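To make that transformation concrete, here is a rough, hand-written C sketch of what the optimized IR corresponds to (the name fib_loop is invented for illustration; this is not compiler output):

```c
#include <stdio.h>

/* A hand-written illustration of LLVM's accumulator-based tail-recursion
   elimination: the fib(n-2) call becomes the next loop iteration, so only
   fib(n-1) survives as a real recursive call, matching the IR above. */
unsigned int fib_loop(unsigned int n) {
    unsigned int acc = 0;            /* accumulator threaded across iterations */
    while (n >= 2) {
        acc += fib_loop(n - 1);      /* the single remaining recursive call */
        n -= 2;                      /* "recurse" on n - 2 by looping instead */
    }
    return acc + n;                  /* n < 2 here, and fib(0)=0, fib(1)=1 */
}

int main(void) {
    printf("%u\n", fib_loop(40));    /* prints 102334155, same as fib(40) */
    return 0;
}
```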
Results
| | Gambol | Gambol Optimized | Python (3.12) | Ruby (3.3) | Julia (1.11) | C (clang 18) | C (gcc 14) | Java (OpenJDK 23) | JavaScript (Node 20) |
|---|---|---|---|---|---|---|---|---|---|
| Time | 1.810s | 467ms | 20.661s | 13.431s | 891ms | 373ms | 240ms | 597ms | 1.892s |