
Fibonacci

Purpose

This small benchmark is less about raw speed and more a demonstration of the advantage compilers have over interpreters. This is not the most efficient implementation of Fibonacci, but compiled languages can run the code through multiple optimization passes and ultimately generate much more efficient code.

Code

Gambol

fun fib(n) {

  if n < 2 then return n end

  fib(n - 1) + fib(n - 2)

}

start = Time.checkpoint()
result = fib(40)
elapsed = Time.checkpoint() - start

print(result)
print(elapsed)

Gambol Optimized

#Gambol.function.nounwind
#Gambol.function.callingconv.fast
fun fib(n UInt32) -> UInt32 {

  if n < 2 then return n end

  fib(n - 1) + fib(n - 2)

}

start = Time.checkpoint()
result = fib(40)
elapsed = Time.checkpoint() - start

print(result)
print(elapsed)

Python

from time import time

def fib(n):

    if n < 2: return n

    return fib(n-1) + fib(n-2)


start = time()
result = fib(40)
elapsed = time() - start

print(result)
print(elapsed)

Ruby

def fib(n)

  return n if n < 2

  fib(n-1) + fib(n-2)

end

start = Time.now()
result = fib(40)
elapsed = Time.now() - start

puts result
puts elapsed

Julia

using InteractiveUtils  # provides @code_llvm for inspecting the generated IR

function fib(n)

  if n < 2 

    return n

  end

  return fib(n-1) + fib(n-2)

end

elapsed = @elapsed begin

  result = fib(40)

end

println(result)
println("elapsed ", elapsed)
#include <stdio.h>
#include <time.h>

unsigned int fib(unsigned int n) {

    if (n < 2) return n;

    return fib(n-1) + fib(n-2);
}

int main() {

    clock_t start = clock();

    unsigned int result = fib(40);

    clock_t elapsed = clock();

    double elapsed_ms = ((double)(elapsed - start)) / CLOCKS_PER_SEC * 1000;

    printf("%llu\n", result);
    printf("elapsed: %.2fms\n", elapsed_ms);

    return 0;
}

Java

public class Fib {

    public static int fib(int n) {

        if (n < 2) return n;

        return fib(n-1) + fib(n-2);
    }

    public static void main(String[] args) {

        long start = System.currentTimeMillis();

        long result = fib(40);

        long elapsed = System.currentTimeMillis() - start;

        System.out.println(result);
        System.out.println("elapsed: " + elapsed);
    }
}

JavaScript

function fib(n) {

    if (n < 2) return n;

    return fib(n-1) + fib(n-2);
}

const start = performance.now();

const result = fib(40);

const elapsed = performance.now() - start;

console.log(`${result}`);
console.log(`elapsed: ${elapsed.toFixed(2)} ms`);

The Generated IR

Several of these languages, including Gambol, use LLVM as their intermediate representation. We can therefore take a sneak peek into the inner workings of the compilers and see what the algorithm above is ultimately reduced to.
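The Julia listing further down can be reproduced directly in a session; a minimal sketch, assuming the fib definition above has already been evaluated (the @code_llvm macro is what the InteractiveUtils import in the Julia snippet provides):

using InteractiveUtils   # @code_llvm lives here

# Print the LLVM IR that Julia's JIT generates for fib(::Int64).
@code_llvm fib(40)

The C listing can be obtained from clang with its -S and -emit-llvm flags at -O2; Gambol presumably offers a similar switch in its own toolchain.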

Gambol Optimized

; Function Attrs: nofree nosync nounwind memory(none)
define fastcc i64 @"global.<fibo>.fib"(i64 noundef %0) #3 personality ptr @__gambol_exception_personality {
  %2 = icmp slt i64 %0, 2
  br i1 %2, label %common.ret, label %tailrecurse, !prof !3

common.ret:                                       ; preds = %tailrecurse, %1
  %accumulator.tr.lcssa = phi i64 [ 0, %1 ], [ %6, %tailrecurse ]
  %.tr.lcssa = phi i64 [ %0, %1 ], [ %5, %tailrecurse ]
  %accumulator.ret.tr = add i64 %.tr.lcssa, %accumulator.tr.lcssa
  ret i64 %accumulator.ret.tr

tailrecurse:                                      ; preds = %1, %tailrecurse
  %.tr52 = phi i64 [ %5, %tailrecurse ], [ %0, %1 ]
  %accumulator.tr51 = phi i64 [ %6, %tailrecurse ], [ 0, %1 ]
  %3 = add nsw i64 %.tr52, -1
  %4 = tail call fastcc i64 @"global.<fibo>.fib"(i64 %3)
  %5 = add nsw i64 %.tr52, -2
  %6 = add i64 %4, %accumulator.tr51
  %7 = icmp ult i64 %.tr52, 4
  br i1 %7, label %common.ret, label %tailrecurse, !prof !4
}

C (clang)

; Function Attrs: nofree nosync nounwind memory(none) uwtable
define dso_local i32 @fib(i32 noundef %0) local_unnamed_addr #0 {
  %2 = icmp ult i32 %0, 2
  br i1 %2, label %11, label %3

3:                                                ; preds = %1, %3
  %4 = phi i32 [ %8, %3 ], [ %0, %1 ]
  %5 = phi i32 [ %9, %3 ], [ 0, %1 ]
  %6 = add i32 %4, -1
  %7 = tail call i32 @fib(i32 noundef %6)
  %8 = add i32 %4, -2
  %9 = add i32 %7, %5
  %10 = icmp ult i32 %8, 2
  br i1 %10, label %11, label %3

11:                                               ; preds = %3, %1
  %12 = phi i32 [ 0, %1 ], [ %9, %3 ]
  %13 = phi i32 [ %0, %1 ], [ %8, %3 ]
  %14 = add i32 %13, %12
  ret i32 %14
}

Julia

; Function Signature: fib(Int64)
define i64 @julia_fib_957(i64 signext %"n::Int64") #0 {
top:
; ┌ @ int.jl:83 within `<`
   %0 = icmp sgt i64 %"n::Int64", 1
; └
  br i1 %0, label %L4, label %common.ret

common.ret:                                       ; preds = %L4, %top
  %common.ret.op = phi i64 [ %5, %L4 ], [ %"n::Int64", %top ]
  ret i64 %common.ret.op

L4:                                               ; preds = %top
; ┌ @ int.jl:86 within `-`
   %1 = add nsw i64 %"n::Int64", -1
; └
  %2 = call i64 @julia_fib_957(i64 signext %1)
; ┌ @ int.jl:86 within `-`
   %3 = add nsw i64 %"n::Int64", -2
; └
  %4 = call i64 @julia_fib_957(i64 signext %3)
; ┌ @ int.jl:87 within `+`
   %5 = add i64 %4, %2
   br label %common.ret
; └
}

Still not the most efficient implementation, but in both the optimized Gambol and the C executables only one recursive call to fib remains: the optimizer has turned the second call into a loop that carries an accumulator. Optimized Gambol and C end up generating essentially the same code, while Julia's JIT compiler does not apply the same level of optimization, which is also reflected in the results below.
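To make the loop in the IR more concrete, here is a rough Julia sketch (written by hand, not taken from any compiler's output) of the shape the optimizer arrives at: the call to fib(n-2) is folded into a loop that carries an accumulator, leaving a single true recursive call.

# Hand-written sketch of the optimized control flow shown in the IR above.
function fib_loop(n)
    acc = 0
    while n >= 2
        acc += fib_loop(n - 1)   # the one remaining recursive call
        n -= 2                   # fib(n - 2) becomes the next loop iteration
    end
    return acc + n               # n is now 0 or 1, the base case
end

This roughly halves the number of calls, but the total amount of work is still exponential in n, which is why even the optimized binaries need hundreds of milliseconds for fib(40).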

Results

Language               Time
Gambol                 1.810s
Gambol Optimized       467ms
Python (3.12)          20.661s
Ruby (3.3)             13.431s
Julia (1.11)           891ms
C (clang 18)           373ms
C (gcc 14)             240ms
Java (OpenJDK 23)      597ms
Javascript (node 20)   1.892s