Benchmarks for 10 by 10 Matrices

using Exercise1
using BenchmarkTools
using LinearAlgebra: mul!
# Matrix dimension and the sample count handed to each benchmark below.
n, S = 10, 100
# Two random n×n operands with standard-normal entries.
A, B = randn(n, n), randn(n, n)
# Output buffer shaped like B; `similar` leaves its contents uninitialized.
C = similar(B)

10×10 Matrix{Float64}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0

# Time rowmatmul!(C, A, B); S sets the `samples` parameter. The `$` interpolation hands
# the global arrays to the benchmark as values rather than as untyped global lookups.
rowbench = @benchmark rowmatmul!($C,$A,$B) samples=S

BenchmarkTools.Trial: 100 samples with 10 evaluations.
 Range (min … max):  1.320 μs …  1.680 μs  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     1.320 μs              ┊ GC (median):    0.00%
 Time  (mean ± σ):   1.329 μs ± 37.065 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

  █    ▇                                                      
  █▁▁▁▁█▁▁▁▁▁▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄ ▄
  1.32 μs      Histogram: log(frequency) by time     1.42 μs <

 Memory estimate: 0 bytes, allocs estimate: 0.

# Time colmatmul!(C, A, B) with `$`-interpolated arguments; S sets the `samples` parameter.
colbench = @benchmark colmatmul!($C,$A,$B) samples=S

BenchmarkTools.Trial: 100 samples with 10 evaluations.
 Range (min … max):  1.390 μs …  1.490 μs  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     1.390 μs              ┊ GC (median):    0.00%
 Time  (mean ± σ):   1.393 μs ± 10.623 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

  █                                                           
  █▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▆ ▂
  1.39 μs        Histogram: frequency by time         1.4 μs <

 Memory estimate: 0 bytes, allocs estimate: 0.

# Ratio of the fastest rowmatmul! sample to the fastest colmatmul! sample; a value
# below 1 means colmatmul! was the slower of the two in this run.
println("colmatmul! is $(minimum(rowbench.times) / minimum(colbench.times)) times faster than rowmatmul!")

colmatmul! is 0.9496439105100353 times faster than rowmatmul!

# Time LinearAlgebra's in-place mul!(C, A, B) with `$`-interpolated arguments; S sets
# the `samples` parameter.
linbench = @benchmark mul!($C,$A,$B) samples=S

BenchmarkTools.Trial: 100 samples with 200 evaluations.
 Range (min … max):  410.535 ns … 630.550 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     418.035 ns               ┊ GC (median):    0.00%
 Time  (mean ± σ):   423.775 ns ±  26.537 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

  █▁  ▅█▇                                                        
  ██▇████▁▁▁▁▄▁▃▃▄▃▄▁▁▃▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▁▃▁▁▁▁▃ ▃
  411 ns           Histogram: frequency by time          514 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

# Ratio of the fastest colmatmul! sample to the fastest mul! sample; values above 1
# mean mul! was faster.
println("mul! is $(minimum(colbench.times) / minimum(linbench.times)) times faster than colmatmul!")

mul! is 3.3860693972499294 times faster than colmatmul!

# Time turbomul!(C, A, B) with `$`-interpolated arguments; S sets the `samples` parameter.
turbench = @benchmark turbomul!($C,$A,$B) samples=S

BenchmarkTools.Trial: 100 samples with 958 evaluations.
 Range (min … max):  91.761 ns … 135.189 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     92.596 ns               ┊ GC (median):    0.00%
 Time  (mean ± σ):   96.726 ns ±   8.099 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

  ▇█    ▂▁            ▄                                         
  ██▁▁▅▁██▁▁▁▁▁▁▁▁▁▁▁▅█▅▅▁▁▁▁▁▁▁▁▅▅▁▁▁▅▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅ ▅
  91.8 ns       Histogram: log(frequency) by time       132 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

# Ratio of the fastest mul! sample to the fastest turbomul! sample; values above 1
# mean turbomul! was faster.
println("turbomul! is $(minimum(linbench.times) / minimum(turbench.times)) times faster than mul!")

turbomul! is 4.473961459269455 times faster than mul!

Benchmarks for various matrix sizes

using Exercise1
using BenchmarkTools
using LinearAlgebra: mul!
using Plots
# NOTE(review): presumably selects a GR workstation type that renders without opening
# a display window (for headless runs) — confirm against the GR documentation.
ENV["GKSwstype"] = "100"

"""
    benchmarks(n, S=100)

Benchmark `colmatmul!`, `mul!`, and `turbomul!` on random `n`×`n` matrices.

Each function is called as `f(C, A, B)`, where `A` and `B` are fresh `randn(n, n)`
matrices and `C` is a buffer created as `A * B`. `S` is forwarded to `@benchmark` as
its `samples` parameter.

Return a vector holding, for each function in the order above, the median of the
sample times recorded in its trial.
"""
function benchmarks(n, S=100)
    A = randn(n, n)
    B = randn(n, n)
    C = A * B
    kernels = [colmatmul!, mul!, turbomul!]
    # `$` interpolation passes the local function and arrays into the benchmark as values.
    trials = map(kernels) do f
        @benchmark $f($C, $A, $B) samples = S
    end
    return [median(trial.times) for trial in trials]
end

# Matrix dimensions to sweep: the powers of two 2, 4, …, 512.
n = 2 .^ (1:9)
# One vector of median timings per dimension (one entry per benchmarked function).
times = map(benchmarks, n)
# Put dimensions along rows and functions along columns, then plot on log-log axes.
timings = reduce(hcat, times)'
plot(n, timings; labels=["colmatmul!" "mul!" "turbomul!"], xaxis=:log10, yaxis=:log10)