Benchmarks

Cumulative Sum

using BenchmarkTools, CpuId, SIMDscan
# Number of elements in the input vector for the cumulative-sum benchmarks.
N = 10^4
# Input data: N Float64 draws from `rand`.
x = rand(Float64, N);
# The snippet's final expression is `nothing`.
nothing
julia> cpuinfo()
  Cpu Property       Value
  –––––––––––––––––– –––––––––––––––––––––––––––––––––––––––––––––––––––––––
  Brand              Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz
  Vendor             :Intel
  Architecture       :Broadwell
  Model              Family: 0x06, Model: 0x4f, Stepping: 0x01, Type: 0x00
  Cores              2 physical cores, 2 logical cores (on executing CPU)
                     No Hyperthreading hardware capability detected
  Clock Frequencies  Not supported by CPU
  Data Cache         Level 1:3 : (32, 256, 51200) kbytes
                     64 byte cache line size
  Address Size       48 bits virtual, 46 bits physical
  SIMD               256 bit = 32 byte max. SIMD vector size
  Time Stamp Counter TSC is accessible via `rdtsc`
                     TSC increased at every clock cycle (non-invariant TSC)
  Perf. Monitoring   Performance Monitoring Counters (PMC) are not supported
  Hypervisor         Yes, Microsoft
julia> @benchmark cumsum!($(copy(x)),$x)
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range (min … max):  12.000 μs …  2.062 ms   GC (min … max): 0.00% … 0.00%
 Time  (median):     14.200 μs               GC (median):    0.00%
 Time  (mean ± σ):   15.746 μs ± 26.762 μs   GC (mean ± σ):  0.00% ± 0.00%

    ▂▁▆▅▄
  ▄▇█████▃▂▂▂▂▂▂▂▁▁▂▂▁▁▂▁▂▂▂▂▂▁▂▂▁▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂ ▃
  12 μs           Histogram: frequency by time        36.5 μs <

 Memory estimate: 0 bytes, allocs estimate: 0.
julia> @benchmark scan_serial!(+,$(copy(x)))
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range (min … max):  10.000 μs …  5.972 ms   GC (min … max): 0.00% … 0.00%
 Time  (median):     10.801 μs               GC (median):    0.00%
 Time  (mean ± σ):   12.506 μs ± 59.856 μs   GC (mean ± σ):  0.00% ± 0.00%

  ▂▇▇▇ ▁ ▂
  ████▁▁▃▃▄▃▄▄▄▁▃▁▁▃▃▄▃▄▃▁▁▁▁▁▁▁▁▁▁▃▄▃▃▁▄▁▅▆▇██████▇▆▆▄▅▃▄▄ █
  10 μs        Histogram: log(frequency) by time      31.6 μs <

 Memory estimate: 0 bytes, allocs estimate: 0.
julia> @benchmark scan_simd!(+,$(copy(x)), Val(16))
BenchmarkTools.Trial: 10000 samples with 3 evaluations.
 Range (min … max):  8.100 μs … 702.345 μs   GC (min … max): 0.00% … 0.00%
 Time  (median):     8.434 μs                GC (median):    0.00%
 Time  (mean ± σ):   8.785 μs ±   7.367 μs   GC (mean ± σ):  0.00% ± 0.00%

  ▁
  ▅▁▁█▇▆▄█▇▄▅▆▆▆▆▆▆▅▆▅▅▆▅▄▅▅▆▆▆▆▆▅▆▆▆▅▆▅▅▅▅▅▅▄▄▃▄▄▄▄▄▃▄▄▄▃▄ █
  8.1 μs       Histogram: log(frequency) by time      13.8 μs <

 Memory estimate: 0 bytes, allocs estimate: 0.

AR(1)

# Length of the simulated series.
T = 2500
# Innovations: T Float64 draws from `randn`.
ϵ = randn(Float64, T)
# Uninitialised output buffer, same element type and length as ϵ.
y = Vector{eltype(ϵ)}(undef, length(ϵ))
# Coefficient applied to the lagged value in the AR(1) recursion.
α = 0.9
"""
    ar1recursize!(y, ϵ, α)

Fill `y` in place with the first-order recursion

    y[1] = ϵ[1]
    y[t] = α * y[t-1] + ϵ[t]    for t = 2, …, length(y)

and return `y`.

# Arguments
- `y`: output vector, overwritten; its length determines how many steps are computed.
- `ϵ`: input vector; must provide at least `length(y)` elements.
- `α`: scalar coefficient multiplying the previous output value.

An empty `y` is returned unchanged.
"""
function ar1recursize!(y, ϵ, α)
    # Nothing to fill; avoids indexing into an empty array below.
    isempty(y) && return y
    y[1] = ϵ[1]
    # The bound is taken from `y` itself rather than from a global, so the
    # function works for any input length and the loop range is concretely typed.
    for t in 2:length(y)
        y[t] = α * y[t-1] + ϵ[t]
    end
    return y
end
# Binary operator passed to the scan routines for the AR(1) example.
# `y` and `e` are each indexed at positions 1 and 2; the result is a 2-tuple
# combining them with the coefficient α.
# NOTE(review): α is read from global scope on every call; it is assigned
# earlier in this page without `const`.
function ar(y, e)
    acc_value = y[1]
    acc_scale = y[2]
    new_value = e[1]
    new_scale = e[2]
    return (new_value + α * acc_value * new_scale, α * acc_scale * new_scale)
end;
nothing
julia> @benchmark ar1recursize!($y,$ϵ,$α)
BenchmarkTools.Trial: 8337 samples with 1 evaluation.
 Range (min … max):  425.302 μs …   7.620 ms   GC (min … max): 0.00% … 0.00%
 Time  (median):     546.703 μs                GC (median):    0.00%
 Time  (mean ± σ):   595.199 μs ± 341.624 μs   GC (mean ± σ):  4.73% ± 7.63%

      ▅█                                                     
  ▂▃▄█████▆▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▁▁▂▂▁▂▁▁▂▂▂▂▂▂▁▂▁▂▁▁▁▂ ▃
  425 μs           Histogram: frequency by time         1.57 ms <

 Memory estimate: 366.58 KiB, allocs estimate: 20961.
julia> @benchmark scan_serial!($ar, $((copy(ϵ), ones(T))))
BenchmarkTools.Trial: 3150 samples with 1 evaluation.
 Range (min … max):  1.120 ms …   7.960 ms   GC (min … max): 0.00% … 58.39%
 Time  (median):     1.461 ms                GC (median):    0.00%
 Time  (mean ± σ):   1.582 ms ± 689.286 μs   GC (mean ± σ):  5.79% ± 10.37%

  ▃▇█▄▂ ▁ ▁
  ▇████████▇▆▆▆▄▄▄▁▃▃▁▃▃▁▁▃▁▁▃▁▁▃▃▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▃▃▅▃▆▆▇ █
  1.12 ms      Histogram: log(frequency) by time      6.17 ms <

 Memory estimate: 890.64 KiB, allocs estimate: 39480.
julia> @benchmark scan_simd!($ar, $((copy(ϵ), ones(T))), identity=$((0.0,1.0/α)))
BenchmarkTools.Trial: 10000 samples with 1 evaluation.
 Range (min … max):  134.102 μs …   6.237 ms   GC (min … max):  0.00% … 95.21%
 Time  (median):     163.403 μs                GC (median):     0.00%
 Time  (mean ± σ):   214.813 μs ± 395.449 μs   GC (mean ± σ):  18.26% ±  9.55%

  █▂ ▁
  ██▇▆▇█▅▄▃▁▁▁▃▄▃▄▁▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▁▁▁▁▁▁▁▁▁▃▃▅ █
  134 μs       Histogram: log(frequency) by time      3.66 ms <

 Memory estimate: 488.94 KiB, allocs estimate: 6304.