Julia at Full Tilt: Profiling and Optimizations

Paul Schrimpf

2026-03-22

Introduction

About Me

Useful Resources

Overview

yes

no

Targeted Optimizations

allocations

floating point operations

something else

do not see any

Parallelization

Multi-thread

GPU

Distributed

Profile

What are the bottlenecks?

Reduce allocations

Optimize for CPU efficiency

???

Basics

Use functions

Type stability

Good algorithm

Code is too slow

Fast enough?

Hooray!

Basics

yes

no

Targeted Optimizations

allocations

floating point operations

something else

do not see any

Parallelization

Multi-thread

GPU

Distributed

Profile

What are the bottlenecks?

Reduce allocations

Optimize for CPU efficiency

???

Basics

Use functions

Type stability

Good algorithm

Code is too slow

Fast enough?

Hooray!

Avoid Premature Optimization

  • Complete, correct \(>>\) fast, incorrect, unfinished
  • Clear, maintainable \(>\) fast, incomprehensible (almost always)
  • But some practices can both make code faster and clearer

Functions

  • Julia functions are JIT compiled, global scripts are not
  • Code needs to be in a function for full performance
  • Organizing code into functions is also better for readability and maintenance

Type Stability

  • To generate efficient code, the Julia compiler needs to know the types of all variables
  • Given the types of the inputs of a function, the types of its intermediate variables and output should be deterministic

Type Stable: Example

using BenchmarkTools
"""
    unstabletrick(x, t)

Sum the elements of `x` after capping each one at `t`: an element smaller
than `t` contributes itself, every other element contributes `t`.

Each term is either an element of `x` or `t` itself, with no conversion
between the two, so when `eltype(x)` and `typeof(t)` differ the terms (and
therefore the sum) do not have a single concrete type.
"""
function unstabletrick(x, t)
  capped = (v < t ? v : t for v in x)
  return sum(capped)
end

# Inputs for the benchmarks that follow.
t = 0.5
n = 10000 # number of elements in x
# rand(n) draws from [0, 1); shifting by 0.5 and scaling by 10 spreads x over [-5, 5)
x = (rand(n).-0.5)*10

Type Stable: Example

Stable: Float to Float

@benchmark unstabletrick(x, 0.)
BenchmarkTools.Trial: 10000 samples with 8 evaluations per sample.
 Range (min … max):  3.667 μs … 15.777 μs   GC (min … max): 0.00% … 0.00%
 Time  (median):     3.771 μs                GC (median):    0.00%
 Time  (mean ± σ):   3.788 μs ± 169.911 ns   GC (mean ± σ):  0.00% ± 0.00%

              ▂ ▁█▅▄▁▂                                        
  ▁▁▁▁▁▁▁▁▂▃▄▅████████▅▆▆▄▄▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁ ▂
  3.67 μs         Histogram: frequency by time        4.01 μs <

 Memory estimate: 16 bytes, allocs estimate: 1.

Stable: Int to Int

xint = round.(Int, x)
@benchmark unstabletrick(xint, 0)
BenchmarkTools.Trial: 10000 samples with 225 evaluations per sample.
 Range (min … max):  317.756 ns … 839.631 ns   GC (min … max): 0.00% … 0.00%
 Time  (median):     339.440 ns                GC (median):    0.00%
 Time  (mean ± σ):   340.754 ns ±  18.097 ns   GC (mean ± σ):  0.00% ± 0.00%

                       ▄█                                      
  ▂▁▁▂▁▁▂▂▂▁▂▂▂▃▂▃▄▆▆▆████▆▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▂▁▂▁▂▂▂ ▃
  318 ns           Histogram: frequency by time          375 ns <

 Memory estimate: 16 bytes, allocs estimate: 1.

Unstable: (Int & Float) to (Int | Float)

@benchmark unstabletrick(xint, 0.1)
BenchmarkTools.Trial: 10000 samples with 1 evaluation per sample.
 Range (min … max):  17.623 μs … 22.192 μs   GC (min … max): 0.00% … 0.00%
 Time  (median):     18.314 μs                GC (median):    0.00%
 Time  (mean ± σ):   18.346 μs ± 226.740 ns   GC (mean ± σ):  0.00% ± 0.00%

                   ▆▆█▂▂▂▂                                   ▂
  ▅▃▁▁▁▃▁▃▄▁▁▁▃▄▄▆████████▆▆▆▇█▇▅▄▅▅▄▃▄▃▁▁▄▁▁▃▄▃▄▆▇▅▆▄▅▅▆▅▆▇ █
  17.6 μs       Histogram: log(frequency) by time      19.7 μs <

 Memory estimate: 16 bytes, allocs estimate: 1.
  • 6-32x slowdown!

Detecting Type Instability

@code_warntype unstabletrick(xint,0)
MethodInstance for Main.Notebook.unstabletrick(::Vector{Int64}, ::Int64)
  from unstabletrick(x, t) @ Main.Notebook ~/ARGridBootstrap/docs/build/westdri_talk.qmd:153
Arguments
  #self#::Core.Const(Main.Notebook.unstabletrick)
  x::Vector{Int64}
  t::Int64
Locals
  #1::Main.Notebook.var"#unstabletrick##0#unstabletrick##1"{Int64}
Body::Int64
1 ─ %1 = Main.Notebook.sum::Core.Const(sum)
 %2 = Main.Notebook.:(var"#unstabletrick##0#unstabletrick##1")::Core.Const(Main.Notebook.var"#unstabletrick##0#unstabletrick##1")
 %3 = Core._typeof_captured_variable(t)::Core.Const(Int64)
 %4 = Core.apply_type(%2, %3)::Core.Const(Main.Notebook.var"#unstabletrick##0#unstabletrick##1"{Int64})
      (#1 = %new(%4, t))
 %6 = #1::Main.Notebook.var"#unstabletrick##0#unstabletrick##1"{Int64}
 %7 = Base.Generator(%6, x)::Base.Generator{Vector{Int64}, Main.Notebook.var"#unstabletrick##0#unstabletrick##1"{Int64}}
 %8 = (%1)(%7)::Int64
└──      return %8

@code_warntype unstabletrick(xint,0.)
MethodInstance for Main.Notebook.unstabletrick(::Vector{Int64}, ::Float64)
  from unstabletrick(x, t) @ Main.Notebook ~/ARGridBootstrap/docs/build/westdri_talk.qmd:153
Arguments
  #self#::Core.Const(Main.Notebook.unstabletrick)
  x::Vector{Int64}
  t::Float64
Locals
  #1::Main.Notebook.var"#unstabletrick##0#unstabletrick##1"{Float64}
Body::Union{Float64, Int64}
1 ─ %1 = Main.Notebook.sum::Core.Const(sum)
 %2 = Main.Notebook.:(var"#unstabletrick##0#unstabletrick##1")::Core.Const(Main.Notebook.var"#unstabletrick##0#unstabletrick##1")
 %3 = Core._typeof_captured_variable(t)::Core.Const(Float64)
 %4 = Core.apply_type(%2, %3)::Core.Const(Main.Notebook.var"#unstabletrick##0#unstabletrick##1"{Float64})
      (#1 = %new(%4, t))
 %6 = #1::Main.Notebook.var"#unstabletrick##0#unstabletrick##1"{Float64}
 %7 = Base.Generator(%6, x)::Base.Generator{Vector{Int64}, Main.Notebook.var"#unstabletrick##0#unstabletrick##1"{Float64}}
 %8 = (%1)(%7)::Union{Float64, Int64}
└──      return %8

Extended Example

Code for klm()
using ForwardDiff, LinearAlgebra, Distributions
# statparts(gi) -> θ -> (n, k, p, gn, Ω, D, P)
#
# Build a closure that, for a parameter vector θ, computes the pieces from
# which `klm` assembles its statistic.
#
# `gi(θ)` must return a matrix; its size is read as (n, k) below, and
# p = length(θ).
#
# The returned tuple holds:
#   n, k, p : the dimensions above
#   gn      : adjoint of the 1×k matrix of column means of gi(θ)
#   Ω       : cov(gi(θ))
#   D       : k×p matrix, filled one column per element of θ in the loop
#   P       : the local function A -> A*pinv(A'*A)*A'
#
# NOTE(review): the `@code_warntype` output shown after this code reports
# `Gi` and `G` as `Any`, and `Γ`/`D` as non-concrete unions.
function statparts(gi::Function)
  function P(A) # projection matrix
    A*pinv(A'*A)*A'
  end
  function(θ)
    # n×k matrix returned by gi
    giθ = gi(θ)
    p = length(θ)
    (n, k) = size(giθ)
    Ω = cov(giθ)
    # gi(θ) is evaluated a second time here rather than reusing giθ
    gn=mean(gi(θ), dims=1)'
    # Jacobian of gi at θ, then reshaped so that Gi[i,m,j] is the derivative
    # of giθ[i,m] with respect to θ[j]
    Gi= ForwardDiff.jacobian(gi,θ)
    Gi = reshape(Gi, n , k, p)
    # 1×k×p: average of Gi over its first dimension
    G = mean(Gi, dims=1)
    Γ = zeros(eltype(Gi),p,k,k)
    D = zeros(eltype(Gi),k, p)
    for j in 1:p
      for i in 1:n
        # accumulate the outer product of the demeaned Gi[i,:,j] with giθ[i,:]
        Γ[j,:,:] += (Gi[i,:,j] .- G[1,:,j]) * giθ[i,:]'
      end
      # turn the accumulated sum into an average over i
      Γ[j,:,:] ./= n
      D[:,j] = G[1,:,j] - Γ[j,:,:]*inv(Ω)*gn
    end
    return(n,k,p,gn, Ω, D, P)
  end
end

"""
    klm(gi)

Return a closure `θ -> statistic` built on `statparts(gi)`.

For a parameter vector `θ` the closure evaluates

    n * gn' * Ω^(-1/2) * P(Ω^(-1/2) * D) * Ω^(-1/2) * gn

where `(n, k, p, gn, Ω, D, P)` is the tuple returned by the `statparts`
closure and `P(A) = A*pinv(A'*A)*A'`.
"""
function klm(gi::Function)
  SP = statparts(gi)
  function(θ)
    (n,k,p,gn, Ω, D, P) = SP(θ)
    # gn is k×1, so the product is a 1×1 matrix; [1] extracts the scalar.
    return n*(gn'*Ω^(-1/2)*P(Ω^(-1/2)*D)*Ω^(-1/2)*gn)[1]
  end
end

import Random
"""
    simulate_ivshare(n, β, γ, ρ)

Draw `n` simulated observations and return them as the named tuple
`(y=..., x=..., z=...)`.

- `z`: `n × size(γ, 1)` matrix of standard-normal draws.
- `x`: `z*γ` plus an `n × length(β)` standard-normal matrix `endo`.
- `y`: logistic CDF of `x*β .+ ξ`, where `ξ` is `Normal(0, sqrt(1 - ρ^2))`
  noise plus `ρ` times the first column of `endo`.
"""
function simulate_ivshare(n,β,γ,ρ)
  ninstr = size(γ, 1)
  z = randn(n, ninstr)
  endo = randn(n, length(β))
  x = z*γ .+ endo
  noise = rand(Normal(0, sqrt(1.0 - ρ^2)), n)
  ξ = noise .+ endo[:,1]*ρ
  y = cdf.(Logistic(), x*β .+ ξ)
  return (y=y, x=x, z=z)
end
# Simulation design: n observations, k columns in x, iv columns in z.
n = 100
k = 2
iv = 3
β0 = ones(k)
# iv×k coefficient matrix: 5*I stacked on top of an (iv-k)×k block of ones
π0 = vcat(5*I,ones(iv-k,k))
ρ = 0.5
Random.seed!(622) # fix the seed so the simulated data are reproducible
(y,x,z) = simulate_ivshare(n,β0,π0,ρ)

"""
    gi_ivshare(β, y, x, z)

Return `ξ .* z`, where `ξ = quantile.(Logistic(), y) .- x*β` is the
logistic quantile (inverse CDF) of each element of `y` minus the
corresponding element of `x*β`.
"""
function gi_ivshare(β,y,x,z)
  resid = quantile.(Logistic(), y) .- x*β
  return resid .* z
end

# Bind the simulated data into a one-argument function β -> gi_ivshare(β,y,x,z).
# The `let` block rebinds y, x, z as locals, so the closure captures those
# local bindings rather than referring to the global variables.
gi = let y=y, x=x, z=z
  β->gi_ivshare(β,y,x,z)
end
#5 (generic function with 1 method)
@code_warntype statparts(gi)(β0)
MethodInstance for (::var"#statparts##1#statparts##2"{var"#5#6"{Vector{Float64}, Matrix{Float64}, Matrix{Float64}}, var"#P#statparts##0"})(::Vector{Float64})
  from (::Main.Notebook.var"#statparts##1#statparts##2")(θ) @ Main.Notebook ~/ARGridBootstrap/docs/build/westdri_talk.qmd:237
Arguments
  #self#::Main.Notebook.var"#statparts##1#statparts##2"{Main.Notebook.var"#5#6"{Vector{Float64}, Matrix{Float64}, Matrix{Float64}}, Main.Notebook.var"#P#statparts##0"}
  θ::Vector{Float64}
Locals
  @_3::Union{Nothing, Tuple{Int64, Int64}}
  @_4::Int64
  D::Union{Array{Float64, 3}, Matrix}
  Γ::Union{Array{Float64, 4}, Array{T, 3} where T}
  G::Any
  Gi::Any
  gn::LinearAlgebra.Adjoint{Float64, Matrix{Float64}}
  Ω::Matrix{Float64}
  k::Int64
  n::Int64
  p::Int64
  giθ::Matrix{Float64}
  @_15::Union{Nothing, Tuple{Int64, Int64}}
  j::Int64
  i::Int64
Body::Tuple{Int64, Int64, Int64, LinearAlgebra.Adjoint{Float64, Matrix{Float64}}, Matrix{Float64}, Union{Array{Float64, 3}, Matrix}, Main.Notebook.var"#P#statparts##0"}
1 ─ %1   = Core.getfield(#self#, :gi)::Main.Notebook.var"#5#6"{Vector{Float64}, Matrix{Float64}, Matrix{Float64}}
        (giθ = (%1)(θ))
 %3   = Main.Notebook.length::Core.Const(length)
        (p = (%3)(θ))
 %5   = Main.Notebook.size::Core.Const(size)
 %6   = giθ::Matrix{Float64}
 %7   = (%5)(%6)::Tuple{Int64, Int64}
 %8   = Base.indexed_iterate(%7, 1)::Core.PartialStruct(Tuple{Int64, Int64}, Any[Int64, Core.Const(2)])
        (n = Core.getfield(%8, 1))
        (@_4 = Core.getfield(%8, 2))
 %11  = @_4::Core.Const(2)
 %12  = Base.indexed_iterate(%7, 2, %11)::Core.PartialStruct(Tuple{Int64, Int64}, Any[Int64, Core.Const(3)])
        (k = Core.getfield(%12, 1))
 %14  = Main.Notebook.cov::Core.Const(Statistics.cov)
 %15  = giθ::Matrix{Float64}
        (Ω = (%14)(%15))
 %17  = Main.Notebook.:var"'"::Core.Const(adjoint)
 %18  = Core.getfield(#self#, :gi)::Main.Notebook.var"#5#6"{Vector{Float64}, Matrix{Float64}, Matrix{Float64}}
 %19  = (%18)(θ)::Matrix{Float64}
 %20  = (:dims,)::Core.Const((:dims,))
 %21  = Core.apply_type(Core.NamedTuple, %20)::Core.Const(NamedTuple{(:dims,)})
 %22  = Core.tuple(1)::Core.Const((1,))
 %23  = (%21)(%22)::Core.Const((dims = 1,))
 %24  = Main.Notebook.mean::Core.Const(Statistics.mean)
 %25  = Core.kwcall(%23, %24, %19)::Matrix{Float64}
        (gn = (%17)(%25))
 %27  = Main.Notebook.ForwardDiff::Core.Const(ForwardDiff)
 %28  = Base.getproperty(%27, :jacobian)::Core.Const(ForwardDiff.jacobian)
 %29  = Core.getfield(#self#, :gi)::Main.Notebook.var"#5#6"{Vector{Float64}, Matrix{Float64}, Matrix{Float64}}
        (Gi = (%28)(%29, θ))
 %31  = Main.Notebook.reshape::Core.Const(reshape)
 %32  = Gi::Any
 %33  = n::Int64
 %34  = k::Int64
 %35  = p::Int64
        (Gi = (%31)(%32, %33, %34, %35))
 %37  = (:dims,)::Core.Const((:dims,))
 %38  = Core.apply_type(Core.NamedTuple, %37)::Core.Const(NamedTuple{(:dims,)})
 %39  = Core.tuple(1)::Core.Const((1,))
 %40  = (%38)(%39)::Core.Const((dims = 1,))
 %41  = Main.Notebook.mean::Core.Const(Statistics.mean)
 %42  = Gi::Any
        (G = Core.kwcall(%40, %41, %42))
 %44  = Main.Notebook.zeros::Core.Const(zeros)
 %45  = Main.Notebook.eltype::Core.Const(eltype)
 %46  = Gi::Any
 %47  = (%45)(%46)::Any
 %48  = p::Int64
 %49  = k::Int64
 %50  = k::Int64
        (Γ = (%44)(%47, %48, %49, %50))
 %52  = Main.Notebook.zeros::Core.Const(zeros)
 %53  = Main.Notebook.eltype::Core.Const(eltype)
 %54  = Gi::Any
 %55  = (%53)(%54)::Any
 %56  = k::Int64
 %57  = p::Int64
        (D = (%52)(%55, %56, %57))
 %59  = Main.Notebook.:(:)::Core.Const(Colon())
 %60  = p::Int64
 %61  = (%59)(1, %60)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
        (@_3 = Base.iterate(%61))
 %63  = @_3::Union{Nothing, Tuple{Int64, Int64}}
 %64  = (%63 === nothing)::Bool
 %65  = Base.not_int(%64)::Bool
└──        goto #7 if not %65
2 ┄ %67  = @_3::Tuple{Int64, Int64}
        (j = Core.getfield(%67, 1))
 %69  = Core.getfield(%67, 2)::Int64
 %70  = Main.Notebook.:(:)::Core.Const(Colon())
 %71  = n::Int64
 %72  = (%70)(1, %71)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
        (@_15 = Base.iterate(%72))
 %74  = @_15::Union{Nothing, Tuple{Int64, Int64}}
 %75  = (%74 === nothing)::Bool
 %76  = Base.not_int(%75)::Bool
└──        goto #5 if not %76
3 ┄ %78  = @_15::Tuple{Int64, Int64}
        (i = Core.getfield(%78, 1))
 %80  = Core.getfield(%78, 2)::Int64
 %81  = Main.Notebook.:+::Core.Const(+)
 %82  = Γ::Union{Array{Float64, 4}, Array{T, 3} where T}
 %83  = j::Int64
 %84  = Main.Notebook.:(:)::Core.Const(Colon())
 %85  = Main.Notebook.:(:)::Core.Const(Colon())
 %86  = Base.getindex(%82, %83, %84, %85)::Matrix
 %87  = Main.Notebook.:*::Core.Const(*)
 %88  = Main.Notebook.:-::Core.Const(-)
 %89  = Gi::Any
 %90  = i::Int64
 %91  = Main.Notebook.:(:)::Core.Const(Colon())
 %92  = j::Int64
 %93  = Base.getindex(%89, %90, %91, %92)::Any
 %94  = G::Any
 %95  = Main.Notebook.:(:)::Core.Const(Colon())
 %96  = j::Int64
 %97  = Base.getindex(%94, 1, %95, %96)::Any
 %98  = Base.broadcasted(%88, %93, %97)::Any
 %99  = Base.materialize(%98)::Any
 %100 = Main.Notebook.:var"'"::Core.Const(adjoint)
 %101 = giθ::Matrix{Float64}
 %102 = i::Int64
 %103 = Main.Notebook.:(:)::Core.Const(Colon())
 %104 = Base.getindex(%101, %102, %103)::Vector{Float64}
 %105 = (%100)(%104)::LinearAlgebra.Adjoint{Float64, Vector{Float64}}
 %106 = (%87)(%99, %105)::Any
 %107 = (%81)(%86, %106)::Any
 %108 = Γ::Union{Array{Float64, 4}, Array{T, 3} where T}
 %109 = j::Int64
 %110 = Main.Notebook.:(:)::Core.Const(Colon())
 %111 = Main.Notebook.:(:)::Core.Const(Colon())
        Base.setindex!(%108, %107, %109, %110, %111)
        (@_15 = Base.iterate(%72, %80))
 %114 = @_15::Union{Nothing, Tuple{Int64, Int64}}
 %115 = (%114 === nothing)::Bool
 %116 = Base.not_int(%115)::Bool
└──        goto #5 if not %116
4 ─        goto #3
5 ┄ %119 = Γ::Union{Array{Float64, 4}, Array{T, 3} where T}
 %120 = j::Int64
 %121 = Main.Notebook.:(:)::Core.Const(Colon())
 %122 = Main.Notebook.:(:)::Core.Const(Colon())
 %123 = Base.dotview(%119, %120, %121, %122)::SubArray{_A, 2, P, Tuple{Int64, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, true} where {_A, P<:(Array{T, 3} where T)}
 %124 = Main.Notebook.:/::Core.Const(/)
 %125 = Γ::Union{Array{Float64, 4}, Array{T, 3} where T}
 %126 = j::Int64
 %127 = Main.Notebook.:(:)::Core.Const(Colon())
 %128 = Main.Notebook.:(:)::Core.Const(Colon())
 %129 = Base.getindex(%125, %126, %127, %128)::Matrix
 %130 = n::Int64
 %131 = Base.broadcasted(%124, %129, %130)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{2}, Nothing, typeof(/), <:Tuple{Matrix, Int64}}
        Base.materialize!(%123, %131)
 %133 = Main.Notebook.:-::Core.Const(-)
 %134 = G::Any
 %135 = Main.Notebook.:(:)::Core.Const(Colon())
 %136 = j::Int64
 %137 = Base.getindex(%134, 1, %135, %136)::Any
 %138 = Main.Notebook.:*::Core.Const(*)
 %139 = Γ::Union{Array{Float64, 4}, Array{T, 3} where T}
 %140 = j::Int64
 %141 = Main.Notebook.:(:)::Core.Const(Colon())
 %142 = Main.Notebook.:(:)::Core.Const(Colon())
 %143 = Base.getindex(%139, %140, %141, %142)::Matrix
 %144 = Main.Notebook.inv::Core.Const(inv)
 %145 = Ω::Matrix{Float64}
 %146 = (%144)(%145)::Matrix{Float64}
 %147 = gn::LinearAlgebra.Adjoint{Float64, Matrix{Float64}}
 %148 = (%138)(%143, %146, %147)::Any
 %149 = (%133)(%137, %148)::Any
 %150 = D::Union{Array{Float64, 3}, Matrix}
 %151 = Main.Notebook.:(:)::Core.Const(Colon())
 %152 = j::Int64
        Base.setindex!(%150, %149, %151, %152)
        (@_3 = Base.iterate(%61, %69))
 %155 = @_3::Union{Nothing, Tuple{Int64, Int64}}
 %156 = (%155 === nothing)::Bool
 %157 = Base.not_int(%156)::Bool
└──        goto #7 if not %157
6 ─        goto #2
7 ┄ %160 = n::Int64
 %161 = k::Int64
 %162 = p::Int64
 %163 = gn::LinearAlgebra.Adjoint{Float64, Matrix{Float64}}
 %164 = Ω::Matrix{Float64}
 %165 = D::Union{Array{Float64, 3}, Matrix}
 %166 = Core.getfield(#self#, :P)::Core.Const(Main.Notebook.var"#P#statparts##0"())
 %167 = Core.tuple(%160, %161, %162, %163, %164, %165, %166)::Tuple{Int64, Int64, Int64, LinearAlgebra.Adjoint{Float64, Matrix{Float64}}, Matrix{Float64}, Union{Array{Float64, 3}, Matrix}, Main.Notebook.var"#P#statparts##0"}
└──        return %167

@benchmark statparts(gi)(β0)
BenchmarkTools.Trial: 10000 samples with 1 evaluation per sample.
 Range (min … max):   97.665 μs … 12.247 ms   GC (min … max):  0.00% … 98.42%
 Time  (median):     121.510 μs                GC (median):     0.00%
 Time  (mean ± σ):   136.316 μs ± 314.825 μs   GC (mean ± σ):  10.18% ±  4.46%

    ▆█▃▆▅▄▁▁▁▁    ▆▅▃▂▂▁▁                                       
  ▆███████████████████████▇▇▆▆▅▅▅▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▄
  97.7 μs          Histogram: frequency by time          188 μs <

 Memory estimate: 223.73 KiB, allocs estimate: 4941.
klm_stable()
# statparts_stable(gi) -> θ -> (n, k, p, gn, Ω, D, P)
#
# Same computation as `statparts`, with two changes visible in the code:
#   * the argument is declared `gi::F where {F <: Function}` instead of
#     `gi::Function`;
#   * `Gi` is allocated up front with `similar(gn,n,k,p)` and filled by
#     `ForwardDiff.jacobian!`, instead of reshaping the result of
#     `ForwardDiff.jacobian`.
# The `@code_warntype` output that follows this code reports
# `Gi::Array{Float64, 3}` and `D::Matrix{Float64}` for this version.
function statparts_stable(gi::F) where {F <: Function}
  function P(A) # projection matrix
    A*pinv(A'*A)*A'
  end
  function(θ)
    # n×k matrix returned by gi
    giθ = gi(θ)
    p = length(θ)
    (n, k) = size(giθ)
    Ω = cov(giθ)
    # gi(θ) is evaluated a second time here rather than reusing giθ
    gn=mean(gi(θ), dims=1)'
    # n×k×p output buffer with the element type of gn
    Gi = similar(gn,n,k,p)
    # fill the buffer with the Jacobian of gi at θ
    Gi= ForwardDiff.jacobian!(Gi,gi,θ)
    # 1×k×p: average of Gi over its first dimension
    G = mean(Gi, dims=1)
    Γ = zeros(eltype(Gi),p,k,k)
    D = zeros(eltype(Gi),k, p)
    for j in 1:p
      for i in 1:n
        # accumulate the outer product of the demeaned Gi[i,:,j] with giθ[i,:]
        Γ[j,:,:] += (Gi[i,:,j] .- G[1,:,j]) * giθ[i,:]'
      end
      # turn the accumulated sum into an average over i
      Γ[j,:,:] ./= n
      D[:,j] = G[1,:,j] - Γ[j,:,:]*inv(Ω)*gn
    end
    return(n,k,p,gn, Ω, D, P)
  end
end

"""
    klm_stable(gi)

Return a closure `θ -> statistic` built on `statparts_stable(gi)`.

The closure evaluates `n * gn' * irΩ * P(irΩ * D) * irΩ * gn`, where `irΩ`
is the inverse square root of `Ω`, formed once from the eigendecomposition
of `Ω`, and `(n, k, p, gn, Ω, D, P)` is the tuple returned by the
`statparts_stable` closure.
"""
function klm_stable(gi::F) where {F <: Function}
  SP = statparts_stable(gi)
  function(θ)
    (n,k,p,gn, Ω, D, P) = SP(θ)
    # With Ω = v*diagm(λ)*v', the inverse square root is v*diagm(λ.^(-1/2))*v'.
    λ, v = eigen(Ω)
    irΩ = v*diagm(λ.^(-1/2))*v'
    # gn is k×1, so the product is a 1×1 matrix; [1] extracts the scalar.
    return n*(gn'*irΩ*P(irΩ*D)*irΩ*gn)[1]
  end
end
klm_stable (generic function with 1 method)
@code_warntype statparts_stable(gi)(β0)
MethodInstance for (::var"#statparts_stable##1#statparts_stable##2"{var"#5#6"{Vector{Float64}, Matrix{Float64}, Matrix{Float64}}, var"#P#statparts_stable##0"})(::Vector{Float64})
  from (::Main.Notebook.var"#statparts_stable##1#statparts_stable##2")(θ) @ Main.Notebook ~/ARGridBootstrap/docs/build/westdri_talk.qmd:314
Arguments
  #self#::Main.Notebook.var"#statparts_stable##1#statparts_stable##2"{Main.Notebook.var"#5#6"{Vector{Float64}, Matrix{Float64}, Matrix{Float64}}, Main.Notebook.var"#P#statparts_stable##0"}
  θ::Vector{Float64}
Locals
  @_3::Union{Nothing, Tuple{Int64, Int64}}
  @_4::Int64
  D::Matrix{Float64}
  Γ::Array{Float64, 3}
  G::Array{Float64, 3}
  Gi::Array{Float64, 3}
  gn::LinearAlgebra.Adjoint{Float64, Matrix{Float64}}
  Ω::Matrix{Float64}
  k::Int64
  n::Int64
  p::Int64
  giθ::Matrix{Float64}
  @_15::Union{Nothing, Tuple{Int64, Int64}}
  j::Int64
  i::Int64
Body::Tuple{Int64, Int64, Int64, LinearAlgebra.Adjoint{Float64, Matrix{Float64}}, Matrix{Float64}, Matrix{Float64}, Main.Notebook.var"#P#statparts_stable##0"}
1 ─ %1   = Core.getfield(#self#, :gi)::Main.Notebook.var"#5#6"{Vector{Float64}, Matrix{Float64}, Matrix{Float64}}
        (giθ = (%1)(θ))
 %3   = Main.Notebook.length::Core.Const(length)
        (p = (%3)(θ))
 %5   = Main.Notebook.size::Core.Const(size)
 %6   = giθ::Matrix{Float64}
 %7   = (%5)(%6)::Tuple{Int64, Int64}
 %8   = Base.indexed_iterate(%7, 1)::Core.PartialStruct(Tuple{Int64, Int64}, Any[Int64, Core.Const(2)])
        (n = Core.getfield(%8, 1))
        (@_4 = Core.getfield(%8, 2))
 %11  = @_4::Core.Const(2)
 %12  = Base.indexed_iterate(%7, 2, %11)::Core.PartialStruct(Tuple{Int64, Int64}, Any[Int64, Core.Const(3)])
        (k = Core.getfield(%12, 1))
 %14  = Main.Notebook.cov::Core.Const(Statistics.cov)
 %15  = giθ::Matrix{Float64}
        (Ω = (%14)(%15))
 %17  = Main.Notebook.:var"'"::Core.Const(adjoint)
 %18  = Core.getfield(#self#, :gi)::Main.Notebook.var"#5#6"{Vector{Float64}, Matrix{Float64}, Matrix{Float64}}
 %19  = (%18)(θ)::Matrix{Float64}
 %20  = (:dims,)::Core.Const((:dims,))
 %21  = Core.apply_type(Core.NamedTuple, %20)::Core.Const(NamedTuple{(:dims,)})
 %22  = Core.tuple(1)::Core.Const((1,))
 %23  = (%21)(%22)::Core.Const((dims = 1,))
 %24  = Main.Notebook.mean::Core.Const(Statistics.mean)
 %25  = Core.kwcall(%23, %24, %19)::Matrix{Float64}
        (gn = (%17)(%25))
 %27  = Main.Notebook.similar::Core.Const(similar)
 %28  = gn::LinearAlgebra.Adjoint{Float64, Matrix{Float64}}
 %29  = n::Int64
 %30  = k::Int64
 %31  = p::Int64
        (Gi = (%27)(%28, %29, %30, %31))
 %33  = Main.Notebook.ForwardDiff::Core.Const(ForwardDiff)
 %34  = Base.getproperty(%33, :jacobian!)::Core.Const(ForwardDiff.jacobian!)
 %35  = Gi::Array{Float64, 3}
 %36  = Core.getfield(#self#, :gi)::Main.Notebook.var"#5#6"{Vector{Float64}, Matrix{Float64}, Matrix{Float64}}
        (Gi = (%34)(%35, %36, θ))
 %38  = (:dims,)::Core.Const((:dims,))
 %39  = Core.apply_type(Core.NamedTuple, %38)::Core.Const(NamedTuple{(:dims,)})
 %40  = Core.tuple(1)::Core.Const((1,))
 %41  = (%39)(%40)::Core.Const((dims = 1,))
 %42  = Main.Notebook.mean::Core.Const(Statistics.mean)
 %43  = Gi::Array{Float64, 3}
        (G = Core.kwcall(%41, %42, %43))
 %45  = Main.Notebook.zeros::Core.Const(zeros)
 %46  = Main.Notebook.eltype::Core.Const(eltype)
 %47  = Gi::Array{Float64, 3}
 %48  = (%46)(%47)::Core.Const(Float64)
 %49  = p::Int64
 %50  = k::Int64
 %51  = k::Int64
        (Γ = (%45)(%48, %49, %50, %51))
 %53  = Main.Notebook.zeros::Core.Const(zeros)
 %54  = Main.Notebook.eltype::Core.Const(eltype)
 %55  = Gi::Array{Float64, 3}
 %56  = (%54)(%55)::Core.Const(Float64)
 %57  = k::Int64
 %58  = p::Int64
        (D = (%53)(%56, %57, %58))
 %60  = Main.Notebook.:(:)::Core.Const(Colon())
 %61  = p::Int64
 %62  = (%60)(1, %61)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
        (@_3 = Base.iterate(%62))
 %64  = @_3::Union{Nothing, Tuple{Int64, Int64}}
 %65  = (%64 === nothing)::Bool
 %66  = Base.not_int(%65)::Bool
└──        goto #7 if not %66
2 ┄ %68  = @_3::Tuple{Int64, Int64}
        (j = Core.getfield(%68, 1))
 %70  = Core.getfield(%68, 2)::Int64
 %71  = Main.Notebook.:(:)::Core.Const(Colon())
 %72  = n::Int64
 %73  = (%71)(1, %72)::Core.PartialStruct(UnitRange{Int64}, Any[Core.Const(1), Int64])
        (@_15 = Base.iterate(%73))
 %75  = @_15::Union{Nothing, Tuple{Int64, Int64}}
 %76  = (%75 === nothing)::Bool
 %77  = Base.not_int(%76)::Bool
└──        goto #5 if not %77
3 ┄ %79  = @_15::Tuple{Int64, Int64}
        (i = Core.getfield(%79, 1))
 %81  = Core.getfield(%79, 2)::Int64
 %82  = Main.Notebook.:+::Core.Const(+)
 %83  = Γ::Array{Float64, 3}
 %84  = j::Int64
 %85  = Main.Notebook.:(:)::Core.Const(Colon())
 %86  = Main.Notebook.:(:)::Core.Const(Colon())
 %87  = Base.getindex(%83, %84, %85, %86)::Matrix{Float64}
 %88  = Main.Notebook.:*::Core.Const(*)
 %89  = Main.Notebook.:-::Core.Const(-)
 %90  = Gi::Array{Float64, 3}
 %91  = i::Int64
 %92  = Main.Notebook.:(:)::Core.Const(Colon())
 %93  = j::Int64
 %94  = Base.getindex(%90, %91, %92, %93)::Vector{Float64}
 %95  = G::Array{Float64, 3}
 %96  = Main.Notebook.:(:)::Core.Const(Colon())
 %97  = j::Int64
 %98  = Base.getindex(%95, 1, %96, %97)::Vector{Float64}
 %99  = Base.broadcasted(%89, %94, %98)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Vector{Float64}, Vector{Float64}}}
 %100 = Base.materialize(%99)::Vector{Float64}
 %101 = Main.Notebook.:var"'"::Core.Const(adjoint)
 %102 = giθ::Matrix{Float64}
 %103 = i::Int64
 %104 = Main.Notebook.:(:)::Core.Const(Colon())
 %105 = Base.getindex(%102, %103, %104)::Vector{Float64}
 %106 = (%101)(%105)::LinearAlgebra.Adjoint{Float64, Vector{Float64}}
 %107 = (%88)(%100, %106)::Matrix{Float64}
 %108 = (%82)(%87, %107)::Matrix{Float64}
 %109 = Γ::Array{Float64, 3}
 %110 = j::Int64
 %111 = Main.Notebook.:(:)::Core.Const(Colon())
 %112 = Main.Notebook.:(:)::Core.Const(Colon())
        Base.setindex!(%109, %108, %110, %111, %112)
        (@_15 = Base.iterate(%73, %81))
 %115 = @_15::Union{Nothing, Tuple{Int64, Int64}}
 %116 = (%115 === nothing)::Bool
 %117 = Base.not_int(%116)::Bool
└──        goto #5 if not %117
4 ─        goto #3
5 ┄ %120 = Γ::Array{Float64, 3}
 %121 = j::Int64
 %122 = Main.Notebook.:(:)::Core.Const(Colon())
 %123 = Main.Notebook.:(:)::Core.Const(Colon())
 %124 = Base.dotview(%120, %121, %122, %123)::SubArray{Float64, 2, Array{Float64, 3}, Tuple{Int64, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, true}
 %125 = Main.Notebook.:/::Core.Const(/)
 %126 = Γ::Array{Float64, 3}
 %127 = j::Int64
 %128 = Main.Notebook.:(:)::Core.Const(Colon())
 %129 = Main.Notebook.:(:)::Core.Const(Colon())
 %130 = Base.getindex(%126, %127, %128, %129)::Matrix{Float64}
 %131 = n::Int64
 %132 = Base.broadcasted(%125, %130, %131)::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{2}, Nothing, typeof(/), Tuple{Matrix{Float64}, Int64}}
        Base.materialize!(%124, %132)
 %134 = Main.Notebook.:-::Core.Const(-)
 %135 = G::Array{Float64, 3}
 %136 = Main.Notebook.:(:)::Core.Const(Colon())
 %137 = j::Int64
 %138 = Base.getindex(%135, 1, %136, %137)::Vector{Float64}
 %139 = Main.Notebook.:*::Core.Const(*)
 %140 = Γ::Array{Float64, 3}
 %141 = j::Int64
 %142 = Main.Notebook.:(:)::Core.Const(Colon())
 %143 = Main.Notebook.:(:)::Core.Const(Colon())
 %144 = Base.getindex(%140, %141, %142, %143)::Matrix{Float64}
 %145 = Main.Notebook.inv::Core.Const(inv)
 %146 = Ω::Matrix{Float64}
 %147 = (%145)(%146)::Matrix{Float64}
 %148 = gn::LinearAlgebra.Adjoint{Float64, Matrix{Float64}}
 %149 = (%139)(%144, %147, %148)::Matrix{Float64}
 %150 = (%134)(%138, %149)::Matrix{Float64}
 %151 = D::Matrix{Float64}
 %152 = Main.Notebook.:(:)::Core.Const(Colon())
 %153 = j::Int64
        Base.setindex!(%151, %150, %152, %153)
        (@_3 = Base.iterate(%62, %70))
 %156 = @_3::Union{Nothing, Tuple{Int64, Int64}}
 %157 = (%156 === nothing)::Bool
 %158 = Base.not_int(%157)::Bool
└──        goto #7 if not %158
6 ─        goto #2
7 ┄ %161 = n::Int64
 %162 = k::Int64
 %163 = p::Int64
 %164 = gn::LinearAlgebra.Adjoint{Float64, Matrix{Float64}}
 %165 = Ω::Matrix{Float64}
 %166 = D::Matrix{Float64}
 %167 = Core.getfield(#self#, :P)::Core.Const(Main.Notebook.var"#P#statparts_stable##0"())
 %168 = Core.tuple(%161, %162, %163, %164, %165, %166, %167)::Tuple{Int64, Int64, Int64, LinearAlgebra.Adjoint{Float64, Matrix{Float64}}, Matrix{Float64}, Matrix{Float64}, Main.Notebook.var"#P#statparts_stable##0"}
└──        return %168

@benchmark statparts_stable(gi)(β0)
BenchmarkTools.Trial: 10000 samples with 1 evaluation per sample.
 Range (min … max):  23.635 μs … 15.500 ms   GC (min … max):  0.00% … 99.48%
 Time  (median):     39.274 μs                GC (median):     0.00%
 Time  (mean ± σ):   49.689 μs ± 298.283 μs   GC (mean ± σ):  19.16% ±  3.43%

    ▁▃             ▇                                
  ▁▃██▇▅▅▄▄▄▄▄▄▄▄▄████▆▅▄▄▄▄▃▃▄▃▃▃▃▃▂▂▃▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁ ▃
  23.6 μs         Histogram: frequency by time         75.2 μs <

 Memory estimate: 182.01 KiB, allocs estimate: 2904.
using Test
# The two implementations should agree up to floating-point error.
@test klm(gi)(β0) ≈ klm_stable(gi)(β0)
Test Passed

Targeted Optimizations

yes

no

Targeted Optimizations

allocations

floating point operations

something else

do not see any

Parallelization

Multi-thread

GPU

Distributed

Profile

What are the bottlenecks?

Reduce allocations

Optimize for CPU efficiency

???

Basics

Use functions

Type stability

Good algorithm

Code is too slow

Fast enough?

Hooray!

Profiling

profileiframe
using Profile, ProfileCanvas
"""
    profilehtmlstring()

Render the currently collected profile data (`Profile.fetch()`) with
ProfileCanvas as `text/html` and print the markup to standard output,
surrounded by `<br>` tags. Returns `nothing`.
"""
function profilehtmlstring()
  canvas = ProfileCanvas.view(Profile.fetch())
  markup = sprint(show, MIME("text/html"), canvas)
  println("\n<br><br>\n", markup, "\n<br>\n")
end
"""
    profileiframe(filename="proftmp.html")

Write the currently collected profile data (`Profile.fetch()`), rendered by
ProfileCanvas as `text/html`, to `filename`, and return an `HTML` object
holding an `<iframe>` whose `src` is `filename`.
"""
function profileiframe(filename="proftmp.html")
  writeprofile(io) = show(io, MIME("text/html"), ProfileCanvas.view(Profile.fetch()))
  open(writeprofile, filename, "w")
  iframe = "<iframe src=\"" * filename * "\" width=\"1500\"  height=\"650\"></iframe>\n"
  return HTML(iframe)
end
# Discard any samples collected earlier.
Profile.clear();
# n is the capacity of the sample buffer; delay is the time between samples in seconds.
Profile.init(n=10^7,delay=0.00001);
klm_stable(gi)(β0)
foo(n) = sum(klm_stable(gi)(β0) for i in 1:n) # loop so it runs longer to get better profile
foo(1) # precompile outside of profiling
@profile foo(1_000)
profileiframe("klmprof.html") # only needed for quarto, just use `@profview foo(1_000)` elsewhere

Reducing Allocations

  • Allocating memory is slow
  • Reduce allocations by:
    • Using @views instead of slices
    • Pre-allocating and reusing arrays
    • Eliminate dynamic allocations with StaticArrays or similar compile time known size types

Reduced Allocations

klm_fast()
"""
    statparts_fast(gi)

Return a closure `θ -> (n, k, p, gn, Ω, D, P)` computing the same pieces as
the `statparts_stable` closure with fewer allocations.

- `gi(θ)` is evaluated once and reused for both `Ω` and `gn`.
- `Ω` is wrapped in `Hermitian`, and `Ω \\ gn` is solved once outside the
  loop instead of forming `inv(Ω)` on every iteration.
- The Jacobian is written into a preallocated `n × k × p` buffer.
- All slices inside the loop are taken as views and updated in place.

`gi(θ)` must return an `n × k` matrix; `p = length(θ)`. In the returned
tuple, `gn` is the `k × 1` adjoint of the column means of `gi(θ)`, `Ω` is
`Hermitian(cov(gi(θ)))`, `D` is `k × p`, and `P(A) = A*pinv(A'*A)*A'`.
"""
function statparts_fast(gi::F) where {F <: Function}
  function P(A::AbstractMatrix) # projection matrix
    A*pinv(A'*A)*A'
  end
  let gi=gi
    function(θ)
      giθ = gi(θ)
      p = length(θ)
      (n, k) = size(giθ)
      Ω = Hermitian(cov(giθ))
      # reuse giθ rather than evaluating gi(θ) a second time
      gn = mean(giθ, dims=1)'
      iΩgn = Ω \ gn
      Gi = similar(gn,n,k,p)
      ForwardDiff.jacobian!(Gi,gi,θ)
      G = mean(Gi, dims=1)
      Γ = zeros(eltype(Gi),p,k,k)
      D = zeros(eltype(Gi),k, p)
      # the outer @inbounds also covers the inner loop
      @inbounds for j in 1:p
        for i in 1:n
          @views Γ[j,:,:] .+= (Gi[i,:,j] .- G[1,:,j]) * giθ[i,:]'
        end
        # @views keeps the right-hand side slice from being copied
        @views Γ[j,:,:] ./= n
        @views D[:,j] .= G[1,:,j] .- Γ[j,:,:]*iΩgn
      end
      return(n,k,p,gn, Ω, D, P)
    end
  end
end

"""
    klm_fast(gi)

Return a closure `θ -> statistic` built on `statparts_fast(gi)`.

The closure evaluates `n * gn' * irΩ * P(irΩ * D) * irΩ * gn`, where `irΩ`
is the inverse square root of `Ω`, formed once from the eigendecomposition
of `Ω`, and `(n, k, p, gn, Ω, D, P)` is the tuple returned by the
`statparts_fast` closure.
"""
function klm_fast(gi::F) where {F <: Function}
  SP = statparts_fast(gi)
  function(θ)
    (n,k,p,gn, Ω, D, P) = SP(θ)
    # With Ω = v*diagm(λ)*v', the inverse square root is v*diagm(λ.^(-1/2))*v'.
    λ, v = eigen(Ω)
    irΩ = v*diagm(λ.^(-1/2))*v'
    # gn is k×1, so the product is a 1×1 matrix; [1] extracts the scalar.
    return n*(gn'*irΩ*P(irΩ*D)*irΩ*gn)[1]
  end
end
klm_fast (generic function with 1 method)
@benchmark klm_fast(gi)(β0)
BenchmarkTools.Trial: 10000 samples with 1 evaluation per sample.
 Range (min … max):  14.328 μs … 15.992 ms   GC (min … max):  0.00% … 98.79%
 Time  (median):     20.559 μs                GC (median):     0.00%
 Time  (mean ± σ):   24.120 μs ± 212.203 μs   GC (mean ± σ):  12.27% ±  1.40%

      ▅▇█▆▂         ▁▄▄▂▁                                      
  ▁▂▄▇█████▇▅▄▄▃▃▄▅▇█████▇▅▅▄▃▂▃▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▄▃▃▃▂▂▂▁ ▃
  14.3 μs         Histogram: frequency by time         34.4 μs <

 Memory estimate: 80.04 KiB, allocs estimate: 945.
# The optimized version should agree with the baseline up to floating-point error.
@test klm_fast(gi)(β0) ≈ klm(gi)(β0)
Test Passed

StaticArrays

extra functions
"""
    Integrator(x, w)

Pair a collection of points `x` with a collection of weights `w`.

Calling an integrator on a function `f` returns the sum of `f(xᵢ) * wᵢ`
over the zipped points and weights.
"""
struct Integrator{Tx, Tw}
    x::Tx
    w::Tw
end

"""
    Integrator(dx::Distribution, n=100)

Build an integrator from `n` draws of `rand(dx)`, each weighted `1/n`.
The weights are stored as a lazily repeated value, not as an array.
"""
function Integrator(dx::Distribution, n=100)
    draws = map(_ -> rand(dx), 1:n)
    weights = Base.Iterators.repeated(1/n)
    return Integrator(draws, weights)
end

function (∫::Integrator)(f)
    weighted((node, weight)) = f(node) * weight
    return sum(weighted, zip(∫.x, ∫.w))
end
Code
"""
    share(δ, Σ, x, ∫)

Integrate a vector of logit-style shares over draws `ν`.

For each `ν` supplied by `∫`, the index vector is `s = δ + x*Σ*ν` and the
shares are `exp.(s) ./ (sum(exp.(s)) + exp(0))`. They are evaluated after
subtracting `max(0, maximum(s))` from every exponent, so no exponent is
positive. The `exp(0)` term acts as one extra alternative whose index is
fixed at 0, so the shares sum to less than one.

# Arguments
- `δ`: vector of length `J = size(x, 1)`.
- `Σ`: `K × K` matrix, with `K = size(x, 2)`.
- `x`: `J × K` matrix.
- `∫`: callable; `∫(f)` is invoked with the per-draw share function and
  its result is returned.

# Throws
- `ErrorException` (via `error`) if `length(δ) != size(x, 1)` or
  `size(Σ) != (K, K)`.
"""
function share(δ, Σ, x, ∫)
  J,K = size(x)
  (length(δ) == J) || error("length(δ)=$(length(δ)) != size(x,1)=$J")
  (K,K) == size(Σ) || error("size(x,2)=$K != size(Σ)=$(size(Σ))")
  # x*Σ does not depend on ν, so compute it once outside the integrand
  xΣ = x*Σ
  function shareν(ν)
    s = δ + xΣ*ν
    # shift by the largest exponent (including the 0 of the extra alternative)
    smax=max(0,maximum(s))
    s = s .- smax
    s = exp.(s)
    s *= 1/(sum(s) + exp(0-smax))
    return(s)
  end
  return(∫(shareν))
end
share (generic function with 1 method)

Heap allocated arrays

# Inputs for `share` stored in ordinary arrays.
J = 10 # length of δ and number of rows of X
K = 5  # number of columns of X; Σ is K×K
δ = rand(J)
X = randn(J,K)
Σ = I + zeros(K,K)
# Integrator built from draws of a K-dimensional standard normal
∫ = Integrator(MvNormal(zeros(K),I))

@benchmark share(δ,Σ,X,∫)
BenchmarkTools.Trial: 10000 samples with 1 evaluation per sample.
 Range (min … max):  11.181 μs … 22.209 ms   GC (min … max):  0.00% … 99.75%
 Time  (median):     20.850 μs                GC (median):     0.00%
 Time  (mean ± σ):   25.465 μs ± 290.928 μs   GC (mean ± σ):  18.96% ±  1.73%

  ▅▅▇▇▄▃▂▂▅█▅▄▄▃▂▂▂▂▁▁▁▁ ▂▁▁                                 ▂
  ███████████████████████████▆▅▅▄▅▅▁▅▁▁▄▄▁▁▄▃▁▁▃▃▁▃▁▁▁▁▁▁▁▄▄ █
  11.2 μs       Histogram: log(frequency) by time      72.3 μs <

 Memory estimate: 98.78 KiB, allocs estimate: 1400.

Stack allocated arrays

using StaticArrays
# Statically sized copies of the inputs to `share`.
sδ = SVector{J}(δ)
sΣ = SMatrix{K,K}(Σ)
sX = SMatrix{J,K}(X)
nd = length(∫.x)
# explicit weight vector of nd entries, each 1/nd
iw = SVector{nd}(fill(1/nd,nd))
# each point of ∫.x converted to a length-K SVector
ix = [SVector{K}(x) for x in ∫.x]
s∫ = Integrator(ix,iw)

@benchmark share(sδ,sΣ,sX,s∫)
BenchmarkTools.Trial: 10000 samples with 7 evaluations per sample.
 Range (min … max):  4.282 μs … 11.525 μs   GC (min … max): 0.00% … 0.00%
 Time  (median):     4.314 μs                GC (median):    0.00%
 Time  (mean ± σ):   4.390 μs ± 447.842 ns   GC (mean ± σ):  0.00% ± 0.00%

  █▆▄   ▁▁                                                  ▂
  ████▇█████▇▇▆▆▅▅▄▄▅▃▄▅▅▅▃▅▆▅▄▃▃▁▄▄▁▄▄▃▃▄▄▄▅▄▄▅▄▄▅▅▅▄▅▆▅▅▅ █
  4.28 μs      Histogram: log(frequency) by time      6.06 μs <

 Memory estimate: 96 bytes, allocs estimate: 1.

Memory Considerations

  • Computations are faster when operating on contiguous chunks of memory
    • Access arrays by columns
  • Data moves RAM ⇒ Cache ⇒ CPU Registers
    • RAM ⇒ Cache much slower than Cache ⇒ CPU Registers
    • Can see big benefits from small code that fits on cache
    • CPU prefetches data from RAM ⇒ Cache, by predicting what will be needed
    • Needed data not on Cache when needed is a “cache miss”, these are costly
    • Predictable code without branches and accessing contiguous memory helps avoid cache misses

Single Instruction, Multiple Data

  • CPUs can perform the same operation on multiple numbers at the same time
    • “Vectorized instructions”
    • Current generation x86 CPUs have 512 bit registers, can operate on 8 Float64 values at once
  • Compiler tries to use vectorized instructions when possible
    • Loop of fixed length (no break or continue)
    • No branching
    • Re-ordering allowed (indicate with @simd)

SISD

"""
    slowdot(a, b)

Return the dot product of `a` and `b`, i.e. the sum of `a[i]*b[i]` over
`eachindex(a)`, accumulated one element at a time in index order.

The accumulator starts at `zero` of the promoted element type, so empty inputs
give zero and the result type does not change inside the loop. Indexing is
bounds checked, so a `b` that lacks one of `a`'s indices raises a `BoundsError`.
"""
function slowdot(a,b)
  # Additive identity: a dot product must start from zero, not one.
  out = zero(promote_type(eltype(a),eltype(b)))
  for i in eachindex(a)
    out += a[i]*b[i]
  end
  out
end

# Two random Float64 vectors of length n, used as inputs to the dot-product examples.
n = 1000
a, b = rand(n), rand(n)
# Print the LLVM IR that Julia generates for `slowdot` on these argument types.
@code_llvm slowdot(a,b)
; Function Signature: slowdot(Array{Float64, 1}, Array{Float64, 1})
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:609 within `slowdot`
define double @julia_slowdot_27643(ptr noundef nonnull align 8 dereferenceable(24) %"a::Array", ptr noundef nonnull align 8 dereferenceable(24) %"b::Array") #0 {
top:
  %"new::Tuple" = alloca [1 x i64], align 8
  %"new::Tuple24" = alloca [1 x i64], align 8
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:611 within `slowdot`
; ┌ @ abstractarray.jl:321 within `eachindex`
; │┌ @ abstractarray.jl:137 within `axes1`
; ││┌ @ abstractarray.jl:98 within `axes`
; │││┌ @ array.jl:194 within `size`
      %"a::Array.size_ptr" = getelementptr inbounds i8, ptr %"a::Array", i64 16
      %"a::Array.size.0.copyload" = load i64, ptr %"a::Array.size_ptr", align 8
; └└└└
; ┌ @ range.jl:917 within `iterate`
; │┌ @ range.jl:688 within `isempty`
; ││┌ @ operators.jl:425 within `>`
; │││┌ @ int.jl:83 within `<`
      %0 = icmp slt i64 %"a::Array.size.0.copyload", 1
; └└└└
  br i1 %0, label %L65, label %L13.preheader

L13.preheader:                                    ; preds = %top
  %"b::Array.size_ptr" = getelementptr inbounds i8, ptr %"b::Array", i64 16
  %"b::Array.size.0.copyload" = load i64, ptr %"b::Array.size_ptr", align 8
  %memoryref_data = load ptr, ptr %"a::Array", align 8
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:612 within `slowdot`
; ┌ @ essentials.jl:919 within `getindex`
   %invariant.gep = getelementptr i8, ptr %memoryref_data, i64 -8
   %memoryref_data10 = load ptr, ptr %"b::Array", align 8
   %invariant.gep32 = getelementptr i8, ptr %memoryref_data10, i64 -8
   %smin = call i64 @llvm.smin.i64(i64 %"b::Array.size.0.copyload", i64 0)
   %1 = sub i64 %"b::Array.size.0.copyload", %smin
   %smax = call i64 @llvm.smax.i64(i64 %smin, i64 -1)
   %2 = add nsw i64 %smax, 1
   %3 = mul nuw nsw i64 %1, %2
   %.not = icmp eq i64 %3, 0
   br i1 %.not, label %main.pseudo.exit, label %L48.preheader

L48.preheader:                                    ; preds = %L13.preheader
   %umin = call i64 @llvm.umin.i64(i64 %"a::Array.size.0.copyload", i64 %3)
; └
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:613 within `slowdot`
  %umax = call i64 @llvm.umax.i64(i64 %umin, i64 1)
  %xtraiter = and i64 %umax, 7
  %4 = icmp ult i64 %umin, 8
  br i1 %4, label %main.exit.selector.unr-lcssa, label %L48.preheader.new

L48.preheader.new:                                ; preds = %L48.preheader
  %unroll_iter = and i64 %umax, 9223372036854775800
  br label %L48

L27:                                              ; preds = %L13.postloop
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:612 within `slowdot`
; ┌ @ essentials.jl:919 within `getindex`
   store i64 %value_phi3.postloop, ptr %"new::Tuple24", align 8
   call void @j_throw_boundserror_27646(ptr nonnull %"a::Array", ptr nocapture nonnull readonly %"new::Tuple24") #13
   unreachable

L45:                                              ; preds = %L30.postloop
   store i64 %value_phi3.postloop, ptr %"new::Tuple", align 8
   call void @j_throw_boundserror_27646(ptr nonnull %"b::Array", ptr nocapture nonnull readonly %"new::Tuple") #13
   unreachable

L48:                                              ; preds = %L48, %L48.preheader.new
   %value_phi3 = phi i64 [ 1, %L48.preheader.new ], [ %38, %L48 ]
   %value_phi5 = phi double [ 1.000000e+00, %L48.preheader.new ], [ %37, %L48 ]
   %niter = phi i64 [ 0, %L48.preheader.new ], [ %niter.next.7, %L48 ]
; │ @ essentials.jl:920 within `getindex`
   %memoryref_offset = shl i64 %value_phi3, 3
   %gep = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset
   %5 = load double, ptr %gep, align 8
   %gep33 = getelementptr i8, ptr %invariant.gep32, i64 %memoryref_offset
   %6 = load double, ptr %gep33, align 8
; └
; ┌ @ float.jl:497 within `*`
   %7 = fmul double %5, %6
; └
; ┌ @ float.jl:495 within `+`
   %8 = fadd double %value_phi5, %7
; └
; ┌ @ essentials.jl:920 within `getindex`
   %gep.1 = getelementptr i8, ptr %memoryref_data, i64 %memoryref_offset
   %9 = load double, ptr %gep.1, align 8
   %gep33.1 = getelementptr i8, ptr %memoryref_data10, i64 %memoryref_offset
   %10 = load double, ptr %gep33.1, align 8
; └
; ┌ @ float.jl:497 within `*`
   %11 = fmul double %9, %10
; └
; ┌ @ float.jl:495 within `+`
   %12 = fadd double %8, %11
; └
; ┌ @ essentials.jl:920 within `getindex`
   %memoryref_offset.2 = add i64 %memoryref_offset, 16
   %gep.2 = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset.2
   %13 = load double, ptr %gep.2, align 8
   %gep33.2 = getelementptr i8, ptr %invariant.gep32, i64 %memoryref_offset.2
   %14 = load double, ptr %gep33.2, align 8
; └
; ┌ @ float.jl:497 within `*`
   %15 = fmul double %13, %14
; └
; ┌ @ float.jl:495 within `+`
   %16 = fadd double %12, %15
; └
; ┌ @ essentials.jl:920 within `getindex`
   %memoryref_offset.3 = add i64 %memoryref_offset, 24
   %gep.3 = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset.3
   %17 = load double, ptr %gep.3, align 8
   %gep33.3 = getelementptr i8, ptr %invariant.gep32, i64 %memoryref_offset.3
   %18 = load double, ptr %gep33.3, align 8
; └
; ┌ @ float.jl:497 within `*`
   %19 = fmul double %17, %18
; └
; ┌ @ float.jl:495 within `+`
   %20 = fadd double %16, %19
; └
; ┌ @ essentials.jl:920 within `getindex`
   %memoryref_offset.4 = add i64 %memoryref_offset, 32
   %gep.4 = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset.4
   %21 = load double, ptr %gep.4, align 8
   %gep33.4 = getelementptr i8, ptr %invariant.gep32, i64 %memoryref_offset.4
   %22 = load double, ptr %gep33.4, align 8
; └
; ┌ @ float.jl:497 within `*`
   %23 = fmul double %21, %22
; └
; ┌ @ float.jl:495 within `+`
   %24 = fadd double %20, %23
; └
; ┌ @ essentials.jl:920 within `getindex`
   %memoryref_offset.5 = add i64 %memoryref_offset, 40
   %gep.5 = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset.5
   %25 = load double, ptr %gep.5, align 8
   %gep33.5 = getelementptr i8, ptr %invariant.gep32, i64 %memoryref_offset.5
   %26 = load double, ptr %gep33.5, align 8
; └
; ┌ @ float.jl:497 within `*`
   %27 = fmul double %25, %26
; └
; ┌ @ float.jl:495 within `+`
   %28 = fadd double %24, %27
; └
; ┌ @ essentials.jl:920 within `getindex`
   %memoryref_offset.6 = add i64 %memoryref_offset, 48
   %gep.6 = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset.6
   %29 = load double, ptr %gep.6, align 8
   %gep33.6 = getelementptr i8, ptr %invariant.gep32, i64 %memoryref_offset.6
   %30 = load double, ptr %gep33.6, align 8
; └
; ┌ @ float.jl:497 within `*`
   %31 = fmul double %29, %30
; └
; ┌ @ float.jl:495 within `+`
   %32 = fadd double %28, %31
; └
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:613 within `slowdot`
; ┌ @ range.jl:921 within `iterate`
   %33 = add nuw i64 %value_phi3, 7
; └
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:612 within `slowdot`
; ┌ @ essentials.jl:920 within `getindex`
   %memoryref_offset.7 = shl i64 %33, 3
   %gep.7 = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset.7
   %34 = load double, ptr %gep.7, align 8
   %gep33.7 = getelementptr i8, ptr %invariant.gep32, i64 %memoryref_offset.7
   %35 = load double, ptr %gep33.7, align 8
; └
; ┌ @ float.jl:497 within `*`
   %36 = fmul double %34, %35
; └
; ┌ @ float.jl:495 within `+`
   %37 = fadd double %32, %36
; └
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:613 within `slowdot`
; ┌ @ range.jl:921 within `iterate`
   %38 = add nuw i64 %value_phi3, 8
; └
  %niter.next.7 = add i64 %niter, 8
  %niter.ncmp.7.not = icmp eq i64 %niter.next.7, %unroll_iter
  br i1 %niter.ncmp.7.not, label %main.exit.selector.unr-lcssa, label %L48

main.exit.selector.unr-lcssa:                     ; preds = %L48, %L48.preheader
  %value_phi3.lcssa.ph = phi i64 [ undef, %L48.preheader ], [ %33, %L48 ]
  %.lcssa58.ph = phi double [ undef, %L48.preheader ], [ %37, %L48 ]
  %.lcssa57.ph = phi i64 [ undef, %L48.preheader ], [ %38, %L48 ]
  %value_phi3.unr = phi i64 [ 1, %L48.preheader ], [ %38, %L48 ]
  %value_phi5.unr = phi double [ 1.000000e+00, %L48.preheader ], [ %37, %L48 ]
  %lcmp.mod.not = icmp eq i64 %xtraiter, 0
  br i1 %lcmp.mod.not, label %main.exit.selector, label %L48.epil

L48.epil:                                         ; preds = %L48.epil, %main.exit.selector.unr-lcssa
  %value_phi3.epil = phi i64 [ %43, %L48.epil ], [ %value_phi3.unr, %main.exit.selector.unr-lcssa ]
  %value_phi5.epil = phi double [ %42, %L48.epil ], [ %value_phi5.unr, %main.exit.selector.unr-lcssa ]
  %epil.iter = phi i64 [ %epil.iter.next, %L48.epil ], [ 0, %main.exit.selector.unr-lcssa ]
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:612 within `slowdot`
; ┌ @ essentials.jl:920 within `getindex`
   %memoryref_offset.epil = shl i64 %value_phi3.epil, 3
   %gep.epil = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset.epil
   %39 = load double, ptr %gep.epil, align 8
   %gep33.epil = getelementptr i8, ptr %invariant.gep32, i64 %memoryref_offset.epil
   %40 = load double, ptr %gep33.epil, align 8
; └
; ┌ @ float.jl:497 within `*`
   %41 = fmul double %39, %40
; └
; ┌ @ float.jl:495 within `+`
   %42 = fadd double %value_phi5.epil, %41
; └
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:613 within `slowdot`
; ┌ @ range.jl:921 within `iterate`
   %43 = add nuw i64 %value_phi3.epil, 1
; └
  %epil.iter.next = add i64 %epil.iter, 1
  %epil.iter.cmp.not = icmp eq i64 %epil.iter.next, %xtraiter
  br i1 %epil.iter.cmp.not, label %main.exit.selector, label %L48.epil

main.exit.selector:                               ; preds = %L48.epil, %main.exit.selector.unr-lcssa
  %value_phi3.lcssa = phi i64 [ %value_phi3.lcssa.ph, %main.exit.selector.unr-lcssa ], [ %value_phi3.epil, %L48.epil ]
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:612 within `slowdot`
; ┌ @ float.jl:495 within `+`
   %.lcssa58 = phi double [ %.lcssa58.ph, %main.exit.selector.unr-lcssa ], [ %42, %L48.epil ]
; └
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:613 within `slowdot`
; ┌ @ range.jl:921 within `iterate`
   %.lcssa57 = phi i64 [ %.lcssa57.ph, %main.exit.selector.unr-lcssa ], [ %43, %L48.epil ]
; └
  %44 = icmp ult i64 %value_phi3.lcssa, %"a::Array.size.0.copyload"
  br i1 %44, label %main.pseudo.exit, label %L65

main.pseudo.exit:                                 ; preds = %main.exit.selector, %L13.preheader
  %value_phi3.copy = phi i64 [ 1, %L13.preheader ], [ %.lcssa57, %main.exit.selector ]
  %value_phi5.copy = phi double [ 1.000000e+00, %L13.preheader ], [ %.lcssa58, %main.exit.selector ]
  br label %L13.postloop

L65:                                              ; preds = %L48.postloop, %main.exit.selector, %top
  %value_phi23 = phi double [ 1.000000e+00, %top ], [ %.lcssa58, %main.exit.selector ], [ %49, %L48.postloop ]
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:614 within `slowdot`
  ret double %value_phi23

L13.postloop:                                     ; preds = %L48.postloop, %main.pseudo.exit
  %value_phi3.postloop = phi i64 [ %50, %L48.postloop ], [ %value_phi3.copy, %main.pseudo.exit ]
  %value_phi5.postloop = phi double [ %49, %L48.postloop ], [ %value_phi5.copy, %main.pseudo.exit ]
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:612 within `slowdot`
; ┌ @ essentials.jl:919 within `getindex`
   %45 = add i64 %value_phi3.postloop, -1
   %.not.postloop = icmp ult i64 %45, %"a::Array.size.0.copyload"
   br i1 %.not.postloop, label %L30.postloop, label %L27

L30.postloop:                                     ; preds = %L13.postloop
   %.not28.postloop = icmp ult i64 %45, %"b::Array.size.0.copyload"
   br i1 %.not28.postloop, label %L48.postloop, label %L45

L48.postloop:                                     ; preds = %L30.postloop
; │ @ essentials.jl:920 within `getindex`
   %memoryref_offset.postloop = shl i64 %value_phi3.postloop, 3
   %gep.postloop = getelementptr i8, ptr %invariant.gep, i64 %memoryref_offset.postloop
   %46 = load double, ptr %gep.postloop, align 8
   %gep33.postloop = getelementptr i8, ptr %invariant.gep32, i64 %memoryref_offset.postloop
   %47 = load double, ptr %gep33.postloop, align 8
; └
; ┌ @ float.jl:497 within `*`
   %48 = fmul double %46, %47
; └
; ┌ @ float.jl:495 within `+`
   %49 = fadd double %value_phi5.postloop, %48
; └
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:613 within `slowdot`
; ┌ @ range.jl:921 within `iterate`
; │┌ @ promotion.jl:637 within `==`
    %.not29.not.postloop = icmp eq i64 %value_phi3.postloop, %"a::Array.size.0.copyload"
; │└
   %50 = add i64 %value_phi3.postloop, 1
; └
  br i1 %.not29.not.postloop, label %L65, label %L13.postloop
}

SIMD

"""
    fastdot(a, b)

Return the dot product of `a` and `b`, i.e. the sum of `a[i]*b[i]` over
`eachindex(a)`, using a loop annotated with `@simd`.

`@simd` lets the compiler reorder the floating-point additions so it can use
vector instructions; the result can therefore differ from a strictly sequential
sum in the last bits. The accumulator starts at `zero` of the promoted element
type, so empty inputs give zero.
"""
function fastdot(a,b)
  # Additive identity: a dot product must start from zero, not one.
  out = zero(promote_type(eltype(a),eltype(b)))
  @simd for i in eachindex(a)
    out += a[i]*b[i]
  end
  out
end

# Print the LLVM IR that Julia generates for `fastdot` on these argument types.
@code_llvm fastdot(a,b)
; Function Signature: fastdot(Array{Float64, 1}, Array{Float64, 1})
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:626 within `fastdot`
define double @julia_fastdot_27712(ptr noundef nonnull align 8 dereferenceable(24) %"a::Array", ptr noundef nonnull align 8 dereferenceable(24) %"b::Array") #0 {
top:
  %"new::Tuple17" = alloca [1 x i64], align 8
  %"new::Tuple18" = alloca [1 x i64], align 8
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:628 within `fastdot`
; ┌ @ simdloop.jl:69 within `macro expansion`
; │┌ @ abstractarray.jl:321 within `eachindex`
; ││┌ @ abstractarray.jl:137 within `axes1`
; │││┌ @ abstractarray.jl:98 within `axes`
; ││││┌ @ array.jl:194 within `size`
       %"a::Array.size_ptr" = getelementptr inbounds i8, ptr %"a::Array", i64 16
       %"a::Array.size.0.copyload" = load i64, ptr %"a::Array.size_ptr", align 8
; │└└└└
; │ @ simdloop.jl:72 within `macro expansion`
; │┌ @ int.jl:83 within `<`
    %0 = icmp slt i64 %"a::Array.size.0.copyload", 1
; │└
   br i1 %0, label %L69, label %L8.preheader

L8.preheader:                                     ; preds = %top
   %"b::Array.size_ptr" = getelementptr inbounds i8, ptr %"b::Array", i64 16
   %"b::Array.size.0.copyload" = load i64, ptr %"b::Array.size_ptr", align 8
   %memoryref_data = load ptr, ptr %"a::Array", align 8
   %memoryref_data8 = load ptr, ptr %"b::Array", align 8
; │ @ simdloop.jl:75 within `macro expansion`
   %1 = add i64 %"b::Array.size.0.copyload", -9223372036854775807
   %smax = call i64 @llvm.smax.i64(i64 %1, i64 0)
   %2 = sub i64 %"b::Array.size.0.copyload", %smax
   %isnotneg.inv = icmp slt i64 %"b::Array.size.0.copyload", 0
   %3 = select i1 %isnotneg.inv, i64 0, i64 %2
   %smin32 = call i64 @llvm.smin.i64(i64 %"a::Array.size.0.copyload", i64 %3)
   %exit.mainloop.at = call i64 @llvm.smax.i64(i64 %smin32, i64 0)
   %.not = icmp slt i64 %3, 1
   br i1 %.not, label %main.pseudo.exit, label %L60.preheader

L60.preheader:                                    ; preds = %L8.preheader
   %min.iters.check = icmp ult i64 %smin32, 32
   br i1 %min.iters.check, label %scalar.ph, label %vector.ph

vector.ph:                                        ; preds = %L60.preheader
   %n.vec = and i64 %smin32, 9223372036854775776
   br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ int.jl:87 within `+`
     %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
     %vec.phi = phi <8 x double> [ <double 1.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %vector.ph ], [ %17, %vector.body ]
     %vec.phi55 = phi <8 x double> [ <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %vector.ph ], [ %18, %vector.body ]
     %vec.phi56 = phi <8 x double> [ <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %vector.ph ], [ %19, %vector.body ]
     %vec.phi57 = phi <8 x double> [ <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %vector.ph ], [ %20, %vector.body ]
; │└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:629
; │┌ @ essentials.jl:920 within `getindex`
    %4 = shl i64 %index, 3
    %5 = getelementptr inbounds i8, ptr %memoryref_data, i64 %4
    %6 = getelementptr inbounds double, ptr %5, i64 8
    %7 = getelementptr inbounds double, ptr %5, i64 16
    %8 = getelementptr inbounds double, ptr %5, i64 24
    %wide.load = load <8 x double>, ptr %5, align 8
    %wide.load58 = load <8 x double>, ptr %6, align 8
    %wide.load59 = load <8 x double>, ptr %7, align 8
    %wide.load60 = load <8 x double>, ptr %8, align 8
    %9 = getelementptr inbounds i8, ptr %memoryref_data8, i64 %4
    %10 = getelementptr inbounds double, ptr %9, i64 8
    %11 = getelementptr inbounds double, ptr %9, i64 16
    %12 = getelementptr inbounds double, ptr %9, i64 24
    %wide.load61 = load <8 x double>, ptr %9, align 8
    %wide.load62 = load <8 x double>, ptr %10, align 8
    %wide.load63 = load <8 x double>, ptr %11, align 8
    %wide.load64 = load <8 x double>, ptr %12, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %13 = fmul contract <8 x double> %wide.load, %wide.load61
    %14 = fmul contract <8 x double> %wide.load58, %wide.load62
    %15 = fmul contract <8 x double> %wide.load59, %wide.load63
    %16 = fmul contract <8 x double> %wide.load60, %wide.load64
; │└
; │┌ @ float.jl:495 within `+`
    %17 = fadd reassoc contract <8 x double> %vec.phi, %13
    %18 = fadd reassoc contract <8 x double> %vec.phi55, %14
    %19 = fadd reassoc contract <8 x double> %vec.phi56, %15
    %20 = fadd reassoc contract <8 x double> %vec.phi57, %16
; │└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ int.jl:87 within `+`
     %index.next = add nuw i64 %index, 32
     %21 = icmp eq i64 %index.next, %n.vec
     br i1 %21, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
; │└└
; │ @ simdloop.jl:75 within `macro expansion`
   %bin.rdx = fadd reassoc contract <8 x double> %18, %17
   %bin.rdx65 = fadd reassoc contract <8 x double> %19, %bin.rdx
   %bin.rdx66 = fadd reassoc contract <8 x double> %20, %bin.rdx65
   %22 = call reassoc contract double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %bin.rdx66)
   %cmp.n = icmp eq i64 %smin32, %n.vec
   br i1 %cmp.n, label %main.exit.selector, label %scalar.ph

scalar.ph:                                        ; preds = %middle.block, %L60.preheader
   %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %L60.preheader ]
   %bc.merge.rdx = phi double [ %22, %middle.block ], [ 1.000000e+00, %L60.preheader ]
   br label %L60

L39:                                              ; preds = %L12.postloop
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:629
; │┌ @ essentials.jl:919 within `getindex`
    store i64 %29, ptr %"new::Tuple18", align 8
    call void @j_throw_boundserror_27715(ptr nonnull %"a::Array", ptr nocapture nonnull readonly %"new::Tuple18") #13
    unreachable

L57:                                              ; preds = %L42.postloop
    store i64 %29, ptr %"new::Tuple17", align 8
    call void @j_throw_boundserror_27715(ptr nonnull %"b::Array", ptr nocapture nonnull readonly %"new::Tuple17") #13
    unreachable

L60:                                              ; preds = %L60, %scalar.ph
    %value_phi130 = phi i64 [ %23, %L60 ], [ %bc.resume.val, %scalar.ph ]
    %value_phi29 = phi double [ %27, %L60 ], [ %bc.merge.rdx, %scalar.ph ]
; │└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ int.jl:87 within `+`
     %23 = add nuw nsw i64 %value_phi130, 1
; │└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:629
; │┌ @ essentials.jl:920 within `getindex`
    %memoryref_byteoffset = shl i64 %value_phi130, 3
    %memoryref_data6 = getelementptr inbounds i8, ptr %memoryref_data, i64 %memoryref_byteoffset
    %24 = load double, ptr %memoryref_data6, align 8
    %memoryref_data16 = getelementptr inbounds i8, ptr %memoryref_data8, i64 %memoryref_byteoffset
    %25 = load double, ptr %memoryref_data16, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %26 = fmul contract double %24, %25
; │└
; │┌ @ float.jl:495 within `+`
    %27 = fadd reassoc contract double %value_phi29, %26
; │└
; │ @ simdloop.jl:75 within `macro expansion`
   %exitcond46.not = icmp eq i64 %smin32, %23
   br i1 %exitcond46.not, label %main.exit.selector, label %L60

main.exit.selector:                               ; preds = %L60, %middle.block
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:629
; │┌ @ float.jl:495 within `+`
    %.lcssa54 = phi double [ %22, %middle.block ], [ %27, %L60 ]
; │└
; │ @ simdloop.jl:75 within `macro expansion`
   %28 = icmp slt i64 %3, %"a::Array.size.0.copyload"
   br i1 %28, label %main.pseudo.exit, label %L69

main.pseudo.exit:                                 ; preds = %main.exit.selector, %L8.preheader
   %value_phi29.copy = phi double [ 1.000000e+00, %L8.preheader ], [ %.lcssa54, %main.exit.selector ]
   %umax = call i64 @llvm.umax.i64(i64 %exit.mainloop.at, i64 %"a::Array.size.0.copyload")
   %umax44 = call i64 @llvm.umax.i64(i64 %exit.mainloop.at, i64 %"b::Array.size.0.copyload")
   br label %L12.postloop

L69:                                              ; preds = %L60.postloop, %main.exit.selector, %top
   %value_phi21 = phi double [ 1.000000e+00, %top ], [ %.lcssa54, %main.exit.selector ], [ %33, %L60.postloop ]
; │ @ simdloop.jl:84 within `macro expansion`
   ret double %value_phi21

L12.postloop:                                     ; preds = %L60.postloop, %main.pseudo.exit
   %value_phi130.postloop = phi i64 [ %exit.mainloop.at, %main.pseudo.exit ], [ %29, %L60.postloop ]
   %value_phi29.postloop = phi double [ %value_phi29.copy, %main.pseudo.exit ], [ %33, %L60.postloop ]
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ int.jl:87 within `+`
     %29 = add i64 %value_phi130.postloop, 1
; │└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:629
; │┌ @ essentials.jl:919 within `getindex`
    %exitcond.not = icmp eq i64 %value_phi130.postloop, %umax
    br i1 %exitcond.not, label %L39, label %L42.postloop

L42.postloop:                                     ; preds = %L12.postloop
    %exitcond45.not = icmp eq i64 %value_phi130.postloop, %umax44
    br i1 %exitcond45.not, label %L57, label %L60.postloop

L60.postloop:                                     ; preds = %L42.postloop
; ││ @ essentials.jl:920 within `getindex`
    %memoryref_byteoffset.postloop = shl i64 %value_phi130.postloop, 3
    %memoryref_data6.postloop = getelementptr inbounds i8, ptr %memoryref_data, i64 %memoryref_byteoffset.postloop
    %30 = load double, ptr %memoryref_data6.postloop, align 8
    %memoryref_data16.postloop = getelementptr inbounds i8, ptr %memoryref_data8, i64 %memoryref_byteoffset.postloop
    %31 = load double, ptr %memoryref_data16.postloop, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %32 = fmul contract double %30, %31
; │└
; │┌ @ float.jl:495 within `+`
    %33 = fadd reassoc contract double %value_phi29.postloop, %32
; │└
; │ @ simdloop.jl:75 within `macro expansion`
; │┌ @ int.jl:83 within `<`
    %.not.postloop = icmp slt i64 %29, %"a::Array.size.0.copyload"
; │└
   br i1 %.not.postloop, label %L12.postloop, label %L69
; └
}

Multiple faster than Single

# `$a` and `$b` interpolate the global arrays into the benchmark expression, so
# global-variable access is not part of what is timed.
@benchmark slowdot($a,$b)
BenchmarkTools.Trial: 10000 samples with 223 evaluations per sample.
 Range (min … max):  332.601 ns … 689.735 ns   GC (min … max): 0.00% … 0.00%
 Time  (median):     334.305 ns                GC (median):    0.00%
 Time  (mean ± σ):   335.921 ns ±  14.695 ns   GC (mean ± σ):  0.00% ± 0.00%

     ▅██▅▃▃▃▂▂▂▃▂▁▂▁     ▁▂▂▁▁                                ▂
  ▅▇██████████████████▆▇▇██████▇█▇▇▇▇▇▇▆▇▆▆▅▅▇▅▅▆▅▅▆▄▄▄▃▂▂▄▃▇ █
  333 ns        Histogram: log(frequency) by time        350 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.
# `$a` and `$b` interpolate the global arrays into the benchmark expression, so
# global-variable access is not part of what is timed.
@benchmark fastdot($a,$b)
BenchmarkTools.Trial: 10000 samples with 995 evaluations per sample.
 Range (min … max):  25.173 ns … 73.878 ns   GC (min … max): 0.00% … 0.00%
 Time  (median):     25.867 ns               GC (median):    0.00%
 Time  (mean ± σ):   26.048 ns ±  1.313 ns   GC (mean ± σ):  0.00% ± 0.00%

        ▂▁▂▃▅█▃▁     ▂▄▄                                     ▁
  ▇█▄▃█▇████████▄▇▇▆████▇▂▃▃▅▆▆▆▅▅▆▆▇▆▆▆▆▆▅▅▄▅▄▄▂▄▃▃▄▂▄▃▃▄▃ █
  25.2 ns      Histogram: log(frequency) by time      28.6 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

Beyond @simd

  • @simd is somewhat conservative in its vectorization
  • The LoopVectorization package provides a more aggressive auto-vectorization macro, @turbo
    • Unless a new maintainer steps forward, it will not work with Julia 1.11 and newer
    • Does not work for all loops
  • Can manually write SIMD code
    • Using SIMD.jl
    • Using llvmcall (not recommended)

SIMD.jl Example

  • Based on https://schrimpf.github.io/ARGridBootstrap.jl/argridboot.html#simd
Code
using LoopVectorization, SIMD

# Inputs for the resids! example: series length, standard-normal draws for the
# series, a zero-filled output buffer, and three coefficients all equal to one.
T = 1_000
y = randn(T)
e = fill(0.0, T)
θ = fill(1.0, 3);
Code
"""
    resids!(e, yin, θ)

Overwrite `e[t-1]` with `yin[t] - θ[1] - θ[2]*t - θ[3]*yin[t-1]` for every
`t` in `2:length(yin)` and return `nothing`.

Only the first `length(yin) - 1` entries of `e` are written. The loop runs with
`@inbounds`, so the lengths of `e` and `θ` are validated up front.

# Throws
- `DimensionMismatch` if `e` has fewer than `length(yin) - 1` elements or `θ`
  has fewer than 3 elements.
"""
function resids!(e, yin, θ)
  T = length(yin)
  # The loop below skips bounds checks, so reject inputs that are too short
  # before any unchecked read or write can happen.
  length(e) >= T - 1 ||
    throw(DimensionMismatch("e must have at least $(T - 1) elements, got $(length(e))"))
  length(θ) >= 3 ||
    throw(DimensionMismatch("θ must have at least 3 elements, got $(length(θ))"))
  @inbounds @simd for t in 2:T
    e[t-1] = yin[t] - θ[1] - θ[2]*t - θ[3]*yin[t-1]
  end
  nothing
end

# Print the LLVM IR that Julia generates for `resids!` on these argument types.
@code_llvm resids!(e,y, θ)
; Function Signature: resids!(Array{Float64, 1}, Array{Float64, 1}, Array{Float64, 1})
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:689 within `resids!`
define nonnull ptr @"japi1_resids!_30762"(ptr %"function::Core.Function", ptr noalias nocapture noundef readonly %"args::Any[]", i32 %"nargs::UInt32") #0 {
top:
  %stackargs = alloca ptr, align 8
  store volatile ptr %"args::Any[]", ptr %stackargs, align 8
  %0 = getelementptr inbounds i8, ptr %"args::Any[]", i64 8
  %1 = load ptr, ptr %0, align 8
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:690 within `resids!`
; ┌ @ essentials.jl:11 within `length`
   %.size_ptr = getelementptr inbounds i8, ptr %1, i64 16
   %2 = load i64, ptr %.size_ptr, align 8
; └
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:691 within `resids!`
; ┌ @ simdloop.jl:69 within `macro expansion`
; │┌ @ range.jl:5 within `Colon`
; ││┌ @ range.jl:415 within `UnitRange`
; │││┌ @ range.jl:426 within `unitrange_last`
      %value_phi = call i64 @llvm.smax.i64(i64 %2, i64 1)
; │└└└
; │ @ simdloop.jl:71 within `macro expansion`
; │┌ @ simdloop.jl:51 within `simd_inner_length`
; ││┌ @ range.jl:776 within `length`
; │││┌ @ int.jl:87 within `+`
      %3 = add nsw i64 %value_phi, -1
; │└└└
; │ @ simdloop.jl:72 within `macro expansion`
; │┌ @ int.jl:83 within `<`
    %4 = icmp slt i64 %2, 2
    %.not80.not = icmp eq i64 %3, 0
; │└
   %or.cond = select i1 %4, i1 true, i1 %.not80.not
   br i1 %or.cond, label %L162, label %L21.lr.ph

L21.lr.ph:                                        ; preds = %top
   %5 = getelementptr inbounds i8, ptr %"args::Any[]", i64 16
   %6 = load ptr, ptr %5, align 8
   %7 = load ptr, ptr %"args::Any[]", align 8
   %memoryref_data = load ptr, ptr %1, align 8
   %memoryref_data10 = load ptr, ptr %6, align 8
   %memoryref_data27 = getelementptr inbounds i8, ptr %memoryref_data10, i64 8
   %memoryref_data38 = getelementptr inbounds i8, ptr %memoryref_data10, i64 16
   %memoryref_data56 = load ptr, ptr %7, align 8
; │ @ simdloop.jl:75 within `macro expansion`
   %min.iters.check = icmp slt i64 %2, 33
   br i1 %min.iters.check, label %scalar.ph, label %vector.memcheck

vector.memcheck:                                  ; preds = %L21.lr.ph
   %8 = shl i64 %value_phi, 3
   %9 = getelementptr i8, ptr %memoryref_data56, i64 %8
   %scevgep = getelementptr i8, ptr %9, i64 -8
   %scevgep82 = getelementptr i8, ptr %memoryref_data, i64 %8
   %scevgep83 = getelementptr i8, ptr %memoryref_data10, i64 24
   %bound0 = icmp ult ptr %memoryref_data56, %scevgep82
   %bound1 = icmp ult ptr %memoryref_data, %scevgep
   %found.conflict = and i1 %bound0, %bound1
   %bound084 = icmp ult ptr %memoryref_data56, %scevgep83
   %bound185 = icmp ult ptr %memoryref_data10, %scevgep
   %found.conflict86 = and i1 %bound084, %bound185
   %conflict.rdx = or i1 %found.conflict, %found.conflict86
   br i1 %conflict.rdx, label %scalar.ph, label %vector.ph

vector.ph:                                        ; preds = %vector.memcheck
   %n.vec = and i64 %3, -32
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %.pre = load double, ptr %memoryref_data10, align 8
    %.pre120 = load double, ptr %memoryref_data27, align 8
    %.pre121 = load double, ptr %memoryref_data38, align 8
; │└
; │ @ simdloop.jl:75 within `macro expansion`
   br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
; │ @ simdloop.jl:78 within `macro expansion`
; │┌ @ int.jl:87 within `+`
    %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
    %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %vector.ph ], [ %vec.ind.next, %vector.body ]
; │└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:951 within `_getindex`
; ││││┌ @ int.jl:87 within `+`
       %10 = add nuw nsw <8 x i64> %vec.ind, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
       %11 = add <8 x i64> %vec.ind, <i64 10, i64 10, i64 10, i64 10, i64 10, i64 10, i64 10, i64 10>
       %12 = add <8 x i64> %vec.ind, <i64 18, i64 18, i64 18, i64 18, i64 18, i64 18, i64 18, i64 18>
       %13 = add <8 x i64> %vec.ind, <i64 26, i64 26, i64 26, i64 26, i64 26, i64 26, i64 26, i64 26>
; │└└└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %14 = shl i64 %index, 3
    %15 = getelementptr i8, ptr %memoryref_data, i64 %14
    %16 = getelementptr i8, ptr %15, i64 8
    %17 = getelementptr i8, ptr %15, i64 72
    %18 = getelementptr i8, ptr %15, i64 136
    %19 = getelementptr i8, ptr %15, i64 200
    %wide.load = load <8 x double>, ptr %16, align 8
    %wide.load90 = load <8 x double>, ptr %17, align 8
    %wide.load91 = load <8 x double>, ptr %18, align 8
    %wide.load92 = load <8 x double>, ptr %19, align 8
; │└
; │ @ simdloop.jl:75 within `macro expansion`
   %broadcast.splatinsert97 = insertelement <8 x double> poison, double %.pre, i64 0
   %broadcast.splat98 = shufflevector <8 x double> %broadcast.splatinsert97, <8 x double> poison, <8 x i32> zeroinitializer
   %20 = fsub <8 x double> %wide.load, %broadcast.splat98
   %21 = fsub <8 x double> %wide.load90, %broadcast.splat98
   %22 = fsub <8 x double> %wide.load91, %broadcast.splat98
   %23 = fsub <8 x double> %wide.load92, %broadcast.splat98
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ promotion.jl:434 within `*`
; ││┌ @ promotion.jl:404 within `promote`
; │││┌ @ promotion.jl:379 within `_promote`
; ││││┌ @ number.jl:7 within `convert`
; │││││┌ @ float.jl:245 within `Float64`
        %broadcast.splatinsert105 = insertelement <8 x double> poison, double %.pre120, i64 0
        %broadcast.splat106 = shufflevector <8 x double> %broadcast.splatinsert105, <8 x double> poison, <8 x i32> zeroinitializer
        %24 = sitofp <8 x i64> %10 to <8 x double>
        %25 = sitofp <8 x i64> %11 to <8 x double>
        %26 = sitofp <8 x i64> %12 to <8 x double>
        %27 = sitofp <8 x i64> %13 to <8 x double>
; │└└└└└
; │ @ simdloop.jl:75 within `macro expansion`
   %28 = fmul <8 x double> %broadcast.splat106, %24
   %29 = fmul <8 x double> %broadcast.splat106, %25
   %30 = fmul <8 x double> %broadcast.splat106, %26
   %31 = fmul <8 x double> %broadcast.splat106, %27
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ float.jl:496 within `-`
    %32 = fsub <8 x double> %20, %28
    %33 = fsub <8 x double> %21, %29
    %34 = fsub <8 x double> %22, %30
    %35 = fsub <8 x double> %23, %31
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %broadcast.splatinsert117 = insertelement <8 x double> poison, double %.pre121, i64 0
    %broadcast.splat118 = shufflevector <8 x double> %broadcast.splatinsert117, <8 x double> poison, <8 x i32> zeroinitializer
    %36 = getelementptr double, ptr %15, i64 8
    %37 = getelementptr double, ptr %15, i64 16
    %38 = getelementptr double, ptr %15, i64 24
    %wide.load107 = load <8 x double>, ptr %15, align 8
    %wide.load108 = load <8 x double>, ptr %36, align 8
    %wide.load109 = load <8 x double>, ptr %37, align 8
    %wide.load110 = load <8 x double>, ptr %38, align 8
; │└
; │ @ simdloop.jl:75 within `macro expansion`
   %39 = fmul <8 x double> %broadcast.splat118, %wide.load107
   %40 = fmul <8 x double> %broadcast.splat118, %wide.load108
   %41 = fmul <8 x double> %broadcast.splat118, %wide.load109
   %42 = fmul <8 x double> %broadcast.splat118, %wide.load110
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ float.jl:496 within `-`
    %43 = fsub <8 x double> %32, %39
    %44 = fsub <8 x double> %33, %40
    %45 = fsub <8 x double> %34, %41
    %46 = fsub <8 x double> %35, %42
; │└
; │┌ @ array.jl:986 within `setindex!`
; ││┌ @ array.jl:991 within `_setindex!`
     %47 = getelementptr inbounds i8, ptr %memoryref_data56, i64 %14
     %48 = getelementptr inbounds double, ptr %47, i64 8
     %49 = getelementptr inbounds double, ptr %47, i64 16
     %50 = getelementptr inbounds double, ptr %47, i64 24
     store <8 x double> %43, ptr %47, align 8
     store <8 x double> %44, ptr %48, align 8
     store <8 x double> %45, ptr %49, align 8
     store <8 x double> %46, ptr %50, align 8
; │└└
; │ @ simdloop.jl:78 within `macro expansion`
; │┌ @ int.jl:87 within `+`
    %index.next = add nuw i64 %index, 32
    %vec.ind.next = add <8 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
    %51 = icmp eq i64 %index.next, %n.vec
    br i1 %51, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
; │└
; │ @ simdloop.jl:75 within `macro expansion`
   %cmp.n = icmp eq i64 %3, %n.vec
   br i1 %cmp.n, label %L162, label %scalar.ph

scalar.ph:                                        ; preds = %middle.block, %vector.memcheck, %L21.lr.ph
   %bc.resume.val = phi i64 [ %n.vec, %middle.block ], [ 0, %L21.lr.ph ], [ 0, %vector.memcheck ]
   %52 = add nsw i64 %value_phi, -2
   %53 = sub nsw i64 %52, %bc.resume.val
   %xtraiter = and i64 %3, 7
   %lcmp.mod.not = icmp eq i64 %xtraiter, 0
   br i1 %lcmp.mod.not, label %L21.prol.loopexit, label %L21.prol

L21.prol:                                         ; preds = %L21.prol, %scalar.ph
   %value_phi181.prol = phi i64 [ %67, %L21.prol ], [ %bc.resume.val, %scalar.ph ]
   %prol.iter = phi i64 [ %prol.iter.next, %L21.prol ], [ 0, %scalar.ph ]
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:951 within `_getindex`
; ││││┌ @ int.jl:87 within `+`
       %54 = add nuw nsw i64 %value_phi181.prol, 2
; │└└└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %memoryref_offset.prol = shl i64 %value_phi181.prol, 3
    %55 = getelementptr i8, ptr %memoryref_data, i64 %memoryref_offset.prol
    %memoryref_data5.prol = getelementptr i8, ptr %55, i64 8
    %56 = load double, ptr %memoryref_data5.prol, align 8
    %57 = load double, ptr %memoryref_data10, align 8
; │└
; │┌ @ float.jl:496 within `-`
    %58 = fsub double %56, %57
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %59 = load double, ptr %memoryref_data27, align 8
; │└
; │┌ @ promotion.jl:434 within `*`
; ││┌ @ promotion.jl:404 within `promote`
; │││┌ @ promotion.jl:379 within `_promote`
; ││││┌ @ number.jl:7 within `convert`
; │││││┌ @ float.jl:245 within `Float64`
        %60 = sitofp i64 %54 to double
; ││└└└└
; ││ @ promotion.jl:434 within `*` @ float.jl:497
    %61 = fmul double %59, %60
; │└
; │┌ @ float.jl:496 within `-`
    %62 = fsub double %58, %61
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %63 = load double, ptr %memoryref_data38, align 8
    %64 = load double, ptr %55, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %65 = fmul double %63, %64
; │└
; │┌ @ float.jl:496 within `-`
    %66 = fsub double %62, %65
; │└
; │┌ @ array.jl:986 within `setindex!`
; ││┌ @ array.jl:991 within `_setindex!`
     %memoryref_data64.prol = getelementptr inbounds i8, ptr %memoryref_data56, i64 %memoryref_offset.prol
     store double %66, ptr %memoryref_data64.prol, align 8
; │└└
; │ @ simdloop.jl:78 within `macro expansion`
; │┌ @ int.jl:87 within `+`
    %67 = add nuw nsw i64 %value_phi181.prol, 1
; │└
; │ @ simdloop.jl:75 within `macro expansion`
   %prol.iter.next = add i64 %prol.iter, 1
   %prol.iter.cmp.not = icmp eq i64 %prol.iter.next, %xtraiter
   br i1 %prol.iter.cmp.not, label %L21.prol.loopexit, label %L21.prol

L21.prol.loopexit:                                ; preds = %L21.prol, %scalar.ph
   %value_phi181.unr = phi i64 [ %bc.resume.val, %scalar.ph ], [ %67, %L21.prol ]
   %68 = icmp ult i64 %53, 7
   br i1 %68, label %L162, label %L21

L21:                                              ; preds = %L21, %L21.prol.loopexit
   %value_phi181 = phi i64 [ %147, %L21 ], [ %value_phi181.unr, %L21.prol.loopexit ]
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:951 within `_getindex`
; ││││┌ @ int.jl:87 within `+`
       %69 = add nuw nsw i64 %value_phi181, 2
; │└└└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %memoryref_offset = shl i64 %value_phi181, 3
    %70 = getelementptr i8, ptr %memoryref_data, i64 %memoryref_offset
    %memoryref_data5 = getelementptr i8, ptr %70, i64 8
    %71 = load double, ptr %memoryref_data5, align 8
    %72 = load double, ptr %memoryref_data10, align 8
; │└
; │┌ @ float.jl:496 within `-`
    %73 = fsub double %71, %72
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %74 = load double, ptr %memoryref_data27, align 8
; │└
; │┌ @ promotion.jl:434 within `*`
; ││┌ @ promotion.jl:404 within `promote`
; │││┌ @ promotion.jl:379 within `_promote`
; ││││┌ @ number.jl:7 within `convert`
; │││││┌ @ float.jl:245 within `Float64`
        %75 = sitofp i64 %69 to double
; ││└└└└
; ││ @ promotion.jl:434 within `*` @ float.jl:497
    %76 = fmul double %74, %75
; │└
; │┌ @ float.jl:496 within `-`
    %77 = fsub double %73, %76
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %78 = load double, ptr %memoryref_data38, align 8
    %79 = load double, ptr %70, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %80 = fmul double %78, %79
; │└
; │┌ @ float.jl:496 within `-`
    %81 = fsub double %77, %80
; │└
; │┌ @ array.jl:986 within `setindex!`
; ││┌ @ array.jl:991 within `_setindex!`
     %memoryref_data64 = getelementptr inbounds i8, ptr %memoryref_data56, i64 %memoryref_offset
     store double %81, ptr %memoryref_data64, align 8
; │└└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:951 within `_getindex`
; ││││┌ @ int.jl:87 within `+`
       %82 = add nuw nsw i64 %value_phi181, 3
; │└└└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %memoryref_offset.1 = add i64 %memoryref_offset, 8
    %83 = getelementptr i8, ptr %memoryref_data, i64 %memoryref_offset.1
    %memoryref_data5.1 = getelementptr i8, ptr %83, i64 8
    %84 = load double, ptr %memoryref_data5.1, align 8
    %85 = load double, ptr %memoryref_data10, align 8
; │└
; │┌ @ float.jl:496 within `-`
    %86 = fsub double %84, %85
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %87 = load double, ptr %memoryref_data27, align 8
; │└
; │┌ @ promotion.jl:434 within `*`
; ││┌ @ promotion.jl:404 within `promote`
; │││┌ @ promotion.jl:379 within `_promote`
; ││││┌ @ number.jl:7 within `convert`
; │││││┌ @ float.jl:245 within `Float64`
        %88 = sitofp i64 %82 to double
; ││└└└└
; ││ @ promotion.jl:434 within `*` @ float.jl:497
    %89 = fmul double %87, %88
; │└
; │┌ @ float.jl:496 within `-`
    %90 = fsub double %86, %89
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %91 = load double, ptr %memoryref_data38, align 8
    %92 = load double, ptr %83, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %93 = fmul double %91, %92
; │└
; │┌ @ float.jl:496 within `-`
    %94 = fsub double %90, %93
; │└
; │┌ @ array.jl:986 within `setindex!`
; ││┌ @ array.jl:991 within `_setindex!`
     %memoryref_data64.1 = getelementptr inbounds i8, ptr %memoryref_data56, i64 %memoryref_offset.1
     store double %94, ptr %memoryref_data64.1, align 8
; │└└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:951 within `_getindex`
; ││││┌ @ int.jl:87 within `+`
       %95 = add nuw nsw i64 %value_phi181, 4
; │└└└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %memoryref_offset.2 = shl i64 %69, 3
    %96 = getelementptr i8, ptr %memoryref_data, i64 %memoryref_offset.2
    %memoryref_data5.2 = getelementptr i8, ptr %96, i64 8
    %97 = load double, ptr %memoryref_data5.2, align 8
    %98 = load double, ptr %memoryref_data10, align 8
; │└
; │┌ @ float.jl:496 within `-`
    %99 = fsub double %97, %98
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %100 = load double, ptr %memoryref_data27, align 8
; │└
; │┌ @ promotion.jl:434 within `*`
; ││┌ @ promotion.jl:404 within `promote`
; │││┌ @ promotion.jl:379 within `_promote`
; ││││┌ @ number.jl:7 within `convert`
; │││││┌ @ float.jl:245 within `Float64`
        %101 = sitofp i64 %95 to double
; ││└└└└
; ││ @ promotion.jl:434 within `*` @ float.jl:497
    %102 = fmul double %100, %101
; │└
; │┌ @ float.jl:496 within `-`
    %103 = fsub double %99, %102
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %104 = load double, ptr %memoryref_data38, align 8
    %105 = load double, ptr %96, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %106 = fmul double %104, %105
; │└
; │┌ @ float.jl:496 within `-`
    %107 = fsub double %103, %106
; │└
; │┌ @ array.jl:986 within `setindex!`
; ││┌ @ array.jl:991 within `_setindex!`
     %memoryref_data64.2 = getelementptr inbounds i8, ptr %memoryref_data56, i64 %memoryref_offset.2
     store double %107, ptr %memoryref_data64.2, align 8
; │└└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:951 within `_getindex`
; ││││┌ @ int.jl:87 within `+`
       %108 = add nuw nsw i64 %value_phi181, 5
; │└└└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %memoryref_offset.3 = shl i64 %82, 3
    %109 = getelementptr i8, ptr %memoryref_data, i64 %memoryref_offset.3
    %memoryref_data5.3 = getelementptr i8, ptr %109, i64 8
    %110 = load double, ptr %memoryref_data5.3, align 8
    %111 = load double, ptr %memoryref_data10, align 8
; │└
; │┌ @ float.jl:496 within `-`
    %112 = fsub double %110, %111
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %113 = load double, ptr %memoryref_data27, align 8
; │└
; │┌ @ promotion.jl:434 within `*`
; ││┌ @ promotion.jl:404 within `promote`
; │││┌ @ promotion.jl:379 within `_promote`
; ││││┌ @ number.jl:7 within `convert`
; │││││┌ @ float.jl:245 within `Float64`
        %114 = sitofp i64 %108 to double
; ││└└└└
; ││ @ promotion.jl:434 within `*` @ float.jl:497
    %115 = fmul double %113, %114
; │└
; │┌ @ float.jl:496 within `-`
    %116 = fsub double %112, %115
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %117 = load double, ptr %memoryref_data38, align 8
    %118 = load double, ptr %109, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %119 = fmul double %117, %118
; │└
; │┌ @ float.jl:496 within `-`
    %120 = fsub double %116, %119
; │└
; │┌ @ array.jl:986 within `setindex!`
; ││┌ @ array.jl:991 within `_setindex!`
     %memoryref_data64.3 = getelementptr inbounds i8, ptr %memoryref_data56, i64 %memoryref_offset.3
     store double %120, ptr %memoryref_data64.3, align 8
; │└└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:951 within `_getindex`
; ││││┌ @ int.jl:87 within `+`
       %121 = add nuw nsw i64 %value_phi181, 6
; │└└└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %memoryref_offset.4 = shl i64 %95, 3
    %122 = getelementptr i8, ptr %memoryref_data, i64 %memoryref_offset.4
    %memoryref_data5.4 = getelementptr i8, ptr %122, i64 8
    %123 = load double, ptr %memoryref_data5.4, align 8
    %124 = load double, ptr %memoryref_data10, align 8
; │└
; │┌ @ float.jl:496 within `-`
    %125 = fsub double %123, %124
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %126 = load double, ptr %memoryref_data27, align 8
; │└
; │┌ @ promotion.jl:434 within `*`
; ││┌ @ promotion.jl:404 within `promote`
; │││┌ @ promotion.jl:379 within `_promote`
; ││││┌ @ number.jl:7 within `convert`
; │││││┌ @ float.jl:245 within `Float64`
        %127 = sitofp i64 %121 to double
; ││└└└└
; ││ @ promotion.jl:434 within `*` @ float.jl:497
    %128 = fmul double %126, %127
; │└
; │┌ @ float.jl:496 within `-`
    %129 = fsub double %125, %128
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %130 = load double, ptr %memoryref_data38, align 8
    %131 = load double, ptr %122, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %132 = fmul double %130, %131
; │└
; │┌ @ float.jl:496 within `-`
    %133 = fsub double %129, %132
; │└
; │┌ @ array.jl:986 within `setindex!`
; ││┌ @ array.jl:991 within `_setindex!`
     %memoryref_data64.4 = getelementptr inbounds i8, ptr %memoryref_data56, i64 %memoryref_offset.4
     store double %133, ptr %memoryref_data64.4, align 8
; │└└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:951 within `_getindex`
; ││││┌ @ int.jl:87 within `+`
       %134 = add nuw nsw i64 %value_phi181, 7
; │└└└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %memoryref_offset.5 = shl i64 %108, 3
    %135 = getelementptr i8, ptr %memoryref_data, i64 %memoryref_offset.5
    %memoryref_data5.5 = getelementptr i8, ptr %135, i64 8
    %136 = load double, ptr %memoryref_data5.5, align 8
    %137 = load double, ptr %memoryref_data10, align 8
; │└
; │┌ @ float.jl:496 within `-`
    %138 = fsub double %136, %137
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %139 = load double, ptr %memoryref_data27, align 8
; │└
; │┌ @ promotion.jl:434 within `*`
; ││┌ @ promotion.jl:404 within `promote`
; │││┌ @ promotion.jl:379 within `_promote`
; ││││┌ @ number.jl:7 within `convert`
; │││││┌ @ float.jl:245 within `Float64`
        %140 = sitofp i64 %134 to double
; ││└└└└
; ││ @ promotion.jl:434 within `*` @ float.jl:497
    %141 = fmul double %139, %140
; │└
; │┌ @ float.jl:496 within `-`
    %142 = fsub double %138, %141
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %143 = load double, ptr %memoryref_data38, align 8
    %144 = load double, ptr %135, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %145 = fmul double %143, %144
; │└
; │┌ @ float.jl:496 within `-`
    %146 = fsub double %142, %145
; │└
; │┌ @ array.jl:986 within `setindex!`
; ││┌ @ array.jl:991 within `_setindex!`
     %memoryref_data64.5 = getelementptr inbounds i8, ptr %memoryref_data56, i64 %memoryref_offset.5
     store double %146, ptr %memoryref_data64.5, align 8
; │└└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:951 within `_getindex`
; ││││┌ @ int.jl:87 within `+`
       %147 = add nuw nsw i64 %value_phi181, 8
; │└└└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %memoryref_offset.6 = shl i64 %121, 3
    %148 = getelementptr i8, ptr %memoryref_data, i64 %memoryref_offset.6
    %memoryref_data5.6 = getelementptr i8, ptr %148, i64 8
    %149 = load double, ptr %memoryref_data5.6, align 8
    %150 = load double, ptr %memoryref_data10, align 8
; │└
; │┌ @ float.jl:496 within `-`
    %151 = fsub double %149, %150
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %152 = load double, ptr %memoryref_data27, align 8
; │└
; │┌ @ promotion.jl:434 within `*`
; ││┌ @ promotion.jl:404 within `promote`
; │││┌ @ promotion.jl:379 within `_promote`
; ││││┌ @ number.jl:7 within `convert`
; │││││┌ @ float.jl:245 within `Float64`
        %153 = sitofp i64 %147 to double
; ││└└└└
; ││ @ promotion.jl:434 within `*` @ float.jl:497
    %154 = fmul double %152, %153
; │└
; │┌ @ float.jl:496 within `-`
    %155 = fsub double %151, %154
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %156 = load double, ptr %memoryref_data38, align 8
    %157 = load double, ptr %148, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %158 = fmul double %156, %157
; │└
; │┌ @ float.jl:496 within `-`
    %159 = fsub double %155, %158
; │└
; │┌ @ array.jl:986 within `setindex!`
; ││┌ @ array.jl:991 within `_setindex!`
     %memoryref_data64.6 = getelementptr inbounds i8, ptr %memoryref_data56, i64 %memoryref_offset.6
     store double %159, ptr %memoryref_data64.6, align 8
; │└└
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:951 within `_getindex`
; ││││┌ @ int.jl:87 within `+`
       %160 = add nuw nsw i64 %value_phi181, 9
; │└└└└
; │ @ simdloop.jl:77 within `macro expansion` @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:692
; │┌ @ essentials.jl:920 within `getindex`
    %memoryref_offset.7 = shl i64 %134, 3
    %161 = getelementptr i8, ptr %memoryref_data, i64 %memoryref_offset.7
    %memoryref_data5.7 = getelementptr i8, ptr %161, i64 8
    %162 = load double, ptr %memoryref_data5.7, align 8
    %163 = load double, ptr %memoryref_data10, align 8
; │└
; │┌ @ float.jl:496 within `-`
    %164 = fsub double %162, %163
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %165 = load double, ptr %memoryref_data27, align 8
; │└
; │┌ @ promotion.jl:434 within `*`
; ││┌ @ promotion.jl:404 within `promote`
; │││┌ @ promotion.jl:379 within `_promote`
; ││││┌ @ number.jl:7 within `convert`
; │││││┌ @ float.jl:245 within `Float64`
        %166 = sitofp i64 %160 to double
; ││└└└└
; ││ @ promotion.jl:434 within `*` @ float.jl:497
    %167 = fmul double %165, %166
; │└
; │┌ @ float.jl:496 within `-`
    %168 = fsub double %164, %167
; │└
; │┌ @ essentials.jl:920 within `getindex`
    %169 = load double, ptr %memoryref_data38, align 8
    %170 = load double, ptr %161, align 8
; │└
; │┌ @ float.jl:497 within `*`
    %171 = fmul double %169, %170
; │└
; │┌ @ float.jl:496 within `-`
    %172 = fsub double %168, %171
; │└
; │┌ @ array.jl:986 within `setindex!`
; ││┌ @ array.jl:991 within `_setindex!`
     %memoryref_data64.7 = getelementptr inbounds i8, ptr %memoryref_data56, i64 %memoryref_offset.7
     store double %172, ptr %memoryref_data64.7, align 8
; │└└
; │ @ simdloop.jl:75 within `macro expansion`
; │┌ @ int.jl:83 within `<`
    %exitcond.not.7 = icmp eq i64 %147, %3
; │└
   br i1 %exitcond.not.7, label %L162, label %L21

L162:                                             ; preds = %L21, %L21.prol.loopexit, %middle.block, %top
; │ @ simdloop.jl:76 within `macro expansion`
; │┌ @ simdloop.jl:54 within `simd_index`
; ││┌ @ array.jl:3137 within `getindex`
; │││┌ @ range.jl:952 within `_getindex`
      %jl_nothing = load ptr, ptr @jl_nothing, align 8
      ret ptr %jl_nothing
; └└└└
}
@benchmark resids!($e,$y,$θ)
BenchmarkTools.Trial: 10000 samples with 985 evaluations per sample.
 Range (min … max):  52.820 ns … 110.392 ns  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     54.875 ns               ┊ GC (median):    0.00%
 Time  (mean ± σ):   55.575 ns ±   4.061 ns  ┊ GC (mean ± σ):  0.00% ± 0.00%

     ▂█ ▂▃▂                                                  ▁
  █▇████████▆▇▇▇▄▄▄▃▅▆▆▄▃▃▃▄▆▁▁▃▆▅▃▃▅▅▄▃▃▁▃▃▁▄▆▆▄▃▁▄▃▃▁▆▆▆▆ █
  52.8 ns       Histogram: log(frequency) by time      77.7 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.
Code
"""
    resids_turbo!(e, yin, θ)

Overwrite the leading entries of `e` with

    e[t-1] = yin[t] - θ[1] - θ[2]*t - θ[3]*yin[t-1],    t = 2, …, length(yin)

The loop is vectorized with LoopVectorization's `@turbo`. That macro performs no
bounds checking and expects a non-empty loop, so the sizes of the arguments are
validated before the loop runs.

# Arguments
- `e`: output; positions `1:length(yin)-1` are overwritten, so it must hold at
  least `length(yin) - 1` elements.
- `yin`: input series (assumed to use 1-based indexing).
- `θ`: coefficients; only `θ[1]`, `θ[2]` and `θ[3]` are read.

# Returns
`nothing`; the result is written into `e`.

# Throws
- `DimensionMismatch` if `θ` has fewer than three elements or `e` is too short.
"""
function resids_turbo!(e, yin, θ)
  T = length(yin)
  # With fewer than two observations there is nothing to compute; returning here
  # also keeps `@turbo` away from an empty iteration range.
  T < 2 && return nothing
  # `@turbo` would happily read/write past the end of a short array, so check first.
  length(θ) >= 3 ||
    throw(DimensionMismatch("θ must have at least 3 elements, got $(length(θ))"))
  length(e) >= T - 1 ||
    throw(DimensionMismatch("e must have at least $(T - 1) elements, got $(length(e))"))
  @turbo for t in 2:T
    e[t-1] = yin[t] - θ[1] - θ[2]*t - θ[3]*yin[t-1]
  end
  return nothing
end

@code_llvm resids_turbo!(e,y, θ)
; Function Signature: resids_turbo!(Array{Float64, 1}, Array{Float64, 1}, Array{Float64, 1})
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:710 within `resids_turbo!`
define nonnull ptr @"japi1_resids_turbo!_33941"(ptr %"function::Core.Function", ptr noalias nocapture noundef readonly %"args::Any[]", i32 %"nargs::UInt32") #0 {
top:
  %stackargs = alloca ptr, align 8
  store volatile ptr %"args::Any[]", ptr %stackargs, align 8
  %0 = load ptr, ptr %"args::Any[]", align 8
  %1 = getelementptr inbounds i8, ptr %"args::Any[]", i64 8
  %2 = load ptr, ptr %1, align 8
  %3 = getelementptr inbounds i8, ptr %"args::Any[]", i64 16
  %4 = load ptr, ptr %3, align 8
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:711 within `resids_turbo!`
; ┌ @ essentials.jl:11 within `length`
   %.size_ptr = getelementptr inbounds i8, ptr %2, i64 16
   %.size.0.copyload = load i64, ptr %.size_ptr, align 8
; └
;  @ /home/paul/ARGridBootstrap/docs/build/westdri_talk.qmd:712 within `resids_turbo!`
; ┌ @ /home/paul/.julia/packages/LoopVectorization/GKxH5/src/condense_loopset.jl:1179 within `macro expansion`
; │┌ @ /home/paul/.julia/packages/LayoutPointers/nNKcM/src/stridedpointers.jl:105 within `stridedpointer_preserve`
; ││┌ @ /home/paul/.julia/packages/LayoutPointers/nNKcM/src/stridedpointers.jl:18 within `memory_reference` @ /home/paul/.julia/packages/LayoutPointers/nNKcM/src/stridedpointers.jl:22
; │││┌ @ abstractarray.jl:1255 within `pointer`
; ││││┌ @ pointer.jl:63 within `cconvert`
       %5 = load ptr, ptr %0, align 8
; │└└└└
; │┌ @ abstractarray.jl:1342 within `getindex`
; ││┌ @ abstractarray.jl:1366 within `_getindex`
; │││┌ @ essentials.jl:920 within `getindex`
      %memoryref_data = load ptr, ptr %4, align 8
      %memoryref_data4 = getelementptr inbounds i8, ptr %memoryref_data, i64 8
      %6 = load double, ptr %memoryref_data4, align 8
      %memoryref_data15 = getelementptr inbounds i8, ptr %memoryref_data, i64 16
      %7 = load double, ptr %memoryref_data15, align 8
; │└└└
; │┌ @ /home/paul/.julia/packages/LayoutPointers/nNKcM/src/stridedpointers.jl:105 within `stridedpointer_preserve`
; ││┌ @ /home/paul/.julia/packages/LayoutPointers/nNKcM/src/stridedpointers.jl:18 within `memory_reference` @ /home/paul/.julia/packages/LayoutPointers/nNKcM/src/stridedpointers.jl:22
; │││┌ @ abstractarray.jl:1255 within `pointer`
; ││││┌ @ pointer.jl:63 within `cconvert`
       %8 = load ptr, ptr %2, align 8
; │└└└└
; │┌ @ abstractarray.jl:1342 within `getindex`
; ││┌ @ abstractarray.jl:1366 within `_getindex`
; │││┌ @ essentials.jl:920 within `getindex`
      %9 = load double, ptr %memoryref_data, align 8
; │└└└
; │┌ @ /home/paul/.julia/packages/LoopVectorization/GKxH5/src/condense_loopset.jl:390 within `gespf1`
; ││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:578 within `gesp`
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:541 within `increment_ptr`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:506 within `_gep`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:506 within `macro expansion`
        %ptr.1.i = getelementptr inbounds double, ptr %8, i64 -2
        %ptr.1.i43 = getelementptr inbounds double, ptr %5, i64 -2
; │└└└└└
; │┌ @ /home/paul/.julia/packages/LoopVectorization/GKxH5/src/reconstruct_loopset.jl:1107 within `_turbo_!`
; ││┌ @ /home/paul/.julia/packages/LoopVectorization/GKxH5/src/reconstruct_loopset.jl:1107 within `macro expansion`
; │││┌ @ /home/paul/.julia/packages/StaticArrayInterface/thxn4/src/size.jl:204 within `static_length`
; ││││┌ @ /home/paul/.julia/packages/Static/d7YOk/src/Static.jl:785 within `maybe_static`
; │││││┌ @ /home/paul/.julia/packages/Static/d7YOk/src/ranges.jl:262 within `length`
; ││││││┌ @ operators.jl:425 within `>`
; │││││││┌ @ int.jl:83 within `<`
          %10 = icmp sgt i64 %.size.0.copyload, 1
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/LoopVectorization/GKxH5/src/modeling/graphs.jl:236 within `vcmpend`
; ││││┌ @ int.jl:520 within `<=`
       %.not194 = icmp slt i64 %.size.0.copyload, 25
; │││└└
     br i1 %.not194, label %L128, label %L91.lr.ph

L91.lr.ph:                                        ; preds = %top
     %ie.i = insertelement <8 x double> poison, double %9, i64 0
     %v.i = shufflevector <8 x double> %ie.i, <8 x double> poison, <8 x i32> zeroinitializer
     %11 = fneg fast double %7
     %ie.i60 = insertelement <8 x double> poison, double %11, i64 0
     %v.i61 = shufflevector <8 x double> %ie.i60, <8 x double> poison, <8 x i32> zeroinitializer
     %12 = fneg fast double %6
     %ie.i67 = insertelement <8 x double> poison, double %12, i64 0
     %v.i68 = shufflevector <8 x double> %ie.i67, <8 x double> poison, <8 x i32> zeroinitializer
     %13 = add nsw i64 %.size.0.copyload, -25
     %14 = udiv i64 %13, 24
     %15 = add nuw nsw i64 %14, 1
     %xtraiter = and i64 %15, 7
     %16 = icmp ult i64 %13, 168
     br i1 %16, label %L128.loopexit.unr-lcssa, label %L91.lr.ph.new

L91.lr.ph.new:                                    ; preds = %L91.lr.ph
     %unroll_iter = and i64 %15, 2305843009213693944
     br label %L91

L91:                                              ; preds = %L91, %L91.lr.ph.new
     %value_phi28195 = phi i64 [ 2, %L91.lr.ph.new ], [ %res.i84.7, %L91 ]
     %niter = phi i64 [ 0, %L91.lr.ph.new ], [ %niter.next.7, %L91 ]
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i44 = getelementptr inbounds double, ptr %ptr.1.i, i64 %value_phi28195
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i45 = load <8 x double>, ptr %ptr.1.i44, align 8
           %ptr.1.i46 = getelementptr inbounds double, ptr %ptr.1.i44, i64 8
           %res.i47 = load <8 x double>, ptr %ptr.1.i46, align 8
           %ptr.1.i48 = getelementptr inbounds double, ptr %ptr.1.i44, i64 16
           %res.i49 = load <8 x double>, ptr %ptr.1.i48, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i50 = or disjoint i64 %value_phi28195, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i51 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i50
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i52 = load <8 x double>, ptr %ptr.1.i51, align 8
           %ptr.1.i53 = getelementptr inbounds double, ptr %ptr.1.i51, i64 8
           %res.i54 = load <8 x double>, ptr %ptr.1.i53, align 8
           %ptr.1.i55 = getelementptr inbounds double, ptr %ptr.1.i51, i64 16
           %res.i56 = load <8 x double>, ptr %ptr.1.i55, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i57 = fsub reassoc nsz arcp contract afn <8 x double> %res.i52, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i58 = fsub reassoc nsz arcp contract afn <8 x double> %res.i54, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i59 = fsub reassoc nsz arcp contract afn <8 x double> %res.i56, %v.i
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i62 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i45, <8 x double> %res.i57)
            %res.i63 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i47, <8 x double> %res.i58)
            %res.i64 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i49, <8 x double> %res.i59)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i65 = add nuw nsw i64 %value_phi28195, 8
       %res.i66 = add nuw nsw i64 %value_phi28195, 16
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %17 = sitofp i64 %value_phi28195 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i69 = insertelement <8 x double> poison, double %17, i64 0
               %v.i70 = shufflevector <8 x double> %ie.i69, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i71 = fadd fast <8 x double> %v.i70, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %18 = sitofp i64 %res.i65 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i72 = insertelement <8 x double> poison, double %18, i64 0
               %v.i73 = shufflevector <8 x double> %ie.i72, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i74 = fadd fast <8 x double> %v.i73, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %19 = sitofp i64 %res.i66 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i75 = insertelement <8 x double> poison, double %19, i64 0
               %v.i76 = shufflevector <8 x double> %ie.i75, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i77 = fadd fast <8 x double> %v.i76, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i78 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i71, <8 x double> %res.i62)
            %res.i79 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i74, <8 x double> %res.i63)
            %res.i80 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i77, <8 x double> %res.i64)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1803 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1174 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i81 = getelementptr inbounds double, ptr %ptr.1.i43, i64 %value_phi28195
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i78, ptr %ptr.1.i81, align 8
           %ptr.1.i82 = getelementptr inbounds double, ptr %ptr.1.i81, i64 8
           store <8 x double> %res.i79, ptr %ptr.1.i82, align 8
           %ptr.1.i83 = getelementptr inbounds double, ptr %ptr.1.i81, i64 16
           store <8 x double> %res.i80, ptr %ptr.1.i83, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i84 = add nuw nsw i64 %value_phi28195, 24
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i44.1 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i84
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i45.1 = load <8 x double>, ptr %ptr.1.i44.1, align 8
           %ptr.1.i46.1 = getelementptr inbounds double, ptr %ptr.1.i44.1, i64 8
           %res.i47.1 = load <8 x double>, ptr %ptr.1.i46.1, align 8
           %ptr.1.i48.1 = getelementptr inbounds double, ptr %ptr.1.i44.1, i64 16
           %res.i49.1 = load <8 x double>, ptr %ptr.1.i48.1, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i50.1 = or disjoint i64 %res.i84, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i51.1 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i50.1
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i52.1 = load <8 x double>, ptr %ptr.1.i51.1, align 8
           %ptr.1.i53.1 = getelementptr inbounds double, ptr %ptr.1.i51.1, i64 8
           %res.i54.1 = load <8 x double>, ptr %ptr.1.i53.1, align 8
           %ptr.1.i55.1 = getelementptr inbounds double, ptr %ptr.1.i51.1, i64 16
           %res.i56.1 = load <8 x double>, ptr %ptr.1.i55.1, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i57.1 = fsub reassoc nsz arcp contract afn <8 x double> %res.i52.1, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i58.1 = fsub reassoc nsz arcp contract afn <8 x double> %res.i54.1, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i59.1 = fsub reassoc nsz arcp contract afn <8 x double> %res.i56.1, %v.i
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i62.1 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i45.1, <8 x double> %res.i57.1)
            %res.i63.1 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i47.1, <8 x double> %res.i58.1)
            %res.i64.1 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i49.1, <8 x double> %res.i59.1)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i65.1 = add nuw nsw i64 %value_phi28195, 32
       %res.i66.1 = add nuw nsw i64 %value_phi28195, 40
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %20 = sitofp i64 %res.i84 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i69.1 = insertelement <8 x double> poison, double %20, i64 0
               %v.i70.1 = shufflevector <8 x double> %ie.i69.1, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i71.1 = fadd fast <8 x double> %v.i70.1, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %21 = sitofp i64 %res.i65.1 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i72.1 = insertelement <8 x double> poison, double %21, i64 0
               %v.i73.1 = shufflevector <8 x double> %ie.i72.1, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i74.1 = fadd fast <8 x double> %v.i73.1, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %22 = sitofp i64 %res.i66.1 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i75.1 = insertelement <8 x double> poison, double %22, i64 0
               %v.i76.1 = shufflevector <8 x double> %ie.i75.1, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i77.1 = fadd fast <8 x double> %v.i76.1, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i78.1 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i71.1, <8 x double> %res.i62.1)
            %res.i79.1 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i74.1, <8 x double> %res.i63.1)
            %res.i80.1 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i77.1, <8 x double> %res.i64.1)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1803 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1174 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i81.1 = getelementptr inbounds double, ptr %ptr.1.i43, i64 %res.i84
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i78.1, ptr %ptr.1.i81.1, align 8
           %ptr.1.i82.1 = getelementptr inbounds double, ptr %ptr.1.i81.1, i64 8
           store <8 x double> %res.i79.1, ptr %ptr.1.i82.1, align 8
           %ptr.1.i83.1 = getelementptr inbounds double, ptr %ptr.1.i81.1, i64 16
           store <8 x double> %res.i80.1, ptr %ptr.1.i83.1, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i84.1 = add nuw nsw i64 %value_phi28195, 48
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i44.2 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i84.1
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i45.2 = load <8 x double>, ptr %ptr.1.i44.2, align 8
           %ptr.1.i46.2 = getelementptr inbounds double, ptr %ptr.1.i44.2, i64 8
           %res.i47.2 = load <8 x double>, ptr %ptr.1.i46.2, align 8
           %ptr.1.i48.2 = getelementptr inbounds double, ptr %ptr.1.i44.2, i64 16
           %res.i49.2 = load <8 x double>, ptr %ptr.1.i48.2, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i50.2 = or disjoint i64 %res.i84.1, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i51.2 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i50.2
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i52.2 = load <8 x double>, ptr %ptr.1.i51.2, align 8
           %ptr.1.i53.2 = getelementptr inbounds double, ptr %ptr.1.i51.2, i64 8
           %res.i54.2 = load <8 x double>, ptr %ptr.1.i53.2, align 8
           %ptr.1.i55.2 = getelementptr inbounds double, ptr %ptr.1.i51.2, i64 16
           %res.i56.2 = load <8 x double>, ptr %ptr.1.i55.2, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i57.2 = fsub reassoc nsz arcp contract afn <8 x double> %res.i52.2, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i58.2 = fsub reassoc nsz arcp contract afn <8 x double> %res.i54.2, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i59.2 = fsub reassoc nsz arcp contract afn <8 x double> %res.i56.2, %v.i
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i62.2 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i45.2, <8 x double> %res.i57.2)
            %res.i63.2 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i47.2, <8 x double> %res.i58.2)
            %res.i64.2 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i49.2, <8 x double> %res.i59.2)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i65.2 = add nuw nsw i64 %value_phi28195, 56
       %res.i66.2 = add nuw nsw i64 %value_phi28195, 64
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %23 = sitofp i64 %res.i84.1 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i69.2 = insertelement <8 x double> poison, double %23, i64 0
               %v.i70.2 = shufflevector <8 x double> %ie.i69.2, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i71.2 = fadd fast <8 x double> %v.i70.2, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %24 = sitofp i64 %res.i65.2 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i72.2 = insertelement <8 x double> poison, double %24, i64 0
               %v.i73.2 = shufflevector <8 x double> %ie.i72.2, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i74.2 = fadd fast <8 x double> %v.i73.2, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %25 = sitofp i64 %res.i66.2 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i75.2 = insertelement <8 x double> poison, double %25, i64 0
               %v.i76.2 = shufflevector <8 x double> %ie.i75.2, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i77.2 = fadd fast <8 x double> %v.i76.2, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i78.2 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i71.2, <8 x double> %res.i62.2)
            %res.i79.2 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i74.2, <8 x double> %res.i63.2)
            %res.i80.2 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i77.2, <8 x double> %res.i64.2)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1803 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1174 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i81.2 = getelementptr inbounds double, ptr %ptr.1.i43, i64 %res.i84.1
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i78.2, ptr %ptr.1.i81.2, align 8
           %ptr.1.i82.2 = getelementptr inbounds double, ptr %ptr.1.i81.2, i64 8
           store <8 x double> %res.i79.2, ptr %ptr.1.i82.2, align 8
           %ptr.1.i83.2 = getelementptr inbounds double, ptr %ptr.1.i81.2, i64 16
           store <8 x double> %res.i80.2, ptr %ptr.1.i83.2, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i84.2 = add nuw nsw i64 %value_phi28195, 72
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i44.3 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i84.2
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i45.3 = load <8 x double>, ptr %ptr.1.i44.3, align 8
           %ptr.1.i46.3 = getelementptr inbounds double, ptr %ptr.1.i44.3, i64 8
           %res.i47.3 = load <8 x double>, ptr %ptr.1.i46.3, align 8
           %ptr.1.i48.3 = getelementptr inbounds double, ptr %ptr.1.i44.3, i64 16
           %res.i49.3 = load <8 x double>, ptr %ptr.1.i48.3, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i50.3 = or disjoint i64 %res.i84.2, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i51.3 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i50.3
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i52.3 = load <8 x double>, ptr %ptr.1.i51.3, align 8
           %ptr.1.i53.3 = getelementptr inbounds double, ptr %ptr.1.i51.3, i64 8
           %res.i54.3 = load <8 x double>, ptr %ptr.1.i53.3, align 8
           %ptr.1.i55.3 = getelementptr inbounds double, ptr %ptr.1.i51.3, i64 16
           %res.i56.3 = load <8 x double>, ptr %ptr.1.i55.3, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i57.3 = fsub reassoc nsz arcp contract afn <8 x double> %res.i52.3, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i58.3 = fsub reassoc nsz arcp contract afn <8 x double> %res.i54.3, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i59.3 = fsub reassoc nsz arcp contract afn <8 x double> %res.i56.3, %v.i
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i62.3 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i45.3, <8 x double> %res.i57.3)
            %res.i63.3 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i47.3, <8 x double> %res.i58.3)
            %res.i64.3 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i49.3, <8 x double> %res.i59.3)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i65.3 = add nuw nsw i64 %value_phi28195, 80
       %res.i66.3 = add nuw nsw i64 %value_phi28195, 88
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %26 = sitofp i64 %res.i84.2 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i69.3 = insertelement <8 x double> poison, double %26, i64 0
               %v.i70.3 = shufflevector <8 x double> %ie.i69.3, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i71.3 = fadd fast <8 x double> %v.i70.3, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %27 = sitofp i64 %res.i65.3 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i72.3 = insertelement <8 x double> poison, double %27, i64 0
               %v.i73.3 = shufflevector <8 x double> %ie.i72.3, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i74.3 = fadd fast <8 x double> %v.i73.3, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %28 = sitofp i64 %res.i66.3 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i75.3 = insertelement <8 x double> poison, double %28, i64 0
               %v.i76.3 = shufflevector <8 x double> %ie.i75.3, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i77.3 = fadd fast <8 x double> %v.i76.3, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i78.3 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i71.3, <8 x double> %res.i62.3)
            %res.i79.3 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i74.3, <8 x double> %res.i63.3)
            %res.i80.3 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i77.3, <8 x double> %res.i64.3)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1803 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1174 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i81.3 = getelementptr inbounds double, ptr %ptr.1.i43, i64 %res.i84.2
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i78.3, ptr %ptr.1.i81.3, align 8
           %ptr.1.i82.3 = getelementptr inbounds double, ptr %ptr.1.i81.3, i64 8
           store <8 x double> %res.i79.3, ptr %ptr.1.i82.3, align 8
           %ptr.1.i83.3 = getelementptr inbounds double, ptr %ptr.1.i81.3, i64 16
           store <8 x double> %res.i80.3, ptr %ptr.1.i83.3, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i84.3 = add nuw nsw i64 %value_phi28195, 96
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i44.4 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i84.3
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i45.4 = load <8 x double>, ptr %ptr.1.i44.4, align 8
           %ptr.1.i46.4 = getelementptr inbounds double, ptr %ptr.1.i44.4, i64 8
           %res.i47.4 = load <8 x double>, ptr %ptr.1.i46.4, align 8
           %ptr.1.i48.4 = getelementptr inbounds double, ptr %ptr.1.i44.4, i64 16
           %res.i49.4 = load <8 x double>, ptr %ptr.1.i48.4, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i50.4 = or disjoint i64 %res.i84.3, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i51.4 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i50.4
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i52.4 = load <8 x double>, ptr %ptr.1.i51.4, align 8
           %ptr.1.i53.4 = getelementptr inbounds double, ptr %ptr.1.i51.4, i64 8
           %res.i54.4 = load <8 x double>, ptr %ptr.1.i53.4, align 8
           %ptr.1.i55.4 = getelementptr inbounds double, ptr %ptr.1.i51.4, i64 16
           %res.i56.4 = load <8 x double>, ptr %ptr.1.i55.4, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i57.4 = fsub reassoc nsz arcp contract afn <8 x double> %res.i52.4, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i58.4 = fsub reassoc nsz arcp contract afn <8 x double> %res.i54.4, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i59.4 = fsub reassoc nsz arcp contract afn <8 x double> %res.i56.4, %v.i
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i62.4 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i45.4, <8 x double> %res.i57.4)
            %res.i63.4 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i47.4, <8 x double> %res.i58.4)
            %res.i64.4 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i49.4, <8 x double> %res.i59.4)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i65.4 = add nuw nsw i64 %value_phi28195, 104
       %res.i66.4 = add nuw nsw i64 %value_phi28195, 112
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %29 = sitofp i64 %res.i84.3 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i69.4 = insertelement <8 x double> poison, double %29, i64 0
               %v.i70.4 = shufflevector <8 x double> %ie.i69.4, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i71.4 = fadd fast <8 x double> %v.i70.4, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %30 = sitofp i64 %res.i65.4 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i72.4 = insertelement <8 x double> poison, double %30, i64 0
               %v.i73.4 = shufflevector <8 x double> %ie.i72.4, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i74.4 = fadd fast <8 x double> %v.i73.4, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %31 = sitofp i64 %res.i66.4 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i75.4 = insertelement <8 x double> poison, double %31, i64 0
               %v.i76.4 = shufflevector <8 x double> %ie.i75.4, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i77.4 = fadd fast <8 x double> %v.i76.4, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i78.4 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i71.4, <8 x double> %res.i62.4)
            %res.i79.4 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i74.4, <8 x double> %res.i63.4)
            %res.i80.4 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i77.4, <8 x double> %res.i64.4)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1803 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1174 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i81.4 = getelementptr inbounds double, ptr %ptr.1.i43, i64 %res.i84.3
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i78.4, ptr %ptr.1.i81.4, align 8
           %ptr.1.i82.4 = getelementptr inbounds double, ptr %ptr.1.i81.4, i64 8
           store <8 x double> %res.i79.4, ptr %ptr.1.i82.4, align 8
           %ptr.1.i83.4 = getelementptr inbounds double, ptr %ptr.1.i81.4, i64 16
           store <8 x double> %res.i80.4, ptr %ptr.1.i83.4, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i84.4 = add nuw nsw i64 %value_phi28195, 120
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i44.5 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i84.4
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i45.5 = load <8 x double>, ptr %ptr.1.i44.5, align 8
           %ptr.1.i46.5 = getelementptr inbounds double, ptr %ptr.1.i44.5, i64 8
           %res.i47.5 = load <8 x double>, ptr %ptr.1.i46.5, align 8
           %ptr.1.i48.5 = getelementptr inbounds double, ptr %ptr.1.i44.5, i64 16
           %res.i49.5 = load <8 x double>, ptr %ptr.1.i48.5, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i50.5 = or disjoint i64 %res.i84.4, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i51.5 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i50.5
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i52.5 = load <8 x double>, ptr %ptr.1.i51.5, align 8
           %ptr.1.i53.5 = getelementptr inbounds double, ptr %ptr.1.i51.5, i64 8
           %res.i54.5 = load <8 x double>, ptr %ptr.1.i53.5, align 8
           %ptr.1.i55.5 = getelementptr inbounds double, ptr %ptr.1.i51.5, i64 16
           %res.i56.5 = load <8 x double>, ptr %ptr.1.i55.5, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i57.5 = fsub reassoc nsz arcp contract afn <8 x double> %res.i52.5, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i58.5 = fsub reassoc nsz arcp contract afn <8 x double> %res.i54.5, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i59.5 = fsub reassoc nsz arcp contract afn <8 x double> %res.i56.5, %v.i
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i62.5 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i45.5, <8 x double> %res.i57.5)
            %res.i63.5 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i47.5, <8 x double> %res.i58.5)
            %res.i64.5 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i49.5, <8 x double> %res.i59.5)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i65.5 = add nuw nsw i64 %value_phi28195, 128
       %res.i66.5 = add nuw nsw i64 %value_phi28195, 136
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %32 = sitofp i64 %res.i84.4 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i69.5 = insertelement <8 x double> poison, double %32, i64 0
               %v.i70.5 = shufflevector <8 x double> %ie.i69.5, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i71.5 = fadd fast <8 x double> %v.i70.5, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %33 = sitofp i64 %res.i65.5 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i72.5 = insertelement <8 x double> poison, double %33, i64 0
               %v.i73.5 = shufflevector <8 x double> %ie.i72.5, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i74.5 = fadd fast <8 x double> %v.i73.5, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %34 = sitofp i64 %res.i66.5 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i75.5 = insertelement <8 x double> poison, double %34, i64 0
               %v.i76.5 = shufflevector <8 x double> %ie.i75.5, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i77.5 = fadd fast <8 x double> %v.i76.5, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i78.5 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i71.5, <8 x double> %res.i62.5)
            %res.i79.5 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i74.5, <8 x double> %res.i63.5)
            %res.i80.5 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i77.5, <8 x double> %res.i64.5)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1803 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1174 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i81.5 = getelementptr inbounds double, ptr %ptr.1.i43, i64 %res.i84.4
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i78.5, ptr %ptr.1.i81.5, align 8
           %ptr.1.i82.5 = getelementptr inbounds double, ptr %ptr.1.i81.5, i64 8
           store <8 x double> %res.i79.5, ptr %ptr.1.i82.5, align 8
           %ptr.1.i83.5 = getelementptr inbounds double, ptr %ptr.1.i81.5, i64 16
           store <8 x double> %res.i80.5, ptr %ptr.1.i83.5, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i84.5 = add nuw nsw i64 %value_phi28195, 144
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i44.6 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i84.5
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i45.6 = load <8 x double>, ptr %ptr.1.i44.6, align 8
           %ptr.1.i46.6 = getelementptr inbounds double, ptr %ptr.1.i44.6, i64 8
           %res.i47.6 = load <8 x double>, ptr %ptr.1.i46.6, align 8
           %ptr.1.i48.6 = getelementptr inbounds double, ptr %ptr.1.i44.6, i64 16
           %res.i49.6 = load <8 x double>, ptr %ptr.1.i48.6, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i50.6 = or disjoint i64 %res.i84.5, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i51.6 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i50.6
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i52.6 = load <8 x double>, ptr %ptr.1.i51.6, align 8
           %ptr.1.i53.6 = getelementptr inbounds double, ptr %ptr.1.i51.6, i64 8
           %res.i54.6 = load <8 x double>, ptr %ptr.1.i53.6, align 8
           %ptr.1.i55.6 = getelementptr inbounds double, ptr %ptr.1.i51.6, i64 16
           %res.i56.6 = load <8 x double>, ptr %ptr.1.i55.6, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i57.6 = fsub reassoc nsz arcp contract afn <8 x double> %res.i52.6, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i58.6 = fsub reassoc nsz arcp contract afn <8 x double> %res.i54.6, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i59.6 = fsub reassoc nsz arcp contract afn <8 x double> %res.i56.6, %v.i
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i62.6 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i45.6, <8 x double> %res.i57.6)
            %res.i63.6 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i47.6, <8 x double> %res.i58.6)
            %res.i64.6 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i49.6, <8 x double> %res.i59.6)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i65.6 = add nuw nsw i64 %value_phi28195, 152
       %res.i66.6 = add nuw nsw i64 %value_phi28195, 160
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %35 = sitofp i64 %res.i84.5 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i69.6 = insertelement <8 x double> poison, double %35, i64 0
               %v.i70.6 = shufflevector <8 x double> %ie.i69.6, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i71.6 = fadd fast <8 x double> %v.i70.6, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %36 = sitofp i64 %res.i65.6 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i72.6 = insertelement <8 x double> poison, double %36, i64 0
               %v.i73.6 = shufflevector <8 x double> %ie.i72.6, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i74.6 = fadd fast <8 x double> %v.i73.6, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %37 = sitofp i64 %res.i66.6 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i75.6 = insertelement <8 x double> poison, double %37, i64 0
               %v.i76.6 = shufflevector <8 x double> %ie.i75.6, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i77.6 = fadd fast <8 x double> %v.i76.6, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i78.6 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i71.6, <8 x double> %res.i62.6)
            %res.i79.6 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i74.6, <8 x double> %res.i63.6)
            %res.i80.6 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i77.6, <8 x double> %res.i64.6)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1803 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1174 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i81.6 = getelementptr inbounds double, ptr %ptr.1.i43, i64 %res.i84.5
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i78.6, ptr %ptr.1.i81.6, align 8
           %ptr.1.i82.6 = getelementptr inbounds double, ptr %ptr.1.i81.6, i64 8
           store <8 x double> %res.i79.6, ptr %ptr.1.i82.6, align 8
           %ptr.1.i83.6 = getelementptr inbounds double, ptr %ptr.1.i81.6, i64 16
           store <8 x double> %res.i80.6, ptr %ptr.1.i83.6, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i84.6 = add nuw nsw i64 %value_phi28195, 168
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i44.7 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i84.6
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i45.7 = load <8 x double>, ptr %ptr.1.i44.7, align 8
           %ptr.1.i46.7 = getelementptr inbounds double, ptr %ptr.1.i44.7, i64 8
           %res.i47.7 = load <8 x double>, ptr %ptr.1.i46.7, align 8
           %ptr.1.i48.7 = getelementptr inbounds double, ptr %ptr.1.i44.7, i64 16
           %res.i49.7 = load <8 x double>, ptr %ptr.1.i48.7, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i50.7 = or disjoint i64 %res.i84.6, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i51.7 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i50.7
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i52.7 = load <8 x double>, ptr %ptr.1.i51.7, align 8
           %ptr.1.i53.7 = getelementptr inbounds double, ptr %ptr.1.i51.7, i64 8
           %res.i54.7 = load <8 x double>, ptr %ptr.1.i53.7, align 8
           %ptr.1.i55.7 = getelementptr inbounds double, ptr %ptr.1.i51.7, i64 16
           %res.i56.7 = load <8 x double>, ptr %ptr.1.i55.7, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i57.7 = fsub reassoc nsz arcp contract afn <8 x double> %res.i52.7, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i58.7 = fsub reassoc nsz arcp contract afn <8 x double> %res.i54.7, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i59.7 = fsub reassoc nsz arcp contract afn <8 x double> %res.i56.7, %v.i
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i62.7 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i45.7, <8 x double> %res.i57.7)
            %res.i63.7 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i47.7, <8 x double> %res.i58.7)
            %res.i64.7 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i49.7, <8 x double> %res.i59.7)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i65.7 = add nuw nsw i64 %value_phi28195, 176
       %res.i66.7 = add nuw nsw i64 %value_phi28195, 184
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %38 = sitofp i64 %res.i84.6 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i69.7 = insertelement <8 x double> poison, double %38, i64 0
               %v.i70.7 = shufflevector <8 x double> %ie.i69.7, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i71.7 = fadd fast <8 x double> %v.i70.7, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %39 = sitofp i64 %res.i65.7 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i72.7 = insertelement <8 x double> poison, double %39, i64 0
               %v.i73.7 = shufflevector <8 x double> %ie.i72.7, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i74.7 = fadd fast <8 x double> %v.i73.7, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %40 = sitofp i64 %res.i66.7 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i75.7 = insertelement <8 x double> poison, double %40, i64 0
               %v.i76.7 = shufflevector <8 x double> %ie.i75.7, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i77.7 = fadd fast <8 x double> %v.i76.7, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i78.7 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i71.7, <8 x double> %res.i62.7)
            %res.i79.7 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i74.7, <8 x double> %res.i63.7)
            %res.i80.7 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i77.7, <8 x double> %res.i64.7)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1803 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1174 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i81.7 = getelementptr inbounds double, ptr %ptr.1.i43, i64 %res.i84.6
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i78.7, ptr %ptr.1.i81.7, align 8
           %ptr.1.i82.7 = getelementptr inbounds double, ptr %ptr.1.i81.7, i64 8
           store <8 x double> %res.i79.7, ptr %ptr.1.i82.7, align 8
           %ptr.1.i83.7 = getelementptr inbounds double, ptr %ptr.1.i81.7, i64 16
           store <8 x double> %res.i80.7, ptr %ptr.1.i83.7, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i84.7 = add nuw nsw i64 %value_phi28195, 192
; │││└└
     %niter.next.7 = add i64 %niter, 8
     %niter.ncmp.7 = icmp eq i64 %niter.next.7, %unroll_iter
     br i1 %niter.ncmp.7, label %L128.loopexit.unr-lcssa, label %L91

L128.loopexit.unr-lcssa:                          ; preds = %L91, %L91.lr.ph
     %res.i84.lcssa.ph = phi i64 [ undef, %L91.lr.ph ], [ %res.i84.7, %L91 ]
     %value_phi28195.unr = phi i64 [ 2, %L91.lr.ph ], [ %res.i84.7, %L91 ]
     %lcmp.mod.not = icmp eq i64 %xtraiter, 0
     br i1 %lcmp.mod.not, label %L128, label %L91.epil

L91.epil:                                         ; preds = %L91.epil, %L128.loopexit.unr-lcssa
     %value_phi28195.epil = phi i64 [ %res.i84.epil, %L91.epil ], [ %value_phi28195.unr, %L128.loopexit.unr-lcssa ]
     %epil.iter = phi i64 [ %epil.iter.next, %L91.epil ], [ 0, %L128.loopexit.unr-lcssa ]
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i44.epil = getelementptr inbounds double, ptr %ptr.1.i, i64 %value_phi28195.epil
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i45.epil = load <8 x double>, ptr %ptr.1.i44.epil, align 8
           %ptr.1.i46.epil = getelementptr inbounds double, ptr %ptr.1.i44.epil, i64 8
           %res.i47.epil = load <8 x double>, ptr %ptr.1.i46.epil, align 8
           %ptr.1.i48.epil = getelementptr inbounds double, ptr %ptr.1.i44.epil, i64 16
           %res.i49.epil = load <8 x double>, ptr %ptr.1.i48.epil, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i50.epil = or disjoint i64 %value_phi28195.epil, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:771 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:535 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i51.epil = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i50.epil
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i52.epil = load <8 x double>, ptr %ptr.1.i51.epil, align 8
           %ptr.1.i53.epil = getelementptr inbounds double, ptr %ptr.1.i51.epil, i64 8
           %res.i54.epil = load <8 x double>, ptr %ptr.1.i53.epil, align 8
           %ptr.1.i55.epil = getelementptr inbounds double, ptr %ptr.1.i51.epil, i64 16
           %res.i56.epil = load <8 x double>, ptr %ptr.1.i55.epil, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i57.epil = fsub reassoc nsz arcp contract afn <8 x double> %res.i52.epil, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i58.epil = fsub reassoc nsz arcp contract afn <8 x double> %res.i54.epil, %v.i
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i59.epil = fsub reassoc nsz arcp contract afn <8 x double> %res.i56.epil, %v.i
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i62.epil = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i45.epil, <8 x double> %res.i57.epil)
            %res.i63.epil = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i47.epil, <8 x double> %res.i58.epil)
            %res.i64.epil = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i61, <8 x double> %res.i49.epil, <8 x double> %res.i59.epil)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i65.epil = add nuw nsw i64 %value_phi28195.epil, 8
       %res.i66.epil = add nuw nsw i64 %value_phi28195.epil, 16
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %41 = sitofp i64 %value_phi28195.epil to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i69.epil = insertelement <8 x double> poison, double %41, i64 0
               %v.i70.epil = shufflevector <8 x double> %ie.i69.epil, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i71.epil = fadd fast <8 x double> %v.i70.epil, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %42 = sitofp i64 %res.i65.epil to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i72.epil = insertelement <8 x double> poison, double %42, i64 0
               %v.i73.epil = shufflevector <8 x double> %ie.i72.epil, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i74.epil = fadd fast <8 x double> %v.i73.epil, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %43 = sitofp i64 %res.i66.epil to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i75.epil = insertelement <8 x double> poison, double %43, i64 0
               %v.i76.epil = shufflevector <8 x double> %ie.i75.epil, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i77.epil = fadd fast <8 x double> %v.i76.epil, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i78.epil = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i71.epil, <8 x double> %res.i62.epil)
            %res.i79.epil = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i74.epil, <8 x double> %res.i63.epil)
            %res.i80.epil = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i68, <8 x double> %res.i77.epil, <8 x double> %res.i64.epil)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1803 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1174 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i81.epil = getelementptr inbounds double, ptr %ptr.1.i43, i64 %value_phi28195.epil
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i78.epil, ptr %ptr.1.i81.epil, align 8
           %ptr.1.i82.epil = getelementptr inbounds double, ptr %ptr.1.i81.epil, i64 8
           store <8 x double> %res.i79.epil, ptr %ptr.1.i82.epil, align 8
           %ptr.1.i83.epil = getelementptr inbounds double, ptr %ptr.1.i81.epil, i64 16
           store <8 x double> %res.i80.epil, ptr %ptr.1.i83.epil, align 8
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i84.epil = add nuw nsw i64 %value_phi28195.epil, 24
; │││└└
     %epil.iter.next = add i64 %epil.iter, 1
     %epil.iter.cmp.not = icmp eq i64 %epil.iter.next, %xtraiter
     br i1 %epil.iter.cmp.not, label %L128, label %L91.epil

L128:                                             ; preds = %L91.epil, %L128.loopexit.unr-lcssa, %top
     %value_phi28.lcssa = phi i64 [ 2, %top ], [ %res.i84.lcssa.ph, %L128.loopexit.unr-lcssa ], [ %res.i84.epil, %L91.epil ]
; │││┌ @ /home/paul/.julia/packages/LoopVectorization/GKxH5/src/modeling/graphs.jl:229 within `cmpend`
; ││││┌ @ int.jl:520 within `<=`
       %.not191 = icmp sgt i64 %value_phi28.lcssa, %.size.0.copyload
; │││└└
     br i1 %.not191, label %L232, label %L130

L130:                                             ; preds = %L128
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/masks.jl:552 within `mask`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/masks.jl:514 within `_mask`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/masks.jl:460 within `macro expansion`
; ││││││┌ @ int.jl:544 within `>>>` @ int.jl:536
         %44 = trunc i64 %.size.0.copyload to i8
         %45 = add i8 %44, 6
         %46 = and i8 %45, 7
         %47 = xor i8 %46, 7
         %48 = lshr i8 -1, %47
         %49 = select i1 %10, i8 %48, i8 -1
; │││└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vsub_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i85 = add nsw i64 %.size.0.copyload, -8
; │││└└
; │││┌ @ operators.jl:425 within `>`
; ││││┌ @ int.jl:83 within `<`
       %.not192 = icmp slt i64 %res.i85, %value_phi28.lcssa
; │││└└
     br i1 %.not192, label %L145, label %L164

L145:                                             ; preds = %L130
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:103 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:44 within `linear_index`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/cartesian_indexing.jl:4 within `tdot` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/cartesian_indexing.jl:8
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/lazymul.jl:61 within `lazymul`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vmul_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
           %res.i86 = shl i64 %value_phi28.lcssa, 3
; ││││└└└└└
; ││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:104 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1129 within `__vload`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1129 within `macro expansion`
        %ptr.1.i87 = getelementptr i8, ptr %ptr.1.i, i64 %res.i86
        %mask.0.i = bitcast i8 %49 to <8 x i1>
        %res.i88 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr nonnull %ptr.1.i87, i32 8, <8 x i1> %mask.0.i, <8 x double> zeroinitializer)
        %ptr.1.i91 = getelementptr i8, ptr %ptr.1.i87, i64 8
        %res.i93 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr nonnull %ptr.1.i91, i32 8, <8 x i1> %mask.0.i, <8 x double> zeroinitializer)
; │││└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:94 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:110 within `promote`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:199 within `convert`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:191 within `vconvert`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:150 within `vbroadcast`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:121 within `_vbroadcast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:94 within `macro expansion`
            %ie.i94 = insertelement <8 x double> poison, double %9, i64 0
            %v.i95 = shufflevector <8 x double> %ie.i94, <8 x double> poison, <8 x i32> zeroinitializer
; ││││└└└└└└
; ││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:95 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
        %res.i96 = fsub reassoc nsz arcp contract afn <8 x double> %res.i93, %v.i95
; │││└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ fastmath.jl:165 within `sub_fast`
       %50 = fneg fast double %7
; ││││└
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:199 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:191 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:150 within `vbroadcast`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:121 within `_vbroadcast`
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:94 within `macro expansion`
              %ie.i97 = insertelement <8 x double> poison, double %50, i64 0
              %v.i98 = shufflevector <8 x double> %ie.i97, <8 x double> poison, <8 x i32> zeroinitializer
; ││││││└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
         %res.i99 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i98, <8 x double> %res.i88, <8 x double> %res.i96)
; ││││└└└
; ││││┌ @ fastmath.jl:165 within `sub_fast`
       %51 = fneg fast double %6
; ││││└
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:199 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:191 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:150 within `vbroadcast`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:121 within `_vbroadcast`
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:94 within `macro expansion`
              %ie.i100 = insertelement <8 x double> poison, double %51, i64 0
              %v.i101 = shufflevector <8 x double> %ie.i100, <8 x double> poison, <8 x i32> zeroinitializer
; ││││││││└└└└
; ││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││┌ @ number.jl:7 within `convert`
; ││││││││││┌ @ float.jl:245 within `Float64`
             %52 = sitofp i64 %value_phi28.lcssa to double
; │││││││││└└
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
             %ie.i102 = insertelement <8 x double> poison, double %52, i64 0
             %v.i103 = shufflevector <8 x double> %ie.i102, <8 x double> poison, <8 x i32> zeroinitializer
             %res.i104 = fadd fast <8 x double> %v.i103, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
         %res.i105 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i101, <8 x double> %res.i104, <8 x double> %res.i99)
; │││└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:211 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1840 within `__vstore!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1840 within `macro expansion`
        %ptr.1.i107 = getelementptr inbounds i8, ptr %ptr.1.i43, i64 %res.i86
        call void @llvm.masked.store.v8f64.p0(<8 x double> %res.i105, ptr nonnull %ptr.1.i107, i32 8, <8 x i1> %mask.0.i)
; │││└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       br label %L232

L164:                                             ; preds = %L130
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vsub_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i110 = add nsw i64 %.size.0.copyload, -16
; │││└└
; │││┌ @ operators.jl:425 within `>`
; ││││┌ @ int.jl:83 within `<`
       %.not193 = icmp slt i64 %res.i110, %value_phi28.lcssa
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:788 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:659 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i111 = getelementptr inbounds double, ptr %ptr.1.i, i64 %value_phi28.lcssa
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i112 = load <8 x double>, ptr %ptr.1.i111, align 8
; │││││││└└
; │││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl within `macro expansion`
           %ptr.1.i113 = getelementptr inbounds double, ptr %ptr.1.i111, i64 8
; │││└└└└└└
     br i1 %.not193, label %L167, label %L195

L167:                                             ; preds = %L164
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:788 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:659 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:104 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1129 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1129 within `macro expansion`
           %mask.0.i114 = bitcast i8 %49 to <8 x i1>
           %res.i115 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr nonnull %ptr.1.i113, i32 8, <8 x i1> %mask.0.i114, <8 x double> zeroinitializer)
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i116 = or disjoint i64 %value_phi28.lcssa, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:788 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:659 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i117 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i116
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i118 = load <8 x double>, ptr %ptr.1.i117, align 8
; │││││││└└
; │││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:104 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1129 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1129 within `macro expansion`
           %ptr.1.i119 = getelementptr inbounds double, ptr %ptr.1.i117, i64 8
           %res.i121 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr nonnull %ptr.1.i119, i32 8, <8 x i1> %mask.0.i114, <8 x double> zeroinitializer)
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:98 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:110 within `promote`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:199 within `convert`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:242 within `vconvert` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:191
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:150 within `vbroadcast`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:121 within `_vbroadcast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:94 within `macro expansion`
            %ie.i122 = insertelement <8 x double> poison, double %9, i64 0
            %v.i123 = shufflevector <8 x double> %ie.i122, <8 x double> poison, <8 x i32> zeroinitializer
; ││││└└└└└└
; ││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i124 = fsub reassoc nsz arcp contract afn <8 x double> %res.i118, %v.i123
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i125 = fsub reassoc nsz arcp contract afn <8 x double> %res.i121, %v.i123
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ fastmath.jl:165 within `sub_fast`
       %53 = fneg fast double %7
; ││││└
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:199 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:242 within `vconvert` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:191
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:150 within `vbroadcast`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:121 within `_vbroadcast`
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:94 within `macro expansion`
              %ie.i126 = insertelement <8 x double> poison, double %53, i64 0
              %v.i127 = shufflevector <8 x double> %ie.i126, <8 x double> poison, <8 x i32> zeroinitializer
; ││││││└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i128 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i127, <8 x double> %res.i112, <8 x double> %res.i124)
            %res.i129 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i127, <8 x double> %res.i115, <8 x double> %res.i125)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i130 = add nuw nsw i64 %value_phi28.lcssa, 8
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ fastmath.jl:165 within `sub_fast`
       %54 = fneg fast double %6
; ││││└
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:199 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:242 within `vconvert` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:191
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:150 within `vbroadcast`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:121 within `_vbroadcast`
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:94 within `macro expansion`
              %ie.i131 = insertelement <8 x double> poison, double %54, i64 0
              %v.i132 = shufflevector <8 x double> %ie.i131, <8 x double> poison, <8 x i32> zeroinitializer
; ││││││││└└└└
; ││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %55 = sitofp i64 %value_phi28.lcssa to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i133 = insertelement <8 x double> poison, double %55, i64 0
               %v.i134 = shufflevector <8 x double> %ie.i133, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i135 = fadd fast <8 x double> %v.i134, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %56 = sitofp i64 %res.i130 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i136 = insertelement <8 x double> poison, double %56, i64 0
               %v.i137 = shufflevector <8 x double> %ie.i136, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i138 = fadd fast <8 x double> %v.i137, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i139 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i132, <8 x double> %res.i135, <8 x double> %res.i128)
            %res.i140 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i132, <8 x double> %res.i138, <8 x double> %res.i129)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1826 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1473 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i141 = getelementptr inbounds double, ptr %ptr.1.i43, i64 %value_phi28.lcssa
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i139, ptr %ptr.1.i141, align 8
; │││││││└└
; │││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:211 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1840 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1840 within `macro expansion`
           %ptr.1.i142 = getelementptr inbounds double, ptr %ptr.1.i141, i64 8
           call void @llvm.masked.store.v8f64.p0(<8 x double> %res.i140, ptr nonnull %ptr.1.i142, i32 8, <8 x i1> %mask.0.i114)
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       br label %L232

L195:                                             ; preds = %L164
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:788 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:659 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i148 = load <8 x double>, ptr %ptr.1.i113, align 8
; │││││││└└
; │││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:104 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1129 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1129 within `macro expansion`
           %ptr.1.i149 = getelementptr inbounds double, ptr %ptr.1.i111, i64 16
           %mask.0.i150 = bitcast i8 %49 to <8 x i1>
           %res.i151 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr nonnull %ptr.1.i149, i32 8, <8 x i1> %mask.0.i150, <8 x double> zeroinitializer)
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:54 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i152 = or disjoint i64 %value_phi28.lcssa, 1
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:788 within `_vload`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:659 within `_vload_unroll`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:60 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i153 = getelementptr inbounds double, ptr %ptr.1.i, i64 %res.i152
; ││││││└└└
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:94 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1119 within `macro expansion`
           %res.i154 = load <8 x double>, ptr %ptr.1.i153, align 8
           %ptr.1.i155 = getelementptr inbounds double, ptr %ptr.1.i153, i64 8
           %res.i156 = load <8 x double>, ptr %ptr.1.i155, align 8
; │││││││└└
; │││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:104 within `_vload`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1129 within `__vload`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1129 within `macro expansion`
           %ptr.1.i157 = getelementptr inbounds double, ptr %ptr.1.i153, i64 16
           %res.i159 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr nonnull %ptr.1.i157, i32 8, <8 x i1> %mask.0.i150, <8 x double> zeroinitializer)
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:98 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:110 within `promote`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:199 within `convert`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:242 within `vconvert` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:191
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:150 within `vbroadcast`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:121 within `_vbroadcast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:94 within `macro expansion`
            %ie.i160 = insertelement <8 x double> poison, double %9, i64 0
            %v.i161 = shufflevector <8 x double> %ie.i160, <8 x double> poison, <8 x i32> zeroinitializer
; ││││└└└└└└
; ││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:99 within `sub_fast`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:111 within `vsub_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i162 = fsub reassoc nsz arcp contract afn <8 x double> %res.i154, %v.i161
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i163 = fsub reassoc nsz arcp contract afn <8 x double> %res.i156, %v.i161
; ││││││└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:11 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:7
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `vsub_fast`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:115 within `macro expansion`
          %res.i164 = fsub reassoc nsz arcp contract afn <8 x double> %res.i159, %v.i161
; │││└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ fastmath.jl:165 within `sub_fast`
       %57 = fneg fast double %7
; ││││└
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:199 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:242 within `vconvert` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:191
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:150 within `vbroadcast`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:121 within `_vbroadcast`
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:94 within `macro expansion`
              %ie.i165 = insertelement <8 x double> poison, double %57, i64 0
              %v.i166 = shufflevector <8 x double> %ie.i165, <8 x double> poison, <8 x i32> zeroinitializer
; ││││││└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i167 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i166, <8 x double> %res.i112, <8 x double> %res.i162)
            %res.i168 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i166, <8 x double> %res.i148, <8 x double> %res.i163)
            %res.i169 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i166, <8 x double> %res.i151, <8 x double> %res.i164)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vector_width.jl:44 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       %res.i170 = add nuw nsw i64 %value_phi28.lcssa, 8
       %res.i171 = add nuw nsw i64 %value_phi28.lcssa, 16
; │││└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:497 within `vfnmadd_fast`
; ││││┌ @ fastmath.jl:165 within `sub_fast`
       %58 = fneg fast double %6
; ││││└
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:492 within `vfmadd_fast`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:392 within `vmuladd_fast`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/VectorizationBase.jl:114 within `promote`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:199 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:242 within `vconvert` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:191
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:150 within `vbroadcast`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:121 within `_vbroadcast`
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/vbroadcast.jl:94 within `macro expansion`
              %ie.i172 = insertelement <8 x double> poison, double %58, i64 0
              %v.i173 = shufflevector <8 x double> %ie.i172, <8 x double> poison, <8 x i32> zeroinitializer
; ││││││││└└└└
; ││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:203 within `convert`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/conversion.jl:254 within `vconvert`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap`
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %59 = sitofp i64 %value_phi28.lcssa to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i174 = insertelement <8 x double> poison, double %59, i64 0
               %v.i175 = shufflevector <8 x double> %ie.i174, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i176 = fadd fast <8 x double> %v.i175, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %60 = sitofp i64 %res.i170 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i177 = insertelement <8 x double> poison, double %60, i64 0
               %v.i178 = shufflevector <8 x double> %ie.i177, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i179 = fadd fast <8 x double> %v.i178, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││││││└└└
; ││││││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 within `fmap` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:18 @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:10
; ││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:139 within `vconvert`
; │││││││││││┌ @ number.jl:7 within `convert`
; ││││││││││││┌ @ float.jl:245 within `Float64`
               %61 = sitofp i64 %res.i171 to double
; │││││││││││└└
; │││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:71 within `vrangeincr`
; ││││││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/ranges.jl:93 within `macro expansion`
               %ie.i180 = insertelement <8 x double> poison, double %61, i64 0
               %v.i181 = shufflevector <8 x double> %ie.i180, <8 x double> poison, <8 x i32> zeroinitializer
               %res.i182 = fadd fast <8 x double> %v.i181, <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00>
; ││││││└└└└└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/base_defs.jl:393 within `vmuladd_fast` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:233
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `fmap`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/fmap.jl:25 within `macro expansion`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `vmuladd_fast`
; │││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/intrin_funcs.jl:437 within `macro expansion`
            %res.i183 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i173, <8 x double> %res.i176, <8 x double> %res.i167)
            %res.i184 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i173, <8 x double> %res.i179, <8 x double> %res.i168)
            %res.i185 = call reassoc nsz arcp contract afn <8 x double> @llvm.fmuladd.v8f64(<8 x double> %v.i173, <8 x double> %res.i182, <8 x double> %res.i169)
; │││└└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1826 within `_vstore!`
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:1473 within `_vstore_unroll!`
; │││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:870 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:535 within `gep`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `_gep`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:513 within `macro expansion`
           %ptr.1.i186 = getelementptr inbounds double, ptr %ptr.1.i43, i64 %value_phi28.lcssa
; ││││││└└└
; ││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/vecunroll/memory.jl:872 within `macro expansion`
; ││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:198 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1655 within `macro expansion`
           store <8 x double> %res.i183, ptr %ptr.1.i186, align 8
           %ptr.1.i187 = getelementptr inbounds double, ptr %ptr.1.i186, i64 8
           store <8 x double> %res.i184, ptr %ptr.1.i187, align 8
; │││││││└└
; │││││││ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/strided_pointers/stridedpointers.jl:211 within `_vstore!`
; │││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1840 within `__vstore!`
; ││││││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/memory_addr.jl:1840 within `macro expansion`
           %ptr.1.i188 = getelementptr inbounds double, ptr %ptr.1.i186, i64 16
           call void @llvm.masked.store.v8f64.p0(<8 x double> %res.i185, ptr nonnull %ptr.1.i188, i32 8, <8 x i1> %mask.0.i150)
; │││└└└└└└
; │││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/static.jl:52 within `vadd_nsw` @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48
; ││││┌ @ /home/paul/.julia/packages/VectorizationBase/7mwzi/src/llvm_intrin/binary_ops.jl:48 within `macro expansion`
       br label %L232

L232:                                             ; preds = %L195, %L167, %L145, %L128
; │└└└└
   %jl_nothing = load ptr, ptr @jl_nothing, align 8
   ret ptr %jl_nothing
; └
}
@benchmark resids_turbo!($e,$y,$θ)
BenchmarkTools.Trial: 10000 samples with 980 evaluations per sample.
 Range (min … max):  61.974 ns … 232.695 ns   GC (min … max): 0.00% … 0.00%
 Time  (median):     63.794 ns                GC (median):    0.00%
 Time  (mean ± σ):   64.091 ns ±   2.898 ns   GC (mean ± σ):  0.00% ± 0.00%

              ▁▂▆█▅▂        ▁▁▂▂▁▁                            ▂
  ▆▄▆▆▆▄▁▅▆▇█▇██████▇▇▇▇▇█▇████████▇▇█▇▇▇▆▆▆▆▆▅▃▁▄▄▁▄▃▃▆▃▄▁▄ █
  62 ns         Histogram: log(frequency) by time      68.8 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

Example: Manual SIMD

"""
    oneto(::Val{N})

Return the tuple `(1, 2, …, N)`, with its length taken from the type parameter `N`.
`N == 0` yields the empty tuple `()`.
"""
oneto(::Val{N}) where {N} = ntuple(identity, Val(N))

"""
    resids_simd!(e, yin, θ, width::Val{N}=Val(8)) where N

Fill `e` in place with `e[t] = yin[t+1] - θ[1] - θ[2]*(t+1) - θ[3]*yin[t]`.

Elements are processed `N` at a time with explicit SIMD vectors; the last
`length(e) % N` elements are handled by a scalar loop. `yin` is read up to index
`length(e)+1` and `θ` up to index 3 under `@inbounds`, so the caller must pass
arrays at least that long. Returns `nothing`.
"""
function resids_simd!(e,yin, θ, width::Val{N}=Val(8)) where N
  lane = VecRange{N}(0)
  # Trend values for the first block of lanes. e[t] is paired with yin[t+1],
  # whose trend is t+1, so the lanes start at 2 (same convention as the scalar
  # tail loop below).
  tv=Vec{N,Float64}(oneto(Val(N)))+1
  # Coefficients are negated once so the loop body is two multiply-adds.
  θ1=-Vec{N,Float64}(θ[1])
  θ2=-Vec{N,Float64}(θ[2])
  θ3=-Vec{N,Float64}(θ[3])
  remainder = length(e) % N
  @inbounds for t ∈ 1:N:(length(e)-remainder)
    @fastmath e[t+lane] = muladd(θ2,tv,yin[t+1+lane])+muladd(θ3,yin[t+lane],θ1)
    @fastmath tv+=N
  end
  # Scalar clean-up for the elements that do not fill a whole vector.
  @inbounds for t ∈ (length(e)-remainder+1):length(e)
    @fastmath e[t] = muladd(-θ[2],t+1,yin[t+1])-muladd(θ[3],yin[t],θ[1])
  end
  nothing
end

@benchmark resids_simd!($e,$y,$θ)
BenchmarkTools.Trial: 10000 samples with 961 evaluations per sample.
 Range (min … max):  86.886 ns … 187.660 ns   GC (min … max): 0.00% … 0.00%
 Time  (median):     88.753 ns                GC (median):    0.00%
 Time  (mean ± σ):   89.799 ns ±   6.363 ns   GC (mean ± σ):  0.00% ± 0.00%

       ▂▅█                                                     
  ▂▂▃▅▆████▄▃▃▃▃▃▂▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▁▂▂▁▁▁▂▂▁▂▁▂▂ ▃
  86.9 ns         Histogram: frequency by time          101 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

Example: Necessary Manual SIMD

"""
    xx_xy!(xx, xy, yin)

Overwrite `xx` (indexed as 3×3) and `xy` (indexed as length 3) with sums over
`t = 2:length(yin)` of the cross products of the regressors `(1, t, yin[t-1])`
with themselves and with the response `yin[t]`. Returns `nothing`.
"""
function xx_xy!(xx,xy,yin)
  T = length(yin)
  fill!(xx, zero(eltype(xx)))
  fill!(xy, zero(eltype(xy)))
  @inbounds @fastmath @simd for t in 2:T # @turbo errors
    ylag = yin[t-1]
    ycur = yin[t]
    xx[1,3] += ylag
    xx[2,3] += t*ylag
    xx[3,3] += ylag^2
    xy[1] += ycur
    xy[2] += t*ycur
    xy[3] += ylag*ycur
  end
  # Entries that depend only on T have closed forms.
  xx[1,1] = T-1 # number of terms in 2:T
  trendsum = (T+1)*T/2 - 1 # sum(2:T)
  xx[2,1] = trendsum
  xx[1,2] = trendsum
  xx[2,2] = (2*(T)+1)*(T)*(T+1)/6 - 1 # sum((2:T).^2)
  # Mirror the accumulated third column into the third row.
  xx[3,1] = xx[1,3]
  xx[3,2] = xx[2,3]
  return nothing
end

# Zero-filled 3×3 and length-3 output buffers; xx_xy! overwrites both in place.
xx = @MMatrix zeros(3,3)
xy = @MVector zeros(3)
# Arguments are `$`-interpolated into the benchmarked expression.
@benchmark xx_xy!($xx,$xy,$y)
BenchmarkTools.Trial: 10000 samples with 9 evaluations per sample.
 Range (min … max):  2.153 μs …  2.897 μs   GC (min … max): 0.00% … 0.00%
 Time  (median):     2.166 μs               GC (median):    0.00%
 Time  (mean ± σ):   2.172 μs ± 26.866 ns   GC (mean ± σ):  0.00% ± 0.00%

   ▁▆█▆▂  ▂▁                                              ▂
  ▄█████▇███▇▇█▇▆▇▆▅▃▄▃▄▁▁▁▃▁▁▁▁▁▁▃▅▄▄▅▅▁▄▅▄▅▅▅▆▅▅▅▅▆▆▆▅▆ █
  2.15 μs      Histogram: log(frequency) by time     2.34 μs <

 Memory estimate: 0 bytes, allocs estimate: 0.
"""
    xx_xy_simd!(xx, xy, yin, v::Val{N}=Val(32)) where {N}

Overwrite `xx` and `xy` with the same sums over `t = 2:length(yin)` that the
scalar tail loop below accumulates, but process `N` consecutive `t` per
iteration by loading `N`-wide vectors from `yin` and reducing each product with
`sum`. The final `(length(yin)-1) % N` terms are added one at a time.
Returns `nothing`.
"""
function xx_xy_simd!(xx,xy,yin, v::Val{N}=Val(32)) where {N}
  T = length(yin)
  tail = (T-1) % N
  fill!(xx, zero(eltype(xx)))
  fill!(xy, zero(eltype(xy)))
  lanes = VecRange{N}(0)
  # Trend values 2, 3, …, N+1 for the first block, which starts at t = 2.
  trend = Vec{N,eltype(yin)}(oneto(Val(N)))+1
  @inbounds for t in 2:N:(T-tail)
    ylag = yin[t-1+lanes]
    ycur = yin[t+lanes]
    xx[1,3] += sum(ylag)
    xx[2,3] += sum(ylag*trend)
    xx[3,3] += sum(ylag^2)
    xy[1] += sum(ycur)
    xy[2] += sum(trend*ycur)
    xy[3] += sum(ylag*ycur)
    trend += N
  end
  # Scalar pass over the terms that do not fill a whole vector.
  @inbounds for t in (T-tail+1):T
    ylag = yin[t-1]
    ycur = yin[t]
    xx[1,3] += ylag
    xx[2,3] += ylag*t
    xx[3,3] += ylag^2
    xy[1] += ycur
    xy[2] += t*ycur
    xy[3] += ylag*ycur
  end
  # Entries that depend only on T have closed forms.
  xx[1,1] = T-1 # number of terms in 2:T
  trendsum = (T+1)*T/2 - 1 # sum(2:T)
  xx[2,1] = trendsum
  xx[1,2] = trendsum
  xx[2,2] = (2*(T)+1)*(T)*(T+1)/6 - 1 # sum((2:T).^2)
  # Mirror the accumulated third column into the third row.
  xx[3,1] = xx[1,3]
  xx[3,2] = xx[2,3]
  return nothing
end

@benchmark xx_xy_simd!($xx,$xy,$y)
BenchmarkTools.Trial: 10000 samples with 671 evaluations per sample.
 Range (min … max):  186.313 ns … 403.535 ns   GC (min … max): 0.00% … 0.00%
 Time  (median):     190.450 ns                GC (median):    0.00%
 Time  (mean ± σ):   191.183 ns ±   5.150 ns   GC (mean ± σ):  0.00% ± 0.00%

                       ▆█                                   
  ▂▂▂▂▂▂▂▁▁▁▁▂▂▂▂▂▂▂▂▃▇███▆▄▃▂▂▂▂▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂ ▃
  186 ns           Histogram: frequency by time          197 ns <

 Memory estimate: 0 bytes, allocs estimate: 0.

References

Nissen, Jakob Nybo. 2022. “How to Optimise Julia Code: A Practical Guide.” https://viralinstruction.com/posts/optimise/#how_to_optimise_julia_code_a_practical_guide.
Rackauckas, Chris. 2019. “Optimizing Serial Code.” https://book.sciml.ai/notes/02-Optimizing_Serial_Code/.
Schrimpf, Paul. 2019 (revised 2024). “Coding for Performance.” https://schrimpf.github.io/ARGridBootstrap.jl/argridboot.html.