diff --git a/.github/workflows/ci-julia-nightly.yml b/.github/workflows/ci-julia-nightly.yml index 266ad2bf..ae43ddc6 100644 --- a/.github/workflows/ci-julia-nightly.yml +++ b/.github/workflows/ci-julia-nightly.yml @@ -9,7 +9,7 @@ on: tags: '*' jobs: test-julia-nightly: - name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} + name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ github.event_name }} runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -18,29 +18,18 @@ jobs: - 'nightly' os: - ubuntu-latest - - macOS-latest + - macOS-latest # Arm - windows-latest - arch: - - x64 + # Use default arch steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: actions/cache@v4 - env: - cache-name: cache-artifacts - with: - path: ~/.julia/artifacts - key: ${{ runner.os }}-test-${{ env.cache-name }}-${{ hashFiles('**/Project.toml') }} - restore-keys: | - ${{ runner.os }}-test-${{ env.cache-name }}- - ${{ runner.os }}-test- - ${{ runner.os }}- + - uses: julia-actions/cache@v2 - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v5 with: - file: lcov.info + files: lcov.info diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6d860d09..10c13262 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,7 +18,7 @@ jobs: - '^1.6.0-0' os: - ubuntu-latest - - macOS-latest + - macos-13 # Intel - windows-latest arch: - x64 @@ -44,46 +44,3 @@ jobs: - uses: codecov/codecov-action@v3 with: file: lcov.info - docs: - name: Documentation - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - with: - version: 'nightly' - - run: julia --color=yes -e 'using Pkg; VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")' - shell: bash - env: - JULIA_PKG_SERVER: "" - - run: | - julia --project=docs -e ' - using Pkg - Pkg.develop(PackageSpec(path=pwd())) - Pkg.instantiate()' - - run: julia --project=docs docs/make.jl - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} - doctests: - name: Doctests - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - with: - version: 'nightly' - - run: julia --color=yes -e 'using Pkg; VERSION >= v"1.5-" && !isdir(joinpath(DEPOT_PATH[1], "registries", "General")) && Pkg.Registry.add("General")' - shell: bash - env: - JULIA_PKG_SERVER: "" - - run: | - julia --project=docs -e ' - using Pkg - Pkg.develop(PackageSpec(path=pwd())) - Pkg.instantiate()' - - run: | - julia --project=docs -e ' - using Documenter: doctest - using VectorizationBase - doctest(VectorizationBase)' diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml new file mode 100644 index 00000000..2605c054 --- /dev/null +++ b/.github/workflows/doc.yml @@ -0,0 +1,53 @@ +name: Docs +on: + pull_request: + branches: + - master + push: + branches: + - master + tags: '*' +jobs: + doctests: + name: Doctests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: 'nightly' + - uses: julia-actions/cache@v2 + - uses: julia-actions/julia-buildpkg@v1 + - name: Configure doc environment + shell: julia --project=docs --color=yes {0} + run: | + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate() + - name: Run doctest + shell: julia --color=yes --project=docs {0} + run: | + using Documenter: doctest + using VectorizationBase + doctest(VectorizationBase) + deploydocs: + name: Deploy Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: 'nightly' + - uses: julia-actions/cache@v2 + - uses: julia-actions/julia-buildpkg@v1 + - name: Configure doc environment + shell: julia --project=docs --color=yes {0} + run: | + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate() + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-docdeploy@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} diff --git a/Project.toml b/Project.toml index 46733635..819539e0 100644 --- a/Project.toml +++ b/Project.toml @@ -26,6 +26,7 @@ LinearAlgebra = "1" SIMDTypes = "0.1" Static = "0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1" StaticArrayInterface = "1" +Test = "<0.0.1, 1" julia = "1.6" [extras] diff --git a/docs/make.jl b/docs/make.jl index 0383bc0c..005be85b 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,6 +1,8 @@ using VectorizationBase using Documenter +DocMeta.setdocmeta!(VectorizationBase, :DocTestSetup, :(using VectorizationBase); recursive=true) + makedocs(; modules = [VectorizationBase], authors = "Chris Elrod", @@ -11,7 +13,6 @@ makedocs(; canonical = "https://JuliaSIMD.github.io/VectorizationBase.jl" ), pages = ["Home" => "index.md"], - strict = false ) deploydocs(; repo = "github.com/JuliaSIMD/VectorizationBase.jl") diff --git a/src/VectorizationBase.jl b/src/VectorizationBase.jl index 16b594e8..cd6cb621 100644 --- a/src/VectorizationBase.jl +++ b/src/VectorizationBase.jl @@ -167,7 +167,7 @@ julia> rgbs = [ B = Float32(i + 200) / 255 ) for i = 0:7:49 ] -8-element Vector{NamedTuple{(:R, :G, :B), Tuple{Float32, Float32, Float32}}}: +8-element Vector{@NamedTuple{R::Float32, G::Float32, B::Float32}}: (R = 0.0, G = 0.39215687, B = 0.78431374) (R = 0.02745098, G = 0.41960785, B = 0.8117647) (R = 0.05490196, G = 0.44705883, B = 0.8392157) diff --git a/src/llvm_intrin/memory_addr.jl b/src/llvm_intrin/memory_addr.jl index 8ad9cf44..8b08ffb7 100644 --- a/src/llvm_intrin/memory_addr.jl +++ b/src/llvm_intrin/memory_addr.jl @@ -166,10 +166,12 @@ function offset_ptr( end # after this block, we will have a index_gep_typ pointer if iszero(O) - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = bitcast ptr %0 to ptr" # do-nothing + else "%ptr.$(i) = inttoptr $(JULIAPOINTERTYPE) %0 to $(index_gep_typ)*" - ) + end + push!(instrs, instr) i += 1 else # !iszero(O) if !iszero(O & (tzf - 1)) # then index_gep_typ works for the constant offset @@ -179,28 +181,36 @@ function offset_ptr( offset_gep_typ = index_gep_typ offset = O >> tz end - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = bitcast ptr %0 to ptr" # do-nothing + else "%ptr.$(i) = inttoptr $(JULIAPOINTERTYPE) %0 to $(offset_gep_typ)*" - ) + end + push!(instrs, instr) i += 1 - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = getelementptr inbounds $(offset_gep_typ), ptr %ptr.$(i-1), i32 $(offset)" + else "%ptr.$(i) = getelementptr inbounds $(offset_gep_typ), $(offset_gep_typ)* %ptr.$(i-1), i32 $(offset)" - ) + end + push!(instrs, instr) i += 1 if forgep && iszero(M) && (iszero(X) || isone(X)) - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" # do-nothiong + else "%ptr.$(i) = ptrtoint $(offset_gep_typ)* %ptr.$(i-1) to $(JULIAPOINTERTYPE)" - ) + end + push!(instrs, instr) i += 1 return instrs, i elseif offset_gep_typ != index_gep_typ - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" # do-nothing + else "%ptr.$(i) = bitcast $(offset_gep_typ)* %ptr.$(i-1) to $(index_gep_typ)*" - ) + end + push!(instrs, instr) i += 1 end end @@ -216,22 +226,28 @@ function offset_ptr( "%$(indname) = mul nsw <$W x i$(ibits)> %$(indargname), $(constmul)" ) end - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), ptr %ptr.$(i-1), <$W x i$(ibits)> %$(indname)" + else "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), $(index_gep_typ)* %ptr.$(i-1), <$W x i$(ibits)> %$(indname)" - ) + end + push!(instrs, instr) i += 1 if forgep - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x ptr>" # do-nothing + else "%ptr.$(i) = ptrtoint <$W x $index_gep_typ*> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" - ) + end + push!(instrs, instr) i += 1 elseif index_gep_typ != vtyp - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x ptr>" # do-nothing + else "%ptr.$(i) = bitcast <$W x $index_gep_typ*> %ptr.$(i-1) to <$W x $typ*>" - ) + end + push!(instrs, instr) i += 1 end return instrs, i @@ -276,10 +292,12 @@ function offset_ptr( end # TODO: if X != 1 and X != 0, check if it is better to gep -> gep, or broadcast -> add -> gep end - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), ptr %ptr.$(i-1), i$(ibits) %$(indname)" + else "%ptr.$(i) = getelementptr inbounds $(index_gep_typ), $(index_gep_typ)* %ptr.$(i-1), i$(ibits) %$(indname)" - ) + end + push!(instrs, instr) i += 1 end # ind_type === :Integer || ind_type === :StaticInt @@ -291,37 +309,47 @@ function offset_ptr( vityp = "i$(8vibytes)" vi = join((X * w for w ∈ 0:W-1), ", $vityp ") if typ !== index_gep_typ - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" # do-nothing + else "%ptr.$(i) = bitcast $(index_gep_typ)* %ptr.$(i-1) to $(typ)*" - ) + end + push!(instrs, instr) i += 1 end - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = getelementptr inbounds $(typ), ptr %ptr.$(i-1), <$W x $(vityp)> <$vityp $vi>" + else "%ptr.$(i) = getelementptr inbounds $(typ), $(typ)* %ptr.$(i-1), <$W x $(vityp)> <$vityp $vi>" - ) + end + push!(instrs, instr) i += 1 if forgep - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = bitcast <$W x ptr> %ptr.$(i-1) to <$W x ptr>" # do-nothing + else "%ptr.$(i) = ptrtoint <$W x $typ*> %ptr.$(i-1) to <$W x $JULIAPOINTERTYPE>" - ) + end + push!(instrs, instr) i += 1 end return instrs, i end if forgep # if forgep, just return now - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" # do-nothing + else "%ptr.$(i) = ptrtoint $(index_gep_typ)* %ptr.$(i-1) to $JULIAPOINTERTYPE" - ) + end + push!(instrs, instr) i += 1 elseif index_gep_typ != vtyp - push!( - instrs, + instr = @static if USE_OPAQUE_PTR + "%ptr.$(i) = bitcast ptr %ptr.$(i-1) to ptr" # do-nothing + else "%ptr.$(i) = bitcast $(index_gep_typ)* %ptr.$(i-1) to $(vtyp)*" - ) + end + push!(instrs, instr) i += 1 end instrs, i @@ -373,14 +401,22 @@ function gep_quote( # ::Type{T}, ind_type::Symbol, indargname = '1', ibytes::Int, W::Int = 1, X::Int = 1, M::Int = 1, O::Int = 0, forgep::Bool = false instrs, i = offset_ptr(T_sym, ind_type, '1', ibits, W, X, M, O, true, rs) ret = Expr(:curly, :Ptr, T_sym) - lret = JULIAPOINTERTYPE + lret = @static if USE_OPAQUE_PTR + "ptr" + else + JULIAPOINTERTYPE + end if gep_returns_vector(W, X, M, ind_type) ret = Expr(:curly, :_Vec, W, ret) lret = "<$W x $lret>" end args = Expr(:curly, :Tuple, Expr(:curly, :Ptr, T_sym)) - largs = String[JULIAPOINTERTYPE] + largs = @static if USE_OPAQUE_PTR + String["ptr"] + else + String[JULIAPOINTERTYPE] + end arg_syms = Union{Symbol,Expr}[:ptr] if !(iszero(M) || ind_type === :StaticInt) @@ -816,33 +852,40 @@ function vload_quote_llvmcall_core( truncate_mask!(instrs, '1' + dynamic_index, W, 0, false) end end + @static if USE_OPAQUE_PTR + ptr_type = "ptr" + vptr_type = "ptr" + else + ptr_type = "$typ*" + vptr_type = "$vtyp*" + end if grv loadinstr = "$vtyp @llvm.masked.gather." * suffix(W, T_sym) * '.' * ptr_suffix(W, T_sym) - decl *= "declare $loadinstr(<$W x $typ*>, i32, <$W x i1>, $vtyp)" + decl *= "declare $loadinstr(<$W x $ptr_type>, i32, <$W x i1>, $vtyp)" m = mask ? m = "%mask.0" : llvmconst(W, "i1 1") passthrough = mask ? "zeroinitializer" : "undef" push!( instrs, - "%res = call $loadinstr(<$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m, $vtyp $passthrough)" * + "%res = call $loadinstr(<$W x $ptr_type> %ptr.$(i-1), i32 $alignment, <$W x i1> $m, $vtyp $passthrough)" * LOAD_SCOPE_TBAA_FLAGS ) elseif mask suff = suffix(W, T_sym) loadinstr = "$vtyp @llvm.masked.load." * suff * ".p0" * suff - decl *= "declare $loadinstr($vtyp*, i32, <$W x i1>, $vtyp)" + decl *= "declare $loadinstr($vptr_type, i32, <$W x i1>, $vtyp)" push!( instrs, - "%res = call $loadinstr($vtyp* %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0, $vtyp zeroinitializer)" * + "%res = call $loadinstr($vptr_type %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0, $vtyp zeroinitializer)" * LOAD_SCOPE_TBAA_FLAGS ) else push!( instrs, - "%res = load $vtyp, $vtyp* %ptr.$(i-1), align $alignment" * + "%res = load $vtyp, $vptr_type %ptr.$(i-1), align $alignment" * LOAD_SCOPE_TBAA_FLAGS ) end @@ -886,7 +929,11 @@ function vload_quote_llvmcall_core( end end args = Expr(:curly, :Tuple, Expr(:curly, :Ptr, T_sym)) - largs = String[JULIAPOINTERTYPE] + largs = @static if USE_OPAQUE_PTR + String["ptr"] + else + String[JULIAPOINTERTYPE] + end arg_syms = Union{Symbol,Expr}[:ptr] if dynamic_index push!(arg_syms, :(data(i))) @@ -1248,39 +1295,46 @@ function vstore_quote( else argtostore = "%1" end + @static if USE_OPAQUE_PTR + ptr_type = "ptr" + vptr_type = "ptr" + else + ptr_type = "$typ*" + vptr_type = "$vtyp*" + end if grv storeinstr = "void @llvm.masked.scatter." * suffix(W, T_sym) * '.' * ptr_suffix(W, T_sym) - decl *= "declare $storeinstr($vtyp, <$W x $typ*>, i32, <$W x i1>)" + decl *= "declare $storeinstr($vtyp, <$W x $ptr_type>, i32, <$W x i1>)" m = mask ? m = "%mask.0" : llvmconst(W, "i1 1") push!( instrs, - "call $storeinstr($vtyp $(argtostore), <$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)" * + "call $storeinstr($vtyp $(argtostore), <$W x $ptr_type> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)" * metadata ) - # push!(instrs, "call $storeinstr($vtyp $(argtostore), <$W x $typ*> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)") + # push!(instrs, "call $storeinstr($vtyp $(argtostore), <$W x $ptr_type> %ptr.$(i-1), i32 $alignment, <$W x i1> $m)") elseif mask suff = suffix(W, T_sym) storeinstr = "void @llvm.masked.store." * suff * ".p0" * suff - decl *= "declare $storeinstr($vtyp, $vtyp*, i32, <$W x i1>)" + decl *= "declare $storeinstr($vtyp, $vptr_type, i32, <$W x i1>)" push!( instrs, - "call $storeinstr($vtyp $(argtostore), $vtyp* %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0)" * + "call $storeinstr($vtyp $(argtostore), $vptr_type %ptr.$(i-1), i32 $alignment, <$W x i1> %mask.0)" * metadata ) elseif nontemporal push!( instrs, - "store $vtyp $(argtostore), $vtyp* %ptr.$(i-1), align $alignment, !nontemporal !{i32 1}" * + "store $vtyp $(argtostore), $vptr_type %ptr.$(i-1), align $alignment, !nontemporal !{i32 1}" * metadata ) else push!( instrs, - "store $vtyp $(argtostore), $vtyp* %ptr.$(i-1), align $alignment" * + "store $vtyp $(argtostore), $vptr_type %ptr.$(i-1), align $alignment" * metadata ) end @@ -1298,7 +1352,12 @@ function vstore_quote( else Expr(:curly, :Tuple, ptrtyp, T_sym) end - largs = String[JULIAPOINTERTYPE, vtyp] + largs = @static if USE_OPAQUE_PTR + String["ptr"] + else + String[JULIAPOINTERTYPE] + end + push!(largs, vtyp) arg_syms = Union{Symbol,Expr}[:ptr, Expr(:call, :data, :v)] if dynamic_index push!(arg_syms, :(data(i))) @@ -2170,12 +2229,20 @@ end "Prefetch intrinsic requires a read/write argument of 0, 1, but received $R." ) ) - decl = "declare void @llvm.prefetch(i8*, i32, i32, i32)" - instrs = """ + @static if USE_OPAQUE_PTR + decl = "declare void @llvm.prefetch(ptr, i32, i32, i32)" + instrs = """ + call void @llvm.prefetch(ptr %0, i32 $R, i32 $L, i32 1) + ret void + """ + else + decl = "declare void @llvm.prefetch(i8*, i32, i32, i32)" + instrs = """ %addr = inttoptr $JULIAPOINTERTYPE %0 to i8* call void @llvm.prefetch(i8* %addr, i32 $R, i32 $L, i32 1) ret void - """ + """ + end llvmcall_expr( decl, instrs, @@ -2228,8 +2295,13 @@ end @generated function lifetime_start!(ptr::Ptr{T}, ::Val{L}) where {L,T} ptyp = LLVM_TYPES[T] - decl = "declare void @llvm.lifetime.start(i64, $ptyp* nocapture)" - instrs = "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $ptyp*\ncall void @llvm.lifetime.start(i64 $L, $ptyp* %ptr)\nret void" + @static if USE_OPAQUE_PTR + decl = "declare void @llvm.lifetime.start(i64, ptr nocapture)" + instrs = "call void @llvm.lifetime.start(i64 $L, ptr %0)\nret void" + else + decl = "declare void @llvm.lifetime.start(i64, $ptyp* nocapture)" + instrs = "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $ptyp*\ncall void @llvm.lifetime.start(i64 $L, $ptyp* %ptr)\nret void" + end llvmcall_expr( decl, instrs, @@ -2244,8 +2316,13 @@ end end @generated function lifetime_end!(ptr::Ptr{T}, ::Val{L}) where {L,T} ptyp = LLVM_TYPES[T] - decl = "declare void @llvm.lifetime.end(i64, $ptyp* nocapture)" - instrs = "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $ptyp*\ncall void @llvm.lifetime.end(i64 $L, $ptyp* %ptr)\nret void" + @static if USE_OPAQUE_PTR + decl = "declare void @llvm.lifetime.end(i64, ptr nocapture)" + instrs = "call void @llvm.lifetime.end(i64 $L, ptr %0)\nret void" + else + decl = "declare void @llvm.lifetime.end(i64, $ptyp* nocapture)" + instrs = "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $ptyp*\ncall void @llvm.lifetime.end(i64 $L, $ptyp* %ptr)\nret void" + end llvmcall_expr( decl, instrs, @@ -2284,12 +2361,18 @@ end vtyp = "<$W x $typ>" mtyp_input = LLVM_TYPES[U] mtyp_trunc = "i$W" - instrs = String["%ptr = inttoptr $JULIAPOINTERTYPE %1 to $typ*"] + @static if USE_OPAQUE_PTR + ptr_type = "ptr" + instrs = String["%ptr = bitcast ptr %1 to ptr"] + else + ptr_type = "$typ*" + instrs = String["%ptr = inttoptr $JULIAPOINTERTYPE %1 to $ptr_type"] + end truncate_mask!(instrs, '2', W, 0) - decl = "declare void @llvm.masked.compressstore.$(suffix(W,T))($vtyp, $typ*, <$W x i1>)" + decl = "declare void @llvm.masked.compressstore.$(suffix(W,T))($vtyp, $ptr_type, <$W x i1>)" push!( instrs, - "call void @llvm.masked.compressstore.$(suffix(W,T))($vtyp %0, $typ* %ptr, <$W x i1> %mask.0)\nret void" + "call void @llvm.masked.compressstore.$(suffix(W,T))($vtyp %0, $ptr_type %ptr, <$W x i1> %mask.0)\nret void" ) llvmcall_expr( decl, @@ -2314,17 +2397,23 @@ end mtyp_input = LLVM_TYPES[U] mtyp_trunc = "i$W" instrs = String[] - push!(instrs, "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $typ*") + @static if USE_OPAQUE_PTR + ptr_type = "ptr" + push!(instrs, "%ptr = bitcast ptr %1 to ptr") + else + ptr_type = "$typ*" + push!(instrs, "%ptr = inttoptr $JULIAPOINTERTYPE %0 to $ptr_type") + end if mtyp_input == mtyp_trunc push!(instrs, "%mask = bitcast $mtyp_input %1 to <$W x i1>") else push!(instrs, "%masktrunc = trunc $mtyp_input %1 to $mtyp_trunc") push!(instrs, "%mask = bitcast $mtyp_trunc %masktrunc to <$W x i1>") end - decl = "declare $vtyp @llvm.masked.expandload.$(suffix(W,T))($typ*, <$W x i1>, $vtyp)" + decl = "declare $vtyp @llvm.masked.expandload.$(suffix(W,T))($ptr_type, <$W x i1>, $vtyp)" push!( instrs, - "%res = call $vtyp @llvm.masked.expandload.$(suffix(W,T))($typ* %ptr, <$W x i1> %mask, $vtyp zeroinitializer)\nret $vtyp %res" + "%res = call $vtyp @llvm.masked.expandload.$(suffix(W,T))($ptr_type %ptr, <$W x i1> %mask, $vtyp zeroinitializer)\nret $vtyp %res" ) llvmcall_expr( decl, diff --git a/src/llvm_intrin/vbroadcast.jl b/src/llvm_intrin/vbroadcast.jl index 29cae454..ef4aac43 100644 --- a/src/llvm_intrin/vbroadcast.jl +++ b/src/llvm_intrin/vbroadcast.jl @@ -167,9 +167,15 @@ end ptyp = JULIAPOINTERTYPE vtyp = "<$W x $typ>" alignment = Base.datatype_alignment(T) - instrs = """ - %ptr = inttoptr $ptyp %0 to $typ* - %res = load $typ, $typ* %ptr, align $alignment + instrs = @static if USE_OPAQUE_PTR + "%res = load $typ, ptr %0, align $alignment" + else + """ + %ptr = inttoptr $ptyp %0 to $typ* + %res = load $typ, $typ* %ptr, align $alignment + """ + end + instrs *= """ %ie = insertelement $vtyp undef, $typ %res, i32 0 %v = shufflevector $vtyp %ie, $vtyp undef, <$W x i32> zeroinitializer ret $vtyp %v diff --git a/src/llvm_types.jl b/src/llvm_types.jl index 266ceb2c..04564287 100644 --- a/src/llvm_types.jl +++ b/src/llvm_types.jl @@ -114,6 +114,14 @@ function _get_alignment(W::Int, sym::Symbol)::Int end end +""" +use opaque pointer + +Ref: +- Switch LLVM codegen of Ptr{T} to an actual pointer type. + https://github.com/JuliaLang/julia/pull/53687 +""" +const USE_OPAQUE_PTR = VERSION >= v"1.12-DEV" const JULIAPOINTERTYPE = 'i' * string(8sizeof(Int)) vtype(W, typ::String) = (isone(abs(W)) ? typ : "<$W x $typ>")::String diff --git a/test/runtests.jl b/test/runtests.jl index 7afe72e4..1f8363db 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,7 +9,10 @@ include("testsetup.jl") println("Aqua.test_all") t0 = time_ns() deps_compat = VERSION <= v"1.8" || isempty(VERSION.prerelease) - Aqua.test_all(VectorizationBase; deps_compat = deps_compat) + Aqua.test_all(VectorizationBase; + deps_compat = deps_compat, + piracies=false + ) println("Aqua took $((time_ns() - t0)*1e-9) seconds") # @test isempty(detect_unbound_args(VectorizationBase)) # @test isempty(detect_ambiguities(VectorizationBase))