Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ version = "11.2.6"
[deps]
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
GPUArraysCore = "46192b85-c4d5-4398-a991-12ede77f4527"
GPUToolbox = "096a3bc2-3ced-46d0-87f4-dd12716f4bfc"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand All @@ -24,8 +25,9 @@ JLD2Ext = "JLD2"
[compat]
Adapt = "4.0"
GPUArraysCore = "= 0.2.0"
GPUToolbox = "0.2, 0.3, 1"
JLD2 = "0.4, 0.5, 0.6"
KernelAbstractions = "0.9.28, 0.10"
KernelAbstractions = "0.10"
LLVM = "3.9, 4, 5, 6, 7, 8, 9"
LinearAlgebra = "1"
Printf = "1"
Expand Down
3 changes: 3 additions & 0 deletions src/GPUArrays.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
module GPUArrays

using GPUToolbox
using KernelAbstractions
import KernelAbstractions.KernelIntrinsics as KI
using Serialization
using Random
using LinearAlgebra
Expand All @@ -26,6 +28,7 @@ include("host/construction.jl")
## integrations and specialized methods
include("host/base.jl")
include("host/indexing.jl")
include("host/reverse.jl")
include("host/broadcast.jl")
include("host/mapreduce.jl")
include("host/linalg.jl")
Expand Down
163 changes: 163 additions & 0 deletions src/host/reverse.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# reversing

# the kernel works by treating the array as 1d. after reversing by dimension x an element at
# pos [i1, i2, i3, ... , i{x}, ..., i{n}] will be at
# pos [i1, i2, i3, ... , d{x} - i{x} + 1, ..., i{n}] where d{x} is the size of dimension x

# out-of-place version, copying a single value per thread from input to output
# Out-of-place reverse: one thread copies a single element from `input` to `output`,
# mirroring its index along every dimension listed in `dims`.
function _reverse(input::AnyGPUArray{T, N}, output::AnyGPUArray{T, N};
                  dims=1:ndims(input)) where {T, N}
    @assert size(input) == size(output)
    # per-dimension flag: should this dimension be flipped?
    # (singleton dimensions are skipped — reversing them is a no-op)
    rev_dims = ntuple((d)-> d in dims && size(input, d) > 1, N)
    # mirror reference: the reversed index along dimension d is ref[d] - i[d]
    ref = size(input) .+ 1
    # converts an ND-index in the data array to the linear index
    lin_idx = LinearIndices(input)
    # converts a linear index in the data array to an ND-index
    nd_idx = CartesianIndices(input)

    ## COV_EXCL_START
    function rev_kernel(input, output)
        # global linear thread id, 1-based (group-local size * group id + local id)
        offset_in = Int32(KI.get_local_size().x) * (KI.get_group_id().x - 1i32)
        index_in = offset_in + KI.get_local_id().x

        # guard: more threads than elements may have been launched
        @inbounds if index_in <= length(input)
            idx = Tuple(nd_idx[index_in])
            # flip only the flagged dimensions
            idx = ifelse.(rev_dims, ref .- idx, idx)
            index_out = lin_idx[idx...]
            output[index_out] = input[index_in]
        end
        return
    end
    ## COV_EXCL_STOP

    backend = get_backend(input)
    # compile without launching so the maximum work-group size can be queried first
    kernel = KI.@kikernel backend launch=false rev_kernel(input, output)
    nthreads = KI.kernel_max_work_group_size(backend, kernel; max_work_items=length(input))
    ngroups = cld(length(input), nthreads)

    kernel(input, output; numworkgroups=ngroups, workgroupsize=nthreads)
end

# in-place version, swapping elements on half the number of threads
# In-place reverse: swaps element pairs, so only roughly half the elements need a thread.
function _reverse!(data::AnyGPUArray{T, N}; dims=1:ndims(data)) where {T, N}
    # per-dimension flag: should this dimension be flipped?
    # (singleton dimensions are skipped — reversing them is a no-op)
    rev_dims = ntuple((d)-> d in dims && size(data, d) > 1, N)
    # the last dimension actually being reversed; the thread grid covers only
    # half of the array along this dimension, each thread swapping a mirrored pair
    half_dim = findlast(rev_dims)
    if isnothing(half_dim)
        # no reverse operation needed at all in this case.
        return
    end
    # mirror reference: the reversed index along dimension d is ref[d] - i[d]
    ref = size(data) .+ 1
    # converts an ND-index in the data array to the linear index
    lin_idx = LinearIndices(data)
    # same shape as `data`, but halved (rounding up) along `half_dim`
    reduced_size = ntuple((d)->ifelse(d==half_dim, cld(size(data,d),2), size(data,d)), N)
    reduced_length = prod(reduced_size)
    # converts a linear index in a reduced array to an ND-index, but using the reduced size
    nd_idx = CartesianIndices(reduced_size)

    ## COV_EXCL_START
    function rev_kernel!(data)
        # global linear thread id, 1-based (group-local size * group id + local id)
        offset_in = Int32(KI.get_local_size().x) * (KI.get_group_id().x - 1i32)
        index_in = offset_in + KI.get_local_id().x

        @inbounds if index_in <= reduced_length
            idx = Tuple(nd_idx[index_in])
            # linear index of this thread's element in the full (non-reduced) array
            index_in = lin_idx[idx...]
            # linear index of its mirror partner
            idx = ifelse.(rev_dims, ref .- idx, idx)
            index_out = lin_idx[idx...]

            # `<` skips self-mapping elements (the exact midpoint of an odd
            # dimension) and prevents a pair from being swapped twice
            if index_in < index_out
                temp = data[index_out]
                data[index_out] = data[index_in]
                data[index_in] = temp
            end
        end
        return
    end
    ## COV_EXCL_STOP

    # NOTE: we launch slightly more than half the number of elements in the array as threads.
    # The last non-singleton dimension along which to reverse is used to define how the array is split.
    # Only the middle row in case of an odd array dimension could cause trouble, but this is prevented by
    # ignoring the threads that cross the mid-point

    backend = get_backend(data)
    # compile without launching so the maximum work-group size can be queried first
    kernel = KI.@kikernel backend launch=false rev_kernel!(data)
    nthreads = KI.kernel_max_work_group_size(backend, kernel; max_work_items=reduced_length)
    ngroups = cld(reduced_length, nthreads)

    kernel(data; numworkgroups=ngroups, workgroupsize=nthreads)
end


# n-dimensional API

# N-dimensional in-place reverse, matching Base's keyword interface.
# Validates `dims`, then dispatches to the pair-swapping kernel.
function Base.reverse!(data::AnyGPUArray{T, N}; dims=:) where {T, N}
    dims isa Colon && (dims = 1:ndims(data))
    applicable(iterate, dims) ||
        throw(ArgumentError("dimension $dims is not an iterable"))
    all(1 .≤ dims .≤ ndims(data)) ||
        throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(data))"))

    # skip the kernel launch when every requested dimension is singleton,
    # or when there is no data at all
    trivial = all(size(data)[[dims...]] .== 1) || isempty(data)
    trivial || _reverse!(data; dims=dims)

    return data
end

# out-of-place
# N-dimensional out-of-place reverse, matching Base's keyword interface.
# Validates `dims`, then reverses into a freshly allocated array.
function Base.reverse(input::AnyGPUArray{T, N}; dims=:) where {T, N}
    dims isa Colon && (dims = 1:ndims(input))
    applicable(iterate, dims) ||
        throw(ArgumentError("dimension $dims is not an iterable"))
    all(1 .≤ dims .≤ ndims(input)) ||
        throw(ArgumentError("dimension $dims is not 1 ≤ $dims ≤ $(ndims(input))"))

    # nothing to flip: every requested dimension is singleton, or the array is empty
    if all(size(input)[[dims...]] .== 1) || isempty(input)
        return copy(input)
    end

    output = similar(input)
    _reverse(input, output; dims=dims)
    return output
end


# 1-dimensional API

# in-place
# In-place reverse of the 1-d span `data[start:stop]` (Base-compatible positional API).
Base.@propagate_inbounds function Base.reverse!(data::AnyGPUVector{T}, start::Integer,
                                                stop::Integer=length(data)) where {T}
    # A span needs reversing whenever it holds at least two elements.
    # BUGFIX: the previous guard `stop - start > 1` skipped two-element spans
    # (stop - start == 1), leaving e.g. reverse!(v, 1, 2) unswapped.
    (stop - start >= 1) && _reverse!(view(data, start:stop))
    return data
end

# Whole-vector in-place reverse: delegate to the ranged method over the full span.
function Base.reverse!(data::AnyGPUVector{T}) where {T}
    return @inbounds reverse!(data, 1, length(data))
end

# out-of-place
# Out-of-place reverse of the 1-d span `input[start:stop]`; elements outside the
# span are copied over unchanged.
Base.@propagate_inbounds function Base.reverse(input::AnyGPUVector{T}, start::Integer,
                                               stop::Integer=length(input)) where {T}
    # Spans of fewer than two elements are unchanged by reversal: copy in one kernel.
    # BUGFIX: the previous guard `stop - start > 1` also took this shortcut for
    # two-element spans (stop - start == 1), returning an unreversed copy.
    (stop - start >= 1) || return copy(input)

    output = similar(input)

    # prefix before the reversed span
    start > 1 && copyto!(output, 1, input, 1, start-1)
    _reverse(view(input, start:stop), view(output, start:stop))
    # suffix after the reversed span (4-arg copyto! copies from sstart through the end)
    stop < length(input) && copyto!(output, stop+1, input, stop+1)

    return output
end

# Whole-vector out-of-place reverse: delegate to the ranged method over the full span.
function Base.reverse(data::AnyGPUVector{T}) where {T}
    return @inbounds reverse(data, 1, length(data))
end
55 changes: 55 additions & 0 deletions test/testsuite/base.jl
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,61 @@ end
gA = reshape(AT(A),4)
end

@testset "reverse" begin
    # 1-d out-of-place: full array, empty, single element, open/closed ranges,
    # and a degenerate single-element range.
    # NOTE(review): no two-element span (e.g. reverse(x, 1, 2)) is exercised —
    # the smallest non-trivial reversal; consider adding one.
    @test compare(x->reverse(x), AT, rand(Float32, 1000))
    @test compare(x->reverse(x), AT, rand(Float32, 0))
    @test compare(x->reverse(x), AT, rand(Float32, 1))
    @test compare(x->reverse(x, 10), AT, rand(Float32, 1000))
    @test compare(x->reverse(x, 10, 90), AT, rand(Float32, 1000))
    @test compare(x->reverse(x, 3, 3), AT, Float32[1,2,3,4,5])

    # 1-d in-place: same cases as above, mutating variants
    @test compare(x->reverse!(x), AT, rand(Float32, 1000))
    @test compare(x->reverse!(x), AT, rand(Float32, 0))
    @test compare(x->reverse!(x), AT, rand(Float32, 1))
    @test compare(x->reverse!(x, 10), AT, rand(Float32, 1000))
    @test compare(x->reverse!(x, 10, 90), AT, rand(Float32, 1000))
    @test compare(x->reverse!(x, 3, 3), AT, Float32[1,2,3,4,5])

    # n-d out-of-place and in-place, single reversed dimension;
    # shapes cover singleton dims, empty arrays, and larger 3-d arrays
    for shape in ([1, 2, 4, 3], [4, 2], [5], [0], [1], [2^5, 2^5, 2^5]),
        dim in 1:length(shape)
        @testset "Shape: $shape, Dim: $dim" begin
            @test compare(x->reverse(x; dims=dim), AT, rand(Float32, shape...))

            # in-place result checked against the CPU reference
            cpu = rand(Float32, shape...)
            gpu = AT(cpu)
            reverse!(gpu; dims=dim)
            @test Array(gpu) == reverse(cpu; dims=dim)
        end
    end

    # supports multidimensional reverse
    for shape in ([1,1,1,1], [1, 2, 4, 3], [2^5, 2^5, 2^5]),
        dim in ((1,2),(2,3),(1,3),:)
        @testset "Shape: $shape, Dim: $dim" begin
            @test compare(x->reverse(x; dims=dim), AT, rand(Float32, shape...))

            # in-place result checked against the CPU reference
            cpu = rand(Float32, shape...)
            gpu = AT(cpu)
            reverse!(gpu; dims=dim)
            @test Array(gpu) == reverse(cpu; dims=dim)
        end
    end

    # wrapped array
    @test compare(x->reverse(x), AT, reshape(rand(Float32, 2,2), 4))

    # error throwing: out-of-range dims must raise ArgumentError
    cpu = rand(Float32, 1,2,3,4)
    gpu = AT(cpu)
    @test_throws ArgumentError reverse!(gpu, dims=5)
    @test_throws ArgumentError reverse!(gpu, dims=0)
    @test_throws ArgumentError reverse(gpu, dims=5)
    @test_throws ArgumentError reverse(gpu, dims=0)
end

@testset "reinterpret" begin
A = Int32[-1,-2,-3]
dA = AT(A)
Expand Down
Loading