1 change: 1 addition & 0 deletions include/tvm/runtime/device_api.h
@@ -51,6 +51,7 @@ enum DeviceAttrKind : int {
kDriverVersion = 12,
kL2CacheSizeBytes = 13,
kTotalGlobalMemory = 14,
+  kAvailableGlobalMemory = 15,
};

#ifdef TVM_KALLOC_ALIGNMENT
16 changes: 15 additions & 1 deletion python/tvm/_ffi/runtime_ctypes.py
@@ -539,11 +539,25 @@ def total_global_memory(self):
Returns
-------
total_global_memory : int or None
-            Return the global memory available on device in bytes.
+            Return the total size of global memory on device in bytes.
Return None if the device does not support this feature.
"""
return self._GetDeviceAttr(self.device_type, self.device_id, 14)

+    @property
+    def available_global_memory(self):
+        """Return size of the available global memory.
+
+        Supported devices include CUDA.
+
+        Returns
+        -------
+        available_global_memory : int or None
+            Return the amount of unallocated global memory on device in bytes.
+            Return None if the device does not support this feature.
+        """
+        return self._GetDeviceAttr(self.device_type, self.device_id, 15)

def texture_spatial_limit(self):
"""Returns limits for textures by spatial dimensions

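The new property mirrors the existing total_global_memory query. A minimal usage sketch (assuming a CUDA-enabled build; the device index 0 is arbitrary):

    import tvm

    dev = tvm.cuda(0)
    # None is returned when the backend does not implement
    # kAvailableGlobalMemory (currently only CUDA does).
    free_bytes = dev.available_global_memory
    total_bytes = dev.total_global_memory
    if free_bytes is not None:
        print(f"{free_bytes / 2**20:.1f} MiB free of {total_bytes / 2**20:.1f} MiB")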
6 changes: 6 additions & 0 deletions src/runtime/cuda/cuda_device_api.cc
@@ -121,6 +121,12 @@ class CUDADeviceAPI final : public DeviceAPI {
*rv = total_global_memory;
return;
}
+    case kAvailableGlobalMemory: {
+      size_t free_mem, total_mem;
+      CUDA_CALL(cudaMemGetInfo(&free_mem, &total_mem));
+      *rv = static_cast<int64_t>(free_mem);
+      return;
+    }
}
*rv = value;
}
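Note that cudaMemGetInfo reports free memory for the device as a whole, so the value reflects allocations made by all processes and can change between successive calls. A quick sanity check from the Python side (a sketch only, assuming an otherwise idle CUDA device; attribute kind 15 corresponds to kAvailableGlobalMemory in the enum above):

    import tvm

    dev = tvm.cuda(0)
    # The raw attribute query should agree with the property wrapper,
    # barring concurrent allocations between the two calls.
    raw = dev._GetDeviceAttr(dev.device_type, dev.device_id, 15)
    assert raw == dev.available_global_memory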
6 changes: 6 additions & 0 deletions src/runtime/opencl/opencl_device_api.cc
@@ -214,6 +214,12 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
*rv = static_cast<int64_t>(total_global_memory);
return;
}

+    case kAvailableGlobalMemory:
+      // Not currently implemented.  Based on
+      // https://stackoverflow.com/a/3568223, may not be implementable
+      // at all through the OpenCL API.
+      break;
}
}

4 changes: 4 additions & 0 deletions src/runtime/rocm/rocm_device_api.cc
@@ -136,6 +136,10 @@ class ROCMDeviceAPI final : public DeviceAPI {
*rv = total_global_memory;
return;
}

+    case kAvailableGlobalMemory:
+      // Not currently implemented.
+      break;
}
*rv = value;
}
5 changes: 5 additions & 0 deletions src/runtime/vulkan/vulkan_device_api.cc
@@ -168,6 +168,11 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
*rv = device(index).compute_memory_size;
return;
}

+    case kAvailableGlobalMemory:
+      // Not currently implemented.  Will only be implementable for
+      // devices that support the VK_EXT_memory_budget extension.
+      break;
}
}

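Since the OpenCL, ROCm, and Vulkan backends fall through without setting the return value, the query surfaces as None on the Python side, matching the docstring above. A sketch of defensive handling in caller code (the halving fallback is a hypothetical budget heuristic, not part of this change):

    import tvm

    dev = tvm.vulkan(0)
    free_bytes = dev.available_global_memory
    if free_bytes is None:
        # Backend cannot report free memory; fall back to assuming
        # half of the total global memory is usable.
        free_bytes = dev.total_global_memory // 2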
70 changes: 49 additions & 21 deletions tests/python/all-platform-minimal-test/test_runtime_ndarray.py
@@ -16,33 +16,63 @@
# under the License.
"""Basic runtime enablement test."""

-import tvm
-from tvm import te
+import math
+
+import pytest
import numpy as np
+
+import tvm
import tvm.testing
+from tvm import te

+dtype = tvm.testing.parameter("uint8", "int8", "uint16", "int16", "uint32", "int32", "float32")


+def test_nd_create(target, dev, dtype):
+    x = np.random.randint(0, 10, size=(3, 4))
+    x = np.array(x, dtype=dtype)
+    y = tvm.nd.array(x, device=dev)
+    z = y.copyto(dev)
+    assert y.dtype == x.dtype
+    assert y.shape == x.shape
+    assert isinstance(y, tvm.nd.NDArray)
+    np.testing.assert_equal(x, y.numpy())
+    np.testing.assert_equal(x, z.numpy())
+
+    # no need here, just to test usability
+    dev.sync()


+def test_memory_usage(target, dev, dtype):
+    available_memory_before = dev.available_global_memory
+    if available_memory_before is None:
+        pytest.skip(reason=f"Target '{target}' does not support queries of available memory")
+
+    arr = tvm.nd.empty([1024, 1024], dtype=dtype, device=dev)
+    available_memory_after = dev.available_global_memory
+
+    num_elements = math.prod(arr.shape)
+    element_nbytes = tvm.runtime.DataType(dtype).itemsize()
+    expected_memory_after = available_memory_before - num_elements * element_nbytes
+
+    # Allocations may be padded for alignment, rounded up to a page
+    # boundary, or enlarged by additional device-side bookkeeping
+    # required by the TVM backend or the driver.  Therefore, the
+    # available memory may decrease by more than the requested amount.
+    assert available_memory_after <= expected_memory_after
+
+    # TVM's NDArray type is a reference-counted handle to the
+    # underlying allocation.  After the last reference to an NDArray
+    # is cleared, the backing allocation is freed.
+    del arr
+
-@tvm.testing.uses_gpu
-def test_nd_create():
-    for target, dev in tvm.testing.enabled_targets():
-        for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32"]:
-            x = np.random.randint(0, 10, size=(3, 4))
-            x = np.array(x, dtype=dtype)
-            y = tvm.nd.array(x, device=dev)
-            z = y.copyto(dev)
-            assert y.dtype == x.dtype
-            assert y.shape == x.shape
-            assert isinstance(y, tvm.nd.NDArray)
-            np.testing.assert_equal(x, y.numpy())
-            np.testing.assert_equal(x, z.numpy())
-            # no need here, just to test usablity
-            dev.sync()
+    assert dev.available_global_memory == available_memory_before


def test_fp16_conversion():
n = 100

-    for (src, dst) in [("float32", "float16"), ("float16", "float32")]:
+    for src, dst in [("float32", "float16"), ("float16", "float32")]:
A = te.placeholder((n,), dtype=src)
B = te.compute((n,), lambda i: A[i].astype(dst))

@@ -66,6 +96,4 @@ def test_dtype():


if __name__ == "__main__":
-    test_nd_create()
-    test_fp16_conversion()
-    test_dtype()
+    tvm.testing.main()