diff --git a/CMakeLists.txt b/CMakeLists.txt
index 787e2c1..9690ff1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,6 +33,9 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON)
 
 project(SimpleSYCLSamples VERSION 1.0)
 
+# Opt-in switch (off unless requested): also target nvptx64-nvidia-cuda.
+option(SAMPLES_BUILD_CUDA "Enable support for the nvptx64-nvidia-cuda SYCL target" OFF)
+
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
 
 if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
diff --git a/samples/10_queueexperiments/CMakeLists.txt b/samples/10_queueexperiments/CMakeLists.txt
new file mode 100644
index 0000000..43982b8
--- /dev/null
+++ b/samples/10_queueexperiments/CMakeLists.txt
@@ -0,0 +1,25 @@
+# Copyright (c) 2022 Ben Ashbaugh
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+add_sycl_sample(
+    TEST
+    NUMBER 10
+    TARGET queueexperiments
+    SOURCES main.cpp )
diff --git a/samples/10_queueexperiments/main.cpp b/samples/10_queueexperiments/main.cpp
new file mode 100644
index 0000000..1e1e009
--- /dev/null
+++ b/samples/10_queueexperiments/main.cpp
@@ -0,0 +1,522 @@
+/*
+// Copyright (c) 2022 Ben Ashbaugh
+//
+// SPDX-License-Identifier: MIT
+*/
+
+#include <CL/sycl.hpp>
+#include <popl/popl.hpp>
+
+#include <algorithm>
+#include <chrono>
+#include <cstdio>
+#include <limits>
+#include <vector>
+
+using namespace cl;
+using test_clock = std::chrono::high_resolution_clock;
+
+// Number of buffers (and USM allocations) created up front; also the cap for -k.
+constexpr int maxKernels = 256;
+// Every experiment is timed this many times and the fastest run is reported.
+constexpr int testIterations = 32;
+
+// State shared by all experiments.
+struct Params
+{
+    sycl::platform platform;
+    sycl::device device;
+
+    sycl::context context;
+    sycl::queue queue;  // only used to (re)initialize buffers and USM memory
+
+    std::vector<sycl::buffer<float>> buffers;
+    std::vector<float*> dptrs;  // stays empty when the USM tests are skipped
+
+    int numIterations = 1;
+    size_t numElements = 1;
+};
+
+// Device-side busy work: counts from 0 up to 1 in steps of 1e-6, `iterations`
+// times over. Returns the final counter value, or 0 when `iterations` is not
+// positive, so callers never read an indeterminate value.
+static inline float spin(const int iterations)
+{
+    float result = 0.0f;
+    for (int iter = 0; iter < iterations; iter++) {
+        result = 0.0f;
+        while (result < 1.0f) result += 1e-6f;
+    }
+    return result;
+}
+
+// Kernel functor: spins, then adds the result to an element of a buffer.
+class TimeSink {
+public:
+    TimeSink(sycl::accessor<float> _dst, int _iterations) :
+        dst(_dst), iterations(_iterations) {}
+    void operator()(sycl::id<1> i) const {
+        dst[i] += spin(iterations);
+    }
+private:
+    sycl::accessor<float> dst;
+    int iterations;
+};
+
+// Kernel functor: like TimeSink, but the output is a read-only input plus the
+// spin result.
+class TimeSinkRO {
+public:
+    TimeSinkRO(
+        sycl::accessor<float> _dst,
+        sycl::accessor<float, 1, sycl::access::mode::read> _src,
+        int _iterations) :
+        dst(_dst), src(_src), iterations(_iterations) {}
+    void operator()(sycl::id<1> i) const {
+        dst[i] = src[i] + spin(iterations);
+    }
+private:
+    sycl::accessor<float> dst;
+    sycl::accessor<float, 1, sycl::access::mode::read> src;
+    int iterations;
+};
+
+// Kernel functor: like TimeSink, but updates memory through a raw pointer.
+class TimeSinkUSM {
+public:
+    TimeSinkUSM(float* _dst, int _iterations) :
+        dst(_dst), iterations(_iterations) {}
+    void operator()(sycl::id<1> i) const {
+        dst[i] += spin(iterations);
+    }
+private:
+    float* dst;
+    int iterations;
+};
+
+// Zero-fills every buffer and waits for the fills to finish.
+static void init(Params& params)
+{
+    for (auto& buffer : params.buffers) {
+        params.queue.submit([&](sycl::handler& h) {
+            sycl::accessor acc{buffer, h};
+            h.fill(acc, 0.0f);
+        });
+    }
+    params.queue.wait();
+}
+
+// Zero-fills every USM allocation and waits for the fills to finish.
+static void init_usm(Params& params)
+{
+    for (auto& dptr : params.dptrs) {
+        params.queue.fill(dptr, 0.0f, params.numElements);
+    }
+    params.queue.wait();
+}
+
+// Prints the experiment label; flushed so it shows up while the test runs.
+static void print_test_header(const char* name, const int numKernels)
+{
+    printf("%40s (n=%3d): ", name, numKernels); fflush(stdout);
+}
+
+// Calls runOnce() testIterations times and prints the fastest wall-clock time.
+// runOnce must submit the work and block until it has completed.
+template <typename RunOnce>
+static void report_best_time(RunOnce&& runOnce)
+{
+    float best = std::numeric_limits<float>::max();
+    for (int test = 0; test < testIterations; test++) {
+        const auto start = test_clock::now();
+        runOnce();
+        const auto end = test_clock::now();
+
+        const std::chrono::duration<float> elapsed_seconds = end - start;
+        best = std::min(best, elapsed_seconds.count());
+    }
+    printf("Finished in %f seconds\n", best);
+}
+
+// Submits one TimeSink kernel that updates params.buffers[bufferIndex].
+static void submit_time_sink(sycl::queue& queue, Params& params, const int bufferIndex)
+{
+    queue.submit([&](sycl::handler& h) {
+        sycl::accessor acc{params.buffers[bufferIndex], h};
+        h.parallel_for(params.numElements, TimeSink(acc, params.numIterations));
+    });
+}
+
+// Submits one TimeSinkUSM kernel that updates params.dptrs[ptrIndex].
+static void submit_time_sink_usm(sycl::queue& queue, Params& params, const int ptrIndex)
+{
+    queue.parallel_for(params.numElements, TimeSinkUSM(params.dptrs[ptrIndex], params.numIterations));
+}
+
+// Waits on each queue in turn.
+static void wait_all(std::vector<sycl::queue>& queues)
+{
+    for (auto& queue : queues) {
+        queue.wait();
+    }
+}
+
+// Buffers: numKernels kernels, each on its own buffer, in one in-order queue.
+static void go_in_order_queue(Params& params, const int numKernels)
+{
+    init(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    sycl::queue queue(params.context, params.device, sycl::property::queue::in_order());
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            submit_time_sink(queue, params, i);
+        }
+        queue.wait();
+    });
+}
+
+// Buffers: every kernel updates buffer 0, in one queue without in_order.
+static void go_out_of_order_queue_deps(Params& params, const int numKernels)
+{
+    init(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    sycl::queue queue(params.context, params.device);
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            submit_time_sink(queue, params, 0);
+        }
+        queue.wait();
+    });
+}
+
+// Buffers: each kernel on its own buffer, in one queue without in_order.
+static void go_out_of_order_queue_no_deps(Params& params, const int numKernels)
+{
+    init(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    sycl::queue queue(params.context, params.device);
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            submit_time_sink(queue, params, i);
+        }
+        queue.wait();
+    });
+}
+
+// Buffers: like the no_deps test, but every kernel also reads one shared buffer
+// through a read-only accessor.
+static void go_out_of_order_queue_ro_dep(Params& params, const int numKernels)
+{
+    init(params);
+
+    sycl::buffer<float> robuffer{sycl::range{params.numElements}};
+    params.queue.submit([&](sycl::handler& h) {
+        sycl::accessor acc{robuffer, h};
+        h.fill(acc, 0.0f);
+    });
+    // Finish the fill before timing starts, as init() does for the other buffers.
+    params.queue.wait();
+
+    print_test_header(__FUNCTION__, numKernels);
+
+    sycl::queue queue(params.context, params.device);
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            queue.submit([&](sycl::handler& h) {
+                sycl::accessor acc{params.buffers[i], h};
+                sycl::accessor roacc{robuffer, h, sycl::read_only};
+                h.parallel_for(params.numElements, TimeSinkRO(acc, roacc, params.numIterations));
+            });
+        }
+        queue.wait();
+    });
+}
+
+// Buffers: one kernel per in-order queue; all queues share params.context.
+static void go_multiple_in_order_queues(Params& params, const int numKernels)
+{
+    init(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    std::vector<sycl::queue> queues;
+    for (int i = 0; i < numKernels; i++) {
+        queues.push_back(sycl::queue{params.context, params.device, sycl::property::queue::in_order()});
+    }
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            submit_time_sink(queues[i], params, i);
+        }
+        wait_all(queues);
+    });
+}
+
+// Buffers: one kernel per queue (no in_order); all queues share params.context.
+static void go_multiple_out_of_order_queues(Params& params, const int numKernels)
+{
+    init(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    std::vector<sycl::queue> queues;
+    for (int i = 0; i < numKernels; i++) {
+        queues.push_back(sycl::queue{params.context, params.device});
+    }
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            submit_time_sink(queues[i], params, i);
+        }
+        wait_all(queues);
+    });
+}
+
+// Buffers: one kernel per in-order queue; each queue is created from the device
+// alone rather than from params.context.
+static void go_multiple_context_in_order_queues(Params& params, const int numKernels)
+{
+    init(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    std::vector<sycl::queue> queues;
+    for (int i = 0; i < numKernels; i++) {
+        queues.push_back(sycl::queue{params.device, sycl::property::queue::in_order()});
+    }
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            submit_time_sink(queues[i], params, i);
+        }
+        wait_all(queues);
+    });
+}
+
+// USM: numKernels kernels, each on its own allocation, in one in-order queue.
+static void go_in_order_queue_usm(Params& params, const int numKernels)
+{
+    init_usm(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    sycl::queue queue(params.context, params.device, sycl::property::queue::in_order());
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            submit_time_sink_usm(queue, params, i);
+        }
+        queue.wait();
+    });
+}
+
+// USM: each kernel waits on the event returned by the previous submission.
+static void go_out_of_order_queue_usm_deps(Params& params, const int numKernels)
+{
+    init_usm(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    sycl::queue queue(params.context, params.device);
+
+    report_best_time([&] {
+        sycl::event dependency;
+        for (int i = 0; i < numKernels; i++) {
+            dependency = queue.parallel_for(params.numElements, dependency, TimeSinkUSM(params.dptrs[i], params.numIterations));
+        }
+        queue.wait();
+    });
+}
+
+// USM: each kernel on its own allocation, in one queue without in_order.
+static void go_out_of_order_queue_usm_no_deps(Params& params, const int numKernels)
+{
+    init_usm(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    sycl::queue queue(params.context, params.device);
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            submit_time_sink_usm(queue, params, i);
+        }
+        queue.wait();
+    });
+}
+
+// USM: one kernel per in-order queue; all queues share params.context.
+static void go_multiple_in_order_queues_usm(Params& params, const int numKernels)
+{
+    init_usm(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    std::vector<sycl::queue> queues;
+    for (int i = 0; i < numKernels; i++) {
+        queues.push_back(sycl::queue{params.context, params.device, sycl::property::queue::in_order()});
+    }
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            submit_time_sink_usm(queues[i], params, i);
+        }
+        wait_all(queues);
+    });
+}
+
+// USM: one kernel per queue (no in_order); all queues share params.context.
+static void go_multiple_out_of_order_queues_usm(Params& params, const int numKernels)
+{
+    init_usm(params);
+    print_test_header(__FUNCTION__, numKernels);
+
+    std::vector<sycl::queue> queues;
+    for (int i = 0; i < numKernels; i++) {
+        queues.push_back(sycl::queue{params.context, params.device});
+    }
+
+    report_best_time([&] {
+        for (int i = 0; i < numKernels; i++) {
+            submit_time_sink_usm(queues[i], params, i);
+        }
+        wait_all(queues);
+    });
+}
+
+int main(int argc, char** argv)
+{
+    Params params;
+
+    int platformIndex = 0;
+    int deviceIndex = 0;
+    int numKernels = -1;
+    bool testMultipleContexts = false;
+
+    {
+        popl::OptionParser op("Supported Options");
+        op.add<popl::Value<int>>("p", "platform", "Platform Index", platformIndex, &platformIndex);
+        op.add<popl::Value<int>>("d", "device", "Device Index", deviceIndex, &deviceIndex);
+        op.add<popl::Value<int>>("k", "kernels", "Kernels to Execute (-1 for all)", numKernels, &numKernels);
+        op.add<popl::Value<int>>("i", "iterations", "Iterations in Each Kernel", params.numIterations, &params.numIterations);
+        op.add<popl::Value<size_t>>("e", "elements", "Number of ND-Range Elements", params.numElements, &params.numElements);
+        op.add<popl::Switch>("", "multicontexts", "Run the Multiple Context Tests", &testMultipleContexts);
+        bool printUsage = false;
+        try {
+            op.parse(argc, argv);
+        } catch (const std::exception& e) {
+            fprintf(stderr, "Error: %s\n\n", e.what());
+            printUsage = true;
+        }
+
+        if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) {
+            fprintf(stderr,
+                "Usage: queueexperiments [options]\n"
+                "%s", op.help().c_str());
+            return -1;
+        }
+    }
+
+    if (numKernels > maxKernels) {
+        printf("Number of kernels is %d, which exceeds the maximum of %d.\n", numKernels, maxKernels);
+        printf("The number of kernels will be set to %d instead.\n", maxKernels);
+        numKernels = maxKernels;
+    }
+
+    // Validate the indices rather than indexing past the end of the lists.
+    const auto platforms = sycl::platform::get_platforms();
+    if (platformIndex < 0 || static_cast<size_t>(platformIndex) >= platforms.size()) {
+        fprintf(stderr, "Error: platform index %d is out of range.\n", platformIndex);
+        return -1;
+    }
+    params.platform = platforms[platformIndex];
+    printf("Running on SYCL platform: %s\n", params.platform.get_info<sycl::info::platform::name>().c_str());
+
+    const auto devices = params.platform.get_devices();
+    if (deviceIndex < 0 || static_cast<size_t>(deviceIndex) >= devices.size()) {
+        fprintf(stderr, "Error: device index %d is out of range.\n", deviceIndex);
+        return -1;
+    }
+    params.device = devices[deviceIndex];
+    printf("Running on SYCL device: %s\n", params.device.get_info<sycl::info::device::name>().c_str());
+
+    printf("Initializing tests...\n");
+
+    params.context = sycl::context{ params.device };
+    params.queue = sycl::queue{ params.context, params.device };
+
+    for (int i = 0; i < maxKernels; i++) {
+        params.buffers.push_back(sycl::buffer<float>{sycl::range{params.numElements}});
+    }
+
+    // The USM tests only run when the device reports device-allocation support.
+    const bool hasDeviceUSM = params.device.get_info<sycl::info::device::usm_device_allocations>();
+    if (hasDeviceUSM) {
+        for (int i = 0; i < maxKernels; i++) {
+            params.dptrs.push_back(sycl::malloc_device<float>(params.numElements, params.device, params.context));
+        }
+    } else {
+        printf("Skipping USM tests - device does not support USM.\n");
+    }
+
+    printf("... done!\n");
+
+    std::vector<int> counts;
+    if (numKernels < 0) {
+        counts.assign({1, 2, 4, 8, 16});
+    } else {
+        counts.assign({numKernels});
+    }
+
+    for (auto& count : counts) {
+        go_in_order_queue(params, count);
+    }
+    for (auto& count : counts) {
+        go_out_of_order_queue_deps(params, count);
+    }
+    for (auto& count : counts) {
+        go_multiple_in_order_queues(params, count);
+    }
+    for (auto& count : counts) {
+        go_out_of_order_queue_no_deps(params, count);
+    }
+    for (auto& count : counts) {
+        go_out_of_order_queue_ro_dep(params, count);
+    }
+    for (auto& count : counts) {
+        go_multiple_out_of_order_queues(params, count);
+    }
+    if (hasDeviceUSM) {
+        for (auto& count : counts) {
+            go_in_order_queue_usm(params, count);
+        }
+        for (auto& count : counts) {
+            go_out_of_order_queue_usm_deps(params, count);
+        }
+        for (auto& count : counts) {
+            go_multiple_in_order_queues_usm(params, count);
+        }
+        for (auto& count : counts) {
+            go_out_of_order_queue_usm_no_deps(params, count);
+        }
+        for (auto& count : counts) {
+            go_multiple_out_of_order_queues_usm(params, count);
+        }
+    }
+
+    if (testMultipleContexts) {
+        for (auto& count : counts) {
+            go_multiple_context_in_order_queues(params, count);
+        }
+    }
+
+    printf("Cleaning up...\n");
+
+    for (auto& dptr : params.dptrs) {
+        sycl::free(dptr, params.context);
+    }
+
+    printf("... done!\n");
+
+    return 0;
+}
diff --git a/samples/11_threadconcurrent/CMakeLists.txt b/samples/11_threadconcurrent/CMakeLists.txt
new file mode 100644
index 0000000..5b0a147
--- /dev/null
+++ b/samples/11_threadconcurrent/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright (c) 2022 Ben Ashbaugh
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+add_sycl_sample(
+    TEST
+    NUMBER 11
+    TARGET thread_concurrency
+    SOURCES main.cpp )
+
+find_package(Threads REQUIRED)
+target_link_libraries(thread_concurrency PRIVATE Threads::Threads)
+
diff --git a/samples/11_threadconcurrent/main.cpp b/samples/11_threadconcurrent/main.cpp
new file mode 100644
index 0000000..c809641
--- /dev/null
+++ b/samples/11_threadconcurrent/main.cpp
@@ -0,0 +1,212 @@
+/*
+// Copyright (c) 2022 Ben Ashbaugh & Nico Galoppo
+//
+// SPDX-License-Identifier: MIT
+*/
+
+#include <CL/sycl.hpp>
+#include <popl/popl.hpp>
+
+#include <chrono>
+#include <cstdio>
+
+#include <thread>
+#include <vector>
+
+using namespace cl;
+using test_clock = std::chrono::high_resolution_clock;
+
+// Number of buffers created in main(); each test uses one buffer per thread.
+constexpr int maxThreads = 2;
+// Number of kernel submissions each test averages over.
+constexpr int testIterations = 32;
+
+// State shared by the tests; worker threads capture their own copy of it.
+struct Params
+{
+    sycl::platform platform;
+    sycl::device device;
+
+    sycl::context context;
+    sycl::queue queue;  // only used to zero-fill buffers
+
+    std::vector<sycl::buffer<float>> buffers;
+    std::vector<float*> dptrs;  // never populated by this sample
+
+    int numIterations = 1;
+    size_t numElements = 1;
+};
+
+// Kernel functor: spins, then adds the result to an element of a buffer.
+class TimeSink {
+public:
+    TimeSink(sycl::accessor<float> _dst, int _iterations) :
+        dst(_dst), iterations(_iterations) {}
+    void operator()(sycl::id<1> i) const {
+        // Start from 0 so the value is defined even when iterations <= 0.
+        float result = 0.0f;
+        for (int iter = 0; iter < iterations; iter++) {
+            result = 0.0f;
+            while (result < 1.0f) result += 1e-6f;
+        }
+        dst[i] += result;
+    }
+private:
+    sycl::accessor<float> dst;
+    int iterations;
+};
+
+// Zero-fills params.buffers[bufferIndex] and waits for the fill to finish.
+// Only the caller's own buffer is touched, so a thread that is still
+// initializing does not add work on a buffer another thread is timing.
+static void init(Params& params, const int bufferIndex)
+{
+    params.queue.submit([&](sycl::handler& h) {
+        sycl::accessor acc{params.buffers[bufferIndex], h};
+        h.fill(acc, 0.0f);
+    });
+    params.queue.wait();
+}
+
+// Times testIterations kernel launches one at a time (submit, then wait) and
+// prints the mean time per launch.
+static void go(Params& params, const int kernelNum)
+{
+    init(params, kernelNum);
+
+    sycl::queue queue(params.context, params.device);
+
+    float total = 0.0f;
+    for (int test = 0; test < testIterations; test++) {
+        const auto start = test_clock::now();
+
+        queue.submit([&](sycl::handler& h) {
+            sycl::accessor acc{params.buffers[kernelNum], h};
+            h.parallel_for(params.numElements, TimeSink(acc, params.numIterations));
+        });
+        queue.wait();
+
+        const auto end = test_clock::now();
+        const std::chrono::duration<float> elapsed_seconds = end - start;
+        total += elapsed_seconds.count();
+    }
+    // One printf per result so lines from concurrent threads are not interleaved.
+    printf("%40s (i=%3d): Average time: %f seconds\n", __FUNCTION__, kernelNum, total / testIterations);
+}
+
+// Submits testIterations kernel launches back to back, waits once, and prints
+// the elapsed time divided by the number of launches.
+static void go2(Params& params, const int kernelNum)
+{
+    init(params, kernelNum);
+
+    sycl::queue queue(params.context, params.device);
+
+    const auto start = test_clock::now();
+    for (int test = 0; test < testIterations; test++) {
+        queue.submit([&](sycl::handler& h) {
+            sycl::accessor acc{params.buffers[kernelNum], h};
+            h.parallel_for(params.numElements, TimeSink(acc, params.numIterations));
+        });
+    }
+    queue.wait();
+    const auto end = test_clock::now();
+
+    const std::chrono::duration<float> elapsed_seconds = end - start;
+    printf("%40s (i=%3d): Average time: %f seconds\n", __FUNCTION__, kernelNum, elapsed_seconds.count() / testIterations);
+}
+
+int main(int argc, char** argv)
+{
+    Params params;
+
+    int platformIndex = 0;
+    int deviceIndex = 0;
+
+    {
+        popl::OptionParser op("Supported Options");
+        op.add<popl::Value<int>>("p", "platform", "Platform Index", platformIndex, &platformIndex);
+        op.add<popl::Value<int>>("d", "device", "Device Index", deviceIndex, &deviceIndex);
+        op.add<popl::Value<int>>("i", "iterations", "Iterations in Each Kernel", params.numIterations, &params.numIterations);
+        op.add<popl::Value<size_t>>("e", "elements", "Number of ND-Range Elements", params.numElements, &params.numElements);
+        bool printUsage = false;
+        try {
+            op.parse(argc, argv);
+        } catch (const std::exception& e) {
+            fprintf(stderr, "Error: %s\n\n", e.what());
+            printUsage = true;
+        }
+
+        if (printUsage || !op.unknown_options().empty() || !op.non_option_args().empty()) {
+            fprintf(stderr,
+                "Usage: thread_concurrency [options]\n"
+                "%s", op.help().c_str());
+            return -1;
+        }
+    }
+
+    // TODO: add a -t/--threads option (the tests below always use two threads).
+
+    // Validate the indices rather than indexing past the end of the lists.
+    const auto platforms = sycl::platform::get_platforms();
+    if (platformIndex < 0 || static_cast<size_t>(platformIndex) >= platforms.size()) {
+        fprintf(stderr, "Error: platform index %d is out of range.\n", platformIndex);
+        return -1;
+    }
+    params.platform = platforms[platformIndex];
+    printf("Running on SYCL platform: %s\n", params.platform.get_info<sycl::info::platform::name>().c_str());
+
+    const auto devices = params.platform.get_devices();
+    if (deviceIndex < 0 || static_cast<size_t>(deviceIndex) >= devices.size()) {
+        fprintf(stderr, "Error: device index %d is out of range.\n", deviceIndex);
+        return -1;
+    }
+    params.device = devices[deviceIndex];
+    printf("Running on SYCL device: %s\n", params.device.get_info<sycl::info::device::name>().c_str());
+
+    printf("Initializing tests...\n");
+
+    params.context = sycl::context{ params.device };
+    params.queue = sycl::queue{ params.context, params.device };
+
+    for (int i = 0; i < maxThreads; i++) {
+        params.buffers.push_back(sycl::buffer<float>{sycl::range{params.numElements}});
+    }
+
+    printf("... done!\n");
+
+    printf("Testing without threads\n");
+    go(params, 0);
+
+    printf("Testing with threads\n");
+    {
+        std::thread t([params]() mutable {
+            go(params, 0);
+        });
+
+        go(params, 1);
+
+        t.join();
+    }
+
+    printf("Testing with threads 2\n");
+    {
+        std::thread t([params]() mutable {
+            go2(params, 0);
+        });
+
+        go2(params, 1);
+
+        t.join();
+    }
+
+    printf("Cleaning up...\n");
+
+    for (auto& dptr : params.dptrs) {
+        sycl::free(dptr, params.context);
+    }
+
+    printf("... done!\n");
+
+    return 0;
+}
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index 69701d0..4d55fc1 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -42,6 +42,11 @@ function(add_sycl_sample)
     #target_link_options(${SYCL_SAMPLE_TARGET} PRIVATE -fsycl )
     target_link_libraries(${SYCL_SAMPLE_TARGET} PRIVATE sycl -fsycl)
 
+    if(SAMPLES_BUILD_CUDA)
+        target_compile_options(${SYCL_SAMPLE_TARGET} PRIVATE -fsycl-targets=nvptx64-nvidia-cuda)
+        target_link_libraries(${SYCL_SAMPLE_TARGET} PRIVATE -fsycl-targets=nvptx64-nvidia-cuda)
+    endif()
+
     if (WIN32)
         target_compile_definitions(${SYCL_SAMPLE_TARGET} PRIVATE _CRT_SECURE_NO_WARNINGS)
         target_compile_options(${SYCL_SAMPLE_TARGET} PRIVATE /EHa)
@@ -66,5 +71,7 @@ endfunction()
 add_subdirectory( 00_enumsycl )
 add_subdirectory( 00_hellosycl )
 add_subdirectory( 04_julia )
+add_subdirectory( 10_queueexperiments )
+add_subdirectory( 11_threadconcurrent )
 add_subdirectory( dpcpp )
 