Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 19 additions & 20 deletions .github/workflows/ci-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
push:
paths:
- '.github/workflows/ci-linux.yml'
- 'Setup.hs'
- 'stack*.yaml'
- '*.cabal'
- '*/src/**'
Expand All @@ -18,20 +19,20 @@ jobs:
strategy:
matrix:
ghc:
- "8.10"
- "8.8"
- "8.6"
- "8.4"
- "8.2"
- "8.0"
- "7.8"
cuda:
- "10.2"
- "10.1"
- "10.0"
- "9.10"
- "9.8"
- "9.6"
- "9.4"
- "9.2"
- "9.1"
- "9.0"
# - "8.10" # save some resources
# - "8.8"
# - "8.6"
- "8.4"
cuda:
- "13.0"
- "12.9"
# - "12.5" # save some resources

# include:
# - os: windows-latest
Expand All @@ -43,16 +44,16 @@ jobs:
HADDOCK_FLAGS: "--haddock --no-haddock-deps --no-haddock-hyperlink-source --haddock-arguments=\"--no-print-missing-docs\""

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v5

- run: ln -s stack-${{ matrix.ghc }}.yaml stack.yaml

- uses: actions/cache@v2
- uses: actions/cache@v4
with:
path: snapshot.pkgdb
key: ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cuda }}-snapshot.pkgdb

- uses: actions/cache@v2
- uses: actions/cache@v4
with:
path: |
~/.local/bin
Expand All @@ -61,7 +62,6 @@ jobs:
.stack-work
key: ${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cuda }}-${{ hashFiles('stack.yaml') }}-${{ hashFiles('snapshot.pkgdb') }}
restore-keys: |
${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cuda }}-${{ hashFiles('stack.yaml') }}-${{ hashFiles('snapshot.pkgdb') }}
${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cuda }}-${{ hashFiles('stack.yaml') }}-
${{ runner.os }}-${{ matrix.ghc }}-${{ matrix.cuda }}-

Expand All @@ -80,10 +80,9 @@ jobs:
- name: Install CUDA
run: |
MATRIX_CUDA=${{ matrix.cuda }}
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
sudo add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
UBUNTUVER=$(sed -n '/^DISTRIB_RELEASE=/ { s/.*=//; s/\.//; p; q; }' /etc/lsb-release)
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu$UBUNTUVER/x86_64/cuda-keyring_1.1-1_all.deb
sudo dpkg -i cuda-keyring_1.1-1_all.deb
sudo apt-get update
sudo apt-get -y install cuda-${MATRIX_CUDA/./-}
echo "CUDA_HOME=/usr/local/cuda-${MATRIX_CUDA}" >> $GITHUB_ENV
Expand Down
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,19 @@ Notable changes to the project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/).

**NOTE:** The version numbers of this package roughly align to the latest
version of the CUDA API this package is built against This means that this
version of the CUDA API this package is built against. This means that this
package _DOES NOT_ follow the PVP, or indeed any sensible version scheme,
because NVIDIA are A-OK introducing breaking changes in minor updates.


## [0.13.0.0] - ???
### Added
* Support for CUDA-13

### Removed
* A number of fields from DeviceProperties, as they have been removed from
`cudaDeviceProp`. Use `Foreign.CUDA.Driver.Device.attribute` to query them.

## [0.12.8.0] - 2025-08-21
### Added
* Support for CUDA-12
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,13 @@ Here is an incomplete historical list of missing bindings. Pull requests welcome
- cuGraphMemAllocNodeGetParams
- cuGraphMemFreeNodeGetParams

### CUDA-12
### CUDA >= 12

A lot. PRs welcome.

- CUDA-12.3
- Edge data in the driver Graph API (`cuGraphAddDependencies_v2` etc.)


# Old compatibility notes

Expand Down
19 changes: 13 additions & 6 deletions Setup.hs
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
-- Decouple from GHC's default language setting, so that it's easier
-- to maintain compatibility with old GHCs.
{-# LANGUAGE Haskell2010 #-}
{-# OPTIONS_GHC -Wall #-}

{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TupleSections #-}

-- The MIN_VERSION_Cabal macro was introduced with Cabal-1.24 (??)
#ifndef MIN_VERSION_Cabal
#define MIN_VERSION_Cabal(major1,major2,minor) 0
#endif

import Distribution.PackageDescription
import Distribution.PackageDescription hiding ( Flag )
import Distribution.Simple
import Distribution.Simple.BuildPaths
import Distribution.Simple.Command
Expand Down Expand Up @@ -249,7 +257,9 @@ cudaLibraryPaths (Platform arch os) installPath = [ installPath </> path | path
(Windows, X86_64) -> ["lib/x64"]
(OSX, _) -> ["lib"] -- MacOS does not distinguish 32- vs. 64-bit paths
(_, X86_64) -> ["lib64", "lib"] -- prefer lib64 for 64-bit systems
#if MIN_VERSION_Cabal(2,4,0)
(_, AArch64) -> ["lib64", "lib"]
#endif
_ -> ["lib"] -- otherwise


Expand Down Expand Up @@ -734,7 +744,6 @@ die' _ = die
-- Compatibility across Cabal 3.14 symbolic paths.
-- If we want to drop pre-Cabal-3.14 compatibility at some point, this should all be merged in above.

workingDirFlag :: HasCommonFlags flags => flags -> Flag CWDPath
lbiCWD :: LocalBuildInfo -> Maybe CWDPath

#if MIN_VERSION_Cabal(3,14,0)
Expand All @@ -745,6 +754,7 @@ type CWDPath = SymbolicPath CWD ('Dir Pkg)
regVerbosity :: RegisterFlags -> Flag Verbosity
regVerbosity = setupVerbosity . registerCommonFlags

workingDirFlag :: HasCommonFlags flags => flags -> Flag CWDPath
workingDirFlag = setupWorkingDir . getCommonFlags

lbiCWD = flagToMaybe . setupWorkingDir . configCommonFlags . LBC.configFlags . LBC.packageBuildDescr . localBuildDescr
Expand Down Expand Up @@ -772,6 +782,7 @@ type CWDPath = ()

-- regVerbosity is still present as an actual field in Cabal 3.12

workingDirFlag :: flags -> Flag CWDPath
workingDirFlag _ = NoFlag

lbiCWD _ = Nothing
Expand All @@ -785,10 +796,6 @@ makeRelativePathEx = id
interpretSymbolicPath :: Maybe CWDPath -> FilePath -> FilePath
interpretSymbolicPath _ = id

type HasCommonFlags flags = () :: Constraint
getCommonFlags :: flags -> ()
getCommonFlags _ = ()

readHookedBuildInfoWithCWD :: Verbosity -> Maybe CWDPath -> FilePath -> IO HookedBuildInfo
readHookedBuildInfoWithCWD verb _ path = readHookedBuildInfo verb path
#endif
Expand Down
22 changes: 22 additions & 0 deletions cbits/stubs.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*/

#include "cbits/stubs.h"
#include <string.h> // memset

#if CUDART_VERSION >= 7000
cudaError_t cudaLaunchKernel_simple(const void *func, unsigned int gridX, unsigned int gridY, unsigned int gridZ, unsigned int blockX, unsigned int blockY, unsigned int blockZ, void **args, size_t sharedMem, cudaStream_t stream)
Expand Down Expand Up @@ -196,7 +197,13 @@ CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev)

/*
 * Un-versioned cuCtxCreate entry point with the classic three-argument shape.
 *
 * Before CUDA 13 this forwards directly to cuCtxCreate_v2. From CUDA 13 on it
 * forwards to cuCtxCreate_v4, which takes an additional CUctxCreateParams
 * pointer; the structure handed over is entirely zero-filled.
 *
 * Returns whatever the underlying driver call returns.
 */
CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev)
{
#if CUDA_VERSION < 13000
  return cuCtxCreate_v2(pctx, flags, dev);
#else
  CUctxCreateParams createParams;

  /* Clear every byte so that no field is passed uninitialised. */
  memset(&createParams, 0, sizeof(createParams));
  return cuCtxCreate_v4(pctx, &createParams, flags, dev);
#endif
}

CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name)
Expand Down Expand Up @@ -424,3 +431,18 @@ CUresult CUDAAPI cuGraphExecKernelNodeSetParams_simple(CUgraphExec hGraphExec, C
}
#endif

#if CUDA_VERSION >= 13000
/*
 * Translate a CUDA <=12 style target device into the CUmemLocation expected by
 * the CUDA 13 cuMemAdvise / cuMemPrefetchAsync entry points.
 *
 * In the CUDA <=12 API the special value CU_DEVICE_CPU selected host memory.
 * The location-based API expresses that with CU_MEM_LOCATION_TYPE_HOST rather
 * than with a device ordinal, so it has to be mapped explicitly; every other
 * value is passed through as a device location.
 */
static CUmemLocation cuda_stub_location_of_device(CUdevice device)
{
  if (device == CU_DEVICE_CPU) {
    /* The id carries no device ordinal for a plain host location. */
    return (CUmemLocation){.type = CU_MEM_LOCATION_TYPE_HOST, .id = 0};
  }
  return (CUmemLocation){.type = CU_MEM_LOCATION_TYPE_DEVICE, .id = device};
}

// This is the signature of the CUDA <=12 version; much easier to shim here than in Haskell.
// `device` may be a device handle, or CU_DEVICE_CPU to refer to host memory.
CUresult cuMemAdvise_device(CUdeviceptr dptr, size_t count, CUmem_advise advice, CUdevice device)
{
  return cuMemAdvise(dptr, count, advice, cuda_stub_location_of_device(device));
}

// This is the signature of the CUDA <=12 version; much easier to shim here than in Haskell.
// `device` may be a device handle, or CU_DEVICE_CPU to prefetch to host memory.
CUresult cuMemPrefetchAsync_device(CUdeviceptr dptr, size_t count, CUdevice device, CUstream hStream)
{
  // flags is reserved and must be 0 in CUDA 13
  return cuMemPrefetchAsync(dptr, count, cuda_stub_location_of_device(device), 0, hStream);
}
#endif

5 changes: 5 additions & 0 deletions cbits/stubs.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,11 @@ CUresult CUDAAPI cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags);
CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int Flags);
#endif

#if CUDA_VERSION >= 13000
/*
 * Shims defined in cbits/stubs.c that keep the CUDA <=12 calling shape of
 * cuMemAdvise and cuMemPrefetchAsync: the target is given as a plain CUdevice.
 * Only declared when building against CUDA 13 or later.
 */
CUresult cuMemAdvise_device(CUdeviceptr dptr, size_t count, CUmem_advise advice, CUdevice device);
CUresult cuMemPrefetchAsync_device(CUdeviceptr dptr, size_t count, CUdevice device, CUstream hStream);
#endif

#ifdef __cplusplus
}
#endif
Expand Down
6 changes: 3 additions & 3 deletions cuda.cabal
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cabal-version: 1.24

Name: cuda
Version: 0.12.8.0
Version: 0.13.0.0
Synopsis: FFI binding to the CUDA interface for programming NVIDIA GPUs
Description:
The CUDA library provides a direct, general purpose C-like SPMD programming
Expand Down Expand Up @@ -30,7 +30,7 @@ Description:
.
* "Foreign.CUDA.Runtime"
.
Tested with library versions up to CUDA-12.8. See also the
Tested with library versions up to CUDA-13.0. See also the
<https://travis-ci.org/tmcdonell/cuda travis-ci.org> build matrix for
version compatibility.
.
Expand Down Expand Up @@ -177,6 +177,6 @@ source-repository head
source-repository this
type: git
location: https://github.com/tmcdonell/cuda
tag: v0.12.8.0
tag: v0.13.0.0

-- vim: nospell
72 changes: 56 additions & 16 deletions examples/src/deviceQueryDrv/DeviceQuery.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
module Main where

import Control.Monad
import Foreign.Marshal.Utils ( toBool )
import Numeric
import Prelude hiding ( (<>) )
import Text.PrettyPrint
Expand All @@ -15,6 +16,32 @@ import Foreign.CUDA.Analysis as CUDA
import qualified Foreign.CUDA.Driver as CUDA


-- From CUDA 13 onwards a number of device properties can only be obtained
-- through cu(da)DeviceGetAttribute. 'AttrProperties' gathers the ones this
-- program prints; when built against an older CUDA it is an empty placeholder
-- so that the rest of the module can be written uniformly.
$(if CUDA.libraryVersion < 13000
    then [d|
      -- Declared with record syntax so that the {..} pattern syntax is accepted.
      data AttrProperties = AttrProperties { _dummyAttrProperty :: () }

      getAttrProperties :: Device -> IO AttrProperties
      getAttrProperties _ = return (AttrProperties ())
      |]
    else [d|
      data AttrProperties = AttrProperties
        { clockRate                :: !Int          -- ^ Clock frequency in kilohertz
        , memClockRate             :: !Int          -- ^ Peak memory clock frequency in kilohertz
        , computeMode              :: !ComputeMode
        , kernelExecTimeoutEnabled :: !Bool         -- ^ Whether there is a runtime limit on kernels
        , singleToDoublePerfRatio  :: !Int          -- ^ Ratio of single precision performance (in floating-point operations per second) to double precision performance
        }

      -- Query each attribute in field order and assemble the record.
      getAttrProperties :: Device -> IO AttrProperties
      getAttrProperties dev =
        AttrProperties
          <$> query CUDA.ClockRate
          <*> query CUDA.MemoryClockRate
          <*> (toEnum <$> query CUDA.ComputeMode)
          <*> (toBool <$> query CUDA.KernelExecTimeout)
          <*> query CUDA.SingleToDoublePrecisionPerfRatio
        where
          query = CUDA.attribute dev
      |])


main :: IO ()
main = do
version <- CUDA.driverVersion
Expand All @@ -32,16 +59,17 @@ main = do
infos <- forM [0 .. numDevices-1] $ \n -> do
dev <- CUDA.device n
prp <- CUDA.props dev
return (n, dev, prp)
prp2 <- getAttrProperties dev
return (n, dev, prp, prp2)

forM_ infos $ \(n, dev, prp) -> do
forM_ infos $ \(n, dev, prp, prp2) -> do
p2p <- statP2P dev prp infos
printf "\nDevice %d: %s\n%s\n" n (deviceName prp) (statDevice prp)
printf "\nDevice %d: %s\n%s\n" n (deviceName prp) (statDevice prp prp2)
unless (null p2p) $ printf "%s\n" p2p


statDevice :: DeviceProperties -> String
statDevice dev@DeviceProperties{..} =
statDevice :: DeviceProperties -> AttrProperties -> String
statDevice dev@DeviceProperties{..} AttrProperties{..} =
let
DeviceResources{..} = deviceResources dev

Expand Down Expand Up @@ -69,22 +97,34 @@ statDevice dev@DeviceProperties{..} =
,(" 2D:", grid maxTextureDim2D)
,(" 3D:", cube maxTextureDim3D)
,("Texture alignment:", text $ showBytes textureAlignment)
,("Maximum memory pitch:", text $ showBytes memPitch)
,("Concurrent kernel execution:", bool concurrentKernels)
,("Concurrent copy and execution:", bool deviceOverlap <> text (printf ", with %d copy engine%s" asyncEngineCount (if asyncEngineCount > 1 then "s" else "")))
,("Runtime limit on kernel execution:", bool kernelExecTimeoutEnabled)
,("Maximum memory pitch:", text $ showBytes memPitch)]++

$(if CUDA.libraryVersion >= 13000 then [|
[("Concurrent copy and kernel execution:", bool concurrentKernels <> text (printf " with %d copy engine%s" asyncEngineCount (if asyncEngineCount > 1 then "s" else "")))]
|] else [|
[("Concurrent kernel execution:", bool concurrentKernels)
,("Concurrent copy and execution:", bool deviceOverlap <> text (printf ", with %d copy engine%s" asyncEngineCount (if asyncEngineCount > 1 then "s" else "")))]
|])++

[("Runtime limit on kernel execution:", bool kernelExecTimeoutEnabled)
,("Integrated GPU sharing host memory:", bool integrated)
,("Host page-locked memory mapping:", bool canMapHostMemory)
,("ECC memory support:", bool eccEnabled)
,("Unified addressing (UVA):", bool unifiedAddressing)]++
#if __GLASGOW_HASKELL__ > 710

$(if CUDA.libraryVersion >= 8000 then [|
[("Single to double precision performance:", text $ printf "%d : 1" singleToDoublePerfRatio)
,("Supports compute pre-emption:", bool preemption)]|] else [|[]|])++
,("Supports compute pre-emption:", bool preemption)]
|] else [|[]|])++

$(if CUDA.libraryVersion >= 9000 then [|
[("Supports cooperative launch:", bool cooperativeLaunch)
,("Supports multi-device cooperative launch:", bool cooperativeLaunchMultiDevice)]|] else [|[]|])++
#endif
[("Supports cooperative launch:", bool cooperativeLaunch)]
|] else [|[]|])++

$(if CUDA.libraryVersion >= 9000 && CUDA.libraryVersion < 13000 then [|
[("Supports multi-device cooperative launch:", bool cooperativeLaunchMultiDevice)]
|] else [|[]|])++

[("PCI bus/location:", int (busID pciInfo) <> char '/' <> int (deviceID pciInfo))
,("Compute mode:", text (show computeMode))
]
Expand All @@ -94,7 +134,7 @@ statDevice dev@DeviceProperties{..} =
$ text (describe computeMode)


statP2P :: Device -> DeviceProperties -> [(Int, Device, DeviceProperties)] -> IO String
statP2P :: Device -> DeviceProperties -> [(Int, Device, DeviceProperties, AttrProperties)] -> IO String
statP2P dev prp infos
| CUDA.libraryVersion < 4000 = return []

Expand All @@ -103,7 +143,7 @@ statP2P dev prp infos
| otherwise
= let
go [] = return []
go ((m, peer, pp):is) =
go ((m, peer, pp, _):is) =
if dev == peer
then go is
else do
Expand Down
Loading