Documentation
¶
Index ¶
- func GetDeviceCount() (n int32, err error)
- func Malloc(mem cutil.Mem, sizet uint) error
- func Malloc3D(p *PitchedPtr, e Extent) error
- func Malloc3dArray(a *Array, desc *ChannelFormatDesc, e Extent, flag ArrayFlag) error
- func MallocArray(a *Array, desc *ChannelFormatDesc, width, height uint, flag ArrayFlag) error
- func MallocEx(w *gocu.Worker, mem cutil.Mem, sizet uint) error
- func MallocHost(mem cutil.Mem, sizet uint) error
- func MallocHostEx(w *gocu.Worker, mem cutil.Mem, sizet uint) error
- func MallocManagedGlobal(mem cutil.Mem, size uint) error
- func MallocManagedGlobalEx(w *gocu.Worker, mem cutil.Mem, size uint) error
- func MallocManagedHost(mem cutil.Mem, size uint) error
- func MallocManagedHostEx(w *gocu.Worker, mem cutil.Mem, size uint) error
- func Memcpy(dest, src cutil.Pointer, sizet uint, kind MemcpyKind) error
- func Memcpy2D(dest cutil.Pointer, dpitch uint, src cutil.Pointer, spitch uint, ...) error
- func Memcpy3D(m *Memcpy3DParams) error
- func Memcpy3DAsync(m *Memcpy3DParams, s gocu.Streamer) error
- func MemcpyAsync(dest, src cutil.Pointer, sizet uint, kind MemcpyKind, stream gocu.Streamer) error
- func MemcpyPeer(dest cutil.Pointer, ddev Device, src cutil.Pointer, sdev Device, sizet uint) error
- func MemcpyPeerAsync(dest cutil.Pointer, ddev Device, src cutil.Pointer, sdev Device, sizet uint, ...) error
- func MemcpyUS(dest, src unsafe.Pointer, sizet uint, kind MemcpyKind) error
- func Memset(mem cutil.Mem, value int32, count uint) error
- func MemsetUS(mem unsafe.Pointer, value int32, count uint) error
- func SetValidDevices(devices []Device) error
- func SyncNillStream() error
- type Array
- type ArrayFlag
- type Atribs
- type ChannelFormatDesc
- type ChannelFormatKind
- type Device
- func (d Device) AttrAsyncEngineCount() (int, error)
- func (d Device) AttrCanFlushRemoteWrites() (int, error)
- func (d Device) AttrCanMapHostMemory() (int, error)
- func (d Device) AttrCanUseHostPointerForRegisteredMem() (int, error)
- func (d Device) AttrClockRate() (int, error)
- func (d Device) AttrComputeCapabilityMajor() (int, error)
- func (d Device) AttrComputeCapabilityMinor() (int, error)
- func (d Device) AttrComputeMode() (int, error)
- func (d Device) AttrComputePreemptionSupported() (int, error)
- func (d Device) AttrConcurrentKernels() (int, error)
- func (d Device) AttrConcurrentManagedAccess() (int, error)
- func (d Device) AttrCooperativeLaunch() (int, error)
- func (d Device) AttrCooperativeMultiDeviceLaunch() (int, error)
- func (d Device) AttrDirectManagedMemAccessFromHost() (int, error)
- func (d Device) AttrEccEnabled() (int, error)
- func (d Device) AttrGlobalL1CacheSupported() (int, error)
- func (d Device) AttrGlobalMemoryBusWidth() (int, error)
- func (d Device) AttrGpuOverlap() (int, error)
- func (d Device) AttrHostNativeAtomicSupported() (int, error)
- func (d Device) AttrHostRegisterSupported() (int, error)
- func (d Device) AttrIntegrated() (int, error)
- func (d Device) AttrIsMultiGpuBoard() (int, error)
- func (d Device) AttrKernelExecTimeout() (int, error)
- func (d Device) AttrL2CacheSize() (int, error)
- func (d Device) AttrLocalL1CacheSupported() (int, error)
- func (d Device) AttrManagedMemory() (int, error)
- func (d Device) AttrMaxBlockDimX() (int, error)
- func (d Device) AttrMaxBlockDimY() (int, error)
- func (d Device) AttrMaxBlockDimZ() (int, error)
- func (d Device) AttrMaxGridDimX() (int, error)
- func (d Device) AttrMaxGridDimY() (int, error)
- func (d Device) AttrMaxGridDimZ() (int, error)
- func (d Device) AttrMaxPitch() (int, error)
- func (d Device) AttrMaxRegistersPerBlock() (int, error)
- func (d Device) AttrMaxRegistersPerMultiprocessor() (int, error)
- func (d Device) AttrMaxSharedMemoryPerBlock() (int, error)
- func (d Device) AttrMaxSharedMemoryPerBlockOptin() (int, error)
- func (d Device) AttrMaxSharedMemoryPerMultiprocessor() (int, error)
- func (d Device) AttrMaxSurface1DLayeredLayers() (int, error)
- func (d Device) AttrMaxSurface1DLayeredWidth() (int, error)
- func (d Device) AttrMaxSurface1DWidth() (int, error)
- func (d Device) AttrMaxSurface2DHeight() (int, error)
- func (d Device) AttrMaxSurface2DLayeredHeight() (int, error)
- func (d Device) AttrMaxSurface2DLayeredLayers() (int, error)
- func (d Device) AttrMaxSurface2DLayeredWidth() (int, error)
- func (d Device) AttrMaxSurface2DWidth() (int, error)
- func (d Device) AttrMaxSurface3DDepth() (int, error)
- func (d Device) AttrMaxSurface3DHeight() (int, error)
- func (d Device) AttrMaxSurface3DWidth() (int, error)
- func (d Device) AttrMaxSurfaceCubemapLayeredLayers() (int, error)
- func (d Device) AttrMaxSurfaceCubemapLayeredWidth() (int, error)
- func (d Device) AttrMaxSurfaceCubemapWidth() (int, error)
- func (d Device) AttrMaxTexture1DLayeredLayers() (int, error)
- func (d Device) AttrMaxTexture1DLayeredWidth() (int, error)
- func (d Device) AttrMaxTexture1DLinearWidth() (int, error)
- func (d Device) AttrMaxTexture1DMipmappedWidth() (int, error)
- func (d Device) AttrMaxTexture1DWidth() (int, error)
- func (d Device) AttrMaxTexture2DGatherHeight() (int, error)
- func (d Device) AttrMaxTexture2DGatherWidth() (int, error)
- func (d Device) AttrMaxTexture2DHeight() (int, error)
- func (d Device) AttrMaxTexture2DLayeredHeight() (int, error)
- func (d Device) AttrMaxTexture2DLayeredLayers() (int, error)
- func (d Device) AttrMaxTexture2DLayeredWidth() (int, error)
- func (d Device) AttrMaxTexture2DLinearHeight() (int, error)
- func (d Device) AttrMaxTexture2DLinearPitch() (int, error)
- func (d Device) AttrMaxTexture2DLinearWidth() (int, error)
- func (d Device) AttrMaxTexture2DMipmappedHeight() (int, error)
- func (d Device) AttrMaxTexture2DMipmappedWidth() (int, error)
- func (d Device) AttrMaxTexture2DWidth() (int, error)
- func (d Device) AttrMaxTexture3DDepth() (int, error)
- func (d Device) AttrMaxTexture3DDepthAlt() (int, error)
- func (d Device) AttrMaxTexture3DHeight() (int, error)
- func (d Device) AttrMaxTexture3DHeightAlt() (int, error)
- func (d Device) AttrMaxTexture3DWidth() (int, error)
- func (d Device) AttrMaxTexture3DWidthAlt() (int, error)
- func (d Device) AttrMaxTextureCubemapLayeredLayers() (int, error)
- func (d Device) AttrMaxTextureCubemapLayeredWidth() (int, error)
- func (d Device) AttrMaxTextureCubemapWidth() (int, error)
- func (d Device) AttrMaxThreadsPerBlock() (int, error)
- func (d Device) AttrMaxThreadsPerMultiProcessor() (int, error)
- func (d Device) AttrMemoryClockRate() (int, error)
- func (d Device) AttrMultiGpuBoardGroupID() (int, error)
- func (d Device) AttrMultiProcessorCount() (int, error)
- func (d Device) AttrPageableMemoryAccess() (int, error)
- func (d Device) AttrPageableMemoryAccessUsesHostPageTables() (int, error)
- func (d Device) AttrPciBusID() (int, error)
- func (d Device) AttrPciDeviceID() (int, error)
- func (d Device) AttrPciDomainID() (int, error)
- func (d Device) AttrSingleToDoublePrecisionPerfRatio() (int, error)
- func (d Device) AttrStreamPrioritiesSupported() (int, error)
- func (d Device) AttrSurfaceAlignment() (int, error)
- func (d Device) AttrTccDriver() (int, error)
- func (d Device) AttrTextureAlignment() (int, error)
- func (d Device) AttrTexturePitchAlignment() (int, error)
- func (d Device) AttrTotalConstantMemory() (int, error)
- func (d Device) AttrUnifiedAddressing() (int, error)
- func (d Device) AttrWarpSize() (int, error)
- func (d Device) CanAccessPeer(peer Device) (bool, error)
- func (d Device) DeviceSync() error
- func (d Device) DisablePeerAccess(peer Device) error
- func (d Device) EnablePeerAccess(peer Device) error
- func (d Device) Major() (int, error)
- func (d Device) MaxBlockDimXYZ() ([]int32, error)
- func (d Device) MaxGridDimXYZ() ([]int32, error)
- func (d Device) MaxThreadsPerBlock() (int32, error)
- func (d Device) MaxThreadsPerMultiProcessor() (int32, error)
- func (d Device) MemGetInfo() (free, total int, err error)
- func (d Device) MemPrefetchAsync(mem cutil.Mem, size uint, s gocu.Streamer) error
- func (d Device) Minor() (int, error)
- func (d Device) MultiProcessorCount() (int32, error)
- func (d Device) Reset() error
- func (d Device) Set() error
- type Error
- type Event
- type Extent
- type MemAttach
- type MemManager
- type MemType
- type Memcpy3DParams
- type MemcpyKind
- type PitchedPtr
- type Pos
- type Stream
- type StreamCaptureMode
- type StreamCaptureStatus
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func GetDeviceCount ¶
GetDeviceCount returns the number of devices.
func Malloc ¶
Malloc will allocate memory to the device the size that was passed. It will also set the finalizer for GC
func Malloc3D ¶
func Malloc3D(p *PitchedPtr, e Extent) error
Malloc3D - Allocates logical 1D, 2D, or 3D memory objects on the device.
func Malloc3dArray ¶
func Malloc3dArray(a *Array, desc *ChannelFormatDesc, e Extent, flag ArrayFlag) error
Malloc3dArray - Allocate an array on the device.
func MallocArray ¶
func MallocArray(a *Array, desc *ChannelFormatDesc, width, height uint, flag ArrayFlag) error
MallocArray - Allocate an array on the device.
func MallocEx ¶
MallocEx is like Malloc but it takes a worker and memory allocated to mem will be allocated to the context being used on that host thread. If w is nil then it will behave like Malloc
func MallocHost ¶
MallocHost will allocate memory on the host for cuda use.
func MallocHostEx ¶
MallocHostEx is like MallocHost but it takes a worker and memory allocated to mem will be allocated to the context being used on that host thread. If w is nil then it will behave like MallocHost
func MallocManagedGlobal ¶
MallocManagedGlobal Allocates memory on current devices.
func MallocManagedGlobalEx ¶
MallocManagedGlobalEx is like MallocManagedGlobal but it takes a worker and memory allocated to mem will be allocated to the context being used on that host thread. If w is nil then it will behave like MallocManagedGlobal
func MallocManagedHost ¶
MallocManagedHost uses the Unified memory management system and starts it off in the host. Memory is set to 0. It will also set a finalizer on the memory for GC
func MallocManagedHostEx ¶
MallocManagedHostEx is like MallocManagedHost but it takes a worker and memory allocated to mem will be allocated to the context being used on that host thread. If w is nil then it will behave like MallocManagedHost
func Memcpy ¶
func Memcpy(dest, src cutil.Pointer, sizet uint, kind MemcpyKind) error
Memcpy copies some memory from src to dest. If default is selected and if the system supports unified virtual addressing then the transfer is inferred.
func Memcpy2D ¶
func Memcpy2D(dest cutil.Pointer, dpitch uint, src cutil.Pointer, spitch uint, width, height uint, kind MemcpyKind) error
Memcpy2D copies some memory from src to dest. If default is selected and if the system supports unified virtual addressing then the transfer is inferred.
func Memcpy3D ¶
func Memcpy3D(m *Memcpy3DParams) error
Memcpy3D -Copies a matrix (height rows of width bytes each) from the memory area pointed to by src to the CUDA array dst starting at the upper left corner (wOffset, hOffset) where kind specifies the direction of the copy. m is created using CreateMemcpy3DParams
func Memcpy3DAsync ¶
func Memcpy3DAsync(m *Memcpy3DParams, s gocu.Streamer) error
Memcpy3DAsync -Copies a matrix (height rows of width bytes each) from the memory area pointed to by src to the CUDA array dst starting at the upper left corner (wOffset, hOffset) where kind specifies the direction of the copy. m is created using CreateMemcpy3DParams
func MemcpyAsync ¶
MemcpyAsync Copies data between host and device.
func MemcpyPeer ¶
MemcpyPeer Copies memory between two devices.
func MemcpyPeerAsync ¶
func MemcpyPeerAsync(dest cutil.Pointer, ddev Device, src cutil.Pointer, sdev Device, sizet uint, stream gocu.Streamer) error
MemcpyPeerAsync copies memory between two devices async.
func MemcpyUS ¶
func MemcpyUS(dest, src unsafe.Pointer, sizet uint, kind MemcpyKind) error
MemcpyUS will do a memcopy using unsafe pointers. It's a little lower level than the regular MemCpy
func SetValidDevices ¶
SetValidDevices takes a list of devices in terms of user priority for cuda execution
Types ¶
type ArrayFlag ¶
ArrayFlag are flags used for array
func (*ArrayFlag) Cubemap ¶
Cubemap - Allocates a cubemap CUDA array. Width must be equal to height, and depth must be six. If the cudaArrayLayered flag is also set, depth must be a multiple of six.
func (*ArrayFlag) Default ¶
Default - This flag's value is defined to be 0 and provides default array allocation
func (*ArrayFlag) Layered ¶
Layered - Allocates a layered CUDA array, with the depth extent indicating the number of layers
func (*ArrayFlag) SurfaceLoadStore ¶
SurfaceLoadStore - Allocates a CUDA array that could be read from or written to using a surface reference.
func (*ArrayFlag) TextureGather ¶
TextureGather - This flag indicates that texture gather operations will be performed on the CUDA array. Texture gather can only be performed on 2D CUDA arrays.
type Atribs ¶
type Atribs struct { Type MemType Device int32 DPtr unsafe.Pointer HPtr unsafe.Pointer Managed bool }
Atribs are a memory's attributes on the device side
type ChannelFormatDesc ¶
type ChannelFormatDesc C.struct_cudaChannelFormatDesc
ChannelFormatDesc describes a channel's format
func CreateChannelFormatDesc ¶
func CreateChannelFormatDesc(x, y, z, w int32, f ChannelFormatKind) ChannelFormatDesc
CreateChannelFormatDesc - Returns a channel descriptor with format f and number of bits of each component x, y, z, and w.
So a float needs to be 32bits.
unsigned is 8, 32 bits
signed is 8, 32 bits
type ChannelFormatKind ¶
type ChannelFormatKind C.enum_cudaChannelFormatKind
ChannelFormatKind is the kind of format the channel is in
func (*ChannelFormatKind) Float ¶
func (c *ChannelFormatKind) Float() ChannelFormatKind
Float - sets the channel format to Float
func (*ChannelFormatKind) Signed ¶
func (c *ChannelFormatKind) Signed() ChannelFormatKind
Signed - sets the channel format to Signed
func (*ChannelFormatKind) UnSigned ¶
func (c *ChannelFormatKind) UnSigned() ChannelFormatKind
UnSigned - sets the channel format to UnSigned
type Device ¶
Device is a struct that holds a device info.
func CreateDevice ¶
CreateDevice just creates a device it doesn't set it
func (Device) AttrAsyncEngineCount ¶
AttrAsyncEngineCount - Number of asynchronous engines
func (Device) AttrCanFlushRemoteWrites ¶
AttrCanFlushRemoteWrites - Device supports flushing of outstanding remote writes.
func (Device) AttrCanMapHostMemory ¶
AttrCanMapHostMemory - Device can map host memory into CUDA address space
func (Device) AttrCanUseHostPointerForRegisteredMem ¶
AttrCanUseHostPointerForRegisteredMem - Device can access host registered memory at the same virtual address as the CPU
func (Device) AttrClockRate ¶
AttrClockRate - Peak clock frequency in kilohertz
func (Device) AttrComputeCapabilityMajor ¶
AttrComputeCapabilityMajor - Major compute capability version number
func (Device) AttrComputeCapabilityMinor ¶
AttrComputeCapabilityMinor - Minor compute capability version number
func (Device) AttrComputeMode ¶
AttrComputeMode - Compute mode (See cudaComputeMode for details)
func (Device) AttrComputePreemptionSupported ¶
AttrComputePreemptionSupported - Device supports Compute Preemption
func (Device) AttrConcurrentKernels ¶
AttrConcurrentKernels - Device can possibly execute multiple kernels concurrently
func (Device) AttrConcurrentManagedAccess ¶
AttrConcurrentManagedAccess - Device can coherently access managed memory concurrently with the CPU
func (Device) AttrCooperativeLaunch ¶
AttrCooperativeLaunch - Device supports launching cooperative kernels via cudaLaunchCooperativeKernel
func (Device) AttrCooperativeMultiDeviceLaunch ¶
AttrCooperativeMultiDeviceLaunch - Device can participate in cooperative kernels launched via cudaLaunchCooperativeKernelMultiDevice
func (Device) AttrDirectManagedMemAccessFromHost ¶
AttrDirectManagedMemAccessFromHost - Host can directly access managed memory on the device without migration.
func (Device) AttrEccEnabled ¶
AttrEccEnabled - Device has ECC support enabled
func (Device) AttrGlobalL1CacheSupported ¶
AttrGlobalL1CacheSupported - Device supports caching globals in L1
func (Device) AttrGlobalMemoryBusWidth ¶
AttrGlobalMemoryBusWidth - Global memory bus width in bits
func (Device) AttrGpuOverlap ¶
AttrGpuOverlap - Device can possibly copy memory and execute a kernel concurrently
func (Device) AttrHostNativeAtomicSupported ¶
AttrHostNativeAtomicSupported - Link between the device and the host supports native atomic operations
func (Device) AttrHostRegisterSupported ¶
AttrHostRegisterSupported - Device supports host memory registration via cudaHostRegister.
func (Device) AttrIntegrated ¶
AttrIntegrated - Device is integrated with host memory
func (Device) AttrIsMultiGpuBoard ¶
AttrIsMultiGpuBoard - Device is on a multi-GPU board
func (Device) AttrKernelExecTimeout ¶
AttrKernelExecTimeout - Specifies whether there is a run time limit on kernels
func (Device) AttrL2CacheSize ¶
AttrL2CacheSize - Size of L2 cache in bytes
func (Device) AttrLocalL1CacheSupported ¶
AttrLocalL1CacheSupported - Device supports caching locals in L1
func (Device) AttrManagedMemory ¶
AttrManagedMemory - Device can allocate managed memory on this system
func (Device) AttrMaxBlockDimX ¶
AttrMaxBlockDimX - Maximum block dimension X
func (Device) AttrMaxBlockDimY ¶
AttrMaxBlockDimY - Maximum block dimension Y
func (Device) AttrMaxBlockDimZ ¶
AttrMaxBlockDimZ - Maximum block dimension Z
func (Device) AttrMaxGridDimX ¶
AttrMaxGridDimX - Maximum grid dimension X
func (Device) AttrMaxGridDimY ¶
AttrMaxGridDimY - Maximum grid dimension Y
func (Device) AttrMaxGridDimZ ¶
AttrMaxGridDimZ - Maximum grid dimension Z
func (Device) AttrMaxPitch ¶
AttrMaxPitch - Maximum pitch in bytes allowed by memory copies
func (Device) AttrMaxRegistersPerBlock ¶
AttrMaxRegistersPerBlock - Maximum number of 32-bit registers available per block
func (Device) AttrMaxRegistersPerMultiprocessor ¶
AttrMaxRegistersPerMultiprocessor - Maximum number of 32-bit registers available per multiprocessor
func (Device) AttrMaxSharedMemoryPerBlock ¶
AttrMaxSharedMemoryPerBlock - Maximum shared memory available per block in bytes
func (Device) AttrMaxSharedMemoryPerBlockOptin ¶
AttrMaxSharedMemoryPerBlockOptin - The maximum optin shared memory per block. This value may vary by chip. See cudaFuncSetAttribute
func (Device) AttrMaxSharedMemoryPerMultiprocessor ¶
AttrMaxSharedMemoryPerMultiprocessor - Maximum shared memory available per multiprocessor in bytes
func (Device) AttrMaxSurface1DLayeredLayers ¶
AttrMaxSurface1DLayeredLayers - Maximum layers in a 1D layered surface
func (Device) AttrMaxSurface1DLayeredWidth ¶
AttrMaxSurface1DLayeredWidth - Maximum 1D layered surface width
func (Device) AttrMaxSurface1DWidth ¶
AttrMaxSurface1DWidth - Maximum 1D surface width
func (Device) AttrMaxSurface2DHeight ¶
AttrMaxSurface2DHeight - Maximum 2D surface height
func (Device) AttrMaxSurface2DLayeredHeight ¶
AttrMaxSurface2DLayeredHeight - Maximum 2D layered surface height
func (Device) AttrMaxSurface2DLayeredLayers ¶
AttrMaxSurface2DLayeredLayers - Maximum layers in a 2D layered surface
func (Device) AttrMaxSurface2DLayeredWidth ¶
AttrMaxSurface2DLayeredWidth - Maximum 2D layered surface width
func (Device) AttrMaxSurface2DWidth ¶
AttrMaxSurface2DWidth - Maximum 2D surface width
func (Device) AttrMaxSurface3DDepth ¶
AttrMaxSurface3DDepth - Maximum 3D surface depth
func (Device) AttrMaxSurface3DHeight ¶
AttrMaxSurface3DHeight - Maximum 3D surface height
func (Device) AttrMaxSurface3DWidth ¶
AttrMaxSurface3DWidth - Maximum 3D surface width
func (Device) AttrMaxSurfaceCubemapLayeredLayers ¶
AttrMaxSurfaceCubemapLayeredLayers - Maximum layers in a cubemap layered surface
func (Device) AttrMaxSurfaceCubemapLayeredWidth ¶
AttrMaxSurfaceCubemapLayeredWidth - Maximum cubemap layered surface width
func (Device) AttrMaxSurfaceCubemapWidth ¶
AttrMaxSurfaceCubemapWidth - Maximum cubemap surface width
func (Device) AttrMaxTexture1DLayeredLayers ¶
AttrMaxTexture1DLayeredLayers - Maximum layers in a 1D layered texture
func (Device) AttrMaxTexture1DLayeredWidth ¶
AttrMaxTexture1DLayeredWidth - Maximum 1D layered texture width
func (Device) AttrMaxTexture1DLinearWidth ¶
AttrMaxTexture1DLinearWidth - Maximum 1D linear texture width
func (Device) AttrMaxTexture1DMipmappedWidth ¶
AttrMaxTexture1DMipmappedWidth - Maximum mipmapped 1D texture width
func (Device) AttrMaxTexture1DWidth ¶
AttrMaxTexture1DWidth - Maximum 1D texture width
func (Device) AttrMaxTexture2DGatherHeight ¶
AttrMaxTexture2DGatherHeight - Maximum 2D texture height if cudaArrayTextureGather is set
func (Device) AttrMaxTexture2DGatherWidth ¶
AttrMaxTexture2DGatherWidth - Maximum 2D texture width if cudaArrayTextureGather is set
func (Device) AttrMaxTexture2DHeight ¶
AttrMaxTexture2DHeight - Maximum 2D texture height
func (Device) AttrMaxTexture2DLayeredHeight ¶
AttrMaxTexture2DLayeredHeight - Maximum 2D layered texture height
func (Device) AttrMaxTexture2DLayeredLayers ¶
AttrMaxTexture2DLayeredLayers - Maximum layers in a 2D layered texture
func (Device) AttrMaxTexture2DLayeredWidth ¶
AttrMaxTexture2DLayeredWidth - Maximum 2D layered texture width
func (Device) AttrMaxTexture2DLinearHeight ¶
AttrMaxTexture2DLinearHeight - Maximum 2D linear texture height
func (Device) AttrMaxTexture2DLinearPitch ¶
AttrMaxTexture2DLinearPitch - Maximum 2D linear texture pitch in bytes
func (Device) AttrMaxTexture2DLinearWidth ¶
AttrMaxTexture2DLinearWidth - Maximum 2D linear texture width
func (Device) AttrMaxTexture2DMipmappedHeight ¶
AttrMaxTexture2DMipmappedHeight - Maximum mipmapped 2D texture height
func (Device) AttrMaxTexture2DMipmappedWidth ¶
AttrMaxTexture2DMipmappedWidth - Maximum mipmapped 2D texture width
func (Device) AttrMaxTexture2DWidth ¶
AttrMaxTexture2DWidth - Maximum 2D texture width
func (Device) AttrMaxTexture3DDepth ¶
AttrMaxTexture3DDepth - Maximum 3D texture depth
func (Device) AttrMaxTexture3DDepthAlt ¶
AttrMaxTexture3DDepthAlt - Alternate maximum 3D texture depth
func (Device) AttrMaxTexture3DHeight ¶
AttrMaxTexture3DHeight - Maximum 3D texture height
func (Device) AttrMaxTexture3DHeightAlt ¶
AttrMaxTexture3DHeightAlt - Alternate maximum 3D texture height
func (Device) AttrMaxTexture3DWidth ¶
AttrMaxTexture3DWidth - Maximum 3D texture width
func (Device) AttrMaxTexture3DWidthAlt ¶
AttrMaxTexture3DWidthAlt - Alternate maximum 3D texture width
func (Device) AttrMaxTextureCubemapLayeredLayers ¶
AttrMaxTextureCubemapLayeredLayers - Maximum layers in a cubemap layered texture
func (Device) AttrMaxTextureCubemapLayeredWidth ¶
AttrMaxTextureCubemapLayeredWidth - Maximum cubemap layered texture width/height
func (Device) AttrMaxTextureCubemapWidth ¶
AttrMaxTextureCubemapWidth - Maximum cubemap texture width/height
func (Device) AttrMaxThreadsPerBlock ¶
AttrMaxThreadsPerBlock - Maximum number of threads per block
func (Device) AttrMaxThreadsPerMultiProcessor ¶
AttrMaxThreadsPerMultiProcessor - Maximum resident threads per multiprocessor
func (Device) AttrMemoryClockRate ¶
AttrMemoryClockRate - Peak memory clock frequency in kilohertz
func (Device) AttrMultiGpuBoardGroupID ¶
AttrMultiGpuBoardGroupID - Unique identifier for a group of devices on the same multi-GPU board
func (Device) AttrMultiProcessorCount ¶
AttrMultiProcessorCount - Number of multiprocessors on device
func (Device) AttrPageableMemoryAccess ¶
AttrPageableMemoryAccess - Device supports coherently accessing pageable memory without calling cudaHostRegister on it
func (Device) AttrPageableMemoryAccessUsesHostPageTables ¶
AttrPageableMemoryAccessUsesHostPageTables - Device accesses pageable memory via the host page tables.
func (Device) AttrPciBusID ¶
AttrPciBusID - PCI bus ID of the device
func (Device) AttrPciDeviceID ¶
AttrPciDeviceID - PCI device ID of the device
func (Device) AttrPciDomainID ¶
AttrPciDomainID - PCI domain ID of the device
func (Device) AttrSingleToDoublePrecisionPerfRatio ¶
AttrSingleToDoublePrecisionPerfRatio - Ratio of single precision performance (in floating-point operations per second) to double precision performance
func (Device) AttrStreamPrioritiesSupported ¶
AttrStreamPrioritiesSupported - Device supports stream priorities
func (Device) AttrSurfaceAlignment ¶
AttrSurfaceAlignment - Alignment requirement for surfaces
func (Device) AttrTccDriver ¶
AttrTccDriver - Device is using TCC driver model
func (Device) AttrTextureAlignment ¶
AttrTextureAlignment - Alignment requirement for textures
func (Device) AttrTexturePitchAlignment ¶
AttrTexturePitchAlignment - Pitch alignment requirement for textures
func (Device) AttrTotalConstantMemory ¶
AttrTotalConstantMemory - Memory available on device for __constant__ variables in a CUDA C kernel in bytes
func (Device) AttrUnifiedAddressing ¶
AttrUnifiedAddressing - Device shares a unified address space with the host
func (Device) AttrWarpSize ¶
AttrWarpSize - Warp size in threads
func (Device) CanAccessPeer ¶
CanAccessPeer checks to see if peer's memory can be accessed by the device calling the method. The device calling the method doesn't get set.
func (Device) DeviceSync ¶
DeviceSync Blocks until the device has completed all preceding requested tasks. DeviceSync() returns an error if one of the preceding tasks has failed. If the cudaDeviceScheduleBlockingSync flag was set for this device, the host thread will block until the device has finished its work. Will Set Device
func (Device) DisablePeerAccess ¶
DisablePeerAccess - check cudaDeviceDisablePeerAccess. Device calling method will be set.
func (Device) EnablePeerAccess ¶
EnablePeerAccess enables memory access between devices. Device calling method will be set.
func (Device) MaxBlockDimXYZ ¶
MaxBlockDimXYZ returns an array of the values of blocks xyz in that order and an error
Will not set device
func (Device) MaxGridDimXYZ ¶
MaxGridDimXYZ returns an array of the values of grid xyz in that order and an error. Will not set device
func (Device) MaxThreadsPerBlock ¶
MaxThreadsPerBlock returns the max number of threads per block and the runtime error. Will not set device
func (Device) MaxThreadsPerMultiProcessor ¶
MaxThreadsPerMultiProcessor returns the number of threads that run a multiprocessor on device and the runtime error Will not set device
func (Device) MemGetInfo ¶
MemGetInfo returns the free and total memory for device called Will Set Device
func (Device) MemPrefetchAsync ¶
MemPrefetchAsync - Prefetches memory to the specified destination device.
From Cuda Documentation:
Prefetches memory to the specified destination device. devPtr is the base device pointer of the memory to be prefetched and dstDevice is the destination device. count specifies the number of bytes to copy. stream is the stream in which the operation is enqueued. The memory range must refer to managed memory allocated via cudaMallocManaged or declared via __managed__ variables.
Passing in cudaCpuDeviceId for dstDevice will prefetch the data to host memory. If dstDevice is a GPU, then the device attribute cudaDevAttrConcurrentManagedAccess must be non-zero. Additionally, stream must be associated with a device that has a non-zero value for the device attribute cudaDevAttrConcurrentManagedAccess.
The start address and end address of the memory range will be rounded down and rounded up respectively to be aligned to CPU page size before the prefetch operation is enqueued in the stream.
If no physical memory has been allocated for this region, then this memory region will be populated and mapped on the destination device. If there's insufficient memory to prefetch the desired region, the Unified Memory driver may evict pages from other cudaMallocManaged allocations to host memory in order to make room. Device memory allocated using cudaMalloc or cudaMallocArray will not be evicted.
By default, any mappings to the previous location of the migrated pages are removed and mappings for the new location are only setup on dstDevice. The exact behavior however also depends on the settings applied to this memory range via cudaMemAdvise as described below:
If cudaMemAdviseSetReadMostly was set on any subset of this memory range, then that subset will create a read-only copy of the pages on dstDevice.
If cudaMemAdviseSetPreferredLocation was called on any subset of this memory range, then the pages will be migrated to dstDevice even if dstDevice is not the preferred location of any pages in the memory range.
If cudaMemAdviseSetAccessedBy was called on any subset of this memory range, then mappings to those pages from all the appropriate processors are updated to refer to the new location if establishing such a mapping is possible. Otherwise, those mappings are cleared.
Note that this API is not required for functionality and only serves to improve performance by allowing the application to migrate data to a suitable location before it is accessed. Memory accesses to this range are always coherent and are allowed even when the data is actively being migrated.
Note that this function is asynchronous with respect to the host and all work on other devices.
func (Device) MultiProcessorCount ¶
MultiProcessorCount returns the number of multiprocessors on device and the runtime error. Will not set device
type Error ¶
type Error struct { // Context is typically a C function name. Context string // Name is the C constant name for the error, // such as "CURAND_STATUS_INTERNAL_ERROR". Name string // Message is the main error message. // // This may be human-readable, although it may often be // the same as Name. Message string }
Error is a CUDA-related error.
type Event ¶
type Event struct {
// contains filtered or unexported fields
}
Event is a cuda event
func CreateEvent ¶
CreateEvent will create and return an Event
func (*Event) ElapsedTime ¶
ElapsedTime takes the current event and compares it to a previous event and returns the time difference in ms.
type Extent ¶
type Extent C.struct_cudaExtent
Extent is a cuda struct cudaExtent
func MakeCudaExtent ¶
MakeCudaExtent -returns a cudaExtent based on input parameters.
type MemAttach ¶
MemAttach - This is a new type derived from a list of the defines for cudart
func (*MemAttach) Global ¶
Global sets m to Global and returns m - Memory can be accessed by any stream on any device
func (*MemAttach) Host ¶
Host sets m to Host and returns m - Memory cannot be accessed by any stream on any device
type MemManager ¶
type MemManager struct {
// contains filtered or unexported fields
}
MemManager allocates memory to a cuda context/device under the unified memory management, and handles memory copies between memory under the unified memory management, and copies to and from Go memory.
func CreateMemManager ¶
func CreateMemManager(w *gocu.Worker) (*MemManager, error)
CreateMemManager creates an allocator that is bound to CUDA's unified memory management.
func (*MemManager) Copy ¶
func (m *MemManager) Copy(dest, src cutil.Pointer, sib uint) error
Copy copies memory with amount of bytes passed in sib from src to dest
func (*MemManager) Malloc ¶
func (m *MemManager) Malloc(sib uint) (cuda cutil.Mem, err error)
Malloc allocates memory to either the host or the device. sib = size in bytes
func (*MemManager) SetHost ¶
func (m *MemManager) SetHost(onhost bool)
SetHost sets a host allocation flag. SetHost can be changed at anytime.
-onhost=true: all mallocs with allocator will allocate to host. -onhost=false: all mallocs with allocator will allocate to the device assigned to the allocator. (default)
type Memcpy3DParams ¶
type Memcpy3DParams C.struct_cudaMemcpy3DParms
Memcpy3DParams is used for Memcpy3d
func CreateMemcpy3DParams ¶
func CreateMemcpy3DParams(srcArray *Array, srcPos Pos, srcPtr PitchedPtr, dstArray *Array, dstPos Pos, dstPtr PitchedPtr, ext Extent, kind MemcpyKind) (m *Memcpy3DParams)
CreateMemcpy3DParams - srcPtr and dstPtr are optional and can be zero
type MemcpyKind ¶
type MemcpyKind C.enum_cudaMemcpyKind
MemcpyKind are enum flags for mem copy. They can be passed using methods
func (*MemcpyKind) Default ¶
func (m *MemcpyKind) Default() MemcpyKind
Default returns MemcpyKind(C.cudaMemcpyDefault)
func (*MemcpyKind) DeviceToDevice ¶
func (m *MemcpyKind) DeviceToDevice() MemcpyKind
DeviceToDevice returns MemcpyKind(C.cudaMemcpyDeviceToDevice)
func (*MemcpyKind) DeviceToHost ¶
func (m *MemcpyKind) DeviceToHost() MemcpyKind
DeviceToHost returns MemcpyKind(C.cudaMemcpyDeviceToHost)
func (*MemcpyKind) HostToDevice ¶
func (m *MemcpyKind) HostToDevice() MemcpyKind
HostToDevice returns MemcpyKind(C.cudaMemcpyHostToDevice)
func (*MemcpyKind) HostToHost ¶
func (m *MemcpyKind) HostToHost() MemcpyKind
HostToHost returns MemcpyKind(C.cudaMemcpyHostToHost)
type PitchedPtr ¶
type PitchedPtr C.struct_cudaPitchedPtr
PitchedPtr is a cudaPitchedPtr
func MakeCudaPitchedPtr ¶
func MakeCudaPitchedPtr(ptr cutil.Pointer, pitch, xsize, ysize uint) PitchedPtr
MakeCudaPitchedPtr makes a pitched pointer
func (PitchedPtr) Pointer ¶
func (p PitchedPtr) Pointer() cutil.Pointer
Pointer returns the pitched pointer
func (*PitchedPtr) Ptr ¶
func (p *PitchedPtr) Ptr() unsafe.Pointer
Ptr satisfies the cutil.Pointer interface
type Pos ¶
type Pos C.struct_cudaPos
Pos is a cuda struct cudaPos
func MakeCudaPos ¶
MakeCudaPos returns a cudaPos based on input parameters.
type Stream ¶
type Stream struct {
// contains filtered or unexported fields
}
Stream holds a C.cudaStream_t
func CreateBlockingPriorityStream ¶
CreateBlockingPriorityStream creates a blocking stream
func CreateBlockingStream ¶
CreateBlockingStream creates an asynchronous stream for the user
func CreateNonBlockingPriorityStream ¶
CreateNonBlockingPriorityStream creates a non blocking Priority Stream
func CreateNonBlockingStream ¶
CreateNonBlockingStream creates a non-blocking stream
func ExternalWrapper ¶
ExternalWrapper is used for other packages that might return a C.cudaStream_t
func (*Stream) AttachMemAsync ¶
AttachMemAsync - Enqueues an operation in stream to specify stream association of length bytes of memory starting from devPtr. This function is a stream-ordered operation, meaning that it is dependent on, and will only take effect when, previous work in stream has completed. Any previous association is automatically replaced.
From Cuda documentation:
devPtr must point to one of the following types of memories:
managed memory declared using the __managed__ keyword or allocated with cudaMallocManaged.
a valid host-accessible region of system-allocated pageable memory. This type of memory may only be specified if the device associated with the stream reports a non-zero value for the device attribute cudaDevAttrPageableMemoryAccess.
For managed allocations, length must be either zero or the entire allocation's size. Both indicate that the entire allocation's stream association is being changed. Currently, it is not possible to change stream association for a portion of a managed allocation.
For pageable allocations, length must be non-zero.
The stream association is specified using flags which must be one of cudaMemAttachGlobal, cudaMemAttachHost or cudaMemAttachSingle. The default value for flags is cudaMemAttachSingle. If the cudaMemAttachGlobal flag is specified, the memory can be accessed by any stream on any device. If the cudaMemAttachHost flag is specified, the program makes a guarantee that it won't access the memory on the device from any stream on a device that has a zero value for the device attribute cudaDevAttrConcurrentManagedAccess. If the cudaMemAttachSingle flag is specified and stream is associated with a device that has a zero value for the device attribute cudaDevAttrConcurrentManagedAccess, the program makes a guarantee that it will only access the memory on the device from stream. It is illegal to attach singly to the NULL stream, because the NULL stream is a virtual global stream and not a specific stream. An error will be returned in this case.
When memory is associated with a single stream, the Unified Memory system will allow CPU access to this memory region so long as all operations in stream have completed, regardless of whether other streams are active. In effect, this constrains exclusive ownership of the managed memory region by an active GPU to per-stream activity instead of whole-GPU activity.
Accessing memory on the device from streams that are not associated with it will produce undefined results. No error checking is performed by the Unified Memory system to ensure that kernels launched into other streams do not access this region.
It is a program's responsibility to order calls to cudaStreamAttachMemAsync via events, synchronization or other means to ensure legal access to memory at all times. Data visibility and coherency will be changed appropriately for all kernels which follow a stream-association change.
If stream is destroyed while data is associated with it, the association is removed and the association reverts to the default visibility of the allocation as specified at cudaMallocManaged. For __managed__ variables, the default association is always cudaMemAttachGlobal. Note that destroying a stream is an asynchronous operation, and as a result, the change to default association won't happen until all work in the stream has completed.
func (*Stream) Ptr ¶
Ptr returns an unsafe pointer to the hidden stream. This allows the stream to be used with other cuda libraries in other Go packages: if a C function calls for a pointer, you can type cast the unsafe pointer into a (C.cudaStream_t)(unsafe.Pointer).
type StreamCaptureMode ¶
type StreamCaptureMode C.enum_cudaStreamCaptureMode
StreamCaptureMode - Possible modes for stream capture thread interactions
func (*StreamCaptureMode) Global ¶
func (s *StreamCaptureMode) Global() StreamCaptureMode
Global sets s to global and returns s
func (*StreamCaptureMode) Relaxed ¶
func (s *StreamCaptureMode) Relaxed() StreamCaptureMode
Relaxed sets s to Relaxed and returns s
func (StreamCaptureMode) String ¶
func (s StreamCaptureMode) String() string
func (*StreamCaptureMode) ThreadLocal ¶
func (s *StreamCaptureMode) ThreadLocal() StreamCaptureMode
ThreadLocal sets s to ThreadLocal and returns s
type StreamCaptureStatus ¶
type StreamCaptureStatus C.enum_cudaStreamCaptureStatus
StreamCaptureStatus - Possible stream capture statuses returned by cudaStreamIsCapturing. Even though this is for returns, I think this can still be used for switches.
func (*StreamCaptureStatus) Active ¶
func (s *StreamCaptureStatus) Active() StreamCaptureStatus
Active sets s to Active and returns s
func (*StreamCaptureStatus) Invalid ¶
func (s *StreamCaptureStatus) Invalid() StreamCaptureStatus
Invalid sets s to Invalid and returns s
func (*StreamCaptureStatus) None ¶
func (s *StreamCaptureStatus) None() StreamCaptureStatus
None sets s to None and returns s
func (StreamCaptureStatus) String ¶
func (s StreamCaptureStatus) String() string