cudart

package

v0.0.0-...-c9f06ed Latest Latest Go to latest Published: May 13, 2020 License: MIT Imports: 6 Imported by: 16

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/dereklstinson/gocudnn

Links

Open Source Insights

Documentation ¶

Index ¶

func GetDeviceCount() (n int32, err error)
func Malloc(mem cutil.Mem, sizet uint) error
func Malloc3D(p *PitchedPtr, e Extent) error
func Malloc3dArray(a *Array, desc *ChannelFormatDesc, e Extent, flag ArrayFlag) error
func MallocArray(a *Array, desc *ChannelFormatDesc, width, height uint, flag ArrayFlag) error
func MallocEx(w *gocu.Worker, mem cutil.Mem, sizet uint) error
func MallocHost(mem cutil.Mem, sizet uint) error
func MallocHostEx(w *gocu.Worker, mem cutil.Mem, sizet uint) error
func MallocManagedGlobal(mem cutil.Mem, size uint) error
func MallocManagedGlobalEx(w *gocu.Worker, mem cutil.Mem, size uint) error
func MallocManagedHost(mem cutil.Mem, size uint) error
func MallocManagedHostEx(w *gocu.Worker, mem cutil.Mem, size uint) error
func Memcpy(dest, src cutil.Pointer, sizet uint, kind MemcpyKind) error
func Memcpy2D(dest cutil.Pointer, dpitch uint, src cutil.Pointer, spitch uint, ...) error
func Memcpy3D(m *Memcpy3DParams) error
func Memcpy3DAsync(m *Memcpy3DParams, s gocu.Streamer) error
func MemcpyAsync(dest, src cutil.Pointer, sizet uint, kind MemcpyKind, stream gocu.Streamer) error
func MemcpyPeer(dest cutil.Pointer, ddev Device, src cutil.Pointer, sdev Device, sizet uint) error
func MemcpyPeerAsync(dest cutil.Pointer, ddev Device, src cutil.Pointer, sdev Device, sizet uint, ...) error
func MemcpyUS(dest, src unsafe.Pointer, sizet uint, kind MemcpyKind) error
func Memset(mem cutil.Mem, value int32, count uint) error
func MemsetUS(mem unsafe.Pointer, value int32, count uint) error
func SetValidDevices(devices []Device) error
func SyncNillStream() error
type Array
type ArrayFlag
- func (a *ArrayFlag) Cubemap() ArrayFlag
- func (a *ArrayFlag) Default() ArrayFlag
- func (a *ArrayFlag) Layered() ArrayFlag
- func (a *ArrayFlag) SurfaceLoadStore() ArrayFlag
- func (a *ArrayFlag) TextureGather() ArrayFlag
type Atribs
- func PointerGetAttributes(mem cutil.Pointer) (Atribs, error)
type ChannelFormatDesc
- func CreateChannelFormatDesc(x, y, z, w int32, f ChannelFormatKind) ChannelFormatDesc
type ChannelFormatKind
- func (c *ChannelFormatKind) Float() ChannelFormatKind
- func (c *ChannelFormatKind) Signed() ChannelFormatKind
- func (c *ChannelFormatKind) UnSigned() ChannelFormatKind
type Device
- func CreateDevice(device int32) Device
- func GetDevice() (Device, error)
- func (d Device) AttrAsyncEngineCount() (int, error)
- func (d Device) AttrCanFlushRemoteWrites() (int, error)
- func (d Device) AttrCanMapHostMemory() (int, error)
- func (d Device) AttrCanUseHostPointerForRegisteredMem() (int, error)
- func (d Device) AttrClockRate() (int, error)
- func (d Device) AttrComputeCapabilityMajor() (int, error)
- func (d Device) AttrComputeCapabilityMinor() (int, error)
- func (d Device) AttrComputeMode() (int, error)
- func (d Device) AttrComputePreemptionSupported() (int, error)
- func (d Device) AttrConcurrentKernels() (int, error)
- func (d Device) AttrConcurrentManagedAccess() (int, error)
- func (d Device) AttrCooperativeLaunch() (int, error)
- func (d Device) AttrCooperativeMultiDeviceLaunch() (int, error)
- func (d Device) AttrDirectManagedMemAccessFromHost() (int, error)
- func (d Device) AttrEccEnabled() (int, error)
- func (d Device) AttrGlobalL1CacheSupported() (int, error)
- func (d Device) AttrGlobalMemoryBusWidth() (int, error)
- func (d Device) AttrGpuOverlap() (int, error)
- func (d Device) AttrHostNativeAtomicSupported() (int, error)
- func (d Device) AttrHostRegisterSupported() (int, error)
- func (d Device) AttrIntegrated() (int, error)
- func (d Device) AttrIsMultiGpuBoard() (int, error)
- func (d Device) AttrKernelExecTimeout() (int, error)
- func (d Device) AttrL2CacheSize() (int, error)
- func (d Device) AttrLocalL1CacheSupported() (int, error)
- func (d Device) AttrManagedMemory() (int, error)
- func (d Device) AttrMaxBlockDimX() (int, error)
- func (d Device) AttrMaxBlockDimY() (int, error)
- func (d Device) AttrMaxBlockDimZ() (int, error)
- func (d Device) AttrMaxGridDimX() (int, error)
- func (d Device) AttrMaxGridDimY() (int, error)
- func (d Device) AttrMaxGridDimZ() (int, error)
- func (d Device) AttrMaxPitch() (int, error)
- func (d Device) AttrMaxRegistersPerBlock() (int, error)
- func (d Device) AttrMaxRegistersPerMultiprocessor() (int, error)
- func (d Device) AttrMaxSharedMemoryPerBlock() (int, error)
- func (d Device) AttrMaxSharedMemoryPerBlockOptin() (int, error)
- func (d Device) AttrMaxSharedMemoryPerMultiprocessor() (int, error)
- func (d Device) AttrMaxSurface1DLayeredLayers() (int, error)
- func (d Device) AttrMaxSurface1DLayeredWidth() (int, error)
- func (d Device) AttrMaxSurface1DWidth() (int, error)
- func (d Device) AttrMaxSurface2DHeight() (int, error)
- func (d Device) AttrMaxSurface2DLayeredHeight() (int, error)
- func (d Device) AttrMaxSurface2DLayeredLayers() (int, error)
- func (d Device) AttrMaxSurface2DLayeredWidth() (int, error)
- func (d Device) AttrMaxSurface2DWidth() (int, error)
- func (d Device) AttrMaxSurface3DDepth() (int, error)
- func (d Device) AttrMaxSurface3DHeight() (int, error)
- func (d Device) AttrMaxSurface3DWidth() (int, error)
- func (d Device) AttrMaxSurfaceCubemapLayeredLayers() (int, error)
- func (d Device) AttrMaxSurfaceCubemapLayeredWidth() (int, error)
- func (d Device) AttrMaxSurfaceCubemapWidth() (int, error)
- func (d Device) AttrMaxTexture1DLayeredLayers() (int, error)
- func (d Device) AttrMaxTexture1DLayeredWidth() (int, error)
- func (d Device) AttrMaxTexture1DLinearWidth() (int, error)
- func (d Device) AttrMaxTexture1DMipmappedWidth() (int, error)
- func (d Device) AttrMaxTexture1DWidth() (int, error)
- func (d Device) AttrMaxTexture2DGatherHeight() (int, error)
- func (d Device) AttrMaxTexture2DGatherWidth() (int, error)
- func (d Device) AttrMaxTexture2DHeight() (int, error)
- func (d Device) AttrMaxTexture2DLayeredHeight() (int, error)
- func (d Device) AttrMaxTexture2DLayeredLayers() (int, error)
- func (d Device) AttrMaxTexture2DLayeredWidth() (int, error)
- func (d Device) AttrMaxTexture2DLinearHeight() (int, error)
- func (d Device) AttrMaxTexture2DLinearPitch() (int, error)
- func (d Device) AttrMaxTexture2DLinearWidth() (int, error)
- func (d Device) AttrMaxTexture2DMipmappedHeight() (int, error)
- func (d Device) AttrMaxTexture2DMipmappedWidth() (int, error)
- func (d Device) AttrMaxTexture2DWidth() (int, error)
- func (d Device) AttrMaxTexture3DDepth() (int, error)
- func (d Device) AttrMaxTexture3DDepthAlt() (int, error)
- func (d Device) AttrMaxTexture3DHeight() (int, error)
- func (d Device) AttrMaxTexture3DHeightAlt() (int, error)
- func (d Device) AttrMaxTexture3DWidth() (int, error)
- func (d Device) AttrMaxTexture3DWidthAlt() (int, error)
- func (d Device) AttrMaxTextureCubemapLayeredLayers() (int, error)
- func (d Device) AttrMaxTextureCubemapLayeredWidth() (int, error)
- func (d Device) AttrMaxTextureCubemapWidth() (int, error)
- func (d Device) AttrMaxThreadsPerBlock() (int, error)
- func (d Device) AttrMaxThreadsPerMultiProcessor() (int, error)
- func (d Device) AttrMemoryClockRate() (int, error)
- func (d Device) AttrMultiGpuBoardGroupID() (int, error)
- func (d Device) AttrMultiProcessorCount() (int, error)
- func (d Device) AttrPageableMemoryAccess() (int, error)
- func (d Device) AttrPageableMemoryAccessUsesHostPageTables() (int, error)
- func (d Device) AttrPciBusID() (int, error)
- func (d Device) AttrPciDeviceID() (int, error)
- func (d Device) AttrPciDomainID() (int, error)
- func (d Device) AttrSingleToDoublePrecisionPerfRatio() (int, error)
- func (d Device) AttrStreamPrioritiesSupported() (int, error)
- func (d Device) AttrSurfaceAlignment() (int, error)
- func (d Device) AttrTccDriver() (int, error)
- func (d Device) AttrTextureAlignment() (int, error)
- func (d Device) AttrTexturePitchAlignment() (int, error)
- func (d Device) AttrTotalConstantMemory() (int, error)
- func (d Device) AttrUnifiedAddressing() (int, error)
- func (d Device) AttrWarpSize() (int, error)
- func (d Device) CanAccessPeer(peer Device) (bool, error)
- func (d Device) DeviceSync() error
- func (d Device) DisablePeerAccess(peer Device) error
- func (d Device) EnablePeerAccess(peer Device) error
- func (d Device) Major() (int, error)
- func (d Device) MaxBlockDimXYZ() ([]int32, error)
- func (d Device) MaxGridDimXYZ() ([]int32, error)
- func (d Device) MaxThreadsPerBlock() (int32, error)
- func (d Device) MaxThreadsPerMultiProcessor() (int32, error)
- func (d Device) MemGetInfo() (free, total int, err error)
- func (d Device) MemPrefetchAsync(mem cutil.Mem, size uint, s gocu.Streamer) error
- func (d Device) Minor() (int, error)
- func (d Device) MultiProcessorCount() (int32, error)
- func (d Device) Reset() error
- func (d Device) Set() error
type Error
- func (e *Error) Error() string
type Event
- func CreateEvent() (event *Event, err error)
- func (e *Event) ElapsedTime(previous *Event) (float32, error)
- func (e *Event) Record(s gocu.Streamer) error
- func (e *Event) Status() (bool, error)
- func (e *Event) Sync() error
type Extent
- func MakeCudaExtent(w, h, d uint) Extent
- func (e Extent) Depth() uint
- func (e Extent) Height() uint
- func (e Extent) Width() uint
type MemAttach
- func (m *MemAttach) Global() MemAttach
- func (m *MemAttach) Host() MemAttach
- func (m *MemAttach) Single() MemAttach
- func (m MemAttach) String() string
type MemManager
- func CreateMemManager(w *gocu.Worker) (*MemManager, error)
- func (m *MemManager) AsyncCopy(dest, src cutil.Pointer, sib uint, s gocu.Streamer) error
- func (m *MemManager) Copy(dest, src cutil.Pointer, sib uint) error
- func (m *MemManager) Malloc(sib uint) (cuda cutil.Mem, err error)
- func (m *MemManager) SetHost(onhost bool)
type MemType
type Memcpy3DParams
- func CreateMemcpy3DParams(srcArray *Array, srcPos Pos, srcPtr PitchedPtr, dstArray *Array, dstPos Pos, ...) (m *Memcpy3DParams)
type MemcpyKind
- func (m *MemcpyKind) Default() MemcpyKind
- func (m *MemcpyKind) DeviceToDevice() MemcpyKind
- func (m *MemcpyKind) DeviceToHost() MemcpyKind
- func (m *MemcpyKind) HostToDevice() MemcpyKind
- func (m *MemcpyKind) HostToHost() MemcpyKind
type PitchedPtr
- func MakeCudaPitchedPtr(ptr cutil.Pointer, pitch, xsize, ysize uint) PitchedPtr
- func (p PitchedPtr) Pitch() uint
- func (p PitchedPtr) Pointer() cutil.Pointer
- func (p *PitchedPtr) Ptr() unsafe.Pointer
- func (p PitchedPtr) Xsize() uint
- func (p PitchedPtr) Ysize() uint
type Pos
- func MakeCudaPos(x, y, z uint) Pos
- func (p Pos) X() uint
- func (p Pos) Y() uint
- func (p Pos) Z() uint
type Stream
- func CreateBlockingPriorityStream(priority int32) (*Stream, error)
- func CreateBlockingStream() (*Stream, error)
- func CreateNonBlockingPriorityStream(priority int32) (*Stream, error)
- func CreateNonBlockingStream() (*Stream, error)
- func ExternalWrapper(x unsafe.Pointer) *Stream
- func (s *Stream) AttachMemAsync(mem cutil.Pointer, size uint, attachmode MemAttach) error
- func (s *Stream) Ptr() unsafe.Pointer
- func (s *Stream) Query() (b bool, err error)
- func (s *Stream) Sync() error
- func (s *Stream) WaitEvent(event *Event, flags uint32) error
type StreamCaptureMode
- func (s *StreamCaptureMode) Global() StreamCaptureMode
- func (s *StreamCaptureMode) Relaxed() StreamCaptureMode
- func (s StreamCaptureMode) String() string
- func (s *StreamCaptureMode) ThreadLocal() StreamCaptureMode
type StreamCaptureStatus
- func (s *StreamCaptureStatus) Active() StreamCaptureStatus
- func (s *StreamCaptureStatus) Invalid() StreamCaptureStatus
- func (s *StreamCaptureStatus) None() StreamCaptureStatus
- func (s StreamCaptureStatus) String() string

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func GetDeviceCount ¶

func GetDeviceCount() (n int32, err error)

GetDeviceCount returns the number of devices.

func Malloc ¶

func Malloc(mem cutil.Mem, sizet uint) error

Malloc allocates memory of the passed size on the device. It also sets a finalizer on the memory for GC.

func Malloc3D ¶

func Malloc3D(p *PitchedPtr, e Extent) error

Malloc3D - Allocates logical 1D, 2D, or 3D memory objects on the device.

func Malloc3dArray ¶

func Malloc3dArray(a *Array, desc *ChannelFormatDesc, e Extent, flag ArrayFlag) error

Malloc3dArray - Allocate an array on the device.

func MallocArray ¶

func MallocArray(a *Array, desc *ChannelFormatDesc, width, height uint, flag ArrayFlag) error

MallocArray - Allocate an array on the device.

func MallocEx ¶

func MallocEx(w *gocu.Worker, mem cutil.Mem, sizet uint) error

MallocEx is like Malloc but it takes a worker and memory allocated to mem will be allocated to the context being used on that host thread. If w is nil then it will behave like Malloc

func MallocHost ¶

func MallocHost(mem cutil.Mem, sizet uint) error

MallocHost will allocate memory on the host for cuda use.

func MallocHostEx ¶

func MallocHostEx(w *gocu.Worker, mem cutil.Mem, sizet uint) error

MallocHostEx is like MallocHost but it takes a worker and memory allocated to mem will be allocated to the context being used on that host thread. If w is nil then it will behave like MallocHost

func MallocManagedGlobal ¶

func MallocManagedGlobal(mem cutil.Mem, size uint) error

MallocManagedGlobal allocates memory on current devices.

func MallocManagedGlobalEx ¶

func MallocManagedGlobalEx(w *gocu.Worker, mem cutil.Mem, size uint) error

MallocManagedGlobalEx is like MallocManagedGlobal but it takes a worker and memory allocated to mem will be allocated to the context being used on that host thread. If w is nil then it will behave like MallocManagedGlobal

func MallocManagedHost ¶

func MallocManagedHost(mem cutil.Mem, size uint) error

MallocManagedHost uses the Unified Memory management system and starts the memory off on the host. Memory is set to 0. It will also set a finalizer on the memory for GC.

func MallocManagedHostEx ¶

func MallocManagedHostEx(w *gocu.Worker, mem cutil.Mem, size uint) error

MallocManagedHostEx is like MallocManagedHost but it takes a worker and memory allocated to mem will be allocated to the context being used on that host thread. If w is nil then it will behave like MallocManagedHost

func Memcpy ¶

func Memcpy(dest, src cutil.Pointer, sizet uint, kind MemcpyKind) error

Memcpy copies some memory from src to dest. If default is selected and if the system supports unified virtual addressing then the transfer is inferred.

func Memcpy2D ¶

func Memcpy2D(dest cutil.Pointer, dpitch uint, src cutil.Pointer, spitch uint, width, height uint, kind MemcpyKind) error

Memcpy2D copies some memory from src to dest. If default is selected and if the system supports unified virtual addressing then the transfer is inferred.

func Memcpy3D ¶

func Memcpy3D(m *Memcpy3DParams) error

Memcpy3D - Copies a matrix (height rows of width bytes each) from the memory area pointed to by src to the CUDA array dst starting at the upper left corner (wOffset, hOffset), where kind specifies the direction of the copy. m is created using CreateMemcpy3DParams.

func Memcpy3DAsync ¶

func Memcpy3DAsync(m *Memcpy3DParams, s gocu.Streamer) error

Memcpy3DAsync - Copies a matrix (height rows of width bytes each) from the memory area pointed to by src to the CUDA array dst starting at the upper left corner (wOffset, hOffset), where kind specifies the direction of the copy. m is created using CreateMemcpy3DParams.

func MemcpyAsync ¶

func MemcpyAsync(dest, src cutil.Pointer, sizet uint, kind MemcpyKind, stream gocu.Streamer) error

MemcpyAsync copies data between host and device.

func MemcpyPeer ¶

func MemcpyPeer(dest cutil.Pointer, ddev Device, src cutil.Pointer, sdev Device, sizet uint) error

MemcpyPeer copies memory between two devices.

func MemcpyPeerAsync ¶

func MemcpyPeerAsync(dest cutil.Pointer, ddev Device, src cutil.Pointer, sdev Device, sizet uint, stream gocu.Streamer) error

MemcpyPeerAsync copies memory between two devices async.

func MemcpyUS ¶

func MemcpyUS(dest, src unsafe.Pointer, sizet uint, kind MemcpyKind) error

MemcpyUS will do a memcopy using unsafe pointers. It's a little lower level than the regular Memcpy.

func Memset ¶

func Memset(mem cutil.Mem, value int32, count uint) error

Memset sets the value for each byte in device memory

func MemsetUS ¶

func MemsetUS(mem unsafe.Pointer, value int32, count uint) error

MemsetUS is like Memset but with unsafe.Pointer.

func SetValidDevices ¶

func SetValidDevices(devices []Device) error

SetValidDevices takes a list of devices in terms of user priority for cuda execution

func SyncNillStream ¶

func SyncNillStream() error

SyncNillStream will sync the nil stream.

Types ¶

type Array ¶

type Array struct {
	// contains filtered or unexported fields
}

Array is a cudaArray_t

type ArrayFlag ¶

type ArrayFlag C.uint

ArrayFlag are flags used for array

func (*ArrayFlag) Cubemap ¶

func (a *ArrayFlag) Cubemap() ArrayFlag

Cubemap - Allocates a cubemap CUDA array. Width must be equal to height, and depth must be six. If the cudaArrayLayered flag is also set, depth must be a multiple of six.

func (*ArrayFlag) Default ¶

func (a *ArrayFlag) Default() ArrayFlag

Default - This flag's value is defined to be 0 and provides default array allocation

func (*ArrayFlag) Layered ¶

func (a *ArrayFlag) Layered() ArrayFlag

Layered - Allocates a layered CUDA array, with the depth extent indicating the number of layers

func (*ArrayFlag) SurfaceLoadStore ¶

func (a *ArrayFlag) SurfaceLoadStore() ArrayFlag

SurfaceLoadStore - Allocates a CUDA array that could be read from or written to using a surface reference.

func (*ArrayFlag) TextureGather ¶

func (a *ArrayFlag) TextureGather() ArrayFlag

TextureGather - This flag indicates that texture gather operations will be performed on the CUDA array. Texture gather can only be performed on 2D CUDA arrays.

type Atribs ¶

type Atribs struct {
	Type    MemType
	Device  int32
	DPtr    unsafe.Pointer
	HPtr    unsafe.Pointer
	Managed bool
}

Atribs holds a memory's attributes on the device side.

func PointerGetAttributes ¶

func PointerGetAttributes(mem cutil.Pointer) (Atribs, error)

PointerGetAttributes returns the attributes of mem.

type ChannelFormatDesc ¶

type ChannelFormatDesc C.struct_cudaChannelFormatDesc

ChannelFormatDesc describes a channel's format.

func CreateChannelFormatDesc ¶

func CreateChannelFormatDesc(x, y, z, w int32, f ChannelFormatKind) ChannelFormatDesc

CreateChannelFormatDesc - Returns a channel descriptor with format f and number of bits of each component x, y, z, and w.

So a float needs to be 32 bits.

Unsigned is 8, 32 bits.

Signed is 8, 32 bits.

type ChannelFormatKind ¶

type ChannelFormatKind C.enum_cudaChannelFormatKind

ChannelFormatKind is the kind of format the channel is in

func (*ChannelFormatKind) Float ¶

func (c *ChannelFormatKind) Float() ChannelFormatKind

Float - sets the channel format to Float

func (*ChannelFormatKind) Signed ¶

func (c *ChannelFormatKind) Signed() ChannelFormatKind

Signed - sets the channel format to Signed

func (*ChannelFormatKind) UnSigned ¶

func (c *ChannelFormatKind) UnSigned() ChannelFormatKind

UnSigned - sets the channel format to UnSigned

type Device ¶

type Device C.int

Device identifies a device. Its underlying type is C.int, not a struct.

func CreateDevice ¶

func CreateDevice(device int32) Device

CreateDevice just creates a device; it doesn't set it.

func GetDevice ¶

func GetDevice() (Device, error)

GetDevice gets the currently set device being used

func (Device) AttrAsyncEngineCount ¶

func (d Device) AttrAsyncEngineCount() (int, error)

AttrAsyncEngineCount - Number of asynchronous engines

func (Device) AttrCanFlushRemoteWrites ¶

func (d Device) AttrCanFlushRemoteWrites() (int, error)

AttrCanFlushRemoteWrites - Device supports flushing of outstanding remote writes.

func (Device) AttrCanMapHostMemory ¶

func (d Device) AttrCanMapHostMemory() (int, error)

AttrCanMapHostMemory - Device can map host memory into CUDA address space

func (Device) AttrCanUseHostPointerForRegisteredMem ¶

func (d Device) AttrCanUseHostPointerForRegisteredMem() (int, error)

AttrCanUseHostPointerForRegisteredMem - Device can access host registered memory at the same virtual address as the CPU

func (Device) AttrClockRate ¶

func (d Device) AttrClockRate() (int, error)

AttrClockRate - Peak clock frequency in kilohertz

func (Device) AttrComputeCapabilityMajor ¶

func (d Device) AttrComputeCapabilityMajor() (int, error)

AttrComputeCapabilityMajor - Major compute capability version number

func (Device) AttrComputeCapabilityMinor ¶

func (d Device) AttrComputeCapabilityMinor() (int, error)

AttrComputeCapabilityMinor - Minor compute capability version number

func (Device) AttrComputeMode ¶

func (d Device) AttrComputeMode() (int, error)

AttrComputeMode - Compute mode (See cudaComputeMode for details)

func (Device) AttrComputePreemptionSupported ¶

func (d Device) AttrComputePreemptionSupported() (int, error)

AttrComputePreemptionSupported - Device supports Compute Preemption

func (Device) AttrConcurrentKernels ¶

func (d Device) AttrConcurrentKernels() (int, error)

AttrConcurrentKernels - Device can possibly execute multiple kernels concurrently

func (Device) AttrConcurrentManagedAccess ¶

func (d Device) AttrConcurrentManagedAccess() (int, error)

AttrConcurrentManagedAccess - Device can coherently access managed memory concurrently with the CPU

func (Device) AttrCooperativeLaunch ¶

func (d Device) AttrCooperativeLaunch() (int, error)

AttrCooperativeLaunch - Device supports launching cooperative kernels via cudaLaunchCooperativeKernel

func (Device) AttrCooperativeMultiDeviceLaunch ¶

func (d Device) AttrCooperativeMultiDeviceLaunch() (int, error)

AttrCooperativeMultiDeviceLaunch - Device can participate in cooperative kernels launched via cudaLaunchCooperativeKernelMultiDevice

func (Device) AttrDirectManagedMemAccessFromHost ¶

func (d Device) AttrDirectManagedMemAccessFromHost() (int, error)

AttrDirectManagedMemAccessFromHost - Host can directly access managed memory on the device without migration.

func (Device) AttrEccEnabled ¶

func (d Device) AttrEccEnabled() (int, error)

AttrEccEnabled - Device has ECC support enabled

func (Device) AttrGlobalL1CacheSupported ¶

func (d Device) AttrGlobalL1CacheSupported() (int, error)

AttrGlobalL1CacheSupported - Device supports caching globals in L1

func (Device) AttrGlobalMemoryBusWidth ¶

func (d Device) AttrGlobalMemoryBusWidth() (int, error)

AttrGlobalMemoryBusWidth - Global memory bus width in bits

func (Device) AttrGpuOverlap ¶

func (d Device) AttrGpuOverlap() (int, error)

AttrGpuOverlap - Device can possibly copy memory and execute a kernel concurrently

func (Device) AttrHostNativeAtomicSupported ¶

func (d Device) AttrHostNativeAtomicSupported() (int, error)

AttrHostNativeAtomicSupported - Link between the device and the host supports native atomic operations

func (Device) AttrHostRegisterSupported ¶

func (d Device) AttrHostRegisterSupported() (int, error)

AttrHostRegisterSupported - Device supports host memory registration via cudaHostRegister.

func (Device) AttrIntegrated ¶

func (d Device) AttrIntegrated() (int, error)

AttrIntegrated - Device is integrated with host memory

func (Device) AttrIsMultiGpuBoard ¶

func (d Device) AttrIsMultiGpuBoard() (int, error)

AttrIsMultiGpuBoard - Device is on a multi-GPU board

func (Device) AttrKernelExecTimeout ¶

func (d Device) AttrKernelExecTimeout() (int, error)

AttrKernelExecTimeout - Specifies whether there is a run time limit on kernels

func (Device) AttrL2CacheSize ¶

func (d Device) AttrL2CacheSize() (int, error)

AttrL2CacheSize - Size of L2 cache in bytes

func (Device) AttrLocalL1CacheSupported ¶

func (d Device) AttrLocalL1CacheSupported() (int, error)

AttrLocalL1CacheSupported - Device supports caching locals in L1

func (Device) AttrManagedMemory ¶

func (d Device) AttrManagedMemory() (int, error)

AttrManagedMemory - Device can allocate managed memory on this system

func (Device) AttrMaxBlockDimX ¶

func (d Device) AttrMaxBlockDimX() (int, error)

AttrMaxBlockDimX - Maximum block dimension X

func (Device) AttrMaxBlockDimY ¶

func (d Device) AttrMaxBlockDimY() (int, error)

AttrMaxBlockDimY - Maximum block dimension Y

func (Device) AttrMaxBlockDimZ ¶

func (d Device) AttrMaxBlockDimZ() (int, error)

AttrMaxBlockDimZ - Maximum block dimension Z

func (Device) AttrMaxGridDimX ¶

func (d Device) AttrMaxGridDimX() (int, error)

AttrMaxGridDimX - Maximum grid dimension X

func (Device) AttrMaxGridDimY ¶

func (d Device) AttrMaxGridDimY() (int, error)

AttrMaxGridDimY - Maximum grid dimension Y

func (Device) AttrMaxGridDimZ ¶

func (d Device) AttrMaxGridDimZ() (int, error)

AttrMaxGridDimZ - Maximum grid dimension Z

func (Device) AttrMaxPitch ¶

func (d Device) AttrMaxPitch() (int, error)

AttrMaxPitch - Maximum pitch in bytes allowed by memory copies

func (Device) AttrMaxRegistersPerBlock ¶

func (d Device) AttrMaxRegistersPerBlock() (int, error)

AttrMaxRegistersPerBlock - Maximum number of 32-bit registers available per block

func (Device) AttrMaxRegistersPerMultiprocessor ¶

func (d Device) AttrMaxRegistersPerMultiprocessor() (int, error)

AttrMaxRegistersPerMultiprocessor - Maximum number of 32-bit registers available per multiprocessor

func (Device) AttrMaxSharedMemoryPerBlock ¶

func (d Device) AttrMaxSharedMemoryPerBlock() (int, error)

AttrMaxSharedMemoryPerBlock - Maximum shared memory available per block in bytes

func (Device) AttrMaxSharedMemoryPerBlockOptin ¶

func (d Device) AttrMaxSharedMemoryPerBlockOptin() (int, error)

AttrMaxSharedMemoryPerBlockOptin - The maximum optin shared memory per block. This value may vary by chip. See cudaFuncSetAttribute

func (Device) AttrMaxSharedMemoryPerMultiprocessor ¶

func (d Device) AttrMaxSharedMemoryPerMultiprocessor() (int, error)

AttrMaxSharedMemoryPerMultiprocessor - Maximum shared memory available per multiprocessor in bytes

func (Device) AttrMaxSurface1DLayeredLayers ¶

func (d Device) AttrMaxSurface1DLayeredLayers() (int, error)

AttrMaxSurface1DLayeredLayers - Maximum layers in a 1D layered surface

func (Device) AttrMaxSurface1DLayeredWidth ¶

func (d Device) AttrMaxSurface1DLayeredWidth() (int, error)

AttrMaxSurface1DLayeredWidth - Maximum 1D layered surface width

func (Device) AttrMaxSurface1DWidth ¶

func (d Device) AttrMaxSurface1DWidth() (int, error)

AttrMaxSurface1DWidth - Maximum 1D surface width

func (Device) AttrMaxSurface2DHeight ¶

func (d Device) AttrMaxSurface2DHeight() (int, error)

AttrMaxSurface2DHeight - Maximum 2D surface height

func (Device) AttrMaxSurface2DLayeredHeight ¶

func (d Device) AttrMaxSurface2DLayeredHeight() (int, error)

AttrMaxSurface2DLayeredHeight - Maximum 2D layered surface height

func (Device) AttrMaxSurface2DLayeredLayers ¶

func (d Device) AttrMaxSurface2DLayeredLayers() (int, error)

AttrMaxSurface2DLayeredLayers - Maximum layers in a 2D layered surface

func (Device) AttrMaxSurface2DLayeredWidth ¶

func (d Device) AttrMaxSurface2DLayeredWidth() (int, error)

AttrMaxSurface2DLayeredWidth - Maximum 2D layered surface width

func (Device) AttrMaxSurface2DWidth ¶

func (d Device) AttrMaxSurface2DWidth() (int, error)

AttrMaxSurface2DWidth - Maximum 2D surface width

func (Device) AttrMaxSurface3DDepth ¶

func (d Device) AttrMaxSurface3DDepth() (int, error)

AttrMaxSurface3DDepth - Maximum 3D surface depth

func (Device) AttrMaxSurface3DHeight ¶

func (d Device) AttrMaxSurface3DHeight() (int, error)

AttrMaxSurface3DHeight - Maximum 3D surface height

func (Device) AttrMaxSurface3DWidth ¶

func (d Device) AttrMaxSurface3DWidth() (int, error)

AttrMaxSurface3DWidth - Maximum 3D surface width

func (Device) AttrMaxSurfaceCubemapLayeredLayers ¶

func (d Device) AttrMaxSurfaceCubemapLayeredLayers() (int, error)

AttrMaxSurfaceCubemapLayeredLayers - Maximum layers in a cubemap layered surface

func (Device) AttrMaxSurfaceCubemapLayeredWidth ¶

func (d Device) AttrMaxSurfaceCubemapLayeredWidth() (int, error)

AttrMaxSurfaceCubemapLayeredWidth - Maximum cubemap layered surface width

func (Device) AttrMaxSurfaceCubemapWidth ¶

func (d Device) AttrMaxSurfaceCubemapWidth() (int, error)

AttrMaxSurfaceCubemapWidth - Maximum cubemap surface width

func (Device) AttrMaxTexture1DLayeredLayers ¶

func (d Device) AttrMaxTexture1DLayeredLayers() (int, error)

AttrMaxTexture1DLayeredLayers - Maximum layers in a 1D layered texture

func (Device) AttrMaxTexture1DLayeredWidth ¶

func (d Device) AttrMaxTexture1DLayeredWidth() (int, error)

AttrMaxTexture1DLayeredWidth - Maximum 1D layered texture width

func (Device) AttrMaxTexture1DLinearWidth ¶

func (d Device) AttrMaxTexture1DLinearWidth() (int, error)

AttrMaxTexture1DLinearWidth - Maximum 1D linear texture width

func (Device) AttrMaxTexture1DMipmappedWidth ¶

func (d Device) AttrMaxTexture1DMipmappedWidth() (int, error)

AttrMaxTexture1DMipmappedWidth - Maximum mipmapped 1D texture width

func (Device) AttrMaxTexture1DWidth ¶

func (d Device) AttrMaxTexture1DWidth() (int, error)

AttrMaxTexture1DWidth - Maximum 1D texture width

func (Device) AttrMaxTexture2DGatherHeight ¶

func (d Device) AttrMaxTexture2DGatherHeight() (int, error)

AttrMaxTexture2DGatherHeight - Maximum 2D texture height if cudaArrayTextureGather is set

func (Device) AttrMaxTexture2DGatherWidth ¶

func (d Device) AttrMaxTexture2DGatherWidth() (int, error)

AttrMaxTexture2DGatherWidth - Maximum 2D texture width if cudaArrayTextureGather is set

func (Device) AttrMaxTexture2DHeight ¶

func (d Device) AttrMaxTexture2DHeight() (int, error)

AttrMaxTexture2DHeight - Maximum 2D texture height

func (Device) AttrMaxTexture2DLayeredHeight ¶

func (d Device) AttrMaxTexture2DLayeredHeight() (int, error)

AttrMaxTexture2DLayeredHeight - Maximum 2D layered texture height

func (Device) AttrMaxTexture2DLayeredLayers ¶

func (d Device) AttrMaxTexture2DLayeredLayers() (int, error)

AttrMaxTexture2DLayeredLayers - Maximum layers in a 2D layered texture

func (Device) AttrMaxTexture2DLayeredWidth ¶

func (d Device) AttrMaxTexture2DLayeredWidth() (int, error)

AttrMaxTexture2DLayeredWidth - Maximum 2D layered texture width

func (Device) AttrMaxTexture2DLinearHeight ¶

func (d Device) AttrMaxTexture2DLinearHeight() (int, error)

AttrMaxTexture2DLinearHeight - Maximum 2D linear texture height

func (Device) AttrMaxTexture2DLinearPitch ¶

func (d Device) AttrMaxTexture2DLinearPitch() (int, error)

AttrMaxTexture2DLinearPitch - Maximum 2D linear texture pitch in bytes

func (Device) AttrMaxTexture2DLinearWidth ¶

func (d Device) AttrMaxTexture2DLinearWidth() (int, error)

AttrMaxTexture2DLinearWidth - Maximum 2D linear texture width

func (Device) AttrMaxTexture2DMipmappedHeight ¶

func (d Device) AttrMaxTexture2DMipmappedHeight() (int, error)

AttrMaxTexture2DMipmappedHeight - Maximum mipmapped 2D texture height

func (Device) AttrMaxTexture2DMipmappedWidth ¶

func (d Device) AttrMaxTexture2DMipmappedWidth() (int, error)

AttrMaxTexture2DMipmappedWidth - Maximum mipmapped 2D texture width

func (Device) AttrMaxTexture2DWidth ¶

func (d Device) AttrMaxTexture2DWidth() (int, error)

AttrMaxTexture2DWidth - Maximum 2D texture width

func (Device) AttrMaxTexture3DDepth ¶

func (d Device) AttrMaxTexture3DDepth() (int, error)

AttrMaxTexture3DDepth - Maximum 3D texture depth

func (Device) AttrMaxTexture3DDepthAlt ¶

func (d Device) AttrMaxTexture3DDepthAlt() (int, error)

AttrMaxTexture3DDepthAlt - Alternate maximum 3D texture depth

func (Device) AttrMaxTexture3DHeight ¶

func (d Device) AttrMaxTexture3DHeight() (int, error)

AttrMaxTexture3DHeight - Maximum 3D texture height

func (Device) AttrMaxTexture3DHeightAlt ¶

func (d Device) AttrMaxTexture3DHeightAlt() (int, error)

AttrMaxTexture3DHeightAlt - Alternate maximum 3D texture height

func (Device) AttrMaxTexture3DWidth ¶

func (d Device) AttrMaxTexture3DWidth() (int, error)

AttrMaxTexture3DWidth - Maximum 3D texture width

func (Device) AttrMaxTexture3DWidthAlt ¶

func (d Device) AttrMaxTexture3DWidthAlt() (int, error)

AttrMaxTexture3DWidthAlt - Alternate maximum 3D texture width

func (Device) AttrMaxTextureCubemapLayeredLayers ¶

func (d Device) AttrMaxTextureCubemapLayeredLayers() (int, error)

AttrMaxTextureCubemapLayeredLayers - Maximum layers in a cubemap layered texture

func (Device) AttrMaxTextureCubemapLayeredWidth ¶

func (d Device) AttrMaxTextureCubemapLayeredWidth() (int, error)

AttrMaxTextureCubemapLayeredWidth - Maximum cubemap layered texture width/height

func (Device) AttrMaxTextureCubemapWidth ¶

func (d Device) AttrMaxTextureCubemapWidth() (int, error)

AttrMaxTextureCubemapWidth - Maximum cubemap texture width/height

func (Device) AttrMaxThreadsPerBlock ¶

func (d Device) AttrMaxThreadsPerBlock() (int, error)

AttrMaxThreadsPerBlock - Maximum number of threads per block

func (Device) AttrMaxThreadsPerMultiProcessor ¶

func (d Device) AttrMaxThreadsPerMultiProcessor() (int, error)

AttrMaxThreadsPerMultiProcessor - Maximum resident threads per multiprocessor

func (Device) AttrMemoryClockRate ¶

func (d Device) AttrMemoryClockRate() (int, error)

AttrMemoryClockRate - Peak memory clock frequency in kilohertz

func (Device) AttrMultiGpuBoardGroupID ¶

func (d Device) AttrMultiGpuBoardGroupID() (int, error)

AttrMultiGpuBoardGroupID - Unique identifier for a group of devices on the same multi-GPU board

func (Device) AttrMultiProcessorCount ¶

func (d Device) AttrMultiProcessorCount() (int, error)

AttrMultiProcessorCount - Number of multiprocessors on device

func (Device) AttrPageableMemoryAccess ¶

func (d Device) AttrPageableMemoryAccess() (int, error)

AttrPageableMemoryAccess - Device supports coherently accessing pageable memory without calling cudaHostRegister on it

func (Device) AttrPageableMemoryAccessUsesHostPageTables ¶

func (d Device) AttrPageableMemoryAccessUsesHostPageTables() (int, error)

AttrPageableMemoryAccessUsesHostPageTables - Device accesses pageable memory via the host page tables.

func (Device) AttrPciBusID ¶

func (d Device) AttrPciBusID() (int, error)

AttrPciBusID - PCI bus ID of the device

func (Device) AttrPciDeviceID ¶

func (d Device) AttrPciDeviceID() (int, error)

AttrPciDeviceID - PCI device ID of the device

func (Device) AttrPciDomainID ¶

func (d Device) AttrPciDomainID() (int, error)

AttrPciDomainID - PCI domain ID of the device

func (Device) AttrSingleToDoublePrecisionPerfRatio ¶

func (d Device) AttrSingleToDoublePrecisionPerfRatio() (int, error)

AttrSingleToDoublePrecisionPerfRatio - Ratio of single precision performance (in floating-point operations per second) to double precision performance

func (Device) AttrStreamPrioritiesSupported ¶

func (d Device) AttrStreamPrioritiesSupported() (int, error)

AttrStreamPrioritiesSupported - Device supports stream priorities

func (Device) AttrSurfaceAlignment ¶

func (d Device) AttrSurfaceAlignment() (int, error)

AttrSurfaceAlignment - Alignment requirement for surfaces

func (Device) AttrTccDriver ¶

func (d Device) AttrTccDriver() (int, error)

AttrTccDriver - Device is using TCC driver model

func (Device) AttrTextureAlignment ¶

func (d Device) AttrTextureAlignment() (int, error)

AttrTextureAlignment - Alignment requirement for textures

func (Device) AttrTexturePitchAlignment ¶

func (d Device) AttrTexturePitchAlignment() (int, error)

AttrTexturePitchAlignment - Pitch alignment requirement for textures

func (Device) AttrTotalConstantMemory ¶

func (d Device) AttrTotalConstantMemory() (int, error)

AttrTotalConstantMemory - Memory available on device for __constant__ variables in a CUDA C kernel in bytes

func (Device) AttrUnifiedAddressing ¶

func (d Device) AttrUnifiedAddressing() (int, error)

AttrUnifiedAddressing - Device shares a unified address space with the host

func (Device) AttrWarpSize ¶

func (d Device) AttrWarpSize() (int, error)

AttrWarpSize - Warp size in threads

func (Device) CanAccessPeer ¶

func (d Device) CanAccessPeer(peer Device) (bool, error)

CanAccessPeer checks to see if peer's memory can be accessed by the device calling the method. The device calling the method doesn't get set.

func (Device) DeviceSync ¶

func (d Device) DeviceSync() error

DeviceSync Blocks until the device has completed all preceding requested tasks. DeviceSync() returns an error if one of the preceding tasks has failed. If the cudaDeviceScheduleBlockingSync flag was set for this device, the host thread will block until the device has finished its work. Will Set Device

func (Device) DisablePeerAccess ¶

func (d Device) DisablePeerAccess(peer Device) error

DisablePeerAccess - see cudaDeviceDisablePeerAccess. The device calling the method will be set.

func (Device) EnablePeerAccess ¶

func (d Device) EnablePeerAccess(peer Device) error

EnablePeerAccess enables memory access between devices. The device calling the method will be set.

func (Device) Major ¶

func (d Device) Major() (int, error)

Major returns the major compute capability of device

func (Device) MaxBlockDimXYZ ¶

func (d Device) MaxBlockDimXYZ() ([]int32, error)

MaxBlockDimXYZ returns an array of the values of blocks xyz in that order and an error

Will not set device

func (Device) MaxGridDimXYZ ¶

func (d Device) MaxGridDimXYZ() ([]int32, error)

MaxGridDimXYZ returns an array of the values of grid xyz in that order and an error. Will not set device

func (Device) MaxThreadsPerBlock ¶

func (d Device) MaxThreadsPerBlock() (int32, error)

MaxThreadsPerBlock returns the max number of threads per block and the runtime error. Will not set device

func (Device) MaxThreadsPerMultiProcessor ¶

func (d Device) MaxThreadsPerMultiProcessor() (int32, error)

MaxThreadsPerMultiProcessor returns the number of threads that run a multiprocessor on device and the runtime error Will not set device

func (Device) MemGetInfo ¶

func (d Device) MemGetInfo() (free, total int, err error)

MemGetInfo returns the free and total memory for device called Will Set Device

func (Device) MemPrefetchAsync ¶

func (d Device) MemPrefetchAsync(mem cutil.Mem, size uint, s gocu.Streamer) error

MemPrefetchAsync - Prefetches memory to the specified destination device.

From Cuda Documentation:

Prefetches memory to the specified destination device. devPtr is the base device pointer of the memory to be prefetched and dstDevice is the destination device. count specifies the number of bytes to copy. stream is the stream in which the operation is enqueued. The memory range must refer to managed memory allocated via cudaMallocManaged or declared via __managed__ variables.

Passing in cudaCpuDeviceId for dstDevice will prefetch the data to host memory. If dstDevice is a GPU, then the device attribute cudaDevAttrConcurrentManagedAccess must be non-zero. Additionally, stream must be associated with a device that has a non-zero value for the device attribute cudaDevAttrConcurrentManagedAccess.

The start address and end address of the memory range will be rounded down and rounded up respectively to be aligned to CPU page size before the prefetch operation is enqueued in the stream.

If no physical memory has been allocated for this region, then this memory region will be populated and mapped on the destination device. If there's insufficient memory to prefetch the desired region, the Unified Memory driver may evict pages from other cudaMallocManaged allocations to host memory in order to make room. Device memory allocated using cudaMalloc or cudaMallocArray will not be evicted.

By default, any mappings to the previous location of the migrated pages are removed and mappings for the new location are only setup on dstDevice. The exact behavior however also depends on the settings applied to this memory range via cudaMemAdvise as described below:

If cudaMemAdviseSetReadMostly was set on any subset of this memory range, then that subset will create a read-only copy of the pages on dstDevice.

If cudaMemAdviseSetPreferredLocation was called on any subset of this memory range, then the pages will be migrated to dstDevice even if dstDevice is not the preferred location of any pages in the memory range.

If cudaMemAdviseSetAccessedBy was called on any subset of this memory range, then mappings to those pages from all the appropriate processors are updated to refer to the new location if establishing such a mapping is possible. Otherwise, those mappings are cleared.

Note that this API is not required for functionality and only serves to improve performance by allowing the application to migrate data to a suitable location before it is accessed. Memory accesses to this range are always coherent and are allowed even when the data is actively being migrated.

Note that this function is asynchronous with respect to the host and all work on other devices.

func (Device) Minor ¶

func (d Device) Minor() (int, error)

Minor returns the minor compute capability of device

func (Device) MultiProcessorCount ¶

func (d Device) MultiProcessorCount() (int32, error)

MultiProcessorCount returns the number of multiprocessors on device and the runtime error. Will not set device

func (Device) Reset ¶

func (d Device) Reset() error

Reset resets the device. If the device isn't set on the current host thread, this function will automatically set it. Make sure that the device that was previously using the host thread is set back onto the host thread afterwards.

func (Device) Set ¶

func (d Device) Set() error

Set sets the device to use. This will change the device that is residing on the current host thread. There is no synchronization with the previous or new device on the host thread.

type Error ¶

type Error struct {
	// Context is typically a C function name.
	Context string

	// Name is the C constant name for the error,
	// such as "CURAND_STATUS_INTERNAL_ERROR".
	Name string

	// Message is the main error message.
	//
	// This may be human-readable, although it may often be
	// the same as Name.
	Message string
}

Error is a CUDA-related error.

func (*Error) Error ¶

func (e *Error) Error() string

Error generates a message "context: message".

type Event ¶

type Event struct {
	// contains filtered or unexported fields
}

Event is a cuda event

func CreateEvent ¶

func CreateEvent() (event *Event, err error)

CreateEvent will create and return an Event

func (*Event) ElapsedTime ¶

func (e *Event) ElapsedTime(previous *Event) (float32, error)

ElapsedTime takes the current event and compares it to a previous event and returns the time difference in ms.

func (*Event) Record ¶

func (e *Event) Record(s gocu.Streamer) error

Record records an event

func (*Event) Status ¶

func (e *Event) Status() (bool, error)

Status is the function cudaEventQuery. I didn't like the name and how the function was handled. The error will be returned as nil if cudaSuccess or cudaErrorNotReady is returned. It will return true if the event is completed. It will return false if the event is not complete.

func (*Event) Sync ¶

func (e *Event) Sync() error

Sync waits for an event to complete

type Extent ¶

type Extent C.struct_cudaExtent

Extent is a cuda struct cudaExtent

func MakeCudaExtent ¶

func MakeCudaExtent(w, h, d uint) Extent

MakeCudaExtent -returns a cudaExtent based on input parameters.

func (Extent) Depth ¶

func (e Extent) Depth() uint

Depth returns e.depth

func (Extent) Height ¶

func (e Extent) Height() uint

Height returns e.height

func (Extent) Width ¶

func (e Extent) Width() uint

Width returns e.width

type MemAttach ¶

type MemAttach C.uint

MemAttach - This is a new type derived from a list of the defines for cudart

func (*MemAttach) Global ¶

func (m *MemAttach) Global() MemAttach

Global sets m to Global and returns m - Memory can be accessed by any stream on any device

func (*MemAttach) Host ¶

func (m *MemAttach) Host() MemAttach

Host sets m to Host and returns m - Memory cannot be accessed by any stream on any device

func (*MemAttach) Single ¶

func (m *MemAttach) Single() MemAttach

Single sets m to Single and returns m - Memory can only be accessed by a single stream on the associated device

func (MemAttach) String ¶

func (m MemAttach) String() string

type MemManager ¶

type MemManager struct {
	// contains filtered or unexported fields
}

MemManager allocates memory to a cuda context/device under the unified memory management, and handles memory copies between memory under the unified memory management, and copies to and from Go memory.

func CreateMemManager ¶

func CreateMemManager(w *gocu.Worker) (*MemManager, error)

CreateMemManager creates an allocator that is bound to CUDA's unified memory management.

func (*MemManager) AsyncCopy ¶

func (m *MemManager) AsyncCopy(dest, src cutil.Pointer, sib uint, s gocu.Streamer) error

AsyncCopy does an AsyncCopy with the mem manager.

func (*MemManager) Copy ¶

func (m *MemManager) Copy(dest, src cutil.Pointer, sib uint) error

Copy copies memory with amount of bytes passed in sib from src to dest

func (*MemManager) Malloc ¶

func (m *MemManager) Malloc(sib uint) (cuda cutil.Mem, err error)

Malloc allocates memory to either the host or the device. sib = size in bytes

func (*MemManager) SetHost ¶

func (m *MemManager) SetHost(onhost bool)

SetHost sets a host allocation flag. SetHost can be changed at anytime.

	-onhost=true all mallocs with allocator will allocate to host
	-onhost=false all mallocs with allocator will allocate to device assigned to allocator. (default)

type MemType ¶

type MemType C.cudaMemoryType

MemType is a typedefed C.cudaMemoryType

type Memcpy3DParams ¶

type Memcpy3DParams C.struct_cudaMemcpy3DParms

Memcpy3DParams is used for Memcpy3d

func CreateMemcpy3DParams ¶

func CreateMemcpy3DParams(srcArray *Array, srcPos Pos, srcPtr PitchedPtr, dstArray *Array, dstPos Pos, dstPtr PitchedPtr, ext Extent, kind MemcpyKind) (m *Memcpy3DParams)

CreateMemcpy3DParams creates the parameters used for Memcpy3D. srcPtr and dstPtr are optional and can be zero

type MemcpyKind ¶

type MemcpyKind C.enum_cudaMemcpyKind

MemcpyKind are enum flags for mem copy. They can be passed using methods

func (*MemcpyKind) Default ¶

func (m *MemcpyKind) Default() MemcpyKind

Default return MemcpyKind(C.cudaMemcpyDefault )

func (*MemcpyKind) DeviceToDevice ¶

func (m *MemcpyKind) DeviceToDevice() MemcpyKind

DeviceToDevice return MemcpyKind(C.cudaMemcpyDeviceToDevice )

func (*MemcpyKind) DeviceToHost ¶

func (m *MemcpyKind) DeviceToHost() MemcpyKind

DeviceToHost return MemcpyKind(C.cudaMemcpyDeviceToHost )

func (*MemcpyKind) HostToDevice ¶

func (m *MemcpyKind) HostToDevice() MemcpyKind

HostToDevice return MemcpyKind(C.cudaMemcpyHostToDevice )

func (*MemcpyKind) HostToHost ¶

func (m *MemcpyKind) HostToHost() MemcpyKind

HostToHost return MemcpyKind(C.cudaMemcpyHostToHost )

type PitchedPtr ¶

type PitchedPtr C.struct_cudaPitchedPtr

PitchedPtr is a cudaPitchedPtr

func MakeCudaPitchedPtr ¶

func MakeCudaPitchedPtr(ptr cutil.Pointer, pitch, xsize, ysize uint) PitchedPtr

MakeCudaPitchedPtr makes a pitched pointer

func (PitchedPtr) Pitch ¶

func (p PitchedPtr) Pitch() uint

Pitch returns the pitch

func (PitchedPtr) Pointer ¶

func (p PitchedPtr) Pointer() cutil.Pointer

Pointer returns the pitched pointer

func (*PitchedPtr) Ptr ¶

func (p *PitchedPtr) Ptr() unsafe.Pointer

Ptr satisfies the cutil.Pointer interface

func (PitchedPtr) Xsize ¶

func (p PitchedPtr) Xsize() uint

Xsize returns the xsize

func (PitchedPtr) Ysize ¶

func (p PitchedPtr) Ysize() uint

Ysize returns the ysize

type Pos ¶

type Pos C.struct_cudaPos

Pos is a cuda struct cudaPos

func MakeCudaPos ¶

func MakeCudaPos(x, y, z uint) Pos

MakeCudaPos returns a cudaPos based on input parameters.

func (Pos) X ¶

func (p Pos) X() uint

X returns x position

func (Pos) Y ¶

func (p Pos) Y() uint

Y returns y position

func (Pos) Z ¶

func (p Pos) Z() uint

Z returns z position

type Stream ¶

type Stream struct {
	// contains filtered or unexported fields
}

Stream holds a C.cudaStream_t

func CreateBlockingPriorityStream ¶

func CreateBlockingPriorityStream(priority int32) (*Stream, error)

CreateBlockingPriorityStream creates a blocking stream

func CreateBlockingStream ¶

func CreateBlockingStream() (*Stream, error)

CreateBlockingStream creates an asynchronous stream for the user

func CreateNonBlockingPriorityStream ¶

func CreateNonBlockingPriorityStream(priority int32) (*Stream, error)

CreateNonBlockingPriorityStream creates a non blocking Priority Stream

func CreateNonBlockingStream ¶

func CreateNonBlockingStream() (*Stream, error)

CreateNonBlockingStream creates a non-blocking stream

func ExternalWrapper ¶

func ExternalWrapper(x unsafe.Pointer) *Stream

ExternalWrapper is used for other packages that might return a C.cudaStream_t

func (*Stream) AttachMemAsync ¶

func (s *Stream) AttachMemAsync(mem cutil.Pointer, size uint, attachmode MemAttach) error

AttachMemAsync - Enqueues an operation in stream to specify stream association of length bytes of memory starting from devPtr. This function is a stream-ordered operation, meaning that it is dependent on, and will only take effect when, previous work in stream has completed. Any previous association is automatically replaced.

From Cuda documentation:

devPtr must point to one of the following types of memories:

managed memory declared using the __managed__ keyword or allocated with cudaMallocManaged.

a valid host-accessible region of system-allocated pageable memory. This type of memory may only be specified if the device associated with the stream reports a non-zero value for the device attribute cudaDevAttrPageableMemoryAccess.

For managed allocations, length must be either zero or the entire allocation's size. Both indicate that the entire allocation's stream association is being changed. Currently, it is not possible to change stream association for a portion of a managed allocation.

For pageable allocations, length must be non-zero.

The stream association is specified using flags which must be one of cudaMemAttachGlobal, cudaMemAttachHost or cudaMemAttachSingle. The default value for flags is cudaMemAttachSingle. If the cudaMemAttachGlobal flag is specified, the memory can be accessed by any stream on any device. If the cudaMemAttachHost flag is specified, the program makes a guarantee that it won't access the memory on the device from any stream on a device that has a zero value for the device attribute cudaDevAttrConcurrentManagedAccess. If the cudaMemAttachSingle flag is specified and stream is associated with a device that has a zero value for the device attribute cudaDevAttrConcurrentManagedAccess, the program makes a guarantee that it will only access the memory on the device from stream. It is illegal to attach singly to the NULL stream, because the NULL stream is a virtual global stream and not a specific stream. An error will be returned in this case.

When memory is associated with a single stream, the Unified Memory system will allow CPU access to this memory region so long as all operations in stream have completed, regardless of whether other streams are active. In effect, this constrains exclusive ownership of the managed memory region by an active GPU to per-stream activity instead of whole-GPU activity.

Accessing memory on the device from streams that are not associated with it will produce undefined results. No error checking is performed by the Unified Memory system to ensure that kernels launched into other streams do not access this region.

It is a program's responsibility to order calls to cudaStreamAttachMemAsync via events, synchronization or other means to ensure legal access to memory at all times. Data visibility and coherency will be changed appropriately for all kernels which follow a stream-association change.

If stream is destroyed while data is associated with it, the association is removed and the association reverts to the default visibility of the allocation as specified at cudaMallocManaged. For __managed__ variables, the default association is always cudaMemAttachGlobal. Note that destroying a stream is an asynchronous operation, and as a result, the change to default association won't happen until all work in the stream has completed.

func (*Stream) Ptr ¶

func (s *Stream) Ptr() unsafe.Pointer

Ptr returns an unsafe pointer to the hidden stream. This allows stream to be used with other cuda libraries in other go packages, so if a C function calls for a Pointer then you can type cast the unsafe pointer into a (C.cudaStream_t)(unsafe.Pointer)

func (*Stream) Query ¶

func (s *Stream) Query() (b bool, err error)

Query - Queries an asynchronous stream for completion status.

returns true if ready, false if not.

if an error occurs err will not be nil

func (*Stream) Sync ¶

func (s *Stream) Sync() error

Sync synchronizes the stream

func (*Stream) WaitEvent ¶

func (s *Stream) WaitEvent(event *Event, flags uint32) error

WaitEvent - Make a compute stream wait on an event.

Flags must be zero

type StreamCaptureMode ¶

type StreamCaptureMode C.enum_cudaStreamCaptureMode

StreamCaptureMode - Possible modes for stream capture thread interactions

func (*StreamCaptureMode) Global ¶

func (s *StreamCaptureMode) Global() StreamCaptureMode

Global sets s to global and returns s

func (*StreamCaptureMode) Relaxed ¶

func (s *StreamCaptureMode) Relaxed() StreamCaptureMode

Relaxed sets s to Relaxed and returns s

func (StreamCaptureMode) String ¶

func (s StreamCaptureMode) String() string

func (*StreamCaptureMode) ThreadLocal ¶

func (s *StreamCaptureMode) ThreadLocal() StreamCaptureMode

ThreadLocal sets s to ThreadLocal and returns s

type StreamCaptureStatus ¶

type StreamCaptureStatus C.enum_cudaStreamCaptureStatus

StreamCaptureStatus - Possible stream capture statuses returned by cudaStreamIsCapturing. Even though this is for return values, I think this can still be used for switches.

func (*StreamCaptureStatus) Active ¶

func (s *StreamCaptureStatus) Active() StreamCaptureStatus

Active sets s to Active and returns s

func (*StreamCaptureStatus) Invalid ¶

func (s *StreamCaptureStatus) Invalid() StreamCaptureStatus

Invalid sets s to Invalid and returns s

func (*StreamCaptureStatus) None ¶

func (s *StreamCaptureStatus) None() StreamCaptureStatus

None sets s to None and returns s

func (StreamCaptureStatus) String ¶

func (s StreamCaptureStatus) String() string

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
crtutil Package crtutil allows cudart to work with Go's io Reader and Writer interfaces.	Package crtutil allows cudart to work with Go's io Reader and Writer interfaces.

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL