cuda

package
v3.4.0-beta3+incompatible Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 24, 2014 License: GPL-3.0 Imports: 13 Imported by: 0

Documentation

Overview

Package cuda provides GPU interaction

Index

Constants

View Source
const (
	X = 0
	Y = 1
	Z = 2
)
View Source
const CONV_TOLERANCE = 1e-6

Maximum tolerable error on demag convolution self-test.

View Source
const FFT_IMAG_TOLERANCE = 1e-6

Maximum tolerable imaginary/real part for demag kernel in Fourier space. Assures kernel has correct symmetry.

View Source
const REDUCE_BLOCKSIZE = C.REDUCE_BLOCKSIZE

Block size for reduce kernels.

Variables

View Source
var (
	Version     float32 // cuda version
	DevName     string  // GPU name
	TotalMem    int64   // total GPU memory
	GPUInfo     string  // Human-readable GPU description
	Synchronous bool    // for debug: synchronize stream0 at every kernel launch

)
View Source
var (
	BlockSize    = 512
	TileX, TileY = 32, 32
	MaxGridSize  = 65535
)

CUDA launch parameters. There might be better choices for recent hardware, but it barely makes a difference in the end.

Functions

func AddCubicAnisotropy

func AddCubicAnisotropy(Beff, m *data.Slice, k1_red LUTPtr, c1, c2 LUTPtrs, regions *Bytes)

Adds cubic anisotropy field to Beff. see cubicanisotropy.cu

func AddDMI

func AddDMI(Beff *data.Slice, m *data.Slice, D_redx, D_redy, D_redz, A_red float32, mesh *data.Mesh)

Add effective field of Dzyaloshinskii-Moriya interaction to Beff (Tesla). According to Bogdanov and Rößler, PRL 87, 037203 (2001), eq. 8 (out-of-plane symmetry breaking). See dmi.cu

func AddDotProduct

func AddDotProduct(dst *data.Slice, prefactor float32, a, b *data.Slice)

dst += prefactor * dot(a, b), as used for energy density

func AddExchange

func AddExchange(B, m *data.Slice, Aex_red SymmLUT, regions *Bytes, mesh *data.Mesh)

Add exchange field to Beff.

m: normalized magnetization
B: effective field in Tesla
Aex_red: 2*Aex / (Msat * 1e18 m2)

see exchange.cu

func AddSlonczewskiTorque

func AddSlonczewskiTorque(torque, m, J *data.Slice, fixedP LUTPtrs, Msat, alpha, pol, λ, ε_prime LUTPtr, regions *Bytes, mesh *data.Mesh)

Add Slonczewski ST torque to torque (Tesla). see slonczewski.cu

func AddTemperature

func AddTemperature(Beff, noise *data.Slice, temp_red LUTPtr, kmu0_VgammaDt float64, regions *Bytes)

Add thermal noise (Brown) to Beff. see temperature.cu

func AddUniaxialAnisotropy

func AddUniaxialAnisotropy(Beff, m *data.Slice, k1_red LUTPtr, u LUTPtrs, regions *Bytes)

Add uniaxial magnetocrystalline anisotropy field to Beff. see uniaxialanisotropy.cu

func AddZhangLiTorque

func AddZhangLiTorque(torque, m, J *data.Slice, bsat, alpha, xi, pol LUTPtr, regions *Bytes, mesh *data.Mesh)

Add Zhang-Li ST torque (Tesla) to torque. see zhangli.cu

func Buffer

func Buffer(nComp int, size [3]int) *data.Slice

Returns a GPU slice for temporary use. To be returned to the pool with Recycle.

func Dot

func Dot(a, b *data.Slice) float32

Dot product.

func FreeBuffers

func FreeBuffers()

Frees all buffers. Called after mesh resize.

func GPUCopy

func GPUCopy(in *data.Slice) *data.Slice

Returns a copy of in, allocated on GPU.

func GetCell

func GetCell(s *data.Slice, comp, ix, iy, iz int) float32

func GetElem

func GetElem(s *data.Slice, comp int, index int) float32

func Init

func Init(gpu int)

Locks to an OS thread and initializes CUDA for that thread.

func LLTorque

func LLTorque(torque, m, B *data.Slice, alpha LUTPtr, regions *Bytes)

Landau-Lifshitz torque divided by gamma0:

-1/(1+α²) [ m x B + α m x (m x B) ]

torque: in Tesla
m: normalized
B: in Tesla

see lltorque.cu

func Madd2

func Madd2(dst, src1, src2 *data.Slice, factor1, factor2 float32)

multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2

func Madd3

func Madd3(dst, src1, src2, src3 *data.Slice, factor1, factor2, factor3 float32)

multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3[i] * factor3

func MaxAbs

func MaxAbs(in *data.Slice) float32

Maximum of absolute values of all elements.

func MaxVecDiff

func MaxVecDiff(x, y *data.Slice) float64

Maximum of the norms of the difference between all vectors (x1,y1,z1) and (x2,y2,z2)

(dx, dy, dz) = (x1, y1, z1) - (x2, y2, z2)
max_i sqrt( dx[i]*dx[i] + dy[i]*dy[i] + dz[i]*dz[i] )

func MaxVecNorm

func MaxVecNorm(v *data.Slice) float64

Maximum of the norms of all vectors (x[i], y[i], z[i]).

max_i sqrt( x[i]*x[i] + y[i]*y[i] + z[i]*z[i] )

func MemAlloc

func MemAlloc(bytes int64) unsafe.Pointer

Wrapper for cu.MemAlloc, fatal exit on out of memory.

func Memset

func Memset(s *data.Slice, val ...float32)

Memset sets the Slice's components to the specified values. Use with care on unified slices (synchronization is needed).

func Mul

func Mul(dst, a, b *data.Slice)

multiply: dst[i] = a[i] * b[i]

func NewSlice

func NewSlice(nComp int, size [3]int) *data.Slice

Make a GPU Slice with nComp components, each of the given size.

func Normalize

func Normalize(vec, vol *data.Slice)

Normalize vec to unit length, unless length or vol are zero.

func Recycle

func Recycle(s *data.Slice)

Returns a buffer obtained from Buffer to the pool.

func RegionAddV

func RegionAddV(dst *data.Slice, lut LUTPtrs, regions *Bytes)

dst += LUT[region], for vectors. Used to add terms to excitation.

func RegionDecode

func RegionDecode(dst *data.Slice, lut LUTPtr, regions *Bytes)

decode the regions+LUT pair into an uncompressed array

func RegionSelect

func RegionSelect(dst, src *data.Slice, regions *Bytes, region byte)

select the part of src within the specified region, set 0's everywhere else.

func Resize

func Resize(dst, src *data.Slice, layer int)

Select and resize one layer for interactive output

func SetCell

func SetCell(s *data.Slice, comp int, ix, iy, iz int, value float32)

func SetElem

func SetElem(s *data.Slice, comp int, index int, value float32)

func ShiftBytes

func ShiftBytes(dst, src *Bytes, m *data.Mesh, shiftX int, clamp byte)

Like ShiftX, but for bytes.

func ShiftX

func ShiftX(dst, src *data.Slice, shiftX int, clampL, clampR float32)

Shift src by shiftX cells (positive or negative) along the X-axis and store the result in dst. The new edge value is clampL at the left edge or clampR at the right edge.

func Sum

func Sum(in *data.Slice) float32

Sum of all elements.

func Sync

func Sync()

Synchronize the global stream (usually not needed, done automatically with -sync)

func Zero

func Zero(s *data.Slice)

Set all elements of all components to zero.

Types

type Bytes

type Bytes struct {
	Ptr unsafe.Pointer
	Len int
}

3D byte slice, used for region lookup.

func NewBytes

func NewBytes(Len int) *Bytes

Construct new byte slice with given length.

func (*Bytes) Copy

func (dst *Bytes) Copy(src *Bytes)

Copy on device: dst = src.

func (*Bytes) Download

func (src *Bytes) Download(dst []byte)

Copy to host: dst = src.

func (*Bytes) Free

func (b *Bytes) Free()

Frees the GPU memory and disables the slice.

func (*Bytes) Set

func (dst *Bytes) Set(index int, value byte)

Set one element to value

func (*Bytes) Upload

func (dst *Bytes) Upload(src []byte)

Upload src (host) to dst (gpu).

type DemagConvolution

type DemagConvolution struct {
	// contains filtered or unexported fields
}

Stores the necessary state to perform FFT-accelerated convolution with magnetostatic kernel (or other kernel of same symmetry).

func NewDemag

func NewDemag(inputSize, PBC [3]int, kernel [3][3]*data.Slice) *DemagConvolution

Initializes a convolution to evaluate the demag field for the given mesh geometry.

func (*DemagConvolution) Exec

func (c *DemagConvolution) Exec(B, m, vol *data.Slice, Bsat LUTPtr, regions *Bytes)

Calculate the demag field of m * vol * Bsat, store result in B.

m:    magnetization normalized to unit length
vol:  unitless mask used to scale m's length, may be nil
Bsat: saturation magnetization in Tesla
B:    resulting demag field, in Tesla

func (*DemagConvolution) Free

func (c *DemagConvolution) Free()

type LUTPtr

type LUTPtr unsafe.Pointer // points to 256 float32's

type LUTPtrs

type LUTPtrs []unsafe.Pointer // elements point to 256 float32's

type MFMConvolution

type MFMConvolution struct {
	// contains filtered or unexported fields
}

Stores the necessary state to perform FFT-accelerated convolution

func NewMFM

func NewMFM(mesh *data.Mesh, lift, tipsize float64) *MFMConvolution

Initializes a convolution to evaluate the MFM image for the given mesh geometry, tip lift and tip size.

func (*MFMConvolution) Exec

func (c *MFMConvolution) Exec(outp, inp, vol *data.Slice, Bsat LUTPtr, regions *Bytes)

Store MFM image in outp, based on magnetization in inp.

func (*MFMConvolution) Free

func (c *MFMConvolution) Free()

func (*MFMConvolution) Reinit

func (c *MFMConvolution) Reinit(lift, tipsize float64)

type SymmLUT

type SymmLUT unsafe.Pointer // points to 256x256 symmetric matrix, only lower half stored. See exchange.cu

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL