cuda

package
v0.0.0-...-68e3ea5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 23, 2024 License: CC-BY-3.0, Freetype, GPL-3.0-or-later Imports: 14 Imported by: 0

Documentation

Overview

Package cuda provides GPU interaction

Index

Constants

View Source
const (
	BlockSize    = 512
	TileX, TileY = 32, 32
	MaxGridSize  = 65535
)

CUDA launch parameters. There might be better choices for recent hardware, but it barely makes a difference in the end.

View Source
const (
	X = 0
	Y = 1
	Z = 2
)
View Source
const CONV_TOLERANCE = 1e-6

Maximum tolerable error on demag convolution self-test.

View Source
const FFT_IMAG_TOLERANCE = 1e-6

Maximum tolerable imaginary/real part for demag kernel in Fourier space. Assures kernel has correct symmetry.

View Source
const REDUCE_BLOCKSIZE = C.REDUCE_BLOCKSIZE

Block size for reduce kernels.

Variables

View Source
var (
	DriverVersion int    // cuda driver version
	DevName       string // GPU name
	TotalMem      int64  // total GPU memory
	GPUInfo       string // Human-readable GPU description
	Synchronous   bool   // for debug: synchronize stream0 at every kernel launch

)
View Source
var UseCC = 0

Functions

func Add

func Add(dst, src1, src2 *data.Slice)

Add: dst = src1 + src2.

func AddCubicAnisotropy2

func AddCubicAnisotropy2(Beff, m *data.Slice, Msat, k1, k2, k3, c1, c2 MSlice)

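Add cubic magnetocrystalline anisotropy field to Beff. NOTE(review): the original text said "uniaxial" and referred to uniaxialanisotropy.cu, apparently copied from AddUniaxialAnisotropy2; confirm the correct kernel file.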

func AddDMI

func AddDMI(Beff *data.Slice, m *data.Slice, Aex_red, Dex_red SymmLUT, Msat MSlice, regions *Bytes, mesh *data.Mesh, OpenBC bool)

Add effective field of Dzyaloshinskii-Moriya interaction to Beff (Tesla). According to Bogdanov and Rößler, PRL 87, 3, 2001, eq. 8 (out-of-plane symmetry breaking). See dmi.cu

func AddDMIBulk

func AddDMIBulk(Beff *data.Slice, m *data.Slice, Aex_red, D_red SymmLUT, Msat MSlice, regions *Bytes, mesh *data.Mesh, OpenBC bool)

Add effective field due to bulk Dzyaloshinskii-Moriya interaction to Beff. See dmibulk.cu

func AddDotProduct

func AddDotProduct(dst *data.Slice, prefactor float32, a, b *data.Slice)

dst += prefactor * dot(a, b), as used for energy density

func AddDotProduct2

func AddDotProduct2(dst *data.Slice, prefactor float32, a, b *data.Slice)

dst += prefactor * dot(a, b), as used for energy density

func AddExchange

func AddExchange(B, m *data.Slice, Aex_red SymmLUT, Msat MSlice, regions *Bytes, mesh *data.Mesh, JZ float32)

Add exchange field to Beff.

m: normalized magnetization
B: effective field in Tesla
Aex_red: Aex / (Msat * 1e18 m2)

see exchange.cu

func AddMagnetoelasticField

func AddMagnetoelasticField(Beff, m *data.Slice, exx, eyy, ezz, exy, exz, eyz, B1, B2, Msat MSlice)

Add magnetoelastic coupling field to the effective field. See magnetoelasticfield.cu

func AddSlonczewskiTorque2

func AddSlonczewskiTorque2(torque, m *data.Slice, Msat, J, fixedP, alpha, pol, λ, ε_prime MSlice, thickness MSlice, flp float64, mesh *data.Mesh)

Add Slonczewski ST torque to torque (Tesla). see slonczewski.cu

func AddUniaxialAnisotropy2

func AddUniaxialAnisotropy2(Beff, m *data.Slice, Msat, k1, k2, u MSlice)

Add uniaxial magnetocrystalline anisotropy field to Beff. see uniaxialanisotropy.cu

func AddZhangLiTorque

func AddZhangLiTorque(torque, m *data.Slice, Msat, J, alpha, xi, pol MSlice, mesh *data.Mesh)

Add Zhang-Li ST torque (Tesla) to torque. see zhangli.cu

func Buffer

func Buffer(nComp int, size [3]int) *data.Slice

Returns a GPU slice for temporary use. To be returned to the pool with Recycle

func CopyPath

func CopyPath(T, m *data.Slice, noi int)

func CopyToSubspace

func CopyToSubspace(v0, v1, w2, hw *data.Slice, id int, alpha, beta float32)

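dst += prefactor * dot(a, b), as used for energy density. NOTE(review): this description appears to be copied from AddDotProduct and does not match the signature CopyToSubspace(v0, v1, w2, hw, id, alpha, beta); confirm the actual behavior against the source.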

func Crop

func Crop(dst, src *data.Slice, offX, offY, offZ int)

Crop stores in dst a rectangle cropped from src at given offset position. dst size may be smaller than src.

func CrossProduct

func CrossProduct(dst, a, b *data.Slice)

func Div

func Div(dst, a, b *data.Slice)

Divide: dst[i] = a[i] / b[i]. Division by zero yields zero.

func Dot

func Dot(a, b *data.Slice) float32

Dot product.

func ExchangeDecode

func ExchangeDecode(dst *data.Slice, Aex_red SymmLUT, regions *Bytes, mesh *data.Mesh)

Finds the average exchange strength around each cell, for debugging.

func FreeBuffers

func FreeBuffers()

Frees all buffers. Called after mesh resize.

func GMinimize

func GMinimize(m, Beff *data.Slice, dt float32)

func GNEB

func GNEB(B, T, m *data.Slice, image, noi int, tp, Lp, Ln, k float32, CIGNEB, Pos int)

func GPUCopy

func GPUCopy(in *data.Slice) *data.Slice

Returns a copy of in, allocated on GPU.

func GenerateU1U2

func GenerateU1U2(m, u1, u2 *data.Slice)

func GenerateW

func GenerateW(w2 *data.Slice)

func GetCell

func GetCell(s *data.Slice, comp, ix, iy, iz int) float32

func GetElem

func GetElem(s *data.Slice, comp int, index int) float32

func GetHW

func GetHW(k0, k, u1, u2, hw *data.Slice, epsilon float32)

func GetMagnetoelasticForceDensity

func GetMagnetoelasticForceDensity(out, m *data.Slice, B1, B2 MSlice, mesh *data.Mesh)

Calculate magnetoelastic force density. See magnetoelasticforce.cu

func GetPhi

func GetPhi(dst, src *data.Slice, prefactor float32, a, b *data.Slice)

dst += prefactor * dot(a, b), as used for energy density

func GetReactioCoordinate

func GetReactioCoordinate(in *data.Slice, image, noi int) float32

func GetVelocity

func GetVelocity(v, k, m1, m2 *data.Slice)

func Init

func Init(gpu int)

Locks to an OS thread and initializes CUDA for that thread.

func Invert

func Invert(dst, src *data.Slice)

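dst += prefactor * dot(a, b), as used for energy density. NOTE(review): this description appears to be copied from AddDotProduct and does not match the signature Invert(dst, src); confirm the actual behavior against the source.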

func LLNoPrecess

func LLNoPrecess(torque, m, B *data.Slice)

Landau-Lifshitz torque with precession disabled. Used by engine.Relax().

func LLTorque

func LLTorque(torque, m, B *data.Slice, alpha MSlice)

Landau-Lifshitz torque divided by gamma0:

  -1/(1+α²) [ m x B + α m x (m x B) ]

torque in Tesla
m normalized
B in Tesla

see lltorque.cu

func Madd2

func Madd2(dst, src1, src2 *data.Slice, factor1, factor2 float32)

multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2

func Madd3

func Madd3(dst, src1, src2, src3 *data.Slice, factor1, factor2, factor3 float32)

multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3[i] * factor3

func Madd4

func Madd4(dst, src1, src2, src3, src4 *data.Slice, factor1, factor2, factor3, factor4 float32)

multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3[i] * factor3 + src4[i] * factor4

func Madd5

func Madd5(dst, src1, src2, src3, src4, src5 *data.Slice, factor1, factor2, factor3, factor4, factor5 float32)

multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3[i] * factor3 + src4[i] * factor4 + src5[i] * factor5

func Madd6

func Madd6(dst, src1, src2, src3, src4, src5, src6 *data.Slice, factor1, factor2, factor3, factor4, factor5, factor6 float32)

multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3[i] * factor3 + src4[i] * factor4 + src5[i] * factor5 + src6[i] * factor6

func Madd7

func Madd7(dst, src1, src2, src3, src4, src5, src6, src7 *data.Slice, factor1, factor2, factor3, factor4, factor5, factor6, factor7 float32)

multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3[i] * factor3 + src4[i] * factor4 + src5[i] * factor5 + src6[i] * factor6 + src7[i] * factor7

func MaxAbs

func MaxAbs(in *data.Slice) float32

Maximum of absolute values of all elements.

func MaxG1G2

func MaxG1G2(v, b *data.Slice) float64

func MaxVecDiff

func MaxVecDiff(x, y *data.Slice) float64

Maximum of the norms of the difference between all vectors (x1,y1,z1) and (x2,y2,z2)

(dx, dy, dz) = (x1, y1, z1) - (x2, y2, z2)
max_i sqrt( dx[i]*dx[i] + dy[i]*dy[i] + dz[i]*dz[i] )

func MaxVecNorm

func MaxVecNorm(v *data.Slice) float64

Maximum of the norms of all vectors (x[i], y[i], z[i]).

max_i sqrt( x[i]*x[i] + y[i]*y[i] + z[i]*z[i] )

func MemAlloc

func MemAlloc(bytes int64) unsafe.Pointer

Wrapper for cu.MemAlloc, fatal exit on out of memory.

func MemCpy

func MemCpy(dst, src unsafe.Pointer, bytes int64)

func MemCpyDtoH

func MemCpyDtoH(dst, src unsafe.Pointer, bytes int64)

func MemCpyHtoD

func MemCpyHtoD(dst, src unsafe.Pointer, bytes int64)

func Memset

func Memset(s *data.Slice, val ...float32)

Memset sets the Slice's components to the specified values. Use with care on unified slices (a sync is needed).

func Minimize

func Minimize(m, m0, torque *data.Slice, dt float32)

m = 1 / (4 + τ²(m x H)²) [{4 - τ²(m x H)²} m - 4τ(m x m x H)]. Note: torque from LLNoPrecess has negative sign.

func Mul

func Mul(dst, a, b *data.Slice)

Multiply: dst[i] = a[i] * b[i]. a and b must have the same number of components.

func MyDot

func MyDot(a, b *data.Slice) float32

func MyZero

func MyZero(dst *data.Slice)

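dst += prefactor * dot(a, b), as used for energy density. NOTE(review): this description appears to be copied from AddDotProduct and does not match the signature MyZero(dst); confirm the actual behavior against the source.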

func NewSlice

func NewSlice(nComp int, size [3]int) *data.Slice

Make a GPU Slice with nComp components, each of the given size.

func Normalize

func Normalize(vec, vol *data.Slice)

Normalize vec to unit length, unless length or vol are zero.

func Projection

func Projection(k, m *data.Slice)

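dst += prefactor * dot(a, b), as used for energy density. NOTE(review): this description appears to be copied from AddDotProduct and does not match the signature Projection(k, m); confirm the actual behavior against the source.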

func Recycle

func Recycle(s *data.Slice)

Returns a buffer obtained from Buffer to the pool.

func RegionAddS

func RegionAddS(dst *data.Slice, lut LUTPtr, regions *Bytes)

dst += LUT[region], for scalar. Used to add terms to scalar excitation.

func RegionAddV

func RegionAddV(dst *data.Slice, lut LUTPtrs, regions *Bytes)

dst += LUT[region], for vectors. Used to add terms to excitation.

func RegionDecode

func RegionDecode(dst *data.Slice, lut LUTPtr, regions *Bytes)

decode the regions+LUT pair into an uncompressed array

func RegionSelect

func RegionSelect(dst, src *data.Slice, regions *Bytes, region byte)

select the part of src within the specified region, set 0's everywhere else.

func Resize

func Resize(dst, src *data.Slice, layer int)

Select and resize one layer for interactive output

func Rotate

func Rotate(s, v, st, w3 *data.Slice, epsilon float32)

func SetCell

func SetCell(s *data.Slice, comp int, ix, iy, iz int, value float32)

func SetElem

func SetElem(s *data.Slice, comp int, index int, value float32)

func SetMaxAngle

func SetMaxAngle(dst, m *data.Slice, Aex_red SymmLUT, regions *Bytes, mesh *data.Mesh)

SetMaxAngle sets dst to the maximum angle of each cell's magnetization with all of its neighbors, provided the exchange stiffness with that neighbor is nonzero.

func SetPhi

func SetPhi(s *data.Slice, m *data.Slice)

func SetTemperature

func SetTemperature(Bth, noise *data.Slice, k2mu0_Mu0VgammaDt float64, Msat, Temp, Alpha MSlice)

Set Bth to thermal noise (Brown). see temperature.cu

func SetTheta

func SetTheta(s *data.Slice, m *data.Slice)

func SetTopologicalCharge

func SetTopologicalCharge(s *data.Slice, m *data.Slice, mesh *data.Mesh)

Set s to the topological charge density s = m · (∂m/∂x × ∂m/∂y). See topologicalcharge.cu

func SetTopologicalChargeLattice

func SetTopologicalChargeLattice(s *data.Slice, m *data.Slice, mesh *data.Mesh)

Topological charge according to Berg and Lüscher

func ShiftBytes

func ShiftBytes(dst, src *Bytes, m *data.Mesh, shiftX int, clamp byte)

Like Shift, but for bytes

func ShiftBytesY

func ShiftBytesY(dst, src *Bytes, m *data.Mesh, shiftY int, clamp byte)

func ShiftMagZ

func ShiftMagZ(dst, src *data.Slice, shiftZ int, clampL, clampR float32)

func ShiftX

func ShiftX(dst, src *data.Slice, shiftX int, clampL, clampR float32)

Shift dst by shiftX cells (positive or negative) along the X-axis. The new edge value is clampL at the left edge or clampR at the right edge.

func ShiftY

func ShiftY(dst, src *data.Slice, shiftY int, clampL, clampR float32)

func ShiftZ

func ShiftZ(dst, src *data.Slice, shiftZ int, clampL, clampR float32)

func Sum

func Sum(in *data.Slice) float32

Sum of all elements.

func Sync

func Sync()

Synchronize the global stream. This is called before and after all memcopy operations between host and device.

func Tangent

func Tangent(T, m *data.Slice, image, noi int, Ep, Ei, En, Lp, Ln float32)

func TotalForce

func TotalForce(src *data.Slice, noi int) float64

func TurnOnGeom

func TurnOnGeom(Beff, vol *data.Slice)

func VPOminimize

func VPOminimize(m, Beff *data.Slice, regions *Bytes, dt float32, minend, noi int)

func Velocity

func Velocity(v, k0, m, m0 *data.Slice)

func Velocity2

func Velocity2(v, k0, m, m0 *data.Slice, dt float32)

func W2ToW3

func W2ToW3(u1, u2, w2, w3 *data.Slice)

func Zero

func Zero(s *data.Slice)

Set all elements of all components to zero.

func ZeroMask

func ZeroMask(dst *data.Slice, mask LUTPtr, regions *Bytes)

Sets vector dst to zero where mask != 0.

Types

type Bytes

type Bytes struct {
	Ptr unsafe.Pointer
	Len int
}

3D byte slice, used for region lookup.

func NewBytes

func NewBytes(Len int) *Bytes

Construct new byte slice with given length, initialised to zeros.

func (*Bytes) Copy

func (dst *Bytes) Copy(src *Bytes)

Copy on device: dst = src.

func (*Bytes) Download

func (src *Bytes) Download(dst []byte)

Copy to host: dst = src.

func (*Bytes) Free

func (b *Bytes) Free()

Frees the GPU memory and disables the slice.

func (*Bytes) Get

func (src *Bytes) Get(index int) byte

Get one element. data.Index can be used to find the index for x,y,z.

func (*Bytes) Set

func (dst *Bytes) Set(index int, value byte)

Set one element to value. data.Index can be used to find the index for x,y,z.

func (*Bytes) Upload

func (dst *Bytes) Upload(src []byte)

Upload src (host) to dst (gpu).

type DemagConvolution

type DemagConvolution struct {
	// contains filtered or unexported fields
}

Stores the necessary state to perform FFT-accelerated convolution with magnetostatic kernel (or other kernel of same symmetry).

func NewDemag

func NewDemag(noi int, gneb byte, inputSize, PBC [3]int, kernel [3][3]*data.Slice, imkernel [3][3]*data.Slice, test bool) *DemagConvolution

Initializes a convolution to evaluate the demag field for the given mesh geometry. Sanity-checked if test == true (slow-ish for large meshes).

func (*DemagConvolution) Exec

func (c *DemagConvolution) Exec(noi int, gneb byte, B, m2, m, vol *data.Slice, Msat MSlice)

Calculate the demag field of m * vol * Bsat, store result in B.

m:    magnetization normalized to unit length
vol:  unitless mask used to scale m's length, may be nil
Bsat: saturation magnetization in Tesla
B:    resulting demag field, in Tesla

func (*DemagConvolution) Free

func (c *DemagConvolution) Free()

type LUTPtr

type LUTPtr unsafe.Pointer // points to 256 float32's

type LUTPtrs

type LUTPtrs []unsafe.Pointer // elements point to 256 float32's

type MFMConvolution

type MFMConvolution struct {
	// contains filtered or unexported fields
}

Stores the necessary state to perform FFT-accelerated convolution

func NewMFM

func NewMFM(mesh *data.Mesh, lift, tipsize float64, cachedir string) *MFMConvolution

Initializes a convolution to evaluate the demag field for the given mesh geometry.

func (*MFMConvolution) Exec

func (c *MFMConvolution) Exec(outp, inp, vol *data.Slice, Msat MSlice)

Store MFM image in outp, based on magnetization in inp.

func (*MFMConvolution) Free

func (c *MFMConvolution) Free()

func (*MFMConvolution) Reinit

func (c *MFMConvolution) Reinit(lift, tipsize float64, cachedir string)

type MSlice

type MSlice struct {
	// contains filtered or unexported fields
}

Slice + scalar multiplier.

func MakeMSlice

func MakeMSlice(arr *data.Slice, mul []float64) MSlice

func ToMSlice

func ToMSlice(s *data.Slice) MSlice

func (MSlice) DevPtr

func (m MSlice) DevPtr(c int) unsafe.Pointer

func (MSlice) Len

func (m MSlice) Len() int

func (MSlice) Mul

func (m MSlice) Mul(c int) float32

func (MSlice) Recycle

func (m MSlice) Recycle()

func (MSlice) SetMul

func (m MSlice) SetMul(c int, mul float32)

func (MSlice) Size

func (m MSlice) Size() [3]int

type SymmLUT

type SymmLUT unsafe.Pointer // points to 256x256 symmetric matrix, only lower half stored. See exchange.cu

Source Files

Directories

Path Synopsis
Go bindings for the CUDA driver API.
Go bindings for the CUDA driver API.
Go bindings for the CUDA CUFFT API.
Go bindings for the CUDA CUFFT API.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL