Documentation ¶
Overview ¶
Package cuda provides GPU interaction
Index ¶
- Constants
- Variables
- func Add(dst, src1, src2 *data.Slice)
- func AddCubicAnisotropy2(Beff, m *data.Slice, Msat, k1, k2, k3, c1, c2 MSlice)
- func AddDMI(Beff *data.Slice, m *data.Slice, Aex_red, Dex_red SymmLUT, Msat MSlice, ...)
- func AddDMIBulk(Beff *data.Slice, m *data.Slice, Aex_red, D_red SymmLUT, Msat MSlice, ...)
- func AddDotProduct(dst *data.Slice, prefactor float32, a, b *data.Slice)
- func AddExchange(B, m *data.Slice, Aex_red SymmLUT, Msat MSlice, regions *Bytes, ...)
- func AddMagnetoelasticField(Beff, m *data.Slice, exx, eyy, ezz, exy, exz, eyz, B1, B2, Msat MSlice)
- func AddSlonczewskiTorque2(torque, m *data.Slice, Msat, J, fixedP, alpha, pol, λ, ε_prime MSlice, ...)
- func AddUniaxialAnisotropy2(Beff, m *data.Slice, Msat, k1, k2, u MSlice)
- func AddZhangLiTorque(torque, m *data.Slice, Msat, J, alpha, xi, pol MSlice, mesh *data.Mesh)
- func Buffer(nComp int, size [3]int) *data.Slice
- func Crop(dst, src *data.Slice, offX, offY, offZ int)
- func CrossProduct(dst, a, b *data.Slice)
- func Div(dst, a, b *data.Slice)
- func Dot(a, b *data.Slice) float32
- func ExchangeDecode(dst *data.Slice, Aex_red SymmLUT, regions *Bytes, mesh *data.Mesh)
- func FreeBuffers()
- func GPUCopy(in *data.Slice) *data.Slice
- func GetCell(s *data.Slice, comp, ix, iy, iz int) float32
- func GetElem(s *data.Slice, comp int, index int) float32
- func GetMagnetoelasticForceDensity(out, m *data.Slice, B1, B2 MSlice, mesh *data.Mesh)
- func Init(gpu int)
- func LLNoPrecess(torque, m, B *data.Slice)
- func LLTorque(torque, m, B *data.Slice, alpha MSlice)
- func Madd2(dst, src1, src2 *data.Slice, factor1, factor2 float32)
- func Madd3(dst, src1, src2, src3 *data.Slice, factor1, factor2, factor3 float32)
- func Madd4(dst, src1, src2, src3, src4 *data.Slice, ...)
- func Madd5(dst, src1, src2, src3, src4, src5 *data.Slice, ...)
- func Madd6(dst, src1, src2, src3, src4, src5, src6 *data.Slice, ...)
- func Madd7(dst, src1, src2, src3, src4, src5, src6, src7 *data.Slice, ...)
- func MaxAbs(in *data.Slice) float32
- func MaxVecDiff(x, y *data.Slice) float64
- func MaxVecNorm(v *data.Slice) float64
- func MemAlloc(bytes int64) unsafe.Pointer
- func MemCpy(dst, src unsafe.Pointer, bytes int64)
- func MemCpyDtoH(dst, src unsafe.Pointer, bytes int64)
- func MemCpyHtoD(dst, src unsafe.Pointer, bytes int64)
- func Memset(s *data.Slice, val ...float32)
- func Minimize(m, m0, torque *data.Slice, dt float32)
- func Mul(dst, a, b *data.Slice)
- func NewSlice(nComp int, size [3]int) *data.Slice
- func Normalize(vec, vol *data.Slice)
- func Recycle(s *data.Slice)
- func RegionAddS(dst *data.Slice, lut LUTPtr, regions *Bytes)
- func RegionAddV(dst *data.Slice, lut LUTPtrs, regions *Bytes)
- func RegionDecode(dst *data.Slice, lut LUTPtr, regions *Bytes)
- func RegionSelect(dst, src *data.Slice, regions *Bytes, region byte)
- func Resize(dst, src *data.Slice, layer int)
- func SetCell(s *data.Slice, comp int, ix, iy, iz int, value float32)
- func SetElem(s *data.Slice, comp int, index int, value float32)
- func SetMaxAngle(dst, m *data.Slice, Aex_red SymmLUT, regions *Bytes, mesh *data.Mesh)
- func SetPhi(s *data.Slice, m *data.Slice)
- func SetTemperature(Bth, noise *data.Slice, k2mu0_Mu0VgammaDt float64, Msat, Temp, Alpha MSlice)
- func SetTheta(s *data.Slice, m *data.Slice)
- func SetTopologicalCharge(s *data.Slice, m *data.Slice, mesh *data.Mesh)
- func SetTopologicalChargeLattice(s *data.Slice, m *data.Slice, mesh *data.Mesh)
- func ShiftBytes(dst, src *Bytes, m *data.Mesh, shiftX int, clamp byte)
- func ShiftBytesY(dst, src *Bytes, m *data.Mesh, shiftY int, clamp byte)
- func ShiftX(dst, src *data.Slice, shiftX int, clampL, clampR float32)
- func ShiftY(dst, src *data.Slice, shiftY int, clampL, clampR float32)
- func ShiftZ(dst, src *data.Slice, shiftZ int, clampL, clampR float32)
- func Sum(in *data.Slice) float32
- func Sync()
- func Zero(s *data.Slice)
- func ZeroMask(dst *data.Slice, mask LUTPtr, regions *Bytes)
- type Bytes
- type DemagConvolution
- type LUTPtr
- type LUTPtrs
- type MFMConvolution
- type MSlice
- type SymmLUT
Constants ¶
const ( BlockSize = 512 TileX, TileY = 32, 32 MaxGridSize = 65535 )
CUDA launch parameters. There might be better choices for recent hardware, but it barely makes a difference in the end.
const ( X = 0 Y = 1 Z = 2 )
const CONV_TOLERANCE = 1e-6
Maximum tolerable error on demag convolution self-test.
const FFT_IMAG_TOLERANCE = 1e-6
Maximum tolerable imaginary/real part for demag kernel in Fourier space. Assures kernel has correct symmetry.
const REDUCE_BLOCKSIZE = C.REDUCE_BLOCKSIZE
Block size for reduce kernels.
Variables ¶
var ( DriverVersion int // cuda driver version DevName string // GPU name TotalMem int64 // total GPU memory GPUInfo string // Human-readable GPU description Synchronous bool // for debug: synchronize stream0 at every kernel launch )
var UseCC = 0
Functions ¶
func AddCubicAnisotropy2 ¶
Add cubic magnetocrystalline anisotropy field to Beff. See cubicanisotropy2.cu
func AddDMI ¶
func AddDMI(Beff *data.Slice, m *data.Slice, Aex_red, Dex_red SymmLUT, Msat MSlice, regions *Bytes, mesh *data.Mesh, OpenBC bool)
Add effective field of Dzyaloshinskii-Moriya interaction to Beff (Tesla). According to Bogdanov and Rößler, PRL 87, 3, 2001, eq. 8 (out-of-plane symmetry breaking). See dmi.cu
func AddDMIBulk ¶
func AddDMIBulk(Beff *data.Slice, m *data.Slice, Aex_red, D_red SymmLUT, Msat MSlice, regions *Bytes, mesh *data.Mesh, OpenBC bool)
Add effective field due to bulk Dzyaloshinskii-Moriya interaction to Beff. See dmibulk.cu
func AddDotProduct ¶
dst += prefactor * dot(a, b), as used for energy density
func AddExchange ¶
Add exchange field to Beff.
m: normalized magnetization; B: effective field in Tesla; Aex_red: Aex / (Msat * 1e18 m²)
see exchange.cu
func AddMagnetoelasticField ¶
Add magneto-elastic coupling field to the effective field. See magnetoelasticfield.cu
func AddSlonczewskiTorque2 ¶
func AddSlonczewskiTorque2(torque, m *data.Slice, Msat, J, fixedP, alpha, pol, λ, ε_prime MSlice, thickness MSlice, flp float64, mesh *data.Mesh)
Add Slonczewski ST torque to torque (Tesla). see slonczewski.cu
func AddUniaxialAnisotropy2 ¶
Add uniaxial magnetocrystalline anisotropy field to Beff. see uniaxialanisotropy.cu
func AddZhangLiTorque ¶
Add Zhang-Li ST torque (Tesla) to torque. see zhangli.cu
func Crop ¶
Crop stores in dst a rectangle cropped from src at given offset position. dst size may be smaller than src.
func CrossProduct ¶
func ExchangeDecode ¶
Finds the average exchange strength around each cell, for debugging.
func GetMagnetoelasticForceDensity ¶
Calculate magneto-elastic force density. See magnetoelasticforce.cu
func LLNoPrecess ¶
Landau-Lifshitz torque with precession disabled. Used by engine.Relax().
func LLTorque ¶
Landau-Lifshitz torque divided by gamma0:
−1/(1+α²) [ m x B + α m x (m x B) ], with torque in Tesla, m normalized, and B in Tesla.
see lltorque.cu
func Madd4 ¶
multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3[i] * factor3 + src4[i] * factor4
func Madd5 ¶
func Madd5(dst, src1, src2, src3, src4, src5 *data.Slice, factor1, factor2, factor3, factor4, factor5 float32)
multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3[i] * factor3 + src4[i] * factor4 + src5[i] * factor5
func Madd6 ¶
func Madd6(dst, src1, src2, src3, src4, src5, src6 *data.Slice, factor1, factor2, factor3, factor4, factor5, factor6 float32)
multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3[i] * factor3 + src4[i] * factor4 + src5[i] * factor5 + src6[i] * factor6
func Madd7 ¶
func Madd7(dst, src1, src2, src3, src4, src5, src6, src7 *data.Slice, factor1, factor2, factor3, factor4, factor5, factor6, factor7 float32)
multiply-add: dst[i] = src1[i] * factor1 + src2[i] * factor2 + src3[i] * factor3 + src4[i] * factor4 + src5[i] * factor5 + src6[i] * factor6 + src7[i] * factor7
func MaxVecDiff ¶
Maximum of the norms of the difference between all vectors (x1,y1,z1) and (x2,y2,z2)
(dx, dy, dz) = (x1, y1, z1) - (x2, y2, z2); result = max_i sqrt( dx[i]*dx[i] + dy[i]*dy[i] + dz[i]*dz[i] )
func MaxVecNorm ¶
Maximum of the norms of all vectors (x[i], y[i], z[i]).
max_i sqrt( x[i]*x[i] + y[i]*y[i] + z[i]*z[i] )
func MemCpyDtoH ¶
func MemCpyHtoD ¶
func Memset ¶
Memset sets the Slice's components to the specified values. Use with care on unified slices (a sync is needed).
func Minimize ¶
m = 1 / (4 + τ²(m x H)²) [{4 - τ²(m x H)²} m - 4τ(m x m x H)]. Note: torque from LLNoPrecess has negative sign.
func RegionAddS ¶
dst += LUT[region], for scalar. Used to add terms to scalar excitation.
func RegionAddV ¶
dst += LUT[region], for vectors. Used to add terms to excitation.
func RegionDecode ¶
decode the regions+LUT pair into an uncompressed array
func RegionSelect ¶
select the part of src within the specified region, set 0's everywhere else.
func SetMaxAngle ¶
SetMaxAngle sets dst to the maximum angle of each cell's magnetization with all of its neighbors, provided the exchange stiffness with that neighbor is nonzero.
func SetTemperature ¶
Set Bth to thermal noise (Brown). see temperature.cu
func SetTopologicalCharge ¶
Set s to the topological charge density s = m · (∂m/∂x × ∂m/∂y). See topologicalcharge.cu
func SetTopologicalChargeLattice ¶
Topological charge according to Berg and Lüscher
func ShiftBytes ¶
Like ShiftX, but for bytes.
func ShiftX ¶
Shift dst by shiftX cells (positive or negative) along the X-axis. The new edge value is clampL at the left edge or clampR at the right edge.
Types ¶
type Bytes ¶
3D byte slice, used for region lookup.
type DemagConvolution ¶
type DemagConvolution struct {
// contains filtered or unexported fields
}
Stores the necessary state to perform FFT-accelerated convolution with magnetostatic kernel (or other kernel of same symmetry).
func NewDemag ¶
func NewDemag(inputSize, PBC [3]int, kernel [3][3]*data.Slice, test bool) *DemagConvolution
Initializes a convolution to evaluate the demag field for the given mesh geometry. Sanity-checked if test == true (slow-ish for large meshes).
func (*DemagConvolution) Exec ¶
func (c *DemagConvolution) Exec(B, m, vol *data.Slice, Msat MSlice)
Calculate the demag field of m * vol * Bsat, store result in B.
m: magnetization normalized to unit length; vol: unitless mask used to scale m's length, may be nil; Bsat: saturation magnetization in Tesla (NOTE: the signature names this argument Msat — confirm the intended quantity and units); B: resulting demag field, in Tesla
func (*DemagConvolution) Free ¶
func (c *DemagConvolution) Free()
type MFMConvolution ¶
type MFMConvolution struct {
// contains filtered or unexported fields
}
Stores the necessary state to perform FFT-accelerated convolution
func NewMFM ¶
func NewMFM(mesh *data.Mesh, lift, tipsize float64, cachedir string) *MFMConvolution
Initializes a convolution to evaluate the MFM image for the given mesh geometry.
func (*MFMConvolution) Exec ¶
func (c *MFMConvolution) Exec(outp, inp, vol *data.Slice, Msat MSlice)
Store MFM image in outp, based on magnetization in inp.
func (*MFMConvolution) Free ¶
func (c *MFMConvolution) Free()
func (*MFMConvolution) Reinit ¶
func (c *MFMConvolution) Reinit(lift, tipsize float64, cachedir string)
Source Files ¶
- alloc.go
- angles.go
- anisotropy.go
- buffer.go
- bytes.go
- conv_common.go
- conv_copypad.go
- conv_demag.go
- conv_kernmul.go
- conv_mfm.go
- conv_selftest.go
- copypadmul2_wrapper.go
- copyunpad_wrapper.go
- crop.go
- crop_wrapper.go
- crossproduct.go
- crossproduct_wrapper.go
- cubicanisotropy2_wrapper.go
- div_wrapper.go
- dmi.go
- dmi_wrapper.go
- dmibulk.go
- dmibulk_wrapper.go
- dotproduct.go
- dotproduct_wrapper.go
- exchange.go
- exchange_wrapper.go
- exchangedecode_wrapper.go
- fatbin.go
- fft3dc2r.go
- fft3dr2c.go
- fftplan.go
- init.go
- kernmulc_wrapper.go
- kernmulrsymm2dxy_wrapper.go
- kernmulrsymm2dz_wrapper.go
- kernmulrsymm3d_wrapper.go
- llnoprecess_wrapper.go
- lltorque.go
- lltorque2_wrapper.go
- lut.go
- madd.go
- madd2_wrapper.go
- madd3_wrapper.go
- madd4_wrapper.go
- madd5_wrapper.go
- madd6_wrapper.go
- madd7_wrapper.go
- magnetoelastic.go
- magnetoelasticfield_wrapper.go
- magnetoelasticforce_wrapper.go
- maxangle.go
- maxangle_wrapper.go
- minimize.go
- minimize_wrapper.go
- mslice.go
- mul_wrapper.go
- normalize.go
- normalize_wrapper.go
- phi_wrapper.go
- reduce.go
- reducedot_wrapper.go
- reducemaxabs_wrapper.go
- reducemaxdiff_wrapper.go
- reducemaxvecdiff2_wrapper.go
- reducemaxvecnorm2_wrapper.go
- reducesum_wrapper.go
- region.go
- regionadds_wrapper.go
- regionaddv_wrapper.go
- regiondecode_wrapper.go
- regionselect_wrapper.go
- resize.go
- resize_wrapper.go
- shift.go
- shiftbytes_wrapper.go
- shiftbytesy_wrapper.go
- shiftx_wrapper.go
- shifty_wrapper.go
- shiftz_wrapper.go
- slice.go
- slonczewski.go
- slonczewski2_wrapper.go
- temperature.go
- temperature2_wrapper.go
- theta_wrapper.go
- topologicalcharge.go
- topologicalcharge_wrapper.go
- topologicalchargelattice.go
- topologicalchargelattice_wrapper.go
- uniaxialanisotropy2_wrapper.go
- util.go
- zeromask.go
- zeromask_wrapper.go
- zhangli.go
- zhangli2_wrapper.go