Documentation ¶
Overview ¶
Package cuda provides GPU interaction
Index ¶
- Constants
- Variables
- func AddCubicAnisotropy(Beff, m *data.Slice, k1_red LUTPtr, c1, c2 LUTPtrs, regions *Bytes)
- func AddDMI(Beff *data.Slice, m *data.Slice, D_redx, D_redy, D_redz, A_red float32, ...)
- func AddDotProduct(dst *data.Slice, prefactor float32, a, b *data.Slice)
- func AddExchange(B, m *data.Slice, Aex_red SymmLUT, regions *Bytes, mesh *data.Mesh)
- func AddSlonczewskiTorque(torque, m, J *data.Slice, fixedP LUTPtrs, ...)
- func AddTemperature(Beff, noise *data.Slice, temp_red LUTPtr, kmu0_VgammaDt float64, ...)
- func AddUniaxialAnisotropy(Beff, m *data.Slice, k1_red LUTPtr, u LUTPtrs, regions *Bytes)
- func AddZhangLiTorque(torque, m, J *data.Slice, bsat, alpha, xi, pol LUTPtr, regions *Bytes, ...)
- func Buffer(nComp int, size [3]int) *data.Slice
- func Dot(a, b *data.Slice) float32
- func FreeBuffers()
- func GPUCopy(in *data.Slice) *data.Slice
- func GetCell(s *data.Slice, comp, ix, iy, iz int) float32
- func GetElem(s *data.Slice, comp int, index int) float32
- func Init(gpu int)
- func LLTorque(torque, m, B *data.Slice, alpha LUTPtr, regions *Bytes)
- func Madd2(dst, src1, src2 *data.Slice, factor1, factor2 float32)
- func Madd3(dst, src1, src2, src3 *data.Slice, factor1, factor2, factor3 float32)
- func MaxAbs(in *data.Slice) float32
- func MaxVecDiff(x, y *data.Slice) float64
- func MaxVecNorm(v *data.Slice) float64
- func MemAlloc(bytes int64) unsafe.Pointer
- func Memset(s *data.Slice, val ...float32)
- func Mul(dst, a, b *data.Slice)
- func NewSlice(nComp int, size [3]int) *data.Slice
- func Normalize(vec, vol *data.Slice)
- func Recycle(s *data.Slice)
- func RegionAddV(dst *data.Slice, lut LUTPtrs, regions *Bytes)
- func RegionDecode(dst *data.Slice, lut LUTPtr, regions *Bytes)
- func RegionSelect(dst, src *data.Slice, regions *Bytes, region byte)
- func Resize(dst, src *data.Slice, layer int)
- func SetCell(s *data.Slice, comp int, ix, iy, iz int, value float32)
- func SetElem(s *data.Slice, comp int, index int, value float32)
- func ShiftBytes(dst, src *Bytes, m *data.Mesh, shiftX int, clamp byte)
- func ShiftX(dst, src *data.Slice, shiftX int, clampL, clampR float32)
- func Sum(in *data.Slice) float32
- func Sync()
- func Zero(s *data.Slice)
- type Bytes
- type DemagConvolution
- type LUTPtr
- type LUTPtrs
- type MFMConvolution
- type SymmLUT
Constants ¶
const ( X = 0 Y = 1 Z = 2 )
const CONV_TOLERANCE = 1e-6
Maximum tolerable error on demag convolution self-test.
const FFT_IMAG_TOLERANCE = 1e-6
Maximum tolerable imaginary/real part for demag kernel in Fourier space. Assures kernel has correct symmetry.
const REDUCE_BLOCKSIZE = C.REDUCE_BLOCKSIZE
Block size for reduce kernels.
Variables ¶
var ( Version float32 // cuda version DevName string // GPU name TotalMem int64 // total GPU memory GPUInfo string // Human-readable GPU description Synchronous bool // for debug: synchronize stream0 at every kernel launch )
var ( BlockSize = 512 TileX, TileY = 32, 32 MaxGridSize = 65535 )
CUDA Launch parameters. there might be better choices for recent hardware, but it barely makes a difference in the end.
Functions ¶
func AddCubicAnisotropy ¶
Adds cubic anisotropy field to Beff. see cubicanisotropy.cu
func AddDMI ¶
func AddDMI(Beff *data.Slice, m *data.Slice, D_redx, D_redy, D_redz, A_red float32, mesh *data.Mesh)
Add effective field of Dzyaloshinskii-Moriya interaction to Beff (Tesla). According to Bagdanov and Röβler, PRL 87, 3, 2001. eq.8 (out-of-plane symmetry breaking). See dmi.cu
func AddDotProduct ¶
dst += prefactor * dot(a, b), as used for energy density
func AddExchange ¶
Add exchange field to Beff.
m: normalized magnetization B: effective field in Tesla Aex_red: 2*Aex / (Msat * 1e18 m2)
see exchange.cu
func AddSlonczewskiTorque ¶
func AddSlonczewskiTorque(torque, m, J *data.Slice, fixedP LUTPtrs, Msat, alpha, pol, λ, ε_prime LUTPtr, regions *Bytes, mesh *data.Mesh)
Add Slonczewski ST torque to torque (Tesla). see slonczewski.cu
func AddTemperature ¶
func AddTemperature(Beff, noise *data.Slice, temp_red LUTPtr, kmu0_VgammaDt float64, regions *Bytes)
Add thermal noise (Brown) to Beff. see temperature.cu
func AddUniaxialAnisotropy ¶
Add uniaxial magnetocrystalline anisotropy field to Beff. see uniaxialanisotropy.cu
func AddZhangLiTorque ¶
func AddZhangLiTorque(torque, m, J *data.Slice, bsat, alpha, xi, pol LUTPtr, regions *Bytes, mesh *data.Mesh)
Add Zhang-Li ST torque (Tesla) to torque. see zhangli.cu
func LLTorque ¶
Landau-Lifshitz torque divided by gamma0:
- 1/(1+α²) [ m x B + α m x (m x B) ] torque in Tesla m normalized B in Tesla
see lltorque.cu
func MaxVecDiff ¶
Maximum of the norms of the difference between all vectors (x1,y1,z1) and (x2,y2,z2)
(dx, dy, dz) = (x1, y1, z1) - (x2, y2, z2) max_i sqrt( dx[i]*dx[i] + dy[i]*dy[i] + dz[i]*dz[i] )
func MaxVecNorm ¶
Maximum of the norms of all vectors (x[i], y[i], z[i]).
max_i sqrt( x[i]*x[i] + y[i]*y[i] + z[i]*z[i] )
func Memset ¶
Memset sets the Slice's components to the specified values. To be carefully used on unified slice (need sync)
func RegionAddV ¶
dst += LUT[region], for vectors. Used to add terms to excitation.
func RegionDecode ¶
decode the regions+LUT pair into an uncompressed array
func RegionSelect ¶
select the part of src within the specified region, set 0's everywhere else.
func ShiftBytes ¶
Like Shift, but for bytes
func ShiftX ¶
shift dst by shx cells (positive or negative) along X-axis. new edge value is clampL at left edge or clampR at right edge.
Types ¶
type DemagConvolution ¶
type DemagConvolution struct {
// contains filtered or unexported fields
}
Stores the necessary state to perform FFT-accelerated convolution with magnetostatic kernel (or other kernel of same symmetry).
func NewDemag ¶
func NewDemag(inputSize, PBC [3]int, kernel [3][3]*data.Slice) *DemagConvolution
Initializes a convolution to evaluate the demag field for the given mesh geometry.
func (*DemagConvolution) Exec ¶
func (c *DemagConvolution) Exec(B, m, vol *data.Slice, Bsat LUTPtr, regions *Bytes)
Calculate the demag field of m * vol * Bsat, store result in B.
m: magnetization normalized to unit length vol: unitless mask used to scale m's length, may be nil Bsat: saturation magnetization in Tesla B: resulting demag field, in Tesla
func (*DemagConvolution) Free ¶
func (c *DemagConvolution) Free()
type MFMConvolution ¶
type MFMConvolution struct {
// contains filtered or unexported fields
}
Stores the necessary state to perform FFT-accelerated convolution
func NewMFM ¶
func NewMFM(mesh *data.Mesh, lift, tipsize float64) *MFMConvolution
Initializes a convolution to evaluate the demag field for the given mesh geometry.
func (*MFMConvolution) Exec ¶
func (c *MFMConvolution) Exec(outp, inp, vol *data.Slice, Bsat LUTPtr, regions *Bytes)
store MFM image in output, based on magnetization in inp.
func (*MFMConvolution) Free ¶
func (c *MFMConvolution) Free()
func (*MFMConvolution) Reinit ¶
func (c *MFMConvolution) Reinit(lift, tipsize float64)
Source Files ¶
- alloc.go
- buffer.go
- bytes.go
- conv_common.go
- conv_copypad.go
- conv_demag.go
- conv_kernmul.go
- conv_mfm.go
- conv_selftest.go
- copypadmul_wrapper.go
- copyunpad_wrapper.go
- cubicanisotropy.go
- cubicanisotropy_wrapper.go
- dmi.go
- dmi_wrapper.go
- dotproduct.go
- dotproduct_wrapper.go
- exchange.go
- exchange_wrapper.go
- fatbin.go
- fft3dc2r.go
- fft3dr2c.go
- fftplan.go
- init.go
- kernmulc_wrapper.go
- kernmulrsymm2dxy_wrapper.go
- kernmulrsymm2dz_wrapper.go
- kernmulrsymm3d_wrapper.go
- lltorque.go
- lltorque_wrapper.go
- lut.go
- madd.go
- madd2_wrapper.go
- madd3_wrapper.go
- mul_wrapper.go
- normalize.go
- normalize_wrapper.go
- reduce.go
- reducedot_wrapper.go
- reducemaxabs_wrapper.go
- reducemaxdiff_wrapper.go
- reducemaxvecdiff2_wrapper.go
- reducemaxvecnorm2_wrapper.go
- reducesum_wrapper.go
- region.go
- regionaddv_wrapper.go
- regiondecode_wrapper.go
- regionselect_wrapper.go
- resize.go
- resize_wrapper.go
- shift.go
- shiftbytes_wrapper.go
- shiftx_wrapper.go
- slice.go
- slonczewski.go
- slonczewski_wrapper.go
- temperature.go
- temperature_wrapper.go
- uniaxialanisotropy.go
- uniaxialanisotropy_wrapper.go
- util.go
- zhangli.go
- zhangli_wrapper.go