Documentation ¶
Overview ¶
Low-level CUDA functionality. Inside this package, all data is stored in ZYX coordinates. I.e., our X axis (component 0) is perpendicular to the thin film and has usually a small number of cells (or even just one). Z is usually the longest axis. This annoying convention is imposed by the cuda FFT data layout.
Index ¶
- Constants
- Variables
- func AddCubicAnisotropy(Beff, m *data.Slice, k1_red LUTPtr, c1, c2 LUTPtrs, regions *Bytes, str int)
- func AddDMI(Beff *data.Slice, m *data.Slice, D_red0, D_red1, D_red2, A_red float32, ...)
- func AddExchange(B, m *data.Slice, Aex_red SymmLUT, regions *Bytes, str int)
- func AddSlonczewskiTorque(torque, m, J *data.Slice, fixedP LUTPtrs, ...)
- func AddTemperature(Beff, noise *data.Slice, temp_red LUTPtr, kmu0_VgammaDt float64, ...)
- func AddUniaxialAnisotropy(Beff, m *data.Slice, k1_red LUTPtr, u LUTPtrs, regions *Bytes, str int)
- func AddZhangLiTorque(torque, m, jpol *data.Slice, bsat, alpha, xi, pol LUTPtr, regions *Bytes)
- func Buffer(nComp int, m *data.Mesh) *data.Slice
- func Dot(a, b *data.Slice) float32
- func GPUCopy(in *data.Slice) *data.Slice
- func GetCell(s *data.Slice, comp, i, j, k int) float32
- func GetElem(s *data.Slice, comp int, index int) float32
- func Init(gpu int, sched string)
- func LLTorque(torque, m, B *data.Slice, alpha LUTPtr, regions *Bytes)
- func LockThread()
- func Madd2(dst, src1, src2 *data.Slice, factor1, factor2 float32)
- func Madd3(dst, src1, src2, src3 *data.Slice, factor1, factor2, factor3 float32)
- func MaxAbs(in *data.Slice) float32
- func MaxVecDiff(x, y *data.Slice) float64
- func MaxVecNorm(v *data.Slice) float64
- func MemAlloc(bytes int64) unsafe.Pointer
- func Memset(s *data.Slice, val ...float32)
- func Mul(dst, a, b *data.Slice)
- func NewSlice(nComp int, m *data.Mesh) *data.Slice
- func NewUnifiedSlice(nComp int, m *data.Mesh) *data.Slice
- func Normalize(vec, vol *data.Slice)
- func Recycle(s *data.Slice)
- func RegionAddV(dst *data.Slice, lut LUTPtrs, regions *Bytes, str int)
- func RegionDecode(dst *data.Slice, lut LUTPtr, regions *Bytes)
- func RegionSelect(dst, src *data.Slice, regions *Bytes, region byte)
- func Resize(dst, src *data.Slice, layer int)
- func SetCell(s *data.Slice, comp int, i, j, k int, value float32)
- func SetElem(s *data.Slice, comp int, index int, value float32)
- func Shift(dst, src *data.Slice, shift [3]int)
- func ShiftBytes(dst, src *Bytes, m *data.Mesh, shift [3]int)
- func Sum(in *data.Slice) float32
- func Sync(str int)
- func SyncAll()
- func Zero(s *data.Slice)
- type Bytes
- type DemagConvolution
- type LUTPtr
- type LUTPtrs
- type SymmLUT
Constants ¶
const CONV_TOLERANCE = 1e-6
Maximum tolerable error on demag convolution self-test.
const DEFAULT_KERNEL_ACC = 6
Default accuracy setting for demag kernel.
const FFT_IMAG_TOLERANCE = 1e-5
Maximum tolerable imaginary/real part for demag kernel in Fourier space. Assures kernel has correct symmetry.
const REDUCE_BLOCKSIZE = C.REDUCE_BLOCKSIZE
Block size for reduce kernels.
Variables ¶
var ( Version float32 DevName string TotalMem int64 )
var ( BlockSize = 512 TileX, TileY = 32, 32 MaxGridSize = 65535 )
CUDA Launch parameters. TODO: optimize?
Functions ¶
func AddCubicAnisotropy ¶
func AddDMI ¶
Add effective field of Dzyaloshinskii-Moriya interaction to Beff (Tesla). According to Bagdanov and Röβler, PRL 87, 3, 2001. eq.8 (out-of-plane symmetry breaking). See dmi.cu
func AddExchange ¶
Add exchange field to Beff.
m: normalized magnetization B: effective field in Tesla Aex_red: 2*Aex / (Msat * 1e18 m2)
func AddSlonczewskiTorque ¶
func AddTemperature ¶
func AddUniaxialAnisotropy ¶
Add uniaxial magnetocrystalline anisotropy field to Beff.
func AddZhangLiTorque ¶
func LLTorque ¶
Landau-Lifshitz torque divided by gamma0:
- 1/(1+α²) [ m x B + α m x (m x B) ] torque in Tesla m normalized B in Tesla
func LockThread ¶
func LockThread()
LockCudaThread locks the current goroutine to an OS thread and sets the CUDA context for that thread. To be called by every fresh goroutine that will use CUDA.
func MaxVecDiff ¶
Maximum of the norms of the difference between all vectors (x1,y1,z1) and (x2,y2,z2)
(dx, dy, dz) = (x1, y1, z1) - (x2, y2, z2) max_i sqrt( dx[i]*dx[i] + dy[i]*dy[i] + dz[i]*dz[i] )
func MaxVecNorm ¶
Maximum of the norms of all vectors (x[i], y[i], z[i]).
max_i sqrt( x[i]*x[i] + y[i]*y[i] + z[i]*z[i] )
func NewUnifiedSlice ¶
Make a GPU Slice with nComp components each of size length.
func RegionAddV ¶
dst += LUT[region], for vectors. Used for complex excitation.
func RegionDecode ¶
decode the regions+LUT pair into an uncompressed array
func RegionSelect ¶
select the part of src within the specified region, set 0's everywhere else.
func Shift ¶
Copy dst to src, shifting data by given number of cells. Off-boundary values are clamped. Used, e.g., to make the simulation window follow interesting features.
func ShiftBytes ¶
Like Shift, but for bytes
Types ¶
type DemagConvolution ¶
type DemagConvolution struct { FFTMesh data.Mesh // mesh of FFT m // contains filtered or unexported fields }
Stores the necessary state to perform FFT-accelerated convolution with magnetostatic kernel (or other kernel of same symmetry).
func NewDemag ¶
func NewDemag(mesh *data.Mesh) *DemagConvolution
Initializes a convolution to evaluate the demag field for the given mesh geometry.
func (*DemagConvolution) Exec ¶
func (c *DemagConvolution) Exec(B, m, vol *data.Slice, Bsat LUTPtr, regions *Bytes)
Calculate the demag field of m * vol * Bsat, store result in B.
m: magnetization normalized to unit length vol: unitless mask used to scale m's length, may be nil Bsat: saturation magnetization in Tesla B: resulting demag field, in Tesla
Source Files ¶
- alloc.go
- buffer.go
- bytes.go
- conv_common.go
- conv_copypad.go
- conv_demag.go
- conv_kernmul.go
- conv_selftest.go
- copypadmul_wrapper.go
- copyunpad_wrapper.go
- cubicanisotropy.go
- cubicanisotropy_wrapper.go
- dmi.go
- dmi_wrapper.go
- doc.go
- exchange.go
- exchange_wrapper.go
- fatbin.go
- fft3dc2r.go
- fft3dr2c.go
- fftplan.go
- init.go
- kernmulrsymm2dx_wrapper.go
- kernmulrsymm2dyz_wrapper.go
- kernmulrsymm3d_wrapper.go
- lltorque.go
- lltorque_wrapper.go
- lut.go
- madd.go
- madd2_wrapper.go
- madd3_wrapper.go
- mul_wrapper.go
- normalize.go
- normalize_wrapper.go
- reduce.go
- reducedot_wrapper.go
- reducemaxabs_wrapper.go
- reducemaxdiff_wrapper.go
- reducemaxvecdiff2_wrapper.go
- reducemaxvecnorm2_wrapper.go
- reducesum_wrapper.go
- region.go
- regionaddv_wrapper.go
- regiondecode_wrapper.go
- regionselect_wrapper.go
- resize.go
- resize_wrapper.go
- shift.go
- shift_wrapper.go
- shiftbytes_wrapper.go
- slice.go
- slonczewski.go
- slonczewski_wrapper.go
- streampool.go
- temperature.go
- temperature_wrapper.go
- uniaxialanisotropy.go
- uniaxialanisotropy_wrapper.go
- util.go
- zhangli.go
- zhangli_wrapper.go