Documentation ¶
Overview ¶
Example ¶
package main

import (
	"reflect"
	"runtime"
	"unsafe"

	"github.com/pkg/errors"
	"gonum.org/v1/gonum/blas"
	"gorgonia.org/cu"
	"gorgonia.org/cu/blas"
	"gorgonia.org/tensor"
)

// Engine is a tensor engine that allocates memory through a CUDA context and
// performs MatVecMul, MatMul and Outer through the embedded cuBLAS handler.
// Everything else falls back to the embedded tensor.StdEng.
type Engine struct {
	tensor.StdEng
	ctx cu.Context
	*cublas.Standard
}

// newEngine creates a CUDA context on device 0 and a cuBLAS handler bound to
// that context, and bundles both into an Engine.
func newEngine() *Engine {
	ctx := cu.NewContext(cu.Device(0), cu.SchedAuto)
	// Named impl rather than blas so the imported gonum blas package is not
	// shadowed inside this function.
	impl := cublas.New(cublas.WithContext(ctx))
	return &Engine{
		ctx:      ctx,
		Standard: impl,
	}
}

// AllocAccessible always returns true.
func (e *Engine) AllocAccessible() bool { return true }

// Alloc allocates size bytes with MemAllocManaged using cu.AttachGlobal.
func (e *Engine) Alloc(size int64) (tensor.Memory, error) {
	return e.ctx.MemAllocManaged(size, cu.AttachGlobal)
}

// AllocFlags reports that memory from this engine is manually managed and
// that tensors backed by it are column-major.
func (e *Engine) AllocFlags() (tensor.MemoryFlag, tensor.DataOrder) {
	return tensor.MakeMemoryFlag(tensor.ManuallyManaged), tensor.ColMajor
}

// Free releases mem through the CUDA context. mem must be a cu.DevicePtr;
// any other type yields an error instead of a panic. size is unused.
func (e *Engine) Free(mem tensor.Memory, size int64) error {
	ptr, ok := mem.(cu.DevicePtr)
	if !ok {
		return errors.Errorf("cannot free %T: expected a cu.DevicePtr", mem)
	}
	e.ctx.MemFree(ptr)
	return nil
}

// Memset is not implemented and panics.
func (e *Engine) Memset(mem tensor.Memory, val interface{}) error {
	panic("not implemented")
}

// Memclr is not implemented and panics.
func (e *Engine) Memclr(mem tensor.Memory) {
	panic("not implemented")
}

// Memcpy is not implemented and panics.
func (e *Engine) Memcpy(dst tensor.Memory, src tensor.Memory) error {
	panic("not implemented")
}

// Accessible copies mem from the device into freshly allocated host memory
// and returns it as a foomem (a []float64 view). The length of the view is
// mem.MemSize()/8, so trailing bytes that do not fill a whole float64 are not
// visible through the result. The error returned is the context's current
// error after the copy was issued.
func (e *Engine) Accessible(mem tensor.Memory) (tensor.Memory, error) {
	size := mem.MemSize()
	if size == 0 {
		// &buf[0] below would panic on an empty buffer.
		return nil, errors.New("cannot make zero-sized memory accessible")
	}

	buf := make([]byte, int(size))
	e.ctx.MemcpyDtoH(unsafe.Pointer(&buf[0]), cu.DevicePtr(mem.Uintptr()), int64(size))

	// Reinterpret the byte buffer as float64s. The slice header is edited
	// through a pointer to a real slice (never built as a standalone struct),
	// and buf is kept alive until the header holds a proper reference to it.
	n := int(size / 8)
	var out foomem
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&out))
	hdr.Data = uintptr(unsafe.Pointer(&buf[0]))
	hdr.Len = n
	hdr.Cap = n
	runtime.KeepAlive(buf)

	return out, e.ctx.Error()
}

// WorksWith always returns true, whatever the data order.
func (e *Engine) WorksWith(order tensor.DataOrder) bool { return true }

// NonStdAlloc is an empty marker method.
// NOTE(review): presumably satisfies a tensor interface for engines with
// non-standard allocation - confirm against the tensor package.
func (e *Engine) NonStdAlloc() {}

// ContextErr returns the CUDA context's current error.
func (e *Engine) ContextErr() error { return e.ctx.Error() }

// foomem is host memory viewed as float64s. Its methods give it the
// Uintptr/Pointer/MemSize method set that Accessible returns as a
// tensor.Memory.
type foomem []float64

// Uintptr returns the address of the first element. It panics if m is empty.
func (m foomem) Uintptr() uintptr { return uintptr(unsafe.Pointer(&m[0])) }

// Pointer returns a pointer to the first element. It panics if m is empty.
func (m foomem) Pointer() unsafe.Pointer { return unsafe.Pointer(&m[0]) }

// MemSize returns the size of m in bytes (8 bytes per element).
func (m foomem) MemSize() uintptr { return uintptr(len(m) * 8) }

// checkThreeFloat validates the operands of a three-tensor BLAS call: all of
// a, b and ret must be manually managed, share one Dtype, and be
// *tensor.Dense. On success the three tensors are returned as *tensor.Dense.
func (e *Engine) checkThreeFloat(a, b, ret tensor.Tensor) (ad, bd, retVal *tensor.Dense, err error) {
	if /*a.IsNativelyAccessible() &&*/ !a.IsManuallyManaged() {
		return nil, nil, nil, errors.New("CUDA Engine only takes non-natively accessible memory (memory on graphics cards). a isn't.")
	}

	if /* b.IsNativelyAccessible() && */ !b.IsManuallyManaged() {
		return nil, nil, nil, errors.New("CUDA Engine only takes non-natively accessible memory (memory on graphics cards). b isn't")
	}

	if /* ret.IsNativelyAccessible() && */ !ret.IsManuallyManaged() {
		return nil, nil, nil, errors.New("CUDA Engine only takes non-natively accessible memory (memory on graphics cards). ret isn't")
	}

	if a.Dtype() != b.Dtype() || b.Dtype() != ret.Dtype() {
		return nil, nil, nil, errors.New("Expected a and b and retVal all to have the same Dtype")
	}

	var ok bool
	if ad, ok = a.(*tensor.Dense); !ok {
		return nil, nil, nil, errors.New("Expected a to be a *tensor.Dense")
	}
	if bd, ok = b.(*tensor.Dense); !ok {
		return nil, nil, nil, errors.New("Expected b to be a *tensor.Dense")
	}
	if retVal, ok = ret.(*tensor.Dense); !ok {
		return nil, nil, nil, errors.New("Expected ret to be a *tensor.Dense")
	}
	return ad, bd, retVal, nil
}

// MatVecMul computes prealloc = a × b for a matrix a and a vector b using
// Dgemv (float64) or Sgemv (float32) with alpha = 1 and beta = 0. The
// transpose flag, dimensions and leading dimension passed to BLAS are derived
// from a's data order. Any other Dtype is rejected with an error.
func (e *Engine) MatVecMul(a, b, prealloc tensor.Tensor) (err error) {
	var ad, bd, pd *tensor.Dense
	if ad, bd, pd, err = e.checkThreeFloat(a, b, prealloc); err != nil {
		return errors.Wrap(err, "MatVecMul failed pre check")
	}

	tA := blas.Trans
	do := a.DataOrder()
	z := do.IsTransposed()

	m := a.Shape()[0]
	n := a.Shape()[1]

	var lda int
	switch {
	case do.IsRowMajor() && z:
		tA = blas.NoTrans
		lda = m
	case do.IsRowMajor() && !z:
		lda = n
		m, n = n, m
	case do.IsColMajor() && z:
		tA = blas.Trans
		lda = n
		m, n = n, m
	case do.IsColMajor() && !z:
		lda = m
		tA = blas.NoTrans
	}

	incX, incY := 1, 1 // step size

	// ASPIRATIONAL TODO: different incX and incY
	// TECHNICAL DEBT. TECHDEBT. TECH DEBT
	// Example use case:
	// log.Printf("a %v %v", ad.Strides(), ad.ostrides())
	// log.Printf("b %v", b.Strides())
	// incX := a.Strides()[0]
	// incY = b.Strides()[0]

	switch ad.Dtype() {
	case tensor.Float64:
		A := ad.Float64s()
		X := bd.Float64s()
		Y := pd.Float64s()
		alpha, beta := float64(1), float64(0)
		e.Standard.Dgemv(tA, m, n, alpha, A, lda, X, incX, beta, Y, incY)
	case tensor.Float32:
		A := ad.Float32s()
		X := bd.Float32s()
		Y := pd.Float32s()
		alpha, beta := float32(1), float32(0)
		e.Standard.Sgemv(tA, m, n, alpha, A, lda, X, incX, beta, Y, incY)
	default:
		return errors.New("Unsupported Dtype")
	}
	return e.Standard.Err()
}

// MatMul computes prealloc = a × b using Dgemm (float64) or Sgemm (float32)
// with alpha = 1 and beta = 0. a and b must share a data order. For row-major
// inputs the operands are swapped before the call, as the in-line comments
// describe. Any other Dtype is rejected with an error.
func (e *Engine) MatMul(a, b, prealloc tensor.Tensor) (err error) {
	var ad, bd, pd *tensor.Dense
	if ad, bd, pd, err = e.checkThreeFloat(a, b, prealloc); err != nil {
		return errors.Wrap(err, "MatMul failed pre check")
	}

	ado := a.DataOrder()
	bdo := b.DataOrder()
	if !ado.HasSameOrder(bdo) {
		return errors.Errorf("a does not have the same data order as b. a is %v. b is %v", a.DataOrder(), b.DataOrder())
	}

	// get result shapes. k is the shared dimension
	// a is (m, k)
	// b is (k, n)
	// c is (m, n)
	var m, n, k int
	m = ad.Shape()[0]
	k = ad.Shape()[1]
	n = bd.Shape()[1]

	// // wrt the strides, we use the original strides, because that's what BLAS needs, instead of calling .Strides()
	// // lda in colmajor = number of rows;
	// // lda in row major = number of cols
	var lda, ldb, ldc int
	tA, tB := blas.Trans, blas.Trans
	za := ado.IsTransposed()
	zb := bdo.IsTransposed()

	// swapping around the operands if they are row major (a becomes b, and b becomes a)
	switch {
	case ado.IsColMajor() && bdo.IsColMajor() && !za && !zb:
		lda = m
		ldb = k
		ldc = prealloc.Shape()[0]
		tA, tB = blas.NoTrans, blas.NoTrans
	case ado.IsColMajor() && bdo.IsColMajor() && za && !zb:
		lda = k
		ldb = k
		ldc = prealloc.Shape()[0]
		tA, tB = blas.Trans, blas.NoTrans
	case ado.IsColMajor() && bdo.IsColMajor() && za && zb:
		lda = k
		ldb = n
		ldc = prealloc.Shape()[0]
		tA, tB = blas.Trans, blas.Trans
	case ado.IsColMajor() && bdo.IsColMajor() && !za && zb:
		lda = m
		ldb = n
		ldc = prealloc.Shape()[0]
		tA, tB = blas.NoTrans, blas.Trans
	case ado.IsRowMajor() && bdo.IsRowMajor() && !za && !zb:
		lda = k
		ldb = n
		ldc = prealloc.Shape()[1]
		tA, tB = blas.NoTrans, blas.NoTrans

		// magic swappy thingy
		m, n = n, m
		lda, ldb = ldb, lda
		ad, bd = bd, ad
	case ado.IsRowMajor() && bdo.IsRowMajor() && za && !zb:
		lda = m
		ldb = n
		ldc = prealloc.Shape()[1]
		tA, tB = blas.Trans, blas.NoTrans

		// magic swappy thingy
		m, n = n, m
		lda, ldb = ldb, lda
		tA, tB = tB, tA
		ad, bd = bd, ad
	case ado.IsRowMajor() && bdo.IsRowMajor() && za && zb:
		lda = m
		ldb = k
		ldc = prealloc.Shape()[1]
		tA, tB = blas.Trans, blas.Trans

		// magic swappy thingy
		m, n = n, m
		lda, ldb = ldb, lda
		ad, bd = bd, ad
	case ado.IsRowMajor() && bdo.IsRowMajor() && !za && zb:
		lda = k
		ldb = k
		ldc = prealloc.Shape()[1]
		tA, tB = blas.NoTrans, blas.Trans

		// magic swappy thingy
		m, n = n, m
		lda, ldb = ldb, lda
		tA, tB = tB, tA
		ad, bd = bd, ad
	default:
		panic("Unreachable")
	}

	switch ad.Dtype() {
	case tensor.Float64:
		A := ad.Float64s()
		B := bd.Float64s()
		C := pd.Float64s()
		alpha, beta := float64(1), float64(0)
		e.Standard.Dgemm(tA, tB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)
	case tensor.Float32:
		A := ad.Float32s()
		B := bd.Float32s()
		C := pd.Float32s()
		alpha, beta := float32(1), float32(0)
		e.Standard.Sgemm(tA, tB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)
	default:
		return errors.Errorf("Unsupported Dtype %v", ad.Dtype())
	}
	return e.Standard.Err()
}

// Outer computes the outer product of the vectors a and b into prealloc.
//
// When prealloc is column-major the rank-one update Dger/Sger is used with
// alpha = 1; it accumulates into whatever prealloc already holds. When
// prealloc is row-major the product is computed as a (len(a)×1)·(1×len(b))
// MatMul instead. Any other Dtype is rejected with an error.
func (e *Engine) Outer(a, b, prealloc tensor.Tensor) (err error) {
	var ad, bd, pd *tensor.Dense
	if ad, bd, pd, err = e.checkThreeFloat(a, b, prealloc); err != nil {
		return errors.Wrap(err, "Outer failed pre check")
	}

	m := ad.Size()
	n := bd.Size()
	pdo := pd.DataOrder()

	var lda int
	switch {
	case pdo.IsColMajor():
		lda = pd.Shape()[0]
	case pdo.IsRowMajor():
		return e.outerViaMatMul(a, b, prealloc)
	}

	incX, incY := 1, 1
	switch ad.Dtype() {
	case tensor.Float64:
		x := ad.Float64s()
		y := bd.Float64s()
		A := pd.Float64s()
		alpha := float64(1)
		e.Standard.Dger(m, n, alpha, x, incX, y, incY, A, lda)
	case tensor.Float32:
		x := ad.Float32s()
		y := bd.Float32s()
		A := pd.Float32s()
		alpha := float32(1)
		e.Standard.Sger(m, n, alpha, x, incX, y, incY, A, lda)
	default:
		// Without this the call would silently leave prealloc untouched.
		return errors.Errorf("Unsupported Dtype %v", ad.Dtype())
	}
	return e.Standard.Err()
}

// outerViaMatMul computes the outer product of a and b as a matrix product by
// temporarily reshaping a to a column and b to a row. The original shapes are
// restored on every exit path, including when MatMul fails; the first error
// encountered is the one reported.
func (e *Engine) outerViaMatMul(a, b, prealloc tensor.Tensor) (err error) {
	aShape := a.Shape().Clone()
	bShape := b.Shape().Clone()

	if err = a.Reshape(aShape[0], 1); err != nil {
		return err
	}
	defer func() {
		if rerr := a.Reshape(aShape...); err == nil {
			err = rerr
		}
	}()

	if err = b.Reshape(1, bShape[0]); err != nil {
		return err
	}
	// Deferred calls run last-in first-out, so b is restored before a.
	defer func() {
		if rerr := b.Reshape(bShape...); err == nil {
			err = rerr
		}
	}()

	return e.MatMul(a, b, prealloc)
}

// main pins the goroutine to its OS thread for the lifetime of the program
// and runs every example case in turn.
func main() {
	// debug.SetGCPercent(-1)
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	matVecMulColmajorNonTransposed()
	matVecMulColmajorTransposed()
	matVecMulRowMajorNonTransposed()
	matVecMulRowMajorTransposed()

	matMulColmajorNTNT()
	matMulColmajorTNT()
	matMulColmajorTT()
	matMulColmajorNTT()

	matMulRowmajorNTNT()
	matMulRowmajorTNT()
	matMulRowmajorTT()
	matMulRowmajorNTT()

	outerColMajor()
	outerRowMajor()
}
Output: ColMajor Non Transposed A: ⎡1 2 3⎤ ⎣4 5 6⎦ B:[1 2 3] C:[1000 1000] C: [14 32] ========== ColMajor Transposed A: ⎡1 4⎤ ⎢2 5⎥ ⎣3 6⎦ B:[1 2] C[1000 1000 1000] C: [9 12 15] ========== RowMajor Non Transposed A: ⎡1 2 3⎤ ⎣4 5 6⎦ B:[1 2 3] C[1000 1000] C: [14 32] ========== RowMajor Transposed A: ⎡1 4⎤ ⎢2 5⎥ ⎣3 6⎦ B:[1 2] C[1000 1000 1000] C: [9 12 15] ========== ColMajor Non Transposed Non Transposed A: ⎡1 2 3⎤ ⎣4 5 6⎦ B: ⎡ 0 1 2 3⎤ ⎢ 4 5 6 7⎥ ⎣ 8 9 10 11⎦ C: ⎡1000 1000 1000 1000⎤ ⎣1000 1000 1000 1000⎦ C: [32 68 38 83 44 98 50 113] ========== ColMajor Transposed Non Transposed A: ⎡1 4⎤ ⎢2 5⎥ ⎣3 6⎦ B: ⎡0 1 2 3⎤ ⎣4 5 6 7⎦ C: ⎡1000 1000 1000 1000⎤ ⎢1000 1000 1000 1000⎥ ⎣1000 1000 1000 1000⎦ C: [16 20 24 21 27 33 26 34 42 31 41 51] ========== ColMajor Transposed Transposed A: ⎡1 4⎤ ⎢2 5⎥ ⎣3 6⎦ B: ⎡0 2 4 6⎤ ⎣1 3 5 7⎦ C: ⎡1000 1000 1000 1000⎤ ⎢1000 1000 1000 1000⎥ ⎣1000 1000 1000 1000⎦ C: [4 5 6 14 19 24 24 33 42 34 47 60] ========== ColMajor Non Transposed Transposed A: ⎡1 2 3⎤ ⎣4 5 6⎦ B: ⎡ 0 3 6 9⎤ ⎢ 1 4 7 10⎥ ⎣ 2 5 8 11⎦ C: ⎡1000 1000 1000 1000⎤ ⎣1000 1000 1000 1000⎦ C: [8 17 26 62 44 107 62 152] ========== RowMajor Non Transposed Non Transposed A: ⎡1 2 3⎤ ⎣4 5 6⎦ B: ⎡ 0 1 2 3⎤ ⎢ 4 5 6 7⎥ ⎣ 8 9 10 11⎦ C: ⎡1000 1000 1000 1000⎤ ⎣1000 1000 1000 1000⎦ C: [32 38 44 50 68 83 98 113] ========== RowMajor Transposed Non Transposed A: ⎡1 3 5⎤ ⎣2 4 6⎦ B: ⎡ 0 1 2 3⎤ ⎢ 4 5 6 7⎥ ⎣ 8 9 10 11⎦ C: ⎡1000 1000 1000 1000⎤ ⎣1000 1000 1000 1000⎦ C: [52 61 70 79 64 76 88 100] ========== RowMajor Transposed Non Transposed A: ⎡1 3 5⎤ ⎣2 4 6⎦ B: ⎡ 0 3 6 9⎤ ⎢ 1 4 7 10⎥ ⎣ 2 5 8 11⎦ C: ⎡1000 1000 1000 1000⎤ ⎣1000 1000 1000 1000⎦ C: [13 40 67 94 16 52 88 124] ========== RowMajor Transposed Non Transposed A: ⎡1 2 3⎤ ⎣4 5 6⎦ B: ⎡ 0 3 6 9⎤ ⎢ 1 4 7 10⎥ ⎣ 2 5 8 11⎦ C: ⎡1000 1000 1000 1000⎤ ⎣1000 1000 1000 1000⎦ C: [8 26 44 62 17 62 107 152] ========== RowMajor Non Transposed A: [1 2 3] B:[0 1] C ⎡1000 1000⎤ ⎢1000 1000⎥ ⎣1000 1000⎦ C: [0 0 0 1 2 3] ========== RowMajor Non Transposed A: [1 2 3] B:[0 1] 
C ⎡1000 1000⎤ ⎢1000 1000⎥ ⎣1000 1000⎦ C: [0 1 0 2 0 3] ==========
Index ¶
- Constants
- type BLAS
- type ConsOpt
- type Order
- type PointerMode
- type Standard
- func (impl *Standard) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int)
- func (impl *Standard) Ccopy(n int, x []complex64, incX int, y []complex64, incY int)
- func (impl *Standard) Cdgmm(mode blas.Side, m, n int, a []complex64, lda int, x []complex64, incX int, ...)
- func (impl *Standard) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64)
- func (impl *Standard) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64)
- func (impl *Standard) Cgbmv(tA blas.Transpose, m, n, kl, ku int, alpha complex64, a []complex64, lda int, ...)
- func (impl *Standard) Cgeam(tA, tB blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, ...)
- func (impl *Standard) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, ...)
- func (impl *Standard) Cgemm3m(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, ...)
- func (impl *Standard) Cgemv(tA blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, ...)
- func (impl *Standard) Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ...)
- func (impl *Standard) Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, ...)
- func (impl *Standard) Chbmv(ul blas.Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, ...)
- func (impl *Standard) Chemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, ...)
- func (impl *Standard) Chemv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, ...)
- func (impl *Standard) Cher(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, ...)
- func (impl *Standard) Cher2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, ...)
- func (impl *Standard) Cher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, ...)
- func (impl *Standard) Cherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []complex64, ...)
- func (impl *Standard) Cherkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, ...)
- func (impl *Standard) Chpmv(ul blas.Uplo, n int, alpha complex64, aP, x []complex64, incX int, ...)
- func (impl *Standard) Chpr(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, aP []complex64)
- func (impl *Standard) Chpr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, ...)
- func (impl *Standard) Close() error
- func (impl *Standard) Crot(n int, x []complex64, incX int, y []complex64, incY int, cScalar float32, ...)
- func (impl *Standard) Cscal(n int, alpha complex64, x []complex64, incX int)
- func (impl *Standard) Csscal(n int, alpha float32, x []complex64, incX int)
- func (impl *Standard) Cswap(n int, x []complex64, incX int, y []complex64, incY int)
- func (impl *Standard) Csymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, ...)
- func (impl *Standard) Csymv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, ...)
- func (impl *Standard) Csyr(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, a []complex64, ...)
- func (impl *Standard) Csyr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, ...)
- func (impl *Standard) Csyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, ...)
- func (impl *Standard) Csyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, ...)
- func (impl *Standard) Csyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, ...)
- func (impl *Standard) Ctbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, ...)
- func (impl *Standard) Ctbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, ...)
- func (impl *Standard) Ctpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex64, ...)
- func (impl *Standard) Ctpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex64, ...)
- func (impl *Standard) Ctpttr(ul blas.Uplo, n int, aP, a []complex64, lda int)
- func (impl *Standard) Ctrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, ...)
- func (impl *Standard) Ctrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, ...)
- func (impl *Standard) Ctrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, ...)
- func (impl *Standard) Ctrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, ...)
- func (impl *Standard) Ctrttp(ul blas.Uplo, n int, a []complex64, lda int, aP []complex64)
- func (impl *Standard) Dasum(n int, x []float64, incX int) (retVal float64)
- func (impl *Standard) Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int)
- func (impl *Standard) Dcopy(n int, x []float64, incX int, y []float64, incY int)
- func (impl *Standard) Ddgmm(mode blas.Side, m, n int, a []float64, lda int, x []float64, incX int, ...)
- func (impl *Standard) Ddot(n int, x []float64, incX int, y []float64, incY int) (retVal float64)
- func (impl *Standard) Dgbmv(tA blas.Transpose, m, n, kl, ku int, alpha float64, a []float64, lda int, ...)
- func (impl *Standard) Dgeam(tA, tB blas.Transpose, m, n int, alpha float64, a []float64, lda int, ...)
- func (impl *Standard) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a []float64, lda int, ...)
- func (impl *Standard) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, ...)
- func (impl *Standard) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, ...)
- func (impl *Standard) Dnrm2(n int, x []float64, incX int) (retVal float64)
- func (impl *Standard) Drot(n int, x []float64, incX int, y []float64, incY int, cScalar, sScalar float64)
- func (impl *Standard) Drotg(a float64, b float64) (c float64, s float64, r float64, z float64)
- func (impl *Standard) Drotm(n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams)
- func (impl *Standard) Drotmg(d1 float64, d2 float64, b1 float64, b2 float64) (p blas.DrotmParams, rd1 float64, rd2 float64, rb1 float64)
- func (impl *Standard) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, ...)
- func (impl *Standard) Dscal(n int, alpha float64, x []float64, incX int)
- func (impl *Standard) Dsdot(n int, x []float32, incX int, y []float32, incY int) float64
- func (impl *Standard) Dspmv(ul blas.Uplo, n int, alpha float64, aP, x []float64, incX int, beta float64, ...)
- func (impl *Standard) Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, aP []float64)
- func (impl *Standard) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, ...)
- func (impl *Standard) Dswap(n int, x []float64, incX int, y []float64, incY int)
- func (impl *Standard) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, ...)
- func (impl *Standard) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, ...)
- func (impl *Standard) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, ...)
- func (impl *Standard) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, ...)
- func (impl *Standard) Dsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, ...)
- func (impl *Standard) Dsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, ...)
- func (impl *Standard) Dsyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, ...)
- func (impl *Standard) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, ...)
- func (impl *Standard) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, ...)
- func (impl *Standard) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float64, incX int)
- func (impl *Standard) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float64, incX int)
- func (impl *Standard) Dtpttr(ul blas.Uplo, n int, aP, a []float64, lda int)
- func (impl *Standard) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, ...)
- func (impl *Standard) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, ...)
- func (impl *Standard) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, ...)
- func (impl *Standard) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, ...)
- func (impl *Standard) Dtrttp(ul blas.Uplo, n int, a []float64, lda int, aP []float64)
- func (impl *Standard) Dzasum(n int, x []complex128, incX int) (retVal float64)
- func (impl *Standard) Dznrm2(n int, x []complex128, incX int) (retVal float64)
- func (impl *Standard) Err() error
- func (impl *Standard) Icamax(n int, x []complex64, incX int) (retVal int)
- func (impl *Standard) Icamin(n int, x []complex64, incX int) (retVal int)
- func (impl *Standard) Idamax(n int, x []float64, incX int) (retVal int)
- func (impl *Standard) Idamin(n int, x []float64, incX int) (retVal int)
- func (impl *Standard) Init(opts ...ConsOpt) error
- func (impl *Standard) Isamax(n int, x []float32, incX int) (retVal int)
- func (impl *Standard) Isamin(n int, x []float32, incX int) (retVal int)
- func (impl *Standard) Izamax(n int, x []complex128, incX int) (retVal int)
- func (impl *Standard) Izamin(n int, x []complex128, incX int) (retVal int)
- func (impl *Standard) Sasum(n int, x []float32, incX int) (retVal float32)
- func (impl *Standard) Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int)
- func (impl *Standard) Scasum(n int, x []complex64, incX int) (retVal float32)
- func (impl *Standard) Scnrm2(n int, x []complex64, incX int) (retVal float32)
- func (impl *Standard) Scopy(n int, x []float32, incX int, y []float32, incY int)
- func (impl *Standard) Sdgmm(mode blas.Side, m, n int, a []float32, lda int, x []float32, incX int, ...)
- func (impl *Standard) Sdot(n int, x []float32, incX int, y []float32, incY int) (retVal float32)
- func (impl *Standard) Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32
- func (impl *Standard) Sgbmv(tA blas.Transpose, m, n, kl, ku int, alpha float32, a []float32, lda int, ...)
- func (impl *Standard) Sgeam(tA, tB blas.Transpose, m, n int, alpha float32, a []float32, lda int, ...)
- func (impl *Standard) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a []float32, lda int, ...)
- func (impl *Standard) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, ...)
- func (impl *Standard) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, ...)
- func (impl *Standard) Snrm2(n int, x []float32, incX int) (retVal float32)
- func (impl *Standard) Srot(n int, x []float32, incX int, y []float32, incY int, cScalar, sScalar float32)
- func (impl *Standard) Srotg(a float32, b float32) (c float32, s float32, r float32, z float32)
- func (impl *Standard) Srotm(n int, x []float32, incX int, y []float32, incY int, p blas.SrotmParams)
- func (impl *Standard) Srotmg(d1 float32, d2 float32, b1 float32, b2 float32) (p blas.SrotmParams, rd1 float32, rd2 float32, rb1 float32)
- func (impl *Standard) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, ...)
- func (impl *Standard) Sscal(n int, alpha float32, x []float32, incX int)
- func (impl *Standard) Sspmv(ul blas.Uplo, n int, alpha float32, aP, x []float32, incX int, beta float32, ...)
- func (impl *Standard) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, aP []float32)
- func (impl *Standard) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, ...)
- func (impl *Standard) Sswap(n int, x []float32, incX int, y []float32, incY int)
- func (impl *Standard) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, ...)
- func (impl *Standard) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, ...)
- func (impl *Standard) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, ...)
- func (impl *Standard) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, ...)
- func (impl *Standard) Ssyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, ...)
- func (impl *Standard) Ssyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, ...)
- func (impl *Standard) Ssyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, ...)
- func (impl *Standard) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, ...)
- func (impl *Standard) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, ...)
- func (impl *Standard) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float32, incX int)
- func (impl *Standard) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float32, incX int)
- func (impl *Standard) Stpttr(ul blas.Uplo, n int, aP, a []float32, lda int)
- func (impl *Standard) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, ...)
- func (impl *Standard) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, ...)
- func (impl *Standard) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, ...)
- func (impl *Standard) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, ...)
- func (impl *Standard) Strttp(ul blas.Uplo, n int, a []float32, lda int, aP []float32)
- func (impl *Standard) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int)
- func (impl *Standard) Zcopy(n int, x []complex128, incX int, y []complex128, incY int)
- func (impl *Standard) Zdgmm(mode blas.Side, m, n int, a []complex128, lda int, x []complex128, incX int, ...)
- func (impl *Standard) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) (dotc complex128)
- func (impl *Standard) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) (dotu complex128)
- func (impl *Standard) Zdscal(n int, alpha float64, x []complex128, incX int)
- func (impl *Standard) Zgbmv(tA blas.Transpose, m, n, kl, ku int, alpha complex128, a []complex128, lda int, ...)
- func (impl *Standard) Zgeam(tA, tB blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, ...)
- func (impl *Standard) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, ...)
- func (impl *Standard) Zgemm3m(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, ...)
- func (impl *Standard) Zgemv(tA blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, ...)
- func (impl *Standard) Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, ...)
- func (impl *Standard) Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, ...)
- func (impl *Standard) Zhbmv(ul blas.Uplo, n, k int, alpha complex128, a []complex128, lda int, ...)
- func (impl *Standard) Zhemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, ...)
- func (impl *Standard) Zhemv(ul blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, ...)
- func (impl *Standard) Zher(ul blas.Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, ...)
- func (impl *Standard) Zher2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, ...)
- func (impl *Standard) Zher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, ...)
- func (impl *Standard) Zherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []complex128, ...)
- func (impl *Standard) Zherkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, ...)
- func (impl *Standard) Zhpmv(ul blas.Uplo, n int, alpha complex128, aP, x []complex128, incX int, ...)
- func (impl *Standard) Zhpr(ul blas.Uplo, n int, alpha float64, x []complex128, incX int, aP []complex128)
- func (impl *Standard) Zhpr2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, ...)
- func (impl *Standard) Zrot(n int, x []complex128, incX int, y []complex128, incY int, cScalar float64, ...)
- func (impl *Standard) Zscal(n int, alpha complex128, x []complex128, incX int)
- func (impl *Standard) Zswap(n int, x []complex128, incX int, y []complex128, incY int)
- func (impl *Standard) Zsymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, ...)
- func (impl *Standard) Zsymv(ul blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, ...)
- func (impl *Standard) Zsyr(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, ...)
- func (impl *Standard) Zsyr2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, ...)
- func (impl *Standard) Zsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, ...)
- func (impl *Standard) Zsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, ...)
- func (impl *Standard) Zsyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, ...)
- func (impl *Standard) Ztbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, ...)
- func (impl *Standard) Ztbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, ...)
- func (impl *Standard) Ztpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex128, ...)
- func (impl *Standard) Ztpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex128, ...)
- func (impl *Standard) Ztpttr(ul blas.Uplo, n int, aP, a []complex128, lda int)
- func (impl *Standard) Ztrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, ...)
- func (impl *Standard) Ztrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex128, lda int, ...)
- func (impl *Standard) Ztrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, ...)
- func (impl *Standard) Ztrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex128, lda int, ...)
- func (impl *Standard) Ztrttp(ul blas.Uplo, n int, a []complex128, lda int, aP []complex128)
- type Status
Examples ¶
Constants ¶
// Selector constants taken from the corresponding CUBLAS_* C constants:
// operation (transpose) modes, triangular fill modes, diagonal kinds and
// multiplication sides.
const (
	NoTrans   = C.CUBLAS_OP_N // NoTrans represents the no-transpose operation
	Trans     = C.CUBLAS_OP_T // Trans represents the transpose operation
	ConjTrans = C.CUBLAS_OP_C // ConjTrans represents the conjugate transpose operation

	Upper = C.CUBLAS_FILL_MODE_UPPER // Upper is used to specify that the matrix is an upper triangular matrix
	Lower = C.CUBLAS_FILL_MODE_LOWER // Lower is used to specify that the matrix is a lower triangular matrix

	NonUnit = C.CUBLAS_DIAG_NON_UNIT // NonUnit is used to specify that the matrix is not a unit triangular matrix
	Unit    = C.CUBLAS_DIAG_UNIT     // Unit is used to specify that the matrix is a unit triangular matrix

	Left  = C.CUBLAS_SIDE_LEFT  // Left is used to specify a multiplication op is performed from the left
	Right = C.CUBLAS_SIDE_RIGHT // Right is used to specify a multiplication op is performed from the right
)
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ConsOpt ¶
// ConsOpt is a construction option: a function that is handed the *Standard
// being set up.
type ConsOpt func(impl *Standard)
func WithContext ¶
func WithNativeData ¶
func WithNativeData() ConsOpt
type Order ¶
// Order is used to specify the matrix storage format.
type Order byte
Order is used to specify the matrix storage format. We still interact with an API that allows client calls to specify order, so this is here to document that fact.
type Standard ¶
Standard is the standard cuBLAS handler. By default it assumes that the data is in RowMajor, DESPITE the fact that cuBLAS takes ColMajor only. This is done for the ease of use of developers writing in Go.
Use New to create a new BLAS handler. Use the various ConsOpts to set the options.
func (*Standard) Dasum ¶
Dasum computes the sum of the absolute values of the elements of x.
\sum_i |x[i]|
Dasum returns 0 if incX is negative.
func (*Standard) Dcopy ¶
Dcopy copies the elements of x into the elements of y.
y[i] = x[i] for all i
func (*Standard) Dgbmv ¶
func (impl *Standard) Dgbmv(tA blas.Transpose, m, n, kl, ku int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dgbmv computes
y = alpha * A * x + beta * y if tA == blas.NoTrans
y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans
where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals, and m and n refer to the size of the full dense matrix it represents. x and y are vectors, and alpha and beta are scalars.
func (*Standard) Dgemm ¶
func (impl *Standard) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)
Dgemm computes
C = beta * C + alpha * A * B,
where A, B, and C are dense matrices, and alpha and beta are scalars. tA and tB specify whether A or B are transposed.
func (*Standard) Dgemv ¶
func (impl *Standard) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dgemv computes
y = alpha * A * x + beta * y if tA == blas.NoTrans y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans
where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
func (*Standard) Dger ¶
func (impl *Standard) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int)
Dger performs the rank-one operation
A += alpha * x * y^T
where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
func (*Standard) Dnrm2 ¶
Dnrm2 computes the Euclidean norm of a vector,
sqrt(\sum_i x[i] * x[i]).
This function returns 0 if incX is negative.
func (*Standard) Drot ¶
func (impl *Standard) Drot(n int, x []float64, incX int, y []float64, incY int, cScalar, sScalar float64)
Drot applies a plane transformation.
x[i] = c * x[i] + s * y[i] y[i] = c * y[i] - s * x[i]
func (*Standard) Dsbmv ¶
func (impl *Standard) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dsbmv performs
y = alpha * A * x + beta * y
where A is an n×n symmetric banded matrix, x and y are vectors, and alpha and beta are scalars.
func (*Standard) Dspmv ¶
func (impl *Standard) Dspmv(ul blas.Uplo, n int, alpha float64, aP, x []float64, incX int, beta float64, y []float64, incY int)
Dspmv performs
y = alpha * A * x + beta * y,
where A is an n×n symmetric matrix in packed format, x and y are vectors and alpha and beta are scalars.
func (*Standard) Dspr ¶
Dspr computes the rank-one operation
a += alpha * x * x^T
where a is an n×n symmetric matrix in packed format, x is a vector, and alpha is a scalar.
func (*Standard) Dspr2 ¶
func (impl *Standard) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, aP []float64)
Dspr2 performs the symmetric rank-2 update
A += alpha * x * y^T + alpha * y * x^T,
where A is an n×n symmetric matrix in packed format, x and y are vectors, and alpha is a scalar.
func (*Standard) Dswap ¶
Dswap exchanges the elements of two vectors.
x[i], y[i] = y[i], x[i] for all i
func (*Standard) Dsymm ¶
func (impl *Standard) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)
Dsymm performs one of
C = alpha * A * B + beta * C, if side == blas.Left, C = alpha * B * A + beta * C, if side == blas.Right,
where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha and beta are scalars.
func (*Standard) Dsymv ¶
func (impl *Standard) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)
Dsymv computes
y = alpha * A * x + beta * y,
where A is an n×n symmetric matrix, x and y are vectors, and alpha and beta are scalars.
func (*Standard) Dsyr ¶
func (impl *Standard) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int)
Dsyr performs the rank-one update
a += alpha * x * x^T
where a is an n×n symmetric matrix, and x is a vector.
func (*Standard) Dsyr2 ¶
func (impl *Standard) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int)
Dsyr2 performs the symmetric rank-two update
A += alpha * x * y^T + alpha * y * x^T
where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.
func (*Standard) Dsyr2k ¶
func (impl *Standard) Dsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)
Dsyr2k performs the symmetric rank 2k operation
C = alpha * A * B^T + alpha * B * A^T + beta * C
where C is an n×n symmetric matrix. A and B are n×k matrices if t == blas.NoTrans and k×n otherwise. alpha and beta are scalars.
func (*Standard) Dsyrk ¶
func (impl *Standard) Dsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int)
Dsyrk performs the symmetric rank-k operation
C = alpha * A * A^T + beta*C
C is an n×n symmetric matrix. A is an n×k matrix if t == blas.NoTrans, and a k×n matrix otherwise. alpha and beta are scalars.
func (*Standard) Dtbmv ¶
func (impl *Standard) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int)
Dtbmv computes
x = A * x if tA == blas.NoTrans x = A^T * x if tA == blas.Trans or blas.ConjTrans
where A is an n×n triangular banded matrix with k diagonals, and x is a vector.
func (*Standard) Dtbsv ¶
func (impl *Standard) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int)
Dtbsv solves
A * x = b
where A is an n×n triangular banded matrix with k diagonals in packed format, and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.
No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.
func (*Standard) Dtpmv ¶
func (impl *Standard) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float64, incX int)
Dtpmv computes
x = A * x if tA == blas.NoTrans x = A^T * x if tA == blas.Trans or blas.ConjTrans
where A is an n×n triangular matrix in packed format, and x is a vector.
func (*Standard) Dtpsv ¶
func (impl *Standard) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float64, incX int)
Dtpsv solves
A * x = b if tA == blas.NoTrans A^T * x = b if tA == blas.Trans or blas.ConjTrans
where A is an n×n triangular matrix in packed format and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.
No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.
func (*Standard) Dtrmv ¶
func (impl *Standard) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int)
Dtrmv computes
x = A * x if tA == blas.NoTrans x = A^T * x if tA == blas.Trans or blas.ConjTrans
where A is an n×n triangular matrix, and x is a vector.
func (*Standard) Dtrsm ¶
func (impl *Standard) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int)
Dtrsm solves
A * X = alpha * B, if tA == blas.NoTrans and side == blas.Left, A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, X * A = alpha * B, if tA == blas.NoTrans and side == blas.Right, X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,
where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a scalar.
At entry to the function, X contains the values of B, and the result is stored in place into X.
No check is made that A is invertible.
func (*Standard) Dtrsv ¶
func (impl *Standard) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int)
Dtrsv solves
A * x = b if tA == blas.NoTrans A^T * x = b if tA == blas.Trans or blas.ConjTrans
A is an n×n triangular matrix and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.
No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.
func (*Standard) Dzasum ¶
func (impl *Standard) Dzasum(n int, x []complex128, incX int) (retVal float64)
func (*Standard) Dznrm2 ¶
func (impl *Standard) Dznrm2(n int, x []complex128, incX int) (retVal float64)
func (*Standard) Idamax ¶
Idamax returns the index of an element of x with the largest absolute value. If there are multiple such indices the earliest is returned. Idamax returns -1 if n == 0.
func (*Standard) Isamax ¶
Isamax returns the index of an element of x with the largest absolute value. If there are multiple such indices the earliest is returned. Isamax returns -1 if n == 0.
func (*Standard) Izamax ¶
func (impl *Standard) Izamax(n int, x []complex128, incX int) (retVal int)
func (*Standard) Izamin ¶
func (impl *Standard) Izamin(n int, x []complex128, incX int) (retVal int)
func (*Standard) Sasum ¶
Sasum computes the sum of the absolute values of the elements of x.
\sum_i |x[i]|
Sasum returns 0 if incX is negative.
func (*Standard) Scopy ¶
Scopy copies the elements of x into the elements of y.
y[i] = x[i] for all i
func (*Standard) Sgbmv ¶
func (impl *Standard) Sgbmv(tA blas.Transpose, m, n, kl, ku int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Sgbmv computes
y = alpha * A * x + beta * y if tA == blas.NoTrans y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans
where A is an m×n band matrix with kL subdiagonals and kU super-diagonals, and m and n refer to the size of the full dense matrix it represents. x and y are vectors, and alpha and beta are scalars.
func (*Standard) Sgemm ¶
func (impl *Standard) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)
Sgemm computes
C = beta * C + alpha * A * B,
where A, B, and C are dense matrices, and alpha and beta are scalars. tA and tB specify whether A or B are transposed.
func (*Standard) Sgemv ¶
func (impl *Standard) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Sgemv computes
y = alpha * A * x + beta * y if tA == blas.NoTrans y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans
where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.
func (*Standard) Sger ¶
func (impl *Standard) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int)
Sger performs the rank-one operation
A += alpha * x * y^T
where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.
func (*Standard) Snrm2 ¶
Snrm2 computes the Euclidean norm of a vector,
sqrt(\sum_i x[i] * x[i]).
This function returns 0 if incX is negative.
func (*Standard) Srot ¶
func (impl *Standard) Srot(n int, x []float32, incX int, y []float32, incY int, cScalar, sScalar float32)
Srot applies a plane transformation.
x[i] = c * x[i] + s * y[i] y[i] = c * y[i] - s * x[i]
func (*Standard) Ssbmv ¶
func (impl *Standard) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Ssbmv performs
y = alpha * A * x + beta * y
where A is an n×n symmetric banded matrix, x and y are vectors, and alpha and beta are scalars.
func (*Standard) Sspmv ¶
func (impl *Standard) Sspmv(ul blas.Uplo, n int, alpha float32, aP, x []float32, incX int, beta float32, y []float32, incY int)
Sspmv performs
y = alpha * A * x + beta * y,
where A is an n×n symmetric matrix in packed format, x and y are vectors and alpha and beta are scalars.
func (*Standard) Sspr ¶
Sspr computes the rank-one operation
a += alpha * x * x^T
where a is an n×n symmetric matrix in packed format, x is a vector, and alpha is a scalar.
func (*Standard) Sspr2 ¶
func (impl *Standard) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, aP []float32)
Sspr2 performs the symmetric rank-2 update
A += alpha * x * y^T + alpha * y * x^T,
where A is an n×n symmetric matrix in packed format, x and y are vectors, and alpha is a scalar.
func (*Standard) Sswap ¶
Sswap exchanges the elements of two vectors.
x[i], y[i] = y[i], x[i] for all i
func (*Standard) Ssymm ¶
func (impl *Standard) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)
Ssymm performs one of
C = alpha * A * B + beta * C, if side == blas.Left, C = alpha * B * A + beta * C, if side == blas.Right,
where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha and beta are scalars.
func (*Standard) Ssymv ¶
func (impl *Standard) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)
Ssymv computes
y = alpha * A * x + beta * y,
where A is an n×n symmetric matrix, x and y are vectors, and alpha and beta are scalars.
func (*Standard) Ssyr ¶
func (impl *Standard) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int)
Ssyr performs the rank-one update
a += alpha * x * x^T
where a is an n×n symmetric matrix, and x is a vector.
func (*Standard) Ssyr2 ¶
func (impl *Standard) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int)
Ssyr2 performs the symmetric rank-two update
A += alpha * x * y^T + alpha * y * x^T
where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.
func (*Standard) Ssyr2k ¶
func (impl *Standard) Ssyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)
Ssyr2k performs the symmetric rank 2k operation
C = alpha * A * B^T + alpha * B * A^T + beta * C
where C is an n×n symmetric matrix. A and B are n×k matrices if t == blas.NoTrans and k×n otherwise. alpha and beta are scalars.
func (*Standard) Ssyrk ¶
func (impl *Standard) Ssyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int)
Ssyrk performs the symmetric rank-k operation
C = alpha * A * A^T + beta*C
C is an n×n symmetric matrix. A is an n×k matrix if t == blas.NoTrans, and a k×n matrix otherwise. alpha and beta are scalars.
func (*Standard) Stbmv ¶
func (impl *Standard) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int)
Stbmv computes
x = A * x if tA == blas.NoTrans x = A^T * x if tA == blas.Trans or blas.ConjTrans
where A is an n×n triangular banded matrix with k diagonals, and x is a vector.
func (*Standard) Stbsv ¶
func (impl *Standard) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int)
Stbsv solves
A * x = b
where A is an n×n triangular banded matrix with k diagonals in packed format, and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.
No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.
func (*Standard) Stpmv ¶
func (impl *Standard) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float32, incX int)
Stpmv computes
x = A * x if tA == blas.NoTrans x = A^T * x if tA == blas.Trans or blas.ConjTrans
where A is an n×n triangular matrix in packed format, and x is a vector.
func (*Standard) Stpsv ¶
func (impl *Standard) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float32, incX int)
Stpsv solves
A * x = b if tA == blas.NoTrans A^T * x = b if tA == blas.Trans or blas.ConjTrans
where A is an n×n triangular matrix in packed format and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.
No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.
func (*Standard) Strmv ¶
func (impl *Standard) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int)
Strmv computes
x = A * x if tA == blas.NoTrans x = A^T * x if tA == blas.Trans or blas.ConjTrans
where A is an n×n triangular matrix, and x is a vector.
func (*Standard) Strsm ¶
func (impl *Standard) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int)
Strsm solves
A * X = alpha * B, if tA == blas.NoTrans and side == blas.Left, A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left, X * A = alpha * B, if tA == blas.NoTrans and side == blas.Right, X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,
where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a scalar.
At entry to the function, X contains the values of B, and the result is stored in place into X.
No check is made that A is invertible.
func (*Standard) Strsv ¶
func (impl *Standard) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int)
Strsv solves
A * x = b if tA == blas.NoTrans A^T * x = b if tA == blas.Trans or blas.ConjTrans
A is an n×n triangular matrix and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.
No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.
func (*Standard) Zaxpy ¶
func (impl *Standard) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int)
func (*Standard) Zcopy ¶
func (impl *Standard) Zcopy(n int, x []complex128, incX int, y []complex128, incY int)
func (*Standard) Zdgmm ¶
func (impl *Standard) Zdgmm(mode blas.Side, m, n int, a []complex128, lda int, x []complex128, incX int, c []complex128, ldc int)
func (*Standard) Zdotc ¶
func (impl *Standard) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) (dotc complex128)
func (*Standard) Zdotu ¶
func (impl *Standard) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) (dotu complex128)
func (*Standard) Zdscal ¶
func (impl *Standard) Zdscal(n int, alpha float64, x []complex128, incX int)
func (*Standard) Zgbmv ¶
func (impl *Standard) Zgbmv(tA blas.Transpose, m, n, kl, ku int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
func (*Standard) Zgeam ¶
func (impl *Standard) Zgeam(tA, tB blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, beta complex128, b []complex128, ldb int, c []complex128, ldc int)
func (*Standard) Zgemm ¶
func (impl *Standard) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
func (*Standard) Zgemm3m ¶
func (impl *Standard) Zgemm3m(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
func (*Standard) Zgemv ¶
func (impl *Standard) Zgemv(tA blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
func (*Standard) Zgerc ¶
func (impl *Standard) Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)
func (*Standard) Zgeru ¶
func (impl *Standard) Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)
func (*Standard) Zhbmv ¶
func (impl *Standard) Zhbmv(ul blas.Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
func (*Standard) Zhemm ¶
func (impl *Standard) Zhemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
func (*Standard) Zhemv ¶
func (impl *Standard) Zhemv(ul blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
func (*Standard) Zher ¶
func (impl *Standard) Zher(ul blas.Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, lda int)
func (*Standard) Zher2 ¶
func (impl *Standard) Zher2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)
func (*Standard) Zher2k ¶
func (impl *Standard) Zher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int)
func (*Standard) Zherkx ¶
func (impl *Standard) Zherkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int)
func (*Standard) Zhpmv ¶
func (impl *Standard) Zhpmv(ul blas.Uplo, n int, alpha complex128, aP, x []complex128, incX int, beta complex128, y []complex128, incY int)
func (*Standard) Zhpr ¶
func (impl *Standard) Zhpr(ul blas.Uplo, n int, alpha float64, x []complex128, incX int, aP []complex128)
func (*Standard) Zhpr2 ¶
func (impl *Standard) Zhpr2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, aP []complex128)
func (*Standard) Zrot ¶
func (impl *Standard) Zrot(n int, x []complex128, incX int, y []complex128, incY int, cScalar float64, sScalar complex128)
func (*Standard) Zscal ¶
func (impl *Standard) Zscal(n int, alpha complex128, x []complex128, incX int)
func (*Standard) Zswap ¶
func (impl *Standard) Zswap(n int, x []complex128, incX int, y []complex128, incY int)
func (*Standard) Zsymm ¶
func (impl *Standard) Zsymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
func (*Standard) Zsymv ¶
func (impl *Standard) Zsymv(ul blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)
func (*Standard) Zsyr ¶
func (impl *Standard) Zsyr(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, a []complex128, lda int)
func (*Standard) Zsyr2 ¶
func (impl *Standard) Zsyr2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)
func (*Standard) Zsyr2k ¶
func (impl *Standard) Zsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
func (*Standard) Zsyrk ¶
func (impl *Standard) Zsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int)
func (*Standard) Zsyrkx ¶
func (impl *Standard) Zsyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)
func (*Standard) Ztrmm ¶
func (impl *Standard) Ztrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int)
func (*Standard) Ztrsm ¶
func (impl *Standard) Ztrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int)
func (*Standard) Ztrttp ¶
func (impl *Standard) Ztrttp(ul blas.Uplo, n int, a []complex128, lda int, aP []complex128)
type Status ¶
type Status int
Status is the cublas status.
const ( Success Status = C.CUBLAS_STATUS_SUCCESS // The operation completed successfully. NotInitialized Status = C.CUBLAS_STATUS_NOT_INITIALIZED // The cuBLAS library was not initialized. This is usually caused by the lack of a prior cublasCreate() call. AllocFailed Status = C.CUBLAS_STATUS_ALLOC_FAILED // Resource allocation failed inside the cuBLAS library. InvalidValue Status = C.CUBLAS_STATUS_INVALID_VALUE // An unsupported value or parameter was passed to the function (a negative vector size, for example). ArchMismatch Status = C.CUBLAS_STATUS_ARCH_MISMATCH // The function requires a feature absent from the device architecture; usually caused by the lack of support for double precision. MappingError Status = C.CUBLAS_STATUS_MAPPING_ERROR // An access to GPU memory space failed, which is usually caused by a failure to bind a texture. ExecFailed Status = C.CUBLAS_STATUS_EXECUTION_FAILED // The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons. InternalError Status = C.CUBLAS_STATUS_INTERNAL_ERROR // An internal cuBLAS operation failed. This error is usually caused by a cudaMemcpyAsync() failure. Unsupported Status = C.CUBLAS_STATUS_NOT_SUPPORTED // The functionality requested is not supported. LicenceError Status = C.CUBLAS_STATUS_LICENSE_ERROR // The functionality requested requires some license and an error was detected when trying to check the current licensing. )