cublas

package
v0.0.0-...-4c97a61 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 9, 2023 License: Apache-2.0 Imports: 6 Imported by: 0

README

cublas

Package cublas implements a Go API for CUDA's cuBLAS. It matches the gonum/blas interface.

How To Use

To install: go get -u gorgonia.org/cu

The CUDA Toolkit 8.0 is required. LDFlags and CFlags may not be quite accurate. File an issue if you find one, thank you.

Bear in mind that cublas only supports FORTRAN ordered matrices. Most Go matrices are created with the C ordering (gonum/matrix, gorgonia/tensor), therefore care must be applied.

For example, here's how to use Dgemm:

// main multiplies a 5×10 matrix A by a 10×12 matrix B with cuBLAS Dgemm and
// prints the 5×12 result C. All three matrices are backed by memory obtained
// from cu.MemAllocManaged and wrapped as tensors via tensor.FromMemory.
func main() {
	dev := cu.Device(0)
	ctx, err := dev.MakeContext(cu.SchedAuto)
	if err != nil {
		log.Fatal(err)
	}
	defer cu.DestroyContext(&ctx)

	dt := tensor.Float64
	s0 := tensor.Shape{5, 10}  // A
	s1 := tensor.Shape{10, 12} // B
	s2 := tensor.Shape{5, 12}  // C

	// A: filled with 1, 2, 3, ...
	memsize0 := calcMemsize(dt, s0)
	mem0, err := cu.MemAllocManaged(memsize0, cu.AttachGlobal)
	if err != nil {
		log.Fatal(err)
	}
	mat0 := tensor.New(tensor.Of(dt), tensor.WithShape(s0...), tensor.FromMemory(uintptr(mem0), uintptr(memsize0)))
	d0 := mat0.Data().([]float64)
	for i := range d0 {
		d0[i] = float64(i + 1)
	}
	fmt.Printf("A: \n%#v\n", mat0)

	// B: filled with 1, 2, 3, ...
	memsize1 := calcMemsize(dt, s1)
	mem1, err := cu.MemAllocManaged(memsize1, cu.AttachGlobal)
	if err != nil {
		log.Fatal(err)
	}
	mat1 := tensor.New(tensor.Of(dt), tensor.WithShape(s1...), tensor.FromMemory(uintptr(mem1), uintptr(memsize1)))
	d1 := mat1.Data().([]float64)
	for i := range d1 {
		d1[i] = float64(i + 1)
	}
	fmt.Printf("B: \n%#v\n", mat1)

	// C: receives the result.
	memsize2 := calcMemsize(dt, s2)
	mem2, err := cu.MemAllocManaged(memsize2, cu.AttachGlobal)
	if err != nil {
		log.Fatal(err)
	}
	mat2 := tensor.New(tensor.Of(dt), tensor.WithShape(s2...), tensor.FromMemory(uintptr(mem2), uintptr(memsize2)))
	d2 := mat2.Data().([]float64)
	fmt.Printf("C: \n%#v\n", mat2)

	impl := cublas.NewImplementation()

	m := s0[0]
	k := s0[1]
	n := s1[1]
	lda := mat0.Strides()[0]
	ldb := mat1.Strides()[0]
	ldc := mat2.Strides()[0]
	alpha := 1.0
	beta := 0.0

	// cuBLAS only supports FORTRAN-ordered matrices, while these tensors are
	// C-ordered. A×B is therefore computed as Bᵀ×Aᵀ: B (d1, ldb) is passed as
	// the first operand, A (d0, lda) as the second, and m and n are swapped.
	impl.Dgemm(blas.NoTrans, blas.NoTrans, n, m, k, alpha, d1, ldb, d0, lda, beta, d2, ldc)
	if err := cu.Synchronize(); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("C: \n%#v\n", mat2)
	cu.MemFree(mem0)
	cu.MemFree(mem1)
	cu.MemFree(mem2)
}

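Things to note: because cuBLAS expects FORTRAN (column-major) ordering, computing A×B on C-ordered (row-major) data essentially means computing Bᵀ×Aᵀ. That is why the Dgemm call above passes B before A and swaps m and n.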

How This Package Is Developed

The majority of the CUDA interface was generated with the cublasgen program. The cublasgen program was adapted from the cgo generator from the gonum/blas package.

The cudagen.h file was generated based off the proprietary header from NVIDIA, then further edited (several variable names were renamed) to match the cblas interface in order to quickly generate the API.

Documentation

Overview

Example
package main

import (
	"reflect"
	"runtime"
	"unsafe"

	"github.com/pkg/errors"
	"gonum.org/v1/gonum/blas"
	"gorgonia.org/cu"
	cublas "gorgonia.org/cu/blas"
	"gorgonia.org/tensor"
)

// Engine is the example tensor engine. It embeds tensor.StdEng and a cuBLAS
// handle (*cublas.Standard), and keeps the CUDA context that its allocation
// and copy methods operate on.
type Engine struct {
	tensor.StdEng
	// ctx is the CUDA context used by Alloc, Free and Accessible.
	ctx cu.Context
	// Standard is the cuBLAS handle used by MatVecMul, MatMul and Outer.
	*cublas.Standard
}

// newEngine creates a context on device 0 with cu.SchedAuto, builds a cuBLAS
// handle bound to that context, and returns an Engine holding both.
func newEngine() *Engine {
	ctx := cu.NewContext(cu.Device(0), cu.SchedAuto)

	eng := &Engine{ctx: ctx}
	eng.Standard = cublas.New(cublas.WithContext(ctx))
	return eng
}

// AllocAccessible always reports true for this engine.
func (e *Engine) AllocAccessible() bool {
	return true
}

// Alloc allocates size bytes through the engine's context using
// MemAllocManaged with the cu.AttachGlobal flag, and returns the resulting
// memory together with any allocation error.
func (e *Engine) Alloc(size int64) (tensor.Memory, error) {
	ptr, err := e.ctx.MemAllocManaged(size, cu.AttachGlobal)
	return ptr, err
}

// AllocFlags reports how memory from this engine is flagged and laid out:
// manually managed, column-major.
func (e *Engine) AllocFlags() (tensor.MemoryFlag, tensor.DataOrder) {
	flag := tensor.MakeMemoryFlag(tensor.ManuallyManaged)
	return flag, tensor.ColMajor
}

// Free releases mem through the engine's context.
//
// mem must be a cu.DevicePtr (the type Alloc hands out); any other type is
// rejected with an error instead of panicking on the type assertion. size is
// unused. The context's error state is returned so a failed free is not
// silently dropped.
func (e *Engine) Free(mem tensor.Memory, size int64) error {
	ptr, ok := mem.(cu.DevicePtr)
	if !ok {
		return errors.Errorf("cannot free memory of type %T: expected cu.DevicePtr", mem)
	}
	e.ctx.MemFree(ptr)
	return e.ctx.Error()
}

// Memset is not implemented by this example engine; calling it panics.
func (e *Engine) Memset(mem tensor.Memory, val interface{}) error {
	panic("not implemented")
}

// Memclr is not implemented by this example engine; calling it panics.
func (e *Engine) Memclr(mem tensor.Memory) {
	panic("not implemented")
}

// Memcpy is not implemented by this example engine; calling it panics.
func (e *Engine) Memcpy(dst tensor.Memory, src tensor.Memory) error {
	panic("not implemented")
}

// Accessible copies mem from the device into a freshly allocated host buffer
// and returns that buffer viewed as a foomem ([]float64).
//
// The copy covers mem.MemSize() bytes; the returned slice has MemSize()/8
// elements, i.e. the bytes are always interpreted as float64 regardless of the
// tensor's Dtype. Zero-sized memory is rejected with an error, because there
// is no first byte to take the address of. The context's error state after
// the copy is returned alongside the buffer.
func (e *Engine) Accessible(mem tensor.Memory) (tensor.Memory, error) {
	size := mem.MemSize()
	if size == 0 {
		return nil, errors.New("cannot make zero-sized memory accessible")
	}

	retVal := make([]byte, int(size))
	e.ctx.MemcpyDtoH(unsafe.Pointer(&retVal[0]), cu.DevicePtr(mem.Uintptr()), int64(size))

	// Reinterpret the byte buffer as float64s by filling in the header of a
	// real slice variable. Unlike a free-standing reflect.SliceHeader literal,
	// this keeps the buffer reachable for the garbage collector.
	l := int(size / 8)
	var host foomem
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&host))
	hdr.Data = uintptr(unsafe.Pointer(&retVal[0]))
	hdr.Len = l
	hdr.Cap = l

	return host, e.ctx.Error()
}

// WorksWith reports true for every data order.
func (e *Engine) WorksWith(order tensor.DataOrder) bool {
	return true
}

// NonStdAlloc is an intentionally empty method.
// NOTE(review): presumably a marker method telling the tensor package that
// this engine allocates its own memory — confirm against the tensor docs.
func (e *Engine) NonStdAlloc() {
}

// ContextErr returns the current error state of the engine's context.
func (e *Engine) ContextErr() error {
	return e.ctx.Error()
}

// foomem is a float64 slice that provides the Uintptr and MemSize methods, so
// host-side data can be returned where a tensor.Memory is expected.
type foomem []float64

// Uintptr returns the address of the first element, or 0 when the slice is
// empty (there is no element to take the address of).
func (m foomem) Uintptr() uintptr {
	if len(m) == 0 {
		return 0
	}
	return uintptr(unsafe.Pointer(&m[0]))
}

// MemSize returns the size of the slice's contents in bytes (8 per element).
func (m foomem) MemSize() uintptr { return uintptr(len(m) * 8) }

// checkThreeFloat validates the three operands of a BLAS call and returns them
// as *tensor.Dense values.
//
// The checks run in this order, and the first failure is returned with all
// three tensors nil:
//  1. a, b and ret must each be manually managed;
//  2. a, b and ret must share one Dtype;
//  3. a, b and ret must each be a *tensor.Dense.
func (e *Engine) checkThreeFloat(a, b, ret tensor.Tensor) (ad, bd, retVal *tensor.Dense, err error) {
	switch {
	case !a.IsManuallyManaged():
		return nil, nil, nil, errors.New("CUDA Engine only takes non-natively accessible memory (memory on graphics cards). a isn't.")
	case !b.IsManuallyManaged():
		return nil, nil, nil, errors.New("CUDA Engine only takes non-natively accessible memory (memory on graphics cards). b isn't")
	case !ret.IsManuallyManaged():
		return nil, nil, nil, errors.New("CUDA Engine only takes non-natively accessible memory (memory on graphics cards). ret isn't")
	}

	sameDtype := a.Dtype() == b.Dtype() && b.Dtype() == ret.Dtype()
	if !sameDtype {
		return nil, nil, nil, errors.New("Expected a and b and retVal all to have the same Dtype")
	}

	denseA, isDense := a.(*tensor.Dense)
	if !isDense {
		return nil, nil, nil, errors.New("Expected a to be a *tensor.Dense")
	}
	denseB, isDense := b.(*tensor.Dense)
	if !isDense {
		return nil, nil, nil, errors.New("Expected b to be a *tensor.Dense")
	}
	denseRet, isDense := ret.(*tensor.Dense)
	if !isDense {
		return nil, nil, nil, errors.New("Expected ret to be a *tensor.Dense")
	}
	return denseA, denseB, denseRet, nil
}

// MatVecMul computes prealloc = a × b, where a is a matrix and b a vector, by
// calling the cuBLAS GEMV routine for the operands' Dtype (Dgemv for Float64,
// Sgemv for Float32). Any other Dtype yields an error.
//
// The operands must pass checkThreeFloat. The transpose flag, the leading
// dimension and the m/n arguments passed to GEMV are derived from a's data
// order (row- or column-major) and from whether a is transposed. Both vector
// increments are fixed at 1.
func (e *Engine) MatVecMul(a, b, prealloc tensor.Tensor) (err error) {
	ad, bd, pd, err := e.checkThreeFloat(a, b, prealloc)
	if err != nil {
		return errors.Wrapf(err, "MatVecMul failed pre check")
	}

	order := a.DataOrder()
	transposed := order.IsTransposed()

	rows := a.Shape()[0]
	cols := a.Shape()[1]

	// trans stays blas.Trans unless one of the branches below overrides it.
	trans := blas.Trans
	var lda int
	switch {
	case order.IsRowMajor():
		if transposed {
			trans = blas.NoTrans
			lda = rows
		} else {
			lda = cols
			rows, cols = cols, rows
		}
	case order.IsColMajor():
		if transposed {
			trans = blas.Trans
			lda = cols
			rows, cols = cols, rows
		} else {
			lda = rows
			trans = blas.NoTrans
		}
	}

	// Step sizes through the x and y vectors. Strided vectors are not
	// supported yet; both increments are always 1.
	const incX, incY = 1, 1

	switch ad.Dtype() {
	case tensor.Float64:
		e.Standard.Dgemv(trans, rows, cols, float64(1), ad.Float64s(), lda, bd.Float64s(), incX, float64(0), pd.Float64s(), incY)
	case tensor.Float32:
		e.Standard.Sgemv(trans, rows, cols, float32(1), ad.Float32s(), lda, bd.Float32s(), incX, float32(0), pd.Float32s(), incY)
	default:
		return errors.New("Unsupported Dtype")
	}
	return e.Standard.Err()
}

// MatMul computes prealloc = a × b with the cuBLAS GEMM routine for the
// operands' Dtype (Dgemm for Float64, Sgemm for Float32).
//
// a is treated as (m, k), b as (k, n) and prealloc as (m, n). The operands
// must pass checkThreeFloat, a and b must share a data order, all three must
// be rank-2, and a's column count must equal b's row count; otherwise an error
// is returned before cuBLAS is called.
//
// cuBLAS works on column-major data. Column-major operands are passed through
// with the transpose flags and leading dimensions that match their layout.
// Row-major operands are handled by swapping a and b (together with m/n, the
// leading dimensions and, where they differ, the transpose flags), which
// computes the transposed product into the same memory.
func (e *Engine) MatMul(a, b, prealloc tensor.Tensor) (err error) {
	var ad, bd, pd *tensor.Dense
	if ad, bd, pd, err = e.checkThreeFloat(a, b, prealloc); err != nil {
		return errors.Wrapf(err, "MatMul failed pre check")
	}

	ado := a.DataOrder()
	bdo := b.DataOrder()
	if !ado.HasSameOrder(bdo) {
		return errors.Errorf("a does not have the same data order as b. a is %v. b is %v", a.DataOrder(), b.DataOrder())
	}

	// The shape indexing below needs two dimensions on every operand.
	if len(ad.Shape()) != 2 || len(bd.Shape()) != 2 || len(pd.Shape()) != 2 {
		return errors.Errorf("MatMul expects rank-2 operands. a has shape %v, b has shape %v, prealloc has shape %v", ad.Shape(), bd.Shape(), pd.Shape())
	}

	// get result shapes. k is the shared dimension
	// a is (m, k)
	// b is (k, n)
	// c is (m, n)
	var m, n, k int
	m = ad.Shape()[0]
	k = ad.Shape()[1]
	n = bd.Shape()[1]

	// A mismatched shared dimension would make cuBLAS read past the end of
	// one of the operands, so reject it here.
	if bd.Shape()[0] != k {
		return errors.Errorf("inner dimensions do not match. a has shape %v, b has shape %v", ad.Shape(), bd.Shape())
	}

	// lda in colmajor = number of rows;
	// lda in row major = number of cols
	var lda, ldb, ldc int
	tA, tB := blas.Trans, blas.Trans
	za := ado.IsTransposed()
	zb := bdo.IsTransposed()

	// swapping around the operands if they are row major (a becomes b, and b becomes a)
	switch {
	case ado.IsColMajor() && bdo.IsColMajor() && !za && !zb:
		lda = m
		ldb = k
		ldc = prealloc.Shape()[0]
		tA, tB = blas.NoTrans, blas.NoTrans
	case ado.IsColMajor() && bdo.IsColMajor() && za && !zb:
		lda = k
		ldb = k
		ldc = prealloc.Shape()[0]
		tA, tB = blas.Trans, blas.NoTrans
	case ado.IsColMajor() && bdo.IsColMajor() && za && zb:
		lda = k
		ldb = n
		ldc = prealloc.Shape()[0]
		tA, tB = blas.Trans, blas.Trans
	case ado.IsColMajor() && bdo.IsColMajor() && !za && zb:
		lda = m
		ldb = n
		ldc = prealloc.Shape()[0]
		tA, tB = blas.NoTrans, blas.Trans
	case ado.IsRowMajor() && bdo.IsRowMajor() && !za && !zb:
		lda = k
		ldb = n
		ldc = prealloc.Shape()[1]
		tA, tB = blas.NoTrans, blas.NoTrans

		// swap the operands: the flags are equal, so only sizes and data move
		m, n = n, m
		lda, ldb = ldb, lda
		ad, bd = bd, ad
	case ado.IsRowMajor() && bdo.IsRowMajor() && za && !zb:
		lda = m
		ldb = n
		ldc = prealloc.Shape()[1]
		tA, tB = blas.Trans, blas.NoTrans

		// swap the operands, including the differing transpose flags
		m, n = n, m
		lda, ldb = ldb, lda
		tA, tB = tB, tA
		ad, bd = bd, ad
	case ado.IsRowMajor() && bdo.IsRowMajor() && za && zb:
		lda = m
		ldb = k
		ldc = prealloc.Shape()[1]
		tA, tB = blas.Trans, blas.Trans

		// swap the operands: the flags are equal, so only sizes and data move
		m, n = n, m
		lda, ldb = ldb, lda
		ad, bd = bd, ad
	case ado.IsRowMajor() && bdo.IsRowMajor() && !za && zb:
		lda = k
		ldb = k
		ldc = prealloc.Shape()[1]
		tA, tB = blas.NoTrans, blas.Trans

		// swap the operands, including the differing transpose flags
		m, n = n, m
		lda, ldb = ldb, lda
		tA, tB = tB, tA
		ad, bd = bd, ad

	default:
		// Mixed data orders are expected to have been rejected by the
		// HasSameOrder check above.
		panic("Unreachable")
	}

	switch ad.Dtype() {
	case tensor.Float64:
		A := ad.Float64s()
		B := bd.Float64s()
		C := pd.Float64s()
		alpha, beta := float64(1), float64(0)
		e.Standard.Dgemm(tA, tB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)

	case tensor.Float32:
		A := ad.Float32s()
		B := bd.Float32s()
		C := pd.Float32s()
		alpha, beta := float32(1), float32(0)
		e.Standard.Sgemm(tA, tB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)
	default:
		return errors.Errorf("Unsupported Dtype %v", ad.Dtype())
	}
	return e.Standard.Err()
}

// Outer computes the outer product of the vectors a and b into prealloc.
//
// The operands must pass checkThreeFloat. What happens next depends on
// prealloc's data order:
//   - column-major: the GER routine for the Dtype is called (Dger for Float64,
//     Sger for Float32) with unit increments. GER performs A += alpha*x*yᵀ, so
//     the product is accumulated onto whatever prealloc already contains.
//   - row-major: a and b are temporarily reshaped to (m, 1) and (1, n) and
//     multiplied with MatMul.
//
// Any Dtype other than Float64 or Float32 yields an error on the GER path.
func (e *Engine) Outer(a, b, prealloc tensor.Tensor) (err error) {
	var ad, bd, pd *tensor.Dense
	if ad, bd, pd, err = e.checkThreeFloat(a, b, prealloc); err != nil {
		return errors.Wrapf(err, "Outer failed pre check")
	}
	m := ad.Size()
	n := bd.Size()
	pdo := pd.DataOrder()

	var lda int
	switch {
	case pdo.IsColMajor():
		lda = pd.Shape()[0]
	case pdo.IsRowMajor():
		return e.outerViaMatMul(a, b, prealloc, m, n)
	}

	incX, incY := 1, 1
	switch ad.Dtype() {
	case tensor.Float64:
		x := ad.Float64s()
		y := bd.Float64s()
		A := pd.Float64s()
		alpha := float64(1)
		e.Standard.Dger(m, n, alpha, x, incX, y, incY, A, lda)
	case tensor.Float32:
		x := ad.Float32s()
		y := bd.Float32s()
		A := pd.Float32s()
		alpha := float32(1)
		e.Standard.Sger(m, n, alpha, x, incX, y, incY, A, lda)
	default:
		// Matches MatMul: an unsupported Dtype is an error, not a silent no-op.
		return errors.Errorf("Unsupported Dtype %v", ad.Dtype())
	}
	return e.Standard.Err()
}

// outerViaMatMul computes the outer product as an (m, 1) × (1, n) matrix
// product. a and b are reshaped for the duration of the call and their
// original shapes are restored before returning, whether or not the
// multiplication succeeded.
func (e *Engine) outerViaMatMul(a, b, prealloc tensor.Tensor, m, n int) error {
	aShape := a.Shape().Clone()
	bShape := b.Shape().Clone()

	if err := a.Reshape(m, 1); err != nil {
		return err
	}
	if err := b.Reshape(1, n); err != nil {
		// Best effort: put a back; the failure to reshape b is the error
		// worth reporting.
		_ = a.Reshape(aShape...)
		return err
	}

	mulErr := e.MatMul(a, b, prealloc)

	// Always undo both reshapes so the caller's tensors are left untouched.
	bErr := b.Reshape(bShape...)
	aErr := a.Reshape(aShape...)

	switch {
	case mulErr != nil:
		return mulErr
	case bErr != nil:
		return bErr
	default:
		return aErr
	}
}
// main runs every example in a fixed order: the four matrix-vector cases, the
// four column-major and four row-major matrix-matrix cases, then the two outer
// product cases. The order determines the order of the printed output.
func main() {
	// debug.SetGCPercent(-1)
	// Keep this goroutine on one OS thread for the whole run.
	// NOTE(review): presumably required because the CUDA context is tied to
	// the thread that created it — confirm against the cu package docs.
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Matrix-vector products.
	matVecMulColmajorNonTransposed()
	matVecMulColmajorTransposed()
	matVecMulRowMajorNonTransposed()
	matVecMulRowMajorTransposed()

	// Matrix-matrix products, column-major operands.
	matMulColmajorNTNT()
	matMulColmajorTNT()
	matMulColmajorTT()
	matMulColmajorNTT()

	// Matrix-matrix products, row-major operands.
	matMulRowmajorNTNT()
	matMulRowmajorTNT()
	matMulRowmajorTT()
	matMulRowmajorNTT()

	// Outer products.
	outerColMajor()
	outerRowMajor()

}
Output:

ColMajor Non Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:[1  2  3]
C:[1000  1000]
C:
[14 32]
==========
ColMajor Transposed
A:
⎡1  4⎤
⎢2  5⎥
⎣3  6⎦

B:[1  2]
C[1000  1000  1000]
C:
[9 12 15]
==========
RowMajor Non Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:[1  2  3]
C[1000  1000]
C:
[14 32]
==========
RowMajor Transposed
A:
⎡1  4⎤
⎢2  5⎥
⎣3  6⎦

B:[1  2]
C[1000  1000  1000]
C:
[9 12 15]
==========
ColMajor Non Transposed Non Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:
⎡ 0   1   2   3⎤
⎢ 4   5   6   7⎥
⎣ 8   9  10  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[32 68 38 83 44 98 50 113]
==========
ColMajor Transposed Non Transposed
A:
⎡1  4⎤
⎢2  5⎥
⎣3  6⎦

B:
⎡0  1  2  3⎤
⎣4  5  6  7⎦

C:
⎡1000  1000  1000  1000⎤
⎢1000  1000  1000  1000⎥
⎣1000  1000  1000  1000⎦

C:
[16 20 24 21 27 33 26 34 42 31 41 51]
==========
ColMajor Transposed Transposed
A:
⎡1  4⎤
⎢2  5⎥
⎣3  6⎦

B:
⎡0  2  4  6⎤
⎣1  3  5  7⎦

C:
⎡1000  1000  1000  1000⎤
⎢1000  1000  1000  1000⎥
⎣1000  1000  1000  1000⎦

C:
[4 5 6 14 19 24 24 33 42 34 47 60]
==========
ColMajor Non Transposed Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:
⎡ 0   3   6   9⎤
⎢ 1   4   7  10⎥
⎣ 2   5   8  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[8 17 26 62 44 107 62 152]
==========
RowMajor Non Transposed Non Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:
⎡ 0   1   2   3⎤
⎢ 4   5   6   7⎥
⎣ 8   9  10  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[32 38 44 50 68 83 98 113]
==========
RowMajor Transposed Non Transposed
A:
⎡1  3  5⎤
⎣2  4  6⎦

B:
⎡ 0   1   2   3⎤
⎢ 4   5   6   7⎥
⎣ 8   9  10  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[52 61 70 79 64 76 88 100]
==========
RowMajor Transposed Non Transposed
A:
⎡1  3  5⎤
⎣2  4  6⎦

B:
⎡ 0   3   6   9⎤
⎢ 1   4   7  10⎥
⎣ 2   5   8  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[13 40 67 94 16 52 88 124]
==========
RowMajor Transposed Non Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:
⎡ 0   3   6   9⎤
⎢ 1   4   7  10⎥
⎣ 2   5   8  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[8 26 44 62 17 62 107 152]
==========
RowMajor Non Transposed
A:
[1  2  3]
B:[0  1]
C
⎡1000  1000⎤
⎢1000  1000⎥
⎣1000  1000⎦

C:
[0 0 0 1 2 3]
==========
RowMajor Non Transposed
A:
[1  2  3]
B:[0  1]
C
⎡1000  1000⎤
⎢1000  1000⎥
⎣1000  1000⎦

C:
[0 1 0 2 0 3]
==========

Index

Examples

Constants

View Source
// Operation, fill-mode, diagonal and side constants, each aliasing the
// corresponding cuBLAS C enum value.
const (
	NoTrans   = C.CUBLAS_OP_N // NoTrans represents the no-transpose operation
	Trans     = C.CUBLAS_OP_T // Trans represents the transpose operation
	ConjTrans = C.CUBLAS_OP_C // ConjTrans represents the conjugate transpose operation

	Upper = C.CUBLAS_FILL_MODE_UPPER // Upper is used to specify that the matrix is an upper triangular matrix
	Lower = C.CUBLAS_FILL_MODE_LOWER // Lower is used to specify that the matrix is a lower triangular matrix

	NonUnit = C.CUBLAS_DIAG_NON_UNIT // NonUnit is used to specify that the matrix is not a unit triangular matrix
	Unit    = C.CUBLAS_DIAG_UNIT     // Unit is used to specify that the matrix is a unit triangular matrix

	Left  = C.CUBLAS_SIDE_LEFT  // Left is used to specify a multiplication op is performed from the left
	Right = C.CUBLAS_SIDE_RIGHT // Right is used to specify a multiplication op is performed from the right
)

Variables

This section is empty.

Functions

This section is empty.

Types

type BLAS

BLAS is the interface for all cuBLAS implementations.

type ConsOpt

type ConsOpt func(impl *Standard)

func WithContext

func WithContext(ctx cu.Context) ConsOpt

func WithNativeData

func WithNativeData() ConsOpt

type Order

type Order byte

Order is used to specify the matrix storage format. We still interact with an API that allows client calls to specify order, so this is here to document that fact.

// Matrix storage orders.
const (
	RowMajor Order = iota // Row major
	ColMajor              // Column major (cuBLAS assumes all matrices are stored in this order)
)

type PointerMode

type PointerMode byte

PointerMode

// Pointer modes.
// NOTE(review): presumably these mirror cuBLAS's host/device pointer modes,
// which select where scalar arguments are read from — confirm against the
// cuBLAS documentation.
const (
	Host PointerMode = iota
	Device
)

type Standard

// Standard is the standard cuBLAS handler. It embeds the cu.Context it
// operates on and a sync.Mutex.
type Standard struct {
	// Context is the embedded CUDA context; its methods are promoted to Standard.
	cu.Context

	// Mutex is embedded, so Standard exposes Lock and Unlock directly.
	sync.Mutex
	// contains filtered or unexported fields
}

Standard is the standard cuBLAS handler. By default it assumes that the data is in RowMajor, DESPITE the fact that cuBLAS takes ColMajor only. This is done for the ease of use of developers writing in Go.

Use New to create a new BLAS handler. Use the various ConsOpts to set the options

func New

func New(opts ...ConsOpt) *Standard

func (*Standard) Caxpy

func (impl *Standard) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int)

func (*Standard) Ccopy

func (impl *Standard) Ccopy(n int, x []complex64, incX int, y []complex64, incY int)

func (*Standard) Cdgmm

func (impl *Standard) Cdgmm(mode blas.Side, m, n int, a []complex64, lda int, x []complex64, incX int, c []complex64, ldc int)

func (*Standard) Cdotc

func (impl *Standard) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64)

func (*Standard) Cdotu

func (impl *Standard) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64)

func (*Standard) Cgbmv

func (impl *Standard) Cgbmv(tA blas.Transpose, m, n, kl, ku int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Cgeam

func (impl *Standard) Cgeam(tA, tB blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, beta complex64, b []complex64, ldb int, c []complex64, ldc int)

func (*Standard) Cgemm

func (impl *Standard) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Cgemm3m

func (impl *Standard) Cgemm3m(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Cgemv

func (impl *Standard) Cgemv(tA blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Cgerc

func (impl *Standard) Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)

func (*Standard) Cgeru

func (impl *Standard) Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)

func (*Standard) Chbmv

func (impl *Standard) Chbmv(ul blas.Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Chemm

func (impl *Standard) Chemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Chemv

func (impl *Standard) Chemv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Cher

func (impl *Standard) Cher(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int)

func (*Standard) Cher2

func (impl *Standard) Cher2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)

func (*Standard) Cher2k

func (impl *Standard) Cher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int)

func (*Standard) Cherk

func (impl *Standard) Cherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int)

func (*Standard) Cherkx

func (impl *Standard) Cherkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int)

func (*Standard) Chpmv

func (impl *Standard) Chpmv(ul blas.Uplo, n int, alpha complex64, aP, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Chpr

func (impl *Standard) Chpr(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, aP []complex64)

func (*Standard) Chpr2

func (impl *Standard) Chpr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, aP []complex64)

func (*Standard) Close

func (impl *Standard) Close() error

func (*Standard) Crot

func (impl *Standard) Crot(n int, x []complex64, incX int, y []complex64, incY int, cScalar float32, sScalar []complex64)

func (*Standard) Cscal

func (impl *Standard) Cscal(n int, alpha complex64, x []complex64, incX int)

func (*Standard) Csscal

func (impl *Standard) Csscal(n int, alpha float32, x []complex64, incX int)

func (*Standard) Cswap

func (impl *Standard) Cswap(n int, x []complex64, incX int, y []complex64, incY int)

func (*Standard) Csymm

func (impl *Standard) Csymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Csymv

func (impl *Standard) Csymv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Csyr

func (impl *Standard) Csyr(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, a []complex64, lda int)

func (*Standard) Csyr2

func (impl *Standard) Csyr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)

func (*Standard) Csyr2k

func (impl *Standard) Csyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Csyrk

func (impl *Standard) Csyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int)

func (*Standard) Csyrkx

func (impl *Standard) Csyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Ctbmv

func (impl *Standard) Ctbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int)

func (*Standard) Ctbsv

func (impl *Standard) Ctbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int)

func (*Standard) Ctpmv

func (impl *Standard) Ctpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex64, incX int)

func (*Standard) Ctpsv

func (impl *Standard) Ctpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex64, incX int)

func (*Standard) Ctpttr

func (impl *Standard) Ctpttr(ul blas.Uplo, n int, aP, a []complex64, lda int)

func (*Standard) Ctrmm

func (impl *Standard) Ctrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int)

func (*Standard) Ctrmv

func (impl *Standard) Ctrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int)

func (*Standard) Ctrsm

func (impl *Standard) Ctrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int)

func (*Standard) Ctrsv

func (impl *Standard) Ctrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int)

func (*Standard) Ctrttp

func (impl *Standard) Ctrttp(ul blas.Uplo, n int, a []complex64, lda int, aP []complex64)

func (*Standard) Dasum

func (impl *Standard) Dasum(n int, x []float64, incX int) (retVal float64)

Dasum computes the sum of the absolute values of the elements of x.

\sum_i |x[i]|

Dasum returns 0 if incX is negative.

func (*Standard) Daxpy

func (impl *Standard) Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int)

Daxpy adds alpha times x to y

y[i] += alpha * x[i] for all i

func (*Standard) Dcopy

func (impl *Standard) Dcopy(n int, x []float64, incX int, y []float64, incY int)

Dcopy copies the elements of x into the elements of y.

y[i] = x[i] for all i

func (*Standard) Ddgmm

func (impl *Standard) Ddgmm(mode blas.Side, m, n int, a []float64, lda int, x []float64, incX int, c []float64, ldc int)

func (*Standard) Ddot

func (impl *Standard) Ddot(n int, x []float64, incX int, y []float64, incY int) (retVal float64)

Ddot computes the dot product of the two vectors

\sum_i x[i]*y[i]

func (*Standard) Dgbmv

func (impl *Standard) Dgbmv(tA blas.Transpose, m, n, kl, ku int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)

Dgbmv computes

y = alpha * A * x + beta * y if tA == blas.NoTrans
y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans

where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals, and m and n refer to the size of the full dense matrix it represents. x and y are vectors, and alpha and beta are scalars.

func (*Standard) Dgeam

func (impl *Standard) Dgeam(tA, tB blas.Transpose, m, n int, alpha float64, a []float64, lda int, beta float64, b []float64, ldb int, c []float64, ldc int)

func (*Standard) Dgemm

func (impl *Standard) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)

Dgemm computes

C = beta * C + alpha * A * B,

where A, B, and C are dense matrices, and alpha and beta are scalars. tA and tB specify whether A or B are transposed.

func (*Standard) Dgemv

func (impl *Standard) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)

Dgemv computes

y = alpha * A * x + beta * y if tA == blas.NoTrans
y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans

where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.

func (*Standard) Dger

func (impl *Standard) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int)

Dger performs the rank-one operation

A += alpha * x * y^T

where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.

func (*Standard) Dnrm2

func (impl *Standard) Dnrm2(n int, x []float64, incX int) (retVal float64)

Dnrm2 computes the Euclidean norm of a vector,

sqrt(\sum_i x[i] * x[i]).

This function returns 0 if incX is negative.

func (*Standard) Drot

func (impl *Standard) Drot(n int, x []float64, incX int, y []float64, incY int, cScalar, sScalar float64)

Drot applies a plane transformation.

x[i] = c * x[i] + s * y[i]
y[i] = c * y[i] - s * x[i]

func (*Standard) Drotg

func (impl *Standard) Drotg(a float64, b float64) (c float64, s float64, r float64, z float64)

func (*Standard) Drotm

func (impl *Standard) Drotm(n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams)

func (*Standard) Drotmg

func (impl *Standard) Drotmg(d1 float64, d2 float64, b1 float64, b2 float64) (p blas.DrotmParams, rd1 float64, rd2 float64, rb1 float64)

func (*Standard) Dsbmv

func (impl *Standard) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)

Dsbmv performs

y = alpha * A * x + beta * y

where A is an n×n symmetric banded matrix, x and y are vectors, and alpha and beta are scalars.

func (*Standard) Dscal

func (impl *Standard) Dscal(n int, alpha float64, x []float64, incX int)

Dscal scales x by alpha.

x[i] *= alpha

Dscal has no effect if incX < 0.

func (*Standard) Dsdot

func (impl *Standard) Dsdot(n int, x []float32, incX int, y []float32, incY int) float64

func (*Standard) Dspmv

func (impl *Standard) Dspmv(ul blas.Uplo, n int, alpha float64, aP, x []float64, incX int, beta float64, y []float64, incY int)

Dspmv performs

y = alpha * A * x + beta * y,

where A is an n×n symmetric matrix in packed format, x and y are vectors and alpha and beta are scalars.

func (*Standard) Dspr

func (impl *Standard) Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, aP []float64)

Dspr computes the rank-one operation

a += alpha * x * x^T

where a is an n×n symmetric matrix in packed format, x is a vector, and alpha is a scalar.

func (*Standard) Dspr2

func (impl *Standard) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, aP []float64)

Dspr2 performs the symmetric rank-2 update

A += alpha * x * y^T + alpha * y * x^T,

where A is an n×n symmetric matrix in packed format, x and y are vectors, and alpha is a scalar.

func (*Standard) Dswap

func (impl *Standard) Dswap(n int, x []float64, incX int, y []float64, incY int)

Dswap exchanges the elements of two vectors.

x[i], y[i] = y[i], x[i] for all i

func (*Standard) Dsymm

func (impl *Standard) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)

Dsymm performs one of

C = alpha * A * B + beta * C, if side == blas.Left,
C = alpha * B * A + beta * C, if side == blas.Right,

where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha is a scalar.

func (*Standard) Dsymv

func (impl *Standard) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)

Dsymv computes

y = alpha * A * x + beta * y,

where a is an n×n symmetric matrix, x and y are vectors, and alpha and beta are scalars.

func (*Standard) Dsyr

func (impl *Standard) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int)

Dsyr performs the rank-one update

a += alpha * x * x^T

where a is an n×n symmetric matrix, and x is a vector.

func (*Standard) Dsyr2

func (impl *Standard) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int)

Dsyr2 performs the symmetric rank-two update

A += alpha * x * y^T + alpha * y * x^T

where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.

func (*Standard) Dsyr2k

func (impl *Standard) Dsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)

Dsyr2k performs the symmetric rank 2k operation

C = alpha * A * B^T + alpha * B * A^T + beta * C

where C is an n×n symmetric matrix. A and B are n×k matrices if tA == NoTrans and k×n otherwise. alpha and beta are scalars.

func (*Standard) Dsyrk

func (impl *Standard) Dsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int)

Dsyrk performs the symmetric rank-k operation

C = alpha * A * A^T + beta*C

C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and a k×n matrix otherwise. alpha and beta are scalars.

func (*Standard) Dsyrkx

func (impl *Standard) Dsyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)

func (*Standard) Dtbmv

func (impl *Standard) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int)

Dtbmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular banded matrix with k diagonals, and x is a vector.

func (*Standard) Dtbsv

func (impl *Standard) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int)

Dtbsv solves

A * x = b

where A is an n×n triangular banded matrix with k diagonals in packed format, and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Dtpmv

func (impl *Standard) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float64, incX int)

Dtpmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n unit triangular matrix in packed format, and x is a vector.

func (*Standard) Dtpsv

func (impl *Standard) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float64, incX int)

Dtpsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix in packed format and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Dtpttr

func (impl *Standard) Dtpttr(ul blas.Uplo, n int, aP, a []float64, lda int)

func (*Standard) Dtrmm

func (impl *Standard) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int)

func (*Standard) Dtrmv

func (impl *Standard) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int)

Dtrmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix and x is a vector.

func (*Standard) Dtrsm

func (impl *Standard) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int)

Dtrsm solves

A * X = alpha * B,   if tA == blas.NoTrans and side == blas.Left,
A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left,
X * A = alpha * B,   if tA == blas.NoTrans and side == blas.Right,
X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,

where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a scalar.

At entry to the function, X contains the values of B, and the result is stored in place into X.

No check is made that A is invertible.

func (*Standard) Dtrsv

func (impl *Standard) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int)

Dtrsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

A is an n×n triangular matrix and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Dtrttp

func (impl *Standard) Dtrttp(ul blas.Uplo, n int, a []float64, lda int, aP []float64)

func (*Standard) Dzasum

func (impl *Standard) Dzasum(n int, x []complex128, incX int) (retVal float64)

func (*Standard) Dznrm2

func (impl *Standard) Dznrm2(n int, x []complex128, incX int) (retVal float64)

func (*Standard) Err

func (impl *Standard) Err() error

func (*Standard) Icamax

func (impl *Standard) Icamax(n int, x []complex64, incX int) (retVal int)

func (*Standard) Icamin

func (impl *Standard) Icamin(n int, x []complex64, incX int) (retVal int)

func (*Standard) Idamax

func (impl *Standard) Idamax(n int, x []float64, incX int) (retVal int)

Idamax returns the index of an element of x with the largest absolute value. If there are multiple such indices the earliest is returned. Idamax returns -1 if n == 0.

func (*Standard) Idamin

func (impl *Standard) Idamin(n int, x []float64, incX int) (retVal int)

func (*Standard) Init

func (impl *Standard) Init(opts ...ConsOpt) error

func (*Standard) Isamax

func (impl *Standard) Isamax(n int, x []float32, incX int) (retVal int)

Isamax returns the index of an element of x with the largest absolute value. If there are multiple such indices the earliest is returned. Isamax returns -1 if n == 0.

func (*Standard) Isamin

func (impl *Standard) Isamin(n int, x []float32, incX int) (retVal int)

func (*Standard) Izamax

func (impl *Standard) Izamax(n int, x []complex128, incX int) (retVal int)

func (*Standard) Izamin

func (impl *Standard) Izamin(n int, x []complex128, incX int) (retVal int)

func (*Standard) Sasum

func (impl *Standard) Sasum(n int, x []float32, incX int) (retVal float32)

Sasum computes the sum of the absolute values of the elements of x.

\sum_i |x[i]|

Sasum returns 0 if incX is negative.

func (*Standard) Saxpy

func (impl *Standard) Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int)

Saxpy adds alpha times x to y

y[i] += alpha * x[i] for all i

func (*Standard) Scasum

func (impl *Standard) Scasum(n int, x []complex64, incX int) (retVal float32)

func (*Standard) Scnrm2

func (impl *Standard) Scnrm2(n int, x []complex64, incX int) (retVal float32)

func (*Standard) Scopy

func (impl *Standard) Scopy(n int, x []float32, incX int, y []float32, incY int)

Scopy copies the elements of x into the elements of y.

y[i] = x[i] for all i

func (*Standard) Sdgmm

func (impl *Standard) Sdgmm(mode blas.Side, m, n int, a []float32, lda int, x []float32, incX int, c []float32, ldc int)

func (*Standard) Sdot

func (impl *Standard) Sdot(n int, x []float32, incX int, y []float32, incY int) (retVal float32)

Sdot computes the dot product of the two vectors

\sum_i x[i]*y[i]

func (*Standard) Sdsdot

func (impl *Standard) Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32

func (*Standard) Sgbmv

func (impl *Standard) Sgbmv(tA blas.Transpose, m, n, kl, ku int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)

Sgbmv computes

y = alpha * A * x + beta * y if tA == blas.NoTrans
y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans

where A is an m×n band matrix with kl sub-diagonals and ku super-diagonals, and m and n refer to the size of the full dense matrix it represents. x and y are vectors, and alpha and beta are scalars.

func (*Standard) Sgeam

func (impl *Standard) Sgeam(tA, tB blas.Transpose, m, n int, alpha float32, a []float32, lda int, beta float32, b []float32, ldb int, c []float32, ldc int)

func (*Standard) Sgemm

func (impl *Standard) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)

Sgemm computes

C = beta * C + alpha * A * B,

where A, B, and C are dense matrices, and alpha and beta are scalars. tA and tB specify whether A or B are transposed.

func (*Standard) Sgemv

func (impl *Standard) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)

Sgemv computes

y = alpha * A * x + beta * y if tA == blas.NoTrans
y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans

where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.

func (*Standard) Sger

func (impl *Standard) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int)

Sger performs the rank-one operation

A += alpha * x * y^T

where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.

func (*Standard) Snrm2

func (impl *Standard) Snrm2(n int, x []float32, incX int) (retVal float32)

Snrm2 computes the Euclidean norm of a vector,

sqrt(\sum_i x[i] * x[i]).

This function returns 0 if incX is negative.

func (*Standard) Srot

func (impl *Standard) Srot(n int, x []float32, incX int, y []float32, incY int, cScalar, sScalar float32)

Srot applies a plane transformation.

x[i] = c * x[i] + s * y[i]
y[i] = c * y[i] - s * x[i]

func (*Standard) Srotg

func (impl *Standard) Srotg(a float32, b float32) (c float32, s float32, r float32, z float32)

func (*Standard) Srotm

func (impl *Standard) Srotm(n int, x []float32, incX int, y []float32, incY int, p blas.SrotmParams)

func (*Standard) Srotmg

func (impl *Standard) Srotmg(d1 float32, d2 float32, b1 float32, b2 float32) (p blas.SrotmParams, rd1 float32, rd2 float32, rb1 float32)

func (*Standard) Ssbmv

func (impl *Standard) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)

Ssbmv performs

y = alpha * A * x + beta * y

where A is an n×n symmetric banded matrix, x and y are vectors, and alpha and beta are scalars.

func (*Standard) Sscal

func (impl *Standard) Sscal(n int, alpha float32, x []float32, incX int)

Sscal scales x by alpha.

x[i] *= alpha

Sscal has no effect if incX < 0.

func (*Standard) Sspmv

func (impl *Standard) Sspmv(ul blas.Uplo, n int, alpha float32, aP, x []float32, incX int, beta float32, y []float32, incY int)

Sspmv performs

y = alpha * A * x + beta * y,

where A is an n×n symmetric matrix in packed format, x and y are vectors and alpha and beta are scalars.

func (*Standard) Sspr

func (impl *Standard) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, aP []float32)

Sspr computes the rank-one operation

a += alpha * x * x^T

where a is an n×n symmetric matrix in packed format, x is a vector, and alpha is a scalar.

func (*Standard) Sspr2

func (impl *Standard) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, aP []float32)

Sspr2 performs the symmetric rank-2 update

A += alpha * x * y^T + alpha * y * x^T,

where A is an n×n symmetric matrix in packed format, x and y are vectors, and alpha is a scalar.

func (*Standard) Sswap

func (impl *Standard) Sswap(n int, x []float32, incX int, y []float32, incY int)

Sswap exchanges the elements of two vectors.

x[i], y[i] = y[i], x[i] for all i

func (*Standard) Ssymm

func (impl *Standard) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)

Ssymm performs one of

C = alpha * A * B + beta * C, if side == blas.Left,
C = alpha * B * A + beta * C, if side == blas.Right,

where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha and beta are scalars.

func (*Standard) Ssymv

func (impl *Standard) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)

Ssymv computes

y = alpha * A * x + beta * y,

where A is an n×n symmetric matrix, x and y are vectors, and alpha and beta are scalars.

func (*Standard) Ssyr

func (impl *Standard) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int)

Ssyr performs the rank-one update

a += alpha * x * x^T

where a is an n×n symmetric matrix, and x is a vector.

func (*Standard) Ssyr2

func (impl *Standard) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int)

Ssyr2 performs the symmetric rank-two update

A += alpha * x * y^T + alpha * y * x^T

where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.

func (*Standard) Ssyr2k

func (impl *Standard) Ssyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)

Ssyr2k performs the symmetric rank 2k operation

C = alpha * A * B^T + alpha * B * A^T + beta * C

where C is an n×n symmetric matrix. A and B are n×k matrices if t == blas.NoTrans and k×n otherwise. alpha and beta are scalars.

func (*Standard) Ssyrk

func (impl *Standard) Ssyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int)

Ssyrk performs the symmetric rank-k operation

C = alpha * A * A^T + beta*C

where C is an n×n symmetric matrix. A is an n×k matrix if t == blas.NoTrans, and a k×n matrix otherwise. alpha and beta are scalars.

func (*Standard) Ssyrkx

func (impl *Standard) Ssyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)

func (*Standard) Stbmv

func (impl *Standard) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int)

Stbmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular banded matrix with k diagonals, and x is a vector.

func (*Standard) Stbsv

func (impl *Standard) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int)

Stbsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular banded matrix with k diagonals in packed format, and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Stpmv

func (impl *Standard) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float32, incX int)

Stpmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix in packed format, and x is a vector.

func (*Standard) Stpsv

func (impl *Standard) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float32, incX int)

Stpsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix in packed format and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Stpttr

func (impl *Standard) Stpttr(ul blas.Uplo, n int, aP, a []float32, lda int)

func (*Standard) Strmm

func (impl *Standard) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int)

func (*Standard) Strmv

func (impl *Standard) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int)

Strmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix and x is a vector.

func (*Standard) Strsm

func (impl *Standard) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int)

Strsm solves

A * X = alpha * B,   if tA == blas.NoTrans and side == blas.Left,
A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left,
X * A = alpha * B,   if tA == blas.NoTrans and side == blas.Right,
X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,

where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a scalar.

At entry to the function, X contains the values of B, and the result is stored in place into X.

No check is made that A is invertible.

func (*Standard) Strsv

func (impl *Standard) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int)

Strsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

A is an n×n triangular matrix and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Strttp

func (impl *Standard) Strttp(ul blas.Uplo, n int, a []float32, lda int, aP []float32)

func (*Standard) Zaxpy

func (impl *Standard) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int)

func (*Standard) Zcopy

func (impl *Standard) Zcopy(n int, x []complex128, incX int, y []complex128, incY int)

func (*Standard) Zdgmm

func (impl *Standard) Zdgmm(mode blas.Side, m, n int, a []complex128, lda int, x []complex128, incX int, c []complex128, ldc int)

func (*Standard) Zdotc

func (impl *Standard) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) (dotc complex128)

func (*Standard) Zdotu

func (impl *Standard) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) (dotu complex128)

func (*Standard) Zdscal

func (impl *Standard) Zdscal(n int, alpha float64, x []complex128, incX int)

func (*Standard) Zgbmv

func (impl *Standard) Zgbmv(tA blas.Transpose, m, n, kl, ku int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zgeam

func (impl *Standard) Zgeam(tA, tB blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, beta complex128, b []complex128, ldb int, c []complex128, ldc int)

func (*Standard) Zgemm

func (impl *Standard) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Zgemm3m

func (impl *Standard) Zgemm3m(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Zgemv

func (impl *Standard) Zgemv(tA blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zgerc

func (impl *Standard) Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)

func (*Standard) Zgeru

func (impl *Standard) Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)

func (*Standard) Zhbmv

func (impl *Standard) Zhbmv(ul blas.Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zhemm

func (impl *Standard) Zhemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Zhemv

func (impl *Standard) Zhemv(ul blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zher

func (impl *Standard) Zher(ul blas.Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, lda int)

func (*Standard) Zher2

func (impl *Standard) Zher2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)

func (*Standard) Zher2k

func (impl *Standard) Zher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int)

func (*Standard) Zherk

func (impl *Standard) Zherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int)

func (*Standard) Zherkx

func (impl *Standard) Zherkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int)

func (*Standard) Zhpmv

func (impl *Standard) Zhpmv(ul blas.Uplo, n int, alpha complex128, aP, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zhpr

func (impl *Standard) Zhpr(ul blas.Uplo, n int, alpha float64, x []complex128, incX int, aP []complex128)

func (*Standard) Zhpr2

func (impl *Standard) Zhpr2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, aP []complex128)

func (*Standard) Zrot

func (impl *Standard) Zrot(n int, x []complex128, incX int, y []complex128, incY int, cScalar float64, sScalar complex128)

func (*Standard) Zscal

func (impl *Standard) Zscal(n int, alpha complex128, x []complex128, incX int)

func (*Standard) Zswap

func (impl *Standard) Zswap(n int, x []complex128, incX int, y []complex128, incY int)

func (*Standard) Zsymm

func (impl *Standard) Zsymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Zsymv

func (impl *Standard) Zsymv(ul blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zsyr

func (impl *Standard) Zsyr(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, a []complex128, lda int)

func (*Standard) Zsyr2

func (impl *Standard) Zsyr2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)

func (*Standard) Zsyr2k

func (impl *Standard) Zsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Zsyrk

func (impl *Standard) Zsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int)

func (*Standard) Zsyrkx

func (impl *Standard) Zsyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Ztbmv

func (impl *Standard) Ztbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int)

func (*Standard) Ztbsv

func (impl *Standard) Ztbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int)

func (*Standard) Ztpmv

func (impl *Standard) Ztpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex128, incX int)

func (*Standard) Ztpsv

func (impl *Standard) Ztpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex128, incX int)

func (*Standard) Ztpttr

func (impl *Standard) Ztpttr(ul blas.Uplo, n int, aP, a []complex128, lda int)

func (*Standard) Ztrmm

func (impl *Standard) Ztrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int)

func (*Standard) Ztrmv

func (impl *Standard) Ztrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex128, lda int, x []complex128, incX int)

func (*Standard) Ztrsm

func (impl *Standard) Ztrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int)

func (*Standard) Ztrsv

func (impl *Standard) Ztrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex128, lda int, x []complex128, incX int)

func (*Standard) Ztrttp

func (impl *Standard) Ztrttp(ul blas.Uplo, n int, a []complex128, lda int, aP []complex128)

type Status

type Status int

Status is the cublas status.

// Status values, each defined from the corresponding C.CUBLAS_STATUS_* constant.
const (
	Success        Status = C.CUBLAS_STATUS_SUCCESS          // The operation completed successfully.
	NotInitialized Status = C.CUBLAS_STATUS_NOT_INITIALIZED  // The cuBLAS library was not initialized. This is usually caused by the lack of a prior cublasCreate() call.
	AllocFailed    Status = C.CUBLAS_STATUS_ALLOC_FAILED     // Resource allocation failed inside the cuBLAS library.
	InvalidValue   Status = C.CUBLAS_STATUS_INVALID_VALUE    // An unsupported value or parameter was passed to the function (a negative vector size, for example).
	ArchMismatch   Status = C.CUBLAS_STATUS_ARCH_MISMATCH    // The function requires a feature absent from the device architecture; usually caused by the lack of support for double precision.
	MappingError   Status = C.CUBLAS_STATUS_MAPPING_ERROR    // An access to GPU memory space failed, which is usually caused by a failure to bind a texture.
	ExecFailed     Status = C.CUBLAS_STATUS_EXECUTION_FAILED // The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can be caused by multiple reasons.
	InternalError  Status = C.CUBLAS_STATUS_INTERNAL_ERROR   // An internal cuBLAS operation failed. This error is usually caused by a cudaMemcpyAsync() failure.
	Unsupported    Status = C.CUBLAS_STATUS_NOT_SUPPORTED    // The functionality requested is not supported.
	LicenceError   Status = C.CUBLAS_STATUS_LICENSE_ERROR    // The functionality requested requires some license and an error was detected when trying to check the current licensing.
)

func (Status) Error

func (err Status) Error() string

func (Status) String

func (err Status) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL