Documentation
¶
Index ¶
- Constants
- Variables
- func AreSameShape(a, b *Tensor) bool
- func BuildForwardExpand(graph *Graph, tensor *Tensor)
- func BuildForwardImpl(graph *Graph, tensor *Tensor, expand bool)
- func CanMulMat(t0, t1 *Tensor) bool
- func CheckGraph()
- func ComputeBackward(ctx *Context, tensor *Tensor, inplace bool)
- func ComputeForward(ctx *Context, graph *Graph, params *ComputeParams, tensor *Tensor)
- func ComputeForwardAddFP32(params *ComputeParams, src0, src1, dst *Tensor)
- func ComputeForwardCopy(params *ComputeParams, src0, dst *Tensor)
- func ComputeForwardDiagMaskInfFP32(params *ComputeParams, src0, src1, dst *Tensor)
- func ComputeForwardDupFP32(params *ComputeParams, src0, dst *Tensor)
- func ComputeForwardGetRows(params *ComputeParams, src0, src1, dst *Tensor)
- func ComputeForwardMulFP32(params *ComputeParams, src0, src1, dst *Tensor)
- func ComputeForwardMulMatFP32(params *ComputeParams, src0, src1, dst *Tensor)
- func ComputeForwardPermute(params *ComputeParams, src0 *Tensor)
- func ComputeForwardRMSNormFP32(params *ComputeParams, src0, dst *Tensor)
- func ComputeForwardRepeatFP32(params *ComputeParams, src0, dst *Tensor)
- func ComputeForwardReshape(params *ComputeParams, src0, dst *Tensor)
- func ComputeForwardRopeFP32(params *ComputeParams, src0, src1, dst *Tensor)
- func ComputeForwardScaleFP32(params *ComputeParams, src0, src1, dst *Tensor)
- func ComputeForwardSiluFP32(params *ComputeParams, src0, dst *Tensor)
- func ComputeForwardSoftMaxFP32(params *ComputeParams, src0, dst *Tensor)
- func ComputeForwardView(params *ComputeParams, src0 *Tensor)
- func Do(params *ComputeParams, id int)
- func GraphCompute(ctx *Context, graph *Graph)
- func Init(params InitParams)
- func IsMatrix(tensor *Tensor) bool
- func IsScalar(tensor *Tensor) bool
- func IsVector(tensor *Tensor) bool
- func Job(listen <-chan *ComputeParams, id int)
- func SiluFP32(x float32) float32
- func Token2Str(vocab *Vocab, token uint32) string
- func Tokenize(vocab *Vocab, text string, bos bool) []uint32
- func TryAddBigram(vocab *Vocab, symbols []Symbol, workQueue *[]Bigram, left, right int)
- func TypeSizeFloat(dt DType) float32
- func VecAccFP32(n uint32, y, x []float32)
- func VecAddFP32(n uint32, z, x, y []float32)
- func VecCopyFP32(n uint32, y, x []float32)
- func VecDotFP32(n uint32, x, y []float32) float32
- func VecMadFP32(n uint32, y, x []float32, v float32)
- func VecMaxFP32(n uint32, x []float32) float32
- func VecMulFP32(n uint32, z, x, y []float32)
- func VecScaleFP32(n uint32, y []float32, v float32)
- func VecSiluFP32(n uint32, y, x []float32)
- func VisitParents(graph *Graph, node *Tensor)
- type Allocator
- type Bigram
- type ComputeParams
- type Context
- type DType
- type Graph
- type InitParams
- type Symbol
- type TaskType
- type Tensor
- func Add(ctx *Context, a, b *Tensor) *Tensor
- func AddImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor
- func AddInplace(ctx *Context, a, b *Tensor) *Tensor
- func Copy(ctx *Context, a, b *Tensor) *Tensor
- func CopyImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor
- func CopyInplace(ctx *Context, a, b *Tensor) *Tensor
- func DiagMaskInf(ctx *Context, a *Tensor, past uint32) *Tensor
- func Div(ctx *Context, a, b *Tensor) *Tensor
- func DivImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor
- func DivInplace(ctx *Context, a, b *Tensor, inplace bool) *Tensor
- func DupTensor(ctx *Context, src *Tensor) *Tensor
- func GetRows(ctx *Context, a, b *Tensor) *Tensor
- func Mul(ctx *Context, a, b *Tensor) *Tensor
- func MulImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor
- func MulInplace(ctx *Context, a, b *Tensor) *Tensor
- func MulMat(ctx *Context, a, b *Tensor) *Tensor
- func NewFP32(ctx *Context, value float32) *Tensor
- func NewTensor(ctx *Context, dt DType, dims uint32, ne0, ne1, ne2, ne3 uint32, data []float32) *Tensor
- func NewTensor1D(ctx *Context, dt DType, ne0 uint32) *Tensor
- func NewTensor2D(ctx *Context, dt DType, ne0, ne1 uint32) *Tensor
- func NewTensor3D(ctx *Context, dt DType, ne0, ne1, ne2 uint32) *Tensor
- func NewTensor4D(ctx *Context, dt DType, ne0, ne1, ne2, ne3 uint32) *Tensor
- func Permute(ctx *Context, a *Tensor, axis0, axis1, axis2, axis3 uint32) *Tensor
- func RMSNorm(ctx *Context, a *Tensor) *Tensor
- func RMSNormImpl(ctx *Context, a *Tensor, inplace bool) *Tensor
- func RMSNormInplace(ctx *Context, a *Tensor) *Tensor
- func Repeat(ctx *Context, a, b *Tensor) *Tensor
- func Reshape3D(ctx *Context, a *Tensor, ne0, ne1, ne2 uint32) *Tensor
- func Rope(ctx *Context, a *Tensor, past, dims, mode uint32) *Tensor
- func Scale(ctx *Context, a, b *Tensor) *Tensor
- func ScaleImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor
- func ScaleInplace(ctx *Context, a, b *Tensor) *Tensor
- func SetFP32(tensor *Tensor, value float32) *Tensor
- func Sgn(ctx *Context, a *Tensor) *Tensor
- func SgnImpl(ctx *Context, a *Tensor, inplace bool) *Tensor
- func SgnInplace(ctx *Context, a *Tensor) *Tensor
- func Silu(ctx *Context, a *Tensor) *Tensor
- func SiluImpl(ctx *Context, a *Tensor, inplace bool) *Tensor
- func SiluInplace(ctx *Context, a *Tensor) *Tensor
- func SoftMax(ctx *Context, a *Tensor) *Tensor
- func Step(ctx *Context, a *Tensor) *Tensor
- func StepImpl(ctx *Context, a *Tensor, inplace bool) *Tensor
- func StepInplace(ctx *Context, a *Tensor) *Tensor
- func Sub(ctx *Context, a, b *Tensor) *Tensor
- func SubImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor
- func SubInplace(ctx *Context, a, b *Tensor) *Tensor
- func Sum(ctx *Context, a *Tensor) *Tensor
- func Transpose(ctx *Context, a *Tensor) *Tensor
- func View1D(ctx *Context, a *Tensor, ne0 uint32, offset uint32) *Tensor
- func ViewTensor(ctx *Context, src *Tensor) *Tensor
- type TokenScore
- type Vocab
Constants ¶
const ( DEBUG = false MAX_DIMS = 4 MAX_NODES = 4096 MAX_PARAMS = 16 MAX_OPT = 4 QK = 32 // quantization TOKEN_BOS = 1 TOKEN_EOS = 2 )
const ( OP_NONE optype = iota OP_DUP OP_ADD OP_SUB OP_MUL OP_DIV OP_SQR OP_SQRT OP_SUM OP_MEAN OP_REPEAT OP_ABS OP_SGN OP_NEG OP_STEP OP_RELU OP_GELU OP_SILU OP_NORM OP_RMS_NORM OP_MUL_MAT OP_SCALE OP_CPY OP_RESHAPE OP_VIEW OP_PERMUTE OP_TRANSPOSE OP_GET_ROWS OP_DIAG_MASK_INF OP_SOFT_MAX OP_ROPE OP_CONV_1D_1S OP_CONV_1D_2S OP_FLASH_ATTN OP_FLASH_FF OP_COUNT )
const MaxMem = 0 // 28_000_000_000
const MaxPool = 0 // 2_000_000_000
TODO: Precompute max needed RAM size
const NewLineToken = 13 // ml.Tokenize(Ctx.Vocab, "\n", false)[0]
Variables ¶
var BLCK_SIZE [TYPE_COUNT]uint32 = [TYPE_COUNT]uint32{1, 1, QK, QK, 1, 1, 1, 0}
var TYPE_SIZE [TYPE_COUNT]uint32 = [TYPE_COUNT]uint32{4, 2, 4 + QK/2, 4*2 + QK/2, 1, 2, 4, 0}
var TableExpFP16 [1 << 16]float16.Float16
Precomputed exp table for f16 (128 KB). Original C declaration: static ggml_fp16_t table_exp_f16[1 << 16];
Functions ¶
func AreSameShape ¶
func BuildForwardExpand ¶
ggml_build_forward_expand
func BuildForwardImpl ¶
ggml_build_forward_impl
func CheckGraph ¶
func CheckGraph()
TODO: Implement all the tensor assertions BEFORE the actual computation
func ComputeBackward ¶
func ComputeForward ¶
func ComputeForward(ctx *Context, graph *Graph, params *ComputeParams, tensor *Tensor)
func ComputeForwardAddFP32 ¶
func ComputeForwardAddFP32(params *ComputeParams, src0, src1, dst *Tensor)
ggml_compute_forward_add
func ComputeForwardCopy ¶
func ComputeForwardCopy(params *ComputeParams, src0, dst *Tensor)
func ComputeForwardDiagMaskInfFP32 ¶
func ComputeForwardDiagMaskInfFP32(params *ComputeParams, src0, src1, dst *Tensor)
ggml_compute_forward_diag_mask_inf
func ComputeForwardDupFP32 ¶
func ComputeForwardDupFP32(params *ComputeParams, src0, dst *Tensor)
ggml_compute_forward_dup_f32
func ComputeForwardGetRows ¶
func ComputeForwardGetRows(params *ComputeParams, src0, src1, dst *Tensor)
ggml_compute_forward_get_rows_f32
func ComputeForwardMulFP32 ¶
func ComputeForwardMulFP32(params *ComputeParams, src0, src1, dst *Tensor)
ggml_compute_forward_mul
func ComputeForwardMulMatFP32 ¶
func ComputeForwardMulMatFP32(params *ComputeParams, src0, src1, dst *Tensor)
ggml_compute_forward_mul_mat_f32
func ComputeForwardPermute ¶
func ComputeForwardPermute(params *ComputeParams, src0 *Tensor)
ggml_compute_forward_permute
func ComputeForwardRMSNormFP32 ¶
func ComputeForwardRMSNormFP32(params *ComputeParams, src0, dst *Tensor)
ggml_compute_forward_rms_norm_f32
func ComputeForwardRepeatFP32 ¶
func ComputeForwardRepeatFP32(params *ComputeParams, src0, dst *Tensor)
ggml_compute_forward_repeat
func ComputeForwardReshape ¶
func ComputeForwardReshape(params *ComputeParams, src0, dst *Tensor)
ggml_compute_forward_reshape
func ComputeForwardRopeFP32 ¶
func ComputeForwardRopeFP32(params *ComputeParams, src0, src1, dst *Tensor)
ggml_compute_forward_rope
func ComputeForwardScaleFP32 ¶
func ComputeForwardScaleFP32(params *ComputeParams, src0, src1, dst *Tensor)
ggml_compute_forward_scale_f32
func ComputeForwardSiluFP32 ¶
func ComputeForwardSiluFP32(params *ComputeParams, src0, dst *Tensor)
ggml_compute_forward_silu
func ComputeForwardSoftMaxFP32 ¶
func ComputeForwardSoftMaxFP32(params *ComputeParams, src0, dst *Tensor)
ggml_compute_forward_soft_max
func ComputeForwardView ¶
func ComputeForwardView(params *ComputeParams, src0 *Tensor)
ggml_compute_forward_view
func Do ¶
func Do(params *ComputeParams, id int)
Do is an experimental alternative to the always-waiting Job threads.
func GraphCompute ¶
func Job ¶
func Job(listen <-chan *ComputeParams, id int)
Job is a goroutine that exists while the computation loop is active. The main purpose of a Job is to perform part of the time-consuming matrix multiplications. TODO: Investigate https://pkg.go.dev/runtime#LockOSThread
func TryAddBigram ¶
func TypeSizeFloat ¶
func VecAddFP32 ¶
inline static void ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] + y[i]; }
func VecCopyFP32 ¶
func VecMaxFP32 ¶
func VecMulFP32 ¶
func VecSiluFP32 ¶
inline static void ggml_vec_silu_f32(const int n, float * y, const float * x) {
Types ¶
type Allocator ¶
Allocator is an experimental memory pool for FP32 slices. TODO: Investigate https://github.com/valyala/bytebufferpool
func NewAllocator ¶
func NewAllocator() *Allocator
type ComputeParams ¶
type Context ¶
type Context struct { MaxThreads int UseAVX bool UseNEON bool //Graph *Graph Compute chan *ComputeParams Allocator *Allocator }
func NewContext ¶
func (*Context) ReleaseContext ¶
func (ctx *Context) ReleaseContext()
ReleaseContext frees all context resources: the channel is closed and the goroutines are stopped.
type Graph ¶
type Graph struct { NodesCount uint32 LeafsCount uint32 Jobs chan *ComputeParams Nodes [MAX_NODES]*Tensor Grads [MAX_NODES]*Tensor Leafs [MAX_NODES]*Tensor }
computation graph
func BuildForward ¶
type InitParams ¶
type InitParams struct { }
type Tensor ¶
type Tensor struct { Type DType Reusable bool // this tensor Data buffer might be reused with pooling Dims uint32 NE [MAX_DIMS]uint32 // number of elements NB [MAX_DIMS]uint32 // stride in bytes TasksCount int Data []float32 // contains filtered or unexported fields }
func AddInplace ¶
func CopyInplace ¶
func DiagMaskInf ¶
ggml_diag_mask_inf
func MulInplace ¶
struct ggml_tensor * Mul_inplace(
func NewTensor ¶
func NewTensor(ctx *Context, dt DType, dims uint32, ne0, ne1, ne2, ne3 uint32, data []float32) *Tensor
ggml_new_tensor_impl
func NewTensor2D ¶
ggml_new_tensor_2d
func RMSNormImpl ¶
ggml_rms_norm_impl