Documentation ¶
Overview ¶
github.com/ggerganov/whisper.cpp/bindings/go provides speech-to-text service bindings for the Go programming language.
Index ¶
- Constants
- Variables
- func Whisper_lang_max_id() int
- func Whisper_lang_str(id int) string
- func Whisper_print_system_info() string
- type Context
- func (ctx *Context) Whisper_decode(tokens []Token, past, threads int) error
- func (ctx *Context) Whisper_encode(offset, threads int) error
- func (ctx *Context) Whisper_free()
- func (ctx *Context) Whisper_full(params Params, samples []float32, encoderBeginCallback func() bool, ...) error
- func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Params
- func (ctx *Context) Whisper_full_get_segment_t0(segment int) int64
- func (ctx *Context) Whisper_full_get_segment_t1(segment int) int64
- func (ctx *Context) Whisper_full_get_segment_text(segment int) string
- func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData
- func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token
- func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32
- func (ctx *Context) Whisper_full_get_token_text(segment int, token int) string
- func (ctx *Context) Whisper_full_lang_id() int
- func (ctx *Context) Whisper_full_n_segments() int
- func (ctx *Context) Whisper_full_n_tokens(segment int) int
- func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, processors int, ...) error
- func (ctx *Context) Whisper_is_multilingual() int
- func (ctx *Context) Whisper_lang_auto_detect(offset_ms, n_threads int) ([]float32, error)
- func (ctx *Context) Whisper_lang_id(lang string) int
- func (ctx *Context) Whisper_n_audio_ctx() int
- func (ctx *Context) Whisper_n_len() int
- func (ctx *Context) Whisper_n_text_ctx() int
- func (ctx *Context) Whisper_n_vocab() int
- func (ctx *Context) Whisper_pcm_to_mel(data []float32, threads int) error
- func (ctx *Context) Whisper_print_timings()
- func (ctx *Context) Whisper_reset_timings()
- func (ctx *Context) Whisper_set_mel(data []float32, n_mel int) error
- func (ctx *Context) Whisper_token_beg() Token
- func (ctx *Context) Whisper_token_eot() Token
- func (ctx *Context) Whisper_token_lang(lang_id int) Token
- func (ctx *Context) Whisper_token_not() Token
- func (ctx *Context) Whisper_token_prev() Token
- func (ctx *Context) Whisper_token_solm() Token
- func (ctx *Context) Whisper_token_sot() Token
- func (ctx *Context) Whisper_token_to_str(token Token) string
- func (ctx *Context) Whisper_token_transcribe() Token
- func (ctx *Context) Whisper_token_translate() Token
- func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error)
- type Params
- func (p *Params) Language() int
- func (p *Params) SetAudioCtx(n int)
- func (p *Params) SetBeamSize(n int)
- func (p *Params) SetDuration(duration_ms int)
- func (p *Params) SetEntropyThold(t float32)
- func (p *Params) SetInitialPrompt(prompt string)
- func (p *Params) SetLanguage(lang int) error
- func (p *Params) SetMaxContext(n int)
- func (p *Params) SetMaxSegmentLength(n int)
- func (p *Params) SetMaxTokensPerSegment(n int)
- func (p *Params) SetNoContext(v bool)
- func (p *Params) SetOffset(offset_ms int)
- func (p *Params) SetPrintProgress(v bool)
- func (p *Params) SetPrintRealtime(v bool)
- func (p *Params) SetPrintSpecial(v bool)
- func (p *Params) SetPrintTimestamps(v bool)
- func (p *Params) SetSingleSegment(v bool)
- func (p *Params) SetSplitOnWord(v bool)
- func (p *Params) SetTemperature(t float32)
- func (p *Params) SetTemperatureFallback(t float32)
- func (p *Params) SetThreads(threads int)
- func (p *Params) SetTokenSumThreshold(t float32)
- func (p *Params) SetTokenThreshold(t float32)
- func (p *Params) SetTokenTimestamps(b bool)
- func (p *Params) SetTranslate(v bool)
- func (p *Params) String() string
- func (p *Params) Threads() int
- type SamplingStrategy
- type Token
- type TokenData
Constants ¶
const ( SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits NumFFT = C.WHISPER_N_FFT HopLength = C.WHISPER_HOP_LENGTH ChunkSize = C.WHISPER_CHUNK_SIZE )
Variables ¶
Functions ¶
func Whisper_lang_max_id ¶
func Whisper_lang_max_id() int
Largest language id (i.e. number of available languages - 1)
func Whisper_lang_str ¶
Return the short string of the specified language id (e.g. 2 -> "de"); returns an empty string if not found.
Types ¶
type Context ¶
type Context C.struct_whisper_context
func Whisper_init ¶
Allocates all memory needed for the model and loads the model from the given file. Returns nil (NULL in the C API) on failure.
func (*Context) Whisper_decode ¶
Run the Whisper decoder to obtain the logits and probabilities for the next token. Make sure to call whisper_encode() first. tokens is the provided context for the decoder. past is the number of tokens to use from previous decoder calls.
func (*Context) Whisper_encode ¶
Run the Whisper encoder on the log mel spectrogram stored inside the provided whisper context. Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. offset can be used to specify the offset of the first frame in the spectrogram.
func (*Context) Whisper_free ¶
func (ctx *Context) Whisper_free()
Frees all memory allocated by the model.
func (*Context) Whisper_full ¶
func (ctx *Context) Whisper_full( params Params, samples []float32, encoderBeginCallback func() bool, newSegmentCallback func(int), progressCallback func(int), ) error
Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text. Uses the specified decoding strategy to obtain the text.
func (*Context) Whisper_full_default_params ¶
func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Params
Return default parameters for a strategy
func (*Context) Whisper_full_get_segment_t0 ¶
Get the start time of the specified segment.
func (*Context) Whisper_full_get_segment_t1 ¶
Get the end time of the specified segment.
func (*Context) Whisper_full_get_segment_text ¶
Get the text of the specified segment.
func (*Context) Whisper_full_get_token_data ¶
Get token data for the specified token in the specified segment. This contains probabilities, timestamps, etc.
func (*Context) Whisper_full_get_token_id ¶
Get the token of the specified token index in the specified segment.
func (*Context) Whisper_full_get_token_p ¶
Get the probability of the specified token in the specified segment.
func (*Context) Whisper_full_get_token_text ¶
Get the token text of the specified token index in the specified segment.
func (*Context) Whisper_full_lang_id ¶
Return the id of the autodetected language; returns -1 if not found. Added to whisper.cpp in https://github.com/ggerganov/whisper.cpp/commit/a1c1583cc7cd8b75222857afc936f0638c5683d6
Examples:
"de" -> 2 "german" -> 2
func (*Context) Whisper_full_n_segments ¶
Number of generated text segments. A segment can be a few words, a sentence, or even a paragraph.
func (*Context) Whisper_full_n_tokens ¶
Get number of tokens in the specified segment.
func (*Context) Whisper_full_parallel ¶
func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, processors int, encoderBeginCallback func() bool, newSegmentCallback func(int)) error
Split the input audio into chunks and process each chunk separately using whisper_full(). It seems this approach can offer some speedup in some cases. However, the transcription accuracy can be worse at the beginning and end of each chunk.
func (*Context) Whisper_is_multilingual ¶
func (*Context) Whisper_lang_auto_detect ¶
Use mel data at offset_ms to try and auto-detect the spoken language. Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first. Returns the probabilities of all languages. ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
func (*Context) Whisper_lang_id ¶
Return the id of the specified language; returns -1 if not found. Examples:
"de" -> 2, "german" -> 2
func (*Context) Whisper_n_audio_ctx ¶
func (*Context) Whisper_n_len ¶
func (*Context) Whisper_n_text_ctx ¶
func (*Context) Whisper_n_vocab ¶
func (*Context) Whisper_pcm_to_mel ¶
Convert RAW PCM audio to log mel spectrogram. The resulting spectrogram is stored inside the provided whisper context.
func (*Context) Whisper_print_timings ¶
func (ctx *Context) Whisper_print_timings()
Performance information: prints the collected timings.
func (*Context) Whisper_reset_timings ¶
func (ctx *Context) Whisper_reset_timings()
Performance information: resets the collected timings.
func (*Context) Whisper_set_mel ¶
This can be used to set a custom log mel spectrogram inside the provided whisper context. Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram. n_mel must be 80.
func (*Context) Whisper_token_lang ¶
Special tokens
func (*Context) Whisper_token_to_str ¶
Token Id -> String. Uses the vocabulary in the provided context
func (*Context) Whisper_token_transcribe ¶
Task tokens
func (*Context) Whisper_token_translate ¶
Task tokens
type Params ¶
type Params C.struct_whisper_full_params
func (*Params) SetBeamSize ¶
func (*Params) SetDuration ¶
Set audio duration to process in ms
func (*Params) SetEntropyThold ¶
func (*Params) SetInitialPrompt ¶
Set initial prompt
func (*Params) SetMaxContext ¶
func (*Params) SetMaxSegmentLength ¶
Set max segment length in characters
func (*Params) SetMaxTokensPerSegment ¶
Set max tokens per segment (0 = no limit)
func (*Params) SetNoContext ¶
func (*Params) SetPrintProgress ¶
func (*Params) SetPrintRealtime ¶
func (*Params) SetPrintSpecial ¶
func (*Params) SetPrintTimestamps ¶
func (*Params) SetSingleSegment ¶
func (*Params) SetSplitOnWord ¶
func (*Params) SetTemperature ¶
func (*Params) SetTemperatureFallback ¶
Sets the fallback temperature increment. Pass -1.0 to disable this feature.
func (*Params) SetTokenSumThreshold ¶
Set timestamp token sum probability threshold (~0.01)
func (*Params) SetTokenThreshold ¶
Set timestamp token probability threshold (~0.01)
func (*Params) SetTokenTimestamps ¶
func (*Params) SetTranslate ¶
type SamplingStrategy ¶
type SamplingStrategy C.enum_whisper_sampling_strategy
const ( SAMPLING_GREEDY SamplingStrategy = C.WHISPER_SAMPLING_GREEDY SAMPLING_BEAM_SEARCH SamplingStrategy = C.WHISPER_SAMPLING_BEAM_SEARCH )
type Token ¶
type Token C.whisper_token