Documentation
Constants
This section is empty.
Variables
This section is empty.
Functions
This section is empty.
Types
type ChatRequest
type ChatResponse
type ChatResponseFunc
type ChatResponseFunc func(ChatResponse) error
type Client
func (*Client) Generate
func (c *Client) Generate(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error
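The callback-based signature of Generate suggests that responses are delivered to fn as they become available, which matches the streaming behaviour described by Options.Stream below. The sketch that follows is only illustrative: the import path, the way a Client is constructed, and the fields of ChatRequest are not documented in this excerpt and are stand-ins.

import (
	"context"
	"fmt"

	// The import path is a placeholder; substitute the actual location of this package.
	maritaca "example.com/your/module/maritacaclient"
)

// printResponses sends a request to the model and prints every ChatResponse
// handed to the callback. Returning a non-nil error from the callback is
// assumed to abort generation early.
func printResponses(ctx context.Context, client *maritaca.Client, req *maritaca.ChatRequest) error {
	return client.Generate(ctx, req, func(resp maritaca.ChatResponse) error {
		fmt.Printf("%+v\n", resp)
		return nil
	})
}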
type Options
type Options struct {
	// Token is the API token used to authenticate requests.
	Token string `json:"-"`

	// default: true
	// If True, the model will run in chat mode, where messages is a string containing the
	// user's message or a list of messages containing the iterations of the conversation
	// between user and assistant. If False, messages must be a string containing the desired prompt.
	ChatMode bool `json:"chat_mode,omitempty"`

	// minimum: 1
	// Maximum number of tokens that will be generated by the model.
	MaxTokens int `json:"max_tokens,omitempty"`

	// Name of the model that will be used for inference. Currently, only the "sabia-2-medium" and "sabia-2-small" models are available.
	Model string `json:"model"`

	// default: true
	// If True, the model's generation will be sampled via top-k sampling.
	// Otherwise, the generation will always select the token with the highest probability.
	// Using do_sample=False leads to a deterministic result, but with less diversity.
	DoSample bool `json:"do_sample,omitempty"`

	// minimum: 0
	// default: 0.7
	// Sampling temperature (greater than or equal to zero).
	// Higher values lead to greater diversity in generation but also increase the likelihood of generating nonsensical texts.
	// Values closer to zero result in more plausible texts but increase the chances of generating repetitive texts.
	Temperature float64 `json:"temperature,omitempty"`

	// exclusiveMaximum: 1
	// exclusiveMinimum: 0
	// default: 0.95
	// If less than 1, it retains only the top tokens with cumulative probability >= top_p (nucleus filtering).
	// For example, 0.95 means that only the tokens that make up the top 95% of the probability mass are considered when predicting the next token.
	// Nucleus filtering is described in Holtzman et al. (http://arxiv.org/abs/1904.09751).
	TopP float64 `json:"top_p,omitempty"`

	// minimum: 0
	// default: 1
	// Repetition penalty. Positive values encourage the model not to repeat previously generated tokens.
	RepetitionPenalty float64 `json:"repetition_penalty,omitempty"`

	// List of tokens that, when generated, indicate that the model should stop generating tokens.
	StoppingTokens []string `json:"stopping_tokens,omitempty"`

	// default: false
	// If True, the model will run in streaming mode,
	// where tokens will be generated and returned to the client as they are produced.
	// If False, the model will run in batch mode, where all tokens will be generated before being returned to the client.
	Stream bool `json:"stream,omitempty"`

	// minimum: 1
	// default: 4
	// Number of tokens that will be returned per message. This field is ignored if stream=False.
	NumTokensPerMessage int `json:"num_tokens_per_message,omitempty"`
}
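The comments above mirror the JSON body sent to the inference API, so an Options value can be populated directly from them. A minimal sketch follows; the concrete values are illustrative, and how the resulting Options is attached to a ChatRequest or Client is not documented in this excerpt.

opts := Options{
	Token:               "...", // API token supplied by the caller
	Model:               "sabia-2-small",
	ChatMode:            true,
	MaxTokens:           256,  // illustrative cap on generated tokens
	DoSample:            true,
	Temperature:         0.7,  // documented default
	TopP:                0.95, // documented default
	RepetitionPenalty:   1,    // documented default
	Stream:              true,
	NumTokensPerMessage: 4,    // documented default; ignored when Stream is false
}

Because most fields carry the omitempty tag, zero-valued fields are left out of the JSON payload, and the documented defaults presumably apply on the server side.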
type StatusError
type StatusError struct {
	Status       string `json:"status,omitempty"`
	ErrorMessage string `json:"error"`
	StatusCode   int    `json:"code,omitempty"`
}
func (StatusError) Error
func (e StatusError) Error() string
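StatusError carries the status text, error message and HTTP status code returned by the API, and it satisfies the standard error interface through the Error method above. The sketch below shows one way a caller might separate API-level failures from other errors; whether the errors returned by Generate can actually be unwrapped into a StatusError is an assumption, not something documented here.

// reportError distinguishes an API-level StatusError from transport or
// context errors. It relies only on the standard library's errors and log packages.
func reportError(err error) {
	var statusErr StatusError
	if errors.As(err, &statusErr) {
		log.Printf("API error %d (%s): %s", statusErr.StatusCode, statusErr.Status, statusErr.ErrorMessage)
		return
	}
	log.Printf("request failed: %v", err)
}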