Documentation ¶
Overview ¶
These APIs allow you to manage Apps, Serving Endpoints, etc.
Index ¶
- type Ai21LabsConfig
- type AmazonBedrockConfig
- type AmazonBedrockConfigBedrockProvider
- type AnthropicConfig
- type App
- type AppDeployment
- type AppDeploymentState
- type AppDeploymentStatus
- type AppEnvironment
- type AppState
- type AppStatus
- type AppsAPI
- func (a *AppsAPI) Create(ctx context.Context, createAppRequest CreateAppRequest) (*WaitGetAppIdle[App], error)
- func (a *AppsAPI) CreateAndWait(ctx context.Context, createAppRequest CreateAppRequest, ...) (*App, error) deprecated
- func (a *AppsAPI) CreateDeployment(ctx context.Context, createAppDeploymentRequest CreateAppDeploymentRequest) (*WaitGetDeploymentAppSucceeded[AppDeployment], error)
- func (a *AppsAPI) CreateDeploymentAndWait(ctx context.Context, createAppDeploymentRequest CreateAppDeploymentRequest, ...) (*AppDeployment, error) deprecated
- func (a *AppsAPI) Delete(ctx context.Context, request DeleteAppRequest) error
- func (a *AppsAPI) DeleteByName(ctx context.Context, name string) error
- func (a *AppsAPI) Get(ctx context.Context, request GetAppRequest) (*App, error)
- func (a *AppsAPI) GetByName(ctx context.Context, name string) (*App, error)
- func (a *AppsAPI) GetDeployment(ctx context.Context, request GetAppDeploymentRequest) (*AppDeployment, error)
- func (a *AppsAPI) GetDeploymentByAppNameAndDeploymentId(ctx context.Context, appName string, deploymentId string) (*AppDeployment, error)
- func (a *AppsAPI) GetEnvironment(ctx context.Context, request GetAppEnvironmentRequest) (*AppEnvironment, error)
- func (a *AppsAPI) GetEnvironmentByName(ctx context.Context, name string) (*AppEnvironment, error)
- func (a *AppsAPI) Impl() AppsService
- func (a *AppsAPI) List(ctx context.Context, request ListAppsRequest) listing.Iterator[App]
- func (a *AppsAPI) ListAll(ctx context.Context, request ListAppsRequest) ([]App, error)
- func (a *AppsAPI) ListDeployments(ctx context.Context, request ListAppDeploymentsRequest) listing.Iterator[AppDeployment]
- func (a *AppsAPI) ListDeploymentsAll(ctx context.Context, request ListAppDeploymentsRequest) ([]AppDeployment, error)
- func (a *AppsAPI) ListDeploymentsByAppName(ctx context.Context, appName string) (*ListAppDeploymentsResponse, error)
- func (a *AppsAPI) Stop(ctx context.Context, request StopAppRequest) error
- func (a *AppsAPI) Update(ctx context.Context, request UpdateAppRequest) (*App, error)
- func (a *AppsAPI) WaitGetAppIdle(ctx context.Context, name string, timeout time.Duration, callback func(*App)) (*App, error)
- func (a *AppsAPI) WaitGetDeploymentAppSucceeded(ctx context.Context, appName string, deploymentId string, ...) (*AppDeployment, error)
- func (a *AppsAPI) WithImpl(impl AppsService) AppsInterface
- type AppsInterface
- type AppsService
- type AutoCaptureConfigInput
- type AutoCaptureConfigOutput
- type AutoCaptureState
- type BuildLogsRequest
- type BuildLogsResponse
- type ChatMessage
- type ChatMessageRole
- type CohereConfig
- type CreateAppDeploymentRequest
- type CreateAppRequest
- type CreateServingEndpoint
- type DatabricksModelServingConfig
- type DataframeSplitInput
- type DeleteAppRequest
- type DeleteResponse
- type DeleteServingEndpointRequest
- type EmbeddingsV1ResponseEmbeddingElement
- type EmbeddingsV1ResponseEmbeddingElementObject
- type EndpointCoreConfigInput
- type EndpointCoreConfigOutput
- type EndpointCoreConfigSummary
- type EndpointPendingConfig
- type EndpointState
- type EndpointStateConfigUpdate
- type EndpointStateReady
- type EndpointTag
- type EnvVariable
- type ExportMetricsRequest
- type ExportMetricsResponse
- type ExternalModel
- type ExternalModelProvider
- type ExternalModelUsageElement
- type FoundationModel
- type GetAppDeploymentRequest
- type GetAppEnvironmentRequest
- type GetAppRequest
- type GetOpenApiRequest
- type GetOpenApiResponse
- type GetServingEndpointPermissionLevelsRequest
- type GetServingEndpointPermissionLevelsResponse
- type GetServingEndpointPermissionsRequest
- type GetServingEndpointRequest
- type ListAppDeploymentsRequest
- type ListAppDeploymentsResponse
- type ListAppsRequest
- type ListAppsResponse
- type ListEndpointsResponse
- type LogsRequest
- type OpenAiConfig
- type PaLmConfig
- type PatchServingEndpointTags
- type PayloadTable
- type PutRequest
- type PutResponse
- type QueryEndpointInput
- type QueryEndpointResponse
- type QueryEndpointResponseObject
- type RateLimit
- type RateLimitKey
- type RateLimitRenewalPeriod
- type Route
- type ServedEntityInput
- type ServedEntityOutput
- type ServedEntitySpec
- type ServedModelInput
- type ServedModelInputWorkloadSize
- type ServedModelInputWorkloadType
- type ServedModelOutput
- type ServedModelSpec
- type ServedModelState
- type ServedModelStateDeployment
- type ServerLogsResponse
- type ServingEndpoint
- type ServingEndpointAccessControlRequest
- type ServingEndpointAccessControlResponse
- type ServingEndpointDetailed
- type ServingEndpointDetailedPermissionLevel
- type ServingEndpointPermission
- type ServingEndpointPermissionLevel
- type ServingEndpointPermissions
- type ServingEndpointPermissionsDescription
- type ServingEndpointPermissionsRequest
- type ServingEndpointsAPI
- func (a *ServingEndpointsAPI) BuildLogs(ctx context.Context, request BuildLogsRequest) (*BuildLogsResponse, error)
- func (a *ServingEndpointsAPI) BuildLogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*BuildLogsResponse, error)
- func (a *ServingEndpointsAPI) Create(ctx context.Context, createServingEndpoint CreateServingEndpoint) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
- func (a *ServingEndpointsAPI) CreateAndWait(ctx context.Context, createServingEndpoint CreateServingEndpoint, ...) (*ServingEndpointDetailed, error) deprecated
- func (a *ServingEndpointsAPI) Delete(ctx context.Context, request DeleteServingEndpointRequest) error
- func (a *ServingEndpointsAPI) DeleteByName(ctx context.Context, name string) error
- func (a *ServingEndpointsAPI) ExportMetrics(ctx context.Context, request ExportMetricsRequest) error
- func (a *ServingEndpointsAPI) ExportMetricsByName(ctx context.Context, name string) error
- func (a *ServingEndpointsAPI) Get(ctx context.Context, request GetServingEndpointRequest) (*ServingEndpointDetailed, error)
- func (a *ServingEndpointsAPI) GetByName(ctx context.Context, name string) (*ServingEndpointDetailed, error)
- func (a *ServingEndpointsAPI) GetOpenApi(ctx context.Context, request GetOpenApiRequest) error
- func (a *ServingEndpointsAPI) GetOpenApiByName(ctx context.Context, name string) error
- func (a *ServingEndpointsAPI) GetPermissionLevels(ctx context.Context, request GetServingEndpointPermissionLevelsRequest) (*GetServingEndpointPermissionLevelsResponse, error)
- func (a *ServingEndpointsAPI) GetPermissionLevelsByServingEndpointId(ctx context.Context, servingEndpointId string) (*GetServingEndpointPermissionLevelsResponse, error)
- func (a *ServingEndpointsAPI) GetPermissions(ctx context.Context, request GetServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
- func (a *ServingEndpointsAPI) GetPermissionsByServingEndpointId(ctx context.Context, servingEndpointId string) (*ServingEndpointPermissions, error)
- func (a *ServingEndpointsAPI) Impl() ServingEndpointsService
- func (a *ServingEndpointsAPI) List(ctx context.Context) listing.Iterator[ServingEndpoint]
- func (a *ServingEndpointsAPI) ListAll(ctx context.Context) ([]ServingEndpoint, error)
- func (a *ServingEndpointsAPI) Logs(ctx context.Context, request LogsRequest) (*ServerLogsResponse, error)
- func (a *ServingEndpointsAPI) LogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*ServerLogsResponse, error)
- func (a *ServingEndpointsAPI) Patch(ctx context.Context, request PatchServingEndpointTags) ([]EndpointTag, error)
- func (a *ServingEndpointsAPI) Put(ctx context.Context, request PutRequest) (*PutResponse, error)
- func (a *ServingEndpointsAPI) Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error)
- func (a *ServingEndpointsAPI) SetPermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
- func (a *ServingEndpointsAPI) UpdateConfig(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
- func (a *ServingEndpointsAPI) UpdateConfigAndWait(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput, ...) (*ServingEndpointDetailed, error) deprecated
- func (a *ServingEndpointsAPI) UpdatePermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
- func (a *ServingEndpointsAPI) WaitGetServingEndpointNotUpdating(ctx context.Context, name string, timeout time.Duration, ...) (*ServingEndpointDetailed, error)
- func (a *ServingEndpointsAPI) WithImpl(impl ServingEndpointsService) ServingEndpointsInterface
- type ServingEndpointsInterface
- type ServingEndpointsService
- type StopAppRequest
- type StopAppResponse
- type TrafficConfig
- type UpdateAppRequest
- type V1ResponseChoiceElement
- type WaitGetAppIdle
- type WaitGetDeploymentAppSucceeded
- type WaitGetServingEndpointNotUpdating
- func (w *WaitGetServingEndpointNotUpdating[R]) Get() (*ServingEndpointDetailed, error)
- func (w *WaitGetServingEndpointNotUpdating[R]) GetWithTimeout(timeout time.Duration) (*ServingEndpointDetailed, error)
- func (w *WaitGetServingEndpointNotUpdating[R]) OnProgress(callback func(*ServingEndpointDetailed)) *WaitGetServingEndpointNotUpdating[R]
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Ai21LabsConfig ¶ added in v0.27.0
type Ai21LabsConfig struct { // The Databricks secret key reference for an AI21Labs API key. Ai21labsApiKey string `json:"ai21labs_api_key"` }
type AmazonBedrockConfig ¶ added in v0.37.0
type AmazonBedrockConfig struct { // The Databricks secret key reference for an AWS Access Key ID with // permissions to interact with Bedrock services. AwsAccessKeyId string `json:"aws_access_key_id"` // The AWS region to use. Bedrock has to be enabled there. AwsRegion string `json:"aws_region"` // The Databricks secret key reference for an AWS Secret Access Key paired // with the access key ID, with permissions to interact with Bedrock // services. AwsSecretAccessKey string `json:"aws_secret_access_key"` // The underlying provider in Amazon Bedrock. Supported values (case // insensitive) include: Anthropic, Cohere, AI21Labs, Amazon. BedrockProvider AmazonBedrockConfigBedrockProvider `json:"bedrock_provider"` }
type AmazonBedrockConfigBedrockProvider ¶ added in v0.37.0
type AmazonBedrockConfigBedrockProvider string
The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.
const AmazonBedrockConfigBedrockProviderAi21labs AmazonBedrockConfigBedrockProvider = `ai21labs`
const AmazonBedrockConfigBedrockProviderAmazon AmazonBedrockConfigBedrockProvider = `amazon`
const AmazonBedrockConfigBedrockProviderAnthropic AmazonBedrockConfigBedrockProvider = `anthropic`
const AmazonBedrockConfigBedrockProviderCohere AmazonBedrockConfigBedrockProvider = `cohere`
func (*AmazonBedrockConfigBedrockProvider) Set ¶ added in v0.37.0
func (f *AmazonBedrockConfigBedrockProvider) Set(v string) error
Set raw string value and validate it against allowed values
func (*AmazonBedrockConfigBedrockProvider) String ¶ added in v0.37.0
func (f *AmazonBedrockConfigBedrockProvider) String() string
String representation for fmt.Print
func (*AmazonBedrockConfigBedrockProvider) Type ¶ added in v0.37.0
func (f *AmazonBedrockConfigBedrockProvider) Type() string
Type always returns AmazonBedrockConfigBedrockProvider to satisfy [pflag.Value] interface
type AnthropicConfig ¶ added in v0.27.0
type AnthropicConfig struct { // The Databricks secret key reference for an Anthropic API key. AnthropicApiKey string `json:"anthropic_api_key"` }
type App ¶ added in v0.40.0
type App struct { // The active deployment of the app. ActiveDeployment *AppDeployment `json:"active_deployment,omitempty"` // The creation time of the app. Formatted timestamp in ISO 8601. CreateTime string `json:"create_time,omitempty"` // The email of the user that created the app. Creator string `json:"creator,omitempty"` // The description of the app. Description string `json:"description,omitempty"` // The name of the app. The name must contain only lowercase alphanumeric // characters and hyphens and be between 2 and 30 characters long. It must // be unique within the workspace. Name string `json:"name"` // The pending deployment of the app. PendingDeployment *AppDeployment `json:"pending_deployment,omitempty"` Status *AppStatus `json:"status,omitempty"` // The update time of the app. Formatted timestamp in ISO 8601. UpdateTime string `json:"update_time,omitempty"` // The email of the user that last updated the app. Updater string `json:"updater,omitempty"` // The URL of the app once it is deployed. Url string `json:"url,omitempty"` ForceSendFields []string `json:"-"` }
func (App) MarshalJSON ¶ added in v0.40.0
func (*App) UnmarshalJSON ¶ added in v0.40.0
type AppDeployment ¶ added in v0.40.0
type AppDeployment struct { // The creation time of the deployment. Formatted timestamp in ISO 8601. CreateTime string `json:"create_time,omitempty"` // The email of the user that created the deployment. Creator string `json:"creator,omitempty"` // The unique id of the deployment. DeploymentId string `json:"deployment_id,omitempty"` // The source code path of the deployment. SourceCodePath string `json:"source_code_path"` // Status and status message of the deployment. Status *AppDeploymentStatus `json:"status,omitempty"` // The update time of the deployment. Formatted timestamp in ISO 8601. UpdateTime string `json:"update_time,omitempty"` ForceSendFields []string `json:"-"` }
func (AppDeployment) MarshalJSON ¶ added in v0.40.0
func (s AppDeployment) MarshalJSON() ([]byte, error)
func (*AppDeployment) UnmarshalJSON ¶ added in v0.40.0
func (s *AppDeployment) UnmarshalJSON(b []byte) error
type AppDeploymentState ¶ added in v0.40.0
type AppDeploymentState string
const AppDeploymentStateCancelled AppDeploymentState = `CANCELLED`
const AppDeploymentStateFailed AppDeploymentState = `FAILED`
const AppDeploymentStateInProgress AppDeploymentState = `IN_PROGRESS`
const AppDeploymentStateStateUnspecified AppDeploymentState = `STATE_UNSPECIFIED`
const AppDeploymentStateSucceeded AppDeploymentState = `SUCCEEDED`
func (*AppDeploymentState) Set ¶ added in v0.40.0
func (f *AppDeploymentState) Set(v string) error
Set raw string value and validate it against allowed values
func (*AppDeploymentState) String ¶ added in v0.40.0
func (f *AppDeploymentState) String() string
String representation for fmt.Print
func (*AppDeploymentState) Type ¶ added in v0.40.0
func (f *AppDeploymentState) Type() string
Type always returns AppDeploymentState to satisfy [pflag.Value] interface
type AppDeploymentStatus ¶ added in v0.40.0
type AppDeploymentStatus struct { // Message corresponding with the deployment state. Message string `json:"message,omitempty"` // State of the deployment. State AppDeploymentState `json:"state,omitempty"` ForceSendFields []string `json:"-"` }
func (AppDeploymentStatus) MarshalJSON ¶ added in v0.40.0
func (s AppDeploymentStatus) MarshalJSON() ([]byte, error)
func (*AppDeploymentStatus) UnmarshalJSON ¶ added in v0.40.0
func (s *AppDeploymentStatus) UnmarshalJSON(b []byte) error
type AppEnvironment ¶ added in v0.40.0
type AppEnvironment struct {
Env []EnvVariable `json:"env,omitempty"`
}
type AppState ¶ added in v0.40.0
type AppState string
const AppStateCreating AppState = `CREATING`
const AppStateDeleted AppState = `DELETED`
const AppStateDeleting AppState = `DELETING`
const AppStateDeployed AppState = `DEPLOYED`
const AppStateDeploying AppState = `DEPLOYING`
const AppStateError AppState = `ERROR`
const AppStateIdle AppState = `IDLE`
const AppStateReady AppState = `READY`
const AppStateRunning AppState = `RUNNING`
const AppStateStarting AppState = `STARTING`
const AppStateStateUnspecified AppState = `STATE_UNSPECIFIED`
const AppStateUpdating AppState = `UPDATING`
type AppStatus ¶ added in v0.40.0
type AppStatus struct { // Message corresponding with the app state. Message string `json:"message,omitempty"` // State of the app. State AppState `json:"state,omitempty"` ForceSendFields []string `json:"-"` }
func (AppStatus) MarshalJSON ¶ added in v0.40.0
func (*AppStatus) UnmarshalJSON ¶ added in v0.40.0
type AppsAPI ¶ added in v0.24.0
type AppsAPI struct {
// contains filtered or unexported fields
}
Apps run directly on a customer’s Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on.
func NewApps ¶ added in v0.24.0
func NewApps(client *client.DatabricksClient) *AppsAPI
func (*AppsAPI) Create ¶ added in v0.24.0
func (a *AppsAPI) Create(ctx context.Context, createAppRequest CreateAppRequest) (*WaitGetAppIdle[App], error)
Create an App.
Creates a new app.
func (*AppsAPI) CreateAndWait ¶ added in v0.40.0 (deprecated)
func (a *AppsAPI) CreateAndWait(ctx context.Context, createAppRequest CreateAppRequest, options ...retries.Option[App]) (*App, error)
Calls AppsAPI.Create and waits to reach IDLE state
You can override the default timeout of 20 minutes by adding the retries.Timeout[App](60*time.Minute) functional option.
Deprecated: use AppsAPI.Create.Get() or AppsAPI.WaitGetAppIdle
func (*AppsAPI) CreateDeployment ¶ added in v0.40.0
func (a *AppsAPI) CreateDeployment(ctx context.Context, createAppDeploymentRequest CreateAppDeploymentRequest) (*WaitGetDeploymentAppSucceeded[AppDeployment], error)
Create an App Deployment.
Creates an app deployment for the app with the supplied name.
func (*AppsAPI) CreateDeploymentAndWait ¶ added in v0.40.0 (deprecated)
func (a *AppsAPI) CreateDeploymentAndWait(ctx context.Context, createAppDeploymentRequest CreateAppDeploymentRequest, options ...retries.Option[AppDeployment]) (*AppDeployment, error)
Calls AppsAPI.CreateDeployment and waits to reach SUCCEEDED state
You can override the default timeout of 20 minutes by adding the retries.Timeout[AppDeployment](60*time.Minute) functional option.
Deprecated: use AppsAPI.CreateDeployment.Get() or AppsAPI.WaitGetDeploymentAppSucceeded
func (*AppsAPI) Delete ¶ added in v0.24.0
func (a *AppsAPI) Delete(ctx context.Context, request DeleteAppRequest) error
Delete an App.
Deletes an app.
func (*AppsAPI) Get ¶ added in v0.24.0
Get an App.
Retrieves information for the app with the supplied name.
func (*AppsAPI) GetByName ¶ added in v0.24.0
Get an App.
Retrieves information for the app with the supplied name.
func (*AppsAPI) GetDeployment ¶ added in v0.40.0
func (a *AppsAPI) GetDeployment(ctx context.Context, request GetAppDeploymentRequest) (*AppDeployment, error)
Get an App Deployment.
Retrieves information for the app deployment with the supplied name and deployment id.
func (*AppsAPI) GetDeploymentByAppNameAndDeploymentId ¶ added in v0.40.0
func (a *AppsAPI) GetDeploymentByAppNameAndDeploymentId(ctx context.Context, appName string, deploymentId string) (*AppDeployment, error)
Get an App Deployment.
Retrieves information for the app deployment with the supplied name and deployment id.
func (*AppsAPI) GetEnvironment ¶ added in v0.40.0
func (a *AppsAPI) GetEnvironment(ctx context.Context, request GetAppEnvironmentRequest) (*AppEnvironment, error)
Get App Environment.
Retrieves app environment.
func (*AppsAPI) GetEnvironmentByName ¶ added in v0.40.0
Get App Environment.
Retrieves app environment.
func (*AppsAPI) Impl ¶ added in v0.24.0
func (a *AppsAPI) Impl() AppsService
Impl returns low-level Apps API implementation. Deprecated: use MockAppsInterface instead.
func (*AppsAPI) List ¶ added in v0.40.0
List Apps.
Lists all apps in the workspace.
This method is generated by Databricks SDK Code Generator.
func (*AppsAPI) ListAll ¶ added in v0.40.0
List Apps.
Lists all apps in the workspace.
This method is generated by Databricks SDK Code Generator.
func (*AppsAPI) ListDeployments ¶ added in v0.40.0
func (a *AppsAPI) ListDeployments(ctx context.Context, request ListAppDeploymentsRequest) listing.Iterator[AppDeployment]
List App Deployments.
Lists all app deployments for the app with the supplied name.
This method is generated by Databricks SDK Code Generator.
func (*AppsAPI) ListDeploymentsAll ¶ added in v0.40.0
func (a *AppsAPI) ListDeploymentsAll(ctx context.Context, request ListAppDeploymentsRequest) ([]AppDeployment, error)
List App Deployments.
Lists all app deployments for the app with the supplied name.
This method is generated by Databricks SDK Code Generator.
func (*AppsAPI) ListDeploymentsByAppName ¶ added in v0.40.0
func (a *AppsAPI) ListDeploymentsByAppName(ctx context.Context, appName string) (*ListAppDeploymentsResponse, error)
List App Deployments.
Lists all app deployments for the app with the supplied name.
func (*AppsAPI) Stop ¶ added in v0.40.0
func (a *AppsAPI) Stop(ctx context.Context, request StopAppRequest) error
Stop an App.
Stops the active deployment of the app in the workspace.
func (*AppsAPI) WaitGetAppIdle ¶ added in v0.40.0
func (a *AppsAPI) WaitGetAppIdle(ctx context.Context, name string, timeout time.Duration, callback func(*App)) (*App, error)
WaitGetAppIdle repeatedly calls AppsAPI.Get and waits to reach IDLE state
func (*AppsAPI) WaitGetDeploymentAppSucceeded ¶ added in v0.40.0
func (a *AppsAPI) WaitGetDeploymentAppSucceeded(ctx context.Context, appName string, deploymentId string, timeout time.Duration, callback func(*AppDeployment)) (*AppDeployment, error)
WaitGetDeploymentAppSucceeded repeatedly calls AppsAPI.GetDeployment and waits to reach SUCCEEDED state
func (*AppsAPI) WithImpl ¶ added in v0.24.0
func (a *AppsAPI) WithImpl(impl AppsService) AppsInterface
WithImpl could be used to override low-level API implementations for unit testing purposes with github.com/golang/mock or other mocking frameworks. Deprecated: use MockAppsInterface instead.
type AppsInterface ¶ added in v0.29.0
type AppsInterface interface { // WithImpl could be used to override low-level API implementations for unit // testing purposes with [github.com/golang/mock] or other mocking frameworks. // Deprecated: use MockAppsInterface instead. WithImpl(impl AppsService) AppsInterface // Impl returns low-level Apps API implementation // Deprecated: use MockAppsInterface instead. Impl() AppsService // WaitGetAppIdle repeatedly calls [AppsAPI.Get] and waits to reach IDLE state WaitGetAppIdle(ctx context.Context, name string, timeout time.Duration, callback func(*App)) (*App, error) // WaitGetDeploymentAppSucceeded repeatedly calls [AppsAPI.GetDeployment] and waits to reach SUCCEEDED state WaitGetDeploymentAppSucceeded(ctx context.Context, appName string, deploymentId string, timeout time.Duration, callback func(*AppDeployment)) (*AppDeployment, error) // Create an App. // // Creates a new app. Create(ctx context.Context, createAppRequest CreateAppRequest) (*WaitGetAppIdle[App], error) // Calls [AppsAPIInterface.Create] and waits to reach IDLE state // // You can override the default timeout of 20 minutes by adding the // retries.Timeout[App](60*time.Minute) functional option. // // Deprecated: use [AppsAPIInterface.Create].Get() or [AppsAPIInterface.WaitGetAppIdle] CreateAndWait(ctx context.Context, createAppRequest CreateAppRequest, options ...retries.Option[App]) (*App, error) // Create an App Deployment. // // Creates an app deployment for the app with the supplied name. CreateDeployment(ctx context.Context, createAppDeploymentRequest CreateAppDeploymentRequest) (*WaitGetDeploymentAppSucceeded[AppDeployment], error) // Calls [AppsAPIInterface.CreateDeployment] and waits to reach SUCCEEDED state // // You can override the default timeout of 20 minutes by adding the // retries.Timeout[AppDeployment](60*time.Minute) functional option. 
// // Deprecated: use [AppsAPIInterface.CreateDeployment].Get() or [AppsAPIInterface.WaitGetDeploymentAppSucceeded] CreateDeploymentAndWait(ctx context.Context, createAppDeploymentRequest CreateAppDeploymentRequest, options ...retries.Option[AppDeployment]) (*AppDeployment, error) // Delete an App. // // Deletes an app. Delete(ctx context.Context, request DeleteAppRequest) error // Delete an App. // // Deletes an app. DeleteByName(ctx context.Context, name string) error // Get an App. // // Retrieves information for the app with the supplied name. Get(ctx context.Context, request GetAppRequest) (*App, error) // Get an App. // // Retrieves information for the app with the supplied name. GetByName(ctx context.Context, name string) (*App, error) // Get an App Deployment. // // Retrieves information for the app deployment with the supplied name and // deployment id. GetDeployment(ctx context.Context, request GetAppDeploymentRequest) (*AppDeployment, error) // Get an App Deployment. // // Retrieves information for the app deployment with the supplied name and // deployment id. GetDeploymentByAppNameAndDeploymentId(ctx context.Context, appName string, deploymentId string) (*AppDeployment, error) // Get App Environment. // // Retrieves app environment. GetEnvironment(ctx context.Context, request GetAppEnvironmentRequest) (*AppEnvironment, error) // Get App Environment. // // Retrieves app environment. GetEnvironmentByName(ctx context.Context, name string) (*AppEnvironment, error) // List Apps. // // Lists all apps in the workspace. // // This method is generated by Databricks SDK Code Generator. List(ctx context.Context, request ListAppsRequest) listing.Iterator[App] // List Apps. // // Lists all apps in the workspace. // // This method is generated by Databricks SDK Code Generator. ListAll(ctx context.Context, request ListAppsRequest) ([]App, error) // List App Deployments. // // Lists all app deployments for the app with the supplied name. 
// // This method is generated by Databricks SDK Code Generator. ListDeployments(ctx context.Context, request ListAppDeploymentsRequest) listing.Iterator[AppDeployment] // List App Deployments. // // Lists all app deployments for the app with the supplied name. // // This method is generated by Databricks SDK Code Generator. ListDeploymentsAll(ctx context.Context, request ListAppDeploymentsRequest) ([]AppDeployment, error) // List App Deployments. // // Lists all app deployments for the app with the supplied name. ListDeploymentsByAppName(ctx context.Context, appName string) (*ListAppDeploymentsResponse, error) // Stop an App. // // Stops the active deployment of the app in the workspace. Stop(ctx context.Context, request StopAppRequest) error // Update an App. // // Updates the app with the supplied name. Update(ctx context.Context, request UpdateAppRequest) (*App, error) }
type AppsService ¶ added in v0.24.0
type AppsService interface { // Create an App. // // Creates a new app. Create(ctx context.Context, request CreateAppRequest) (*App, error) // Create an App Deployment. // // Creates an app deployment for the app with the supplied name. CreateDeployment(ctx context.Context, request CreateAppDeploymentRequest) (*AppDeployment, error) // Delete an App. // // Deletes an app. Delete(ctx context.Context, request DeleteAppRequest) error // Get an App. // // Retrieves information for the app with the supplied name. Get(ctx context.Context, request GetAppRequest) (*App, error) // Get an App Deployment. // // Retrieves information for the app deployment with the supplied name and // deployment id. GetDeployment(ctx context.Context, request GetAppDeploymentRequest) (*AppDeployment, error) // Get App Environment. // // Retrieves app environment. GetEnvironment(ctx context.Context, request GetAppEnvironmentRequest) (*AppEnvironment, error) // List Apps. // // Lists all apps in the workspace. // // Use ListAll() to get all App instances, which will iterate over every result page. List(ctx context.Context, request ListAppsRequest) (*ListAppsResponse, error) // List App Deployments. // // Lists all app deployments for the app with the supplied name. // // Use ListDeploymentsAll() to get all AppDeployment instances, which will iterate over every result page. ListDeployments(ctx context.Context, request ListAppDeploymentsRequest) (*ListAppDeploymentsResponse, error) // Stop an App. // // Stops the active deployment of the app in the workspace. Stop(ctx context.Context, request StopAppRequest) error // Update an App. // // Updates the app with the supplied name. Update(ctx context.Context, request UpdateAppRequest) (*App, error) }
Apps run directly on a customer’s Databricks instance, integrate with their data, use and extend Databricks services, and enable users to interact through single sign-on.
type AutoCaptureConfigInput ¶ added in v0.27.0
type AutoCaptureConfigInput struct { // The name of the catalog in Unity Catalog. NOTE: On update, you cannot // change the catalog name if it was already set. CatalogName string `json:"catalog_name,omitempty"` // If inference tables are enabled or not. NOTE: If you have already // disabled payload logging once, you cannot enable again. Enabled bool `json:"enabled,omitempty"` // The name of the schema in Unity Catalog. NOTE: On update, you cannot // change the schema name if it was already set. SchemaName string `json:"schema_name,omitempty"` // The prefix of the table in Unity Catalog. NOTE: On update, you cannot // change the prefix name if it was already set. TableNamePrefix string `json:"table_name_prefix,omitempty"` ForceSendFields []string `json:"-"` }
func (AutoCaptureConfigInput) MarshalJSON ¶ added in v0.27.0
func (s AutoCaptureConfigInput) MarshalJSON() ([]byte, error)
func (*AutoCaptureConfigInput) UnmarshalJSON ¶ added in v0.27.0
func (s *AutoCaptureConfigInput) UnmarshalJSON(b []byte) error
type AutoCaptureConfigOutput ¶ added in v0.27.0
type AutoCaptureConfigOutput struct { // The name of the catalog in Unity Catalog. CatalogName string `json:"catalog_name,omitempty"` // If inference tables are enabled or not. Enabled bool `json:"enabled,omitempty"` // The name of the schema in Unity Catalog. SchemaName string `json:"schema_name,omitempty"` State *AutoCaptureState `json:"state,omitempty"` // The prefix of the table in Unity Catalog. TableNamePrefix string `json:"table_name_prefix,omitempty"` ForceSendFields []string `json:"-"` }
func (AutoCaptureConfigOutput) MarshalJSON ¶ added in v0.27.0
func (s AutoCaptureConfigOutput) MarshalJSON() ([]byte, error)
func (*AutoCaptureConfigOutput) UnmarshalJSON ¶ added in v0.27.0
func (s *AutoCaptureConfigOutput) UnmarshalJSON(b []byte) error
type AutoCaptureState ¶ added in v0.27.0
type AutoCaptureState struct {
PayloadTable *PayloadTable `json:"payload_table,omitempty"`
}
type BuildLogsRequest ¶
type BuildLogsRequest struct { // The name of the serving endpoint that the served model belongs to. This // field is required. Name string `json:"-" url:"-"` // The name of the served model that build logs will be retrieved for. This // field is required. ServedModelName string `json:"-" url:"-"` }
Get build logs for a served model
type BuildLogsResponse ¶
type BuildLogsResponse struct { // The logs associated with building the served entity's environment. Logs string `json:"logs"` }
type ChatMessage ¶ added in v0.27.0
type ChatMessage struct { // The content of the message. Content string `json:"content,omitempty"` // The role of the message. One of [system, user, assistant]. Role ChatMessageRole `json:"role,omitempty"` ForceSendFields []string `json:"-"` }
func (ChatMessage) MarshalJSON ¶ added in v0.27.0
func (s ChatMessage) MarshalJSON() ([]byte, error)
func (*ChatMessage) UnmarshalJSON ¶ added in v0.27.0
func (s *ChatMessage) UnmarshalJSON(b []byte) error
type ChatMessageRole ¶ added in v0.27.0
type ChatMessageRole string
The role of the message. One of [system, user, assistant].
const ChatMessageRoleAssistant ChatMessageRole = `assistant`
const ChatMessageRoleSystem ChatMessageRole = `system`
const ChatMessageRoleUser ChatMessageRole = `user`
func (*ChatMessageRole) Set ¶ added in v0.27.0
func (f *ChatMessageRole) Set(v string) error
Set raw string value and validate it against allowed values
func (*ChatMessageRole) String ¶ added in v0.27.0
func (f *ChatMessageRole) String() string
String representation for fmt.Print
func (*ChatMessageRole) Type ¶ added in v0.27.0
func (f *ChatMessageRole) Type() string
Type always returns ChatMessageRole to satisfy [pflag.Value] interface
type CohereConfig ¶ added in v0.27.0
type CohereConfig struct { // The Databricks secret key reference for a Cohere API key. CohereApiKey string `json:"cohere_api_key"` }
type CreateAppDeploymentRequest ¶ added in v0.40.0
type CreateAppRequest ¶ added in v0.40.0
type CreateAppRequest struct { // The description of the app. Description string `json:"description,omitempty"` // The name of the app. The name must contain only lowercase alphanumeric // characters and hyphens and be between 2 and 30 characters long. It must // be unique within the workspace. Name string `json:"name"` ForceSendFields []string `json:"-"` }
func (CreateAppRequest) MarshalJSON ¶ added in v0.40.0
func (s CreateAppRequest) MarshalJSON() ([]byte, error)
func (*CreateAppRequest) UnmarshalJSON ¶ added in v0.40.0
func (s *CreateAppRequest) UnmarshalJSON(b []byte) error
type CreateServingEndpoint ¶
type CreateServingEndpoint struct { // The core config of the serving endpoint. Config EndpointCoreConfigInput `json:"config"` // The name of the serving endpoint. This field is required and must be // unique across a Databricks workspace. An endpoint name can consist of // alphanumeric characters, dashes, and underscores. Name string `json:"name"` // Rate limits to be applied to the serving endpoint. NOTE: only external // and foundation model endpoints are supported as of now. RateLimits []RateLimit `json:"rate_limits,omitempty"` // Tags to be attached to the serving endpoint and automatically propagated // to billing logs. Tags []EndpointTag `json:"tags,omitempty"` }
type DatabricksModelServingConfig ¶ added in v0.27.0
type DatabricksModelServingConfig struct { // The Databricks secret key reference for a Databricks API token that // corresponds to a user or service principal with Can Query access to the // model serving endpoint pointed to by this external model. DatabricksApiToken string `json:"databricks_api_token"` // The URL of the Databricks workspace containing the model serving endpoint // pointed to by this external model. DatabricksWorkspaceUrl string `json:"databricks_workspace_url"` }
type DataframeSplitInput ¶ added in v0.21.0
type DeleteAppRequest ¶ added in v0.24.0
type DeleteAppRequest struct { // The name of the app. Name string `json:"-" url:"-"` }
Delete an App
type DeleteResponse ¶ added in v0.34.0
type DeleteResponse struct { }
type DeleteServingEndpointRequest ¶
type DeleteServingEndpointRequest struct { // The name of the serving endpoint. This field is required. Name string `json:"-" url:"-"` }
Delete a serving endpoint
type EmbeddingsV1ResponseEmbeddingElement ¶ added in v0.27.0
type EmbeddingsV1ResponseEmbeddingElement struct { Embedding []float64 `json:"embedding,omitempty"` // The index of the embedding in the response. Index int `json:"index,omitempty"` // This will always be 'embedding'. Object EmbeddingsV1ResponseEmbeddingElementObject `json:"object,omitempty"` ForceSendFields []string `json:"-"` }
func (EmbeddingsV1ResponseEmbeddingElement) MarshalJSON ¶ added in v0.27.0
func (s EmbeddingsV1ResponseEmbeddingElement) MarshalJSON() ([]byte, error)
func (*EmbeddingsV1ResponseEmbeddingElement) UnmarshalJSON ¶ added in v0.27.0
func (s *EmbeddingsV1ResponseEmbeddingElement) UnmarshalJSON(b []byte) error
type EmbeddingsV1ResponseEmbeddingElementObject ¶ added in v0.27.0
type EmbeddingsV1ResponseEmbeddingElementObject string
This will always be 'embedding'.
const EmbeddingsV1ResponseEmbeddingElementObjectEmbedding EmbeddingsV1ResponseEmbeddingElementObject = `embedding`
func (*EmbeddingsV1ResponseEmbeddingElementObject) Set ¶ added in v0.27.0
func (f *EmbeddingsV1ResponseEmbeddingElementObject) Set(v string) error
Set raw string value and validate it against allowed values
func (*EmbeddingsV1ResponseEmbeddingElementObject) String ¶ added in v0.27.0
func (f *EmbeddingsV1ResponseEmbeddingElementObject) String() string
String representation for fmt.Print
func (*EmbeddingsV1ResponseEmbeddingElementObject) Type ¶ added in v0.27.0
func (f *EmbeddingsV1ResponseEmbeddingElementObject) Type() string
Type always returns EmbeddingsV1ResponseEmbeddingElementObject to satisfy [pflag.Value] interface
type EndpointCoreConfigInput ¶
type EndpointCoreConfigInput struct { // Configuration for Inference Tables which automatically logs requests and // responses to Unity Catalog. AutoCaptureConfig *AutoCaptureConfigInput `json:"auto_capture_config,omitempty"` // The name of the serving endpoint to update. This field is required. Name string `json:"-" url:"-"` // A list of served entities for the endpoint to serve. A serving endpoint // can have up to 15 served entities. ServedEntities []ServedEntityInput `json:"served_entities,omitempty"` // (Deprecated, use served_entities instead) A list of served models for the // endpoint to serve. A serving endpoint can have up to 15 served models. ServedModels []ServedModelInput `json:"served_models,omitempty"` // The traffic config defining how invocations to the serving endpoint // should be routed. TrafficConfig *TrafficConfig `json:"traffic_config,omitempty"` }
type EndpointCoreConfigOutput ¶
type EndpointCoreConfigOutput struct { // Configuration for Inference Tables which automatically logs requests and // responses to Unity Catalog. AutoCaptureConfig *AutoCaptureConfigOutput `json:"auto_capture_config,omitempty"` // The config version that the serving endpoint is currently serving. ConfigVersion int `json:"config_version,omitempty"` // The list of served entities under the serving endpoint config. ServedEntities []ServedEntityOutput `json:"served_entities,omitempty"` // (Deprecated, use served_entities instead) The list of served models under // the serving endpoint config. ServedModels []ServedModelOutput `json:"served_models,omitempty"` // The traffic configuration associated with the serving endpoint config. TrafficConfig *TrafficConfig `json:"traffic_config,omitempty"` ForceSendFields []string `json:"-"` }
func (EndpointCoreConfigOutput) MarshalJSON ¶ added in v0.23.0
func (s EndpointCoreConfigOutput) MarshalJSON() ([]byte, error)
func (*EndpointCoreConfigOutput) UnmarshalJSON ¶ added in v0.23.0
func (s *EndpointCoreConfigOutput) UnmarshalJSON(b []byte) error
type EndpointCoreConfigSummary ¶
type EndpointCoreConfigSummary struct { // The list of served entities under the serving endpoint config. ServedEntities []ServedEntitySpec `json:"served_entities,omitempty"` // (Deprecated, use served_entities instead) The list of served models under // the serving endpoint config. ServedModels []ServedModelSpec `json:"served_models,omitempty"` }
type EndpointPendingConfig ¶
type EndpointPendingConfig struct { // Configuration for Inference Tables which automatically logs requests and // responses to Unity Catalog. AutoCaptureConfig *AutoCaptureConfigOutput `json:"auto_capture_config,omitempty"` // The config version that the serving endpoint is currently serving. ConfigVersion int `json:"config_version,omitempty"` // The list of served entities belonging to the last issued update to the // serving endpoint. ServedEntities []ServedEntityOutput `json:"served_entities,omitempty"` // (Deprecated, use served_entities instead) The list of served models // belonging to the last issued update to the serving endpoint. ServedModels []ServedModelOutput `json:"served_models,omitempty"` // The timestamp when the update to the pending config started. StartTime int64 `json:"start_time,omitempty"` // The traffic config defining how invocations to the serving endpoint // should be routed. TrafficConfig *TrafficConfig `json:"traffic_config,omitempty"` ForceSendFields []string `json:"-"` }
func (EndpointPendingConfig) MarshalJSON ¶ added in v0.23.0
func (s EndpointPendingConfig) MarshalJSON() ([]byte, error)
func (*EndpointPendingConfig) UnmarshalJSON ¶ added in v0.23.0
func (s *EndpointPendingConfig) UnmarshalJSON(b []byte) error
type EndpointState ¶
type EndpointState struct { // The state of an endpoint's config update. This informs the user if the // pending_config is in progress, if the update failed, or if there is no // update in progress. Note that if the endpoint's config_update state value // is IN_PROGRESS, another update cannot be made until the update completes // or fails. ConfigUpdate EndpointStateConfigUpdate `json:"config_update,omitempty"` // The state of an endpoint, indicating whether or not the endpoint is // queryable. An endpoint is READY if all of the served entities in its // active configuration are ready. If any of the actively served entities // are in a non-ready state, the endpoint state will be NOT_READY. Ready EndpointStateReady `json:"ready,omitempty"` }
type EndpointStateConfigUpdate ¶
type EndpointStateConfigUpdate string
The state of an endpoint's config update. This informs the user if the pending_config is in progress, if the update failed, or if there is no update in progress. Note that if the endpoint's config_update state value is IN_PROGRESS, another update cannot be made until the update completes or fails.
const EndpointStateConfigUpdateInProgress EndpointStateConfigUpdate = `IN_PROGRESS`
const EndpointStateConfigUpdateNotUpdating EndpointStateConfigUpdate = `NOT_UPDATING`
const EndpointStateConfigUpdateUpdateFailed EndpointStateConfigUpdate = `UPDATE_FAILED`
func (*EndpointStateConfigUpdate) Set ¶
func (f *EndpointStateConfigUpdate) Set(v string) error
Set raw string value and validate it against allowed values
func (*EndpointStateConfigUpdate) String ¶
func (f *EndpointStateConfigUpdate) String() string
String representation for fmt.Print
func (*EndpointStateConfigUpdate) Type ¶
func (f *EndpointStateConfigUpdate) Type() string
Type always returns EndpointStateConfigUpdate to satisfy [pflag.Value] interface
type EndpointStateReady ¶
type EndpointStateReady string
The state of an endpoint, indicating whether or not the endpoint is queryable. An endpoint is READY if all of the served entities in its active configuration are ready. If any of the actively served entities are in a non-ready state, the endpoint state will be NOT_READY.
const EndpointStateReadyNotReady EndpointStateReady = `NOT_READY`
const EndpointStateReadyReady EndpointStateReady = `READY`
func (*EndpointStateReady) Set ¶
func (f *EndpointStateReady) Set(v string) error
Set raw string value and validate it against allowed values
func (*EndpointStateReady) String ¶
func (f *EndpointStateReady) String() string
String representation for fmt.Print
func (*EndpointStateReady) Type ¶
func (f *EndpointStateReady) Type() string
Type always returns EndpointStateReady to satisfy [pflag.Value] interface
type EndpointTag ¶ added in v0.20.0
type EndpointTag struct { // Key field for a serving endpoint tag. Key string `json:"key"` // Optional value field for a serving endpoint tag. Value string `json:"value,omitempty"` ForceSendFields []string `json:"-"` }
func (EndpointTag) MarshalJSON ¶ added in v0.23.0
func (s EndpointTag) MarshalJSON() ([]byte, error)
func (*EndpointTag) UnmarshalJSON ¶ added in v0.23.0
func (s *EndpointTag) UnmarshalJSON(b []byte) error
type EnvVariable ¶ added in v0.40.0
type EnvVariable struct { Name string `json:"name,omitempty"` Value string `json:"value,omitempty"` ValueFrom string `json:"value_from,omitempty"` ForceSendFields []string `json:"-"` }
func (EnvVariable) MarshalJSON ¶ added in v0.40.0
func (s EnvVariable) MarshalJSON() ([]byte, error)
func (*EnvVariable) UnmarshalJSON ¶ added in v0.40.0
func (s *EnvVariable) UnmarshalJSON(b []byte) error
type ExportMetricsRequest ¶
type ExportMetricsRequest struct { // The name of the serving endpoint to retrieve metrics for. This field is // required. Name string `json:"-" url:"-"` }
Get metrics of a serving endpoint
type ExportMetricsResponse ¶ added in v0.34.0
type ExportMetricsResponse struct { }
type ExternalModel ¶ added in v0.27.0
type ExternalModel struct { // AI21Labs Config. Only required if the provider is 'ai21labs'. Ai21labsConfig *Ai21LabsConfig `json:"ai21labs_config,omitempty"` // Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'. AmazonBedrockConfig *AmazonBedrockConfig `json:"amazon_bedrock_config,omitempty"` // Anthropic Config. Only required if the provider is 'anthropic'. AnthropicConfig *AnthropicConfig `json:"anthropic_config,omitempty"` // Cohere Config. Only required if the provider is 'cohere'. CohereConfig *CohereConfig `json:"cohere_config,omitempty"` // Databricks Model Serving Config. Only required if the provider is // 'databricks-model-serving'. DatabricksModelServingConfig *DatabricksModelServingConfig `json:"databricks_model_serving_config,omitempty"` // The name of the external model. Name string `json:"name"` // OpenAI Config. Only required if the provider is 'openai'. OpenaiConfig *OpenAiConfig `json:"openai_config,omitempty"` // PaLM Config. Only required if the provider is 'palm'. PalmConfig *PaLmConfig `json:"palm_config,omitempty"` // The name of the provider for the external model. Currently, the supported // providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', // 'databricks-model-serving', 'openai', and 'palm'. Provider ExternalModelProvider `json:"provider"` // The task type of the external model. Task string `json:"task"` }
type ExternalModelProvider ¶ added in v0.27.0
type ExternalModelProvider string
The name of the provider for the external model. Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'openai', and 'palm'.
const ExternalModelProviderAi21labs ExternalModelProvider = `ai21labs`
const ExternalModelProviderAmazonBedrock ExternalModelProvider = `amazon-bedrock`
const ExternalModelProviderAnthropic ExternalModelProvider = `anthropic`
const ExternalModelProviderCohere ExternalModelProvider = `cohere`
const ExternalModelProviderDatabricksModelServing ExternalModelProvider = `databricks-model-serving`
const ExternalModelProviderOpenai ExternalModelProvider = `openai`
const ExternalModelProviderPalm ExternalModelProvider = `palm`
func (*ExternalModelProvider) Set ¶ added in v0.27.0
func (f *ExternalModelProvider) Set(v string) error
Set raw string value and validate it against allowed values
func (*ExternalModelProvider) String ¶ added in v0.27.0
func (f *ExternalModelProvider) String() string
String representation for fmt.Print
func (*ExternalModelProvider) Type ¶ added in v0.27.0
func (f *ExternalModelProvider) Type() string
Type always returns ExternalModelProvider to satisfy [pflag.Value] interface
type ExternalModelUsageElement ¶ added in v0.27.0
type ExternalModelUsageElement struct { // The number of tokens in the chat/completions response. CompletionTokens int `json:"completion_tokens,omitempty"` // The number of tokens in the prompt. PromptTokens int `json:"prompt_tokens,omitempty"` // The total number of tokens in the prompt and response. TotalTokens int `json:"total_tokens,omitempty"` ForceSendFields []string `json:"-"` }
func (ExternalModelUsageElement) MarshalJSON ¶ added in v0.27.0
func (s ExternalModelUsageElement) MarshalJSON() ([]byte, error)
func (*ExternalModelUsageElement) UnmarshalJSON ¶ added in v0.27.0
func (s *ExternalModelUsageElement) UnmarshalJSON(b []byte) error
type FoundationModel ¶ added in v0.27.0
type FoundationModel struct { // The description of the foundation model. Description string `json:"description,omitempty"` // The display name of the foundation model. DisplayName string `json:"display_name,omitempty"` // The URL to the documentation of the foundation model. Docs string `json:"docs,omitempty"` // The name of the foundation model. Name string `json:"name,omitempty"` ForceSendFields []string `json:"-"` }
func (FoundationModel) MarshalJSON ¶ added in v0.27.0
func (s FoundationModel) MarshalJSON() ([]byte, error)
func (*FoundationModel) UnmarshalJSON ¶ added in v0.27.0
func (s *FoundationModel) UnmarshalJSON(b []byte) error
type GetAppDeploymentRequest ¶ added in v0.40.0
type GetAppDeploymentRequest struct { // The name of the app. AppName string `json:"-" url:"-"` // The unique id of the deployment. DeploymentId string `json:"-" url:"-"` }
Get an App Deployment
type GetAppEnvironmentRequest ¶ added in v0.40.0
type GetAppEnvironmentRequest struct { // The name of the app. Name string `json:"-" url:"-"` }
Get App Environment
type GetAppRequest ¶ added in v0.24.0
type GetAppRequest struct { // The name of the app. Name string `json:"-" url:"-"` }
Get an App
type GetOpenApiRequest ¶ added in v0.39.0
type GetOpenApiRequest struct { // The name of the serving endpoint that the served model belongs to. This // field is required. Name string `json:"-" url:"-"` }
Get the schema for a serving endpoint
type GetOpenApiResponse ¶ added in v0.39.0
type GetOpenApiResponse struct { }
The response is an OpenAPI spec in JSON format that typically includes fields like openapi, info, servers and paths, etc.
type GetServingEndpointPermissionLevelsRequest ¶ added in v0.15.0
type GetServingEndpointPermissionLevelsRequest struct { // The serving endpoint for which to get or manage permissions. ServingEndpointId string `json:"-" url:"-"` }
Get serving endpoint permission levels
type GetServingEndpointPermissionLevelsResponse ¶ added in v0.15.0
type GetServingEndpointPermissionLevelsResponse struct { // Specific permission levels PermissionLevels []ServingEndpointPermissionsDescription `json:"permission_levels,omitempty"` }
type GetServingEndpointPermissionsRequest ¶ added in v0.15.0
type GetServingEndpointPermissionsRequest struct { // The serving endpoint for which to get or manage permissions. ServingEndpointId string `json:"-" url:"-"` }
Get serving endpoint permissions
type GetServingEndpointRequest ¶
type GetServingEndpointRequest struct { // The name of the serving endpoint. This field is required. Name string `json:"-" url:"-"` }
Get a single serving endpoint
type ListAppDeploymentsRequest ¶ added in v0.40.0
type ListAppDeploymentsRequest struct { // The name of the app. AppName string `json:"-" url:"-"` // Upper bound for items returned. PageSize int `json:"-" url:"page_size,omitempty"` // Pagination token to go to the next page of app deployments. Requests // first page if absent. PageToken string `json:"-" url:"page_token,omitempty"` ForceSendFields []string `json:"-"` }
List App Deployments
func (ListAppDeploymentsRequest) MarshalJSON ¶ added in v0.40.0
func (s ListAppDeploymentsRequest) MarshalJSON() ([]byte, error)
func (*ListAppDeploymentsRequest) UnmarshalJSON ¶ added in v0.40.0
func (s *ListAppDeploymentsRequest) UnmarshalJSON(b []byte) error
type ListAppDeploymentsResponse ¶ added in v0.40.0
type ListAppDeploymentsResponse struct { // Deployment history of the app. AppDeployments []AppDeployment `json:"app_deployments,omitempty"` // Pagination token to request the next page of app deployments. NextPageToken string `json:"next_page_token,omitempty"` ForceSendFields []string `json:"-"` }
func (ListAppDeploymentsResponse) MarshalJSON ¶ added in v0.40.0
func (s ListAppDeploymentsResponse) MarshalJSON() ([]byte, error)
func (*ListAppDeploymentsResponse) UnmarshalJSON ¶ added in v0.40.0
func (s *ListAppDeploymentsResponse) UnmarshalJSON(b []byte) error
type ListAppsRequest ¶ added in v0.40.0
type ListAppsRequest struct { // Upper bound for items returned. PageSize int `json:"-" url:"page_size,omitempty"` // Pagination token to go to the next page of apps. Requests first page if // absent. PageToken string `json:"-" url:"page_token,omitempty"` ForceSendFields []string `json:"-"` }
List Apps
func (ListAppsRequest) MarshalJSON ¶ added in v0.40.0
func (s ListAppsRequest) MarshalJSON() ([]byte, error)
func (*ListAppsRequest) UnmarshalJSON ¶ added in v0.40.0
func (s *ListAppsRequest) UnmarshalJSON(b []byte) error
type ListAppsResponse ¶ added in v0.25.0
type ListAppsResponse struct { Apps []App `json:"apps,omitempty"` // Pagination token to request the next page of apps. NextPageToken string `json:"next_page_token,omitempty"` ForceSendFields []string `json:"-"` }
func (ListAppsResponse) MarshalJSON ¶ added in v0.25.0
func (s ListAppsResponse) MarshalJSON() ([]byte, error)
func (*ListAppsResponse) UnmarshalJSON ¶ added in v0.25.0
func (s *ListAppsResponse) UnmarshalJSON(b []byte) error
type ListEndpointsResponse ¶
type ListEndpointsResponse struct { // The list of endpoints. Endpoints []ServingEndpoint `json:"endpoints,omitempty"` }
type LogsRequest ¶
type LogsRequest struct { // The name of the serving endpoint that the served model belongs to. This // field is required. Name string `json:"-" url:"-"` // The name of the served model that logs will be retrieved for. This field // is required. ServedModelName string `json:"-" url:"-"` }
Get the latest logs for a served model
type OpenAiConfig ¶ added in v0.27.0
type OpenAiConfig struct { // This is the base URL for the OpenAI API (default: // "https://api.openai.com/v1"). For Azure OpenAI, this field is required, // and is the base URL for the Azure OpenAI API service provided by Azure. OpenaiApiBase string `json:"openai_api_base,omitempty"` // The Databricks secret key reference for an OpenAI or Azure OpenAI API // key. OpenaiApiKey string `json:"openai_api_key"` // This is an optional field to specify the type of OpenAI API to use. For // Azure OpenAI, this field is required, and adjust this parameter to // represent the preferred security access validation protocol. For access // token validation, use azure. For authentication using Azure Active // Directory (Azure AD) use, azuread. OpenaiApiType string `json:"openai_api_type,omitempty"` // This is an optional field to specify the OpenAI API version. For Azure // OpenAI, this field is required, and is the version of the Azure OpenAI // service to utilize, specified by a date. OpenaiApiVersion string `json:"openai_api_version,omitempty"` // This field is only required for Azure OpenAI and is the name of the // deployment resource for the Azure OpenAI service. OpenaiDeploymentName string `json:"openai_deployment_name,omitempty"` // This is an optional field to specify the organization in OpenAI or Azure // OpenAI. OpenaiOrganization string `json:"openai_organization,omitempty"` ForceSendFields []string `json:"-"` }
func (OpenAiConfig) MarshalJSON ¶ added in v0.27.0
func (s OpenAiConfig) MarshalJSON() ([]byte, error)
func (*OpenAiConfig) UnmarshalJSON ¶ added in v0.27.0
func (s *OpenAiConfig) UnmarshalJSON(b []byte) error
type PaLmConfig ¶ added in v0.27.0
type PaLmConfig struct { // The Databricks secret key reference for a PaLM API key. PalmApiKey string `json:"palm_api_key"` }
type PatchServingEndpointTags ¶ added in v0.20.0
type PatchServingEndpointTags struct { // List of endpoint tags to add AddTags []EndpointTag `json:"add_tags,omitempty"` // List of tag keys to delete DeleteTags []string `json:"delete_tags,omitempty"` // The name of the serving endpoint whose tags to patch. This field is // required. Name string `json:"-" url:"-"` }
type PayloadTable ¶ added in v0.27.0
type PayloadTable struct { // The name of the payload table. Name string `json:"name,omitempty"` // The status of the payload table. Status string `json:"status,omitempty"` // The status message of the payload table. StatusMessage string `json:"status_message,omitempty"` ForceSendFields []string `json:"-"` }
func (PayloadTable) MarshalJSON ¶ added in v0.27.0
func (s PayloadTable) MarshalJSON() ([]byte, error)
func (*PayloadTable) UnmarshalJSON ¶ added in v0.27.0
func (s *PayloadTable) UnmarshalJSON(b []byte) error
type PutRequest ¶ added in v0.27.0
type PutRequest struct { // The name of the serving endpoint whose rate limits are being updated. // This field is required. Name string `json:"-" url:"-"` // The list of endpoint rate limits. RateLimits []RateLimit `json:"rate_limits,omitempty"` }
Update rate limits of a serving endpoint
type PutResponse ¶ added in v0.27.0
type PutResponse struct { // The list of endpoint rate limits. RateLimits []RateLimit `json:"rate_limits,omitempty"` }
type QueryEndpointInput ¶ added in v0.21.0
type QueryEndpointInput struct { // Pandas Dataframe input in the records orientation. DataframeRecords []any `json:"dataframe_records,omitempty"` // Pandas Dataframe input in the split orientation. DataframeSplit *DataframeSplitInput `json:"dataframe_split,omitempty"` // The extra parameters field used ONLY for __completions, chat,__ and // __embeddings external & foundation model__ serving endpoints. This is a // map of strings and should only be used with other external/foundation // model query fields. ExtraParams map[string]string `json:"extra_params,omitempty"` // The input string (or array of strings) field used ONLY for __embeddings // external & foundation model__ serving endpoints and is the only field // (along with extra_params if needed) used by embeddings queries. Input any `json:"input,omitempty"` // Tensor-based input in columnar format. Inputs any `json:"inputs,omitempty"` // Tensor-based input in row format. Instances []any `json:"instances,omitempty"` // The max tokens field used ONLY for __completions__ and __chat external & // foundation model__ serving endpoints. This is an integer and should only // be used with other chat/completions query fields. MaxTokens int `json:"max_tokens,omitempty"` // The messages field used ONLY for __chat external & foundation model__ // serving endpoints. This is a list of chat messages and should only be used with // other chat query fields. Messages []ChatMessage `json:"messages,omitempty"` // The n (number of candidates) field used ONLY for __completions__ and // __chat external & foundation model__ serving endpoints. This is an // integer between 1 and 5 with a default of 1 and should only be used with // other chat/completions query fields. N int `json:"n,omitempty"` // The name of the serving endpoint. This field is required. 
Name string `json:"-" url:"-"` // The prompt string (or array of strings) field used ONLY for __completions // external & foundation model__ serving endpoints and should only be used // with other completions query fields. Prompt any `json:"prompt,omitempty"` // The stop sequences field used ONLY for __completions__ and __chat // external & foundation model__ serving endpoints. This is a list of // strings and should only be used with other chat/completions query fields. Stop []string `json:"stop,omitempty"` // The stream field used ONLY for __completions__ and __chat external & // foundation model__ serving endpoints. This is a boolean defaulting to // false and should only be used with other chat/completions query fields. Stream bool `json:"stream,omitempty"` // The temperature field used ONLY for __completions__ and __chat external & // foundation model__ serving endpoints. This is a float between 0.0 and 2.0 // with a default of 1.0 and should only be used with other chat/completions // query fields. Temperature float64 `json:"temperature,omitempty"` ForceSendFields []string `json:"-"` }
func (QueryEndpointInput) MarshalJSON ¶ added in v0.27.0
func (s QueryEndpointInput) MarshalJSON() ([]byte, error)
func (*QueryEndpointInput) UnmarshalJSON ¶ added in v0.27.0
func (s *QueryEndpointInput) UnmarshalJSON(b []byte) error
type QueryEndpointResponse ¶
type QueryEndpointResponse struct { // The list of choices returned by the __chat or completions // external/foundation model__ serving endpoint. Choices []V1ResponseChoiceElement `json:"choices,omitempty"` // The timestamp in seconds when the query was created in Unix time returned // by a __completions or chat external/foundation model__ serving endpoint. Created int64 `json:"created,omitempty"` // The list of the embeddings returned by the __embeddings // external/foundation model__ serving endpoint. Data []EmbeddingsV1ResponseEmbeddingElement `json:"data,omitempty"` // The ID of the query that may be returned by a __completions or chat // external/foundation model__ serving endpoint. Id string `json:"id,omitempty"` // The name of the __external/foundation model__ used for querying. This is // the name of the model that was specified in the endpoint config. Model string `json:"model,omitempty"` // The type of object returned by the __external/foundation model__ serving // endpoint, one of [text_completion, chat.completion, list (of // embeddings)]. Object QueryEndpointResponseObject `json:"object,omitempty"` // The predictions returned by the serving endpoint. Predictions []any `json:"predictions,omitempty"` // The name of the served model that served the request. This is useful when // there are multiple models behind the same endpoint with traffic split. ServedModelName string `json:"-" url:"-" header:"served-model-name,omitempty"` // The usage object that may be returned by the __external/foundation // model__ serving endpoint. This contains information about the number of // tokens used in the prompt and response. Usage *ExternalModelUsageElement `json:"usage,omitempty"` ForceSendFields []string `json:"-"` }
func (QueryEndpointResponse) MarshalJSON ¶ added in v0.27.0
func (s QueryEndpointResponse) MarshalJSON() ([]byte, error)
func (*QueryEndpointResponse) UnmarshalJSON ¶ added in v0.27.0
func (s *QueryEndpointResponse) UnmarshalJSON(b []byte) error
type QueryEndpointResponseObject ¶ added in v0.27.0
type QueryEndpointResponseObject string
The type of object returned by the __external/foundation model__ serving endpoint, one of [text_completion, chat.completion, list (of embeddings)].
const QueryEndpointResponseObjectChatCompletion QueryEndpointResponseObject = `chat.completion`
const QueryEndpointResponseObjectList QueryEndpointResponseObject = `list`
const QueryEndpointResponseObjectTextCompletion QueryEndpointResponseObject = `text_completion`
func (*QueryEndpointResponseObject) Set ¶ added in v0.27.0
func (f *QueryEndpointResponseObject) Set(v string) error
Set raw string value and validate it against allowed values
func (*QueryEndpointResponseObject) String ¶ added in v0.27.0
func (f *QueryEndpointResponseObject) String() string
String representation for fmt.Print
func (*QueryEndpointResponseObject) Type ¶ added in v0.27.0
func (f *QueryEndpointResponseObject) Type() string
Type always returns QueryEndpointResponseObject to satisfy [pflag.Value] interface
type RateLimit ¶ added in v0.27.0
type RateLimit struct { // Used to specify how many calls are allowed for a key within the // renewal_period. Calls int `json:"calls"` // Key field for a serving endpoint rate limit. Currently, only 'user' and // 'endpoint' are supported, with 'endpoint' being the default if not // specified. Key RateLimitKey `json:"key,omitempty"` // Renewal period field for a serving endpoint rate limit. Currently, only // 'minute' is supported. RenewalPeriod RateLimitRenewalPeriod `json:"renewal_period"` }
type RateLimitKey ¶ added in v0.27.0
type RateLimitKey string
Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.
const RateLimitKeyEndpoint RateLimitKey = `endpoint`
const RateLimitKeyUser RateLimitKey = `user`
func (*RateLimitKey) Set ¶ added in v0.27.0
func (f *RateLimitKey) Set(v string) error
Set raw string value and validate it against allowed values
func (*RateLimitKey) String ¶ added in v0.27.0
func (f *RateLimitKey) String() string
String representation for fmt.Print
func (*RateLimitKey) Type ¶ added in v0.27.0
func (f *RateLimitKey) Type() string
Type always returns RateLimitKey to satisfy [pflag.Value] interface
type RateLimitRenewalPeriod ¶ added in v0.27.0
type RateLimitRenewalPeriod string
Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.
const RateLimitRenewalPeriodMinute RateLimitRenewalPeriod = `minute`
func (*RateLimitRenewalPeriod) Set ¶ added in v0.27.0
func (f *RateLimitRenewalPeriod) Set(v string) error
Set raw string value and validate it against allowed values
func (*RateLimitRenewalPeriod) String ¶ added in v0.27.0
func (f *RateLimitRenewalPeriod) String() string
String representation for fmt.Print
func (*RateLimitRenewalPeriod) Type ¶ added in v0.27.0
func (f *RateLimitRenewalPeriod) Type() string
Type always returns RateLimitRenewalPeriod to satisfy [pflag.Value] interface
type Route ¶
type Route struct { // The name of the served model this route configures traffic for. ServedModelName string `json:"served_model_name"` // The percentage of endpoint traffic to send to this route. It must be an // integer between 0 and 100 inclusive. TrafficPercentage int `json:"traffic_percentage"` }
type ServedEntityInput ¶ added in v0.27.0
type ServedEntityInput struct { // The name of the entity to be served. The entity may be a model in the // Databricks Model Registry, a model in the Unity Catalog (UC), or a // function of type FEATURE_SPEC in the UC. If it is a UC object, the full // name of the object should be given in the form of // __catalog_name__.__schema_name__.__model_name__. EntityName string `json:"entity_name,omitempty"` // The version of the model in Databricks Model Registry to be served or // empty if the entity is a FEATURE_SPEC. EntityVersion string `json:"entity_version,omitempty"` // An object containing a set of optional, user-specified environment // variable key-value pairs used for serving this entity. Note: this is an // experimental feature and subject to change. Example entity environment // variables that refer to Databricks secrets: `{"OPENAI_API_KEY": // "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": // "{{secrets/my_scope2/my_key2}}"}` EnvironmentVars map[string]string `json:"environment_vars,omitempty"` // The external model to be served. NOTE: Only one of external_model and // (entity_name, entity_version, workload_size, workload_type, and // scale_to_zero_enabled) can be specified with the latter set being used // for custom model serving for a Databricks registered model. When an // external_model is present, the served entities list can only have one // served_entity object. For an existing endpoint with external_model, it // can not be updated to an endpoint without external_model. If the endpoint // is created without external_model, users cannot update it to add // external_model later. ExternalModel *ExternalModel `json:"external_model,omitempty"` // ARN of the instance profile that the served entity uses to access AWS // resources. InstanceProfileArn string `json:"instance_profile_arn,omitempty"` // The maximum tokens per second that the endpoint can scale up to. 
MaxProvisionedThroughput int `json:"max_provisioned_throughput,omitempty"` // The minimum tokens per second that the endpoint can scale down to. MinProvisionedThroughput int `json:"min_provisioned_throughput,omitempty"` // The name of a served entity. It must be unique across an endpoint. A // served entity name can consist of alphanumeric characters, dashes, and // underscores. If not specified for an external model, this field defaults // to external_model.name, with '.' and ':' replaced with '-', and if not // specified for other entities, it defaults to // <entity-name>-<entity-version>. Name string `json:"name,omitempty"` // Whether the compute resources for the served entity should scale down to // zero. ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` // The workload size of the served entity. The workload size corresponds to // a range of provisioned concurrency that the compute autoscales between. A // single unit of provisioned concurrency can process one request at a time. // Valid workload sizes are "Small" (4 - 4 provisioned concurrency), // "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 // provisioned concurrency). If scale-to-zero is enabled, the lower bound of // the provisioned concurrency for each workload size is 0. WorkloadSize string `json:"workload_size,omitempty"` // The workload type of the served entity. The workload type selects which // type of compute to use in the endpoint. The default value for this // parameter is "CPU". For deep learning workloads, GPU acceleration is // available by selecting workload types like GPU_SMALL and others. See the // available [GPU types]. // // [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types WorkloadType string `json:"workload_type,omitempty"` ForceSendFields []string `json:"-"` }
func (ServedEntityInput) MarshalJSON ¶ added in v0.27.0
func (s ServedEntityInput) MarshalJSON() ([]byte, error)
func (*ServedEntityInput) UnmarshalJSON ¶ added in v0.27.0
func (s *ServedEntityInput) UnmarshalJSON(b []byte) error
type ServedEntityOutput ¶ added in v0.27.0
type ServedEntityOutput struct { // The creation timestamp of the served entity in Unix time. CreationTimestamp int64 `json:"creation_timestamp,omitempty"` // The email of the user who created the served entity. Creator string `json:"creator,omitempty"` // The name of the entity served. The entity may be a model in the // Databricks Model Registry, a model in the Unity Catalog (UC), or a // function of type FEATURE_SPEC in the UC. If it is a UC object, the full // name of the object is given in the form of // __catalog_name__.__schema_name__.__model_name__. EntityName string `json:"entity_name,omitempty"` // The version of the served entity in Databricks Model Registry or empty if // the entity is a FEATURE_SPEC. EntityVersion string `json:"entity_version,omitempty"` // An object containing a set of optional, user-specified environment // variable key-value pairs used for serving this entity. Note: this is an // experimental feature and subject to change. Example entity environment // variables that refer to Databricks secrets: `{"OPENAI_API_KEY": // "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": // "{{secrets/my_scope2/my_key2}}"}` EnvironmentVars map[string]string `json:"environment_vars,omitempty"` // The external model that is served. NOTE: Only one of external_model, // foundation_model, and (entity_name, entity_version, workload_size, // workload_type, and scale_to_zero_enabled) is returned based on the // endpoint type. ExternalModel *ExternalModel `json:"external_model,omitempty"` // The foundation model that is served. NOTE: Only one of foundation_model, // external_model, and (entity_name, entity_version, workload_size, // workload_type, and scale_to_zero_enabled) is returned based on the // endpoint type. FoundationModel *FoundationModel `json:"foundation_model,omitempty"` // ARN of the instance profile that the served entity uses to access AWS // resources. 
InstanceProfileArn string `json:"instance_profile_arn,omitempty"` // The maximum tokens per second that the endpoint can scale up to. MaxProvisionedThroughput int `json:"max_provisioned_throughput,omitempty"` // The minimum tokens per second that the endpoint can scale down to. MinProvisionedThroughput int `json:"min_provisioned_throughput,omitempty"` // The name of the served entity. Name string `json:"name,omitempty"` // Whether the compute resources for the served entity should scale down to // zero. ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` // Information corresponding to the state of the served entity. State *ServedModelState `json:"state,omitempty"` // The workload size of the served entity. The workload size corresponds to // a range of provisioned concurrency that the compute autoscales between. A // single unit of provisioned concurrency can process one request at a time. // Valid workload sizes are "Small" (4 - 4 provisioned concurrency), // "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 // provisioned concurrency). If scale-to-zero is enabled, the lower bound of // the provisioned concurrency for each workload size will be 0. WorkloadSize string `json:"workload_size,omitempty"` // The workload type of the served entity. The workload type selects which // type of compute to use in the endpoint. The default value for this // parameter is "CPU". For deep learning workloads, GPU acceleration is // available by selecting workload types like GPU_SMALL and others. See the // available [GPU types]. // // [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types WorkloadType string `json:"workload_type,omitempty"` ForceSendFields []string `json:"-"` }
func (ServedEntityOutput) MarshalJSON ¶ added in v0.27.0
func (s ServedEntityOutput) MarshalJSON() ([]byte, error)
func (*ServedEntityOutput) UnmarshalJSON ¶ added in v0.27.0
func (s *ServedEntityOutput) UnmarshalJSON(b []byte) error
type ServedEntitySpec ¶ added in v0.27.0
type ServedEntitySpec struct { // The name of the entity served. The entity may be a model in the // Databricks Model Registry, a model in the Unity Catalog (UC), or a // function of type FEATURE_SPEC in the UC. If it is a UC object, the full // name of the object is given in the form of // __catalog_name__.__schema_name__.__model_name__. EntityName string `json:"entity_name,omitempty"` // The version of the served entity in Databricks Model Registry or empty if // the entity is a FEATURE_SPEC. EntityVersion string `json:"entity_version,omitempty"` // The external model that is served. NOTE: Only one of external_model, // foundation_model, and (entity_name, entity_version) is returned based on // the endpoint type. ExternalModel *ExternalModel `json:"external_model,omitempty"` // The foundation model that is served. NOTE: Only one of foundation_model, // external_model, and (entity_name, entity_version) is returned based on // the endpoint type. FoundationModel *FoundationModel `json:"foundation_model,omitempty"` // The name of the served entity. Name string `json:"name,omitempty"` ForceSendFields []string `json:"-"` }
func (ServedEntitySpec) MarshalJSON ¶ added in v0.27.0
func (s ServedEntitySpec) MarshalJSON() ([]byte, error)
func (*ServedEntitySpec) UnmarshalJSON ¶ added in v0.27.0
func (s *ServedEntitySpec) UnmarshalJSON(b []byte) error
type ServedModelInput ¶
type ServedModelInput struct { // An object containing a set of optional, user-specified environment // variable key-value pairs used for serving this model. Note: this is an // experimental feature and subject to change. Example model environment // variables that refer to Databricks secrets: `{"OPENAI_API_KEY": // "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": // "{{secrets/my_scope2/my_key2}}"}` EnvironmentVars map[string]string `json:"environment_vars,omitempty"` // ARN of the instance profile that the served model will use to access AWS // resources. InstanceProfileArn string `json:"instance_profile_arn,omitempty"` // The name of the model in Databricks Model Registry to be served or if the // model resides in Unity Catalog, the full name of model, in the form of // __catalog_name__.__schema_name__.__model_name__. ModelName string `json:"model_name"` // The version of the model in Databricks Model Registry or Unity Catalog to // be served. ModelVersion string `json:"model_version"` // The name of a served model. It must be unique across an endpoint. If not // specified, this field will default to <model-name>-<model-version>. A // served model name can consist of alphanumeric characters, dashes, and // underscores. Name string `json:"name,omitempty"` // Whether the compute resources for the served model should scale down to // zero. ScaleToZeroEnabled bool `json:"scale_to_zero_enabled"` // The workload size of the served model. The workload size corresponds to a // range of provisioned concurrency that the compute will autoscale between. // A single unit of provisioned concurrency can process one request at a // time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), // "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 // provisioned concurrency). If scale-to-zero is enabled, the lower bound of // the provisioned concurrency for each workload size will be 0. 
WorkloadSize ServedModelInputWorkloadSize `json:"workload_size"` // The workload type of the served model. The workload type selects which // type of compute to use in the endpoint. The default value for this // parameter is "CPU". For deep learning workloads, GPU acceleration is // available by selecting workload types like GPU_SMALL and others. See the // available [GPU types]. // // [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types WorkloadType ServedModelInputWorkloadType `json:"workload_type,omitempty"` ForceSendFields []string `json:"-"` }
func (ServedModelInput) MarshalJSON ¶ added in v0.23.0
func (s ServedModelInput) MarshalJSON() ([]byte, error)
func (*ServedModelInput) UnmarshalJSON ¶ added in v0.23.0
func (s *ServedModelInput) UnmarshalJSON(b []byte) error
type ServedModelInputWorkloadSize ¶ added in v0.27.0
type ServedModelInputWorkloadSize string
The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency). If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0.
const ServedModelInputWorkloadSizeLarge ServedModelInputWorkloadSize = `Large`
const ServedModelInputWorkloadSizeMedium ServedModelInputWorkloadSize = `Medium`
const ServedModelInputWorkloadSizeSmall ServedModelInputWorkloadSize = `Small`
func (*ServedModelInputWorkloadSize) Set ¶ added in v0.27.0
func (f *ServedModelInputWorkloadSize) Set(v string) error
Set raw string value and validate it against allowed values
func (*ServedModelInputWorkloadSize) String ¶ added in v0.27.0
func (f *ServedModelInputWorkloadSize) String() string
String representation for fmt.Print
func (*ServedModelInputWorkloadSize) Type ¶ added in v0.27.0
func (f *ServedModelInputWorkloadSize) Type() string
Type always returns ServedModelInputWorkloadSize to satisfy [pflag.Value] interface
type ServedModelInputWorkloadType ¶ added in v0.27.0
type ServedModelInputWorkloadType string
The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is "CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available GPU types (https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).
const ServedModelInputWorkloadTypeCpu ServedModelInputWorkloadType = `CPU`
const ServedModelInputWorkloadTypeGpuLarge ServedModelInputWorkloadType = `GPU_LARGE`
const ServedModelInputWorkloadTypeGpuMedium ServedModelInputWorkloadType = `GPU_MEDIUM`
const ServedModelInputWorkloadTypeGpuSmall ServedModelInputWorkloadType = `GPU_SMALL`
const ServedModelInputWorkloadTypeMultigpuMedium ServedModelInputWorkloadType = `MULTIGPU_MEDIUM`
func (*ServedModelInputWorkloadType) Set ¶ added in v0.27.0
func (f *ServedModelInputWorkloadType) Set(v string) error
Set raw string value and validate it against allowed values
func (*ServedModelInputWorkloadType) String ¶ added in v0.27.0
func (f *ServedModelInputWorkloadType) String() string
String representation for fmt.Print
func (*ServedModelInputWorkloadType) Type ¶ added in v0.27.0
func (f *ServedModelInputWorkloadType) Type() string
Type always returns ServedModelInputWorkloadType to satisfy [pflag.Value] interface
type ServedModelOutput ¶
type ServedModelOutput struct { // The creation timestamp of the served model in Unix time. CreationTimestamp int64 `json:"creation_timestamp,omitempty"` // The email of the user who created the served model. Creator string `json:"creator,omitempty"` // An object containing a set of optional, user-specified environment // variable key-value pairs used for serving this model. Note: this is an // experimental feature and subject to change. Example model environment // variables that refer to Databricks secrets: `{"OPENAI_API_KEY": // "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": // "{{secrets/my_scope2/my_key2}}"}` EnvironmentVars map[string]string `json:"environment_vars,omitempty"` // ARN of the instance profile that the served model will use to access AWS // resources. InstanceProfileArn string `json:"instance_profile_arn,omitempty"` // The name of the model in Databricks Model Registry or the full name of // the model in Unity Catalog. ModelName string `json:"model_name,omitempty"` // The version of the model in Databricks Model Registry or Unity Catalog to // be served. ModelVersion string `json:"model_version,omitempty"` // The name of the served model. Name string `json:"name,omitempty"` // Whether the compute resources for the Served Model should scale down to // zero. ScaleToZeroEnabled bool `json:"scale_to_zero_enabled,omitempty"` // Information corresponding to the state of the Served Model. State *ServedModelState `json:"state,omitempty"` // The workload size of the served model. The workload size corresponds to a // range of provisioned concurrency that the compute will autoscale between. // A single unit of provisioned concurrency can process one request at a // time. Valid workload sizes are "Small" (4 - 4 provisioned concurrency), // "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 // provisioned concurrency). If scale-to-zero is enabled, the lower bound of // the provisioned concurrency for each workload size will be 0. 
WorkloadSize string `json:"workload_size,omitempty"` // The workload type of the served model. The workload type selects which // type of compute to use in the endpoint. The default value for this // parameter is "CPU". For deep learning workloads, GPU acceleration is // available by selecting workload types like GPU_SMALL and others. See the // available [GPU types]. // // [GPU types]: https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types WorkloadType string `json:"workload_type,omitempty"` ForceSendFields []string `json:"-"` }
func (ServedModelOutput) MarshalJSON ¶ added in v0.23.0
func (s ServedModelOutput) MarshalJSON() ([]byte, error)
func (*ServedModelOutput) UnmarshalJSON ¶ added in v0.23.0
func (s *ServedModelOutput) UnmarshalJSON(b []byte) error
type ServedModelSpec ¶
type ServedModelSpec struct { // The name of the model in Databricks Model Registry or the full name of // the model in Unity Catalog. ModelName string `json:"model_name,omitempty"` // The version of the model in Databricks Model Registry or Unity Catalog to // be served. ModelVersion string `json:"model_version,omitempty"` // The name of the served model. Name string `json:"name,omitempty"` ForceSendFields []string `json:"-"` }
func (ServedModelSpec) MarshalJSON ¶ added in v0.23.0
func (s ServedModelSpec) MarshalJSON() ([]byte, error)
func (*ServedModelSpec) UnmarshalJSON ¶ added in v0.23.0
func (s *ServedModelSpec) UnmarshalJSON(b []byte) error
type ServedModelState ¶
type ServedModelState struct { // The state of the served entity deployment. DEPLOYMENT_CREATING indicates // that the served entity is not ready yet because the deployment is still // being created (i.e container image is building, model server is deploying // for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the // served entity was previously in a ready state but no longer is and is // attempting to recover. DEPLOYMENT_READY indicates that the served entity // is ready to receive traffic. DEPLOYMENT_FAILED indicates that there was // an error trying to bring up the served entity (e.g container image build // failed, the model server failed to start due to a model loading error, // etc.) DEPLOYMENT_ABORTED indicates that the deployment was terminated // likely due to a failure in bringing up another served entity under the // same endpoint and config version. Deployment ServedModelStateDeployment `json:"deployment,omitempty"` // More information about the state of the served entity, if available. DeploymentStateMessage string `json:"deployment_state_message,omitempty"` ForceSendFields []string `json:"-"` }
func (ServedModelState) MarshalJSON ¶ added in v0.23.0
func (s ServedModelState) MarshalJSON() ([]byte, error)
func (*ServedModelState) UnmarshalJSON ¶ added in v0.23.0
func (s *ServedModelState) UnmarshalJSON(b []byte) error
type ServedModelStateDeployment ¶
type ServedModelStateDeployment string
The state of the served entity deployment. DEPLOYMENT_CREATING indicates that the served entity is not ready yet because the deployment is still being created (e.g., container image is building, model server is deploying for the first time, etc.). DEPLOYMENT_RECOVERING indicates that the served entity was previously in a ready state but no longer is and is attempting to recover. DEPLOYMENT_READY indicates that the served entity is ready to receive traffic. DEPLOYMENT_FAILED indicates that there was an error trying to bring up the served entity (e.g., container image build failed, the model server failed to start due to a model loading error, etc.). DEPLOYMENT_ABORTED indicates that the deployment was terminated, likely due to a failure in bringing up another served entity under the same endpoint and config version.
const ServedModelStateDeploymentAborted ServedModelStateDeployment = `DEPLOYMENT_ABORTED`
const ServedModelStateDeploymentCreating ServedModelStateDeployment = `DEPLOYMENT_CREATING`
const ServedModelStateDeploymentFailed ServedModelStateDeployment = `DEPLOYMENT_FAILED`
const ServedModelStateDeploymentReady ServedModelStateDeployment = `DEPLOYMENT_READY`
const ServedModelStateDeploymentRecovering ServedModelStateDeployment = `DEPLOYMENT_RECOVERING`
func (*ServedModelStateDeployment) Set ¶
func (f *ServedModelStateDeployment) Set(v string) error
Set raw string value and validate it against allowed values
func (*ServedModelStateDeployment) String ¶
func (f *ServedModelStateDeployment) String() string
String representation for fmt.Print
func (*ServedModelStateDeployment) Type ¶
func (f *ServedModelStateDeployment) Type() string
Type always returns ServedModelStateDeployment to satisfy [pflag.Value] interface
type ServerLogsResponse ¶
type ServerLogsResponse struct { // The most recent log lines of the model server processing invocation // requests. Logs string `json:"logs"` }
type ServingEndpoint ¶
type ServingEndpoint struct { // The config that is currently being served by the endpoint. Config *EndpointCoreConfigSummary `json:"config,omitempty"` // The timestamp when the endpoint was created in Unix time. CreationTimestamp int64 `json:"creation_timestamp,omitempty"` // The email of the user who created the serving endpoint. Creator string `json:"creator,omitempty"` // System-generated ID of the endpoint. This is used to refer to the // endpoint in the Permissions API Id string `json:"id,omitempty"` // The timestamp when the endpoint was last updated by a user in Unix time. LastUpdatedTimestamp int64 `json:"last_updated_timestamp,omitempty"` // The name of the serving endpoint. Name string `json:"name,omitempty"` // Information corresponding to the state of the serving endpoint. State *EndpointState `json:"state,omitempty"` // Tags attached to the serving endpoint. Tags []EndpointTag `json:"tags,omitempty"` // The task type of the serving endpoint. Task string `json:"task,omitempty"` ForceSendFields []string `json:"-"` }
func (ServingEndpoint) MarshalJSON ¶ added in v0.23.0
func (s ServingEndpoint) MarshalJSON() ([]byte, error)
func (*ServingEndpoint) UnmarshalJSON ¶ added in v0.23.0
func (s *ServingEndpoint) UnmarshalJSON(b []byte) error
type ServingEndpointAccessControlRequest ¶ added in v0.15.0
type ServingEndpointAccessControlRequest struct { // name of the group GroupName string `json:"group_name,omitempty"` // Permission level PermissionLevel ServingEndpointPermissionLevel `json:"permission_level,omitempty"` // application ID of a service principal ServicePrincipalName string `json:"service_principal_name,omitempty"` // name of the user UserName string `json:"user_name,omitempty"` ForceSendFields []string `json:"-"` }
func (ServingEndpointAccessControlRequest) MarshalJSON ¶ added in v0.23.0
func (s ServingEndpointAccessControlRequest) MarshalJSON() ([]byte, error)
func (*ServingEndpointAccessControlRequest) UnmarshalJSON ¶ added in v0.23.0
func (s *ServingEndpointAccessControlRequest) UnmarshalJSON(b []byte) error
type ServingEndpointAccessControlResponse ¶ added in v0.15.0
type ServingEndpointAccessControlResponse struct { // All permissions. AllPermissions []ServingEndpointPermission `json:"all_permissions,omitempty"` // Display name of the user or service principal. DisplayName string `json:"display_name,omitempty"` // name of the group GroupName string `json:"group_name,omitempty"` // Name of the service principal. ServicePrincipalName string `json:"service_principal_name,omitempty"` // name of the user UserName string `json:"user_name,omitempty"` ForceSendFields []string `json:"-"` }
func (ServingEndpointAccessControlResponse) MarshalJSON ¶ added in v0.23.0
func (s ServingEndpointAccessControlResponse) MarshalJSON() ([]byte, error)
func (*ServingEndpointAccessControlResponse) UnmarshalJSON ¶ added in v0.23.0
func (s *ServingEndpointAccessControlResponse) UnmarshalJSON(b []byte) error
type ServingEndpointDetailed ¶
type ServingEndpointDetailed struct { // The config that is currently being served by the endpoint. Config *EndpointCoreConfigOutput `json:"config,omitempty"` // The timestamp when the endpoint was created in Unix time. CreationTimestamp int64 `json:"creation_timestamp,omitempty"` // The email of the user who created the serving endpoint. Creator string `json:"creator,omitempty"` // System-generated ID of the endpoint. This is used to refer to the // endpoint in the Permissions API Id string `json:"id,omitempty"` // The timestamp when the endpoint was last updated by a user in Unix time. LastUpdatedTimestamp int64 `json:"last_updated_timestamp,omitempty"` // The name of the serving endpoint. Name string `json:"name,omitempty"` // The config that the endpoint is attempting to update to. PendingConfig *EndpointPendingConfig `json:"pending_config,omitempty"` // The permission level of the principal making the request. PermissionLevel ServingEndpointDetailedPermissionLevel `json:"permission_level,omitempty"` // Information corresponding to the state of the serving endpoint. State *EndpointState `json:"state,omitempty"` // Tags attached to the serving endpoint. Tags []EndpointTag `json:"tags,omitempty"` // The task type of the serving endpoint. Task string `json:"task,omitempty"` ForceSendFields []string `json:"-"` }
func (ServingEndpointDetailed) MarshalJSON ¶ added in v0.23.0
func (s ServingEndpointDetailed) MarshalJSON() ([]byte, error)
func (*ServingEndpointDetailed) UnmarshalJSON ¶ added in v0.23.0
func (s *ServingEndpointDetailed) UnmarshalJSON(b []byte) error
type ServingEndpointDetailedPermissionLevel ¶
type ServingEndpointDetailedPermissionLevel string
The permission level of the principal making the request.
const ServingEndpointDetailedPermissionLevelCanManage ServingEndpointDetailedPermissionLevel = `CAN_MANAGE`
const ServingEndpointDetailedPermissionLevelCanQuery ServingEndpointDetailedPermissionLevel = `CAN_QUERY`
const ServingEndpointDetailedPermissionLevelCanView ServingEndpointDetailedPermissionLevel = `CAN_VIEW`
func (*ServingEndpointDetailedPermissionLevel) Set ¶
func (f *ServingEndpointDetailedPermissionLevel) Set(v string) error
Set raw string value and validate it against allowed values
func (*ServingEndpointDetailedPermissionLevel) String ¶
func (f *ServingEndpointDetailedPermissionLevel) String() string
String representation for fmt.Print
func (*ServingEndpointDetailedPermissionLevel) Type ¶
func (f *ServingEndpointDetailedPermissionLevel) Type() string
Type always returns ServingEndpointDetailedPermissionLevel to satisfy [pflag.Value] interface
type ServingEndpointPermission ¶ added in v0.15.0
type ServingEndpointPermission struct { Inherited bool `json:"inherited,omitempty"` InheritedFromObject []string `json:"inherited_from_object,omitempty"` // Permission level PermissionLevel ServingEndpointPermissionLevel `json:"permission_level,omitempty"` ForceSendFields []string `json:"-"` }
func (ServingEndpointPermission) MarshalJSON ¶ added in v0.23.0
func (s ServingEndpointPermission) MarshalJSON() ([]byte, error)
func (*ServingEndpointPermission) UnmarshalJSON ¶ added in v0.23.0
func (s *ServingEndpointPermission) UnmarshalJSON(b []byte) error
type ServingEndpointPermissionLevel ¶ added in v0.15.0
type ServingEndpointPermissionLevel string
Permission level
const ServingEndpointPermissionLevelCanManage ServingEndpointPermissionLevel = `CAN_MANAGE`
const ServingEndpointPermissionLevelCanQuery ServingEndpointPermissionLevel = `CAN_QUERY`
const ServingEndpointPermissionLevelCanView ServingEndpointPermissionLevel = `CAN_VIEW`
func (*ServingEndpointPermissionLevel) Set ¶ added in v0.15.0
func (f *ServingEndpointPermissionLevel) Set(v string) error
Set raw string value and validate it against allowed values
func (*ServingEndpointPermissionLevel) String ¶ added in v0.15.0
func (f *ServingEndpointPermissionLevel) String() string
String representation for fmt.Print
func (*ServingEndpointPermissionLevel) Type ¶ added in v0.15.0
func (f *ServingEndpointPermissionLevel) Type() string
Type always returns ServingEndpointPermissionLevel to satisfy [pflag.Value] interface
type ServingEndpointPermissions ¶ added in v0.15.0
type ServingEndpointPermissions struct { AccessControlList []ServingEndpointAccessControlResponse `json:"access_control_list,omitempty"` ObjectId string `json:"object_id,omitempty"` ObjectType string `json:"object_type,omitempty"` ForceSendFields []string `json:"-"` }
func (ServingEndpointPermissions) MarshalJSON ¶ added in v0.23.0
func (s ServingEndpointPermissions) MarshalJSON() ([]byte, error)
func (*ServingEndpointPermissions) UnmarshalJSON ¶ added in v0.23.0
func (s *ServingEndpointPermissions) UnmarshalJSON(b []byte) error
type ServingEndpointPermissionsDescription ¶ added in v0.15.0
type ServingEndpointPermissionsDescription struct { Description string `json:"description,omitempty"` // Permission level PermissionLevel ServingEndpointPermissionLevel `json:"permission_level,omitempty"` ForceSendFields []string `json:"-"` }
func (ServingEndpointPermissionsDescription) MarshalJSON ¶ added in v0.23.0
func (s ServingEndpointPermissionsDescription) MarshalJSON() ([]byte, error)
func (*ServingEndpointPermissionsDescription) UnmarshalJSON ¶ added in v0.23.0
func (s *ServingEndpointPermissionsDescription) UnmarshalJSON(b []byte) error
type ServingEndpointPermissionsRequest ¶ added in v0.15.0
type ServingEndpointPermissionsRequest struct { AccessControlList []ServingEndpointAccessControlRequest `json:"access_control_list,omitempty"` // The serving endpoint for which to get or manage permissions. ServingEndpointId string `json:"-" url:"-"` }
type ServingEndpointsAPI ¶
type ServingEndpointsAPI struct {
// contains filtered or unexported fields
}
The Serving Endpoints API allows you to create, update, and delete model serving endpoints.
You can use a serving endpoint to serve models from the Databricks Model Registry or from Unity Catalog. Endpoints expose the underlying models as scalable REST API endpoints using serverless compute. This means the endpoints and associated compute resources are fully managed by Databricks and will not appear in your cloud account. A serving endpoint can consist of one or more MLflow models from the Databricks Model Registry, called served entities. A serving endpoint can have at most ten served entities. You can configure traffic settings to define how requests should be routed to your served entities behind an endpoint. Additionally, you can configure the scale of resources that should be applied to each served entity.
func NewServingEndpoints ¶
func NewServingEndpoints(client *client.DatabricksClient) *ServingEndpointsAPI
func (*ServingEndpointsAPI) BuildLogs ¶
func (a *ServingEndpointsAPI) BuildLogs(ctx context.Context, request BuildLogsRequest) (*BuildLogsResponse, error)
Get build logs for a served model.
Retrieves the build logs associated with the provided served model.
func (*ServingEndpointsAPI) BuildLogsByNameAndServedModelName ¶
func (a *ServingEndpointsAPI) BuildLogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*BuildLogsResponse, error)
Get build logs for a served model.
Retrieves the build logs associated with the provided served model.
func (*ServingEndpointsAPI) Create ¶
func (a *ServingEndpointsAPI) Create(ctx context.Context, createServingEndpoint CreateServingEndpoint) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
Create a new serving endpoint.
func (*ServingEndpointsAPI) CreateAndWait
deprecated
func (a *ServingEndpointsAPI) CreateAndWait(ctx context.Context, createServingEndpoint CreateServingEndpoint, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error)
Calls ServingEndpointsAPI.Create and waits to reach NOT_UPDATING state
You can override the default timeout of 20 minutes by adding the retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option.
Deprecated: use ServingEndpointsAPI.Create.Get() or ServingEndpointsAPI.WaitGetServingEndpointNotUpdating
func (*ServingEndpointsAPI) Delete ¶
func (a *ServingEndpointsAPI) Delete(ctx context.Context, request DeleteServingEndpointRequest) error
Delete a serving endpoint.
func (*ServingEndpointsAPI) DeleteByName ¶
func (a *ServingEndpointsAPI) DeleteByName(ctx context.Context, name string) error
Delete a serving endpoint.
func (*ServingEndpointsAPI) ExportMetrics ¶
func (a *ServingEndpointsAPI) ExportMetrics(ctx context.Context, request ExportMetricsRequest) error
Get metrics of a serving endpoint.
Retrieves the metrics associated with the provided serving endpoint in either Prometheus or OpenMetrics exposition format.
func (*ServingEndpointsAPI) ExportMetricsByName ¶
func (a *ServingEndpointsAPI) ExportMetricsByName(ctx context.Context, name string) error
Get metrics of a serving endpoint.
Retrieves the metrics associated with the provided serving endpoint in either Prometheus or OpenMetrics exposition format.
func (*ServingEndpointsAPI) Get ¶
func (a *ServingEndpointsAPI) Get(ctx context.Context, request GetServingEndpointRequest) (*ServingEndpointDetailed, error)
Get a single serving endpoint.
Retrieves the details for a single serving endpoint.
func (*ServingEndpointsAPI) GetByName ¶
func (a *ServingEndpointsAPI) GetByName(ctx context.Context, name string) (*ServingEndpointDetailed, error)
Get a single serving endpoint.
Retrieves the details for a single serving endpoint.
func (*ServingEndpointsAPI) GetOpenApi ¶ added in v0.39.0
func (a *ServingEndpointsAPI) GetOpenApi(ctx context.Context, request GetOpenApiRequest) error
Get the schema for a serving endpoint.
Get the query schema of the serving endpoint in OpenAPI format. The schema contains information for the supported paths, input and output format and datatypes.
func (*ServingEndpointsAPI) GetOpenApiByName ¶ added in v0.39.0
func (a *ServingEndpointsAPI) GetOpenApiByName(ctx context.Context, name string) error
Get the schema for a serving endpoint.
Get the query schema of the serving endpoint in OpenAPI format. The schema contains information for the supported paths, input and output format and datatypes.
func (*ServingEndpointsAPI) GetPermissionLevels ¶ added in v0.19.0
func (a *ServingEndpointsAPI) GetPermissionLevels(ctx context.Context, request GetServingEndpointPermissionLevelsRequest) (*GetServingEndpointPermissionLevelsResponse, error)
Get serving endpoint permission levels.
Gets the permission levels that a user can have on an object.
func (*ServingEndpointsAPI) GetPermissionLevelsByServingEndpointId ¶ added in v0.19.0
func (a *ServingEndpointsAPI) GetPermissionLevelsByServingEndpointId(ctx context.Context, servingEndpointId string) (*GetServingEndpointPermissionLevelsResponse, error)
Get serving endpoint permission levels.
Gets the permission levels that a user can have on an object.
func (*ServingEndpointsAPI) GetPermissions ¶ added in v0.19.0
func (a *ServingEndpointsAPI) GetPermissions(ctx context.Context, request GetServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
Get serving endpoint permissions.
Gets the permissions of a serving endpoint. Serving endpoints can inherit permissions from their root object.
func (*ServingEndpointsAPI) GetPermissionsByServingEndpointId ¶ added in v0.19.0
func (a *ServingEndpointsAPI) GetPermissionsByServingEndpointId(ctx context.Context, servingEndpointId string) (*ServingEndpointPermissions, error)
Get serving endpoint permissions.
Gets the permissions of a serving endpoint. Serving endpoints can inherit permissions from their root object.
func (*ServingEndpointsAPI) Impl ¶
func (a *ServingEndpointsAPI) Impl() ServingEndpointsService
Impl returns the low-level ServingEndpoints API implementation. Deprecated: use MockServingEndpointsInterface instead.
func (*ServingEndpointsAPI) List ¶
func (a *ServingEndpointsAPI) List(ctx context.Context) listing.Iterator[ServingEndpoint]
Get all serving endpoints.
This method is generated by Databricks SDK Code Generator.
func (*ServingEndpointsAPI) ListAll ¶ added in v0.10.0
func (a *ServingEndpointsAPI) ListAll(ctx context.Context) ([]ServingEndpoint, error)
Get all serving endpoints.
This method is generated by Databricks SDK Code Generator.
func (*ServingEndpointsAPI) Logs ¶
func (a *ServingEndpointsAPI) Logs(ctx context.Context, request LogsRequest) (*ServerLogsResponse, error)
Get the latest logs for a served model.
Retrieves the service logs associated with the provided served model.
func (*ServingEndpointsAPI) LogsByNameAndServedModelName ¶
func (a *ServingEndpointsAPI) LogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*ServerLogsResponse, error)
Get the latest logs for a served model.
Retrieves the service logs associated with the provided served model.
func (*ServingEndpointsAPI) Patch ¶ added in v0.20.0
func (a *ServingEndpointsAPI) Patch(ctx context.Context, request PatchServingEndpointTags) ([]EndpointTag, error)
Update tags of a serving endpoint.
Used to batch add and delete tags from a serving endpoint with a single API call.
func (*ServingEndpointsAPI) Put ¶ added in v0.27.0
func (a *ServingEndpointsAPI) Put(ctx context.Context, request PutRequest) (*PutResponse, error)
Update rate limits of a serving endpoint.
Used to update the rate limits of a serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.
func (*ServingEndpointsAPI) Query ¶
func (a *ServingEndpointsAPI) Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error)
Query a serving endpoint.
func (*ServingEndpointsAPI) SetPermissions ¶ added in v0.19.0
func (a *ServingEndpointsAPI) SetPermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
Set serving endpoint permissions.
Sets permissions on a serving endpoint. Serving endpoints can inherit permissions from their root object.
func (*ServingEndpointsAPI) UpdateConfig ¶
func (a *ServingEndpointsAPI) UpdateConfig(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error)
Update config of a serving endpoint.
Updates any combination of the serving endpoint's served entities, the compute configuration of those served entities, and the endpoint's traffic config. An endpoint that already has an update in progress cannot be updated until the current update completes or fails.
func (*ServingEndpointsAPI) UpdateConfigAndWait
deprecated
func (a *ServingEndpointsAPI) UpdateConfigAndWait(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error)
Calls ServingEndpointsAPI.UpdateConfig and waits to reach NOT_UPDATING state
You can override the default timeout of 20 minutes by adding the retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option.
Deprecated: use ServingEndpointsAPI.UpdateConfig.Get() or ServingEndpointsAPI.WaitGetServingEndpointNotUpdating
func (*ServingEndpointsAPI) UpdatePermissions ¶ added in v0.19.0
func (a *ServingEndpointsAPI) UpdatePermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error)
Update serving endpoint permissions.
Updates the permissions on a serving endpoint. Serving endpoints can inherit permissions from their root object.
func (*ServingEndpointsAPI) WaitGetServingEndpointNotUpdating ¶ added in v0.10.0
func (a *ServingEndpointsAPI) WaitGetServingEndpointNotUpdating(ctx context.Context, name string, timeout time.Duration, callback func(*ServingEndpointDetailed)) (*ServingEndpointDetailed, error)
WaitGetServingEndpointNotUpdating repeatedly calls ServingEndpointsAPI.Get and waits to reach NOT_UPDATING state
func (*ServingEndpointsAPI) WithImpl ¶
func (a *ServingEndpointsAPI) WithImpl(impl ServingEndpointsService) ServingEndpointsInterface
WithImpl can be used to override low-level API implementations for unit testing purposes with github.com/golang/mock or other mocking frameworks. Deprecated: use MockServingEndpointsInterface instead.
type ServingEndpointsInterface ¶ added in v0.29.0
type ServingEndpointsInterface interface { // WithImpl could be used to override low-level API implementations for unit // testing purposes with [github.com/golang/mock] or other mocking frameworks. // Deprecated: use MockServingEndpointsInterface instead. WithImpl(impl ServingEndpointsService) ServingEndpointsInterface // Impl returns low-level ServingEndpoints API implementation // Deprecated: use MockServingEndpointsInterface instead. Impl() ServingEndpointsService // WaitGetServingEndpointNotUpdating repeatedly calls [ServingEndpointsAPI.Get] and waits to reach NOT_UPDATING state WaitGetServingEndpointNotUpdating(ctx context.Context, name string, timeout time.Duration, callback func(*ServingEndpointDetailed)) (*ServingEndpointDetailed, error) // Get build logs for a served model. // // Retrieves the build logs associated with the provided served model. BuildLogs(ctx context.Context, request BuildLogsRequest) (*BuildLogsResponse, error) // Get build logs for a served model. // // Retrieves the build logs associated with the provided served model. BuildLogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*BuildLogsResponse, error) // Create a new serving endpoint. Create(ctx context.Context, createServingEndpoint CreateServingEndpoint) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error) // Calls [ServingEndpointsAPIInterface.Create] and waits to reach NOT_UPDATING state // // You can override the default timeout of 20 minutes by calling adding // retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option. // // Deprecated: use [ServingEndpointsAPIInterface.Create].Get() or [ServingEndpointsAPIInterface.WaitGetServingEndpointNotUpdating] CreateAndWait(ctx context.Context, createServingEndpoint CreateServingEndpoint, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error) // Delete a serving endpoint. 
Delete(ctx context.Context, request DeleteServingEndpointRequest) error // Delete a serving endpoint. DeleteByName(ctx context.Context, name string) error // Get metrics of a serving endpoint. // // Retrieves the metrics associated with the provided serving endpoint in either // Prometheus or OpenMetrics exposition format. ExportMetrics(ctx context.Context, request ExportMetricsRequest) error // Get metrics of a serving endpoint. // // Retrieves the metrics associated with the provided serving endpoint in either // Prometheus or OpenMetrics exposition format. ExportMetricsByName(ctx context.Context, name string) error // Get a single serving endpoint. // // Retrieves the details for a single serving endpoint. Get(ctx context.Context, request GetServingEndpointRequest) (*ServingEndpointDetailed, error) // Get a single serving endpoint. // // Retrieves the details for a single serving endpoint. GetByName(ctx context.Context, name string) (*ServingEndpointDetailed, error) // Get the schema for a serving endpoint. // // Get the query schema of the serving endpoint in OpenAPI format. The schema // contains information for the supported paths, input and output format and // datatypes. GetOpenApi(ctx context.Context, request GetOpenApiRequest) error // Get the schema for a serving endpoint. // // Get the query schema of the serving endpoint in OpenAPI format. The schema // contains information for the supported paths, input and output format and // datatypes. GetOpenApiByName(ctx context.Context, name string) error // Get serving endpoint permission levels. // // Gets the permission levels that a user can have on an object. GetPermissionLevels(ctx context.Context, request GetServingEndpointPermissionLevelsRequest) (*GetServingEndpointPermissionLevelsResponse, error) // Get serving endpoint permission levels. // // Gets the permission levels that a user can have on an object. 
GetPermissionLevelsByServingEndpointId(ctx context.Context, servingEndpointId string) (*GetServingEndpointPermissionLevelsResponse, error) // Get serving endpoint permissions. // // Gets the permissions of a serving endpoint. Serving endpoints can inherit // permissions from their root object. GetPermissions(ctx context.Context, request GetServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error) // Get serving endpoint permissions. // // Gets the permissions of a serving endpoint. Serving endpoints can inherit // permissions from their root object. GetPermissionsByServingEndpointId(ctx context.Context, servingEndpointId string) (*ServingEndpointPermissions, error) // Get all serving endpoints. // // This method is generated by Databricks SDK Code Generator. List(ctx context.Context) listing.Iterator[ServingEndpoint] // Get all serving endpoints. // // This method is generated by Databricks SDK Code Generator. ListAll(ctx context.Context) ([]ServingEndpoint, error) // Get the latest logs for a served model. // // Retrieves the service logs associated with the provided served model. Logs(ctx context.Context, request LogsRequest) (*ServerLogsResponse, error) // Get the latest logs for a served model. // // Retrieves the service logs associated with the provided served model. LogsByNameAndServedModelName(ctx context.Context, name string, servedModelName string) (*ServerLogsResponse, error) // Update tags of a serving endpoint. // // Used to batch add and delete tags from a serving endpoint with a single API // call. Patch(ctx context.Context, request PatchServingEndpointTags) ([]EndpointTag, error) // Update rate limits of a serving endpoint. // // Used to update the rate limits of a serving endpoint. NOTE: only external and // foundation model endpoints are supported as of now. Put(ctx context.Context, request PutRequest) (*PutResponse, error) // Query a serving endpoint. 
Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error) // Set serving endpoint permissions. // // Sets permissions on a serving endpoint. Serving endpoints can inherit // permissions from their root object. SetPermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error) // Update config of a serving endpoint. // // Updates any combination of the serving endpoint's served entities, the // compute configuration of those served entities, and the endpoint's traffic // config. An endpoint that already has an update in progress can not be updated // until the current update completes or fails. UpdateConfig(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput) (*WaitGetServingEndpointNotUpdating[ServingEndpointDetailed], error) // Calls [ServingEndpointsAPIInterface.UpdateConfig] and waits to reach NOT_UPDATING state // // You can override the default timeout of 20 minutes by calling adding // retries.Timeout[ServingEndpointDetailed](60*time.Minute) functional option. // // Deprecated: use [ServingEndpointsAPIInterface.UpdateConfig].Get() or [ServingEndpointsAPIInterface.WaitGetServingEndpointNotUpdating] UpdateConfigAndWait(ctx context.Context, endpointCoreConfigInput EndpointCoreConfigInput, options ...retries.Option[ServingEndpointDetailed]) (*ServingEndpointDetailed, error) // Update serving endpoint permissions. // // Updates the permissions on a serving endpoint. Serving endpoints can inherit // permissions from their root object. UpdatePermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error) }
type ServingEndpointsService ¶
type ServingEndpointsService interface { // Get build logs for a served model. // // Retrieves the build logs associated with the provided served model. BuildLogs(ctx context.Context, request BuildLogsRequest) (*BuildLogsResponse, error) // Create a new serving endpoint. Create(ctx context.Context, request CreateServingEndpoint) (*ServingEndpointDetailed, error) // Delete a serving endpoint. Delete(ctx context.Context, request DeleteServingEndpointRequest) error // Get metrics of a serving endpoint. // // Retrieves the metrics associated with the provided serving endpoint in // either Prometheus or OpenMetrics exposition format. ExportMetrics(ctx context.Context, request ExportMetricsRequest) error // Get a single serving endpoint. // // Retrieves the details for a single serving endpoint. Get(ctx context.Context, request GetServingEndpointRequest) (*ServingEndpointDetailed, error) // Get the schema for a serving endpoint. // // Get the query schema of the serving endpoint in OpenAPI format. The // schema contains information for the supported paths, input and output // format and datatypes. GetOpenApi(ctx context.Context, request GetOpenApiRequest) error // Get serving endpoint permission levels. // // Gets the permission levels that a user can have on an object. GetPermissionLevels(ctx context.Context, request GetServingEndpointPermissionLevelsRequest) (*GetServingEndpointPermissionLevelsResponse, error) // Get serving endpoint permissions. // // Gets the permissions of a serving endpoint. Serving endpoints can inherit // permissions from their root object. GetPermissions(ctx context.Context, request GetServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error) // Get all serving endpoints. // // Use ListAll() to get all ServingEndpoint instances List(ctx context.Context) (*ListEndpointsResponse, error) // Get the latest logs for a served model. // // Retrieves the service logs associated with the provided served model. 
Logs(ctx context.Context, request LogsRequest) (*ServerLogsResponse, error) // Update tags of a serving endpoint. // // Used to batch add and delete tags from a serving endpoint with a single // API call. Patch(ctx context.Context, request PatchServingEndpointTags) ([]EndpointTag, error) // Update rate limits of a serving endpoint. // // Used to update the rate limits of a serving endpoint. NOTE: only external // and foundation model endpoints are supported as of now. Put(ctx context.Context, request PutRequest) (*PutResponse, error) // Query a serving endpoint. Query(ctx context.Context, request QueryEndpointInput) (*QueryEndpointResponse, error) // Set serving endpoint permissions. // // Sets permissions on a serving endpoint. Serving endpoints can inherit // permissions from their root object. SetPermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error) // Update config of a serving endpoint. // // Updates any combination of the serving endpoint's served entities, the // compute configuration of those served entities, and the endpoint's // traffic config. An endpoint that already has an update in progress can // not be updated until the current update completes or fails. UpdateConfig(ctx context.Context, request EndpointCoreConfigInput) (*ServingEndpointDetailed, error) // Update serving endpoint permissions. // // Updates the permissions on a serving endpoint. Serving endpoints can // inherit permissions from their root object. UpdatePermissions(ctx context.Context, request ServingEndpointPermissionsRequest) (*ServingEndpointPermissions, error) }
The Serving Endpoints API allows you to create, update, and delete model serving endpoints.
You can use a serving endpoint to serve models from the Databricks Model Registry or from Unity Catalog. Endpoints expose the underlying models as scalable REST API endpoints using serverless compute. This means the endpoints and associated compute resources are fully managed by Databricks and will not appear in your cloud account. A serving endpoint can consist of one or more MLflow models from the Databricks Model Registry, called served entities. A serving endpoint can have at most ten served entities. You can configure traffic settings to define how requests should be routed to your served entities behind an endpoint. Additionally, you can configure the scale of resources that should be applied to each served entity.
type StopAppRequest ¶ added in v0.40.0
type StopAppRequest struct { // The name of the app. Name string `json:"-" url:"-"` }
type StopAppResponse ¶ added in v0.40.0
type StopAppResponse struct { }
type TrafficConfig ¶
type TrafficConfig struct { // The list of routes that define traffic to each served entity. Routes []Route `json:"routes,omitempty"` }
type UpdateAppRequest ¶ added in v0.40.0
type UpdateAppRequest struct { // The description of the app. Description string `json:"description,omitempty"` // The name of the app. The name must contain only lowercase alphanumeric // characters and hyphens and be between 2 and 30 characters long. It must // be unique within the workspace. Name string `json:"name" url:"-"` ForceSendFields []string `json:"-"` }
func (UpdateAppRequest) MarshalJSON ¶ added in v0.40.0
func (s UpdateAppRequest) MarshalJSON() ([]byte, error)
func (*UpdateAppRequest) UnmarshalJSON ¶ added in v0.40.0
func (s *UpdateAppRequest) UnmarshalJSON(b []byte) error
type V1ResponseChoiceElement ¶ added in v0.27.0
type V1ResponseChoiceElement struct { // The finish reason returned by the endpoint. FinishReason string `json:"finishReason,omitempty"` // The index of the choice in the __chat or completions__ response. Index int `json:"index,omitempty"` // The logprobs returned only by the __completions__ endpoint. Logprobs int `json:"logprobs,omitempty"` // The message response from the __chat__ endpoint. Message *ChatMessage `json:"message,omitempty"` // The text response from the __completions__ endpoint. Text string `json:"text,omitempty"` ForceSendFields []string `json:"-"` }
func (V1ResponseChoiceElement) MarshalJSON ¶ added in v0.27.0
func (s V1ResponseChoiceElement) MarshalJSON() ([]byte, error)
func (*V1ResponseChoiceElement) UnmarshalJSON ¶ added in v0.27.0
func (s *V1ResponseChoiceElement) UnmarshalJSON(b []byte) error
type WaitGetAppIdle ¶ added in v0.40.0
type WaitGetAppIdle[R any] struct { Response *R Name string `json:"name"` Poll func(time.Duration, func(*App)) (*App, error) // contains filtered or unexported fields }
WaitGetAppIdle is a wrapper that calls AppsAPI.WaitGetAppIdle and waits to reach IDLE state.
func (*WaitGetAppIdle[R]) Get ¶ added in v0.40.0
func (w *WaitGetAppIdle[R]) Get() (*App, error)
Get the App with the default timeout of 20 minutes.
func (*WaitGetAppIdle[R]) GetWithTimeout ¶ added in v0.40.0
func (w *WaitGetAppIdle[R]) GetWithTimeout(timeout time.Duration) (*App, error)
Get the App with a custom timeout.
func (*WaitGetAppIdle[R]) OnProgress ¶ added in v0.40.0
func (w *WaitGetAppIdle[R]) OnProgress(callback func(*App)) *WaitGetAppIdle[R]
OnProgress invokes a callback every time it polls for the status update.
type WaitGetDeploymentAppSucceeded ¶ added in v0.40.0
type WaitGetDeploymentAppSucceeded[R any] struct { Response *R AppName string `json:"app_name"` DeploymentId string `json:"deployment_id"` Poll func(time.Duration, func(*AppDeployment)) (*AppDeployment, error) // contains filtered or unexported fields }
WaitGetDeploymentAppSucceeded is a wrapper that calls AppsAPI.WaitGetDeploymentAppSucceeded and waits to reach SUCCEEDED state.
func (*WaitGetDeploymentAppSucceeded[R]) Get ¶ added in v0.40.0
func (w *WaitGetDeploymentAppSucceeded[R]) Get() (*AppDeployment, error)
Get the AppDeployment with the default timeout of 20 minutes.
func (*WaitGetDeploymentAppSucceeded[R]) GetWithTimeout ¶ added in v0.40.0
func (w *WaitGetDeploymentAppSucceeded[R]) GetWithTimeout(timeout time.Duration) (*AppDeployment, error)
Get the AppDeployment with a custom timeout.
func (*WaitGetDeploymentAppSucceeded[R]) OnProgress ¶ added in v0.40.0
func (w *WaitGetDeploymentAppSucceeded[R]) OnProgress(callback func(*AppDeployment)) *WaitGetDeploymentAppSucceeded[R]
OnProgress invokes a callback every time it polls for the status update.
type WaitGetServingEndpointNotUpdating ¶ added in v0.10.0
type WaitGetServingEndpointNotUpdating[R any] struct { Response *R Name string `json:"name"` Poll func(time.Duration, func(*ServingEndpointDetailed)) (*ServingEndpointDetailed, error) // contains filtered or unexported fields }
WaitGetServingEndpointNotUpdating is a wrapper that calls ServingEndpointsAPI.WaitGetServingEndpointNotUpdating and waits to reach NOT_UPDATING state.
func (*WaitGetServingEndpointNotUpdating[R]) Get ¶ added in v0.10.0
func (w *WaitGetServingEndpointNotUpdating[R]) Get() (*ServingEndpointDetailed, error)
Get the ServingEndpointDetailed with the default timeout of 20 minutes.
func (*WaitGetServingEndpointNotUpdating[R]) GetWithTimeout ¶ added in v0.10.0
func (w *WaitGetServingEndpointNotUpdating[R]) GetWithTimeout(timeout time.Duration) (*ServingEndpointDetailed, error)
Get the ServingEndpointDetailed with a custom timeout.
func (*WaitGetServingEndpointNotUpdating[R]) OnProgress ¶ added in v0.10.0
func (w *WaitGetServingEndpointNotUpdating[R]) OnProgress(callback func(*ServingEndpointDetailed)) *WaitGetServingEndpointNotUpdating[R]
OnProgress invokes a callback every time it polls for the status update.