Documentation ¶
Index ¶
- Variables
- func RegisterSpeechServer(s *grpc.Server, srv SpeechServer)
- type LongRunningRecognizeMetadata
- func (*LongRunningRecognizeMetadata) Descriptor() ([]byte, []int) (deprecated)
- func (x *LongRunningRecognizeMetadata) GetLastUpdateTime() *timestamppb.Timestamp
- func (x *LongRunningRecognizeMetadata) GetProgressPercent() int32
- func (x *LongRunningRecognizeMetadata) GetStartTime() *timestamppb.Timestamp
- func (x *LongRunningRecognizeMetadata) GetUri() string
- func (*LongRunningRecognizeMetadata) ProtoMessage()
- func (x *LongRunningRecognizeMetadata) ProtoReflect() protoreflect.Message
- func (x *LongRunningRecognizeMetadata) Reset()
- func (x *LongRunningRecognizeMetadata) String() string
- type LongRunningRecognizeRequest
- func (*LongRunningRecognizeRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *LongRunningRecognizeRequest) GetAudio() *RecognitionAudio
- func (x *LongRunningRecognizeRequest) GetConfig() *RecognitionConfig
- func (x *LongRunningRecognizeRequest) GetOutputConfig() *TranscriptOutputConfig
- func (*LongRunningRecognizeRequest) ProtoMessage()
- func (x *LongRunningRecognizeRequest) ProtoReflect() protoreflect.Message
- func (x *LongRunningRecognizeRequest) Reset()
- func (x *LongRunningRecognizeRequest) String() string
- type LongRunningRecognizeResponse
- func (*LongRunningRecognizeResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *LongRunningRecognizeResponse) GetResults() []*SpeechRecognitionResult
- func (x *LongRunningRecognizeResponse) GetTotalBilledTime() *durationpb.Duration
- func (*LongRunningRecognizeResponse) ProtoMessage()
- func (x *LongRunningRecognizeResponse) ProtoReflect() protoreflect.Message
- func (x *LongRunningRecognizeResponse) Reset()
- func (x *LongRunningRecognizeResponse) String() string
- type RecognitionAudio
- func (*RecognitionAudio) Descriptor() ([]byte, []int) (deprecated)
- func (m *RecognitionAudio) GetAudioSource() isRecognitionAudio_AudioSource
- func (x *RecognitionAudio) GetContent() []byte
- func (x *RecognitionAudio) GetUri() string
- func (*RecognitionAudio) ProtoMessage()
- func (x *RecognitionAudio) ProtoReflect() protoreflect.Message
- func (x *RecognitionAudio) Reset()
- func (x *RecognitionAudio) String() string
- type RecognitionAudio_Content
- type RecognitionAudio_Uri
- type RecognitionConfig
- func (*RecognitionConfig) Descriptor() ([]byte, []int) (deprecated)
- func (x *RecognitionConfig) GetAudioChannelCount() int32
- func (x *RecognitionConfig) GetDiarizationConfig() *SpeakerDiarizationConfig
- func (x *RecognitionConfig) GetEnableAutomaticPunctuation() bool
- func (x *RecognitionConfig) GetEnableSeparateRecognitionPerChannel() bool
- func (x *RecognitionConfig) GetEnableWordTimeOffsets() bool
- func (x *RecognitionConfig) GetEncoding() RecognitionConfig_AudioEncoding
- func (x *RecognitionConfig) GetLanguageCode() string
- func (x *RecognitionConfig) GetMaxAlternatives() int32
- func (x *RecognitionConfig) GetMetadata() *RecognitionMetadata
- func (x *RecognitionConfig) GetModel() string
- func (x *RecognitionConfig) GetProfanityFilter() bool
- func (x *RecognitionConfig) GetSampleRateHertz() int32
- func (x *RecognitionConfig) GetSpeechContexts() []*SpeechContext
- func (x *RecognitionConfig) GetUseEnhanced() bool
- func (*RecognitionConfig) ProtoMessage()
- func (x *RecognitionConfig) ProtoReflect() protoreflect.Message
- func (x *RecognitionConfig) Reset()
- func (x *RecognitionConfig) String() string
- type RecognitionConfig_AudioEncoding
- func (RecognitionConfig_AudioEncoding) Descriptor() protoreflect.EnumDescriptor
- func (x RecognitionConfig_AudioEncoding) Enum() *RecognitionConfig_AudioEncoding
- func (RecognitionConfig_AudioEncoding) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x RecognitionConfig_AudioEncoding) Number() protoreflect.EnumNumber
- func (x RecognitionConfig_AudioEncoding) String() string
- func (RecognitionConfig_AudioEncoding) Type() protoreflect.EnumType
- type RecognitionMetadata
- func (*RecognitionMetadata) Descriptor() ([]byte, []int) (deprecated)
- func (x *RecognitionMetadata) GetAudioTopic() string
- func (x *RecognitionMetadata) GetIndustryNaicsCodeOfAudio() uint32
- func (x *RecognitionMetadata) GetInteractionType() RecognitionMetadata_InteractionType
- func (x *RecognitionMetadata) GetMicrophoneDistance() RecognitionMetadata_MicrophoneDistance
- func (x *RecognitionMetadata) GetOriginalMediaType() RecognitionMetadata_OriginalMediaType
- func (x *RecognitionMetadata) GetOriginalMimeType() string
- func (x *RecognitionMetadata) GetRecordingDeviceName() string
- func (x *RecognitionMetadata) GetRecordingDeviceType() RecognitionMetadata_RecordingDeviceType
- func (*RecognitionMetadata) ProtoMessage()
- func (x *RecognitionMetadata) ProtoReflect() protoreflect.Message
- func (x *RecognitionMetadata) Reset()
- func (x *RecognitionMetadata) String() string
- type RecognitionMetadata_InteractionType
- func (RecognitionMetadata_InteractionType) Descriptor() protoreflect.EnumDescriptor
- func (x RecognitionMetadata_InteractionType) Enum() *RecognitionMetadata_InteractionType
- func (RecognitionMetadata_InteractionType) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x RecognitionMetadata_InteractionType) Number() protoreflect.EnumNumber
- func (x RecognitionMetadata_InteractionType) String() string
- func (RecognitionMetadata_InteractionType) Type() protoreflect.EnumType
- type RecognitionMetadata_MicrophoneDistance
- func (RecognitionMetadata_MicrophoneDistance) Descriptor() protoreflect.EnumDescriptor
- func (x RecognitionMetadata_MicrophoneDistance) Enum() *RecognitionMetadata_MicrophoneDistance
- func (RecognitionMetadata_MicrophoneDistance) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x RecognitionMetadata_MicrophoneDistance) Number() protoreflect.EnumNumber
- func (x RecognitionMetadata_MicrophoneDistance) String() string
- func (RecognitionMetadata_MicrophoneDistance) Type() protoreflect.EnumType
- type RecognitionMetadata_OriginalMediaType
- func (RecognitionMetadata_OriginalMediaType) Descriptor() protoreflect.EnumDescriptor
- func (x RecognitionMetadata_OriginalMediaType) Enum() *RecognitionMetadata_OriginalMediaType
- func (RecognitionMetadata_OriginalMediaType) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x RecognitionMetadata_OriginalMediaType) Number() protoreflect.EnumNumber
- func (x RecognitionMetadata_OriginalMediaType) String() string
- func (RecognitionMetadata_OriginalMediaType) Type() protoreflect.EnumType
- type RecognitionMetadata_RecordingDeviceType
- func (RecognitionMetadata_RecordingDeviceType) Descriptor() protoreflect.EnumDescriptor
- func (x RecognitionMetadata_RecordingDeviceType) Enum() *RecognitionMetadata_RecordingDeviceType
- func (RecognitionMetadata_RecordingDeviceType) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x RecognitionMetadata_RecordingDeviceType) Number() protoreflect.EnumNumber
- func (x RecognitionMetadata_RecordingDeviceType) String() string
- func (RecognitionMetadata_RecordingDeviceType) Type() protoreflect.EnumType
- type RecognizeRequest
- func (*RecognizeRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *RecognizeRequest) GetAudio() *RecognitionAudio
- func (x *RecognizeRequest) GetConfig() *RecognitionConfig
- func (*RecognizeRequest) ProtoMessage()
- func (x *RecognizeRequest) ProtoReflect() protoreflect.Message
- func (x *RecognizeRequest) Reset()
- func (x *RecognizeRequest) String() string
- type RecognizeResponse
- func (*RecognizeResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *RecognizeResponse) GetResults() []*SpeechRecognitionResult
- func (x *RecognizeResponse) GetTotalBilledTime() *durationpb.Duration
- func (*RecognizeResponse) ProtoMessage()
- func (x *RecognizeResponse) ProtoReflect() protoreflect.Message
- func (x *RecognizeResponse) Reset()
- func (x *RecognizeResponse) String() string
- type SpeakerDiarizationConfig
- func (*SpeakerDiarizationConfig) Descriptor() ([]byte, []int) (deprecated)
- func (x *SpeakerDiarizationConfig) GetEnableSpeakerDiarization() bool
- func (x *SpeakerDiarizationConfig) GetMaxSpeakerCount() int32
- func (x *SpeakerDiarizationConfig) GetMinSpeakerCount() int32
- func (x *SpeakerDiarizationConfig) GetSpeakerTag() int32 (deprecated)
- func (*SpeakerDiarizationConfig) ProtoMessage()
- func (x *SpeakerDiarizationConfig) ProtoReflect() protoreflect.Message
- func (x *SpeakerDiarizationConfig) Reset()
- func (x *SpeakerDiarizationConfig) String() string
- type SpeechClient
- type SpeechContext
- type SpeechRecognitionAlternative
- func (*SpeechRecognitionAlternative) Descriptor() ([]byte, []int) (deprecated)
- func (x *SpeechRecognitionAlternative) GetConfidence() float32
- func (x *SpeechRecognitionAlternative) GetTranscript() string
- func (x *SpeechRecognitionAlternative) GetWords() []*WordInfo
- func (*SpeechRecognitionAlternative) ProtoMessage()
- func (x *SpeechRecognitionAlternative) ProtoReflect() protoreflect.Message
- func (x *SpeechRecognitionAlternative) Reset()
- func (x *SpeechRecognitionAlternative) String() string
- type SpeechRecognitionResult
- func (*SpeechRecognitionResult) Descriptor() ([]byte, []int) (deprecated)
- func (x *SpeechRecognitionResult) GetAlternatives() []*SpeechRecognitionAlternative
- func (x *SpeechRecognitionResult) GetChannelTag() int32
- func (*SpeechRecognitionResult) ProtoMessage()
- func (x *SpeechRecognitionResult) ProtoReflect() protoreflect.Message
- func (x *SpeechRecognitionResult) Reset()
- func (x *SpeechRecognitionResult) String() string
- type SpeechServer
- type Speech_StreamingRecognizeClient
- type Speech_StreamingRecognizeServer
- type StreamingRecognitionConfig
- func (*StreamingRecognitionConfig) Descriptor() ([]byte, []int) (deprecated)
- func (x *StreamingRecognitionConfig) GetConfig() *RecognitionConfig
- func (x *StreamingRecognitionConfig) GetInterimResults() bool
- func (x *StreamingRecognitionConfig) GetSingleUtterance() bool
- func (*StreamingRecognitionConfig) ProtoMessage()
- func (x *StreamingRecognitionConfig) ProtoReflect() protoreflect.Message
- func (x *StreamingRecognitionConfig) Reset()
- func (x *StreamingRecognitionConfig) String() string
- type StreamingRecognitionResult
- func (*StreamingRecognitionResult) Descriptor() ([]byte, []int) (deprecated)
- func (x *StreamingRecognitionResult) GetAlternatives() []*SpeechRecognitionAlternative
- func (x *StreamingRecognitionResult) GetChannelTag() int32
- func (x *StreamingRecognitionResult) GetIsFinal() bool
- func (x *StreamingRecognitionResult) GetLanguageCode() string
- func (x *StreamingRecognitionResult) GetResultEndTime() *durationpb.Duration
- func (x *StreamingRecognitionResult) GetStability() float32
- func (*StreamingRecognitionResult) ProtoMessage()
- func (x *StreamingRecognitionResult) ProtoReflect() protoreflect.Message
- func (x *StreamingRecognitionResult) Reset()
- func (x *StreamingRecognitionResult) String() string
- type StreamingRecognizeRequest
- func (*StreamingRecognizeRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *StreamingRecognizeRequest) GetAudioContent() []byte
- func (x *StreamingRecognizeRequest) GetStreamingConfig() *StreamingRecognitionConfig
- func (m *StreamingRecognizeRequest) GetStreamingRequest() isStreamingRecognizeRequest_StreamingRequest
- func (*StreamingRecognizeRequest) ProtoMessage()
- func (x *StreamingRecognizeRequest) ProtoReflect() protoreflect.Message
- func (x *StreamingRecognizeRequest) Reset()
- func (x *StreamingRecognizeRequest) String() string
- type StreamingRecognizeRequest_AudioContent
- type StreamingRecognizeRequest_StreamingConfig
- type StreamingRecognizeResponse
- func (*StreamingRecognizeResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *StreamingRecognizeResponse) GetError() *status.Status
- func (x *StreamingRecognizeResponse) GetResults() []*StreamingRecognitionResult
- func (x *StreamingRecognizeResponse) GetSpeechEventType() StreamingRecognizeResponse_SpeechEventType
- func (x *StreamingRecognizeResponse) GetTotalBilledTime() *durationpb.Duration
- func (*StreamingRecognizeResponse) ProtoMessage()
- func (x *StreamingRecognizeResponse) ProtoReflect() protoreflect.Message
- func (x *StreamingRecognizeResponse) Reset()
- func (x *StreamingRecognizeResponse) String() string
- type StreamingRecognizeResponse_SpeechEventType
- func (StreamingRecognizeResponse_SpeechEventType) Descriptor() protoreflect.EnumDescriptor
- func (x StreamingRecognizeResponse_SpeechEventType) Enum() *StreamingRecognizeResponse_SpeechEventType
- func (StreamingRecognizeResponse_SpeechEventType) EnumDescriptor() ([]byte, []int) (deprecated)
- func (x StreamingRecognizeResponse_SpeechEventType) Number() protoreflect.EnumNumber
- func (x StreamingRecognizeResponse_SpeechEventType) String() string
- func (StreamingRecognizeResponse_SpeechEventType) Type() protoreflect.EnumType
- type TranscriptOutputConfig
- func (*TranscriptOutputConfig) Descriptor() ([]byte, []int) (deprecated)
- func (x *TranscriptOutputConfig) GetGcsUri() string
- func (m *TranscriptOutputConfig) GetOutputType() isTranscriptOutputConfig_OutputType
- func (*TranscriptOutputConfig) ProtoMessage()
- func (x *TranscriptOutputConfig) ProtoReflect() protoreflect.Message
- func (x *TranscriptOutputConfig) Reset()
- func (x *TranscriptOutputConfig) String() string
- type TranscriptOutputConfig_GcsUri
- type UnimplementedSpeechServer
- func (*UnimplementedSpeechServer) LongRunningRecognize(context.Context, *LongRunningRecognizeRequest) (*longrunning.Operation, error)
- func (*UnimplementedSpeechServer) Recognize(context.Context, *RecognizeRequest) (*RecognizeResponse, error)
- func (*UnimplementedSpeechServer) StreamingRecognize(Speech_StreamingRecognizeServer) error
- type WordInfo
- func (*WordInfo) Descriptor() ([]byte, []int) (deprecated)
- func (x *WordInfo) GetEndTime() *durationpb.Duration
- func (x *WordInfo) GetSpeakerTag() int32
- func (x *WordInfo) GetStartTime() *durationpb.Duration
- func (x *WordInfo) GetWord() string
- func (*WordInfo) ProtoMessage()
- func (x *WordInfo) ProtoReflect() protoreflect.Message
- func (x *WordInfo) Reset()
- func (x *WordInfo) String() string
Constants ¶
This section is empty.
Variables ¶
var (
    RecognitionConfig_AudioEncoding_name = map[int32]string{
        0: "ENCODING_UNSPECIFIED",
        1: "LINEAR16",
        2: "FLAC",
        3: "MULAW",
        4: "AMR",
        5: "AMR_WB",
        6: "OGG_OPUS",
        7: "SPEEX_WITH_HEADER_BYTE",
    }
    RecognitionConfig_AudioEncoding_value = map[string]int32{
        "ENCODING_UNSPECIFIED":   0,
        "LINEAR16":               1,
        "FLAC":                   2,
        "MULAW":                  3,
        "AMR":                    4,
        "AMR_WB":                 5,
        "OGG_OPUS":               6,
        "SPEEX_WITH_HEADER_BYTE": 7,
    }
)
Enum value maps for RecognitionConfig_AudioEncoding.
var (
    RecognitionMetadata_InteractionType_name = map[int32]string{
        0: "INTERACTION_TYPE_UNSPECIFIED",
        1: "DISCUSSION",
        2: "PRESENTATION",
        3: "PHONE_CALL",
        4: "VOICEMAIL",
        5: "PROFESSIONALLY_PRODUCED",
        6: "VOICE_SEARCH",
        7: "VOICE_COMMAND",
        8: "DICTATION",
    }
    RecognitionMetadata_InteractionType_value = map[string]int32{
        "INTERACTION_TYPE_UNSPECIFIED": 0,
        "DISCUSSION":                   1,
        "PRESENTATION":                 2,
        "PHONE_CALL":                   3,
        "VOICEMAIL":                    4,
        "PROFESSIONALLY_PRODUCED":      5,
        "VOICE_SEARCH":                 6,
        "VOICE_COMMAND":                7,
        "DICTATION":                    8,
    }
)
Enum value maps for RecognitionMetadata_InteractionType.
var (
    RecognitionMetadata_MicrophoneDistance_name = map[int32]string{
        0: "MICROPHONE_DISTANCE_UNSPECIFIED",
        1: "NEARFIELD",
        2: "MIDFIELD",
        3: "FARFIELD",
    }
    RecognitionMetadata_MicrophoneDistance_value = map[string]int32{
        "MICROPHONE_DISTANCE_UNSPECIFIED": 0,
        "NEARFIELD":                       1,
        "MIDFIELD":                        2,
        "FARFIELD":                        3,
    }
)
Enum value maps for RecognitionMetadata_MicrophoneDistance.
var (
    RecognitionMetadata_OriginalMediaType_name = map[int32]string{
        0: "ORIGINAL_MEDIA_TYPE_UNSPECIFIED",
        1: "AUDIO",
        2: "VIDEO",
    }
    RecognitionMetadata_OriginalMediaType_value = map[string]int32{
        "ORIGINAL_MEDIA_TYPE_UNSPECIFIED": 0,
        "AUDIO":                           1,
        "VIDEO":                           2,
    }
)
Enum value maps for RecognitionMetadata_OriginalMediaType.
var (
    RecognitionMetadata_RecordingDeviceType_name = map[int32]string{
        0: "RECORDING_DEVICE_TYPE_UNSPECIFIED",
        1: "SMARTPHONE",
        2: "PC",
        3: "PHONE_LINE",
        4: "VEHICLE",
        5: "OTHER_OUTDOOR_DEVICE",
        6: "OTHER_INDOOR_DEVICE",
    }
    RecognitionMetadata_RecordingDeviceType_value = map[string]int32{
        "RECORDING_DEVICE_TYPE_UNSPECIFIED": 0,
        "SMARTPHONE":                        1,
        "PC":                                2,
        "PHONE_LINE":                        3,
        "VEHICLE":                           4,
        "OTHER_OUTDOOR_DEVICE":              5,
        "OTHER_INDOOR_DEVICE":               6,
    }
)
Enum value maps for RecognitionMetadata_RecordingDeviceType.
var (
    StreamingRecognizeResponse_SpeechEventType_name = map[int32]string{
        0: "SPEECH_EVENT_UNSPECIFIED",
        1: "END_OF_SINGLE_UTTERANCE",
    }
    StreamingRecognizeResponse_SpeechEventType_value = map[string]int32{
        "SPEECH_EVENT_UNSPECIFIED": 0,
        "END_OF_SINGLE_UTTERANCE":  1,
    }
)
Enum value maps for StreamingRecognizeResponse_SpeechEventType.
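The generated `_name` and `_value` maps convert between an enum's wire value and its proto name in both directions. A minimal sketch; the `speechpb` import alias and the genproto import path are assumptions, so adjust them to wherever this package lives in your build:

package main

import (
    "fmt"

    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path
)

func main() {
    // Wire value -> proto name via the generated name map.
    fmt.Println(speechpb.RecognitionConfig_AudioEncoding_name[int32(speechpb.RecognitionConfig_FLAC)]) // FLAC

    // Proto name -> typed enum value via the generated value map.
    enc := speechpb.RecognitionConfig_AudioEncoding(speechpb.RecognitionConfig_AudioEncoding_value["LINEAR16"])
    fmt.Println(enc) // LINEAR16 (String() consults the same mapping)
}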
var File_google_cloud_speech_v1_cloud_speech_proto protoreflect.FileDescriptor
Functions ¶
func RegisterSpeechServer ¶
func RegisterSpeechServer(s *grpc.Server, srv SpeechServer)
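A hedged sketch of wiring a SpeechServer implementation into a *grpc.Server, e.g. for tests. The type name, listener address, and the Recognize body are illustrative; embedding UnimplementedSpeechServer keeps the type compiling as RPCs are added to the service:

package main

import (
    "context"
    "log"
    "net"

    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path
    "google.golang.org/grpc"
)

// mockSpeechServer embeds UnimplementedSpeechServer so any RPC it does
// not override returns codes.Unimplemented instead of breaking the build.
type mockSpeechServer struct {
    speechpb.UnimplementedSpeechServer
}

func (s *mockSpeechServer) Recognize(ctx context.Context, req *speechpb.RecognizeRequest) (*speechpb.RecognizeResponse, error) {
    // Placeholder: a real implementation would run recognition here.
    return &speechpb.RecognizeResponse{}, nil
}

func main() {
    lis, err := net.Listen("tcp", "localhost:0")
    if err != nil {
        log.Fatal(err)
    }
    srv := grpc.NewServer()
    speechpb.RegisterSpeechServer(srv, &mockSpeechServer{})
    log.Fatal(srv.Serve(lis))
}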
Types ¶
type LongRunningRecognizeMetadata ¶
type LongRunningRecognizeMetadata struct {
    // Approximate percentage of audio processed thus far. Guaranteed to be 100
    // when the audio is fully processed and the results are available.
    ProgressPercent int32 `protobuf:"varint,1,opt,name=progress_percent,json=progressPercent,proto3" json:"progress_percent,omitempty"`
    // Time when the request was received.
    StartTime *timestamppb.Timestamp `protobuf:"bytes,2,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"`
    // Time of the most recent processing update.
    LastUpdateTime *timestamppb.Timestamp `protobuf:"bytes,3,opt,name=last_update_time,json=lastUpdateTime,proto3" json:"last_update_time,omitempty"`
    // Output only. The URI of the audio file being transcribed. Empty if the audio was sent
    // as byte content.
    Uri string `protobuf:"bytes,4,opt,name=uri,proto3" json:"uri,omitempty"`
    // contains filtered or unexported fields
}
Describes the progress of a long-running `LongRunningRecognize` call. It is included in the `metadata` field of the `Operation` returned by the `GetOperation` call of the `google::longrunning::Operations` service.
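Since this message travels inside the Operation's `metadata` Any field, callers unpack it explicitly. A sketch, assuming the operation came from a `LongRunningRecognize` call and that the genproto import paths below match your build; the package name is hypothetical:

package example

import (
    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path
    longrunningpb "google.golang.org/genproto/googleapis/longrunning" // assumed import path
)

// progressPercent unpacks the Any-typed Operation.metadata into
// LongRunningRecognizeMetadata and reports transcription progress.
func progressPercent(op *longrunningpb.Operation) (int32, error) {
    md := &speechpb.LongRunningRecognizeMetadata{}
    if err := op.GetMetadata().UnmarshalTo(md); err != nil {
        return 0, err
    }
    return md.GetProgressPercent(), nil
}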
func (*LongRunningRecognizeMetadata) Descriptor (deprecated) ¶
func (*LongRunningRecognizeMetadata) Descriptor() ([]byte, []int)
Deprecated: Use LongRunningRecognizeMetadata.ProtoReflect.Descriptor instead.
func (*LongRunningRecognizeMetadata) GetLastUpdateTime ¶
func (x *LongRunningRecognizeMetadata) GetLastUpdateTime() *timestamppb.Timestamp
func (*LongRunningRecognizeMetadata) GetProgressPercent ¶
func (x *LongRunningRecognizeMetadata) GetProgressPercent() int32
func (*LongRunningRecognizeMetadata) GetStartTime ¶
func (x *LongRunningRecognizeMetadata) GetStartTime() *timestamppb.Timestamp
func (*LongRunningRecognizeMetadata) GetUri ¶
func (x *LongRunningRecognizeMetadata) GetUri() string
func (*LongRunningRecognizeMetadata) ProtoMessage ¶
func (*LongRunningRecognizeMetadata) ProtoMessage()
func (*LongRunningRecognizeMetadata) ProtoReflect ¶
func (x *LongRunningRecognizeMetadata) ProtoReflect() protoreflect.Message
func (*LongRunningRecognizeMetadata) Reset ¶
func (x *LongRunningRecognizeMetadata) Reset()
func (*LongRunningRecognizeMetadata) String ¶
func (x *LongRunningRecognizeMetadata) String() string
type LongRunningRecognizeRequest ¶
type LongRunningRecognizeRequest struct {
    // Required. Provides information to the recognizer that specifies how to
    // process the request.
    Config *RecognitionConfig `protobuf:"bytes,1,opt,name=config,proto3" json:"config,omitempty"`
    // Required. The audio data to be recognized.
    Audio *RecognitionAudio `protobuf:"bytes,2,opt,name=audio,proto3" json:"audio,omitempty"`
    // Optional. Specifies an optional destination for the recognition results.
    OutputConfig *TranscriptOutputConfig `protobuf:"bytes,4,opt,name=output_config,json=outputConfig,proto3" json:"output_config,omitempty"`
    // contains filtered or unexported fields
}
The top-level message sent by the client for the `LongRunningRecognize` method.
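A sketch of a complete request; the bucket and object names are placeholders. Inline audio content is size-limited, so long audio is normally referenced by a Cloud Storage URI, and OutputConfig can optionally redirect the transcript to Cloud Storage:

package example

import speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path

func newLongRunningRequest() *speechpb.LongRunningRecognizeRequest {
    return &speechpb.LongRunningRecognizeRequest{
        Config: &speechpb.RecognitionConfig{
            Encoding:        speechpb.RecognitionConfig_LINEAR16,
            SampleRateHertz: 16000,
            LanguageCode:    "en-US",
        },
        // Long audio is referenced by URI rather than sent inline.
        Audio: &speechpb.RecognitionAudio{
            AudioSource: &speechpb.RecognitionAudio_Uri{Uri: "gs://my-bucket/audio.raw"},
        },
        // Optional: also write the finished transcript to Cloud Storage.
        OutputConfig: &speechpb.TranscriptOutputConfig{
            OutputType: &speechpb.TranscriptOutputConfig_GcsUri{GcsUri: "gs://my-bucket/transcript.json"},
        },
    }
}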
func (*LongRunningRecognizeRequest) Descriptor (deprecated) ¶
func (*LongRunningRecognizeRequest) Descriptor() ([]byte, []int)
Deprecated: Use LongRunningRecognizeRequest.ProtoReflect.Descriptor instead.
func (*LongRunningRecognizeRequest) GetAudio ¶
func (x *LongRunningRecognizeRequest) GetAudio() *RecognitionAudio
func (*LongRunningRecognizeRequest) GetConfig ¶
func (x *LongRunningRecognizeRequest) GetConfig() *RecognitionConfig
func (*LongRunningRecognizeRequest) GetOutputConfig ¶
func (x *LongRunningRecognizeRequest) GetOutputConfig() *TranscriptOutputConfig
func (*LongRunningRecognizeRequest) ProtoMessage ¶
func (*LongRunningRecognizeRequest) ProtoMessage()
func (*LongRunningRecognizeRequest) ProtoReflect ¶
func (x *LongRunningRecognizeRequest) ProtoReflect() protoreflect.Message
func (*LongRunningRecognizeRequest) Reset ¶
func (x *LongRunningRecognizeRequest) Reset()
func (*LongRunningRecognizeRequest) String ¶
func (x *LongRunningRecognizeRequest) String() string
type LongRunningRecognizeResponse ¶
type LongRunningRecognizeResponse struct {
    // Sequential list of transcription results corresponding to
    // sequential portions of audio.
    Results []*SpeechRecognitionResult `protobuf:"bytes,2,rep,name=results,proto3" json:"results,omitempty"`
    // When available, billed audio seconds for the corresponding request.
    TotalBilledTime *durationpb.Duration `protobuf:"bytes,3,opt,name=total_billed_time,json=totalBilledTime,proto3" json:"total_billed_time,omitempty"`
    // contains filtered or unexported fields
}
The only message returned to the client by the `LongRunningRecognize` method. It contains the result as zero or more sequential `SpeechRecognitionResult` messages. It is included in the `result.response` field of the `Operation` returned by the `GetOperation` call of the `google::longrunning::Operations` service.
func (*LongRunningRecognizeResponse) Descriptor (deprecated) ¶
func (*LongRunningRecognizeResponse) Descriptor() ([]byte, []int)
Deprecated: Use LongRunningRecognizeResponse.ProtoReflect.Descriptor instead.
func (*LongRunningRecognizeResponse) GetResults ¶
func (x *LongRunningRecognizeResponse) GetResults() []*SpeechRecognitionResult
func (*LongRunningRecognizeResponse) GetTotalBilledTime ¶
func (x *LongRunningRecognizeResponse) GetTotalBilledTime() *durationpb.Duration
func (*LongRunningRecognizeResponse) ProtoMessage ¶
func (*LongRunningRecognizeResponse) ProtoMessage()
func (*LongRunningRecognizeResponse) ProtoReflect ¶
func (x *LongRunningRecognizeResponse) ProtoReflect() protoreflect.Message
func (*LongRunningRecognizeResponse) Reset ¶
func (x *LongRunningRecognizeResponse) Reset()
func (*LongRunningRecognizeResponse) String ¶
func (x *LongRunningRecognizeResponse) String() string
type RecognitionAudio ¶
type RecognitionAudio struct {
    // The audio source, which is either inline content or a Google Cloud
    // Storage uri.
    //
    // Types that are assignable to AudioSource:
    //	*RecognitionAudio_Content
    //	*RecognitionAudio_Uri
    AudioSource isRecognitionAudio_AudioSource `protobuf_oneof:"audio_source"`
    // contains filtered or unexported fields
}
Contains audio data in the encoding specified in the `RecognitionConfig`. Either `content` or `uri` must be supplied. Supplying both or neither returns [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See [content limits](https://cloud.google.com/speech-to-text/quotas#content).
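Because `audio_source` is a oneof, exactly one of the generated wrapper types is assigned. A sketch of both variants; the URI is a placeholder:

package example

import speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path

func audioSources(audioBytes []byte) (inline, byURI *speechpb.RecognitionAudio) {
    // Variant 1: raw bytes sent inline with the request.
    inline = &speechpb.RecognitionAudio{
        AudioSource: &speechpb.RecognitionAudio_Content{Content: audioBytes},
    }
    // Variant 2: a gs:// URI pointing at the audio object.
    byURI = &speechpb.RecognitionAudio{
        AudioSource: &speechpb.RecognitionAudio_Uri{Uri: "gs://my-bucket/audio.flac"},
    }
    return inline, byURI
}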
func (*RecognitionAudio) Descriptor (deprecated) ¶
func (*RecognitionAudio) Descriptor() ([]byte, []int)
Deprecated: Use RecognitionAudio.ProtoReflect.Descriptor instead.
func (*RecognitionAudio) GetAudioSource ¶
func (m *RecognitionAudio) GetAudioSource() isRecognitionAudio_AudioSource
func (*RecognitionAudio) GetContent ¶
func (x *RecognitionAudio) GetContent() []byte
func (*RecognitionAudio) GetUri ¶
func (x *RecognitionAudio) GetUri() string
func (*RecognitionAudio) ProtoMessage ¶
func (*RecognitionAudio) ProtoMessage()
func (*RecognitionAudio) ProtoReflect ¶
func (x *RecognitionAudio) ProtoReflect() protoreflect.Message
func (*RecognitionAudio) Reset ¶
func (x *RecognitionAudio) Reset()
func (*RecognitionAudio) String ¶
func (x *RecognitionAudio) String() string
type RecognitionAudio_Content ¶
type RecognitionAudio_Content struct {
    // The audio data bytes encoded as specified in
    // `RecognitionConfig`. Note: as with all bytes fields, proto buffers use a
    // pure binary representation, whereas JSON representations use base64.
    Content []byte `protobuf:"bytes,1,opt,name=content,proto3,oneof"`
}
type RecognitionAudio_Uri ¶
type RecognitionAudio_Uri struct {
    // URI that points to a file that contains audio data bytes as specified in
    // `RecognitionConfig`. The file must not be compressed (for example, gzip).
    // Currently, only Google Cloud Storage URIs are
    // supported, which must be specified in the following format:
    // `gs://bucket_name/object_name` (other URI formats return
    // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more information, see
    // [Request URIs](https://cloud.google.com/storage/docs/reference-uris).
    Uri string `protobuf:"bytes,2,opt,name=uri,proto3,oneof"`
}
type RecognitionConfig ¶
type RecognitionConfig struct {
    // Encoding of audio data sent in all `RecognitionAudio` messages.
    // This field is optional for `FLAC` and `WAV` audio files and required
    // for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
    Encoding RecognitionConfig_AudioEncoding `` /* 130-byte string literal not displayed */
    // Sample rate in Hertz of the audio data sent in all
    // `RecognitionAudio` messages. Valid values are: 8000-48000.
    // 16000 is optimal. For best results, set the sampling rate of the audio
    // source to 16000 Hz. If that's not possible, use the native sample rate of
    // the audio source (instead of re-sampling).
    // This field is optional for FLAC and WAV audio files, but is
    // required for all other audio formats. For details, see [AudioEncoding][google.cloud.speech.v1.RecognitionConfig.AudioEncoding].
    SampleRateHertz int32 `protobuf:"varint,2,opt,name=sample_rate_hertz,json=sampleRateHertz,proto3" json:"sample_rate_hertz,omitempty"`
    // The number of channels in the input audio data.
    // ONLY set this for MULTI-CHANNEL recognition.
    // Valid values for LINEAR16 and FLAC are `1`-`8`.
    // Valid values for OGG_OPUS are '1'-'254'.
    // Valid value for MULAW, AMR, AMR_WB and SPEEX_WITH_HEADER_BYTE is only `1`.
    // If `0` or omitted, defaults to one channel (mono).
    // Note: We only recognize the first channel by default.
    // To perform independent recognition on each channel set
    // `enable_separate_recognition_per_channel` to 'true'.
    AudioChannelCount int32 `protobuf:"varint,7,opt,name=audio_channel_count,json=audioChannelCount,proto3" json:"audio_channel_count,omitempty"`
    // This needs to be set to `true` explicitly and `audio_channel_count` > 1
    // to get each channel recognized separately. The recognition result will
    // contain a `channel_tag` field to state which channel that result belongs
    // to. If this is not true, we will only recognize the first channel. The
    // request is billed cumulatively for all channels recognized:
    // `audio_channel_count` multiplied by the length of the audio.
    EnableSeparateRecognitionPerChannel bool `` /* 174-byte string literal not displayed */
    // Required. The language of the supplied audio as a
    // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
    // Example: "en-US".
    // See [Language
    // Support](https://cloud.google.com/speech-to-text/docs/languages) for a list
    // of the currently supported language codes.
    LanguageCode string `protobuf:"bytes,3,opt,name=language_code,json=languageCode,proto3" json:"language_code,omitempty"`
    // Maximum number of recognition hypotheses to be returned.
    // Specifically, the maximum number of `SpeechRecognitionAlternative` messages
    // within each `SpeechRecognitionResult`.
    // The server may return fewer than `max_alternatives`.
    // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
    // one. If omitted, will return a maximum of one.
    MaxAlternatives int32 `protobuf:"varint,4,opt,name=max_alternatives,json=maxAlternatives,proto3" json:"max_alternatives,omitempty"`
    // If set to `true`, the server will attempt to filter out
    // profanities, replacing all but the initial character in each filtered word
    // with asterisks, e.g. "f***". If set to `false` or omitted, profanities
    // won't be filtered out.
    ProfanityFilter bool `protobuf:"varint,5,opt,name=profanity_filter,json=profanityFilter,proto3" json:"profanity_filter,omitempty"`
    // Array of [SpeechContext][google.cloud.speech.v1.SpeechContext].
    // A means to provide context to assist the speech recognition. For more
    // information, see
    // [speech
    // adaptation](https://cloud.google.com/speech-to-text/docs/adaptation).
    SpeechContexts []*SpeechContext `protobuf:"bytes,6,rep,name=speech_contexts,json=speechContexts,proto3" json:"speech_contexts,omitempty"`
    // If `true`, the top result includes a list of words and
    // the start and end time offsets (timestamps) for those words. If
    // `false`, no word-level time offset information is returned. The default is
    // `false`.
    EnableWordTimeOffsets bool `` /* 129-byte string literal not displayed */
    // If 'true', adds punctuation to recognition result hypotheses.
    // This feature is only available in select languages. Setting this for
    // requests in other languages has no effect at all.
    // The default 'false' value does not add punctuation to result hypotheses.
    EnableAutomaticPunctuation bool `` /* 143-byte string literal not displayed */
    // Config to enable speaker diarization and set additional
    // parameters to make diarization better suited for your application.
    // Note: When this is enabled, we send all the words from the beginning of the
    // audio for the top alternative in every consecutive STREAMING responses.
    // This is done in order to improve our speaker tags as our models learn to
    // identify the speakers in the conversation over time.
    // For non-streaming requests, the diarization results will be provided only
    // in the top alternative of the FINAL SpeechRecognitionResult.
    DiarizationConfig *SpeakerDiarizationConfig `protobuf:"bytes,19,opt,name=diarization_config,json=diarizationConfig,proto3" json:"diarization_config,omitempty"`
    // Metadata regarding this request.
    Metadata *RecognitionMetadata `protobuf:"bytes,9,opt,name=metadata,proto3" json:"metadata,omitempty"`
    // Which model to select for the given request. Select the model
    // best suited to your domain to get best results. If a model is not
    // explicitly specified, then we auto-select a model based on the parameters
    // in the RecognitionConfig.
    // <table>
    //   <tr>
    //     <td><b>Model</b></td>
    //     <td><b>Description</b></td>
    //   </tr>
    //   <tr>
    //     <td><code>command_and_search</code></td>
    //     <td>Best for short queries such as voice commands or voice search.</td>
    //   </tr>
    //   <tr>
    //     <td><code>phone_call</code></td>
    //     <td>Best for audio that originated from a phone call (typically
    //     recorded at an 8khz sampling rate).</td>
    //   </tr>
    //   <tr>
    //     <td><code>video</code></td>
    //     <td>Best for audio that originated from video or includes multiple
    //         speakers. Ideally the audio is recorded at a 16khz or greater
    //         sampling rate. This is a premium model that costs more than the
    //         standard rate.</td>
    //   </tr>
    //   <tr>
    //     <td><code>default</code></td>
    //     <td>Best for audio that is not one of the specific audio models.
    //         For example, long-form audio. Ideally the audio is high-fidelity,
    //         recorded at a 16khz or greater sampling rate.</td>
    //   </tr>
    // </table>
    Model string `protobuf:"bytes,13,opt,name=model,proto3" json:"model,omitempty"`
    // Set to true to use an enhanced model for speech recognition.
    // If `use_enhanced` is set to true and the `model` field is not set, then
    // an appropriate enhanced model is chosen if an enhanced model exists for
    // the audio.
    //
    // If `use_enhanced` is true and an enhanced version of the specified model
    // does not exist, then the speech is recognized using the standard version
    // of the specified model.
    UseEnhanced bool `protobuf:"varint,14,opt,name=use_enhanced,json=useEnhanced,proto3" json:"use_enhanced,omitempty"`
    // contains filtered or unexported fields
}
Provides information to the recognizer that specifies how to process the request.
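A sketch of a typical configuration; only LanguageCode is required, and every other field shown is optional and illustrative:

package example

import speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path

func exampleConfig() *speechpb.RecognitionConfig {
    return &speechpb.RecognitionConfig{
        Encoding:                   speechpb.RecognitionConfig_LINEAR16,
        SampleRateHertz:            16000,
        LanguageCode:               "en-US", // required
        MaxAlternatives:            3,
        EnableAutomaticPunctuation: true,
        EnableWordTimeOffsets:      true,
        Model:                      "phone_call", // see the model table above
    }
}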
func (*RecognitionConfig) Descriptor (deprecated) ¶
func (*RecognitionConfig) Descriptor() ([]byte, []int)
Deprecated: Use RecognitionConfig.ProtoReflect.Descriptor instead.
func (*RecognitionConfig) GetAudioChannelCount ¶
func (x *RecognitionConfig) GetAudioChannelCount() int32
func (*RecognitionConfig) GetDiarizationConfig ¶
func (x *RecognitionConfig) GetDiarizationConfig() *SpeakerDiarizationConfig
func (*RecognitionConfig) GetEnableAutomaticPunctuation ¶
func (x *RecognitionConfig) GetEnableAutomaticPunctuation() bool
func (*RecognitionConfig) GetEnableSeparateRecognitionPerChannel ¶
func (x *RecognitionConfig) GetEnableSeparateRecognitionPerChannel() bool
func (*RecognitionConfig) GetEnableWordTimeOffsets ¶
func (x *RecognitionConfig) GetEnableWordTimeOffsets() bool
func (*RecognitionConfig) GetEncoding ¶
func (x *RecognitionConfig) GetEncoding() RecognitionConfig_AudioEncoding
func (*RecognitionConfig) GetLanguageCode ¶
func (x *RecognitionConfig) GetLanguageCode() string
func (*RecognitionConfig) GetMaxAlternatives ¶
func (x *RecognitionConfig) GetMaxAlternatives() int32
func (*RecognitionConfig) GetMetadata ¶
func (x *RecognitionConfig) GetMetadata() *RecognitionMetadata
func (*RecognitionConfig) GetModel ¶
func (x *RecognitionConfig) GetModel() string
func (*RecognitionConfig) GetProfanityFilter ¶
func (x *RecognitionConfig) GetProfanityFilter() bool
func (*RecognitionConfig) GetSampleRateHertz ¶
func (x *RecognitionConfig) GetSampleRateHertz() int32
func (*RecognitionConfig) GetSpeechContexts ¶
func (x *RecognitionConfig) GetSpeechContexts() []*SpeechContext
func (*RecognitionConfig) GetUseEnhanced ¶
func (x *RecognitionConfig) GetUseEnhanced() bool
func (*RecognitionConfig) ProtoMessage ¶
func (*RecognitionConfig) ProtoMessage()
func (*RecognitionConfig) ProtoReflect ¶
func (x *RecognitionConfig) ProtoReflect() protoreflect.Message
func (*RecognitionConfig) Reset ¶
func (x *RecognitionConfig) Reset()
func (*RecognitionConfig) String ¶
func (x *RecognitionConfig) String() string
type RecognitionConfig_AudioEncoding ¶
type RecognitionConfig_AudioEncoding int32
The encoding of the audio data sent in the request.
All encodings support only 1 channel (mono) audio, unless the `audio_channel_count` and `enable_separate_recognition_per_channel` fields are set.
For best results, the audio source should be captured and transmitted using a lossless encoding (`FLAC` or `LINEAR16`). The accuracy of the speech recognition can be reduced if lossy codecs are used to capture or transmit audio, particularly if background noise is present. Lossy codecs include `MULAW`, `AMR`, `AMR_WB`, `OGG_OPUS`, `SPEEX_WITH_HEADER_BYTE`, `MP3`.
The `FLAC` and `WAV` audio file formats include a header that describes the included audio content. You can request recognition for `WAV` files that contain either `LINEAR16` or `MULAW` encoded audio. If you send a `FLAC` or `WAV` audio file in your request, you do not need to specify an `AudioEncoding`; the audio encoding format is determined from the file header. If you specify an `AudioEncoding` when you send `FLAC` or `WAV` audio, the encoding configuration must match the encoding described in the audio header; otherwise the request returns a [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT] error code.
const (
    // Not specified.
    RecognitionConfig_ENCODING_UNSPECIFIED RecognitionConfig_AudioEncoding = 0
    // Uncompressed 16-bit signed little-endian samples (Linear PCM).
    RecognitionConfig_LINEAR16 RecognitionConfig_AudioEncoding = 1
    // `FLAC` (Free Lossless Audio
    // Codec) is the recommended encoding because it is
    // lossless--therefore recognition is not compromised--and
    // requires only about half the bandwidth of `LINEAR16`. `FLAC` stream
    // encoding supports 16-bit and 24-bit samples, however, not all fields in
    // `STREAMINFO` are supported.
    RecognitionConfig_FLAC RecognitionConfig_AudioEncoding = 2
    // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law.
    RecognitionConfig_MULAW RecognitionConfig_AudioEncoding = 3
    // Adaptive Multi-Rate Narrowband codec. `sample_rate_hertz` must be 8000.
    RecognitionConfig_AMR RecognitionConfig_AudioEncoding = 4
    // Adaptive Multi-Rate Wideband codec. `sample_rate_hertz` must be 16000.
    RecognitionConfig_AMR_WB RecognitionConfig_AudioEncoding = 5
    // Opus encoded audio frames in Ogg container
    // ([OggOpus](https://wiki.xiph.org/OggOpus)).
    // `sample_rate_hertz` must be one of 8000, 12000, 16000, 24000, or 48000.
    RecognitionConfig_OGG_OPUS RecognitionConfig_AudioEncoding = 6
    // Although the use of lossy encodings is not recommended, if a very low
    // bitrate encoding is required, `OGG_OPUS` is highly preferred over
    // Speex encoding. The [Speex](https://speex.org/) encoding supported by
    // Cloud Speech API has a header byte in each block, as in MIME type
    // `audio/x-speex-with-header-byte`.
    // It is a variant of the RTP Speex encoding defined in
    // [RFC 5574](https://tools.ietf.org/html/rfc5574).
    // The stream is a sequence of blocks, one block per RTP packet. Each block
    // starts with a byte containing the length of the block, in bytes, followed
    // by one or more frames of Speex data, padded to an integral number of
    // bytes (octets) as specified in RFC 5574. In other words, each RTP header
    // is replaced with a single byte containing the block length. Only Speex
    // wideband is supported. `sample_rate_hertz` must be 16000.
    RecognitionConfig_SPEEX_WITH_HEADER_BYTE RecognitionConfig_AudioEncoding = 7
)
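As the header note above describes, FLAC and WAV files are self-describing, so a sketch of a config for such files can leave Encoding (and SampleRateHertz) at their zero values:

package example

import speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path

func headerDrivenConfig() *speechpb.RecognitionConfig {
    return &speechpb.RecognitionConfig{
        // ENCODING_UNSPECIFIED (the zero value) lets the service read the
        // encoding and sample rate from the FLAC/WAV file header.
        Encoding:     speechpb.RecognitionConfig_ENCODING_UNSPECIFIED,
        LanguageCode: "en-US",
    }
}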
func (RecognitionConfig_AudioEncoding) Descriptor ¶
func (RecognitionConfig_AudioEncoding) Descriptor() protoreflect.EnumDescriptor
func (RecognitionConfig_AudioEncoding) Enum ¶
func (x RecognitionConfig_AudioEncoding) Enum() *RecognitionConfig_AudioEncoding
func (RecognitionConfig_AudioEncoding) EnumDescriptor (deprecated) ¶
func (RecognitionConfig_AudioEncoding) EnumDescriptor() ([]byte, []int)
Deprecated: Use RecognitionConfig_AudioEncoding.Descriptor instead.
func (RecognitionConfig_AudioEncoding) Number ¶
func (x RecognitionConfig_AudioEncoding) Number() protoreflect.EnumNumber
func (RecognitionConfig_AudioEncoding) String ¶
func (x RecognitionConfig_AudioEncoding) String() string
func (RecognitionConfig_AudioEncoding) Type ¶
func (RecognitionConfig_AudioEncoding) Type() protoreflect.EnumType
type RecognitionMetadata ¶
type RecognitionMetadata struct {
    // The use case most closely describing the audio content to be recognized.
    InteractionType RecognitionMetadata_InteractionType `` /* 171-byte string literal not displayed */
    // The industry vertical to which this speech recognition request most
    // closely applies. This is most indicative of the topics contained
    // in the audio. Use the 6-digit NAICS code to identify the industry
    // vertical - see https://www.naics.com/search/.
    IndustryNaicsCodeOfAudio uint32 `` /* 140-byte string literal not displayed */
    // The audio type that most closely describes the audio being recognized.
    MicrophoneDistance RecognitionMetadata_MicrophoneDistance `` /* 183-byte string literal not displayed */
    // The original media the speech was recorded on.
    OriginalMediaType RecognitionMetadata_OriginalMediaType `` /* 181-byte string literal not displayed */
    // The type of device the speech was recorded with.
    RecordingDeviceType RecognitionMetadata_RecordingDeviceType `` /* 189-byte string literal not displayed */
    // The device used to make the recording. Examples 'Nexus 5X' or
    // 'Polycom SoundStation IP 6000' or 'POTS' or 'VoIP' or
    // 'Cardioid Microphone'.
    RecordingDeviceName string `protobuf:"bytes,7,opt,name=recording_device_name,json=recordingDeviceName,proto3" json:"recording_device_name,omitempty"`
    // Mime type of the original audio file. For example `audio/m4a`,
    // `audio/x-alaw-basic`, `audio/mp3`, `audio/3gpp`.
    // A list of possible audio mime types is maintained at
    // http://www.iana.org/assignments/media-types/media-types.xhtml#audio
    OriginalMimeType string `protobuf:"bytes,8,opt,name=original_mime_type,json=originalMimeType,proto3" json:"original_mime_type,omitempty"`
    // Description of the content. Eg. "Recordings of federal supreme court
    // hearings from 2012".
    AudioTopic string `protobuf:"bytes,10,opt,name=audio_topic,json=audioTopic,proto3" json:"audio_topic,omitempty"`
    // contains filtered or unexported fields
}
Description of audio data to be recognized.
func (*RecognitionMetadata) Descriptor (deprecated) ¶
func (*RecognitionMetadata) Descriptor() ([]byte, []int)
Deprecated: Use RecognitionMetadata.ProtoReflect.Descriptor instead.
func (*RecognitionMetadata) GetAudioTopic ¶
func (x *RecognitionMetadata) GetAudioTopic() string
func (*RecognitionMetadata) GetIndustryNaicsCodeOfAudio ¶
func (x *RecognitionMetadata) GetIndustryNaicsCodeOfAudio() uint32
func (*RecognitionMetadata) GetInteractionType ¶
func (x *RecognitionMetadata) GetInteractionType() RecognitionMetadata_InteractionType
func (*RecognitionMetadata) GetMicrophoneDistance ¶
func (x *RecognitionMetadata) GetMicrophoneDistance() RecognitionMetadata_MicrophoneDistance
func (*RecognitionMetadata) GetOriginalMediaType ¶
func (x *RecognitionMetadata) GetOriginalMediaType() RecognitionMetadata_OriginalMediaType
func (*RecognitionMetadata) GetOriginalMimeType ¶
func (x *RecognitionMetadata) GetOriginalMimeType() string
func (*RecognitionMetadata) GetRecordingDeviceName ¶
func (x *RecognitionMetadata) GetRecordingDeviceName() string
func (*RecognitionMetadata) GetRecordingDeviceType ¶
func (x *RecognitionMetadata) GetRecordingDeviceType() RecognitionMetadata_RecordingDeviceType
func (*RecognitionMetadata) ProtoMessage ¶
func (*RecognitionMetadata) ProtoMessage()
func (*RecognitionMetadata) ProtoReflect ¶
func (x *RecognitionMetadata) ProtoReflect() protoreflect.Message
func (*RecognitionMetadata) Reset ¶
func (x *RecognitionMetadata) Reset()
func (*RecognitionMetadata) String ¶
func (x *RecognitionMetadata) String() string
type RecognitionMetadata_InteractionType ¶
type RecognitionMetadata_InteractionType int32
Use case categories that the audio recognition request can be described by.
const (
    // Use case is either unknown or is something other than one of the other
    // values below.
    RecognitionMetadata_INTERACTION_TYPE_UNSPECIFIED RecognitionMetadata_InteractionType = 0
    // Multiple people in a conversation or discussion. For example in a
    // meeting with two or more people actively participating. Typically
    // all the primary people speaking would be in the same room (if not,
    // see PHONE_CALL)
    RecognitionMetadata_DISCUSSION RecognitionMetadata_InteractionType = 1
    // One or more persons lecturing or presenting to others, mostly
    // uninterrupted.
    RecognitionMetadata_PRESENTATION RecognitionMetadata_InteractionType = 2
    // A phone-call or video-conference in which two or more people, who are
    // not in the same room, are actively participating.
    RecognitionMetadata_PHONE_CALL RecognitionMetadata_InteractionType = 3
    // A recorded message intended for another person to listen to.
    RecognitionMetadata_VOICEMAIL RecognitionMetadata_InteractionType = 4
    // Professionally produced audio (eg. TV Show, Podcast).
    RecognitionMetadata_PROFESSIONALLY_PRODUCED RecognitionMetadata_InteractionType = 5
    // Transcribe spoken questions and queries into text.
    RecognitionMetadata_VOICE_SEARCH RecognitionMetadata_InteractionType = 6
    // Transcribe voice commands, such as for controlling a device.
    RecognitionMetadata_VOICE_COMMAND RecognitionMetadata_InteractionType = 7
    // Transcribe speech to text to create a written document, such as a
    // text-message, email or report.
    RecognitionMetadata_DICTATION RecognitionMetadata_InteractionType = 8
)
func (RecognitionMetadata_InteractionType) Descriptor ¶
func (RecognitionMetadata_InteractionType) Descriptor() protoreflect.EnumDescriptor
func (RecognitionMetadata_InteractionType) Enum ¶
func (x RecognitionMetadata_InteractionType) Enum() *RecognitionMetadata_InteractionType
func (RecognitionMetadata_InteractionType) EnumDescriptor (deprecated) ¶
func (RecognitionMetadata_InteractionType) EnumDescriptor() ([]byte, []int)
Deprecated: Use RecognitionMetadata_InteractionType.Descriptor instead.
func (RecognitionMetadata_InteractionType) Number ¶
func (x RecognitionMetadata_InteractionType) Number() protoreflect.EnumNumber
func (RecognitionMetadata_InteractionType) String ¶
func (x RecognitionMetadata_InteractionType) String() string
func (RecognitionMetadata_InteractionType) Type ¶
func (RecognitionMetadata_InteractionType) Type() protoreflect.EnumType
type RecognitionMetadata_MicrophoneDistance ¶
type RecognitionMetadata_MicrophoneDistance int32
Enumerates the types of capture settings describing an audio file.
const (
    // Audio type is not known.
    RecognitionMetadata_MICROPHONE_DISTANCE_UNSPECIFIED RecognitionMetadata_MicrophoneDistance = 0
    // The audio was captured from a closely placed microphone. Eg. phone,
    // dictaphone, or handheld microphone. Generally if the speaker is within
    // 1 meter of the microphone.
    RecognitionMetadata_NEARFIELD RecognitionMetadata_MicrophoneDistance = 1
    // The speaker is within 3 meters of the microphone.
    RecognitionMetadata_MIDFIELD RecognitionMetadata_MicrophoneDistance = 2
    // The speaker is more than 3 meters away from the microphone.
    RecognitionMetadata_FARFIELD RecognitionMetadata_MicrophoneDistance = 3
)
func (RecognitionMetadata_MicrophoneDistance) Descriptor ¶
func (RecognitionMetadata_MicrophoneDistance) Descriptor() protoreflect.EnumDescriptor
func (RecognitionMetadata_MicrophoneDistance) EnumDescriptor (deprecated) ¶
func (RecognitionMetadata_MicrophoneDistance) EnumDescriptor() ([]byte, []int)
Deprecated: Use RecognitionMetadata_MicrophoneDistance.Descriptor instead.
func (RecognitionMetadata_MicrophoneDistance) Number ¶
func (x RecognitionMetadata_MicrophoneDistance) Number() protoreflect.EnumNumber
func (RecognitionMetadata_MicrophoneDistance) String ¶
func (x RecognitionMetadata_MicrophoneDistance) String() string
func (RecognitionMetadata_MicrophoneDistance) Type ¶
func (RecognitionMetadata_MicrophoneDistance) Type() protoreflect.EnumType
type RecognitionMetadata_OriginalMediaType ¶
type RecognitionMetadata_OriginalMediaType int32
The original media the speech was recorded on.
const (
    // Unknown original media type.
    RecognitionMetadata_ORIGINAL_MEDIA_TYPE_UNSPECIFIED RecognitionMetadata_OriginalMediaType = 0
    // The speech data is an audio recording.
    RecognitionMetadata_AUDIO RecognitionMetadata_OriginalMediaType = 1
    // The speech data was originally recorded on a video.
    RecognitionMetadata_VIDEO RecognitionMetadata_OriginalMediaType = 2
)
func (RecognitionMetadata_OriginalMediaType) Descriptor ¶
func (RecognitionMetadata_OriginalMediaType) Descriptor() protoreflect.EnumDescriptor
func (RecognitionMetadata_OriginalMediaType) EnumDescriptor (deprecated) ¶
func (RecognitionMetadata_OriginalMediaType) EnumDescriptor() ([]byte, []int)
Deprecated: Use RecognitionMetadata_OriginalMediaType.Descriptor instead.
func (RecognitionMetadata_OriginalMediaType) Number ¶
func (x RecognitionMetadata_OriginalMediaType) Number() protoreflect.EnumNumber
func (RecognitionMetadata_OriginalMediaType) String ¶
func (x RecognitionMetadata_OriginalMediaType) String() string
func (RecognitionMetadata_OriginalMediaType) Type ¶
func (RecognitionMetadata_OriginalMediaType) Type() protoreflect.EnumType
type RecognitionMetadata_RecordingDeviceType ¶
type RecognitionMetadata_RecordingDeviceType int32
The type of device the speech was recorded with.
const (
    // The recording device is unknown.
    RecognitionMetadata_RECORDING_DEVICE_TYPE_UNSPECIFIED RecognitionMetadata_RecordingDeviceType = 0
    // Speech was recorded on a smartphone.
    RecognitionMetadata_SMARTPHONE RecognitionMetadata_RecordingDeviceType = 1
    // Speech was recorded using a personal computer or tablet.
    RecognitionMetadata_PC RecognitionMetadata_RecordingDeviceType = 2
    // Speech was recorded over a phone line.
    RecognitionMetadata_PHONE_LINE RecognitionMetadata_RecordingDeviceType = 3
    // Speech was recorded in a vehicle.
    RecognitionMetadata_VEHICLE RecognitionMetadata_RecordingDeviceType = 4
    // Speech was recorded outdoors.
    RecognitionMetadata_OTHER_OUTDOOR_DEVICE RecognitionMetadata_RecordingDeviceType = 5
    // Speech was recorded indoors.
    RecognitionMetadata_OTHER_INDOOR_DEVICE RecognitionMetadata_RecordingDeviceType = 6
)
func (RecognitionMetadata_RecordingDeviceType) Descriptor ¶
func (RecognitionMetadata_RecordingDeviceType) Descriptor() protoreflect.EnumDescriptor
func (RecognitionMetadata_RecordingDeviceType) EnumDescriptor (deprecated) ¶
func (RecognitionMetadata_RecordingDeviceType) EnumDescriptor() ([]byte, []int)
Deprecated: Use RecognitionMetadata_RecordingDeviceType.Descriptor instead.
func (RecognitionMetadata_RecordingDeviceType) Number ¶
func (x RecognitionMetadata_RecordingDeviceType) Number() protoreflect.EnumNumber
func (RecognitionMetadata_RecordingDeviceType) String ¶
func (x RecognitionMetadata_RecordingDeviceType) String() string
func (RecognitionMetadata_RecordingDeviceType) Type ¶
func (RecognitionMetadata_RecordingDeviceType) Type() protoreflect.EnumType
type RecognizeRequest ¶
type RecognizeRequest struct {
    // Required. Provides information to the recognizer that specifies how to
    // process the request.
    Config *RecognitionConfig `protobuf:"bytes,1,opt,name=config,proto3" json:"config,omitempty"`
    // Required. The audio data to be recognized.
    Audio *RecognitionAudio `protobuf:"bytes,2,opt,name=audio,proto3" json:"audio,omitempty"`
    // contains filtered or unexported fields
}
The top-level message sent by the client for the `Recognize` method.
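A sketch of a synchronous call; conn is assumed to be an already-authenticated gRPC connection to the Speech endpoint (credential setup is out of scope here), and the config values are illustrative:

package example

import (
    "context"

    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path
    "google.golang.org/grpc"
)

func syncRecognize(ctx context.Context, conn grpc.ClientConnInterface, audio []byte) (*speechpb.RecognizeResponse, error) {
    client := speechpb.NewSpeechClient(conn)
    return client.Recognize(ctx, &speechpb.RecognizeRequest{
        Config: &speechpb.RecognitionConfig{
            Encoding:        speechpb.RecognitionConfig_LINEAR16,
            SampleRateHertz: 16000,
            LanguageCode:    "en-US",
        },
        Audio: &speechpb.RecognitionAudio{
            AudioSource: &speechpb.RecognitionAudio_Content{Content: audio},
        },
    })
}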
func (*RecognizeRequest) Descriptor (deprecated) ¶
func (*RecognizeRequest) Descriptor() ([]byte, []int)
Deprecated: Use RecognizeRequest.ProtoReflect.Descriptor instead.
func (*RecognizeRequest) GetAudio ¶
func (x *RecognizeRequest) GetAudio() *RecognitionAudio
func (*RecognizeRequest) GetConfig ¶
func (x *RecognizeRequest) GetConfig() *RecognitionConfig
func (*RecognizeRequest) ProtoMessage ¶
func (*RecognizeRequest) ProtoMessage()
func (*RecognizeRequest) ProtoReflect ¶
func (x *RecognizeRequest) ProtoReflect() protoreflect.Message
func (*RecognizeRequest) Reset ¶
func (x *RecognizeRequest) Reset()
func (*RecognizeRequest) String ¶
func (x *RecognizeRequest) String() string
type RecognizeResponse ¶
type RecognizeResponse struct {
    // Sequential list of transcription results corresponding to
    // sequential portions of audio.
    Results []*SpeechRecognitionResult `protobuf:"bytes,2,rep,name=results,proto3" json:"results,omitempty"`
    // When available, billed audio seconds for the corresponding request.
    TotalBilledTime *durationpb.Duration `protobuf:"bytes,3,opt,name=total_billed_time,json=totalBilledTime,proto3" json:"total_billed_time,omitempty"`
    // contains filtered or unexported fields
}
The only message returned to the client by the `Recognize` method. It contains the result as zero or more sequential `SpeechRecognitionResult` messages.
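Each result covers a sequential slice of the audio. A sketch of walking the response and taking the top-ranked alternative per result:

package example

import (
    "fmt"

    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path
)

func printTranscripts(resp *speechpb.RecognizeResponse) {
    for _, result := range resp.GetResults() {
        // Alternatives are ordered most probable first.
        if alts := result.GetAlternatives(); len(alts) > 0 {
            fmt.Printf("channel %d: %q (confidence %.2f)\n",
                result.GetChannelTag(), alts[0].GetTranscript(), alts[0].GetConfidence())
        }
    }
}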
func (*RecognizeResponse) Descriptor (deprecated) ¶
func (*RecognizeResponse) Descriptor() ([]byte, []int)
Deprecated: Use RecognizeResponse.ProtoReflect.Descriptor instead.
func (*RecognizeResponse) GetResults ¶
func (x *RecognizeResponse) GetResults() []*SpeechRecognitionResult
func (*RecognizeResponse) GetTotalBilledTime ¶
func (x *RecognizeResponse) GetTotalBilledTime() *durationpb.Duration
func (*RecognizeResponse) ProtoMessage ¶
func (*RecognizeResponse) ProtoMessage()
func (*RecognizeResponse) ProtoReflect ¶
func (x *RecognizeResponse) ProtoReflect() protoreflect.Message
func (*RecognizeResponse) Reset ¶
func (x *RecognizeResponse) Reset()
func (*RecognizeResponse) String ¶
func (x *RecognizeResponse) String() string
type SpeakerDiarizationConfig ¶
type SpeakerDiarizationConfig struct {
    // If 'true', enables speaker detection for each recognized word in
    // the top alternative of the recognition result using a speaker_tag provided
    // in the WordInfo.
    EnableSpeakerDiarization bool `` /* 136-byte string literal not displayed */
    // Minimum number of speakers in the conversation. This range gives you more
    // flexibility by allowing the system to automatically determine the correct
    // number of speakers. If not set, the default value is 2.
    MinSpeakerCount int32 `protobuf:"varint,2,opt,name=min_speaker_count,json=minSpeakerCount,proto3" json:"min_speaker_count,omitempty"`
    // Maximum number of speakers in the conversation. This range gives you more
    // flexibility by allowing the system to automatically determine the correct
    // number of speakers. If not set, the default value is 6.
    MaxSpeakerCount int32 `protobuf:"varint,3,opt,name=max_speaker_count,json=maxSpeakerCount,proto3" json:"max_speaker_count,omitempty"`
    // Output only. Unused.
    //
    // Deprecated: Do not use.
    SpeakerTag int32 `protobuf:"varint,5,opt,name=speaker_tag,json=speakerTag,proto3" json:"speaker_tag,omitempty"`
    // contains filtered or unexported fields
}
Config to enable speaker diarization.
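Diarization is switched on through the parent RecognitionConfig; per the field comments above, speaker labels then arrive as speaker_tag on each WordInfo of the top alternative. A sketch, with illustrative speaker-count bounds:

package example

import speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path

func diarizedConfig() *speechpb.RecognitionConfig {
    return &speechpb.RecognitionConfig{
        LanguageCode: "en-US",
        DiarizationConfig: &speechpb.SpeakerDiarizationConfig{
            EnableSpeakerDiarization: true,
            MinSpeakerCount:          2, // hint: at least two speakers
            MaxSpeakerCount:          4, // hint: at most four speakers
        },
    }
}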
func (*SpeakerDiarizationConfig) Descriptor (deprecated) ¶
func (*SpeakerDiarizationConfig) Descriptor() ([]byte, []int)
Deprecated: Use SpeakerDiarizationConfig.ProtoReflect.Descriptor instead.
func (*SpeakerDiarizationConfig) GetEnableSpeakerDiarization ¶
func (x *SpeakerDiarizationConfig) GetEnableSpeakerDiarization() bool
func (*SpeakerDiarizationConfig) GetMaxSpeakerCount ¶
func (x *SpeakerDiarizationConfig) GetMaxSpeakerCount() int32
func (*SpeakerDiarizationConfig) GetMinSpeakerCount ¶
func (x *SpeakerDiarizationConfig) GetMinSpeakerCount() int32
func (*SpeakerDiarizationConfig) GetSpeakerTag (deprecated) ¶
func (x *SpeakerDiarizationConfig) GetSpeakerTag() int32
Deprecated: Do not use.
func (*SpeakerDiarizationConfig) ProtoMessage ¶
func (*SpeakerDiarizationConfig) ProtoMessage()
func (*SpeakerDiarizationConfig) ProtoReflect ¶
func (x *SpeakerDiarizationConfig) ProtoReflect() protoreflect.Message
func (*SpeakerDiarizationConfig) Reset ¶
func (x *SpeakerDiarizationConfig) Reset()
func (*SpeakerDiarizationConfig) String ¶
func (x *SpeakerDiarizationConfig) String() string
type SpeechClient ¶
type SpeechClient interface {
    // Performs synchronous speech recognition: receive results after all audio
    // has been sent and processed.
    Recognize(ctx context.Context, in *RecognizeRequest, opts ...grpc.CallOption) (*RecognizeResponse, error)
    // Performs asynchronous speech recognition: receive results via the
    // google.longrunning.Operations interface. Returns either an
    // `Operation.error` or an `Operation.response` which contains
    // a `LongRunningRecognizeResponse` message.
    // For more information on asynchronous speech recognition, see the
    // [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
    LongRunningRecognize(ctx context.Context, in *LongRunningRecognizeRequest, opts ...grpc.CallOption) (*longrunning.Operation, error)
    // Performs bidirectional streaming speech recognition: receive results while
    // sending audio. This method is only available via the gRPC API (not REST).
    StreamingRecognize(ctx context.Context, opts ...grpc.CallOption) (Speech_StreamingRecognizeClient, error)
}
SpeechClient is the client API for Speech service.
For semantics around ctx use and closing/ending streaming RPCs, please refer to https://godoc.org/google.golang.org/grpc#ClientConn.NewStream.
func NewSpeechClient ¶
func NewSpeechClient(cc grpc.ClientConnInterface) SpeechClient
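A sketch of the streaming call pattern: the first request on the stream carries only the configuration, every later request carries only audio bytes, and responses are drained until io.EOF. conn is again assumed to be an authenticated connection, and production code would usually send and receive concurrently rather than sequentially as here:

package example

import (
    "context"
    "fmt"
    "io"

    speechpb "google.golang.org/genproto/googleapis/cloud/speech/v1" // assumed import path
    "google.golang.org/grpc"
)

func streamRecognize(ctx context.Context, conn grpc.ClientConnInterface, chunks [][]byte) error {
    stream, err := speechpb.NewSpeechClient(conn).StreamingRecognize(ctx)
    if err != nil {
        return err
    }
    // First message: configuration only.
    if err := stream.Send(&speechpb.StreamingRecognizeRequest{
        StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
            StreamingConfig: &speechpb.StreamingRecognitionConfig{
                Config: &speechpb.RecognitionConfig{
                    Encoding:        speechpb.RecognitionConfig_LINEAR16,
                    SampleRateHertz: 16000,
                    LanguageCode:    "en-US",
                },
                InterimResults: true,
            },
        },
    }); err != nil {
        return err
    }
    // Subsequent messages: audio content only.
    for _, chunk := range chunks {
        if err := stream.Send(&speechpb.StreamingRecognizeRequest{
            StreamingRequest: &speechpb.StreamingRecognizeRequest_AudioContent{AudioContent: chunk},
        }); err != nil {
            return err
        }
    }
    if err := stream.CloseSend(); err != nil {
        return err
    }
    for {
        resp, err := stream.Recv()
        if err == io.EOF {
            return nil
        }
        if err != nil {
            return err
        }
        for _, r := range resp.GetResults() {
            if alts := r.GetAlternatives(); len(alts) > 0 {
                fmt.Printf("final=%v %q\n", r.GetIsFinal(), alts[0].GetTranscript())
            }
        }
    }
}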
type SpeechContext ¶
type SpeechContext struct {
    // A list of strings containing words and phrases "hints" so that
    // the speech recognition is more likely to recognize them. This can be used
    // to improve the accuracy for specific words and phrases, for example, if
    // specific commands are typically spoken by the user. This can also be used
    // to add additional words to the vocabulary of the recognizer. See
    // [usage limits](https://cloud.google.com/speech-to-text/quotas#content).
    //
    // List items can also be set to classes for groups of words that represent
    // common concepts that occur in natural language. For example, rather than
    // providing phrase hints for every month of the year, using the $MONTH class
    // improves the likelihood of correctly transcribing audio that includes
    // months.
    Phrases []string `protobuf:"bytes,1,rep,name=phrases,proto3" json:"phrases,omitempty"`
    // contains filtered or unexported fields
}
Provides "hints" to the speech recognizer to favor specific words and phrases in the results.
func (*SpeechContext) Descriptor (deprecated) ¶
func (*SpeechContext) Descriptor() ([]byte, []int)
Deprecated: Use SpeechContext.ProtoReflect.Descriptor instead.
func (*SpeechContext) GetPhrases ¶
func (x *SpeechContext) GetPhrases() []string
func (*SpeechContext) ProtoMessage ¶
func (*SpeechContext) ProtoMessage()
func (*SpeechContext) ProtoReflect ¶
func (x *SpeechContext) ProtoReflect() protoreflect.Message
func (*SpeechContext) Reset ¶
func (x *SpeechContext) Reset()
func (*SpeechContext) String ¶
func (x *SpeechContext) String() string
type SpeechRecognitionAlternative ¶
type SpeechRecognitionAlternative struct {
	// Transcript text representing the words that the user spoke.
	Transcript string `protobuf:"bytes,1,opt,name=transcript,proto3" json:"transcript,omitempty"`
	// The confidence estimate between 0.0 and 1.0. A higher number
	// indicates an estimated greater likelihood that the recognized words are
	// correct. This field is set only for the top alternative of a non-streaming
	// result or of a streaming result where `is_final=true`.
	// This field is not guaranteed to be accurate and users should not rely on it
	// to be always provided.
	// The default of 0.0 is a sentinel value indicating `confidence` was not set.
	Confidence float32 `protobuf:"fixed32,2,opt,name=confidence,proto3" json:"confidence,omitempty"`
	// A list of word-specific information for each recognized word.
	// Note: When `enable_speaker_diarization` is true, you will see all the words
	// from the beginning of the audio.
	Words []*WordInfo `protobuf:"bytes,3,rep,name=words,proto3" json:"words,omitempty"`
	// contains filtered or unexported fields
}
Alternative hypotheses (a.k.a. n-best list).
func (*SpeechRecognitionAlternative) Descriptor
deprecated
func (*SpeechRecognitionAlternative) Descriptor() ([]byte, []int)
Deprecated: Use SpeechRecognitionAlternative.ProtoReflect.Descriptor instead.
func (*SpeechRecognitionAlternative) GetConfidence ¶
func (x *SpeechRecognitionAlternative) GetConfidence() float32
func (*SpeechRecognitionAlternative) GetTranscript ¶
func (x *SpeechRecognitionAlternative) GetTranscript() string
func (*SpeechRecognitionAlternative) GetWords ¶
func (x *SpeechRecognitionAlternative) GetWords() []*WordInfo
func (*SpeechRecognitionAlternative) ProtoMessage ¶
func (*SpeechRecognitionAlternative) ProtoMessage()
func (*SpeechRecognitionAlternative) ProtoReflect ¶
func (x *SpeechRecognitionAlternative) ProtoReflect() protoreflect.Message
func (*SpeechRecognitionAlternative) Reset ¶
func (x *SpeechRecognitionAlternative) Reset()
func (*SpeechRecognitionAlternative) String ¶
func (x *SpeechRecognitionAlternative) String() string
type SpeechRecognitionResult ¶
type SpeechRecognitionResult struct {
	// May contain one or more recognition hypotheses (up to the
	// maximum specified in `max_alternatives`).
	// These alternatives are ordered in terms of accuracy, with the top (first)
	// alternative being the most probable, as ranked by the recognizer.
	Alternatives []*SpeechRecognitionAlternative `protobuf:"bytes,1,rep,name=alternatives,proto3" json:"alternatives,omitempty"`
	// For multi-channel audio, this is the channel number corresponding to the
	// recognized result for the audio from that channel.
	// For audio_channel_count = N, its output values can range from '1' to 'N'.
	ChannelTag int32 `protobuf:"varint,2,opt,name=channel_tag,json=channelTag,proto3" json:"channel_tag,omitempty"`
	// contains filtered or unexported fields
}
A speech recognition result corresponding to a portion of the audio.
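A sketch of consuming these results per channel, assuming resp is a *speechpb.RecognizeResponse obtained as in the client sketch above and fmt is imported:

for _, result := range resp.GetResults() {
	alts := result.GetAlternatives()
	if len(alts) == 0 {
		continue
	}
	// Alternatives are ordered best-first, so alts[0] is the top hypothesis.
	fmt.Printf("channel %d: %q\n", result.GetChannelTag(), alts[0].GetTranscript())
}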
func (*SpeechRecognitionResult) Descriptor
deprecated
func (*SpeechRecognitionResult) Descriptor() ([]byte, []int)
Deprecated: Use SpeechRecognitionResult.ProtoReflect.Descriptor instead.
func (*SpeechRecognitionResult) GetAlternatives ¶
func (x *SpeechRecognitionResult) GetAlternatives() []*SpeechRecognitionAlternative
func (*SpeechRecognitionResult) GetChannelTag ¶
func (x *SpeechRecognitionResult) GetChannelTag() int32
func (*SpeechRecognitionResult) ProtoMessage ¶
func (*SpeechRecognitionResult) ProtoMessage()
func (*SpeechRecognitionResult) ProtoReflect ¶
func (x *SpeechRecognitionResult) ProtoReflect() protoreflect.Message
func (*SpeechRecognitionResult) Reset ¶
func (x *SpeechRecognitionResult) Reset()
func (*SpeechRecognitionResult) String ¶
func (x *SpeechRecognitionResult) String() string
type SpeechServer ¶
type SpeechServer interface {
	// Performs synchronous speech recognition: receive results after all audio
	// has been sent and processed.
	Recognize(context.Context, *RecognizeRequest) (*RecognizeResponse, error)
	// Performs asynchronous speech recognition: receive results via the
	// google.longrunning.Operations interface. Returns either an
	// `Operation.error` or an `Operation.response` which contains
	// a `LongRunningRecognizeResponse` message.
	// For more information on asynchronous speech recognition, see the
	// [how-to](https://cloud.google.com/speech-to-text/docs/async-recognize).
	LongRunningRecognize(context.Context, *LongRunningRecognizeRequest) (*longrunning.Operation, error)
	// Performs bidirectional streaming speech recognition: receive results while
	// sending audio. This method is only available via the gRPC API (not REST).
	StreamingRecognize(Speech_StreamingRecognizeServer) error
}
SpeechServer is the server API for Speech service.
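A minimal server-side sketch, assuming the speechpb alias plus context, net, and google.golang.org/grpc imports: embed UnimplementedSpeechServer (documented later in this listing) for forward compatibility, override only what you need, and register with RegisterSpeechServer. The address and stub body are placeholders.

type stubSpeechServer struct {
	speechpb.UnimplementedSpeechServer // unoverridden methods return codes.Unimplemented
}

func (s *stubSpeechServer) Recognize(ctx context.Context, req *speechpb.RecognizeRequest) (*speechpb.RecognizeResponse, error) {
	// A real implementation would run recognition on req here.
	return &speechpb.RecognizeResponse{}, nil
}

func serve() error {
	lis, err := net.Listen("tcp", ":50051") // placeholder address
	if err != nil {
		return err
	}
	s := grpc.NewServer()
	speechpb.RegisterSpeechServer(s, &stubSpeechServer{})
	return s.Serve(lis)
}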
type Speech_StreamingRecognizeClient ¶
type Speech_StreamingRecognizeClient interface {
	Send(*StreamingRecognizeRequest) error
	Recv() (*StreamingRecognizeResponse, error)
	grpc.ClientStream
}
type Speech_StreamingRecognizeServer ¶
type Speech_StreamingRecognizeServer interface {
	Send(*StreamingRecognizeResponse) error
	Recv() (*StreamingRecognizeRequest, error)
	grpc.ServerStream
}
type StreamingRecognitionConfig ¶
type StreamingRecognitionConfig struct {
	// Required. Provides information to the recognizer that specifies how to
	// process the request.
	Config *RecognitionConfig `protobuf:"bytes,1,opt,name=config,proto3" json:"config,omitempty"`
	// If `false` or omitted, the recognizer will perform continuous
	// recognition (continuing to wait for and process audio even if the user
	// pauses speaking) until the client closes the input stream (gRPC API) or
	// until the maximum time limit has been reached. May return multiple
	// `StreamingRecognitionResult`s with the `is_final` flag set to `true`.
	//
	// If `true`, the recognizer will detect a single spoken utterance. When it
	// detects that the user has paused or stopped speaking, it will return an
	// `END_OF_SINGLE_UTTERANCE` event and cease recognition. It will return no
	// more than one `StreamingRecognitionResult` with the `is_final` flag set to
	// `true`.
	//
	// The `single_utterance` field can only be used with specified models,
	// otherwise an error is thrown. The `model` field in [`RecognitionConfig`][]
	// must be set to:
	//
	// * `command_and_search`
	// * `phone_call` AND additional field `useEnhanced`=`true`
	// * The `model` field is left undefined. In this case the API auto-selects
	//   a model based on any other parameters that you set in
	//   `RecognitionConfig`.
	SingleUtterance bool `protobuf:"varint,2,opt,name=single_utterance,json=singleUtterance,proto3" json:"single_utterance,omitempty"`
	// If `true`, interim results (tentative hypotheses) may be
	// returned as they become available (these interim results are indicated with
	// the `is_final=false` flag).
	// If `false` or omitted, only `is_final=true` result(s) are returned.
	InterimResults bool `protobuf:"varint,3,opt,name=interim_results,json=interimResults,proto3" json:"interim_results,omitempty"`
	// contains filtered or unexported fields
}
Provides information to the recognizer that specifies how to process the request.
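Continuing the earlier sketches, a streaming config for continuous recognition with interim results might look like this; all values are illustrative:

streamCfg := &speechpb.StreamingRecognitionConfig{
	Config: &speechpb.RecognitionConfig{
		Encoding:        speechpb.RecognitionConfig_LINEAR16,
		SampleRateHertz: 16000,
		LanguageCode:    "en-US",
	},
	InterimResults: true, // tentative hypotheses arrive with is_final=false
	// SingleUtterance is left false: recognize until the stream is closed.
}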
func (*StreamingRecognitionConfig) Descriptor
deprecated
func (*StreamingRecognitionConfig) Descriptor() ([]byte, []int)
Deprecated: Use StreamingRecognitionConfig.ProtoReflect.Descriptor instead.
func (*StreamingRecognitionConfig) GetConfig ¶
func (x *StreamingRecognitionConfig) GetConfig() *RecognitionConfig
func (*StreamingRecognitionConfig) GetInterimResults ¶
func (x *StreamingRecognitionConfig) GetInterimResults() bool
func (*StreamingRecognitionConfig) GetSingleUtterance ¶
func (x *StreamingRecognitionConfig) GetSingleUtterance() bool
func (*StreamingRecognitionConfig) ProtoMessage ¶
func (*StreamingRecognitionConfig) ProtoMessage()
func (*StreamingRecognitionConfig) ProtoReflect ¶
func (x *StreamingRecognitionConfig) ProtoReflect() protoreflect.Message
func (*StreamingRecognitionConfig) Reset ¶
func (x *StreamingRecognitionConfig) Reset()
func (*StreamingRecognitionConfig) String ¶
func (x *StreamingRecognitionConfig) String() string
type StreamingRecognitionResult ¶
type StreamingRecognitionResult struct {
	// May contain one or more recognition hypotheses (up to the
	// maximum specified in `max_alternatives`).
	// These alternatives are ordered in terms of accuracy, with the top (first)
	// alternative being the most probable, as ranked by the recognizer.
	Alternatives []*SpeechRecognitionAlternative `protobuf:"bytes,1,rep,name=alternatives,proto3" json:"alternatives,omitempty"`
	// If `false`, this `StreamingRecognitionResult` represents an
	// interim result that may change. If `true`, this is the final time the
	// speech service will return this particular `StreamingRecognitionResult`,
	// the recognizer will not return any further hypotheses for this portion of
	// the transcript and corresponding audio.
	IsFinal bool `protobuf:"varint,2,opt,name=is_final,json=isFinal,proto3" json:"is_final,omitempty"`
	// An estimate of the likelihood that the recognizer will not
	// change its guess about this interim result. Values range from 0.0
	// (completely unstable) to 1.0 (completely stable).
	// This field is only provided for interim results (`is_final=false`).
	// The default of 0.0 is a sentinel value indicating `stability` was not set.
	Stability float32 `protobuf:"fixed32,3,opt,name=stability,proto3" json:"stability,omitempty"`
	// Time offset of the end of this result relative to the
	// beginning of the audio.
	ResultEndTime *durationpb.Duration `protobuf:"bytes,4,opt,name=result_end_time,json=resultEndTime,proto3" json:"result_end_time,omitempty"`
	// For multi-channel audio, this is the channel number corresponding to the
	// recognized result for the audio from that channel.
	// For audio_channel_count = N, its output values can range from '1' to 'N'.
	ChannelTag int32 `protobuf:"varint,5,opt,name=channel_tag,json=channelTag,proto3" json:"channel_tag,omitempty"`
	// The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag of
	// the language in this result. This language code was detected to have the
	// most likelihood of being spoken in the audio.
	LanguageCode string `protobuf:"bytes,6,opt,name=language_code,json=languageCode,proto3" json:"language_code,omitempty"`
	// contains filtered or unexported fields
}
A streaming speech recognition result corresponding to a portion of the audio that is currently being processed.
func (*StreamingRecognitionResult) Descriptor
deprecated
func (*StreamingRecognitionResult) Descriptor() ([]byte, []int)
Deprecated: Use StreamingRecognitionResult.ProtoReflect.Descriptor instead.
func (*StreamingRecognitionResult) GetAlternatives ¶
func (x *StreamingRecognitionResult) GetAlternatives() []*SpeechRecognitionAlternative
func (*StreamingRecognitionResult) GetChannelTag ¶
func (x *StreamingRecognitionResult) GetChannelTag() int32
func (*StreamingRecognitionResult) GetIsFinal ¶
func (x *StreamingRecognitionResult) GetIsFinal() bool
func (*StreamingRecognitionResult) GetLanguageCode ¶
func (x *StreamingRecognitionResult) GetLanguageCode() string
func (*StreamingRecognitionResult) GetResultEndTime ¶
func (x *StreamingRecognitionResult) GetResultEndTime() *durationpb.Duration
func (*StreamingRecognitionResult) GetStability ¶
func (x *StreamingRecognitionResult) GetStability() float32
func (*StreamingRecognitionResult) ProtoMessage ¶
func (*StreamingRecognitionResult) ProtoMessage()
func (*StreamingRecognitionResult) ProtoReflect ¶
func (x *StreamingRecognitionResult) ProtoReflect() protoreflect.Message
func (*StreamingRecognitionResult) Reset ¶
func (x *StreamingRecognitionResult) Reset()
func (*StreamingRecognitionResult) String ¶
func (x *StreamingRecognitionResult) String() string
type StreamingRecognizeRequest ¶
type StreamingRecognizeRequest struct {
	// The streaming request, which is either a streaming config or audio content.
	//
	// Types that are assignable to StreamingRequest:
	//	*StreamingRecognizeRequest_StreamingConfig
	//	*StreamingRecognizeRequest_AudioContent
	StreamingRequest isStreamingRecognizeRequest_StreamingRequest `protobuf_oneof:"streaming_request"`
	// contains filtered or unexported fields
}
The top-level message sent by the client for the `StreamingRecognize` method. Multiple `StreamingRecognizeRequest` messages are sent. The first message must contain a `streaming_config` message and must not contain `audio_content`. All subsequent messages must contain `audio_content` and must not contain a `streaming_config` message.
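A sketch of that required ordering, written as the body of a function returning error; it assumes client and streamCfg from the earlier sketches and a hypothetical audioChunks <-chan []byte supplying audio encoded as specified in the config:

stream, err := client.StreamingRecognize(ctx)
if err != nil {
	return err
}
// First message: streaming_config only, no audio_content.
if err := stream.Send(&speechpb.StreamingRecognizeRequest{
	StreamingRequest: &speechpb.StreamingRecognizeRequest_StreamingConfig{
		StreamingConfig: streamCfg,
	},
}); err != nil {
	return err
}
// All subsequent messages: audio_content only.
for chunk := range audioChunks {
	if err := stream.Send(&speechpb.StreamingRecognizeRequest{
		StreamingRequest: &speechpb.StreamingRecognizeRequest_AudioContent{
			AudioContent: chunk,
		},
	}); err != nil {
		return err
	}
}
// Half-close so the server can flush any remaining results.
if err := stream.CloseSend(); err != nil {
	return err
}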
func (*StreamingRecognizeRequest) Descriptor
deprecated
func (*StreamingRecognizeRequest) Descriptor() ([]byte, []int)
Deprecated: Use StreamingRecognizeRequest.ProtoReflect.Descriptor instead.
func (*StreamingRecognizeRequest) GetAudioContent ¶
func (x *StreamingRecognizeRequest) GetAudioContent() []byte
func (*StreamingRecognizeRequest) GetStreamingConfig ¶
func (x *StreamingRecognizeRequest) GetStreamingConfig() *StreamingRecognitionConfig
func (*StreamingRecognizeRequest) GetStreamingRequest ¶
func (m *StreamingRecognizeRequest) GetStreamingRequest() isStreamingRecognizeRequest_StreamingRequest
func (*StreamingRecognizeRequest) ProtoMessage ¶
func (*StreamingRecognizeRequest) ProtoMessage()
func (*StreamingRecognizeRequest) ProtoReflect ¶
func (x *StreamingRecognizeRequest) ProtoReflect() protoreflect.Message
func (*StreamingRecognizeRequest) Reset ¶
func (x *StreamingRecognizeRequest) Reset()
func (*StreamingRecognizeRequest) String ¶
func (x *StreamingRecognizeRequest) String() string
type StreamingRecognizeRequest_AudioContent ¶
type StreamingRecognizeRequest_AudioContent struct {
	// The audio data to be recognized. Sequential chunks of audio data are sent
	// in sequential `StreamingRecognizeRequest` messages. The first
	// `StreamingRecognizeRequest` message must not contain `audio_content` data
	// and all subsequent `StreamingRecognizeRequest` messages must contain
	// `audio_content` data. The audio bytes must be encoded as specified in
	// `RecognitionConfig`. Note: as with all bytes fields, proto buffers use a
	// pure binary representation (not base64). See
	// [content limits](https://cloud.google.com/speech-to-text/quotas#content).
	AudioContent []byte `protobuf:"bytes,2,opt,name=audio_content,json=audioContent,proto3,oneof"`
}
type StreamingRecognizeRequest_StreamingConfig ¶
type StreamingRecognizeRequest_StreamingConfig struct {
	// Provides information to the recognizer that specifies how to process the
	// request. The first `StreamingRecognizeRequest` message must contain a
	// `streaming_config` message.
	StreamingConfig *StreamingRecognitionConfig `protobuf:"bytes,1,opt,name=streaming_config,json=streamingConfig,proto3,oneof"`
}
type StreamingRecognizeResponse ¶
type StreamingRecognizeResponse struct {
	// If set, returns a [google.rpc.Status][google.rpc.Status] message that
	// specifies the error for the operation.
	Error *status.Status `protobuf:"bytes,1,opt,name=error,proto3" json:"error,omitempty"`
	// This repeated list contains zero or more results that
	// correspond to consecutive portions of the audio currently being processed.
	// It contains zero or one `is_final=true` result (the newly settled portion),
	// followed by zero or more `is_final=false` results (the interim results).
	Results []*StreamingRecognitionResult `protobuf:"bytes,2,rep,name=results,proto3" json:"results,omitempty"`
	// Indicates the type of speech event.
	SpeechEventType StreamingRecognizeResponse_SpeechEventType `` /* 180-byte string literal not displayed */
	// When available, billed audio seconds for the stream.
	// Set only if this is the last response in the stream.
	TotalBilledTime *durationpb.Duration `protobuf:"bytes,5,opt,name=total_billed_time,json=totalBilledTime,proto3" json:"total_billed_time,omitempty"`
	// contains filtered or unexported fields
}
`StreamingRecognizeResponse` is the only message returned to the client by `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse` messages is streamed back to the client. If there is no recognizable audio and `single_utterance` is set to false, then no messages are streamed back to the client.
Here's an example of a series of `StreamingRecognizeResponse`s that might be returned while processing audio:
1. results { alternatives { transcript: "tube" } stability: 0.01 }
2. results { alternatives { transcript: "to be a" } stability: 0.01 }
3. results { alternatives { transcript: "to be" } stability: 0.9 } results { alternatives { transcript: " or not to be" } stability: 0.01 }
4. results { alternatives { transcript: "to be or not to be" confidence: 0.92 } alternatives { transcript: "to bee or not to bee" } is_final: true }
5. results { alternatives { transcript: " that's" } stability: 0.01 }
6. results { alternatives { transcript: " that is" } stability: 0.9 } results { alternatives { transcript: " the question" } stability: 0.01 }
7. results { alternatives { transcript: " that is the question" confidence: 0.98 } alternatives { transcript: " that was the question" } is_final: true }
Notes:
- Only two of the above responses, #4 and #7, contain final results; they are indicated by `is_final: true`. Concatenating these together generates the full transcript: "to be or not to be that is the question".
- The others contain interim `results`. #3 and #6 contain two interim `results`: the first portion has a high stability and is less likely to change; the second portion has a low stability and is very likely to change. A UI designer might choose to show only high stability `results`.
- The specific `stability` and `confidence` values shown above are only for illustrative purposes. Actual values may vary.
- In each response, only one of these fields will be set: `error`, `speech_event_type`, or one or more (repeated) `results`.
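A receive loop matching these notes might look like the following sketch; it assumes the io and fmt imports plus the stream from the send sketch above, treats is_final=true results as settled, and surfaces only high-stability interim results (the 0.8 threshold is an arbitrary illustration):

var transcript string
for {
	resp, err := stream.Recv()
	if err == io.EOF {
		break // server closed the stream
	}
	if err != nil {
		return err
	}
	if st := resp.GetError(); st != nil {
		return fmt.Errorf("recognition failed: %s", st.GetMessage())
	}
	for _, result := range resp.GetResults() {
		alts := result.GetAlternatives()
		if len(alts) == 0 {
			continue
		}
		if result.GetIsFinal() {
			transcript += alts[0].GetTranscript() // settled; will not change
		} else if result.GetStability() > 0.8 {
			fmt.Println("interim:", alts[0].GetTranscript())
		}
	}
}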
func (*StreamingRecognizeResponse) Descriptor
deprecated
func (*StreamingRecognizeResponse) Descriptor() ([]byte, []int)
Deprecated: Use StreamingRecognizeResponse.ProtoReflect.Descriptor instead.
func (*StreamingRecognizeResponse) GetError ¶
func (x *StreamingRecognizeResponse) GetError() *status.Status
func (*StreamingRecognizeResponse) GetResults ¶
func (x *StreamingRecognizeResponse) GetResults() []*StreamingRecognitionResult
func (*StreamingRecognizeResponse) GetSpeechEventType ¶
func (x *StreamingRecognizeResponse) GetSpeechEventType() StreamingRecognizeResponse_SpeechEventType
func (*StreamingRecognizeResponse) GetTotalBilledTime ¶
func (x *StreamingRecognizeResponse) GetTotalBilledTime() *durationpb.Duration
func (*StreamingRecognizeResponse) ProtoMessage ¶
func (*StreamingRecognizeResponse) ProtoMessage()
func (*StreamingRecognizeResponse) ProtoReflect ¶
func (x *StreamingRecognizeResponse) ProtoReflect() protoreflect.Message
func (*StreamingRecognizeResponse) Reset ¶
func (x *StreamingRecognizeResponse) Reset()
func (*StreamingRecognizeResponse) String ¶
func (x *StreamingRecognizeResponse) String() string
type StreamingRecognizeResponse_SpeechEventType ¶
type StreamingRecognizeResponse_SpeechEventType int32
Indicates the type of speech event.
const (
	// No speech event specified.
	StreamingRecognizeResponse_SPEECH_EVENT_UNSPECIFIED StreamingRecognizeResponse_SpeechEventType = 0
	// This event indicates that the server has detected the end of the user's
	// speech utterance and expects no additional speech. Therefore, the server
	// will not process additional audio (although it may subsequently return
	// additional results). The client should stop sending additional audio
	// data, half-close the gRPC connection, and wait for any additional results
	// until the server closes the gRPC connection. This event is only sent if
	// `single_utterance` was set to `true`, and is not used otherwise.
	StreamingRecognizeResponse_END_OF_SINGLE_UTTERANCE StreamingRecognizeResponse_SpeechEventType = 1
)
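With single_utterance=true, a client might react to this event as sketched below, inside the receive loop shown earlier; stopSending is a hypothetical channel signaling whatever goroutine feeds audio_content messages:

if resp.GetSpeechEventType() == speechpb.StreamingRecognizeResponse_END_OF_SINGLE_UTTERANCE {
	close(stopSending)     // stop feeding audio_content messages
	_ = stream.CloseSend() // half-close; keep calling Recv until EOF for final results
}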
func (StreamingRecognizeResponse_SpeechEventType) Descriptor ¶
func (StreamingRecognizeResponse_SpeechEventType) Descriptor() protoreflect.EnumDescriptor
func (StreamingRecognizeResponse_SpeechEventType) EnumDescriptor
deprecated
func (StreamingRecognizeResponse_SpeechEventType) EnumDescriptor() ([]byte, []int)
Deprecated: Use StreamingRecognizeResponse_SpeechEventType.Descriptor instead.
func (StreamingRecognizeResponse_SpeechEventType) Number ¶
func (x StreamingRecognizeResponse_SpeechEventType) Number() protoreflect.EnumNumber
func (StreamingRecognizeResponse_SpeechEventType) String ¶
func (x StreamingRecognizeResponse_SpeechEventType) String() string
func (StreamingRecognizeResponse_SpeechEventType) Type ¶
func (StreamingRecognizeResponse_SpeechEventType) Type() protoreflect.EnumType
type TranscriptOutputConfig ¶
type TranscriptOutputConfig struct {
	// Types that are assignable to OutputType:
	//	*TranscriptOutputConfig_GcsUri
	OutputType isTranscriptOutputConfig_OutputType `protobuf_oneof:"output_type"`
	// contains filtered or unexported fields
}
Specifies an optional destination for the recognition results.
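For illustration, an asynchronous request that writes its transcript to Cloud Storage; cfg, client, and ctx come from the earlier sketches, and the bucket and object names are placeholders:

op, err := client.LongRunningRecognize(ctx, &speechpb.LongRunningRecognizeRequest{
	Config: cfg, // a *speechpb.RecognitionConfig as sketched earlier
	Audio: &speechpb.RecognitionAudio{
		AudioSource: &speechpb.RecognitionAudio_Uri{Uri: "gs://my-bucket/long-audio.flac"},
	},
	OutputConfig: &speechpb.TranscriptOutputConfig{
		OutputType: &speechpb.TranscriptOutputConfig_GcsUri{
			GcsUri: "gs://my-bucket/transcripts/result.json",
		},
	},
})

The returned op can then be polled for completion through the google.longrunning.Operations interface, as described for LongRunningRecognize above.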
func (*TranscriptOutputConfig) Descriptor
deprecated
func (*TranscriptOutputConfig) Descriptor() ([]byte, []int)
Deprecated: Use TranscriptOutputConfig.ProtoReflect.Descriptor instead.
func (*TranscriptOutputConfig) GetGcsUri ¶
func (x *TranscriptOutputConfig) GetGcsUri() string
func (*TranscriptOutputConfig) GetOutputType ¶
func (m *TranscriptOutputConfig) GetOutputType() isTranscriptOutputConfig_OutputType
func (*TranscriptOutputConfig) ProtoMessage ¶
func (*TranscriptOutputConfig) ProtoMessage()
func (*TranscriptOutputConfig) ProtoReflect ¶
func (x *TranscriptOutputConfig) ProtoReflect() protoreflect.Message
func (*TranscriptOutputConfig) Reset ¶
func (x *TranscriptOutputConfig) Reset()
func (*TranscriptOutputConfig) String ¶
func (x *TranscriptOutputConfig) String() string
type TranscriptOutputConfig_GcsUri ¶
type TranscriptOutputConfig_GcsUri struct {
	// Specifies a Cloud Storage URI for the recognition results. Must be
	// specified in the format: `gs://bucket_name/object_name`, and the bucket
	// must already exist.
	GcsUri string `protobuf:"bytes,1,opt,name=gcs_uri,json=gcsUri,proto3,oneof"`
}
type UnimplementedSpeechServer ¶
type UnimplementedSpeechServer struct { }
UnimplementedSpeechServer can be embedded to have forward compatible implementations.
func (*UnimplementedSpeechServer) LongRunningRecognize ¶
func (*UnimplementedSpeechServer) LongRunningRecognize(context.Context, *LongRunningRecognizeRequest) (*longrunning.Operation, error)
func (*UnimplementedSpeechServer) Recognize ¶
func (*UnimplementedSpeechServer) Recognize(context.Context, *RecognizeRequest) (*RecognizeResponse, error)
func (*UnimplementedSpeechServer) StreamingRecognize ¶
func (*UnimplementedSpeechServer) StreamingRecognize(Speech_StreamingRecognizeServer) error
type WordInfo ¶
type WordInfo struct {
	// Time offset relative to the beginning of the audio,
	// and corresponding to the start of the spoken word.
	// This field is only set if `enable_word_time_offsets=true` and only
	// in the top hypothesis.
	// This is an experimental feature and the accuracy of the time offset can
	// vary.
	StartTime *durationpb.Duration `protobuf:"bytes,1,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"`
	// Time offset relative to the beginning of the audio,
	// and corresponding to the end of the spoken word.
	// This field is only set if `enable_word_time_offsets=true` and only
	// in the top hypothesis.
	// This is an experimental feature and the accuracy of the time offset can
	// vary.
	EndTime *durationpb.Duration `protobuf:"bytes,2,opt,name=end_time,json=endTime,proto3" json:"end_time,omitempty"`
	// The word corresponding to this set of information.
	Word string `protobuf:"bytes,3,opt,name=word,proto3" json:"word,omitempty"`
	// Output only. A distinct integer value is assigned for every speaker within
	// the audio. This field specifies which one of those speakers was detected to
	// have spoken this word. Value ranges from '1' to diarization_speaker_count.
	// speaker_tag is set if enable_speaker_diarization = 'true' and only in the
	// top alternative.
	SpeakerTag int32 `protobuf:"varint,5,opt,name=speaker_tag,json=speakerTag,proto3" json:"speaker_tag,omitempty"`
	// contains filtered or unexported fields
}
Word-specific information for recognized words.
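A sketch of printing word timings and speaker tags; it assumes the request set enable_word_time_offsets=true (and speaker diarization, for speaker_tag), that alt is a *speechpb.SpeechRecognitionAlternative, and that fmt is imported:

for _, w := range alt.GetWords() {
	fmt.Printf("%-12s %5.2fs -> %5.2fs  speaker %d\n",
		w.GetWord(),
		w.GetStartTime().AsDuration().Seconds(),
		w.GetEndTime().AsDuration().Seconds(),
		w.GetSpeakerTag())
}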
func (*WordInfo) Descriptor
deprecated
func (*WordInfo) Descriptor() ([]byte, []int)
Deprecated: Use WordInfo.ProtoReflect.Descriptor instead.
func (*WordInfo) GetEndTime ¶
func (x *WordInfo) GetEndTime() *durationpb.Duration
func (*WordInfo) GetSpeakerTag ¶
func (x *WordInfo) GetSpeakerTag() int32
func (*WordInfo) GetStartTime ¶
func (x *WordInfo) GetStartTime() *durationpb.Duration
func (*WordInfo) GetWord ¶
func (x *WordInfo) GetWord() string
func (*WordInfo) ProtoMessage ¶
func (*WordInfo) ProtoMessage()
func (*WordInfo) ProtoReflect ¶
func (x *WordInfo) ProtoReflect() protoreflect.Message