Documentation ¶
Overview ¶
Package common contains the definitions for many of the shared objects and properties in the Speech SDK.
Index ¶
- type CancellationErrorCode
- type CancellationReason
- type CarbonError
- type OperationOutcome
- type OutputFormat
- type ProfanityOption
- type PropertyCollection
- func (properties PropertyCollection) Close()
- func (properties PropertyCollection) GetProperty(id PropertyID, defaultValue string) string
- func (properties PropertyCollection) GetPropertyByString(name string, defaultValue string) string
- func (properties PropertyCollection) SetProperty(id PropertyID, value string) error
- func (properties PropertyCollection) SetPropertyByString(name string, value string) error
- type PropertyID
- type ResultReason
- type SPXHandle
- type ServicePropertyChannel
- type SpeechSynthesisOutputFormat
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CancellationErrorCode ¶
type CancellationErrorCode int
CancellationErrorCode defines the error code in case CancellationReason is Error.
const (
    // NoError indicates no error.
    // If CancellationReason is EndOfStream, CancellationErrorCode is set to NoError.
    NoError CancellationErrorCode = 0
    // AuthenticationFailure indicates an authentication error.
    // An authentication error occurs if the subscription key or authorization token is invalid, expired,
    // or does not match the region being used.
    AuthenticationFailure CancellationErrorCode = 1
    // BadRequest indicates that one or more recognition parameters are invalid or the audio format is not supported.
    BadRequest CancellationErrorCode = 2
    // TooManyRequests indicates that the number of parallel requests exceeded the number of allowed concurrent transcriptions for the subscription.
    TooManyRequests CancellationErrorCode = 3
    // Forbidden indicates that the free subscription used by the request ran out of quota.
    Forbidden CancellationErrorCode = 4
    // ConnectionFailure indicates a connection error.
    ConnectionFailure CancellationErrorCode = 5
    // ServiceTimeout indicates a time-out error when waiting for a response from the service.
    ServiceTimeout CancellationErrorCode = 6
    // ServiceError indicates that an error is returned by the service.
    ServiceError CancellationErrorCode = 7
    // ServiceUnavailable indicates that the service is currently unavailable.
    ServiceUnavailable CancellationErrorCode = 8
    // RuntimeError indicates an unexpected runtime error.
    RuntimeError CancellationErrorCode = 9
)
type CancellationReason ¶
type CancellationReason int
CancellationReason defines the possible reasons a recognition result might be canceled.
const (
    // Error indicates that an error occurred during speech recognition.
    Error CancellationReason = 1
    // EndOfStream indicates that the end of the audio stream was reached.
    EndOfStream CancellationReason = 2
)
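For orientation, here is a minimal sketch of how the two cancellation enumerations above are typically inspected together. It assumes imports of "fmt" and this package (module path github.com/Microsoft/cognitive-services-speech-sdk-go/common); the reason, code, and details values are assumed to come from a CancellationDetails-style object produced by a recognizer elsewhere in the SDK, and the helper itself is illustrative, not part of the package.

// handleCancellation is illustrative only: its arguments are assumed to come from
// a CancellationDetails-style value produced by a recognizer elsewhere in the SDK.
func handleCancellation(reason common.CancellationReason, code common.CancellationErrorCode, details string) {
    switch reason {
    case common.EndOfStream:
        // Not an error: the audio input simply ended, and code will be NoError.
        fmt.Println("end of audio stream reached")
    case common.Error:
        switch code {
        case common.AuthenticationFailure:
            fmt.Println("invalid or expired key/token, or wrong region:", details)
        case common.TooManyRequests, common.Forbidden:
            fmt.Println("quota or concurrency limit reached:", details)
        case common.ConnectionFailure, common.ServiceTimeout, common.ServiceUnavailable:
            fmt.Println("transient network or service problem, consider retrying:", details)
        default:
            fmt.Println("recognition canceled:", details)
        }
    }
}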
type CarbonError ¶
type CarbonError struct {
Code uintptr
}
func NewCarbonError ¶
func NewCarbonError(code uintptr) CarbonError
func (CarbonError) Error ¶
func (e CarbonError) Error() string
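Because CarbonError implements the standard error interface, SDK errors can be inspected with errors.As to recover the underlying native status code. A small sketch, assuming imports of "errors" and "fmt" alongside this package; the err value stands in for any error returned by an SDK call.

// describe is an illustrative helper, not part of the SDK.
func describe(err error) string {
    var ce common.CarbonError
    if errors.As(err, &ce) {
        // ce.Code carries the raw status code reported by the native Speech SDK layer.
        return fmt.Sprintf("speech SDK native error, code=0x%x", ce.Code)
    }
    return err.Error()
}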
type OperationOutcome ¶
type OperationOutcome struct {
    // Error is present (not nil) if the operation failed.
    Error error
}
OperationOutcome is the base type of operation outcomes.
func (OperationOutcome) Failed ¶
func (outcome OperationOutcome) Failed() bool
Failed returns true if the operation failed.
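Richer outcome types elsewhere in the SDK build on this base type. A minimal sketch of the usual check, with the outcome constructed by hand purely for illustration (assumes imports of "errors" and "log"):

// checkOutcome is illustrative only; real outcomes are produced by SDK operations.
func checkOutcome() {
    outcome := common.OperationOutcome{Error: errors.New("service unreachable")}
    if outcome.Failed() {
        log.Printf("operation failed: %v", outcome.Error)
    }
}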
type OutputFormat ¶
type OutputFormat int
OutputFormat defines the possible output formats.
const (
    // Simple output format.
    Simple OutputFormat = 0
    // Detailed output format.
    Detailed OutputFormat = 1
)
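The output format is normally requested through a speech configuration rather than by using these constants directly; the PropertyID documentation below points to SpeechConfig.SetOutputFormat for this. A hedged sketch, where the speech package import path (github.com/Microsoft/cognitive-services-speech-sdk-go/speech) and the exact SetOutputFormat signature are assumptions rather than something documented here:

// requestDetailedOutput is illustrative: the *speech.SpeechConfig type and the
// SetOutputFormat signature shown here are assumptions about the SDK's speech package.
func requestDetailedOutput(config *speech.SpeechConfig) {
    // Ask the service for detailed results instead of the default simple format.
    if err := config.SetOutputFormat(common.Detailed); err != nil {
        log.Printf("could not request detailed output: %v", err)
    }
}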
type ProfanityOption ¶
type ProfanityOption int
ProfanityOption defines the profanity option.
const (
    // Masked profanity option.
    Masked ProfanityOption = 0
    // Removed profanity option.
    Removed ProfanityOption = 1
    // Raw profanity option.
    Raw ProfanityOption = 2
)
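The matching service-side setting is the SpeechServiceResponseProfanityOption property described below, whose allowed string values are "masked", "removed", and "raw". The helper below is purely illustrative (not part of the SDK) and maps the enumeration onto those strings.

// profanityString maps a ProfanityOption onto the string value accepted by the
// SpeechServiceResponseProfanityOption property ("masked", "removed", "raw").
func profanityString(o common.ProfanityOption) string {
    switch o {
    case common.Removed:
        return "removed"
    case common.Raw:
        return "raw"
    default: // common.Masked
        return "masked"
    }
}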
type PropertyCollection ¶
type PropertyCollection struct {
// contains filtered or unexported fields
}
PropertyCollection is a collection of properties and their values, used to retrieve and set property values on SDK objects.
func NewPropertyCollectionFromHandle ¶
func NewPropertyCollectionFromHandle(handle SPXHandle) PropertyCollection
NewPropertyCollectionFromHandle creates a PropertyCollection from a handle (for internal use).
func (PropertyCollection) Close ¶
func (properties PropertyCollection) Close()
Close disposes the associated resources.
func (PropertyCollection) GetProperty ¶
func (properties PropertyCollection) GetProperty(id PropertyID, defaultValue string) string
GetProperty returns the value of a property. If the property value is not defined, the specified default value is returned.
func (PropertyCollection) GetPropertyByString ¶
func (properties PropertyCollection) GetPropertyByString(name string, defaultValue string) string
GetPropertyByString returns the value of a property. If the property value is not defined, the specified default value is returned.
func (PropertyCollection) SetProperty ¶
func (properties PropertyCollection) SetProperty(id PropertyID, value string) error
SetProperty sets the value of a property.
func (PropertyCollection) SetPropertyByString ¶
func (properties PropertyCollection) SetPropertyByString(name string, value string) error
SetPropertyByString sets the value of a property.
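A sketch of typical read and write access, assuming imports of "fmt" and "log" and a PropertyCollection obtained from a config or result elsewhere in the SDK; the custom property name is made up for illustration.

// inspect is illustrative: props is assumed to come from an SDK config or result.
func inspect(props common.PropertyCollection) {
    defer props.Close() // release the associated resources when done

    // Read a well-known property, falling back to a default if it is unset.
    lang := props.GetProperty(common.SpeechServiceConnectionRecoLanguage, "en-US")
    fmt.Println("recognition language:", lang)

    // Properties can also be addressed by an arbitrary name.
    if err := props.SetPropertyByString("MyApplicationTag", "demo-run"); err != nil {
        log.Printf("could not set property: %v", err)
    }
}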
type PropertyID ¶
type PropertyID int
PropertyID defines speech property ids. Changed in version 1.4.0.
const (
    // SpeechServiceConnectionKey is the Cognitive Services Speech Service subscription key. If you are using an
    // intent recognizer, you need to specify the LUIS endpoint key for your particular LUIS app. Under normal
    // circumstances, you shouldn't have to use this property directly.
    // Instead, use NewSpeechConfigFromSubscription.
    SpeechServiceConnectionKey PropertyID = 1000

    // SpeechServiceConnectionEndpoint is the Cognitive Services Speech Service endpoint (url).
    // Under normal circumstances, you shouldn't have to use this property directly.
    // Instead, use NewSpeechConfigFromEndpoint.
    // NOTE: This endpoint is not the same as the endpoint used to obtain an access token.
    SpeechServiceConnectionEndpoint PropertyID = 1001

    // SpeechServiceConnectionRegion is the Cognitive Services Speech Service region. Under normal circumstances,
    // you shouldn't have to use this property directly.
    // Instead, use NewSpeechConfigFromSubscription, NewSpeechConfigFromEndpoint, NewSpeechConfigFromHost, or
    // NewSpeechConfigFromAuthorizationToken.
    SpeechServiceConnectionRegion PropertyID = 1002

    // SpeechServiceAuthorizationToken is the Cognitive Services Speech Service authorization token (aka access token).
    // Under normal circumstances, you shouldn't have to use this property directly.
    // Instead, use NewSpeechConfigFromAuthorizationToken or Recognizer.SetAuthorizationToken.
    SpeechServiceAuthorizationToken PropertyID = 1003

    // SpeechServiceAuthorizationType is the Cognitive Services Speech Service authorization type. Currently unused.
    SpeechServiceAuthorizationType PropertyID = 1004

    // SpeechServiceConnectionEndpointID is the Cognitive Services Custom Speech Service endpoint id. Under normal
    // circumstances, you shouldn't have to use this property directly.
    // Instead use SpeechConfig.SetEndpointId.
    // NOTE: The endpoint id is available in the Custom Speech Portal, listed under Endpoint Details.
    SpeechServiceConnectionEndpointID PropertyID = 1005

    // SpeechServiceConnectionHost is the Cognitive Services Speech Service host (url). Under normal circumstances,
    // you shouldn't have to use this property directly.
    // Instead, use NewSpeechConfigFromHost.
    SpeechServiceConnectionHost PropertyID = 1006

    // SpeechServiceConnectionProxyHostName is the host name of the proxy server used to connect to the Cognitive Services
    // Speech Service. Under normal circumstances, you shouldn't have to use this property directly.
    // Instead, use SpeechConfig.SetProxy.
    SpeechServiceConnectionProxyHostName PropertyID = 1100

    // SpeechServiceConnectionProxyPort is the port of the proxy server used to connect to the Cognitive Services Speech
    // Service. Under normal circumstances, you shouldn't have to use this property directly.
    // Instead, use SpeechConfig.SetProxy.
    SpeechServiceConnectionProxyPort PropertyID = 1101

    // SpeechServiceConnectionProxyUserName is the user name of the proxy server used to connect to the Cognitive Services
    // Speech Service. Under normal circumstances, you shouldn't have to use this property directly.
    // Instead, use SpeechConfig.SetProxy.
    SpeechServiceConnectionProxyUserName PropertyID = 1102

    // SpeechServiceConnectionProxyPassword is the password of the proxy server used to connect to the Cognitive Services
    // Speech Service. Under normal circumstances, you shouldn't have to use this property directly.
    // Instead, use SpeechConfig.SetProxy.
    SpeechServiceConnectionProxyPassword PropertyID = 1103

    // SpeechServiceConnectionURL is the URL string built from the speech configuration.
    // This property is intended to be read-only. The SDK is using it internally.
    SpeechServiceConnectionURL PropertyID = 1104

    // SpeechServiceConnectionTranslationToLanguages is the list of comma-separated languages used as target translation
    // languages. Under normal circumstances, you shouldn't have to use this property directly.
    // Instead use SpeechTranslationConfig.AddTargetLanguage and SpeechTranslationConfig.GetTargetLanguages.
    SpeechServiceConnectionTranslationToLanguages PropertyID = 2000

    // SpeechServiceConnectionTranslationVoice is the name of the Cognitive Service Text to Speech Service voice. Under normal
    // circumstances, you shouldn't have to use this property directly.
    // Instead use SpeechTranslationConfig.SetVoiceName.
    // NOTE: Valid voice names can be found at https://aka.ms/csspeech/voicenames.
    SpeechServiceConnectionTranslationVoice PropertyID = 2001

    // SpeechServiceConnectionTranslationFeatures is the translation features. For internal use.
    SpeechServiceConnectionTranslationFeatures PropertyID = 2002

    // SpeechServiceConnectionIntentRegion is the Language Understanding Service region. Under normal circumstances, you
    // shouldn't have to use this property directly.
    // Instead use LanguageUnderstandingModel.
    SpeechServiceConnectionIntentRegion PropertyID = 2003

    // SpeechServiceConnectionRecoMode is the Cognitive Services Speech Service recognition mode. Can be "INTERACTIVE",
    // "CONVERSATION" or "DICTATION".
    // This property is intended to be read-only. The SDK is using it internally.
    SpeechServiceConnectionRecoMode PropertyID = 3000

    // SpeechServiceConnectionRecoLanguage is the spoken language to be recognized (in BCP-47 format). Under normal
    // circumstances, you shouldn't have to use this property directly.
    // Instead, use SpeechConfig.SetSpeechRecognitionLanguage.
    SpeechServiceConnectionRecoLanguage PropertyID = 3001

    // SpeechSessionID is the session id. This id is a universally unique identifier (aka UUID) representing a specific
    // binding of an audio input stream and the underlying speech recognition instance to which it is bound. Under normal
    // circumstances, you shouldn't have to use this property directly.
    // Instead use SessionEventArgs.SessionId.
    SpeechSessionID PropertyID = 3002

    // SpeechServiceConnectionUserDefinedQueryParameters are the query parameters provided by users. They will be passed
    // to the service as URL query parameters.
    SpeechServiceConnectionUserDefinedQueryParameters PropertyID = 3003

    // SpeechServiceConnectionSynthLanguage is the spoken language to be synthesized (e.g. en-US).
    SpeechServiceConnectionSynthLanguage PropertyID = 3100

    // SpeechServiceConnectionSynthVoice is the name of the TTS voice to be used for speech synthesis.
    SpeechServiceConnectionSynthVoice PropertyID = 3101

    // SpeechServiceConnectionSynthOutputFormat is the string to specify the TTS output audio format.
    SpeechServiceConnectionSynthOutputFormat PropertyID = 3102

    // SpeechServiceConnectionInitialSilenceTimeoutMs is the initial silence timeout value (in milliseconds) used by the
    // service.
    SpeechServiceConnectionInitialSilenceTimeoutMs PropertyID = 3200

    // SpeechServiceConnectionEndSilenceTimeoutMs is the end silence timeout value (in milliseconds) used by the service.
    SpeechServiceConnectionEndSilenceTimeoutMs PropertyID = 3201

    // SpeechServiceConnectionEnableAudioLogging is a boolean value specifying whether audio logging is enabled in the
    // service or not.
    SpeechServiceConnectionEnableAudioLogging PropertyID = 3202

    // SpeechServiceConnectionAutoDetectSourceLanguages is the auto detect source languages.
    SpeechServiceConnectionAutoDetectSourceLanguages PropertyID = 3300

    // SpeechServiceConnectionAutoDetectSourceLanguageResult is the auto detect source language result.
    SpeechServiceConnectionAutoDetectSourceLanguageResult PropertyID = 3301

    // SpeechServiceResponseRequestDetailedResultTrueFalse is the requested Cognitive Services Speech Service response output
    // format (simple or detailed). Under normal circumstances, you shouldn't have to use this property directly.
    // Instead use SpeechConfig.SetOutputFormat.
    SpeechServiceResponseRequestDetailedResultTrueFalse PropertyID = 4000

    // SpeechServiceResponseRequestProfanityFilterTrueFalse is the requested Cognitive Services Speech Service response
    // output profanity level. Currently unused.
    SpeechServiceResponseRequestProfanityFilterTrueFalse PropertyID = 4001

    // SpeechServiceResponseProfanityOption is the requested Cognitive Services Speech Service response output profanity
    // setting.
    // Allowed values are "masked", "removed", and "raw".
    SpeechServiceResponseProfanityOption PropertyID = 4002

    // SpeechServiceResponsePostProcessingOption is a string value specifying which post processing option should be used
    // by the service.
    // Allowed values are "TrueText".
    SpeechServiceResponsePostProcessingOption PropertyID = 4003

    // SpeechServiceResponseRequestWordLevelTimestamps is a boolean value specifying whether to include word-level
    // timestamps in the response result.
    SpeechServiceResponseRequestWordLevelTimestamps PropertyID = 4004

    // SpeechServiceResponseStablePartialResultThreshold is the number of times a word has to be in partial results
    // to be returned.
    SpeechServiceResponseStablePartialResultThreshold PropertyID = 4005

    // SpeechServiceResponseOutputFormatOption is a string value specifying the output format option in the response
    // result. Internal use only.
    SpeechServiceResponseOutputFormatOption PropertyID = 4006

    // SpeechServiceResponseTranslationRequestStablePartialResult is a boolean value to request stabilizing translation
    // partial results by omitting words at the end.
    SpeechServiceResponseTranslationRequestStablePartialResult PropertyID = 4100

    // SpeechServiceResponseJSONResult is the Cognitive Services Speech Service response output (in JSON format). This
    // property is available on recognition result objects only.
    SpeechServiceResponseJSONResult PropertyID = 5000

    // SpeechServiceResponseJSONErrorDetails is the Cognitive Services Speech Service error details (in JSON format).
    // Under normal circumstances, you shouldn't have to use this property directly.
    // Instead, use CancellationDetails.ErrorDetails.
    SpeechServiceResponseJSONErrorDetails PropertyID = 5001

    // SpeechServiceResponseRecognitionLatencyMs is the recognition latency in milliseconds. Read-only, available on final
    // speech/translation/intent results. This measures the latency between when an audio input is received by the SDK, and
    // the moment the final result is received from the service. The SDK computes the time difference between the last audio
    // fragment from the audio input that is contributing to the final result, and the time the final result is received from
    // the speech service.
    SpeechServiceResponseRecognitionLatencyMs PropertyID = 5002

    // CancellationDetailsReason is the cancellation reason. Currently unused.
    CancellationDetailsReason PropertyID = 6000

    // CancellationDetailsReasonText is the cancellation text. Currently unused.
    CancellationDetailsReasonText PropertyID = 6001

    // CancellationDetailsReasonDetailedText is the cancellation detailed text. Currently unused.
    CancellationDetailsReasonDetailedText PropertyID = 6002

    // LanguageUnderstandingServiceResponseJSONResult is the Language Understanding Service response output (in JSON format).
    // Available via IntentRecognitionResult.Properties.
    LanguageUnderstandingServiceResponseJSONResult PropertyID = 7000

    // AudioConfigDeviceNameForCapture is the device name for audio capture. Under normal circumstances, you shouldn't have
    // to use this property directly.
    // Instead, use AudioConfig.FromMicrophoneInput.
    AudioConfigDeviceNameForCapture PropertyID = 8000

    // AudioConfigNumberOfChannelsForCapture is the number of channels for audio capture. Internal use only.
    AudioConfigNumberOfChannelsForCapture PropertyID = 8001

    // AudioConfigSampleRateForCapture is the sample rate (in Hz) for audio capture. Internal use only.
    AudioConfigSampleRateForCapture PropertyID = 8002

    // AudioConfigBitsPerSampleForCapture is the number of bits of each sample for audio capture. Internal use only.
    AudioConfigBitsPerSampleForCapture PropertyID = 8003

    // AudioConfigAudioSource is the audio source. Allowed values are "Microphones", "File", and "Stream".
    AudioConfigAudioSource PropertyID = 8004

    // SpeechLogFilename is the file name to write logs.
    SpeechLogFilename PropertyID = 9001

    // ConversationApplicationID is the identifier used to connect to the backend service.
    ConversationApplicationID PropertyID = 10000

    // ConversationDialogType is the type of dialog backend to connect to.
    ConversationDialogType PropertyID = 10001

    // ConversationInitialSilenceTimeout is the silence timeout for listening.
    ConversationInitialSilenceTimeout PropertyID = 10002

    // ConversationFromID is the FromId to be used on speech recognition activities.
    ConversationFromID PropertyID = 10003

    // ConversationConversationID is the ConversationId for the session.
    ConversationConversationID PropertyID = 10004

    // ConversationCustomVoiceDeploymentIDs is a comma separated list of custom voice deployment ids.
    ConversationCustomVoiceDeploymentIDs PropertyID = 10005

    // ConversationSpeechActivityTemplate is used to stamp properties in the template on the activity generated by the
    // service for speech.
    ConversationSpeechActivityTemplate PropertyID = 10006

    // DataBufferTimeStamp is the time stamp associated to the data buffer written by the client when using Pull/Push
    // audio input streams.
    // The time stamp is a 64-bit value with a resolution of 90 kHz. It is the same as the presentation timestamp
    // in an MPEG transport stream. See https://en.wikipedia.org/wiki/Presentation_timestamp
    DataBufferTimeStamp PropertyID = 11001

    // DataBufferUserID is the user id associated to the data buffer written by the client when using Pull/Push audio
    // input streams.
    DataBufferUserID PropertyID = 11002
)
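To illustrate how these IDs pair with PropertyCollection.GetProperty, the sketch below reads a few response-side properties off a result's property bag; the bag itself would come from a recognition result produced outside this package (assumes an import of "fmt").

// logDiagnostics is illustrative: props is assumed to come from a recognition result.
func logDiagnostics(props common.PropertyCollection) {
    // Raw service response (JSON), available on recognition result objects.
    jsonResult := props.GetProperty(common.SpeechServiceResponseJSONResult, "{}")
    // Recognition latency in milliseconds, available on final results.
    latencyMs := props.GetProperty(common.SpeechServiceResponseRecognitionLatencyMs, "0")
    fmt.Printf("latency=%sms response=%s\n", latencyMs, jsonResult)
}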
type ResultReason ¶
type ResultReason int
ResultReason specifies the possible reasons a recognition result might be generated.
const (
    // NoMatch indicates speech could not be recognized. More details can be found in the NoMatchDetails object.
    NoMatch ResultReason = 0
    // Canceled indicates that the recognition was canceled. More details can be found using the CancellationDetails object.
    Canceled ResultReason = 1
    // RecognizingSpeech indicates the speech result contains hypothesis text.
    RecognizingSpeech ResultReason = 2
    // RecognizedSpeech indicates the speech result contains final text that has been recognized.
    // Speech recognition is now complete for this phrase.
    RecognizedSpeech ResultReason = 3
    // RecognizingIntent indicates the intent result contains hypothesis text and intent.
    RecognizingIntent ResultReason = 4
    // RecognizedIntent indicates the intent result contains final text and intent.
    // Speech recognition and intent determination are now complete for this phrase.
    RecognizedIntent ResultReason = 5
    // TranslatingSpeech indicates the translation result contains hypothesis text and its translation(s).
    TranslatingSpeech ResultReason = 6
    // TranslatedSpeech indicates the translation result contains final text and corresponding translation(s).
    // Speech recognition and translation are now complete for this phrase.
    TranslatedSpeech ResultReason = 7
    // SynthesizingAudio indicates the synthesized audio result contains a non-zero amount of audio data.
    SynthesizingAudio ResultReason = 8
    // SynthesizingAudioCompleted indicates the synthesized audio is now complete for this phrase.
    SynthesizingAudioCompleted ResultReason = 9
    // RecognizingKeyword indicates the speech result contains (unverified) keyword text.
    RecognizingKeyword ResultReason = 10
    // RecognizedKeyword indicates that keyword recognition completed recognizing the given keyword.
    RecognizedKeyword ResultReason = 11
    // SynthesizingAudioStarted indicates the speech synthesis has now started.
    SynthesizingAudioStarted ResultReason = 12
)
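A common pattern is to branch on the reason attached to a recognition result. The sketch below assumes the reason and text come from a result produced by one of the SDK's recognizers outside this package (and an import of "fmt").

// report is an illustrative helper, not part of the SDK.
func report(reason common.ResultReason, text string) {
    switch reason {
    case common.RecognizedSpeech:
        fmt.Println("final text:", text)
    case common.RecognizingSpeech:
        fmt.Println("partial hypothesis:", text)
    case common.NoMatch:
        fmt.Println("speech could not be recognized")
    case common.Canceled:
        fmt.Println("recognition was canceled; inspect the cancellation details")
    }
}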
type ServicePropertyChannel ¶
type ServicePropertyChannel int
ServicePropertyChannel defines the channels used to pass property settings to the service.
const (
    // URIQueryParameter uses a URI query parameter to pass property settings to the service.
    URIQueryParameter ServicePropertyChannel = 0
)
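This value is typically handed to a configuration's SetServiceProperty call to forward an extra setting to the service as a URL query parameter. A hedged sketch, where the speech package import path, the *speech.SpeechConfig type, the SetServiceProperty signature, and the example parameter are all assumptions rather than anything documented in this package:

// forwardQueryParameter is illustrative: the config type and SetServiceProperty
// signature are assumptions about the SDK's speech package, not verified here.
func forwardQueryParameter(config *speech.SpeechConfig) {
    if err := config.SetServiceProperty("punctuation", "explicit", common.URIQueryParameter); err != nil {
        log.Printf("could not set service property: %v", err)
    }
}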
type SpeechSynthesisOutputFormat ¶
type SpeechSynthesisOutputFormat int
SpeechSynthesisOutputFormat defines the possible speech synthesis output audio formats.
const (
    // Raw8Khz8BitMonoMULaw stands for raw-8khz-8bit-mono-mulaw.
    Raw8Khz8BitMonoMULaw SpeechSynthesisOutputFormat = 1
    // Riff16Khz16KbpsMonoSiren stands for riff-16khz-16kbps-mono-siren.
    Riff16Khz16KbpsMonoSiren SpeechSynthesisOutputFormat = 2
    // Audio16Khz16KbpsMonoSiren stands for audio-16khz-16kbps-mono-siren.
    Audio16Khz16KbpsMonoSiren SpeechSynthesisOutputFormat = 3
    // Audio16Khz32KBitRateMonoMp3 stands for audio-16khz-32kbitrate-mono-mp3.
    Audio16Khz32KBitRateMonoMp3 SpeechSynthesisOutputFormat = 4
    // Audio16Khz128KBitRateMonoMp3 stands for audio-16khz-128kbitrate-mono-mp3.
    Audio16Khz128KBitRateMonoMp3 SpeechSynthesisOutputFormat = 5
    // Audio16Khz64KBitRateMonoMp3 stands for audio-16khz-64kbitrate-mono-mp3.
    Audio16Khz64KBitRateMonoMp3 SpeechSynthesisOutputFormat = 6
    // Audio24Khz48KBitRateMonoMp3 stands for audio-24khz-48kbitrate-mono-mp3.
    Audio24Khz48KBitRateMonoMp3 SpeechSynthesisOutputFormat = 7
    // Audio24Khz96KBitRateMonoMp3 stands for audio-24khz-96kbitrate-mono-mp3.
    Audio24Khz96KBitRateMonoMp3 SpeechSynthesisOutputFormat = 8
    // Audio24Khz160KBitRateMonoMp3 stands for audio-24khz-160kbitrate-mono-mp3.
    Audio24Khz160KBitRateMonoMp3 SpeechSynthesisOutputFormat = 9
    // Raw16Khz16BitMonoTrueSilk stands for raw-16khz-16bit-mono-truesilk.
    Raw16Khz16BitMonoTrueSilk SpeechSynthesisOutputFormat = 10
    // Riff16Khz16BitMonoPcm stands for riff-16khz-16bit-mono-pcm.
    Riff16Khz16BitMonoPcm SpeechSynthesisOutputFormat = 11
    // Riff8Khz16BitMonoPcm stands for riff-8khz-16bit-mono-pcm.
    Riff8Khz16BitMonoPcm SpeechSynthesisOutputFormat = 12
    // Riff24Khz16BitMonoPcm stands for riff-24khz-16bit-mono-pcm.
    Riff24Khz16BitMonoPcm SpeechSynthesisOutputFormat = 13
    // Riff8Khz8BitMonoMULaw stands for riff-8khz-8bit-mono-mulaw.
    Riff8Khz8BitMonoMULaw SpeechSynthesisOutputFormat = 14
    // Raw16Khz16BitMonoPcm stands for raw-16khz-16bit-mono-pcm.
    Raw16Khz16BitMonoPcm SpeechSynthesisOutputFormat = 15
    // Raw24Khz16BitMonoPcm stands for raw-24khz-16bit-mono-pcm.
    Raw24Khz16BitMonoPcm SpeechSynthesisOutputFormat = 16
    // Raw8Khz16BitMonoPcm stands for raw-8khz-16bit-mono-pcm.
    Raw8Khz16BitMonoPcm SpeechSynthesisOutputFormat = 17
)
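These constants are normally passed to a speech configuration before creating a synthesizer. A hedged sketch, where the speech package import path, the NewSpeechConfigFromSubscription return values, and the SetSpeechSynthesisOutputFormat name and signature are assumptions based on the SDK's other language bindings rather than anything documented here; the key and region strings are placeholders.

// configureMp3Output is illustrative only.
func configureMp3Output() {
    // NOTE: the speech package names and signatures below are assumptions,
    // not documented in this package; check the speech package itself.
    config, err := speech.NewSpeechConfigFromSubscription("YourSubscriptionKey", "YourServiceRegion")
    if err != nil {
        log.Fatal(err)
    }
    defer config.Close()
    // Request 24 kHz, 48 kbit/s mono MP3 output instead of a default PCM format.
    if err := config.SetSpeechSynthesisOutputFormat(common.Audio24Khz48KBitRateMonoMp3); err != nil {
        log.Fatal(err)
    }
}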