feat: Added StreamingAnalyzeContent API #1004

Merged · 7 commits · Nov 11, 2022
70 changes: 70 additions & 0 deletions protos/google/cloud/dialogflow/v2/audio_config.proto
@@ -369,6 +369,67 @@ message OutputAudioConfig {
SynthesizeSpeechConfig synthesize_speech_config = 3;
}

// [DTMF](https://en.wikipedia.org/wiki/Dual-tone_multi-frequency_signaling)
// digit in Telephony Gateway.
enum TelephonyDtmf {
// Not specified. This value may be used to indicate an absent digit.
TELEPHONY_DTMF_UNSPECIFIED = 0;

// Number: '1'.
DTMF_ONE = 1;

// Number: '2'.
DTMF_TWO = 2;

// Number: '3'.
DTMF_THREE = 3;

// Number: '4'.
DTMF_FOUR = 4;

// Number: '5'.
DTMF_FIVE = 5;

// Number: '6'.
DTMF_SIX = 6;

// Number: '7'.
DTMF_SEVEN = 7;

// Number: '8'.
DTMF_EIGHT = 8;

// Number: '9'.
DTMF_NINE = 9;

// Number: '0'.
DTMF_ZERO = 10;

// Letter: 'A'.
DTMF_A = 11;

// Letter: 'B'.
DTMF_B = 12;

// Letter: 'C'.
DTMF_C = 13;

// Letter: 'D'.
DTMF_D = 14;

// Asterisk/star: '*'.
DTMF_STAR = 15;

// Pound/diamond/hash/square/gate/octothorpe: '#'.
DTMF_POUND = 16;
}

// A wrapper of repeated TelephonyDtmf digits.
message TelephonyDtmfEvents {
// A sequence of TelephonyDtmf digits.
repeated TelephonyDtmf dtmf_events = 1;
}
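
Purely for illustration, here is a hypothetical TypeScript helper (not part of this PR) that builds this message shape from a dial string; the generated Node.js clients accept proto enum values by name, so the mapping below is plain data:

// Hypothetical helper: map '1'..'0', '*', '#', 'A'..'D' to TelephonyDtmf names.
const DTMF_NAMES: Record<string, string> = {
  '1': 'DTMF_ONE', '2': 'DTMF_TWO', '3': 'DTMF_THREE', '4': 'DTMF_FOUR',
  '5': 'DTMF_FIVE', '6': 'DTMF_SIX', '7': 'DTMF_SEVEN', '8': 'DTMF_EIGHT',
  '9': 'DTMF_NINE', '0': 'DTMF_ZERO', '*': 'DTMF_STAR', '#': 'DTMF_POUND',
  'A': 'DTMF_A', 'B': 'DTMF_B', 'C': 'DTMF_C', 'D': 'DTMF_D',
};

// Build a TelephonyDtmfEvents message, e.g. toDtmfEvents('1#').
function toDtmfEvents(digits: string): {dtmfEvents: string[]} {
  return {
    dtmfEvents: [...digits.toUpperCase()].map(
      d => DTMF_NAMES[d] ?? 'TELEPHONY_DTMF_UNSPECIFIED'),
  };
}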

// Configures speech transcription for [ConversationProfile][google.cloud.dialogflow.v2.ConversationProfile].
message SpeechToTextConfig {
// The speech model used in speech to text.
@@ -379,4 +440,13 @@ message SpeechToTextConfig {
// version of the specified model for the language does not exist, then it
// would emit an error.
SpeechModelVariant speech_model_variant = 1;

// Which Speech model to select. Select the model best suited to your domain
// to get the best results. If a model is not explicitly specified, then a
// default model is used.
// Refer to
// [Cloud Speech API
// documentation](https://cloud.google.com/speech-to-text/docs/basics#select-model)
// for more details.
string model = 2;
}
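
As a quick sketch, this is how the config might be populated from the Node.js client, which takes plain objects with camelCase field names; the model name 'phone_call' is only an example from the Cloud Speech model list:

// Minimal sketch: request the enhanced variant of an example model.
const sttConfig = {
  speechModelVariant: 'USE_ENHANCED', // SpeechModelVariant enum, by name
  model: 'phone_call',                // illustrative; see the Speech model docs
};
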
221 changes: 221 additions & 0 deletions protos/google/cloud/dialogflow/v2/participant.proto
@@ -109,6 +109,24 @@ service Participants {
option (google.api.method_signature) = "participant,event_input";
}

// Adds a text (for example, chat) or audio (for example, phone recording)
// message from a participant to the conversation.
// Note: This method is only available through the gRPC API (not REST).
//
// The top-level message sent to the client by the server is
// `StreamingAnalyzeContentResponse`. Multiple response messages can be
// returned in order. The first one or more messages contain the
// `recognition_result` field. Each result represents a more complete
// transcript of what the user said. The next message contains the
// `reply_text` field and potentially the `reply_audio` field. The message can
// also contain the `automated_agent_reply` field.
//
// Note: Always use agent versions for production traffic
// sent to virtual agents. See [Versions and
// environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
rpc StreamingAnalyzeContent(stream StreamingAnalyzeContentRequest) returns (stream StreamingAnalyzeContentResponse) {
}
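
Since this method is gRPC-only, the generated Node.js client is the natural entry point. A minimal sketch, assuming the client generated from this proto exposes `streamingAnalyzeContent` as a bidirectional stream (camelCase fields per the usual proto-to-JS mapping):

import {v2} from '@google-cloud/dialogflow';

const client = new v2.ParticipantsClient();

// Bidirectional stream: write StreamingAnalyzeContentRequest messages in,
// read StreamingAnalyzeContentResponse messages out.
const stream = client.streamingAnalyzeContent();
stream.on('data', response => {
  if (response.recognitionResult) {
    console.log('transcript:', response.recognitionResult.transcript);
  }
  if (response.replyText) {
    console.log('reply:', response.replyText);
  }
});
stream.on('error', err => console.error(err));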

// Gets suggested articles for a participant based on specific historical
// messages.
rpc SuggestArticles(SuggestArticlesRequest) returns (SuggestArticlesResponse) {
@@ -190,6 +208,36 @@ message Participant {
// media stream to this participant. This field can be updated.
string sip_recording_media_label = 6 [(google.api.field_behavior) = OPTIONAL];

// Optional. Obfuscated user id that should be associated with the created participant.
//
// You can specify a user id as follows:
//
// 1. If you set this field in
// [CreateParticipantRequest][google.cloud.dialogflow.v2.CreateParticipantRequest.participant] or
// [UpdateParticipantRequest][google.cloud.dialogflow.v2.UpdateParticipantRequest.participant],
// Dialogflow associates the obfuscated user id with the participant.
//
// 2. If you set this field in
// [AnalyzeContent][google.cloud.dialogflow.v2.AnalyzeContentRequest.obfuscated_external_user_id] or
// [StreamingAnalyzeContent][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.obfuscated_external_user_id],
// Dialogflow will update [Participant.obfuscated_external_user_id][google.cloud.dialogflow.v2.Participant.obfuscated_external_user_id].
//
// Dialogflow returns an error if you try to add a user id for a
// non-[END_USER][google.cloud.dialogflow.v2.Participant.Role.END_USER] participant.
//
// Dialogflow uses this user id for billing and measurement purposes. For
// example, Dialogflow determines whether a user in one conversation returned
// in a later conversation.
//
// Note:
//
// * Please never pass raw user ids to Dialogflow. Always obfuscate your user
// id first.
// * Dialogflow only accepts a UTF-8 encoded string, e.g., a hex digest of a
// hash function like SHA-512 (see the sketch below).
// * The length of the user id must be <= 256 characters.
string obfuscated_external_user_id = 7 [(google.api.field_behavior) = OPTIONAL];
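
A minimal sketch of the obfuscation note above, using Node's built-in crypto module; a SHA-512 hex digest is 128 characters, well within the 256-character limit:

import {createHash} from 'node:crypto';

// Never send the raw id. A SHA-512 hex digest is valid UTF-8 and
// 128 characters long, satisfying the constraints above.
const obfuscatedExternalUserId = createHash('sha512')
  .update('raw-user-id@example.com') // placeholder raw id
  .digest('hex');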

// Optional. Key-value filters on the metadata of documents returned by article
// suggestion. If specified, article suggestion only returns suggested
// documents that match all filters in their [Document.metadata][google.cloud.dialogflow.v2.Document.metadata]. Multiple
@@ -425,6 +473,171 @@ message AnalyzeContentResponse {
DtmfParameters dtmf_parameters = 9;
}

// The top-level message sent by the client to the
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2.Participants.StreamingAnalyzeContent] method.
//
// Multiple request messages should be sent in order:
//
// 1. The first message must contain
// [participant][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.participant],
// [config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.config] and optionally
// [query_params][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.query_params]. If you want
// to receive an audio response, it should also contain
// [reply_audio_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.reply_audio_config].
// The message must not contain
// [input][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.input].
//
// 2. If [config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.config] in the first message
// was set to [audio_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.audio_config],
// all subsequent messages must contain
// [input_audio][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.input_audio] to continue
// with Speech recognition.
// If you decide to rather analyze text input after you already started
// Speech recognition, please send a message with
// [input_text][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.input_text].
// However, note that:
//
// * Dialogflow will bill you for the audio so far.
// * Dialogflow discards all Speech recognition results in favor of the
// text input.
//
// 3. If [StreamingAnalyzeContentRequest.config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.config] in the first message was set
// to [StreamingAnalyzeContentRequest.text_config][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.text_config], then the second message
// must contain only [input_text][google.cloud.dialogflow.v2.StreamingAnalyzeContentRequest.input_text].
// Moreover, you must not send more than two messages.
//
// After you have sent all input, you must half-close or abort the request
// stream; a sketch of this flow follows the message definition below.
message StreamingAnalyzeContentRequest {
// Required. The name of the participant this text comes from.
// Format: `projects/<Project ID>/locations/<Location
// ID>/conversations/<Conversation ID>/participants/<Participant ID>`.
string participant = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "dialogflow.googleapis.com/Participant"
}
];

// The input config.
oneof config {
// Instructs the speech recognizer how to process the speech audio.
InputAudioConfig audio_config = 2;

// The natural language text to be processed.
InputTextConfig text_config = 3;
}

// Speech synthesis configuration.
// The speech synthesis settings for a virtual agent that may be configured
// for the associated conversation profile are not used when calling
// StreamingAnalyzeContent. If this configuration is not supplied, speech
// synthesis is disabled.
OutputAudioConfig reply_audio_config = 4;

// The input.
oneof input {
// The input audio content to be recognized. Must be sent if `audio_config`
// is set in the first message. The complete audio over all streaming
// messages must not exceed 1 minute.
bytes input_audio = 5;

// The UTF-8 encoded natural language text to be processed. Must be sent if
// `text_config` is set in the first message. Text length must not exceed
// 256 bytes for virtual agent interactions. The `input_text` field can
// only be sent once.
string input_text = 6;

// The DTMF digits used to invoke intents and fill in parameter values.
//
// This input is ignored if the previous response indicated that DTMF input
// is not accepted.
TelephonyDtmfEvents input_dtmf = 9;
}

// Parameters for a Dialogflow virtual-agent query.
QueryParameters query_params = 7;

// Parameters for a human assist query.
AssistQueryParameters assist_query_params = 8;

// Additional parameters to be put into Dialogflow CX session parameters. To
// remove a parameter from the session, clients should explicitly set the
// parameter value to null.
//
// Note: this field should only be used if you are connecting to a Dialogflow
// CX agent.
google.protobuf.Struct cx_parameters = 13;

// Enable partial virtual agent responses. If this flag is not enabled, the
// response stream still contains only one final response even if some
// `Fulfillment`s in the Dialogflow virtual agent have been configured to
// return partial responses.
bool enable_partial_automated_agent_reply = 12;
}
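
To make the ordering rules in the comment above concrete, here is a sketch of the audio flow, continuing the earlier client snippet (the participant path and audio parameters are placeholders):

declare const audioChunks: Buffer[]; // audio frames from your telephony stack (assumed)

// 1. First message: participant and config only; no input yet.
stream.write({
  participant:
    'projects/<Project ID>/locations/<Location ID>/conversations/' +
    '<Conversation ID>/participants/<Participant ID>', // placeholder
  audioConfig: {
    audioEncoding: 'AUDIO_ENCODING_LINEAR_16',
    sampleRateHertz: 16000,
    languageCode: 'en-US',
  },
  replyAudioConfig: {audioEncoding: 'OUTPUT_AUDIO_ENCODING_LINEAR_16'},
});

// 2. All subsequent messages: audio content only.
for (const chunk of audioChunks) {
  stream.write({inputAudio: chunk});
}

// 3. Half-close once all input has been sent.
stream.end();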

// The top-level message returned from the `StreamingAnalyzeContent` method.
//
// Multiple response messages can be returned in order:
//
// 1. If the input was set to streaming audio, the first one or more messages
// contain `recognition_result`. Each `recognition_result` represents a more
// complete transcript of what the user said. The last `recognition_result`
// has `is_final` set to `true`.
//
// 2. In the virtual agent stage: if `enable_partial_automated_agent_reply` is
// true, the following N (currently 1 <= N <= 4) messages
// contain `automated_agent_reply` and optionally `reply_audio`
// returned by the virtual agent. The first (N-1)
// `automated_agent_reply`s will have `automated_agent_reply_type` set to
// `PARTIAL`. The last `automated_agent_reply` has
// `automated_agent_reply_type` set to `FINAL`.
// If `enable_partial_automated_agent_reply` is not enabled, the response
// stream only contains the final reply.
//
// In the human assist stage: the following N (N >= 1) messages contain
// `human_agent_suggestion_results`, `end_user_suggestion_results` or
// `message`; a sketch of consuming this stream follows the message
// definition below.
message StreamingAnalyzeContentResponse {
// The result of speech recognition.
StreamingRecognitionResult recognition_result = 1;

// The output text content.
// This field is set if an automated agent responded with text for the user.
string reply_text = 2;

// The audio data bytes encoded as specified in the request.
// This field is set if:
//
// - The `reply_audio_config` field is specified in the request.
// - The automated agent that this output comes from responded with audio.
// In that case, the `reply_audio.config` field contains settings used to
// synthesize the speech.
//
// In some scenarios, multiple output audio fields may be present in the
// response structure. In these cases, only the top-most-level audio output
// has content.
OutputAudio reply_audio = 3;

// Only set if a Dialogflow automated agent has responded.
// Note that [AutomatedAgentReply.detect_intent_response.output_audio][]
// and [AutomatedAgentReply.detect_intent_response.output_audio_config][]
// are always empty; use [reply_audio][google.cloud.dialogflow.v2.StreamingAnalyzeContentResponse.reply_audio] instead.
AutomatedAgentReply automated_agent_reply = 4;

// Message analyzed by CCAI.
Message message = 6;

// The suggestions for the most recent human agent. The order is the same as
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
// [HumanAgentAssistantConfig.human_agent_suggestion_config][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.human_agent_suggestion_config].
repeated SuggestionResult human_agent_suggestion_results = 7;

// The suggestions for the end user. The order is the same as
// [HumanAgentAssistantConfig.SuggestionConfig.feature_configs][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.SuggestionConfig.feature_configs] of
// [HumanAgentAssistantConfig.end_user_suggestion_config][google.cloud.dialogflow.v2.HumanAgentAssistantConfig.end_user_suggestion_config].
repeated SuggestionResult end_user_suggestion_results = 8;

// Indicates the parameters of DTMF.
DtmfParameters dtmf_parameters = 10;
}
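
And a sketch of consuming the stream in the order described above, continuing the same snippet; the `automatedAgentReplyType` values mirror the proto enum:

stream.on('data', response => {
  if (response.recognitionResult) {
    // Interim transcripts; the last one has isFinal set to true.
    if (response.recognitionResult.isFinal) {
      console.log('final transcript:', response.recognitionResult.transcript);
    }
  } else if (response.automatedAgentReply) {
    // Zero or more PARTIAL replies, then one FINAL reply.
    console.log(
      response.automatedAgentReply.automatedAgentReplyType, // 'PARTIAL' | 'FINAL'
      response.replyText
    );
  }
});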

// The request message for [Participants.SuggestArticles][google.cloud.dialogflow.v2.Participants.SuggestArticles].
message SuggestArticlesRequest {
// Required. The name of the participant to fetch suggestion for.
@@ -720,6 +933,14 @@ message SuggestionResult {
}
}

// Defines the language used in the input text.
message InputTextConfig {
// Required. The language of this conversational query. See [Language
// Support](https://cloud.google.com/dialogflow/docs/reference/language)
// for a list of the currently supported language codes.
string language_code = 1 [(google.api.field_behavior) = REQUIRED];
}
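
For the text flow described in the request comments, exactly two messages precede the half-close; a sketch under the same assumptions as the earlier snippets:

// 1. First message: participant plus textConfig; no input.
stream.write({
  participant:
    'projects/<Project ID>/locations/<Location ID>/conversations/' +
    '<Conversation ID>/participants/<Participant ID>', // placeholder
  textConfig: {languageCode: 'en-US'},
});

// 2. Second and final message: the text to analyze (at most 256 bytes).
stream.write({inputText: 'I would like to check my order status.'});
stream.end();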

// Represents a part of a message possibly annotated with an entity. The part
// can be an entity or purely a part of the message between two entities or
// message start/end.