first working audio input

MichelNivard · Aug 23, 2024 · 20a6456 · 20a6456
1 parent f302d96
commit 20a6456
Show file tree

Hide file tree

Showing 5 changed files with 250 additions and 58 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -44,8 +44,8 @@ Imports:
     shiny.i18n,
     SSEparser,
     stringr (>= 1.5.0),
+    tuneR,
     utils,
-    waiter,
     yaml
 Suggests: 
     AzureRMR,

diff --git a/R/mod_app.R b/R/mod_app.R
@@ -10,7 +10,7 @@ mod_app_ui <- function(id, ide_colors = get_ide_theme_info()) {
   ns <- NS(id)
   translator <- create_translator(language = getOption("gptstudio.language"))
   tagList(
-    waiter::use_waiter(),
+    useBusyIndicators(),
     bslib::page_fluid(
       theme = create_chat_app_theme(ide_colors),
       title = "ChatGPT from gptstudio",
@@ -84,6 +84,7 @@ rgb_str_to_hex <- function(rgb_string) {
 create_chat_app_theme <- function(ide_colors = get_ide_theme_info()) {
   bslib::bs_theme(
     version = 5,
+    preset = "shiny",
     bg = ide_colors$bg,
     fg = ide_colors$fg,
     font_scale = 0.9,

diff --git a/R/mod_chat.R b/R/mod_chat.R
@@ -29,7 +29,37 @@ mod_chat_ui <- function(id, translator = create_translator()) {
             style = css(
               "width" = "100%"
             ),
-            uiOutput(ns("chat_input"))
+            div(
+              div(
+                style = "flex-grow: 1; height: 100%;",
+                text_area_input_wrapper(
+                  inputId = ns("chat_input"),
+                  label = NULL,
+                  width = "100%",
+                  height = "100%",
+                  value = "",
+                  resize = "none",
+                  textarea_class = "chat-prompt"
+                )
+              ),
+              div(
+                style = "position: absolute; right: 10px; top: 50%; transform: translateY(-50%);",
+                bslib::input_task_button(
+                  id = ns("chat"),
+                  label = icon("fas fa-paper-plane"),
+                  label_busy = NULL,
+                  class = "btn-primary p-1 chat-send-btn"
+                ) %>%
+                  bslib::tooltip("Send (click or Enter)")
+              ),
+              div(
+                style = "position: absolute; right: 40px; top: 30%; transform: translateY(-50%);",
+                input_audio_clip(ns("clip"),
+                                 record_label = NULL,
+                                 stop_label = NULL,
+                                 show_mic_settings = FALSE)
+              )
+            )
           )
         )
       )
@@ -51,8 +81,8 @@ mod_chat_server <- function(id,
                             history) {
   # This is where changes will focus
   moduleServer(id, function(input, output, session) {
+    ns <- NS(id)
     # Session data ----
-
     rv <- reactiveValues()
     rv$reset_welcome_message <- 0L
     rv$reset_streaming_message <- 0L
@@ -86,9 +116,8 @@ mod_chat_server <- function(id,
     }) %>%
       bindEvent(history$create_new_chat)
 
-
     observe({
-
+      cli::cli_inform("Chat triggered")
       skeleton <- gptstudio_create_skeleton(
         service = settings$service,
         model = settings$model,
@@ -122,48 +151,95 @@ mod_chat_server <- function(id,
       }
 
       updateTextAreaInput(session, "chat_input", value = "")
+      if (settings$stream) {
+        rv$reset_streaming_message <- rv$reset_streaming_message + 1L
+      }
     }) %>%
       bindEvent(input$chat)
 
-    output$chat_input <- renderUI({
-      tagList(
-        fluidRow(
-          column(
-            width = 3,
-            div(
-              style = "display: flex; align-items: center; height: 100%;",
-              input_audio_clip("clip", show_mic_settings = FALSE)
-            )
-          ),
-          column(
-            width = 9,
-            div(
-              style = "display: flex; align-items: center; position: relative; height: 100%;",
-              div(
-                style = "flex-grow: 1;",
-                text_area_input_wrapper(
-                  inputId = "chat_input",
-                  label = NULL,
-                  width = "100%",
-                  value = "",
-                  resize = "none",
-                  textarea_class = "chat-prompt"
-                )
-              ),
-              div(
-                style = "position: absolute; right: 10px;",
-                bslib::input_task_button(
-                  id = "chat",
-                  label = icon("fas fa-paper-plane"),
-                  label_busy = NULL,
-                  class = "w-100 btn-primary p-1 chat-send-btn"
-                ) %>%
-                  bslib::tooltip("Send (click or Enter)")
-              )
-            )
-          )
+    observe({
+      req(input$clip)
+      new_prompt <- transcribe_audio(input$clip)
+      skeleton <- gptstudio_create_skeleton(
+        service = settings$service,
+        model = settings$model,
+        prompt = new_prompt,
+        history = history$chat_history,
+        stream = settings$stream
+      ) %>%
+        gptstudio_skeleton_build(
+          skill = settings$skill,
+          style = settings$style,
+          task = settings$task,
+          custom_prompt = settings$custom_prompt
         )
+
+      response <- gptstudio_request_perform(
+        skeleton = skeleton,
+        shiny_session = session
+      ) %>%
+        gptstudio_response_process()
+
+      history$chat_history <- response$history
+
+      append_to_conversation_history(
+        id = history$selected_conversation$id %||% ids::random_id(),
+        title = history$selected_conversation$title %||% find_placeholder_title(history$chat_history), # nolint
+        messages = history$chat_history
       )
-    })
+
+      if (settings$stream) {
+        rv$reset_streaming_message <- rv$reset_streaming_message + 1L
+      }
+
+      updateTextAreaInput(session, "chat_input", value = "")
+      if (settings$stream) {
+        rv$reset_streaming_message <- rv$reset_streaming_message + 1L
+      }
+    }) %>%
+      bindEvent(input$clip)
+
+    # output$chat_input <- renderUI({
+    #   audio_recorder <-
+    #     if (rv$audio_input %||% getOption("gptstudio.audio_input")) {
+    #       div(
+    #         style = "position: absolute; right: 40px; top: 30%; transform: translateY(-50%);",
+    #         input_audio_clip("clip",
+    #                          record_label = NULL,
+    #                          stop_label = NULL,
+    #                          show_mic_settings = FALSE)
+    #       )
+    #     } else {
+    #       NULL
+    #     }
+    #
+    #   tagList(
+    #     div(
+    #       div(
+    #         style = "flex-grow: 1; height: 100%;",
+    #         text_area_input_wrapper(
+    #           inputId = "chat_input",
+    #           label = NULL,
+    #           width = "100%",
+    #           height = "100%",
+    #           value = "",
+    #           resize = "none",
+    #           textarea_class = "chat-prompt"
+    #         )
+    #       ),
+    #       div(
+    #         style = "position: absolute; right: 10px; top: 50%; transform: translateY(-50%);",
+    #         bslib::input_task_button(
+    #           id = "chat",
+    #           label = icon("fas fa-paper-plane"),
+    #           label_busy = NULL,
+    #           class = "btn-primary p-1 chat-send-btn"
+    #         ) %>%
+    #           bslib::tooltip("Send (click or Enter)")
+    #       ),
+    #       audio_recorder
+    #     )
+    #   )
+    # })
   })
 }
diff --git a/R/record-audio.R b/R/record-audio.R
@@ -1,18 +1,24 @@
-#' An audio clip input control that records short audio clips from the microphone
+#' An audio clip input control that records short audio clips from the
+#' microphone
 #'
 #' @param inputId The input slot that will be used to access the value.
-#' @param reset_on_record Whether to reset the audio clip input value when recording starts. If
-#'   TRUE, the audio clip input value will become NULL at the moment the
-#'   Record button is pressed; if FALSE, the value will not change until
-#'   the user stops recording. Default is TRUE.
-#' @param mime_type The MIME type of the audio clip to record. By default, this is NULL,
-#'   which means the browser will choose a suitable MIME type for audio
+#' @param record_label Display label for the "record" control, or NULL for no
+#'   label. Default is 'Record'.
+#' @param stop_label Display label for the "stop" control, or NULL for no label.
+#'   Default is 'Stop'.
+#' @param reset_on_record Whether to reset the audio clip input value when
+#'   recording starts. If TRUE, the audio clip input value will become NULL at
+#'   the moment the Record button is pressed; if FALSE, the value will not
+#'   change until the user stops recording. Default is TRUE.
+#' @param mime_type The MIME type of the audio clip to record. By default, this
+#'   is NULL, which means the browser will choose a suitable MIME type for audio
 #'   recording. Common MIME types include 'audio/webm' and 'audio/mp4'.
-#' @param audio_bits_per_second The target audio bitrate in bits per second. By default, this is NULL,
-#'   which means the browser will choose a suitable bitrate for audio
-#'   recording. This is only a suggestion; the browser may choose a different
-#'   bitrate.
-#' @param show_mic_settings Whether to show the microphone settings in the settings menu. Default is TRUE.
+#' @param audio_bits_per_second The target audio bitrate in bits per second. By
+#'   default, this is NULL, which means the browser will choose a suitable
+#'   bitrate for audio recording. This is only a suggestion; the browser may
+#'   choose a different bitrate.
+#' @param show_mic_settings Whether to show the microphone settings in the
+#'   settings menu. Default is TRUE.
 #' @param ... Additional parameters to pass to the underlying HTML tag.
 #'
 #' @return An audio clip input control that can be added to a UI definition.
@@ -22,6 +28,8 @@
 #' @importFrom shiny icon
 input_audio_clip <- function(
     inputId,
+    record_label = "Record",
+    stop_label = "Stop",
     reset_on_record = TRUE,
     mime_type = NULL,
     audio_bits_per_second = NULL,
@@ -80,7 +88,7 @@ input_audio_clip <- function(
         div(
           style = "display: inline-block; background-color: red; width: 1rem; height: 1rem; border-radius: 100%; position: relative; top: 0.175rem; margin-right: 0.3rem;"
         ),
-        "Record"
+        record_label
       )
     ),
     tags$button(
@@ -90,7 +98,7 @@ input_audio_clip <- function(
         div(
           style = "display: inline-block; background-color: currentColor; width: 1rem; height: 1rem; position: relative; top: 0.175rem; margin-right: 0.3rem;"
         ),
-        "Stop"
+        stop_label
       )
     )
   )

diff --git a/R/transcribe-audio.R b/R/transcribe-audio.R
@@ -0,0 +1,107 @@
+#' Parse a Data URI
+#'
+#' Splits a base64 data URI into its MIME type and its decoded payload.
+#'
+#' @param data_uri A single string of the form `data:<mime>;base64,<payload>`.
+#'
+#' @return A list with two elements: 'mime_type' and 'data'.
+#'
+#' @importFrom base64enc base64decode
+parse_data_uri <- function(data_uri) {
+  # Reject NULL, NA and non-scalar input up front with the same clear error
+  is_string <- is.character(data_uri) && length(data_uri) == 1L && !is.na(data_uri)
+  match <- if (is_string) regexec("data:(.+);base64,(.+)", data_uri)
+  if (!is_string || match[[1]][1] == -1) {
+    stop("Invalid data URI format")
+  }
+  groups <- regmatches(data_uri, match)[[1]]
+  mime_type <- groups[2]
+  b64data <- groups[3]
+  # Pad with "=" up to a multiple of 4 characters if necessary
+  padding <- nchar(b64data) %% 4
+  if (padding > 0) b64data <- paste0(b64data, strrep("=", 4 - padding))
+  list(mime_type = mime_type, data = base64enc::base64decode(b64data))
+}
+
+#' Transcribe Audio from Data URI Using OpenAI's Whisper Model
+#'
+#' This function takes an audio file in data URI format, converts it to WAV with
+#' the `ffmpeg` command-line tool, and sends it to OpenAI's transcription API.
+#'
+#' @param audio_input A string. The audio data in data URI format.
+#' @param api_key A string. Your OpenAI API key. Defaults to the OPENAI_API_KEY environment variable.
+#'
+#' @return A string containing the transcribed text, returned invisibly.
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' audio_uri <- "data:audio/webm;base64,SGVsbG8gV29ybGQ="  # Example data URI
+#' transcription <- transcribe_audio(audio_uri)
+#' print(transcription)
+#' }
+#'
+#' @importFrom httr2 request req_auth_bearer_token req_body_multipart req_perform resp_is_error resp_status_desc resp_body_json resp_body_string
+#' @importFrom jsonlite fromJSON
+transcribe_audio <- function(audio_input, api_key = Sys.getenv("OPENAI_API_KEY")) {
+  parsed <- parse_data_uri(audio_input)
+
+  # Convert WebM to WAV (R doesn't have native WebM support, so we're using WAV)
+  temp_webm <- tempfile(fileext = ".webm")
+  temp_wav <- tempfile(fileext = ".wav")
+  # Remove both temporary files on every exit path, including errors
+  on.exit(unlink(c(temp_webm, temp_wav)), add = TRUE)
+  writeBin(parsed$data, temp_webm)
+  # Quote the paths so a temp directory containing spaces survives system2()
+  ffmpeg_args <- c("-i", shQuote(temp_webm), "-acodec", "pcm_s16le", "-ar", "44100", shQuote(temp_wav))
+  system_result <- system2("ffmpeg", args = ffmpeg_args, stdout = TRUE, stderr = TRUE)
+
+  if (!file.exists(temp_wav)) {
+    stop("Failed to convert audio: ", paste(system_result, collapse = "\n"))
+  }
+
+  # Transcribe audio using OpenAI API
+  req <- httr2::request("https://api.openai.com/v1/audio/transcriptions") %>%
+    httr2::req_auth_bearer_token(api_key) %>%
+    httr2::req_body_multipart(
+      file = curl::form_file(temp_wav),
+      model = "whisper-1",
+      response_format = "text"
+    )
+
+  resp <- httr2::req_perform(req)
+
+  if (httr2::resp_is_error(resp)) {
+    stop("API request failed: ", httr2::resp_status_desc(resp))
+  }
+
+  # Namespace-qualified like every other httr2 call in this function
+  user_prompt <- httr2::resp_body_string(resp)
+  invisible(user_prompt)
+}
+
+
+#' Convert Audio File to Data URI
+#'
+#' Reads an audio file from disk and encodes it as a base64 data URI.
+#'
+#' @param file_path A string. The path to the audio file.
+#'
+#' @return A string containing the data URI.
+#'
+audio_to_data_uri <- function(file_path) {
+  # Load the whole file into memory as raw bytes
+  n_bytes <- file.info(file_path)$size
+  raw_bytes <- readBin(file_path, "raw", n_bytes)
+
+  # Base64-encode the bytes so they can be embedded in the URI
+  payload <- base64enc::base64encode(raw_bytes)
+
+  # MIME type as guessed by the mime package for this path
+  content_type <- mime::guess_type(file_path)
+
+  # Assemble "data:<mime>;base64,<payload>"
+  uri_prefix <- paste0("data:", content_type, ";base64,")
+  paste0(uri_prefix, payload)
+}