Skip to content

Commit

Permalink
first working audio input
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesHWade committed Aug 23, 2024
1 parent f302d96 commit 20a6456
Show file tree
Hide file tree
Showing 5 changed files with 250 additions and 58 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ Imports:
shiny.i18n,
SSEparser,
stringr (>= 1.5.0),
tuneR,
utils,
waiter,
yaml
Suggests:
AzureRMR,
Expand Down
3 changes: 2 additions & 1 deletion R/mod_app.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mod_app_ui <- function(id, ide_colors = get_ide_theme_info()) {
ns <- NS(id)
translator <- create_translator(language = getOption("gptstudio.language"))
tagList(
waiter::use_waiter(),
useBusyIndicators(),
bslib::page_fluid(
theme = create_chat_app_theme(ide_colors),
title = "ChatGPT from gptstudio",
Expand Down Expand Up @@ -84,6 +84,7 @@ rgb_str_to_hex <- function(rgb_string) {
create_chat_app_theme <- function(ide_colors = get_ide_theme_info()) {
bslib::bs_theme(
version = 5,
preset = "shiny",
bg = ide_colors$bg,
fg = ide_colors$fg,
font_scale = 0.9,
Expand Down
160 changes: 118 additions & 42 deletions R/mod_chat.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,37 @@ mod_chat_ui <- function(id, translator = create_translator()) {
style = css(
"width" = "100%"
),
uiOutput(ns("chat_input"))
div(
div(
style = "flex-grow: 1; height: 100%;",
text_area_input_wrapper(
inputId = ns("chat_input"),
label = NULL,
width = "100%",
height = "100%",
value = "",
resize = "none",
textarea_class = "chat-prompt"
)
),
div(
style = "position: absolute; right: 10px; top: 50%; transform: translateY(-50%);",
bslib::input_task_button(
id = ns("chat"),
label = icon("fas fa-paper-plane"),
label_busy = NULL,
class = "btn-primary p-1 chat-send-btn"
) %>%
bslib::tooltip("Send (click or Enter)")
),
div(
style = "position: absolute; right: 40px; top: 30%; transform: translateY(-50%);",
input_audio_clip(ns("clip"),
record_label = NULL,
stop_label = NULL,
show_mic_settings = FALSE)
)
)
)
)
)
Expand All @@ -51,8 +81,8 @@ mod_chat_server <- function(id,
history) {
# This is where changes will focus
moduleServer(id, function(input, output, session) {
ns <- NS(id)

Check warning on line 84 in R/mod_chat.R

View workflow job for this annotation

GitHub Actions / lint

file=R/mod_chat.R,line=84,col=5,[object_usage_linter] local variable 'ns' assigned but may not be used
# Session data ----

rv <- reactiveValues()
rv$reset_welcome_message <- 0L
rv$reset_streaming_message <- 0L
Expand Down Expand Up @@ -86,9 +116,8 @@ mod_chat_server <- function(id,
}) %>%
bindEvent(history$create_new_chat)


observe({

cli::cli_inform("Chat triggered")
skeleton <- gptstudio_create_skeleton(
service = settings$service,
model = settings$model,
Expand Down Expand Up @@ -122,48 +151,95 @@ mod_chat_server <- function(id,
}

updateTextAreaInput(session, "chat_input", value = "")
if (settings$stream) {
rv$reset_streaming_message <- rv$reset_streaming_message + 1L
}
}) %>%
bindEvent(input$chat)

output$chat_input <- renderUI({
tagList(
fluidRow(
column(
width = 3,
div(
style = "display: flex; align-items: center; height: 100%;",
input_audio_clip("clip", show_mic_settings = FALSE)
)
),
column(
width = 9,
div(
style = "display: flex; align-items: center; position: relative; height: 100%;",
div(
style = "flex-grow: 1;",
text_area_input_wrapper(
inputId = "chat_input",
label = NULL,
width = "100%",
value = "",
resize = "none",
textarea_class = "chat-prompt"
)
),
div(
style = "position: absolute; right: 10px;",
bslib::input_task_button(
id = "chat",
label = icon("fas fa-paper-plane"),
label_busy = NULL,
class = "w-100 btn-primary p-1 chat-send-btn"
) %>%
bslib::tooltip("Send (click or Enter)")
)
)
)
observe({
req(input$clip)
new_prompt <- transcribe_audio(input$clip)
skeleton <- gptstudio_create_skeleton(
service = settings$service,
model = settings$model,
prompt = new_prompt,
history = history$chat_history,
stream = settings$stream
) %>%
gptstudio_skeleton_build(
skill = settings$skill,
style = settings$style,
task = settings$task,
custom_prompt = settings$custom_prompt
)

response <- gptstudio_request_perform(
skeleton = skeleton,
shiny_session = session
) %>%
gptstudio_response_process()

history$chat_history <- response$history

append_to_conversation_history(
id = history$selected_conversation$id %||% ids::random_id(),
title = history$selected_conversation$title %||% find_placeholder_title(history$chat_history), # nolint
messages = history$chat_history
)
})

if (settings$stream) {
rv$reset_streaming_message <- rv$reset_streaming_message + 1L
}

updateTextAreaInput(session, "chat_input", value = "")
if (settings$stream) {
rv$reset_streaming_message <- rv$reset_streaming_message + 1L
}
}) %>%
bindEvent(input$clip)

# output$chat_input <- renderUI({
# audio_recorder <-
# if (rv$audio_input %||% getOption("gptstudio.audio_input")) {
# div(
# style = "position: absolute; right: 40px; top: 30%; transform: translateY(-50%);",

Check warning on line 206 in R/mod_chat.R

View workflow job for this annotation

GitHub Actions / lint

file=R/mod_chat.R,line=206,col=15,[commented_code_linter] Commented code should be removed.
# input_audio_clip("clip",
# record_label = NULL,

Check warning on line 208 in R/mod_chat.R

View workflow job for this annotation

GitHub Actions / lint

file=R/mod_chat.R,line=208,col=32,[commented_code_linter] Commented code should be removed.
# stop_label = NULL,

Check warning on line 209 in R/mod_chat.R

View workflow job for this annotation

GitHub Actions / lint

file=R/mod_chat.R,line=209,col=32,[commented_code_linter] Commented code should be removed.
# show_mic_settings = FALSE)
# )
# } else {
# NULL
# }
#
# tagList(
# div(
# div(
# style = "flex-grow: 1; height: 100%;",

Check warning on line 219 in R/mod_chat.R

View workflow job for this annotation

GitHub Actions / lint

file=R/mod_chat.R,line=219,col=15,[commented_code_linter] Commented code should be removed.
# text_area_input_wrapper(
# inputId = "chat_input",

Check warning on line 221 in R/mod_chat.R

View workflow job for this annotation

GitHub Actions / lint

file=R/mod_chat.R,line=221,col=17,[commented_code_linter] Commented code should be removed.
# label = NULL,

Check warning on line 222 in R/mod_chat.R

View workflow job for this annotation

GitHub Actions / lint

file=R/mod_chat.R,line=222,col=17,[commented_code_linter] Commented code should be removed.
# width = "100%",

Check warning on line 223 in R/mod_chat.R

View workflow job for this annotation

GitHub Actions / lint

file=R/mod_chat.R,line=223,col=17,[commented_code_linter] Commented code should be removed.
# height = "100%",

Check warning on line 224 in R/mod_chat.R

View workflow job for this annotation

GitHub Actions / lint

file=R/mod_chat.R,line=224,col=17,[commented_code_linter] Commented code should be removed.
# value = "",

Check warning on line 225 in R/mod_chat.R

View workflow job for this annotation

GitHub Actions / lint

file=R/mod_chat.R,line=225,col=17,[commented_code_linter] Commented code should be removed.
# resize = "none",
# textarea_class = "chat-prompt"
# )
# ),
# div(
# style = "position: absolute; right: 10px; top: 50%; transform: translateY(-50%);",
# bslib::input_task_button(
# id = "chat",
# label = icon("fas fa-paper-plane"),
# label_busy = NULL,
# class = "btn-primary p-1 chat-send-btn"
# ) %>%
# bslib::tooltip("Send (click or Enter)")
# ),
# audio_recorder
# )
# )
# })
})
}
36 changes: 22 additions & 14 deletions R/record-audio.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
#' An audio clip input control that records short audio clips from the microphone
#' An audio clip input control that records short audio clips from the
#' microphone
#'
#' @param inputId The input slot that will be used to access the value.
#' @param reset_on_record Whether to reset the audio clip input value when recording starts. If
#' TRUE, the audio clip input value will become NULL at the moment the
#' Record button is pressed; if FALSE, the value will not change until
#' the user stops recording. Default is TRUE.
#' @param mime_type The MIME type of the audio clip to record. By default, this is NULL,
#' which means the browser will choose a suitable MIME type for audio
#' @param record_label Display label for the "record" control, or NULL for no
#' label. Default is 'Record'.
#' @param stop_label Display label for the "stop" control, or NULL for no label.
#' Default is 'Stop'.
#' @param reset_on_record Whether to reset the audio clip input value when
#' recording starts. If TRUE, the audio clip input value will become NULL at
#' the moment the Record button is pressed; if FALSE, the value will not
#' change until the user stops recording. Default is TRUE.
#' @param mime_type The MIME type of the audio clip to record. By default, this
#' is NULL, which means the browser will choose a suitable MIME type for audio
#' recording. Common MIME types include 'audio/webm' and 'audio/mp4'.
#' @param audio_bits_per_second The target audio bitrate in bits per second. By default, this is NULL,
#' which means the browser will choose a suitable bitrate for audio
#' recording. This is only a suggestion; the browser may choose a different
#' bitrate.
#' @param show_mic_settings Whether to show the microphone settings in the settings menu. Default is TRUE.
#' @param audio_bits_per_second The target audio bitrate in bits per second. By
#' default, this is NULL, which means the browser will choose a suitable
#' bitrate for audio recording. This is only a suggestion; the browser may
#' choose a different bitrate.
#' @param show_mic_settings Whether to show the microphone settings in the
#' settings menu. Default is TRUE.
#' @param ... Additional parameters to pass to the underlying HTML tag.
#'
#' @return An audio clip input control that can be added to a UI definition.
Expand All @@ -22,6 +28,8 @@
#' @importFrom shiny icon
input_audio_clip <- function(
inputId,
record_label = "Record",
stop_label = "Stop",
reset_on_record = TRUE,
mime_type = NULL,
audio_bits_per_second = NULL,
Expand Down Expand Up @@ -80,7 +88,7 @@ input_audio_clip <- function(
div(
style = "display: inline-block; background-color: red; width: 1rem; height: 1rem; border-radius: 100%; position: relative; top: 0.175rem; margin-right: 0.3rem;"
),
"Record"
record_label
)
),
tags$button(
Expand All @@ -90,7 +98,7 @@ input_audio_clip <- function(
div(
style = "display: inline-block; background-color: currentColor; width: 1rem; height: 1rem; position: relative; top: 0.175rem; margin-right: 0.3rem;"
),
"Stop"
stop_label
)
)
)
Expand Down
107 changes: 107 additions & 0 deletions R/transcribe-audio.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#' Parse a Data URI
#'
#' Splits a base64-encoded data URI of the form
#' `data:<mime type>;base64,<payload>` into its MIME type and decoded payload.
#'
#' @param data_uri A single string. The data URI to parse.
#'
#' @return A list with two elements: 'mime_type' (the text between `data:` and
#'   `;base64,`) and 'data' (the payload as returned by
#'   `base64enc::base64decode()`).
#'
#' @importFrom base64enc base64decode
parse_data_uri <- function(data_uri) {
  # Reject NULL, NA, non-character and multi-element input up front; otherwise
  # the match check below fails with an unrelated subscript or NA error.
  if (!is.character(data_uri) || length(data_uri) != 1L || is.na(data_uri)) {
    stop("Invalid data URI format", call. = FALSE)
  }

  # Tolerate surrounding whitespace, then require the "data:" scheme at the
  # very start of the string.
  match <- regexec("^data:(.+);base64,(.+)", trimws(data_uri))
  if (match[[1]][1] == -1) {
    stop("Invalid data URI format", call. = FALSE)
  }
  groups <- regmatches(trimws(data_uri), match)[[1]]
  mime_type <- groups[2]

  # Drop embedded whitespace (e.g. line wrapping) so the padding arithmetic
  # below counts payload characters only.
  b64data <- gsub("[[:space:]]+", "", groups[3])

  # Base64 text must be a multiple of 4 characters long; restore any "="
  # padding that the producer stripped.
  remainder <- nchar(b64data) %% 4L
  if (remainder > 0L) {
    b64data <- paste0(b64data, strrep("=", 4L - remainder))
  }

  list(mime_type = mime_type, data = base64enc::base64decode(b64data))
}

#' Transcribe Audio from Data URI Using OpenAI's Whisper Model
#'
#' This function takes an audio clip in data URI format, converts it to WAV
#' with the `ffmpeg` command-line tool, and sends the WAV file to OpenAI's
#' transcription API to get the transcribed text.
#'
#' @param audio_input A string. The audio data in data URI format.
#' @param api_key A string. Your OpenAI API key. Defaults to the
#'   OPENAI_API_KEY environment variable.
#'
#' @return A string containing the transcribed text, returned invisibly.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' audio_uri <- "data:audio/webm;base64,SGVsbG8gV29ybGQ=" # Example data URI
#' transcription <- transcribe_audio(audio_uri)
#' print(transcription)
#' }
#'
#' @importFrom httr2 request req_auth_bearer_token req_body_multipart req_perform resp_is_error resp_status_desc resp_body_json resp_body_string
#' @importFrom jsonlite fromJSON
transcribe_audio <- function(audio_input,
                             api_key = Sys.getenv("OPENAI_API_KEY")) {
  # Parse the data URI
  parsed <- parse_data_uri(audio_input)

  # Fail with a clear message when the external converter is not installed.
  if (!nzchar(Sys.which("ffmpeg"))) {
    stop(
      "Failed to convert audio: 'ffmpeg' was not found on the PATH",
      call. = FALSE
    )
  }

  # The clip is written with a .webm extension and converted to WAV before
  # upload. Cleanup is registered immediately so the temporary files are
  # removed on every exit path, including errors.
  temp_webm <- tempfile(fileext = ".webm")
  temp_wav <- tempfile(fileext = ".wav")
  on.exit(unlink(c(temp_webm, temp_wav)), add = TRUE)

  writeBin(parsed$data, temp_webm)

  # system2() does not quote its arguments, so quote the paths explicitly to
  # survive temporary directories that contain spaces.
  system_result <- system2(
    "ffmpeg",
    args = c(
      "-i", shQuote(temp_webm),
      "-acodec", "pcm_s16le",
      "-ar", "44100",
      shQuote(temp_wav)
    ),
    stdout = TRUE,
    stderr = TRUE
  )

  if (!file.exists(temp_wav)) {
    stop(
      "Failed to convert audio: ", paste(system_result, collapse = "\n"),
      call. = FALSE
    )
  }

  # Transcribe audio using OpenAI API
  req <- httr2::request("https://api.openai.com/v1/audio/transcriptions") %>%
    httr2::req_auth_bearer_token(api_key) %>%
    httr2::req_body_multipart(
      file = curl::form_file(temp_wav),
      model = "whisper-1",
      response_format = "text"
    )

  resp <- httr2::req_perform(req)

  # req_perform() already signals an R error for HTTP error statuses by
  # default; this check is kept as an explicit secondary guard.
  if (httr2::resp_is_error(resp)) {
    stop("API request failed: ", httr2::resp_status_desc(resp), call. = FALSE)
  }

  # Namespace-qualified: resp_body_string() was previously called bare and was
  # not among the imported httr2 functions.
  user_prompt <- httr2::resp_body_string(resp)

  invisible(user_prompt)
}


#' Convert Audio File to Data URI
#'
#' Reads the file as raw bytes, base64-encodes them, and prefixes the result
#' with a `data:` header carrying the MIME type guessed from the file path.
#'
#' @param file_path A string. The path to the audio file.
#'
#' @return A string containing the data URI.
#'
audio_to_data_uri <- function(file_path) {
  # Read the whole file in one call and encode it.
  payload <- base64enc::base64encode(
    readBin(file_path, what = "raw", n = file.size(file_path))
  )

  # Header: scheme, guessed MIME type, and the base64 marker.
  header <- paste0("data:", mime::guess_type(file_path), ";base64,")

  paste0(header, payload)
}

0 comments on commit 20a6456

Please sign in to comment.