-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtscript_ws.py
270 lines (237 loc) · 13.8 KB
/
tscript_ws.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
import os
import openai
import tempfile
import json
import requests
import logging
import logging.handlers
import tempfile
from support_funcs import gpt_proc, split_text, tok_count, rename_m4a, split_audio, convert_to_wav, process_transcription
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
from werkzeug.utils import secure_filename
from werkzeug.exceptions import BadRequest
from datetime import timedelta
# Configure the Flask app
ts_flask_app = Flask(__name__)
CORS(ts_flask_app)
ts_flask_app.config['upload_dir'] = 'uploads'
ts_flask_app.config['allowed_exts'] = {'wav', 'mp3', 'ogg', 'm4a', 'mp4'}
ts_flask_app.config['model'] = 'gpt-3.5-turbo'
openai.api_key = os.getenv("OPENAI_API_KEY")
# Create a logger
logger = logging.getLogger('tscript_logger')
logger.setLevel(logging.DEBUG)
# Create a SysLogHandler
syslog_handler = logging.handlers.SysLogHandler(address='/dev/log')
def allowed_file(filename):
return '.' in filename and filename.rsplit('.', 1)[1].lower() in ts_flask_app.config['allowed_exts']
@ts_flask_app.route('/')
def index():
return render_template('index.html')
@ts_flask_app.route('/task_sel', methods=['GET', 'POST'])
def task_selection():
# check that we have 'task'. If not, return to index.html
if request.form.get('task_sel'):
task = request.form.get('task_sel')
else:
return render_template('index.html')
if task == "ts_proc":
logger.info("TS_PROC - Task selected: " + task)
return render_template('ts_proc.html')
elif task == "pod_sum":
logger.info("TS_PROC - Task selected: " + task)
return render_template('pod_sum.html')
else:
logger.info("TS_PROC - Task selected: " + task)
return render_template('index.html')
@ts_flask_app.route('/transcribe', methods=['POST'])
def transcribe_audio():
if request.method == 'POST': # Seems to hang sometimes after this. Doesnt' get into the next statement... print("/transcribe received POST request") # Setup the file if 'file' not in request.files: return jsonify(error='No file part'), 400 print("No file part") file = request.files['file'] print(file.filename) # Setup options resp_format = request.form.get('output-format') print(resp_format) # Get engine to determine which API to use # openai_api or local_whisper engine = request.form.get('engine') model = request.form.get('model') lang = request.form.get('language') translate = request.form.get('translate') processing = request.form.get('processing') processing_role = request.form.get('processing-role') # Create a dictionary to store the output
# Get engine to determine which API to use
# openai_api or local_whisper
file_url = request.form.get('file_url')
engine = request.form.get('engine')
model = request.form.get('model')
lang = request.form.get('language')
translate = request.form.get('translate')
resp_format = request.form.get('output-format')
# Check whether the file is a URL or a local file
if file_url:
print("File is a URL")
# Try to download the file from the URL and return an error if it fails
try:
file = requests.get(file_url)
# Set file.filename to the last part of the URL but strip anything after a ? or #
file.filename = file_url.split('/')[-1].split('?')[0].split('#')[0]
print("Audio file, " + file_url + " fetched: " + str(file.filename))
logger.info("Audio file, " + file_url + " fetched: " + file.filename)
except Exception as e:
#print(e)
logger.error("Error downloading file from URL: " + file_url)
return jsonify(error='Error downloading file from URL'), 500
else:
print("File is passed from client")
file = request.files['file']
if 'file' not in request.files:
logger.error("No file part")
return jsonify(error='No file part'), 400
logger.info("Audio file, " + file.filename + " received from client")
# Create a dictionary to store the output
output = {}
if file and allowed_file(file.filename):
filename = secure_filename(file.filename)
file_path = os.path.join(ts_flask_app.config['upload_dir'], filename)
print("Saving file locally: " + file_path)
if file_url:
try:
print("Saving file from URL")
with open (file_path, 'wb') as f:
f.write(file.content)
except Exception as e:
print(e)
return jsonify(error='Error saving file from URL'), 500
else:
print("Saving file from client")
try:
file.save(file_path)
except Exception as e:
print(e)
return jsonify(error='Error saving file'), 500
# Create a dictionary to store the output for OpenAI transcriptions
transcriptions = []
if engine == 'openai_api':
# Option is misspelled in index.html
if resp_format == 'txt':
resp_format = 'text'
print("Using OpenAI API")
f_ext = file_path.rsplit('.', 1)[1].lower()
# the ffmpeg library on MacOS has problems with m4a files
# they can be renamed to mp4 without any issues
if f_ext == 'm4a':
file_path = rename_m4a(file_path, 'mp4')
# update file_extension to mp4
print(file_path)
f_ext = file_path.rsplit('.', 1)[1].lower()
# Split audio into chunks - split_audio returns a list of AudioSegment objects
# file_ext=file_ext is gross. Shouldn't use same variable name.
# OpenAI max size should be stored in one place...
audio_chunks = split_audio(file_path, max_size_bytes=25 * 1024 * 1024, file_ext=f_ext)
logger.info(f"{len(audio_chunks)} chunks created from {file_path} for processing using OpenAI API")
print(f"Audio chunks: {len(audio_chunks)}")
for index, chunk in enumerate(audio_chunks):
with tempfile.NamedTemporaryFile(delete=False, suffix=f'.{f_ext}') as chunk_file:
chunk.export(chunk_file.name, format=f_ext)
print(f"Sending chunk {index + 1} to API: {chunk_file.name}")
with open(chunk_file.name, 'rb') as audio_file:
# Use openai.Audio.transcribe if 'translate' is false
# Use openai.Audio.translate if 'translate' is true
if translate == "true":
print("Translating")
logger.info(f"Translating chunk {index + 1} of {len(audio_chunks)}")
# Translation is always to English and language detection is automatic and cannot be specified.
try:
transcript = openai.Audio.translate(model="whisper-1", file=audio_file, response_format=resp_format)
except openai.error.APIError as e:
print(f"An API error occurred: {e}")
return jsonify({"error": "Error using OpenAI API", "details": str(e)}), 500
except openai.error.APIConnectionError as e:
print(f"Failed to connect to OpenAI API: {e}")
return jsonify({"error": "Error connecting to the OpenAI API", "details": str(e)}), 500
except openai.error.RateLimitError as e:
print(f"Rate limit exceeded: {e}")
return jsonify({"error": "OpenAI API Rate limit exceeded", "details": str(e)}), 500
except openai.error.AuthenticationError as e:
print(f"Authentication error: {e}")
return jsonify({"error": "OpenAI API authentication error", "details": str(e)}), 500
else:
logger.info(f"Transcribing chunk {index + 1} of {len(audio_chunks)}")
# The API only takes the language parameter to when NOT translating.
try:
transcript = openai.Audio.transcribe(model="whisper-1", file=audio_file, response_format=resp_format, language=lang)
except openai.error.APIError as e:
print(f"An API error occurred: {e}")
return jsonify({"error": "Error using OpenAI API", "details": str(e)}), 500
except openai.error.APIConnectionError as e:
print(f"Failed to connect to OpenAI API: {e}")
return jsonify({"error": "Error connecting to the OpenAI API", "details": str(e)}), 500
except openai.error.RateLimitError as e:
print(f"Rate limit exceeded: {e}")
return jsonify({"error": "OpenAI API Rate limit exceeded", "details": str(e)}), 500
except openai.error.AuthenticationError as e:
print(f"Authentication error: {e}")
return jsonify({"error": "OpenAI API authentication error", "details": str(e)}), 500
os.remove(chunk_file.name)
# add the current chunk to transcriptions
transcriptions.append(transcript)
print(transcriptions)
print(transcriptions[0])
output = jsonify({'transcript': transcriptions[0]})
else:
# Run using the local API
print("Using local API")
# BASE_URL = "http://localhost:5007" # change this to the local server address
options = {
'file_id': file_path, # this assumes that the local file path can be used as the file_id
'model': model,
'language': lang,
'outfmt': resp_format
}
# Convert options dictionary to json
options_json = json.dumps(options)
# Convert the file to a wav
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav_file:
output_wav_file_path = temp_wav_file.name
try:
print("Attempting to convert to wav")
convert_to_wav(file_path, output_wav_file_path)
print(f"Converted, {output_wav_file_path}")
except Exception as e:
return jsonify({"error": str(e)}), 500
# Process the transcription
with tempfile.NamedTemporaryFile(delete=False, suffix=f".{options['outfmt']}") as temp_output_file:
output_file_path = temp_output_file.name
try:
print("Attempting to process transcription")
process_transcription(output_wav_file_path, options, output_file_path)
print(f"Processed, {output_file_path}")
except Exception as e:
return jsonify({"error": str(e)}), 500
# Return the transcription from output_file_path or the error returned by process_transcription
try:
with open(output_file_path, 'r') as output_file:
transcriptions = output_file.read()
# Create a dictionary with the content of transcriptions as the value of a key called 'transcript'
output = jsonify({'transcript': transcriptions})
except Exception as e:
return jsonify({"error": str(e)}), 500
print("Returning transcriptions")
print(output)
return output, 200
@ts_flask_app.route('/process', methods=['POST'])
def process():
output = {}
processing = request.form.get('processing')
processing_role = request.form.get('processing-role')
tscript = request.form.get('transcript')
# If the gpt_model has been configured in the form, update the value in the app.config
if request.form.get('gpt_model'):
ts_flask_app.config['model'] = request.form.get('gpt_model')
print("processing_role: " + processing_role)
# TODO: Next job is to get search the roles dict for the name rather than pass it directly
# TODO: Also allow for a custom role description
# Remember is an attempt to give GPT a memory of the previous summary to enable summaries of longer items
logger.info(f"Requesting GPT processing of {tok_count(tscript)} tokens using role: {processing_role}")
try:
summary = gpt_proc(text=tscript, sys_role=processing_role)
except Exception as e:
print("Error summarizing:", e)
logger.info(f"GPT processing error: {e}")
return jsonify({"error": "Error summarizing", "details": str(e)}), 500
print("-----------\nSummary:\n-----------")
print(summary)
tscript = summary[0]
output['summary'] = summary
return output
if __name__ == '__main__':
ts_flask_app.run(debug=True, port=5000)