forked from zolomohan/speech-recognition-in-javascript
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspeechRecognition.js
181 lines (157 loc) · 7 KB
/
speechRecognition.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
// Build identifier; render() prints it in the on-page debug line.
const version = "0.0.1.9";

// Reference text the recognised input is compared against, character by
// character: the leading "3", the decimal point, then the decimal digits of pi.
const piString = "3.14159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706798214808651328230664709384460955058223172535940812848111745028410270193852110555964462294895493038196442881097566593344612847564823378678316527120190914564856692346034861045432664821339360726024914127372458700660631558817488152092096282925409171536436789259036001133053054882046652138414695194151160943305727036575959195309218611738193261179310511854807446237996274956735188575272489122793818301194912983367336244065664308602139494639522473719070217986094370277053921717629317675238467481846766940513200056812714526356082778577134275778960917363717872146844090122495343014654958537105079227968925892354201995611212902196086403441815981362977477130996051870721134999999837297804995105973173281609631859502445945534690830264252230825334468503526193118817101000313783875288658753320838142061717766914730359825349042875546873115956286388235378759375195778185778053217122680661300192787661119590921642019";
// Coarse platform detection from the user-agent string. speech_paused_action()
// branches on these flags to decide which pending transcript to commit.
const is_android = /Android/i.test(navigator.userAgent);
const is_ios = /iPhone|iPad|iPod/i.test(navigator.userAgent);
const is_desktop = !(is_android || is_ios);

// Display grouping: makeGrouped() inserts a space before every position whose
// index modulo grouping_size equals grouping_shift.
const grouping_size = 5;
const grouping_shift = 2;

// Offset into piString at which the user's input is expected to begin.
let headstart_count = 0;

// Committed, normalised input accumulated so far.
let results = "";
// Raw text of the recogniser's current non-final results.
let interim_transcript = "";
// Most recent normalised final chunk reported by the recogniser.
let latest_final = "";
// Wire the page's controls to the prefixed Web Speech API when the browser
// exposes it; otherwise show a notice in the #status element.
if ("webkitSpeechRecognition" in window) {
  const speechRecognition = new webkitSpeechRecognition();
  // Keep listening across pauses and deliver provisional results as they form.
  speechRecognition.continuous = true;
  speechRecognition.interimResults = true;

  // The #status element is shown while recognition is running.
  speechRecognition.onstart = () => {
    document.querySelector("#status").style.display = "block";
  };
  speechRecognition.onerror = (event) => {
    document.querySelector("#status").style.display = "none";
    // Log the error code as well so failures can be told apart.
    console.log("Speech Recognition Error", event.error);
  };
  speechRecognition.onend = () => {
    speech_paused_action("speech recognition ended event.");
    console.log("Speech Recognition Ended");
  };

  /**
   * Commits whatever transcript is still pending into `results`, re-renders,
   * and hides the #status element.
   *
   * This needs to be idempotent: it is called from the stop button (which on
   * some platforms also triggers the onend event) and from onend itself when
   * recognition stops spontaneously. Each branch clears the value it commits,
   * so a second call adds nothing.
   *
   * @param {string} debug - Free-form text forwarded to render() for the debug line.
   */
  function speech_paused_action(debug = "") {
    if (is_android) {
      save_to_results_array(latest_final);
      latest_final = "";
    } else if (is_desktop) {
      // NOTE(review): `results` holds normalised text while interim_transcript
      // is raw, so this tail comparison only matches when the raw transcript
      // is already in normalised form - confirm that is the intent.
      if (results.slice(-interim_transcript.length) !== interim_transcript) {
        save_to_results_array(interim_transcript);
        interim_transcript = "";
      }
    } else {
      save_to_results_array(interim_transcript);
      interim_transcript = "";
    }
    render("", results, debug);
    document.querySelector("#status").style.display = "none";
  }

  // Handles each recognition event. A final last result is remembered in
  // latest_final; otherwise all non-final results are concatenated into
  // interim_transcript. Either way the pending text is rendered after `results`.
  speechRecognition.onresult = (event) => {
    const last_index = event.results.length - 1;
    if (event.results[last_index].isFinal) {
      const latest_transcript = event.results[last_index][0].transcript;
      // Normalise: lower-case, strip separators, map number words to digits.
      const chunk = substitutions(latest_transcript);
      if (chunk.length > 0) {
        latest_final = chunk;
        render(chunk, results, `latest_transcript: ${latest_transcript}`);
      }
    } else {
      interim_transcript = "";
      for (let i = 0; i < event.results.length; ++i) {
        const not_final = !event.results[i].isFinal;
        if (not_final) {
          interim_transcript += event.results[i][0].transcript;
        }
      }
      render(interim_transcript, results, `raw interim: ${interim_transcript}`);
    }
  };

  /**
   * Appends the normalised form of `x` to the committed `results` string.
   *
   * @param {string} x - Raw or already-normalised transcript text.
   */
  function save_to_results_array(x) {
    results += substitutions(x);
  }

  document.querySelector("#start").onclick = () => {
    speechRecognition.start();
  };
  document.querySelector("#stop").onclick = () => {
    // Commit pending text first; onend may or may not follow on this platform.
    speech_paused_action("stop button pressed.");
    speechRecognition.stop();
  };
  document.querySelector("#input_headstart_count").onchange = () => {
    const headstart_count_string = document.querySelector("#input_headstart_count").value;
    const parsed_count = Number.parseInt(headstart_count_string, 10);
    // An empty or non-numeric field parses to NaN. NaN would stop render()'s
    // comparison loop from running and would make "delete chunk" erase every
    // result (slice(0, -NaN) behaves like slice(0, 0)). Negative offsets index
    // before the start of piString. Fall back to no head start in both cases.
    headstart_count = Number.isNaN(parsed_count) || parsed_count < 0 ? 0 : parsed_count;
    render("", results);
  };
  document.querySelector("#delete_chunk").onclick = () => {
    // Length of the trailing, possibly partial, display group; when the input
    // ends exactly on a group boundary a whole group is removed instead.
    const size_of_last_chunk = (results.length + headstart_count - grouping_shift + grouping_size) % grouping_size;
    const remove_count = size_of_last_chunk === 0 ? grouping_size : size_of_last_chunk;
    results = results.slice(0, -remove_count);
    render("", results);
  };
  document.querySelector("#reset").onclick = () => {
    reset_results();
    render("", results);
  };

  /** Discards everything committed so far. */
  function reset_results() {
    results = "";
  }
} else {
  document.querySelector("#status").innerHTML = "Speech Recognition Not Available, use Chrome";
  document.querySelector("#status").style.display = "block";
  console.log("Speech Recognition Not Available");
}
/**
 * Normalises a speech transcript into the compact form compared against pi.
 *
 * Steps, in order:
 *   1. lower-case the text;
 *   2. drop whitespace, "/" and "-";
 *   3. replace the number words "zero".."nine" with their digits;
 *   4. replace the homophones "oh", "to" and "for" with 0, 2 and 4.
 *
 * Number words are replaced before the homophones on purpose: once whitespace
 * is gone, "eight one" reads "eightone", and replacing "to" first would
 * corrupt it to "eigh2ne" instead of "81".
 *
 * Any other characters (digits, ".", unrecognised words) are left untouched.
 *
 * @param {string} latest_transcript - Transcript text to normalise.
 * @returns {string} The normalised text.
 */
function substitutions(latest_transcript) {
  return latest_transcript
    .toLowerCase()
    .replace(/\s/g, '')
    .replace(/\//g, '')
    .replace(/-/g, '')
    .replace(/zero/g, '0')
    .replace(/one/g, '1')
    .replace(/two/g, '2')
    .replace(/three/g, '3')
    .replace(/four/g, '4')
    .replace(/five/g, '5')
    .replace(/six/g, '6')
    .replace(/seven/g, '7')
    .replace(/eight/g, '8')
    .replace(/nine/g, '9')
    .replace(/oh/g, '0')
    .replace(/to/g, '2')
    .replace(/for/g, '4');
}
/**
 * Redraws the transcript, its character-by-character comparison against
 * piString, the running counts and the debug line.
 *
 * @param {string} interim_transcript - Pending (not yet committed) text; it is
 *   normalised with substitutions() before being displayed or compared.
 * @param {string} results - Committed text, displayed as-is.
 * @param {string} debug - Free-form text shown in the #debug element.
 */
function render(interim_transcript, results, debug = "") {
  const final_transcript = results;
  // Declared locally: a bare assignment here would create an implicit global
  // and throws a ReferenceError in strict mode or in an ES module.
  const final_and_interim = substitutions(final_transcript + interim_transcript);

  // Build a marker string with "_" where the input matches the expected
  // character of piString (offset by headstart_count) and "X" where it does
  // not. Comparison stops at the end of the input or of piString.
  let assessment = "";
  let correct = 0;
  let errors = 0;
  for (let i = 0; i < final_and_interim.length && (i + headstart_count < piString.length); i++) {
    const inputted_digit = final_and_interim[i];
    const expected_pi_digit = piString[i + headstart_count];
    if (inputted_digit === expected_pi_digit) {
      assessment += "_";
      correct++;
    } else {
      assessment += "X";
      errors++;
    }
  }

  // Only the tail of the committed text is shown.
  const display_count = 35;
  // These three values are plain text, so they are written with textContent
  // rather than being parsed as markup.
  document.querySelector("#final").textContent = makeGrouped(final_transcript, grouping_shift, headstart_count).slice(-display_count);
  const interim_no_whitespace = substitutions(interim_transcript);
  // Grouping of the pending text continues from where the committed text ends.
  document.querySelector("#interim").textContent = makeGrouped(interim_no_whitespace, grouping_shift, final_transcript.length + headstart_count);
  // The marker line is widened by the pending length so it also covers the
  // pending characters.
  document.querySelector("#assessment").textContent = makeGrouped(assessment, grouping_shift, headstart_count).slice(-display_count - interim_no_whitespace.length);
  document.querySelector("#counts").innerHTML = `Correct: ${correct} <br> Errors: ${errors} <br> Total: ${correct + errors}`;
  // NOTE(review): `debug` may carry raw recogniser output and is interpolated
  // into markup here; escape it if that text can ever be attacker-controlled.
  document.querySelector("#debug").innerHTML = `Debug info: ${debug} <br> Version: ${version} <br> User agent:${navigator.userAgent}`;
}
/**
 * Inserts a space in front of every character of `x` whose shifted position
 * falls on a group boundary.
 *
 * A character at index i gets a leading space when
 * (i + starting_grouping_from) modulo grouping_size equals `remainder`.
 *
 * @param {string} x - Text to split into groups.
 * @param {number} remainder - Position within a group at which a space is inserted.
 * @param {number} starting_grouping_from - Offset added to every index, so
 *   grouping can continue from an earlier string.
 * @returns {string} `x` with the separating spaces added.
 */
function makeGrouped(x, remainder, starting_grouping_from) {
  return x
    .split("")
    .map((character, offset) => {
      const startsGroup = (offset + starting_grouping_from) % grouping_size == remainder;
      return startsGroup ? " " + character : character;
    })
    .join("");
}
/**
 * Splits `str` into consecutive pieces of at most `length` characters.
 *
 * The pieces are produced by a global regular-expression match, so:
 *   - the result is `null` (not an empty array) when nothing matches, for
 *     example for an empty string;
 *   - line terminators are not matched by "." and are therefore dropped.
 *
 * @param {string} str - Text to split.
 * @param {number} length - Maximum size of each piece; interpolated into the pattern.
 * @returns {string[] | null} The pieces in order, or `null` when there are none.
 */
function chunkString(str, length) {
  // Return the match directly; assigning it to an undeclared name would
  // create an implicit global and throws in strict mode or in an ES module.
  return str.match(new RegExp(`.{1,${length}}`, 'g'));
}