-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathcaptioner.js
144 lines (131 loc) · 4.27 KB
/
captioner.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// Imports the Google Cloud client library
const speech = require('@google-cloud/speech');
const fs = require('fs');
const ffmpeg = require('fluent-ffmpeg');
// Optional CLI arguments: <input video> [<output video>].
const [, , inputArg, outputArg] = process.argv;
// Video to caption; falls back to a sample file when no argument is given.
const videoFileName = inputArg || './magic.mp4';
// Intermediate artifacts: the extracted audio track and the generated subtitles.
const audioFileName = './sample.flac';
const srtFileName = './sample.srt';
// Captioned result; defaults to the input path with '-captioned.mp4' appended.
const outVideoFileName = outputArg || `${videoFileName}-captioned.mp4`;
/**
 * Transcodes `input` to `output` with ffmpeg as single-channel audio.
 *
 * The sample rate is pinned to 48000 Hz because generateSrt() declares
 * `sampleRateHertz: 48000` in its recognition config; without resampling,
 * a source recorded at any other rate would produce a mismatched request.
 *
 * @param {string} input - Path of the media file to read.
 * @param {string} output - Path of the audio file to write.
 * @param {function(?Error): void} callback - Called with `null` when ffmpeg
 *   finishes, or with the error when it fails.
 */
function convert(input, output, callback) {
  ffmpeg(input)
    .audioChannels(1)
    // Keep in sync with the sampleRateHertz value used by generateSrt().
    .audioFrequency(48000)
    .output(output)
    .on('end', function() {
      console.log('conversion ended');
      callback(null);
    })
    .on('error', function(err) {
      console.log('error: ', err);
      callback(err);
    })
    .run();
}
// Start the pipeline: extract the audio track, then transcribe it once ffmpeg
// reports success. On failure convert() has already logged the error, so
// there is nothing more to do here.
convert(videoFileName, audioFileName, (err) => {
  if (err) {
    return;
  }
  console.log('conversion complete');
  generateSrt(videoFileName, audioFileName, srtFileName);
});
// Speech client used by generateSrt(); created before the conversion callback
// above can fire, because ffmpeg completes asynchronously.
const client = new speech.SpeechClient();
/**
 * Transcribes an audio file with the Speech client, writes the transcript as
 * an SRT subtitle file and, once that file has been flushed, calls
 * combineSrt(vidName, srtName).
 *
 * Words from every recognition result are grouped into captions: a word joins
 * the current caption while it starts no more than 3 seconds after that
 * caption's first word; otherwise it opens a new caption.
 *
 * Failures (recognition errors, an empty transcript, write errors) are logged
 * with an 'ERROR:' prefix, and combineSrt is not called in that case.
 *
 * NOTE(review): the whole audio file is sent inline through
 * `client.recognize`; long recordings probably need the long-running API
 * instead - confirm against the Speech-to-Text limits.
 *
 * @param {string} vidName - Path of the source video, forwarded to combineSrt.
 * @param {string} audName - Path of the FLAC audio file to transcribe.
 * @param {string} srtName - Path of the SRT file to create.
 */
function generateSrt(vidName, audName, srtName) {
  // Maximum distance between a caption's first word and the start of any
  // later word that may still be appended to it.
  const MAX_CAPTION_MS = 3000;

  // Converts a {seconds, nanos} duration to whole milliseconds. `seconds` may
  // arrive as a number, a string or a Long-like object, so it is normalised
  // through its string form. Integer milliseconds keep the grouping
  // comparison free of floating-point rounding.
  function toMillis(duration) {
    if (!duration) {
      return 0;
    }
    const seconds = Number(String(duration.seconds || 0));
    const nanos = Number(duration.nanos || 0);
    return seconds * 1000 + Math.round(nanos / 1e6);
  }

  // Formats milliseconds as an SRT timestamp, HH:MM:SS,mmm.
  function formatSrtTime(totalMs) {
    const pad = (value, width) => String(value).padStart(width, '0');
    const hours = Math.floor(totalMs / 3600000);
    // Minutes and seconds wrap at 60 so times past one hour stay valid.
    const minutes = Math.floor(totalMs / 60000) % 60;
    const seconds = Math.floor(totalMs / 1000) % 60;
    const millis = totalMs % 1000;
    return `${pad(hours, 2)}:${pad(minutes, 2)}:${pad(seconds, 2)},${pad(millis, 3)}`;
  }

  // Flattens the word list of the top alternative of every result, in order.
  function collectWords(response) {
    const words = [];
    const results = (response && response.results) || [];
    for (const result of results) {
      const best = result.alternatives && result.alternatives[0];
      for (const word of (best && best.words) || []) {
        words.push(word);
      }
    }
    return words;
  }

  // Groups words into captions and renders each one as an SRT entry
  // (index, time range, text) without a trailing blank line.
  function buildCaptions(words) {
    const captions = [];
    let current = null;
    for (const w of words) {
      const startMs = toMillis(w.startTime);
      if (current === null || startMs > current.startMs + MAX_CAPTION_MS) {
        current = { startMs, endMs: startMs, words: [] };
        captions.push(current);
      }
      current.words.push(w.word);
      current.endMs = toMillis(w.endTime);
    }
    return captions.map((caption, i) =>
      `${i + 1}\n` +
      `${formatSrtTime(caption.startMs)} --> ${formatSrtTime(caption.endMs)}\n` +
      `${caption.words.join(' ')}`
    );
  }

  // The request carries the audio inline as base64.
  const audioBytes = fs.readFileSync(audName).toString('base64');
  const request = {
    audio: {
      content: audioBytes,
    },
    config: {
      encoding: 'FLAC',
      sampleRateHertz: 48000,
      languageCode: 'en-US',
      enableWordTimeOffsets: true,
    },
  };

  client
    .recognize(request)
    .then(data => {
      const captions = buildCaptions(collectWords(data[0]));
      if (captions.length === 0) {
        // Fail before touching the output file so no empty SRT is left behind.
        throw new Error(`no words were recognized in ${audName}`);
      }
      const out = fs.createWriteStream(srtName);
      // Stream errors are emitted outside the promise chain; handle them here
      // so they are logged instead of crashing the process.
      out.on('error', err => {
        console.error('ERROR:', err);
      });
      out.on('finish', () => {
        console.log('srt generated');
        combineSrt(vidName, srtName);
      });
      // Entries are separated by a blank line; the last one has no trailing newline.
      out.end(captions.join('\n\n'));
    })
    .catch(err => {
      console.error('ERROR:', err);
    });
}
/**
 * Renders the subtitles in `srtName` onto `vidName` with ffmpeg's `subtitles`
 * video filter and writes the result to `outVideoFileName`.
 *
 * deleteFiles() runs after both success and failure so the temporary audio
 * file does not linger. The 'error' listener also keeps a failed ffmpeg run
 * from surfacing as an unhandled 'error' event.
 *
 * @param {string} vidName - Path of the source video.
 * @param {string} srtName - Path of the SRT subtitle file to render.
 */
function combineSrt(vidName, srtName) {
  ffmpeg(vidName)
    .outputOptions(`-vf subtitles=${srtName}`)
    .output(outVideoFileName)
    .on('end', () => {
      console.log('srt combined');
      deleteFiles();
    })
    .on('error', (err) => {
      console.log('error: ', err);
      deleteFiles();
    })
    .run();
}
/**
 * Removes the temporary audio file if it exists. The generated SRT file is
 * not removed.
 */
function deleteFiles() {
  const audioIsPresent = fs.existsSync(audioFileName);
  if (!audioIsPresent) {
    return;
  }
  fs.unlinkSync(audioFileName);
}