-
Notifications
You must be signed in to change notification settings - Fork 66
/
Copy pathtranscript.php
62 lines (56 loc) · 1.57 KB
/
transcript.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
<?php
// Builds a speaker-attributed transcript from a JSON transcription result.
//
// Usage: php transcript.php <transcription-json-file>
//
// Input:  a JSON file containing results->speaker_labels->segments (each with
//         a speaker_label and a list of items carrying start_time) and
//         results->items (each with a type and alternatives[0]->content).
//         NOTE(review): this layout looks like Amazon Transcribe output with
//         speaker identification enabled - confirm against the producer.
// Output: "<input path>-transcript.txt", one "[HH:MM:SS] speaker: text"
//         paragraph per speaker turn, separated by blank lines.
// Exits with status 1 and a message on STDERR when the argument is missing,
// the input cannot be read or parsed, or the output cannot be opened.
if (!isset($argv[1])) {
    fwrite(STDERR, "Usage: php transcript.php <transcription-json-file>\n");
    exit(1);
}
$file = $argv[1];

$json = (is_file($file) && is_readable($file)) ? file_get_contents($file) : false;
if ($json === false) {
    fwrite(STDERR, "Unable to read input file: " . $file . "\n");
    exit(1);
}

$data = json_decode($json);
// isset() on the nested path is false (without warnings) for invalid JSON,
// non-object JSON, and results that lack speaker labels.
if (!isset($data->results->items, $data->results->speaker_labels->segments)) {
    fwrite(STDERR, "Input lacks results->items or results->speaker_labels->segments: " . $file . "\n");
    exit(1);
}

// Map each labelled start time to its speaker. Keys are normalised to three
// decimals with no thousands separator so both lookups below format alike.
$speaker_start_times = [];
foreach ($data->results->speaker_labels->segments as $segment) {
    foreach ($segment->items as $labelled_item) {
        $key = number_format((float) $labelled_item->start_time, 3, '.', '');
        $speaker_start_times[$key] = $segment->speaker_label;
    }
}

// Walk the items in order and start a new transcript entry whenever the
// speaker changes.
$lines = [];
$line = '';
$time = 0;
$speaker = null;
// Initialised up front so leading items without a start_time (punctuation)
// do not read an undefined variable.
$current_speaker = null;
foreach ($data->results->items as $item) {
    $content = $item->alternatives[0]->content;
    $has_start_time = property_exists($item, 'start_time');

    if ($has_start_time) {
        $key = number_format((float) $item->start_time, 3, '.', '');
        // Items whose start time has no label keep the current speaker.
        if (isset($speaker_start_times[$key])) {
            $current_speaker = $speaker_start_times[$key];
        }
    }

    if ($has_start_time && $current_speaker !== $speaker) {
        // Speaker changed: flush the finished turn (if any) and start a new one.
        if ($speaker !== null) {
            $lines[] = [
                'speaker' => $speaker,
                'line' => $line,
                'time' => $time,
            ];
        }
        $line = $content;
        $speaker = $current_speaker;
        // Whole seconds only: gmdate() takes an integer timestamp, and passing
        // a fractional numeric string is deprecated as of PHP 8.1.
        $time = (int) (float) $item->start_time;
    }
    elseif ($item->type === 'punctuation') {
        // Punctuation attaches directly to the preceding word.
        $line .= $content;
    }
    else {
        $line .= ' ' . $content;
    }
}

// Record the final turn; the loop only flushes on a speaker change. Skipped
// when no speaker was ever seen, so empty input yields an empty transcript.
if ($speaker !== null) {
    $lines[] = [
        'speaker' => $speaker,
        'line' => $line,
        'time' => $time,
    ];
}

// Finally, write the transcript next to the input file.
$output_file = $file . '-transcript.txt';
$fh = fopen($output_file, 'w');
if ($fh === false) {
    fwrite(STDERR, "Unable to open output file for writing: " . $output_file . "\n");
    exit(1);
}
foreach ($lines as $line_data) {
    $text = '[' . gmdate('H:i:s', $line_data['time']) . '] ' . $line_data['speaker'] . ': ' . $line_data['line'];
    fputs($fh, $text . "\n\n");
}
fclose($fh);
/* End of the transcript.php file */