Skip to content

Commit

Permalink
Merge pull request #33 from stang/update-modjo
Browse files Browse the repository at this point in the history
Update modjo import script
  • Loading branch information
albandum authored Oct 23, 2024
2 parents 7542b8c + b9b913b commit 77064e3
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 14 deletions.
47 changes: 35 additions & 12 deletions modjo/modjo-transcripts-to-dust.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,26 @@ const MODJO_BASE_URL = process.env.MODJO_BASE_URL || "https://api.modjo.ai";
const MODJO_API_KEY = process.env.MODJO_API_KEY;
const DUST_API_KEY = process.env.DUST_API_KEY;
const DUST_WORKSPACE_ID = process.env.DUST_WORKSPACE_ID;
const DUST_VAULT_ID = process.env.DUST_VAULT_ID;
const DUST_DATASOURCE_ID = process.env.DUST_DATASOURCE_ID;
const INCLUDE_CONTACT_DETAILS = process.env.INCLUDE_CONTACT_DETAILS !== 'false';
const INCLUDE_RECORDING_URL = process.env.INCLUDE_RECORDING_URL !== 'false';

if (
!MODJO_API_KEY ||
!DUST_API_KEY ||
!DUST_WORKSPACE_ID ||
!DUST_VAULT_ID ||
!DUST_DATASOURCE_ID
) {
throw new Error(
"Please provide values for MODJO_API_KEY, DUST_API_KEY, DUST_WORKSPACE_ID, and DUST_DATASOURCE_ID in .env file."
"Please provide values for MODJO_API_KEY, DUST_API_KEY, DUST_WORKSPACE_ID, DUST_VAULT_ID, and DUST_DATASOURCE_ID in .env file."
);
}

// Can be `null` if you want to fetch all transcripts
const TRANSCRIPTS_SINCE = "2024-01-01";
const YESTERDAY = new Date(Date.now() - 86400000).toISOString().split('T')[0];
const TRANSCRIPTS_SINCE = process.env.TRANSCRIPTS_SINCE === "null" ? null : (process.env.TRANSCRIPTS_SINCE || YESTERDAY);

const modjoApi = axios.create({
baseURL: MODJO_BASE_URL,
Expand Down Expand Up @@ -62,7 +67,9 @@ interface ModjoCallExport {
recording: {
url: string;
};
aiSummary: string | null;
highlights: {
content: string;
} | null;
speakers: {
contactId?: number;
userId?: number;
Expand All @@ -79,6 +86,9 @@ interface ModjoCallExport {
content: string;
topics: { topicId: number; name: string }[];
}[];
tags: {
name: string;
}[];
};
}

Expand All @@ -87,6 +97,12 @@ async function getModjoTranscripts(): Promise<ModjoCallExport[]> {
let page = 1;
const perPage = 50;

console.log(
`Will retrieve all transcripts since: ${TRANSCRIPTS_SINCE}\n` +
`Will include contact details: ${INCLUDE_CONTACT_DETAILS}\n` +
`Will include recording URLs: ${INCLUDE_RECORDING_URL}`
);

do {
try {
const response: AxiosResponse<{
Expand All @@ -104,9 +120,10 @@ async function getModjoTranscripts(): Promise<ModjoCallExport[]> {
},
relations: {
recording: true,
aiSummary: true,
highlights: true,
transcript: true,
speakers: true,
tags: true,
},
});

Expand Down Expand Up @@ -136,26 +153,31 @@ async function upsertToDustDatasource(transcript: ModjoCallExport) {
const documentId = `modjo-transcript-${transcript.callId}`;

let content = `Call ID: ${transcript.callId}\n`;
content += `Tags: ${transcript.relations.tags.map(tag => `"${tag.name}"`).join(', ') }\n`;
content += `Title: ${transcript.title}\n`;
content += `Date: ${transcript.startDate}\n`;
content += `Duration: ${transcript.duration} seconds\n`;
content += `Provider: ${transcript.provider}\n`;
content += `Language: ${transcript.language}\n`;
if (transcript.callCrmId) content += `CRM ID: ${transcript.callCrmId}\n`;
if (transcript.relations.recording)
if (INCLUDE_RECORDING_URL && transcript.relations.recording)
content += `Recording URL: ${transcript.relations.recording.url}\n`;
if (transcript.relations.aiSummary)
content += `AI Summary: ${transcript.relations.aiSummary}\n`;

content += "\nSpeakers:\n";

content += "\n# Speakers\n";
transcript.relations.speakers.forEach((speaker) => {
content += `${speaker.speakerId}: ${speaker.name} (${speaker.type})`;
if (speaker.email) content += ` - Email: ${speaker.email}`;
if (speaker.phoneNumber) content += ` - Phone: ${speaker.phoneNumber}`;
if (INCLUDE_CONTACT_DETAILS) {
if (speaker.email) content += ` - Email: ${speaker.email}`;
if (speaker.phoneNumber) content += ` - Phone: ${speaker.phoneNumber}`;
}
content += "\n";
});

content += "\nTranscript:\n";
if (transcript.relations.highlights)
content += `\n# Highlights\n${transcript.relations.highlights.content.trim()}\n`;

content += "\n# Transcript\n";
transcript.relations.transcript.forEach((entry) => {
const speaker = transcript.relations.speakers.find(
(s) => s.speakerId === entry.speakerId
Expand All @@ -173,9 +195,10 @@ async function upsertToDustDatasource(transcript: ModjoCallExport) {
try {
await limiter.schedule(() =>
dustApi.post(
`/w/${DUST_WORKSPACE_ID}/data_sources/${DUST_DATASOURCE_ID}/documents/${documentId}`,
`/w/${DUST_WORKSPACE_ID}/vaults/${DUST_VAULT_ID}/data_sources/${DUST_DATASOURCE_ID}/documents/${documentId}`,
{
text: content.trim(),
source_url: `https://app.modjo.ai/call-details/${transcript.callId}`,
}
)
);
Expand Down
11 changes: 9 additions & 2 deletions modjo/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,12 @@ This script imports Modjo call transcripts into a Dust datasource. It fetches tr
MODJO_API_KEY=your_modjo_api_key
DUST_API_KEY=your_dust_api_key
DUST_WORKSPACE_ID=your_dust_workspace_id
DUST_VAULT_ID=your_dust_vault_id
DUST_DATASOURCE_ID=your_dust_datasource_id
# TRANSCRIPTS_SINCE=YYYY-MM-DD # or "null" if you want to fetch all transcripts
# INCLUDE_CONTACT_DETAILS=true # or "false" to skip contact details
# INCLUDE_RECORDING_URL=true # or "false" if you don't want the recording URL to appear in Dust
```
Replace the placeholder values with your actual API keys and IDs.

Expand All @@ -39,7 +44,9 @@ This command executes the `modjo-transcripts-to-dust.ts` file using `ts-node`.

## Configuration

- `TRANSCRIPTS_SINCE`: In the script, you can set this variable to a date string (e.g., "2024-01-01") to fetch transcripts from that date onwards. Set it to `null` to fetch all transcripts.
- `TRANSCRIPTS_SINCE`: You can set this environment variable to a date string (e.g., "2024-01-01") to fetch transcripts from that date onwards. Set it to `null` to fetch all transcripts. If it is not set, the script defaults to yesterday's date.
- `INCLUDE_CONTACT_DETAILS`: Set this environment variable to `false` if you don't want to ingest contact details (i.e., email and phone number) in Dust.
- `INCLUDE_RECORDING_URL`: Set this environment variable to `false` if you don't want to ingest the recording URL in Dust.

## What the Script Does

Expand Down Expand Up @@ -67,7 +74,7 @@ This command executes the `modjo-transcripts-to-dust.ts` file using `ts-node`.
Each transcript is formatted as follows in the Dust datasource:

1. Call metadata (ID, title, date, duration, etc.)
2. Recording URL and AI summary (if available)
2. Recording URL and highlights (if available)
3. List of speakers with their details
4. Full transcript with timestamps, speaker names, and topics

Expand Down

0 comments on commit 77064e3

Please sign in to comment.