-
Notifications
You must be signed in to change notification settings - Fork 304
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
General: Track token usage of LLM service requests (#9455)
- Loading branch information
Showing
32 changed files
with
976 additions
and
112 deletions.
There are no files selected for viewing
17 changes: 17 additions & 0 deletions
17
src/main/java/de/tum/cit/aet/artemis/athena/dto/ResponseMetaDTO.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
package de.tum.cit.aet.artemis.athena.dto; | ||
|
||
import java.util.List; | ||
|
||
import com.fasterxml.jackson.annotation.JsonInclude; | ||
|
||
import de.tum.cit.aet.artemis.core.domain.LLMRequest; | ||
|
||
/** | ||
* DTO representing the meta information in the Athena response. | ||
*/ | ||
@JsonInclude(JsonInclude.Include.NON_EMPTY) | ||
public record ResponseMetaDTO(TotalUsage totalUsage, List<LLMRequest> llmRequests) { | ||
|
||
public record TotalUsage(Integer numInputTokens, Integer numOutputTokens, Integer numTotalTokens, Float cost) { | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
src/main/java/de/tum/cit/aet/artemis/core/domain/LLMRequest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package de.tum.cit.aet.artemis.core.domain; | ||
|
||
/**
 * This record is used by the LLMTokenUsageService to provide relevant information about the token usage of a single LLM request.
 *
 * @param model                     LLM model (e.g. gpt-4o)
 * @param numInputTokens            number of input tokens, i.e. the tokens sent with the LLM call
 * @param costPerMillionInputToken  cost in Euro per million input tokens
 * @param numOutputTokens           number of output tokens, i.e. the tokens of the LLM answer
 * @param costPerMillionOutputToken cost in Euro per million output tokens
 * @param pipelineId                String with the pipeline name (e.g. IRIS_COURSE_CHAT_PIPELINE)
 */
public record LLMRequest(String model, int numInputTokens, float costPerMillionInputToken, int numOutputTokens, float costPerMillionOutputToken, String pipelineId) {
}
8 changes: 8 additions & 0 deletions
8
src/main/java/de/tum/cit/aet/artemis/core/domain/LLMServiceType.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
package de.tum.cit.aet.artemis.core.domain; | ||
|
||
/**
 * Enum representing different types of LLM (Large Language Model) services used in the system.
 */
public enum LLMServiceType {

    /** Requests issued through the Iris service. */
    IRIS,

    /** Requests issued through the Athena service. */
    ATHENA
}
104 changes: 104 additions & 0 deletions
104
src/main/java/de/tum/cit/aet/artemis/core/domain/LLMTokenUsageRequest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
package de.tum.cit.aet.artemis.core.domain; | ||
|
||
import jakarta.persistence.Column; | ||
import jakarta.persistence.Entity; | ||
import jakarta.persistence.ManyToOne; | ||
import jakarta.persistence.Table; | ||
|
||
import org.hibernate.annotations.Cache; | ||
import org.hibernate.annotations.CacheConcurrencyStrategy; | ||
|
||
import com.fasterxml.jackson.annotation.JsonInclude; | ||
|
||
/** | ||
* Represents the token usage details of a single LLM request, including model, service pipeline, token counts, and costs. | ||
*/ | ||
@Entity | ||
@Table(name = "llm_token_usage_request") | ||
@Cache(usage = CacheConcurrencyStrategy.NONSTRICT_READ_WRITE) | ||
@JsonInclude(JsonInclude.Include.NON_EMPTY) | ||
public class LLMTokenUsageRequest extends DomainObject { | ||
|
||
/** | ||
* LLM model (e.g. gpt-4o) | ||
*/ | ||
@Column(name = "model") | ||
private String model; | ||
|
||
/** | ||
* pipeline that was called (e.g. IRIS_COURSE_CHAT_PIPELINE) | ||
*/ | ||
@Column(name = "service_pipeline_id") | ||
private String servicePipelineId; | ||
|
||
@Column(name = "num_input_tokens") | ||
private int numInputTokens; | ||
|
||
@Column(name = "cost_per_million_input_tokens") | ||
private float costPerMillionInputTokens; | ||
|
||
@Column(name = "num_output_tokens") | ||
private int numOutputTokens; | ||
|
||
@Column(name = "cost_per_million_output_tokens") | ||
private float costPerMillionOutputTokens; | ||
|
||
@ManyToOne | ||
private LLMTokenUsageTrace trace; | ||
|
||
public String getModel() { | ||
return model; | ||
} | ||
|
||
public void setModel(String model) { | ||
this.model = model; | ||
} | ||
|
||
public String getServicePipelineId() { | ||
return servicePipelineId; | ||
} | ||
|
||
public void setServicePipelineId(String servicePipelineId) { | ||
this.servicePipelineId = servicePipelineId; | ||
} | ||
|
||
public float getCostPerMillionInputTokens() { | ||
return costPerMillionInputTokens; | ||
} | ||
|
||
public void setCostPerMillionInputTokens(float costPerMillionInputToken) { | ||
this.costPerMillionInputTokens = costPerMillionInputToken; | ||
} | ||
|
||
public float getCostPerMillionOutputTokens() { | ||
return costPerMillionOutputTokens; | ||
} | ||
|
||
public void setCostPerMillionOutputTokens(float costPerMillionOutputToken) { | ||
this.costPerMillionOutputTokens = costPerMillionOutputToken; | ||
} | ||
|
||
public int getNumInputTokens() { | ||
return numInputTokens; | ||
} | ||
|
||
public void setNumInputTokens(int numInputTokens) { | ||
this.numInputTokens = numInputTokens; | ||
} | ||
|
||
public int getNumOutputTokens() { | ||
return numOutputTokens; | ||
} | ||
|
||
public void setNumOutputTokens(int numOutputTokens) { | ||
this.numOutputTokens = numOutputTokens; | ||
} | ||
|
||
public LLMTokenUsageTrace getTrace() { | ||
return trace; | ||
} | ||
|
||
public void setTrace(LLMTokenUsageTrace trace) { | ||
this.trace = trace; | ||
} | ||
} |
111 changes: 111 additions & 0 deletions
111
src/main/java/de/tum/cit/aet/artemis/core/domain/LLMTokenUsageTrace.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
package de.tum.cit.aet.artemis.core.domain; | ||
|
||
import java.time.ZonedDateTime; | ||
import java.util.HashSet; | ||
import java.util.Set; | ||
|
||
import jakarta.annotation.Nullable; | ||
import jakarta.persistence.CascadeType; | ||
import jakarta.persistence.Column; | ||
import jakarta.persistence.Entity; | ||
import jakarta.persistence.EnumType; | ||
import jakarta.persistence.Enumerated; | ||
import jakarta.persistence.FetchType; | ||
import jakarta.persistence.OneToMany; | ||
import jakarta.persistence.Table; | ||
|
||
import org.hibernate.annotations.Cache; | ||
import org.hibernate.annotations.CacheConcurrencyStrategy; | ||
|
||
import com.fasterxml.jackson.annotation.JsonInclude; | ||
|
||
/** | ||
* This represents a trace that contains one or more requests of type {@link LLMTokenUsageRequest} | ||
*/ | ||
@Entity | ||
@Table(name = "llm_token_usage_trace") | ||
@Cache(usage = CacheConcurrencyStrategy.NONSTRICT_READ_WRITE) | ||
@JsonInclude(JsonInclude.Include.NON_EMPTY) | ||
public class LLMTokenUsageTrace extends DomainObject { | ||
|
||
@Column(name = "service") | ||
@Enumerated(EnumType.STRING) | ||
private LLMServiceType serviceType; | ||
|
||
@Nullable | ||
@Column(name = "course_id") | ||
private Long courseId; | ||
|
||
@Nullable | ||
@Column(name = "exercise_id") | ||
private Long exerciseId; | ||
|
||
@Column(name = "user_id") | ||
private Long userId; | ||
|
||
@Column(name = "time") | ||
private ZonedDateTime time = ZonedDateTime.now(); | ||
|
||
@Nullable | ||
@Column(name = "iris_message_id") | ||
private Long irisMessageId; | ||
|
||
@OneToMany(mappedBy = "trace", fetch = FetchType.LAZY, cascade = CascadeType.ALL, orphanRemoval = true) | ||
private Set<LLMTokenUsageRequest> llmRequests = new HashSet<>(); | ||
|
||
public LLMServiceType getServiceType() { | ||
return serviceType; | ||
} | ||
|
||
public void setServiceType(LLMServiceType serviceType) { | ||
this.serviceType = serviceType; | ||
} | ||
|
||
public Long getCourseId() { | ||
return courseId; | ||
} | ||
|
||
public void setCourseId(Long courseId) { | ||
this.courseId = courseId; | ||
} | ||
|
||
public Long getExerciseId() { | ||
return exerciseId; | ||
} | ||
|
||
public void setExerciseId(Long exerciseId) { | ||
this.exerciseId = exerciseId; | ||
} | ||
|
||
public Long getUserId() { | ||
return userId; | ||
} | ||
|
||
public void setUserId(Long userId) { | ||
this.userId = userId; | ||
} | ||
|
||
public ZonedDateTime getTime() { | ||
return time; | ||
} | ||
|
||
public void setTime(ZonedDateTime time) { | ||
this.time = time; | ||
} | ||
|
||
public Set<LLMTokenUsageRequest> getLLMRequests() { | ||
return llmRequests; | ||
} | ||
|
||
public void setLlmRequests(Set<LLMTokenUsageRequest> llmRequests) { | ||
this.llmRequests = llmRequests; | ||
} | ||
|
||
public Long getIrisMessageId() { | ||
return irisMessageId; | ||
} | ||
|
||
public void setIrisMessageId(Long messageId) { | ||
this.irisMessageId = messageId; | ||
} | ||
} |
14 changes: 14 additions & 0 deletions
14
src/main/java/de/tum/cit/aet/artemis/core/repository/LLMTokenUsageRequestRepository.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package de.tum.cit.aet.artemis.core.repository; | ||
|
||
import static de.tum.cit.aet.artemis.core.config.Constants.PROFILE_CORE; | ||
|
||
import org.springframework.context.annotation.Profile; | ||
import org.springframework.stereotype.Repository; | ||
|
||
import de.tum.cit.aet.artemis.core.domain.LLMTokenUsageRequest; | ||
import de.tum.cit.aet.artemis.core.repository.base.ArtemisJpaRepository; | ||
|
||
@Profile(PROFILE_CORE) | ||
@Repository | ||
public interface LLMTokenUsageRequestRepository extends ArtemisJpaRepository<LLMTokenUsageRequest, Long> { | ||
} |
Oops, something went wrong.