Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Http #16

Closed
wants to merge 3 commits into from
Closed

Http #16

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ reqwest-eventsource = "0.4.0"
url = "2.4.1"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
tower = "0.4"
tower = { version = "0.4", features = ["full"] }
tower-lsp = "0.20"
tower-http = { version = "0.4.0" }
tower-layer = "0.3.2"
tracing = "0.1"
tracing-appender = "0.2"
tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
Expand All @@ -30,13 +32,5 @@ chrono = "0.4.31"
regex = "1.9.5"
async-trait = "0.1.73"
similar = "2.3.0"

#use = "0.0.0"
#async-trait = "0.1.73"
#route-recognizer = "0.3.1"
#bytes = "0.5"
#eventsource-client = "0.11.0"
#reqwest-streams = { version = "0.3.0", features = ["json"] }
#tokio-stream = "0.1.14"
#eventsource-stream = "0.2.3"
axum = "0.6.20"

36 changes: 36 additions & 0 deletions src/background_tasks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use std::sync::Arc;
use std::vec;

use tokio::sync::RwLock as ARwLock;
use tokio::task::JoinHandle;

use crate::{global_context, telemetry_storage};
use crate::global_context::GlobalContext;
use crate::telemetry_snippets;

/// Owns the join handles of the spawned background tasks so they can be
/// cancelled together.
pub struct BackgroundTasksHolder {
    tasks: Vec<JoinHandle<()>>,
}

impl BackgroundTasksHolder {
    /// Wraps an already-spawned set of task handles.
    pub fn new(tasks: Vec<JoinHandle<()>>) -> Self {
        Self { tasks }
    }

    /// Cancels the held tasks one after another, in the order they were given.
    ///
    /// Each task is aborted and then awaited before moving on to the next one;
    /// the join result (normally a cancellation error) is deliberately ignored.
    pub async fn abort(self) {
        let Self { tasks } = self;
        for handle in tasks.into_iter() {
            handle.abort();
            let _ = handle.await;
        }
    }
}

/// Spawns the three background jobs on the tokio runtime and returns a holder
/// for their join handles.
///
/// Every job receives its own clone of the shared `global_context` handle.
pub fn start_background_tasks(global_context: Arc<ARwLock<GlobalContext>>) -> BackgroundTasksHolder {
    let caps_reload = tokio::spawn(global_context::caps_background_reload(global_context.clone()));
    let telemetry = tokio::spawn(telemetry_storage::telemetry_background_task(global_context.clone()));
    let snippets = tokio::spawn(telemetry_snippets::tele_snip_background_task(global_context.clone()));
    BackgroundTasksHolder::new(vec![caps_reload, telemetry, snippets])
}
21 changes: 18 additions & 3 deletions src/custom_error.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
use std::error::Error;
use tracing::error;
use hyper::{Body, Response, StatusCode};
use serde_json::json;
use std::fmt;
use axum::Json;
use axum::response::IntoResponse;
use crate::global_context::SharedGlobalContext;
use crate::telemetry_basic;


#[derive(Debug)]
#[derive(Debug, Clone)]
/// Error value returned by the HTTP handlers.
///
/// Carries the HTTP status code to answer with and a human-readable message;
/// `telemetry_skip` marks errors that were already reported elsewhere.
pub struct ScratchError {
pub status_code: StatusCode,
pub message: String,
pub telemetry_skip: bool // because already posted a better description directly
pub telemetry_skip: bool, // because already posted a better description directly
}

impl std::error::Error for ScratchError {}
/// Lets handlers return `ScratchError` directly: the error becomes a JSON
/// response of the form `{"detail": <message>}` with the error's status code.
impl IntoResponse for ScratchError {
    fn into_response(self) -> axum::response::Response {
        let Self { status_code, message, .. } = self;
        let body = Json(json!({ "detail": message }));
        (status_code, body).into_response()
    }
}

impl Error for ScratchError {}
unsafe impl Send for ScratchError {}
unsafe impl Sync for ScratchError {}
impl fmt::Display for ScratchError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} {}", self.status_code, self.message)
Expand Down
2 changes: 1 addition & 1 deletion src/global_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ pub struct GlobalContext {
pub vecdb_search: Arc<AMutex<Box<dyn VecdbSearch + Send>>>,
}


/// Reference-counted, lock-guarded handle to the [`GlobalContext`].
pub type SharedGlobalContext = Arc<ARwLock<GlobalContext>>;
// NOTE(review): presumably the delay before retrying a failed caps reload — confirm at the use site.
const CAPS_RELOAD_BACKOFF: u64 = 60; // seconds
// NOTE(review): presumably the period of the background caps reload — confirm at the use site.
const CAPS_BACKGROUND_RELOAD: u64 = 3600; // seconds

Expand Down
90 changes: 90 additions & 0 deletions src/http.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
use axum::{Extension, http::{StatusCode, Uri}, response::IntoResponse, Router};
use tokio::signal;
use tower::ServiceExt;
use tracing::info;

use std::convert::Infallible;
use std::net::SocketAddr;
use std::io::Write;
use std::sync::Arc;
use std::sync::RwLock as StdRwLock;
use tokio::sync::RwLock as ARwLock;
use hyper::{Body, Request, Response, Server, Method};
use hyper::server::conn::AddrStream;
use hyper::service::{make_service_fn, service_fn};
use serde_json::json;

use crate::caps;
use crate::scratchpads;
use crate::call_validation::{CodeCompletionPost, ChatPost};
use crate::global_context::GlobalContext;
use crate::caps::CodeAssistantCaps;
use crate::custom_error::ScratchError;
use crate::telemetry_basic;
use crate::telemetry_snippets;
use crate::completion_cache;
use routers::make_v1_router;

pub mod routers;
mod utils;

/// Fallback handler: answers 404 with a plain-text body naming the requested URI.
async fn handler_404(path: Uri) -> impl IntoResponse {
    let body = format!("no handler for {}", path);
    (StatusCode::NOT_FOUND, body)
}


/// Builds the top-level router: the v1 API is nested under `/v1`, and every
/// other path falls through to `handler_404`.
pub fn make_server() -> Router {
    let with_fallback = Router::new().fallback(handler_404);
    with_fallback.nest("/v1", make_v1_router())
}


/// Resolves as soon as the server should begin a graceful shutdown.
///
/// Completion is triggered by whichever of these happens first:
/// * Ctrl+C (SIGINT),
/// * SIGTERM (unix only; on other platforms this branch never fires),
/// * `ask_shutdown_receiver.recv()` returning — either a message arrived or
///   every sender was dropped.
///
/// # Panics
///
/// Panics if the Ctrl+C or SIGTERM handler cannot be installed.
pub async fn shutdown_signal(ask_shutdown_receiver: std::sync::mpsc::Receiver<String>) {
    let ctrl_c = async {
        signal::ctrl_c()
            .await
            .expect("failed to install Ctrl+C handler");
    };

    #[cfg(unix)]
    let terminate = async {
        signal::unix::signal(signal::unix::SignalKind::terminate())
            .expect("failed to install signal handler")
            .recv()
            .await;
    };

    #[cfg(not(unix))]
    let terminate = std::future::pending::<()>();

    // The std channel's `recv()` blocks, so it is run on the blocking thread pool.
    // NOTE(review): if another branch wins, that blocking call keeps waiting until
    // a message arrives or all senders are dropped — confirm this cannot delay
    // runtime shutdown.
    let shutdown_requested = tokio::task::spawn_blocking(move || ask_shutdown_receiver.recv());

    tokio::select! {
        _ = ctrl_c => {
            info!("SIGINT signal received");
        },
        _ = terminate => {
            // Logged for parity with the other shutdown paths.
            info!("SIGTERM signal received");
        },
        _ = shutdown_requested => {
            info!("graceful shutdown to store telemetry");
        }
    }
}

pub async fn start_server(
global_context: Arc<ARwLock<GlobalContext>>,
ask_shutdown_receiver: std::sync::mpsc::Receiver<String>
) -> Result<(), String> {
let port = global_context.read().await.cmdline.http_port;
let addr = ([127, 0, 0, 1], port).into();
let builder = Server::try_bind(&addr).map_err(|e| {
write!(std::io::stderr(), "PORT_BUSY {}\n", e).unwrap();
std::io::stderr().flush().unwrap();
format!("port busy, address {}: {}", addr, e)
})?;
info!("HTTP server listening on {}", addr);
let router = make_server().layer(Extension(global_context.clone()));
let server = builder
.serve(router.into_make_service())
.with_graceful_shutdown(shutdown_signal(ask_shutdown_receiver));
let resp = server.await.map_err(|e| format!("HTTP server error: {}", e));
resp
}
35 changes: 35 additions & 0 deletions src/http/routers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use std::error::Error;
use std::future::Future;
use std::pin::Pin;

use axum::Extension;
use axum::response::IntoResponse;
use axum::Router;
use axum::routing::{get, post};
use hyper::{Body, Response};

use crate::custom_error::ScratchError;
use crate::global_context::SharedGlobalContext;
use crate::http::routers::v1::caps::handle_v1_caps;
use crate::http::routers::v1::chat::handle_v1_chat;
use crate::http::routers::v1::code_completion::handle_v1_code_completion_web;
use crate::http::routers::v1::graceful_shutdown::handle_v1_graceful_shutdown;
use crate::http::routers::v1::snippet_accepted::handle_v1_snippet_accepted;
use crate::http::routers::v1::telemetry_network::handle_v1_telemetry_network;
use crate::http::utils::telemetry_wrapper;
use crate::telemetry_get;
use crate::telemetry_post;

pub mod v1;


/// Builds the router for the v1 API (paths are relative to the mount point).
///
/// Every handler is registered through the `telemetry_post!` / `telemetry_get!`
/// macros.
pub fn make_v1_router() -> Router {
    // Endpoints registered via `telemetry_post!`.
    let with_posts = Router::new()
        .route("/code-completion", telemetry_post!(handle_v1_code_completion_web))
        .route("/chat", telemetry_post!(handle_v1_chat))
        .route("/telemetry-network", telemetry_post!(handle_v1_telemetry_network))
        .route("/snippet-accepted", telemetry_post!(handle_v1_snippet_accepted));

    // Endpoints registered via `telemetry_get!`.
    with_posts
        .route("/caps", telemetry_get!(handle_v1_caps))
        .route("/graceful-shutdown", telemetry_get!(handle_v1_graceful_shutdown))
}
6 changes: 6 additions & 0 deletions src/http/routers/v1.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pub mod code_completion;
pub mod chat;
pub mod telemetry_network;
pub mod snippet_accepted;
pub mod caps;
pub mod graceful_shutdown;
27 changes: 27 additions & 0 deletions src/http/routers/v1/caps.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use axum::Extension;
use axum::response::Result;
use hyper::{Body, Response, StatusCode};
use serde_json::json;

use crate::custom_error::ScratchError;
use crate::global_context::SharedGlobalContext;

/// Returns the currently loaded caps serialized as a JSON document.
///
/// The request body is accepted but ignored.
///
/// # Errors
///
/// Responds with `503 Service Unavailable` (carrying the loader's error text)
/// when the caps cannot be loaded.
///
/// # Panics
///
/// Panics if the caps lock is poisoned or the response cannot be built.
pub async fn handle_v1_caps(
    Extension(global_context): Extension<SharedGlobalContext>,
    _: hyper::body::Bytes,
) -> Result<Response<Body>, ScratchError> {
    let caps = crate::global_context::try_load_caps_quickly_if_not_present(global_context.clone())
        .await
        .map_err(|e| ScratchError::new(StatusCode::SERVICE_UNAVAILABLE, format!("{}", e)))?;
    let payload = {
        // Keep the read guard only for as long as serialization needs it.
        let guard = caps.read().unwrap();
        json!(*guard).to_string()
    };
    Ok(Response::builder()
        .header("Content-Type", "application/json")
        .body(Body::from(payload))
        .unwrap())
}
90 changes: 90 additions & 0 deletions src/http/routers/v1/chat.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
use std::io::Write;
use std::sync::Arc;
use std::sync::RwLock as StdRwLock;

use axum::Extension;
use axum::response::Result;
use hyper::{Body, Response, StatusCode};
use tracing::info;

use crate::call_validation::ChatPost;
use crate::caps;
use crate::caps::CodeAssistantCaps;
use crate::custom_error::ScratchError;
use crate::global_context::SharedGlobalContext;
use crate::scratchpads;

/// Resolves which chat model and which scratchpad to use for `chat_post`.
///
/// Returns `(model_name, scratchpad_name, scratchpad_patch)`. The model is
/// chosen from the caps' chat models (falling back to the caps' default chat
/// model), then the scratchpad is chosen from that model's record.
///
/// # Errors
///
/// Propagates the `String` error of either lookup.
///
/// # Panics
///
/// Panics if the caps lock is poisoned.
async fn _lookup_chat_scratchpad(
    caps: Arc<StdRwLock<CodeAssistantCaps>>,
    chat_post: &ChatPost,
) -> Result<(String, String, serde_json::Value), String> {
    let guard = caps.read().unwrap();
    let (chosen_model, model_record) = caps::which_model_to_use(
        &guard.code_chat_models,
        &chat_post.model,
        &guard.code_chat_default_model,
    )?;
    let (scratchpad_name, scratchpad_patch) = caps::which_scratchpad_to_use(
        &model_record.supports_scratchpads,
        &chat_post.scratchpad,
        &model_record.default_scratchpad,
    )?;
    let resolved = (chosen_model, scratchpad_name.clone(), scratchpad_patch.clone());
    Ok(resolved)
}

/// Handles a chat request: parses the JSON body, picks the model and
/// scratchpad, builds the prompt and hands off to the streaming interaction.
///
/// Request defaults applied before the prompt is built:
/// * `max_new_tokens == 0` is replaced by `2048`,
/// * a missing `temperature` is replaced by `0.2`,
/// * `model` is overwritten with the resolved model name.
///
/// # Errors
///
/// * `400 Bad Request` — body is not valid `ChatPost` JSON, the model or
///   scratchpad lookup fails, or the scratchpad cannot be created.
/// * `500 Internal Server Error` — building the prompt fails.
/// * Errors from loading caps and from the streaming call are propagated as is.
pub async fn handle_v1_chat(
    Extension(global_context): Extension<SharedGlobalContext>,
    body_bytes: hyper::body::Bytes,
) -> Result<Response<Body>, ScratchError> {
    let mut chat_post = serde_json::from_slice::<ChatPost>(&body_bytes).map_err(|e|
        ScratchError::new(StatusCode::BAD_REQUEST, format!("JSON problem: {}", e))
    )?;
    let caps = crate::global_context::try_load_caps_quickly_if_not_present(global_context.clone()).await?;
    let (model_name, scratchpad_name, scratchpad_patch) = _lookup_chat_scratchpad(
        caps.clone(),
        &chat_post,
    ).await.map_err(|e| {
        ScratchError::new(StatusCode::BAD_REQUEST, format!("{}", e))
    })?;
    if chat_post.parameters.max_new_tokens == 0 {
        chat_post.parameters.max_new_tokens = 2048;
    }
    chat_post.parameters.temperature = Some(chat_post.parameters.temperature.unwrap_or(0.2));
    chat_post.model = model_name.clone();
    // Only clones are taken out of the context, so a shared read lock is enough;
    // an exclusive write lock here would needlessly stall every other request.
    // All three values are fetched under a single lock acquisition.
    let (client1, api_key, vecdb_search) = {
        let cx_locked = global_context.read().await;
        (
            cx_locked.http_client.clone(),
            cx_locked.cmdline.api_key.clone(),
            cx_locked.vecdb_search.clone(),
        )
    };
    let mut scratchpad = scratchpads::create_chat_scratchpad(
        global_context.clone(),
        caps,
        model_name.clone(),
        chat_post.clone(),
        &scratchpad_name,
        &scratchpad_patch,
        vecdb_search,
    ).await.map_err(|e|
        ScratchError::new(StatusCode::BAD_REQUEST, e)
    )?;
    let t1 = std::time::Instant::now();
    // NOTE(review): the first argument is presumably a context/token budget — confirm
    // against the scratchpad `prompt` signature.
    let prompt = scratchpad.prompt(
        2048,
        &mut chat_post.parameters,
    ).await.map_err(|e|
        ScratchError::new(StatusCode::INTERNAL_SERVER_ERROR, format!("Prompt: {}", e))
    )?;
    // info!("chat prompt {:?}\n{}", t1.elapsed(), prompt);
    info!("chat prompt {:?}", t1.elapsed());
    crate::restream::scratchpad_interaction_stream(
        global_context.clone(),
        scratchpad,
        "chat-stream".to_string(),
        prompt,
        model_name,
        client1,
        api_key,
        chat_post.parameters.clone(),
    ).await
}
Loading