Skip to content

Commit

Permalink
More consistent logging for inpod (istio#1256)
Browse files Browse the repository at this point in the history
Log UID wherever we can basically. Only other difference is to log the
error before we say we are retrying, instead of after (which makes it
seem like the error is after the retry)
  • Loading branch information
howardjohn authored Aug 7, 2024
1 parent f0a2fc5 commit 9c5f653
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 11 deletions.
4 changes: 2 additions & 2 deletions src/inpod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ pub mod istio {

#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("error creating proxy: {0}")]
ProxyError(crate::proxy::Error),
#[error("error creating proxy {0}: {1}")]
ProxyError(String, crate::proxy::Error),
#[error("error receiving message: {0}")]
ReceiveMessageError(String),
#[error("error sending ack: {0}")]
Expand Down
21 changes: 15 additions & 6 deletions src/inpod/statemanager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,16 +131,21 @@ impl WorkloadProxyManagerState {
if !self.snapshot_received {
self.snapshot_names.insert(poddata.workload_uid.clone());
}
let netns = InpodNetns::new(self.inpod_config.cur_netns(), poddata.netns)
.map_err(|e| Error::ProxyError(crate::proxy::Error::Io(e)))?;
let netns =
InpodNetns::new(self.inpod_config.cur_netns(), poddata.netns).map_err(|e| {
Error::ProxyError(
poddata.workload_uid.0.clone(),
crate::proxy::Error::Io(e),
)
})?;
let info = poddata.workload_info.map(|w| WorkloadInfo {
name: w.name,
namespace: w.namespace,
service_account: w.service_account,
});
self.add_workload(&poddata.workload_uid, info, netns)
.await
.map_err(Error::ProxyError)
.map_err(|e| Error::ProxyError(poddata.workload_uid.0, e))
}
WorkloadMessage::KeepWorkload(workload_uid) => {
info!(
Expand Down Expand Up @@ -173,7 +178,7 @@ impl WorkloadProxyManagerState {
Ok(())
}
WorkloadMessage::WorkloadSnapshotSent => {
info!("pod received snapshot sent");
info!("received snapshot sent");
if self.snapshot_received {
return Err(Error::ProtocolError("pod snapshot received already".into()));
}
Expand Down Expand Up @@ -328,6 +333,10 @@ impl WorkloadProxyManagerState {
!self.pending_workloads.is_empty()
}

pub fn pending_uids(&self) -> Vec<String> {
self.pending_workloads.keys().map(|k| k.0.clone()).collect()
}

pub fn ready(&self) -> bool {
// We are ready after we received our first snapshot and don't have any proxies that failed to start.
self.snapshot_received && !self.have_pending()
Expand All @@ -337,11 +346,11 @@ impl WorkloadProxyManagerState {
let current_pending_workloads = std::mem::take(&mut self.pending_workloads);

for (uid, (info, netns)) in current_pending_workloads {
info!("retrying workload {:?}", uid);
info!(uid = uid.0, "retrying workload");
match self.add_workload(&uid, info, netns).await {
Ok(()) => {}
Err(e) => {
info!("retrying workload {:?} failed: {}", uid, e);
info!(uid = uid.0, "retrying workload failed: {}", e);
}
}
}
Expand Down
7 changes: 4 additions & 3 deletions src/inpod/workloadmanager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,12 +335,12 @@ impl<'a> WorkloadProxyManagerProcessor<'a> {
.await
.map_err(|e| Error::SendAckError(e.to_string()))?;
}
Err(Error::ProxyError(e)) => {
Err(Error::ProxyError(uid, e)) => {
error!(%uid, "failed to start proxy: {:?}", e);
// setup the retry timer:
self.schedule_retry();
// proxy error is a transient error, so report it but don't disconnect
// TODO: raise metrics
error!("failed to start proxy: {:?}", e);
processor
.send_nack(anyhow::anyhow!("failure to start proxy : {:?}", e))
.await
Expand All @@ -363,10 +363,11 @@ impl<'a> WorkloadProxyManagerProcessor<'a> {

fn schedule_retry(&mut self) {
if self.next_pending_retry.is_none() {
info!("scheduling retry");
info!(uids=?self.state.pending_uids(), "scheduling retry");
self.next_pending_retry = Some(Box::pin(tokio::time::sleep(RETRY_DURATION)));
}
}

fn check_ready(&mut self) {
if self.state.ready() {
self.readiness.mark_ready();
Expand Down

0 comments on commit 9c5f653

Please sign in to comment.