Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

November release fixes - activate one agent, adx schema v2, win perf issue, syslog deactivation #459

Merged
merged 2 commits into from
Oct 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions build/linux/installer/scripts/livenessprobe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,25 @@
(ps -ef | grep omsagent- | grep -v "grep")
if [ $? -ne 0 ]
then
echo "Agent is NOT running" > /dev/termination-log
echo " omsagent is not running" > /dev/termination-log
exit 1
fi

#optionally test to exit non zero value if oneagent is not running
if [ -e "/opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2" ]; then
(ps -ef | grep "mdsd -l" | grep -v "grep")
if [ $? -ne 0 ]
then
echo "oneagent is not running" > /dev/termination-log
exit 1
fi
fi

#test to exit non zero value if fluentbit is not running
(ps -ef | grep td-agent-bit | grep -v "grep")
if [ $? -ne 0 ]
then
echo "Fluentbit is NOT running" > /dev/termination-log
echo "Fluentbit is not running" > /dev/termination-log
exit 1
fi

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ spec:
{{- end }}
imagePullPolicy: IfNotPresent
resources:
{{ toYaml .Values.omsagent.resources.daemonset | indent 9 }}
# "daemonset-windows" contains a hyphen, which Go templates reject in a dotted field chain, so the key is looked up with index.
{{ toYaml (index .Values.omsagent.resources "daemonset-windows") | indent 9 }}
env:
{{- if ne .Values.omsagent.env.clusterId "<your_cluster_id>" }}
- name: AKS_RESOURCE_ID
Expand Down Expand Up @@ -96,6 +96,7 @@ spec:
- C:\opt\omsagentwindows\scripts\cmd\livenessProbe.cmd
periodSeconds: 60
initialDelaySeconds: 180
timeoutSeconds: 15
{{- with .Values.omsagent.tolerations }}
tolerations: {{- toYaml . | nindent 8 }}
{{- end }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ spec:
{{- end }}
imagePullPolicy: IfNotPresent
resources:
{{ toYaml .Values.omsagent.resources.daemonset | indent 9 }}
# "daemonset-linux" contains a hyphen, which Go templates reject in a dotted field chain, so the key is looked up with index.
{{ toYaml (index .Values.omsagent.resources "daemonset-linux") | indent 9 }}
env:
{{- if ne .Values.omsagent.env.clusterId "<your_cluster_id>" }}
- name: AKS_RESOURCE_ID
Expand Down
6 changes: 5 additions & 1 deletion charts/azuremonitor-containers/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,13 +116,17 @@ omsagent:
## ref: http://kubernetes.io/docs/user-guide/compute-resources/
##
resources:
daemonset:
daemonset-linux:
requests:
cpu: 75m
memory: 225Mi
limits:
cpu: 150m
memory: 600Mi
daemonset-windows:
limits:
cpu: 200m
memory: 600Mi
deployment:
requests:
cpu: 150m
Expand Down
123 changes: 95 additions & 28 deletions kubernetes/linux/main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,97 @@ echo "DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION"
export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION
echo "export DOCKER_CIMPROV_VERSION=$DOCKER_CIMPROV_VERSION" >> ~/.bashrc

#region check to auto-activate oneagent, to route container logs.
# Intent is to activate oneagent routing for all managed clusters whose region is in the
# region list, unless overridden by the configmap.
#   AZMON_CONTAINER_LOGS_ROUTE            route (if any) specified in the configmap
#   AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE  final route that we compute & set, based on the region list logic
#   AZMON_CONTAINERLOGS_ONEAGENT_REGIONS  comma-separated list of regions where oneagent is auto-activated
#   AKS_REGION                            region of the current cluster

# Checks whether a region is an entry of a comma-separated region list.
# Arguments: $1 - region list (entries separated by commas and/or blanks)
#            $2 - region to look for
# Returns:   0 if $2 exactly equals one of the entries, 1 otherwise
#            (an empty list or an empty region never matches).
# The comparison is case-sensitive; callers lowercase both values first.
is_oneagent_region() {
  local regionlist=$1
  local region=$2
  local candidate
  local -a candidates=()

  if [ -z "$regionlist" ] || [ -z "$region" ]; then
    return 1
  fi

  # Split on commas and blanks so that "a,b" and "a, b" are treated alike.
  IFS=', ' read -r -a candidates <<< "$regionlist"
  for candidate in "${candidates[@]}"; do
    if [ "$candidate" = "$region" ]; then
      return 0
    fi
  done
  return 1
}

echo "************start oneagent log routing checks************"
# by default, use configmap route for safer side
AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$AZMON_CONTAINER_LOGS_ROUTE

#trim region list
oneagentregions="$(printf '%s\n' "$AZMON_CONTAINERLOGS_ONEAGENT_REGIONS" | xargs)"
#lowercase region list
typeset -l oneagentregions="$oneagentregions"
echo "oneagent regions: $oneagentregions"
#trim current region
currentregion="$(printf '%s\n' "$AKS_REGION" | xargs)"
#lowercase current region
typeset -l currentregion="$currentregion"
echo "current region: $currentregion"

#initialize isoneagentregion as false
isoneagentregion=false

#set isoneagentregion as true if matching region is found
if is_oneagent_region "$oneagentregions" "$currentregion"; then
  isoneagentregion=true
  echo "current region is in oneagent regions..."
else
  echo "current region is not in oneagent regions..."
fi

if [ "$isoneagentregion" = true ]; then
  #if configmap has a routing for logs, but current region is in the oneagent region list, take the configmap route
  #(a route consisting only of whitespace counts as "no route")
  if [ -n "${AZMON_CONTAINER_LOGS_ROUTE//[[:space:]]/}" ]; then
    AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$AZMON_CONTAINER_LOGS_ROUTE
    echo "oneagent region is true for current region:$currentregion and config map logs route is not empty. so using config map logs route as effective route:$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"
  else #there is no configmap route, so route thru oneagent
    AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE="v2"
    echo "oneagent region is true for current region:$currentregion and config map logs route is empty. so using oneagent as effective route:$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"
  fi
else
  echo "oneagent region is false for current region:$currentregion"
fi


#start oneagent
#only when /etc/config/kube.conf is absent and an effective route has been determined
if [ ! -e "/etc/config/kube.conf" ]; then
  if [ -n "${AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE//[[:space:]]/}" ]; then
    echo "container logs configmap route is $AZMON_CONTAINER_LOGS_ROUTE"
    echo "container logs effective route is $AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE"
    #trim
    containerlogsroute="$(printf '%s\n' "$AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE" | xargs)"
    # convert to lowercase
    typeset -l containerlogsroute="$containerlogsroute"

    echo "setting AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE as :$containerlogsroute"
    export AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE="$containerlogsroute"
    echo "export AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE=$containerlogsroute" >> ~/.bashrc
    source ~/.bashrc

    if [ "$containerlogsroute" = "v2" ]; then
      echo "activating oneagent..."
      echo "configuring mdsd..."
      # Append the mdsd environment file to ~/.bashrc line by line; read -r and printf
      # keep backslashes and glob characters intact, and the || clause keeps a final
      # line that has no trailing newline.
      while IFS= read -r line || [ -n "$line" ]; do
        printf '%s\n' "$line" >> ~/.bashrc
      done < /etc/mdsd.d/envmdsd
      source /etc/mdsd.d/envmdsd

      echo "setting mdsd workspaceid & key for workspace:$CIWORKSPACE_id"
      export CIWORKSPACE_id=$CIWORKSPACE_id
      echo "export CIWORKSPACE_id=$CIWORKSPACE_id" >> ~/.bashrc
      export CIWORKSPACE_key=$CIWORKSPACE_key
      echo "export CIWORKSPACE_key=$CIWORKSPACE_key" >> ~/.bashrc

      source ~/.bashrc

      # log the installed mdsd package name and version
      dpkg -l | grep mdsd | awk '{print $2 " " $3}'

      echo "starting mdsd ..."
      mdsd -l -e "${MDSD_LOG}/mdsd.err" -w "${MDSD_LOG}/mdsd.warn" -o "${MDSD_LOG}/mdsd.info" -q "${MDSD_LOG}/mdsd.qos" &

      # marker file: its presence records that the v2 (oneagent) route was activated
      touch /opt/AZMON_CONTAINER_LOGS_EFFECTIVE_ROUTE_V2
    fi
  fi
fi
echo "************end oneagent log routing checks************"

#telegraf & fluentbit requirements
if [ ! -e "/etc/config/kube.conf" ]; then
if [ "$CONTAINER_RUNTIME" == "docker" ]; then
Expand Down Expand Up @@ -491,37 +582,13 @@ dpkg -l | grep td-agent-bit | awk '{print $2 " " $3}'

#dpkg -l | grep telegraf | awk '{print $2 " " $3}'

#start oneagent
if [ ! -e "/etc/config/kube.conf" ]; then
if [ ! -z $AZMON_CONTAINER_LOGS_ROUTE ]; then
echo "container logs route is defined as $AZMON_CONTAINER_LOGS_ROUTE"
#trim
containerlogsroute="$(echo $AZMON_CONTAINER_LOGS_ROUTE | xargs)"
# convert to lowercase
typeset -l containerlogsroute=$containerlogsroute
if [ "$containerlogsroute" == "v2" ]; then
echo "containerlogsroute $containerlogsroute"
echo "configuring mdsd..."
cat /etc/mdsd.d/envmdsd | while read line; do
echo $line >> ~/.bashrc
done
source /etc/mdsd.d/envmdsd

echo "setting mdsd workspaceid & key for workspace:$CIWORKSPACE_id"
export CIWORKSPACE_id=$CIWORKSPACE_id
echo "export CIWORKSPACE_id=$CIWORKSPACE_id" >> ~/.bashrc
export CIWORKSPACE_key=$CIWORKSPACE_key
echo "export CIWORKSPACE_key=$CIWORKSPACE_key" >> ~/.bashrc

source ~/.bashrc
echo "stopping rsyslog..."
service rsyslog stop

dpkg -l | grep mdsd | awk '{print $2 " " $3}'

echo "starting mdsd ..."
mdsd -l -e ${MDSD_LOG}/mdsd.err -w ${MDSD_LOG}/mdsd.warn -o ${MDSD_LOG}/mdsd.info -q ${MDSD_LOG}/mdsd.qos &
fi
fi
fi
echo "getting rsyslog status..."
service rsyslog status

shutdown() {
/opt/microsoft/omsagent/bin/service_control stop
Expand Down
4 changes: 2 additions & 2 deletions kubernetes/linux/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ mv $TMPDIR/omsbundle* $TMPDIR/omsbundle
/usr/bin/dpkg -i $TMPDIR/omsbundle/110/omsagent*.deb
#/usr/bin/dpkg -i $TMPDIR/omsbundle/100/omsconfig*.deb

#install oneagent - Latest dev bits (7/17)
wget https://github.com/microsoft/Docker-Provider/releases/download/7172020-oneagent/azure-mdsd_1.5.124-build.develop.1294_x86_64.deb
#install oneagent - Official bits (10/18)
wget https://github.com/microsoft/Docker-Provider/releases/download/10182020-oneagent/azure-mdsd_1.5.126-build.master.99_x86_64.deb
/usr/bin/dpkg -i $TMPDIR/azure-mdsd*.deb
cp -f $TMPDIR/mdsd.xml /etc/mdsd.d
cp -f $TMPDIR/envmdsd /etc/mdsd.d
Expand Down
10 changes: 5 additions & 5 deletions kubernetes/omsagent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ spec:
imagePullPolicy: IfNotPresent
resources:
limits:
cpu: 150m
cpu: 250m
memory: 600Mi
requests:
cpu: 75m
Expand All @@ -370,6 +370,8 @@ spec:
# Update this with the user assigned msi client id for omsagent
- name: USER_ASSIGNED_IDENTITY_CLIENT_ID
value: ""
- name: AZMON_CONTAINERLOGS_ONEAGENT_REGIONS
value: "koreacentral,norwayeast"
securityContext:
privileged: true
ports:
Expand Down Expand Up @@ -650,11 +652,8 @@ spec:
imagePullPolicy: IfNotPresent
resources:
limits:
cpu: 150m
cpu: 200m
memory: 600Mi
requests:
cpu: 75m
memory: 225Mi
env:
# azure devops pipeline uses AKS_RESOURCE_ID and AKS_REGION hence ensure to uncomment these
- name: AKS_RESOURCE_ID
Expand Down Expand Up @@ -696,6 +695,7 @@ spec:
- C:\opt\omsagentwindows\scripts\cmd\livenessProbe.cmd
periodSeconds: 60
initialDelaySeconds: 180
timeoutSeconds: 15
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
Expand Down
Loading