diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index dc00f6f6f4..0000000000 --- a/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM registry.k8s.io/build-image/debian-base:bullseye-v1.4.3 - -RUN apt update && apt upgrade -y && apt-mark unhold libcap2 && clean-install ca-certificates uuid-dev util-linux mount udev wget e2fsprogs nfs-common netbase procps conntrack iptables bind9-host iproute2 bash - -COPY aznfswatchdog /usr/sbin/aznfswatchdog - -CMD [ "aznfswatchdog" ] \ No newline at end of file diff --git a/aznfswatchdog b/aznfswatchdog deleted file mode 100644 index 86cc8f78dd..0000000000 --- a/aznfswatchdog +++ /dev/null @@ -1,358 +0,0 @@ -#!/bin/bash - -# -------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# -------------------------------------------------------------------------------------------- - -# -# How often does the watchdog look for unmounts and/or IP address changes for -# Blob endpoints. -# -MONITOR_INTERVAL_SECS=5 - -# How often do we check for change in FQDN->IP? -IP_CHANGE_DETECTION_FREQUENCY=60 - -# -# Remove unmounted entries only if MOUNTMAP has not been changed till MONITOR_INTERVAL_SECS seconds. -# Don't set it below 3 minutes. -# -MOUNTMAP_INACTIVITY_SECS=300 - -# -# Don't perform mountmap and iptables rule cleanup for unmounted filesystems. -# This can be set if you want lazy umount to work. -# -AZNFS_SKIP_UNMOUNT_CLEANUP="${AZNFS_SKIP_UNMOUNT_CLEANUP:-0}" - -# TIMEWAIT timeout to be used for conntrack entries. -AZNFS_TIMEWAIT_TIMEOUT="${AZNFS_TIMEWAIT_TIMEOUT:-65}" - -next_ip_change_detection_epoch=0 - -# Load common aznfs helpers. -. /opt/microsoft/aznfs/common.sh - - -# Create /opt/microsoft/aznfs/randbytes if not already created. -if [ ! 
-s /opt/microsoft/aznfs/randbytes ]; then - dd if=/dev/urandom of=/opt/microsoft/aznfs/randbytes bs=256 count=1 -fi -if [ ! -s /opt/microsoft/aznfs/randbytes ]; then - uuidgen > /opt/microsoft/aznfs/randbytes -fi -if [ ! -s /opt/microsoft/aznfs/randbytes ]; then - date | md5sum | awk '{print $1}' > /opt/microsoft/aznfs/randbytes -fi -if [ ! -s /opt/microsoft/aznfs/randbytes ]; then - date > /opt/microsoft/aznfs/randbytes -fi -chattr +i /opt/microsoft/aznfs/randbytes - -# -# Hash for storing how many times we have seen a conntrack entry in SYN_SENT state. -# Used for finding if some entry is stuck in SYN_SENT state due to a bug in older -# kernels. If we find an entry stuck for more than a certain time in SYN_SENT state -# we delete the entry so that kernel looks up fresh NAT rules and creates a new entry. -# -declare -A cthash_synsent -declare -A cthash_synsent_now - -reconcile_conntrack_one() -{ - local l_ip=$1 - local l_sport=$2 - local l_dport=$3 - local l_nfsip=$4 - - key="${l_ip}:${l_sport}:${l_dport}:${l_nfsip}" - let cthash_synsent[$key]++ - let cthash_synsent_now[$key]++ - - # - # We are called every 5 secs, so this deletes an entry stuck in - # SYN_SENT for 25/30 secs. - # - if [ ${cthash_synsent[$key]} -ge 5 ]; then - cmd="conntrack -D -p tcp -d $l_ip -r $l_nfsip --sport $l_sport --dport $l_dport" - wecho "Deleting conntrack entry stuck in SYN_SENT state [$cmd]" - - eval $cmd - if [ $? -ne 0 ]; then - eecho "Failed to delete conntrack entry [$cmd]!" - else - unset cthash_synsent[$key] - fi - fi -} - -reconcile_conntrack() -{ - local l_ip=$1 - local l_nfsip=$2 - - # cthash_synsent_now holds only entries found in this call, so clear it before starting. - unset cthash_synsent_now - declare -A cthash_synsent_now - - # - # For mounts with nconnect, there could be more than one conntrack entries to the same - # proxy IP, but with different local ports. We must track them separately. 
- # - IFS=$'\n' output111=$(conntrack -L -p tcp -d $l_ip -r $l_nfsip --dport 111 --state SYN_SENT 2>/dev/null) - IFS=$'\n' output2048=$(conntrack -L -p tcp -d $l_ip -r $l_nfsip --dport 2048 --state SYN_SENT 2>/dev/null) - output="$output111"$'\n'"$output2048" - - if [ -n "$output" ]; then - for line in $output; do - # XXX Remove this log after running for few days. - vecho "$line" - - matchstr=".* SYN_SENT src=.* dst=$l_ip sport=([0-9]+) dport=([0-9]+).*" - if [[ "$line" =~ $matchstr ]]; then - l_sport=${BASH_REMATCH[1]} - l_dport=${BASH_REMATCH[2]} - reconcile_conntrack_one $l_ip $l_sport $l_dport $l_nfsip - fi - done - fi - - # - # Any port that is not stuck now, means it's recovered since the last time and can be removed - # from the global cthash_synsent map. - # - for key in "${!cthash_synsent[@]}"; do - if [ ! -v cthash_synsent_now[$key] ]; then - unset cthash_synsent[$key] - fi - done -} - -vecho "Starting aznfswatchdog..." - -# SIGTERM handler. -sigterm_handler() -{ - wecho "SIGTERM received, exiting..." - exit 0 -} -trap sigterm_handler SIGTERM - -# Dump NAT table once on startup in case we have reported conflicts. -vecho "NAT table:\n$(iptables-save -t nat)" -conntrack -L > /dev/null - -# conntrack timewait timeout higher than the TCP timewait timeout value isn't very valuable. -conntrack_timeo_timew=$(cat /proc/sys/net/netfilter/nf_conntrack_tcp_timeout_time_wait 2>/dev/null) -if [ $? -eq 0 -a $conntrack_timeo_timew -gt $AZNFS_TIMEWAIT_TIMEOUT ]; then - vecho "Changing /proc/sys/net/netfilter/nf_conntrack_tcp_timeout_time_wait [$conntrack_timeo_timew -> $AZNFS_TIMEWAIT_TIMEOUT]" - echo $AZNFS_TIMEWAIT_TIMEOUT > /proc/sys/net/netfilter/nf_conntrack_tcp_timeout_time_wait -fi - -if ! chattr -f +i $MOUNTMAP; then - wecho "chattr does not work for ${MOUNTMAP}!" -fi - - - -# -# Watchdog for monitoring unmounts and more importantly change in blob endpoint -# addresses possibly as a result of migration. 
-# -while :; do - sleep $MONITOR_INTERVAL_SECS - - # - # TODO: Add a function reconcile_mountmap() and call it from here. This - # should reconstruct the MOUNTMAP file from findmnt and output of - # iptables. This will be added in subsequent revisions. - # - - epoch_now=$(date +%s) - - # - # Go over all lines in $MOUNTMAP and check them for two things: - # 1. Is that entry still in use by at least one aznfs mount, if not remove the entry. - # 2. Has the Blob endpoint address changed from what is stored? - # If yes, update DNAT rule to point to the new address and update entry accordingly. - # - # Sample line in $MOUNTMAP. - # account.blob.preprod.core.windows.net 10.100.100.100 52.230.170.200 - # - # where the format is - # blobendpoint_fqdn proxy_ip blobendpoint_ip - # - # We store the mtime of MOUNTMAP while inside the lock so that if any mount helper process - # updates it after this we will skip modification for sake of safety. We will come to it - # in the next iteration when it's safer. - # - exec {fd}<$MOUNTMAP - flock -e $fd - mtime_mountmap=$(stat -c%Y $MOUNTMAP) - IFS=$'\n' lines=$(cat $MOUNTMAP) - flock -u $fd - exec {fd}<&- - - do_ip_change_detection=false - if [ $epoch_now -ge $next_ip_change_detection_epoch ]; then - do_ip_change_detection=true - next_ip_change_detection_epoch=$(expr $(date +%s) + $IP_CHANGE_DETECTION_FREQUENCY) - fi - - # - # Do unmount GC only if MOUNTMAP file is not modified in the last - # MOUNTMAP_INACTIVITY_SECS seconds. We don't want to incorrectly delete an - # entry while some aznfs mount is ongoing. - # - do_unmount_gc=false - if [ "$AZNFS_SKIP_UNMOUNT_CLEANUP" == "0" ]; then - if [ $epoch_now -ge $(expr $mtime_mountmap + $MOUNTMAP_INACTIVITY_SECS) ]; then - do_unmount_gc=true - fi - fi - - # - # findmnt must be done after reading MOUNTMAP so that if we come accross a - # MOUNTMAP entry whose proxy_ip is not used by any existing mount, we know - # for sure that it's not in use by any mount and can be removed. 
- # - findmnt=$(findmnt --raw --noheading -o MAJ:MIN,FSTYPE,SOURCE,TARGET,OPTIONS -t nfs 2>&1) - - # - # For no matching mounts also, findmnt exits with a failure return, so check - # for both exit status and non-empty error o/p. - # - if [ $? -ne 0 -a -n "$findmnt" ]; then - eecho "${findmnt}." - eecho "[FATAL] findmnt failed unexpectedly!" - eecho "[FATAL] Aznfswatchdog service is exiting, will not monitor Azure NFS shares." - eecho "[FATAL] Please contact Microsoft support before using any Blob NFS shares." - # This usually indicates some non-transient issue, bail out. - exit 1 - fi - - for line in $lines; do - if [ -z "$line" ]; then - continue - fi - - # - # MOUNTMAP line is of the form: - # account.blob.preprod.core.windows.net [] - # - IFS=" " read l_host l_ip l_nfsip <<< "$line" - - if [ -z "$l_host" -o -z "$l_ip" -o -z "$l_nfsip" ]; then - wecho "[FATAL] Deleting invalid line in $MOUNTMAP: [$line]!" - l_mtime=$(ensure_mountmap_not_exist "$line") - [ $? -eq 0 ] && mtime_mountmap=$l_mtime - continue - fi - - # Since we added it to the MOUNTMAP file, it cannot be invalid. - if ! is_private_ip "$l_ip"; then - wecho "[FATAL] local ip ($l_ip) is invalid!" - l_mtime=$(ensure_mountmap_not_exist "$line") - [ $? -eq 0 ] && mtime_mountmap=$l_mtime - continue - fi - - # Since we added it to the MOUNTMAP file, it cannot be invalid. - if ! is_valid_ipv4_address "$l_nfsip"; then - wecho "[FATAL] Blob endpoint ip ($l_nfsip) is invalid!" - l_mtime=$(ensure_mountmap_not_exist "$line") - [ $? -eq 0 ] && mtime_mountmap=$l_mtime - continue - fi - - # - # Delete entry from MOUNTMAP if there are no mounted shares on that host. - # As long as we have at least one mount using the MOUNTMAP entry, we leave - # it around. - # - if ! echo "$findmnt" | grep " nfs ${l_ip}:" >/dev/null; then - if $do_unmount_gc; then - pecho "No mounted shares for host $l_host, deleting from ${MOUNTMAP} [$line]." - - # Delete IFF mountmap is not changed since we read it above. 
- l_mtime=$(ensure_mountmap_not_exist "$line" "$mtime_mountmap") - - # - # Update ifmatch time in case of successful updation of MOUNTMAP, - # so that we can distinguish between MOUNTMAP mtime changing because - # of our action or some mount helper changing it. In the former case - # it's safe to update the MOUNTMAP, so update mtime_mountmap to the - # mtime after this update. - # - [ $? -eq 0 ] && mtime_mountmap=$l_mtime - continue - fi - else - # - # Verify that iptable entry should be present for corresponding - # MOUNTMAP entry if the share is not unmounted. - # - # Note: This is extra protection in case user flushes the iptable - # entries or removes it by mistake. This should not be - # required normally. - # - # We also reconcile conntrack entries stuck in some bad states which - # may hamper communication, f.e., in older kernels there's a bug due to - # which conntrack entry may get stuck in SYN_SENT state if client - # reuse the source port and keep retransmitting SYNs before the entry - # can timeout. - # - reconcile_conntrack "$l_ip" "$l_nfsip" - verify_iptable_entry "$l_ip" "$l_nfsip" - - fi - - # - # We do IP change detection less frequently than unmount detection - # since it will cause DNS calls on network. - # - if ! $do_ip_change_detection; then - continue - fi - - # - # Check if blob endpoint IP address changed. - # This is the migration check. - # - new_ip=$(resolve_ipv4 "$l_host") - - # If we fail to resolve the host name, try next time. - if [ $? -ne 0 ]; then - # - # If account is deleted then we need to delete the MOUNTMAP entry along - # with the proxy iptable entry created for that account. - # Note that we don't delete if the MOUNTMAP was changed recently since - # the account may have been re-created after the dns lookup failure. - # - if [ "$new_ip" == "NXDOMAIN" ]; then - pecho "Account corresponding to $l_host seems to have been deleted, deleting from ${MOUNTMAP} [$line]!" 
- - l_mtime=$(ensure_mountmap_not_exist "$line" "$mtime_mountmap") - [ $? -eq 0 ] && mtime_mountmap=$l_mtime - else - eecho "resolve_ipv4($l_host) failed: $new_ip" - fi - continue - fi - - # - # If the IP changed for the Blob endpoint, we need to update the DNAT rule. - # This will take care of migration/failover causing the Blob endpoint IP to change. - # - if [ "$new_ip" != "$l_nfsip" ]; then - pecho "IP for $l_host changed [$l_nfsip -> $new_ip]." - - # This will update DNAT rule as well. - if ! update_mountmap_entry "$line" "$l_host $l_ip $new_ip"; then - eecho "Will reattempt the operation in next iteration." - fi - fi - done - -done diff --git a/charts/latest/blob-csi-driver/templates/csi-blob-node.yaml b/charts/latest/blob-csi-driver/templates/csi-blob-node.yaml index ba8454bfcb..e2ac87fe35 100644 --- a/charts/latest/blob-csi-driver/templates/csi-blob-node.yaml +++ b/charts/latest/blob-csi-driver/templates/csi-blob-node.yaml @@ -238,17 +238,23 @@ spec: mountPath: /etc/pki/ca-trust/extracted readOnly: true {{- end }} - - mountPath: /opt/microsoft/aznfs/ - name: aznfs + - mountPath: /opt/microsoft/aznfs/data + name: aznfs-data resources: {{- toYaml .Values.node.resources.blob | nindent 12 }} - name: aznfswatchdog - image: cvvz/aznfswatchdog:latest - imagePullPolicy: Always +{{- if hasPrefix "/" .Values.image.blob.repository }} + image: "{{ .Values.image.baseRepo }}{{ .Values.image.blob.repository }}:{{ .Values.image.blob.tag }}" +{{- else }} + image: "{{ .Values.image.blob.repository }}:{{ .Values.image.blob.tag }}" +{{- end }} + command: + - "aznfswatchdog" + imagePullPolicy: {{ .Values.image.blob.pullPolicy }} securityContext: privileged: true volumeMounts: - - mountPath: /opt/microsoft/aznfs/ - name: aznfs + - mountPath: /opt/microsoft/aznfs/data + name: aznfs-data - mountPath: {{ .Values.linux.kubelet }}/ mountPropagation: Bidirectional name: mountpoint-dir @@ -294,8 +300,9 @@ spec: path: /etc/pki/ca-trust/extracted {{- end }} - hostPath: - path: 
/opt/microsoft/aznfs/ - name: aznfs + path: /opt/microsoft/aznfs/data + type: DirectoryOrCreate + name: aznfs-data {{- if .Values.securityContext }} securityContext: {{- toYaml .Values.securityContext | nindent 8 }} {{- end }} diff --git a/mount.aznfs b/mount.aznfs deleted file mode 100644 index 18ea81c5ae..0000000000 Binary files a/mount.aznfs and /dev/null differ diff --git a/pkg/blobfuse-proxy/init.sh b/pkg/blobfuse-proxy/init.sh index 4d30ff6464..40643f51ac 100755 --- a/pkg/blobfuse-proxy/init.sh +++ b/pkg/blobfuse-proxy/init.sh @@ -147,9 +147,3 @@ SUBSYSTEM=="bdi", ACTION=="add", PROGRAM="$AWK_PATH -v bdi=\$kernel 'BEGIN{ret=1 EOF $HOST_CMD udevadm control --reload fi - -# install aznfs -echo "install aznfs" -$HOST_CMD wget -O aznfs-0.1.114.tar.gz https://github.com/Azure/AZNFS-mount/releases/download/0.1.114/aznfs-0.1.114.tar.gz -$HOST_CMD tar xvzhf aznfs-0.1.114.tar.gz -C / -$HOST_CMD rm -f aznfs-0.1.114.tar.gz diff --git a/pkg/blobplugin/Dockerfile b/pkg/blobplugin/Dockerfile index 79ebbb9b97..702da8b681 100644 --- a/pkg/blobplugin/Dockerfile +++ b/pkg/blobplugin/Dockerfile @@ -29,15 +29,16 @@ RUN chmod +x /blobfuse-proxy/init.sh && \ chmod +x /blobfuse-proxy/blobfuse-proxy RUN apt update && apt upgrade -y && apt-mark unhold libcap2 && clean-install ca-certificates uuid-dev util-linux mount udev wget e2fsprogs nfs-common netbase procps conntrack iptables bind9-host iproute2 bash +# install aznfs: download, unpack and delete the release tarball in ONE layer so the archive is never persisted in the image +RUN wget -O aznfs.tar.gz https://github.com/Azure/AZNFS-mount/releases/download/0.1.115/aznfs-0.1.115.tar.gz && \ + tar xvzf aznfs.tar.gz -C / && rm aznfs.tar.gz ARG ARCH=amd64 RUN if [ "$ARCH" = "amd64" ] ; then \ clean-install libcurl4-gnutls-dev && \ wget -O /blobfuse-proxy/packages-microsoft-prod-22.04.deb https://packages.microsoft.com/config/ubuntu/22.04/packages-microsoft-prod.deb && \ wget -O /blobfuse-proxy/packages-microsoft-prod-18.04.deb https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb && \ - dpkg -i 
/blobfuse-proxy/packages-microsoft-prod-18.04.deb && apt update && apt install blobfuse blobfuse2 fuse -y; fi + dpkg -i /blobfuse-proxy/packages-microsoft-prod-18.04.deb && apt update && apt install blobfuse blobfuse2 fuse -y && apt remove wget -y; fi LABEL maintainers="andyzhangx" LABEL description="Azure Blob Storage CSI driver" -COPY mount.aznfs /sbin/mount.aznfs - ENTRYPOINT ["/blobplugin"]