Commit f05f3cb

Add a model cache to avoid running out of storage (#201)
magdyksaleh authored Jan 23, 2024
1 parent 1be43cf commit f05f3cb
Showing 1 changed file with 65 additions and 1 deletion.
sync.sh (66 changes: 65 additions & 1 deletion)
@@ -12,6 +12,13 @@ OBJECT_ID="${MODEL_ID//\//--}"
S3_BASE_DIRECTORY="models--$OBJECT_ID"
S3_PATH="s3://${HF_CACHE_BUCKET}/${S3_BASE_DIRECTORY}/"
LOCAL_MODEL_DIR="${HUGGINGFACE_HUB_CACHE}/${S3_BASE_DIRECTORY}"
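# e.g. MODEL_ID "org/model" becomes OBJECT_ID "org--model" and cache dir "models--org--model", both in S3 and locally
# cache.txt records cached model directories oldest-first; cache.lock serializes updates to it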
LOCKFILE="${HUGGINGFACE_HUB_CACHE}/cache.lock"
CACHE_FILE="${HUGGINGFACE_HUB_CACHE}/cache.txt"

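# Keep at most CACHE_SIZE model directories in the local cache (override via the CACHE_SIZE environment variable)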
DEFAULT_CACHE_SIZE=4
CACHE_SIZE=${CACHE_SIZE:-$DEFAULT_CACHE_SIZE}

sudo mkdir -p $LOCAL_MODEL_DIR

# Function to check if lorax-launcher is running
is_launcher_running() {
@@ -20,7 +27,63 @@ is_launcher_running() {
kill -0 "$launcher_pid" >/dev/null 2>&1
}

sudo mkdir -p $LOCAL_MODEL_DIR
clean_up_cache() {
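# Move $1 (a model directory name) to the most-recently-used end of cache file $2,
# then delete any model directories evicted beyond CACHE_SIZE.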
local temp_file=$(mktemp)
local removed_lines=""
local key=$1
local file=$2

# Remove the key if it exists (a missing cache file is treated as empty)
grep -v "^$key\$" "$file" 2>/dev/null > "$temp_file"

# Add the key to the bottom of the file
echo "$key" >> "$temp_file"

# Count total lines in temp file
local total_lines=$(wc -l < "$temp_file")

# Calculate number of lines to be removed, if any
local lines_to_remove=$((total_lines - CACHE_SIZE))

if [ "$lines_to_remove" -gt 0 ]; then
# Store removed lines in a variable
removed_lines=$(head -n "$lines_to_remove" "$temp_file")
echo "Deleting $removed_lines from cache"
fi

# Ensure only the last CACHE_SIZE items are retained
tail -n $CACHE_SIZE "$temp_file" > "$file"

# Clean up the temporary file
rm "$temp_file"

for line in $removed_lines; do
model_to_remove="${HUGGINGFACE_HUB_CACHE}/${line}"
echo "Removing $model_to_remove"
rm -rf "$model_to_remove"
done
}

(
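# Serialize cache bookkeeping across concurrent runs: this subshell holds an exclusive flock on fd 200 (opened on $LOCKFILE) until it exits.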
# Wait for lock on $LOCKFILE (fd 200)
flock -x 200

echo "Lock acquired."

if [ -f "$CACHE_FILE" ]; then
echo "Cache file exists."
while read -r line; do
echo "Line read: $line"
if [ "$line" = "$S3_BASE_DIRECTORY" ]; then
echo "Model found in cache."
fi
done < "$CACHE_FILE"
else
echo "Cache file does not exist."
fi
clean_up_cache "$S3_BASE_DIRECTORY" "$CACHE_FILE"
) 200>$LOCKFILE


if [ -n "$(ls -A $LOCAL_MODEL_DIR)" ]; then
echo "Files have already been downloaded to ${LOCAL_MODEL_DIR}"
@@ -56,6 +119,7 @@ else
echo "Downloading weights from ${S3_PATH}"
fi


echo "Files found for model ${MODEL_ID}"
aws s3 ls "${S3_PATH}" --recursive | awk '{print $4}'

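For reference, a minimal standalone sketch of the eviction idiom used by clean_up_cache above. It is not part of sync.sh; the touch_key helper, temp files, and model names are illustrative. Re-adding a key moves it to the bottom of the cache file, and tail -n keeps only the newest CACHE_SIZE entries, so the oldest keys are the ones evicted:

#!/bin/bash
# Standalone demo of the cache-file idiom (hypothetical helper mirroring clean_up_cache).
set -euo pipefail

CACHE_FILE=$(mktemp)
CACHE_SIZE=2

touch_key() {
    local key=$1 tmp
    tmp=$(mktemp)
    grep -v "^${key}\$" "$CACHE_FILE" > "$tmp" || true  # drop the key's old position, if any
    echo "$key" >> "$tmp"                               # re-append as most recently used
    tail -n "$CACHE_SIZE" "$tmp" > "$CACHE_FILE"        # keep only the newest CACHE_SIZE keys
    rm "$tmp"
}

touch_key models--org--model-a
touch_key models--org--model-b
touch_key models--org--model-a   # model-a is now most recently used
touch_key models--org--model-c   # model-b (least recently used) falls off
cat "$CACHE_FILE"                # prints models--org--model-a, then models--org--model-c

In sync.sh itself, each evicted name is additionally deleted from ${HUGGINGFACE_HUB_CACHE} with rm -rf.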
