From 6c1caebfd3e9d90ab0fd2a20b8d429c1c3b89c5a Mon Sep 17 00:00:00 2001
From: jfrery
Date: Wed, 4 Sep 2024 14:49:02 +0200
Subject: [PATCH] chore: rename use case and notebook + add readme

---
 .github/workflows/refresh-one-notebook.yaml |   4 +-
 .../.gitignore                              |   0
 .../GPT2FineTuneHybrid.ipynb}               |   0
 .../Makefile                                |   0
 use_case_examples/lora_finetuning/README.md | 133 ++++++++++++++++++
 .../data_finetune/what_is_fhe.txt           |   0
 .../lora_module.py                          |   0
 .../remote_module.py                        |   0
 .../requirements.txt                        |   0
 9 files changed, 135 insertions(+), 2 deletions(-)
 rename use_case_examples/{lora_finetune => lora_finetuning}/.gitignore (100%)
 rename use_case_examples/{lora_finetune/gpt2_finetune_hybrid.ipynb => lora_finetuning/GPT2FineTuneHybrid.ipynb} (100%)
 rename use_case_examples/{lora_finetune => lora_finetuning}/Makefile (100%)
 create mode 100644 use_case_examples/lora_finetuning/README.md
 rename use_case_examples/{lora_finetune => lora_finetuning}/data_finetune/what_is_fhe.txt (100%)
 rename use_case_examples/{lora_finetune => lora_finetuning}/lora_module.py (100%)
 rename use_case_examples/{lora_finetune => lora_finetuning}/remote_module.py (100%)
 rename use_case_examples/{lora_finetune => lora_finetuning}/requirements.txt (100%)

diff --git a/.github/workflows/refresh-one-notebook.yaml b/.github/workflows/refresh-one-notebook.yaml
index 5ad4ed154d..b0ffebec17 100644
--- a/.github/workflows/refresh-one-notebook.yaml
+++ b/.github/workflows/refresh-one-notebook.yaml
@@ -21,7 +21,7 @@ on:
         - FullyConnectedNeuralNetwork \n
         - FullyConnectedNeuralNetworkOnMNIST \n
         - GLMComparison \n
-        - gpt2_finetune_hybrid \n
+        - GPT2FineTuneHybrid \n
         - HealthCarePrediction \n
         - ImportingFromScikitLearn \n
         - KaggleTitanic \n
@@ -68,7 +68,7 @@ env:
   FullyConnectedNeuralNetwork: "docs/advanced_examples/FullyConnectedNeuralNetwork.ipynb"
   FullyConnectedNeuralNetworkOnMNIST: "docs/advanced_examples/FullyConnectedNeuralNetworkOnMNIST.ipynb"
   GLMComparison: "docs/advanced_examples/GLMComparison.ipynb"
-  gpt2_finetune_hybrid: "use_case_examples/lora_finetune/gpt2_finetune_hybrid.ipynb"
+  GPT2FineTuneHybrid: "use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb"
   HealthCarePrediction: "use_case_examples/disease_prediction/HealthCarePrediction.ipynb"
   ImportingFromScikitLearn: "docs/advanced_examples/ImportingFromScikitLearn.ipynb"
   KaggleTitanic: "use_case_examples/titanic/KaggleTitanic.ipynb"
diff --git a/use_case_examples/lora_finetune/.gitignore b/use_case_examples/lora_finetuning/.gitignore
similarity index 100%
rename from use_case_examples/lora_finetune/.gitignore
rename to use_case_examples/lora_finetuning/.gitignore
diff --git a/use_case_examples/lora_finetune/gpt2_finetune_hybrid.ipynb b/use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb
similarity index 100%
rename from use_case_examples/lora_finetune/gpt2_finetune_hybrid.ipynb
rename to use_case_examples/lora_finetuning/GPT2FineTuneHybrid.ipynb
diff --git a/use_case_examples/lora_finetune/Makefile b/use_case_examples/lora_finetuning/Makefile
similarity index 100%
rename from use_case_examples/lora_finetune/Makefile
rename to use_case_examples/lora_finetuning/Makefile
diff --git a/use_case_examples/lora_finetuning/README.md b/use_case_examples/lora_finetuning/README.md
new file mode 100644
index 0000000000..d812b4ff47
--- /dev/null
+++ b/use_case_examples/lora_finetuning/README.md
@@ -0,0 +1,133 @@
+# Privacy-Preserving GPT-2 LoRA Fine-Tuning
+
+This project demonstrates how to fine-tune GPT-2 using Low-Rank Adaptation (LoRA) weights with Fully Homomorphic Encryption (FHE). The goal is to train a specialized model in a privacy-preserving manner, with minimal client-side memory requirements.
+
+## Overview
+
+Fine-tuning large language models typically requires access to sensitive data, which raises privacy concerns. By leveraging FHE, computations are performed on encrypted data, so the data remains private throughout the training process. In this approach, the LoRA weights are known only to the user who owns the data, while the memory-hungry foundation model remains on the server.
+
+## Key Features
+
+- **LoRA Fine-Tuning**: Fine-tune GPT-2 by adapting low-rank weights.
+- **Fully Homomorphic Encryption**: Perform training and inference on encrypted data.
+- **Hybrid Model**: Combine clear client-side computations with encrypted server-side computations.
+- **Low Memory Requirements**: Only minimal client-side memory is needed for the LoRA weights.
+
+## Setup

+### Installation
+
+Install the required packages:
+
+```sh
+pip install -r requirements.txt
+```
+
+## Usage
+
+### 1. Prepare the Dataset
+
+Replace the dataset in the `data_finetune` directory with the one you want to use for fine-tuning.
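+
+The snippet below is a hypothetical sketch (not taken from the notebook) of how such a text file could be turned into the `train_dataloader` used later in the training step; it assumes the Hugging Face `datasets` package is available and that a causal-language-modeling collator is appropriate.
+
+```python
+# Hypothetical data-preparation sketch; the notebook may prepare data differently.
+from datasets import load_dataset
+from torch.utils.data import DataLoader
+from transformers import AutoTokenizer, DataCollatorForLanguageModeling
+
+tokenizer = AutoTokenizer.from_pretrained("gpt2")
+tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no padding token by default
+
+# Load the raw text file shipped in data_finetune/ and tokenize it line by line.
+raw_dataset = load_dataset("text", data_files="data_finetune/what_is_fhe.txt")["train"]
+tokenized_dataset = raw_dataset.map(
+    lambda batch: tokenizer(batch["text"], truncation=True, max_length=128),
+    batched=True,
+    remove_columns=["text"],
+)
+
+# Batch the tokenized samples; mlm=False yields causal-LM labels (a copy of input_ids).
+collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
+train_dataloader = DataLoader(tokenized_dataset, batch_size=8, shuffle=True, collate_fn=collator)
+```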
+
+### 2. Run the Fine-Tuning Script
+
+Execute the Jupyter notebook `GPT2FineTuneHybrid.ipynb` to start the fine-tuning process. The notebook is structured into several steps:
+
+1. **Load Pre-trained Model and Tokenizer**:
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "gpt2"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name)
+```
+
+2. **Freeze Model Weights**:
+
+```python
+for param in model.parameters():
+    param.requires_grad = False
+```
+
+3. **Define LoRA Configuration**:
+
+```python
+from peft import LoraConfig, TaskType, get_peft_model
+
+peft_config = LoraConfig(
+    task_type=TaskType.CAUSAL_LM,
+    r=4,
+    lora_alpha=32,
+    lora_dropout=0.05,
+    fan_in_fan_out=True,
+)
+peft_model = get_peft_model(model, peft_config)
+```
+
+4. **Prepare Training Arguments and Data**:
+
+```python
+from transformers import TrainingArguments
+
+training_args = TrainingArguments(
+    output_dir="./checkpoints",
+    num_train_epochs=100,
+    per_device_train_batch_size=8,
+    gradient_accumulation_steps=2,
+    use_cpu=True,
+    learning_rate=5e-4,
+    logging_strategy="epoch",
+    optim="adamw_torch",
+    seed=0,
+    data_seed=0,
+    weight_decay=0.0,
+    warmup_steps=0,
+    max_grad_norm=1.0,
+)
+```
+
+5. **Train the Model**:
+
+```python
+# train_custom_model, hybrid_model and train_dataloader are defined in the notebook;
+# fhe="simulate" runs the encrypted parts in simulation mode.
+train_custom_model(hybrid_model, train_dataloader, training_args, fhe="simulate")
+```
+
+### 3. Generate Text with the Fine-Tuned Model
+
+After training, you can generate text using the fine-tuned model:
+
+```python
+# generate_text is a helper function defined in the notebook.
+prompt = "What is FHE?"
+generated_text = generate_text(prompt, fine_tuned_model, tokenizer)
+print(generated_text)
+```
+
+## Deployment/Production Scenario
+
+In a deployment or production scenario, the model is fine-tuned as follows (a plaintext sketch of this split is given after the list):
+
+1. **Server Setup**: The server hosts a foundation model with generic weights.
+2. **Client Setup**: The user (client) holds the LoRA weights and the sensitive data required for fine-tuning.
+3. **Fine-Tuning Process**:
+   - The client requests forward and backward passes from the server, which uses only the generic weights/parameters.
+   - Any computation that requires the LoRA weights is executed on the client's side.
+4. **Storage**: The LoRA weights are stored on the client's side for later inference, ensuring full privacy of both the specialized model and the sensitive data.
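+
+To make the client/server split concrete, here is a minimal plaintext sketch of the protocol above. It deliberately ignores encryption: in the real workflow the data exchanged with the server is encrypted with FHE, and the split is handled by the hybrid model used in the notebook together with `lora_module.py` and `remote_module.py`. All names and dimensions below are illustrative only.
+
+```python
+# Illustrative plaintext sketch of the client/server LoRA split (no FHE here).
+import torch
+
+torch.manual_seed(0)
+d_model, rank = 768, 4  # GPT-2 hidden size and the LoRA rank used above
+
+# Server side: a frozen projection standing in for the foundation model's generic weights.
+server_linear = torch.nn.Linear(d_model, d_model, bias=False)
+server_linear.weight.requires_grad = False
+
+# Client side: the private, trainable low-rank LoRA factors.
+lora_a = torch.nn.Parameter(torch.randn(rank, d_model) * 0.01)
+lora_b = torch.nn.Parameter(torch.zeros(d_model, rank))
+
+def split_forward(x: torch.Tensor) -> torch.Tensor:
+    # 1. The client sends x to the server (encrypted in the real protocol).
+    base_out = server_linear(x)
+    # 2. The LoRA correction is computed locally with the private weights.
+    lora_out = (x @ lora_a.T) @ lora_b.T
+    # 3. Only the combined activation continues through the rest of the model.
+    return base_out + lora_out
+
+x = torch.randn(2, d_model)
+print(split_forward(x).shape)  # torch.Size([2, 768])
+```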
+
+## Results
+
+The fine-tuned model can generate specialized text based on the provided dataset while ensuring data privacy through FHE.
+
+## Conclusion
+
+This project showcases the potential of combining LoRA and FHE to fine-tune language models in a privacy-preserving manner. By following the steps outlined in the notebook, you can adapt this approach to your own datasets and use cases.
+
+## References
+
+- [LoRA: Low-Rank Adaptation of Large Language Models](https://arxiv.org/abs/2106.09685)
+- [PEFT](https://github.com/huggingface/peft)
diff --git a/use_case_examples/lora_finetune/data_finetune/what_is_fhe.txt b/use_case_examples/lora_finetuning/data_finetune/what_is_fhe.txt
similarity index 100%
rename from use_case_examples/lora_finetune/data_finetune/what_is_fhe.txt
rename to use_case_examples/lora_finetuning/data_finetune/what_is_fhe.txt
diff --git a/use_case_examples/lora_finetune/lora_module.py b/use_case_examples/lora_finetuning/lora_module.py
similarity index 100%
rename from use_case_examples/lora_finetune/lora_module.py
rename to use_case_examples/lora_finetuning/lora_module.py
diff --git a/use_case_examples/lora_finetune/remote_module.py b/use_case_examples/lora_finetuning/remote_module.py
similarity index 100%
rename from use_case_examples/lora_finetune/remote_module.py
rename to use_case_examples/lora_finetuning/remote_module.py
diff --git a/use_case_examples/lora_finetune/requirements.txt b/use_case_examples/lora_finetuning/requirements.txt
similarity index 100%
rename from use_case_examples/lora_finetune/requirements.txt
rename to use_case_examples/lora_finetuning/requirements.txt