Add support for PyTorch GPU memory limit (#41)
* Add support for PyTorch GPU memory limit

* log bad CUDA memory setting as an error

* Add support for sentence-t5-base model

* Switch to github actions

* Fixed deploy step

* Fix GIT_TAG env variable in deploy step

* Enable all of the transformers models

* Add duplicate /vectors route to remove 307 redirects (see the routing sketch below)

* remove unused packages, remove installation of third party libs (#38)

* remove unused packages, remove installation of third party libs

* update requirements

* more explicit variable naming for CUDA memory percentage

Co-authored-by: Marcin Antas <[email protected]>
Co-authored-by: John Trengrove <[email protected]>
Co-authored-by: Stefan Bogdan <[email protected]>
4 people authored Jan 18, 2023
1 parent 495dce8 commit 4a38a7b
Showing 2 changed files with 13 additions and 2 deletions.
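
For context on the "duplicate /vectors route" item in the commit message above, a minimal FastAPI sketch of the idea (app setup and handler body are illustrative, not taken from this repository): registering both the slashed and unslashed spelling of the path lets either form be served directly, instead of one of them being answered with an automatic 307 trailing-slash redirect.

from fastapi import FastAPI

app = FastAPI()

# Both spellings map to the same handler, so neither request triggers
# FastAPI's automatic 307 trailing-slash redirect.
@app.post("/vectors")
@app.post("/vectors/")
async def vectors(payload: dict):
    return {"vector": []}  # placeholder response
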
10 changes: 9 additions & 1 deletion app.py
@@ -17,6 +17,14 @@ def startup_event():
     global meta_config

     cuda_env = os.getenv("ENABLE_CUDA")
+    cuda_per_process_memory_fraction = 1.0
+    if "CUDA_PER_PROCESS_MEMORY_FRACTION" in os.environ:
+        try:
+            cuda_per_process_memory_fraction = float(os.getenv("CUDA_PER_PROCESS_MEMORY_FRACTION"))
+        except ValueError:
+            logger.error(f"Invalid CUDA_PER_PROCESS_MEMORY_FRACTION (should be between 0.0-1.0)")
+    if 0.0 <= cuda_per_process_memory_fraction <= 1.0:
+        logger.info(f"CUDA_PER_PROCESS_MEMORY_FRACTION set to {cuda_per_process_memory_fraction}")
     cuda_support=False
     cuda_core=""

@@ -30,7 +38,7 @@ def startup_event():
         logger.info("Running on CPU")

     meta_config = Meta('./models/model')
-    vec = Vectorizer('./models/model', cuda_support, cuda_core,
+    vec = Vectorizer('./models/model', cuda_support, cuda_core, cuda_per_process_memory_fraction,
                      meta_config.getModelType(), meta_config.get_architecture())

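A standalone sketch of the env-var handling added to startup_event() above (the function name and logging setup are illustrative): a value that cannot be parsed as a float is logged as an error and the fraction falls back to the default of 1.0, i.e. no memory limit.

import logging
import os

logger = logging.getLogger(__name__)

def read_cuda_memory_fraction() -> float:
    # Default of 1.0 means the whole GPU may be used.
    fraction = 1.0
    raw = os.getenv("CUDA_PER_PROCESS_MEMORY_FRACTION")
    if raw is not None:
        try:
            fraction = float(raw)
        except ValueError:
            # Mirrors the commit: log the bad value as an error and keep the default.
            logger.error("Invalid CUDA_PER_PROCESS_MEMORY_FRACTION (should be between 0.0-1.0)")
    if 0.0 <= fraction <= 1.0:
        logger.info("CUDA_PER_PROCESS_MEMORY_FRACTION set to %s", fraction)
    return fraction
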
5 changes: 4 additions & 1 deletion vectorizer.py
@@ -33,16 +33,19 @@ class Vectorizer:
     cuda_core: str
     model_type: str

-    def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, model_type: str, architecture: str):
+    def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float, model_type: str, architecture: str):
         self.cuda = cuda_support
         self.cuda_core = cuda_core
+        self.cuda_per_process_memory_fraction = cuda_per_process_memory_fraction
         self.model_type = model_type

         self.model_delegate: HFModel = ModelFactory.model(model_type, architecture)
         self.model = self.model_delegate.create_model(model_path)

         if self.cuda:
             self.model.to(self.cuda_core)
+            if self.cuda_per_process_memory_fraction:
+                torch.cuda.set_per_process_memory_fraction(self.cuda_per_process_memory_fraction)
         self.model.eval() # make sure we're in inference mode, not training

         self.tokenizer = self.model_delegate.create_tokenizer(model_path)
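
For reference, a minimal sketch of the PyTorch call the constructor now makes when CUDA is enabled (assumes a CUDA build of torch and at least one visible GPU; the 0.5 value is illustrative): once the fraction is set, allocations in this process beyond that share of the device's total memory raise an out-of-memory error instead of growing unbounded.

import torch

if torch.cuda.is_available():
    # Cap this process at 50% of device 0's total memory.
    torch.cuda.set_per_process_memory_fraction(0.5, device=0)
    # Allocations work as usual; only the per-process ceiling changes.
    x = torch.empty(1024, 1024, device="cuda:0")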
