Skip to content

Commit

Permalink
more explicit variable naming for CUDA memory percentage
Browse files (browse the repository at this point in the history)
  • Loading branch information
kcm committed Jan 17, 2023
1 parent 4469e92 commit e7f5788
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
14 changes: 7 additions & 7 deletions app.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,14 +17,14 @@ def startup_event():
global meta_config

cuda_env = os.getenv("ENABLE_CUDA")
cuda_memory_pct = 1.0
if "CUDA_MEMORY_PCT" in os.environ:
cuda_per_process_memory_fraction = 1.0
if "CUDA_PER_PROCESS_MEMORY_FRACTION" in os.environ:
try:
cuda_memory_pct = float(os.getenv("CUDA_MEMORY_PCT"))
cuda_per_process_memory_fraction = float(os.getenv("CUDA_PER_PROCESS_MEMORY_FRACTION"))
except ValueError:
logger.error(f"Invalid CUDA_MEMORY_PCT (should be between 0.0-1.0)")
if 0.0 <= cuda_memory_pct <= 1.0:
logger.info(f"CUDA_MEMORY_PCT set to {cuda_memory_pct}")
logger.error(f"Invalid CUDA_PER_PROCESS_MEMORY_FRACTION (should be between 0.0-1.0)")
if 0.0 <= cuda_per_process_memory_fraction <= 1.0:
logger.info(f"CUDA_PER_PROCESS_MEMORY_FRACTION set to {cuda_per_process_memory_fraction}")
cuda_support=False
cuda_core=""

Expand All @@ -38,7 +38,7 @@ def startup_event():
logger.info("Running on CPU")

meta_config = Meta('./models/model')
vec = Vectorizer('./models/model', cuda_support, cuda_core, cuda_memory_pct,
vec = Vectorizer('./models/model', cuda_support, cuda_core, cuda_per_process_memory_fraction,
meta_config.getModelType(), meta_config.get_architecture())


Expand Down
8 changes: 4 additions & 4 deletions vectorizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,19 +33,19 @@ class Vectorizer:
cuda_core: str
model_type: str

def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_memory_pct: float, model_type: str, architecture: str):
def __init__(self, model_path: str, cuda_support: bool, cuda_core: str, cuda_per_process_memory_fraction: float, model_type: str, architecture: str):
self.cuda = cuda_support
self.cuda_core = cuda_core
self.cuda_memory_pct = cuda_memory_pct
self.cuda_per_process_memory_fraction = cuda_per_process_memory_fraction
self.model_type = model_type

self.model_delegate: HFModel = ModelFactory.model(model_type, architecture)
self.model = self.model_delegate.create_model(model_path)

if self.cuda:
self.model.to(self.cuda_core)
if self.cuda_memory_pct:
torch.cuda.set_per_process_memory_fraction(self.cuda_memory_pct)
if self.cuda_per_process_memory_fraction:
torch.cuda.set_per_process_memory_fraction(self.cuda_per_process_memory_fraction)
self.model.eval() # make sure we're in inference mode, not training

self.tokenizer = self.model_delegate.create_tokenizer(model_path)
Expand Down

0 comments on commit e7f5788

Please sign in to comment.