diff --git a/app/api/router/predict.py b/app/api/router/predict.py
index 09c986c..08f4a5f 100644
--- a/app/api/router/predict.py
+++ b/app/api/router/predict.py
@@ -30,11 +30,6 @@ def get_db():
 )
 
 
-@router.get("/")
-def hello_world():
-    return {"message": "Hello predict"}
-
-
 @router.put("/insurance")
 def predict_insurance(info: RegModelPrediction, db: Session = Depends(get_db)):
     """
diff --git a/app/api/router/test.py b/app/api/router/test.py
deleted file mode 100644
index 84a5217..0000000
--- a/app/api/router/test.py
+++ /dev/null
@@ -1,84 +0,0 @@
-# -*- coding: utf-8 -*-
-import numpy as np
-import pickle
-from fastapi import APIRouter
-from fastapi import Depends
-from sqlalchemy.orm import Session
-
-from app import crud
-from app import models
-from app.database import engine
-from app.database import SessionLocal
-
-
-models.Base.metadata.create_all(bind=engine)
-
-# Dependency
-
-
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-
-
-router = APIRouter(
-    prefix="/test",
-    tags=["test"],
-    responses={404: {"description": "Not Found"}}
-)
-
-
-@router.get('/file')
-def read_file(column: int = 0, row: int = 5, db: Session = Depends(get_db)):
-    """
-    Read file Temporary
-
-    param
-        (validation needed)
-        column: int (default=0)
-        row: int (default=5)
-    return
-        (temporary structure)
-        file_name: str (path of filename)
-        pickle:
-            path: str
-            version: int
-    """
-    pkl = crud.get_dataset(db, version=1)
-    try:
-        data = np.load(pkl.path, allow_pickle=True)
-        return {'data': data[:row, 0:column+1].tolist()}
-    except Exception as e:
-        print(e)
-        return 0
-
-
-@router.get('/model')
-def read_model(version=1, name='random_forest', db: Session = Depends(get_db)):
-    """
-    Read Model Temporary
-
-    param
-        version: int
-        name: str
-    return
-        path: str
-        version: int
-        name: str
-        classes: int
-    """
-    clf_model = crud.get_clf_model(db, version=version, name=name)
-    try:
-        loaded_model = pickle.load(open(clf_model.path, 'rb'))
-        test = pickle.load(open('test_mnist.pkl', 'rb')).reshape(1, -1)
-
-        pred = loaded_model.predict(test)
-
-        return pred.tolist()
-
-    except Exception as e:
-        print(e)
-        return 11
diff --git a/app/api/router/train.py b/app/api/router/train.py
deleted file mode 100644
index 40be369..0000000
--- a/app/api/router/train.py
+++ /dev/null
@@ -1,72 +0,0 @@
-# -*- coding: utf-8 -*-
-from fastapi import APIRouter
-from fastapi import Depends
-import numpy as np
-import pickle
-from sklearn.ensemble import RandomForestClassifier
-from sqlalchemy.orm import Session
-
-from app import crud
-from app.database import engine
-from app.database import SessionLocal
-import app.models as models
-from app.util import mnist_preprocessing
-
-
-models.Base.metadata.create_all(bind=engine)
-
-# Dependency
-
-
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-
-
-router = APIRouter(
-    prefix="/train",
-    tags=["train"],
-    responses={404: {"description": "Not Found"}}
-)
-
-
-@router.post('/mnist')
-def train_mnist_rf(
-    model_name: str = 'model.pkl',
-    version: int = 1,
-    db: Session = Depends(get_db)
-):
-    """
-    param
-        version: int
-    return
-        path: str
-        version: int
-        name: str
-        classes: int
-    """
-
-    dataset = crud.get_dataset(db, version=version)
-    data = np.load(dataset.path, allow_pickle=True)
-
-    X_train, X_valid, y_train, y_test = mnist_preprocessing(data)
-
-    clf_model = RandomForestClassifier(n_estimators=500,
-                                       max_depth=3,
-                                       random_state=0)
-
-    clf_model.fit(X_train, y_train)
-
-    pickle_md = crud.create_clf_model(db, clf_model={
-        'path': 'model.pkl',
-        'version': version,
-        'name': 'random_forest',
-        'classes': len(np.unique(y_train))
-    })
-
-    pickle.dump(clf_model, open('model.pkl', 'wb'))
-
-    return pickle_md
diff --git a/app/api/router/upload.py b/app/api/router/upload.py
deleted file mode 100644
index 021405c..0000000
--- a/app/api/router/upload.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# -*- coding: utf-8 -*-
-import shutil
-
-from fastapi import APIRouter
-from fastapi import Depends
-from fastapi import File
-from fastapi import UploadFile
-from sqlalchemy.orm import Session
-
-from app import crud
-from app import models
-from app.database import engine
-from app.database import SessionLocal
-
-models.Base.metadata.create_all(bind=engine)
-
-
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-
-
-router = APIRouter(
-    prefix="/upload",
-    tags=["upload"],
-    responses={404: {"description": "Not Found"}}
-)
-
-
-@router.post('/file')
-async def upload_file(file: UploadFile = File(...), db: Session = Depends(get_db)):
-    """
-    Upload file Temporary
-
-    param
-        file: File
-    return (temporary structure)
-        file_name: str (path of filename)
-        dataset:
-            path: str
-            version: int
-    """
-    with open(f'{file.filename}', 'wb') as buffer:
-        shutil.copyfileobj(file.file, buffer)
-    dataset = crud.create_dataset(db, Dataset={
-        'path': file.filename,
-        'version': 1
-    })
-    return {'file_name': file.filename, 'dataset': dataset}
diff --git a/app/api/schemas.py b/app/api/schemas.py
index 4506481..0df4913 100644
--- a/app/api/schemas.py
+++ b/app/api/schemas.py
@@ -1,50 +1,6 @@
 from pydantic import BaseModel
 
 
-class ItemBase(BaseModel):
-    title: str
-
-
-class ItemCreate(ItemBase):
-    pass
-
-
-class Item(ItemBase):
-    class Config:
-        orm_mode = True
-
-
-class DatasetBase(BaseModel):
-    path: str
-    version: int
-
-
-class DatasetCreate(DatasetBase):
-    pass
-
-
-class Dataset(DatasetBase):
-    class config:
-        orm_mode = True
-
-
-class ClfModelBase(BaseModel):
-    path: str
-    version: int
-    name: str
-    classes: int
-    score: float
-
-
-class ClfModelCreate(ClfModelBase):
-    pass
-
-
-class ClfModel(ClfModelBase):
-    class Config:
-        orm_mode = True
-
-
 class RegModelBase(BaseModel):
     model_name: str
 
diff --git a/app/crud.py b/app/crud.py
index 3d91ed6..a00180a 100644
--- a/app/crud.py
+++ b/app/crud.py
@@ -4,35 +4,6 @@
 from app.api import schemas
 
 
-def get_dataset(db: Session, version=1):
-    return db.query(models.Dataset).filter(
-        models.Dataset.version == version
-    ).first()
-
-
-def create_dataset(db: Session, Dataset: schemas.DatasetCreate):
-    db_dataset = models.Dataset(**Dataset)
-    db.add(db_dataset)
-    db.commit()
-    db.refresh(db_dataset)
-    return db_dataset
-
-
-def get_clf_model(db: Session, version=1, name='random_forest'):
-    return db.query(models.ClfModel).filter(
-        models.ClfModel.version == version and
-        models.ClfModel.name == name
-    ).first()
-
-
-def create_clf_model(db: Session, clf_model: schemas.ClfModelCreate):
-    db_cf_model = models.ClfModel(**clf_model)
-    db.add(db_cf_model)
-    db.commit()
-    db.refresh(db_cf_model)
-    return db_cf_model
-
-
 def get_reg_model(db: Session, model_name: schemas.RegModelBase):
     return db.query(models.RegModel).filter(
         models.RegModel.model_name == model_name
diff --git a/app/models.py b/app/models.py
index d99b57b..41ddaf2 100644
--- a/app/models.py
+++ b/app/models.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 import datetime
-from sqlalchemy import Column, Integer, String, FLOAT, DateTime, ForeignKey
+from sqlalchemy import Column, String, FLOAT, DateTime, ForeignKey
 from sqlalchemy.sql.functions import now
 from sqlalchemy.orm import relationship
 
@@ -9,32 +9,6 @@
 KST = datetime.timezone(datetime.timedelta(hours=9))
 
 
-class Item(Base):
-    __tablename__ = 'items'
-
-    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
-    title = Column(String, index=True, default='test')
-
-
-class Dataset(Base):
-    __tablename__ = 'dataset'
-
-    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
-    path = Column(String, index=True)
-    version = Column(Integer, index=True, autoincrement=True)
-
-
-class ClfModel(Base):
-    __tablename__ = 'clf_model'
-
-    id = Column(Integer, primary_key=True, index=True, autoincrement=True)
-    path = Column(String, index=True)
-    version = Column(Integer, index=True, autoincrement=True)
-    name = Column(String, index=True)
-    classes = Column(Integer)
-    score = Column(FLOAT)
-
-
 class RegModel(Base):
     __tablename__ = 'reg_model'
 
diff --git a/app/util.py b/app/util.py
deleted file mode 100644
index 2406b38..0000000
--- a/app/util.py
+++ /dev/null
@@ -1,15 +0,0 @@
-import numpy as np
-from sklearn.model_selection import train_test_split
-
-
-def mnist_preprocessing(data: np.ndarray,
-                        TEST_SIZE: float = 0.2):
-    """
-    return train_test_split result
-    """
-
-    X, y = data[:, 1:], data[:, 0]
-    X_train, X_valid, y_train, y_valid = \
-        train_test_split(X, y, test_size=TEST_SIZE)
-
-    return X_train, X_valid, y_train, y_valid
diff --git a/main.py b/main.py
index 0179e37..2ed0b60 100644
--- a/main.py
+++ b/main.py
@@ -1,8 +1,7 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
-from app.api.router import test, predict, train, upload
-from app.database import SessionLocal
+from app.api.router import predict
 
 app = FastAPI()
 
@@ -16,18 +15,7 @@
     allow_headers=["*"],
 )
 
-app.include_router(test.router)
 app.include_router(predict.router)
-app.include_router(upload.router)
-app.include_router(train.router)
-
-
-def get_db():
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
 
 
 @app.get("/")
diff --git a/module/query.py b/module/query.py
index 15a81bf..992cde5 100644
--- a/module/query.py
+++ b/module/query.py
@@ -4,8 +4,8 @@
         model_name,
         path
     ) VALUES(
-        {},
-        {}
+        %s,
+        %s
     )
 """
 
@@ -20,14 +20,14 @@
         train_mse,
         val_mse
     ) VALUES (
-        {},
-        {},
-        {},
-        {},
-        {},
-        {},
-        {},
-        {}
+        %s,
+        %s,
+        %s,
+        %s,
+        %s,
+        %s,
+        %s,
+        %s
     )
 """
 
@@ -35,26 +35,26 @@
 UPDATE_REG_MODEL_METADATA = """
     UPDATE reg_model_metadata
     SET
-    train_mae = {},
-    val_mae = {},
-    train_mse = {},
-    val_mse = {}
-    WHERE experiment_name = {}
+    train_mae = %s,
+    val_mae = %s,
+    train_mse = %s,
+    val_mse = %s
+    WHERE experiment_name = %s
 """
 
 # pd READ_SQL
-ALL_INSURANCE = """
+SELECT_ALL_INSURANCE = """
 SELECT *
 FROM insurance
 """
 
-VAL_MAE = """
+SELECT_VAL_MAE = """
 SELECT val_mae
 FROM reg_model_metadata
 WHERE reg_model_name = %s
 """
 
-MODEL = """
+SELECT_REG_MODEL = """
 SELECT *
 FROM reg_model
 WHERE model_name = %s
diff --git a/module/trial.py b/module/trial.py
index 6b5919b..edc2fee 100644
--- a/module/trial.py
+++ b/module/trial.py
@@ -108,7 +108,7 @@ def main(params, df, engine, experiment_info, connection):
     tr_mse_mean = np.mean(tr_mse)
     tr_mae_mean = np.mean(tr_mae)
 
-    best_model = pd.read_sql(MODEL % (model_name), engine)
+    best_model = pd.read_sql(SELECT_REG_MODEL, engine, params=(model_name,))
 
     if len(best_model) == 0:
 
@@ -127,7 +127,8 @@ def main(params, df, engine, experiment_info, connection):
         )
 
     else:
-        best_model_metadata = pd.read_sql(VAL_MAE % (model_name), engine)
+        best_model_metadata = pd.read_sql(
+            SELECT_VAL_MAE, engine, params=(model_name,))
         saved_score = best_model_metadata.values[0]
         if saved_score > valid_mae:
             with open(f"{os.path.join(path, model_name)}.pkl".replace("'", ""), "wb") as f:
@@ -165,7 +166,7 @@
         'version': 0.1
     }
 
-    df = pd.read_sql(ALL_INSURANCE, engine)
+    df = pd.read_sql(SELECT_ALL_INSURANCE, engine)
 
     with engine.connect() as connection:
         with connection.begin():
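Note on the query changes: replacing the `{}` placeholders in `module/query.py` with `%s` moves value substitution from Python string formatting to the DB-API driver, which quotes and escapes each value (closing the SQL-injection hole and fixing unquoted string literals). For the same reason, `trial.py` should pass values via `params` to `pd.read_sql` rather than interpolating with `%`, as done in the hunks above. A minimal usage sketch follows, assuming SQLAlchemy 1.x with a `%s`-paramstyle driver such as psycopg2; the DSN, model name, and metric values are illustrative placeholders, and only `SELECT_VAL_MAE` and `UPDATE_REG_MODEL_METADATA` come from this diff:

```python
import pandas as pd
from sqlalchemy import create_engine

from module.query import SELECT_VAL_MAE, UPDATE_REG_MODEL_METADATA

# Placeholder DSN -- substitute the project's real connection string.
engine = create_engine("postgresql://user:password@localhost:5432/mlops")

# pd.read_sql forwards `params` to the driver, which fills in the %s
# placeholders itself, so the value arrives quoted and escaped.
val_mae = pd.read_sql(SELECT_VAL_MAE, engine, params=("linear_regression",))

# Writes use the same mechanism: pass values as a tuple instead of
# formatting them into the SQL string (order: train_mae, val_mae,
# train_mse, val_mse, experiment_name).
with engine.connect() as connection:
    with connection.begin():
        connection.execute(
            UPDATE_REG_MODEL_METADATA,
            (0.41, 0.47, 0.33, 0.39, "exp_linear_regression"),
        )
```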