feat: add Docker support
Basic docker build & run now works
dvdblk committed Jan 10, 2024 · 1 parent 01faded · commit 0abda9d
Showing 4 changed files with 7 additions and 6 deletions.
README.md (4 changes: 2 additions & 2 deletions)

@@ -16,7 +16,7 @@ Create an `.env` file in the root of the repo (you can use `cp .env.default .env`
| `ADOBE_CLIENT_SECRET` | Copy from "Get credentials" [here](https://developer.adobe.com/document-services/docs/overview/pdf-extract-api/) as with `ADOBE_CLIENT_ID` |
| `OPENAI_API_KEY` | Get the [OpenAI API key](https://help.openai.com/en/articles/4936850-where-do-i-find-my-api-key) |

- After setting the environment variables, you can run the code in one of two ways:
+ After setting the environment variables (make sure **not** to enclose the env variables in quotes), you can run the code in one of two ways:

<details>
<summary><b><font size="+1">Conda Environment</font></b></summary>
@@ -66,7 +66,7 @@ docker run -p 8501:8501 --env-file .env ghcr.io/dvdblk/hack4good:latest
(Optional) To **build** the docker image locally and run it:
```
docker build -t hack4good .
- docker run -p 8501:8501 --env-file .env hack4good
+ docker run -p 8501:8501 --env-file .env --volume $PWD/data:/app/app/data hack4good
```

2. Access the streamlit app at [http://localhost:8501](http://localhost:8501)
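The warning about quoting matters because `docker run --env-file` passes values verbatim and does not strip quotes, so a quoted value arrives with literal `"` characters in the container. A minimal `.env` sketch (keys taken from the table above; all values are illustrative placeholders):

```
# Correct: bare values, no quotes
ADOBE_CLIENT_ID=abc123
ADOBE_CLIENT_SECRET=s3cretvalue
OPENAI_API_KEY=sk-placeholder

# Wrong for --env-file: the quotes become part of the value
# OPENAI_API_KEY="sk-placeholder"
```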
app/llm.py (2 changes: 0 additions & 2 deletions)

@@ -111,7 +111,6 @@ def wrapper(self, *args, **kwargs):
self.n_prompt_tokens += cb.prompt_tokens
self.n_completion_tokens += cb.completion_tokens
self.total_cost += cb.total_cost
- print(cb)
return result

return wrapper
@@ -274,7 +273,6 @@ def generic_question_chain(
intermediate_answer=fetched_sections,
section_ids=[],
)
- print(fetched_sections)
refine_io = RefineIO(intermediate_answer="", section_ids=[])
refine_answer_runnable = create_structured_output_runnable(
RefineIO, self.llm, refine_answer_prompt_template
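The wrapper above accumulates token usage from a callback object onto `self`, and this commit drops the debug `print(cb)` that used to sit before the `return`. A self-contained sketch of the same pattern, where `FakeCallback`, `track_usage`, and `LLMClient` are simplified stand-ins, not the actual classes in `app/llm.py` (only the attribute names visible in the diff are taken from the source):

```python
import functools


class FakeCallback:
    """Stand-in for the LLM usage callback seen in the diff."""

    def __init__(self, prompt_tokens, completion_tokens, total_cost):
        self.prompt_tokens = prompt_tokens
        self.completion_tokens = completion_tokens
        self.total_cost = total_cost


def track_usage(fn):
    """Accumulate usage stats on `self`, mirroring the wrapper in the diff."""

    @functools.wraps(fn)
    def wrapper(self, *args, **kwargs):
        result, cb = fn(self, *args, **kwargs)
        self.n_prompt_tokens += cb.prompt_tokens
        self.n_completion_tokens += cb.completion_tokens
        self.total_cost += cb.total_cost
        # The commit removes a debug `print(cb)` from this spot.
        return result

    return wrapper


class LLMClient:
    def __init__(self):
        self.n_prompt_tokens = 0
        self.n_completion_tokens = 0
        self.total_cost = 0.0

    @track_usage
    def ask(self, question):
        # Pretend LLM call; returns (answer, usage callback).
        return f"answer to {question}", FakeCallback(10, 5, 0.001)
```

Removing the print keeps the accounting side effect while silencing per-call debug output.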
app/main.py (2 changes: 1 addition & 1 deletion)

@@ -23,7 +23,7 @@ def init_state():
client_id=os.getenv("ADOBE_CLIENT_ID"),
client_secret=os.getenv("ADOBE_CLIENT_SECRET"),
# FIXME: Path selectable by user?
- extract_dir_path="data/interim/000-adobe-extract",
+ extract_dir_path="/app/app/data/interim/000-adobe-extract",
)

if "qna_input" not in st.session_state:
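Since `init_state` reads the credentials with `os.getenv`, a missing or mis-quoted `.env` fails silently (the client just receives `None` or a quote-wrapped string) rather than raising at startup. A small sketch of that behavior (variable values are placeholders):

```python
import os

# os.getenv returns None when a variable is unset, so a missing .env
# silently yields credential values of None instead of an error.
os.environ.pop("ADOBE_CLIENT_ID", None)
assert os.getenv("ADOBE_CLIENT_ID") is None

os.environ["ADOBE_CLIENT_ID"] = "abc123"
assert os.getenv("ADOBE_CLIENT_ID") == "abc123"

# A quoted value passed through `docker run --env-file` arrives with
# literal quote characters, which would break the credential:
os.environ["ADOBE_CLIENT_SECRET"] = '"s3cret"'
assert os.getenv("ADOBE_CLIENT_SECRET").startswith('"')
```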
app/preprocessing/adobe/model.py (5 changes: 4 additions & 1 deletion)

@@ -208,7 +208,10 @@ def __init__(
def n_pages(self) -> int:
"""Return the number of pages in the document"""
# Recursively go to the last nested subsection and get the last page
- return sorted(self.all_sections[-1].pages)[-1]
+ if self.all_sections:
+     return sorted(self.all_sections[-1].pages)[-1]
+ else:
+     return 0

def get_section_by_id(self, section_id: str) -> Optional[Section]:
"""Return a section by its ID"""
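The `n_pages` change guards against a document with no sections: the old code indexed `all_sections[-1]` unconditionally and would raise `IndexError` on an empty list. A minimal reproduction of the fixed behavior (the `Section` and `Document` shapes here are simplified stand-ins for the real model classes):

```python
from dataclasses import dataclass, field


@dataclass
class Section:
    pages: list  # page numbers this section spans


@dataclass
class Document:
    all_sections: list = field(default_factory=list)

    @property
    def n_pages(self) -> int:
        """Last page of the last section, or 0 for an empty document."""
        # Pre-fix code returned sorted(self.all_sections[-1].pages)[-1]
        # directly, raising IndexError when all_sections was empty.
        if self.all_sections:
            return sorted(self.all_sections[-1].pages)[-1]
        return 0
```

Returning 0 lets callers treat "no sections yet" as a zero-page document instead of handling an exception.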
