From 75bc73e04c4cfafe581474783f8e66954bbe14c6 Mon Sep 17 00:00:00 2001 From: feder-cr <85809106+feder-cr@users.noreply.github.com> Date: Thu, 5 Dec 2024 00:57:48 +0100 Subject: [PATCH] First --- main.py | 191 +++++++++++++--- .../llm/llm_job_parser.py | 181 --------------- .../resume_style/__init__.py | 0 src/job.py | 20 +- src/job_application.py | 19 -- src/{ai_hawk => }/libs/llm_manager.py | 0 .../libs/resume_and_cover_builder/__init__.py | 0 .../libs/resume_and_cover_builder/config.py | 0 .../cover_letter_prompt}/__init__.py | 0 .../cover_letter_prompt/strings_feder-cr.py | 2 +- .../llm/llm_generate_cover_letter_from_job.py | 0 .../llm/llm_generate_resume.py | 2 +- .../llm/llm_generate_resume_from_job.py | 4 +- .../llm/llm_job_parser.py | 208 ++++++++++++++++++ .../resume_and_cover_builder/module_loader.py | 0 .../resume_and_cover_builder/resume_facade.py | 88 ++++++-- .../resume_generator.py | 11 +- .../__init__.py | 0 .../strings_feder-cr.py | 0 .../resume_prompt}/__init__.py | 0 .../resume_prompt/strings_feder-cr.py | 0 .../resume_style}/__init__.py | 0 .../resume_style/style_cloyola.css | 0 .../resume_style/style_josylad_blue.css | 0 .../resume_style/style_josylad_grey.css | 0 .../resume_style/style_krishnavalliappan.css | 0 .../resume_style/style_samodum_bold.css | 0 .../resume_and_cover_builder/style_manager.py | 2 +- .../resume_and_cover_builder/template_base.py | 0 .../libs/resume_and_cover_builder/utils.py | 0 .../resume_schemas/job_application_profile.py | 0 src/{ai_hawk => }/resume_schemas/resume.py | 0 32 files changed, 452 insertions(+), 276 deletions(-) delete mode 100644 src/ai_hawk/libs/resume_and_cover_builder/llm/llm_job_parser.py delete mode 100644 src/ai_hawk/libs/resume_and_cover_builder/resume_style/__init__.py delete mode 100644 src/job_application.py rename src/{ai_hawk => }/libs/llm_manager.py (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/__init__.py (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/config.py (100%) rename src/{ai_hawk => libs/resume_and_cover_builder/cover_letter_prompt}/__init__.py (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/cover_letter_prompt/strings_feder-cr.py (97%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/llm/llm_generate_cover_letter_from_job.py (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/llm/llm_generate_resume.py (99%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/llm/llm_generate_resume_from_job.py (96%) create mode 100644 src/libs/resume_and_cover_builder/llm/llm_job_parser.py rename src/{ai_hawk => }/libs/resume_and_cover_builder/module_loader.py (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/resume_facade.py (56%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/resume_generator.py (85%) rename src/{ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt => libs/resume_and_cover_builder/resume_job_description_prompt}/__init__.py (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/resume_job_description_prompt/strings_feder-cr.py (100%) rename src/{ai_hawk/libs/resume_and_cover_builder/resume_job_description_prompt => libs/resume_and_cover_builder/resume_prompt}/__init__.py (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/resume_prompt/strings_feder-cr.py (100%) rename src/{ai_hawk/libs/resume_and_cover_builder/resume_prompt => libs/resume_and_cover_builder/resume_style}/__init__.py (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/resume_style/style_cloyola.css (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/resume_style/style_josylad_blue.css (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/resume_style/style_josylad_grey.css (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/resume_style/style_krishnavalliappan.css (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/resume_style/style_samodum_bold.css (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/style_manager.py (98%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/template_base.py (100%) rename src/{ai_hawk => }/libs/resume_and_cover_builder/utils.py (100%) rename src/{ai_hawk => }/resume_schemas/job_application_profile.py (100%) rename src/{ai_hawk => }/resume_schemas/resume.py (100%) diff --git a/main.py b/main.py index 534b16bcb..b04bf4eef 100644 --- a/main.py +++ b/main.py @@ -12,9 +12,9 @@ from selenium.webdriver.chrome.service import Service as ChromeService from webdriver_manager.chrome import ChromeDriverManager import re -from src.ai_hawk.libs.resume_and_cover_builder import ResumeFacade, ResumeGenerator, StyleManager -from src.ai_hawk.resume_schemas.job_application_profile import JobApplicationProfile -from src.ai_hawk.resume_schemas.resume import Resume +from src.libs.resume_and_cover_builder import ResumeFacade, ResumeGenerator, StyleManager +from src.resume_schemas.job_application_profile import JobApplicationProfile +from src.resume_schemas.resume import Resume from src.logging import logger from src.utils.chrome_utils import init_browser from src.utils.constants import ( @@ -217,20 +217,86 @@ def get_uploads(plain_text_resume_file: Path) -> Dict[str, Path]: return uploads -def create_cv(parameters: dict, llm_api_key: str): +def create_cover_letter(parameters: dict, llm_api_key: str): """ Logic to create a CV. """ try: logger.info("Generating a CV based on provided parameters.") - # Load plain text resume + # Carica il resume in testo semplice with open(parameters["uploads"]["plainTextResume"], "r", encoding="utf-8") as file: plain_text_resume = file.read() style_manager = StyleManager() style_manager.choose_style() + questions = [ + inquirer.Text('job_url', message="Please enter the URL of the job description:") + ] + answers = inquirer.prompt(questions) + job_url = answers.get('job_url') + resume_generator = ResumeGenerator() + resume_object = Resume(plain_text_resume) + driver = init_browser() + resume_generator.set_resume_object(resume_object) + resume_facade = ResumeFacade( + api_key=llm_api_key, + style_manager=style_manager, + resume_generator=resume_generator, + resume_object=resume_object, + output_path=Path("data_folder/output"), + ) + resume_facade.set_driver(driver) + resume_facade.link_to_job(job_url) + result_base64, suggested_name = resume_facade.create_cover_letter() + + # Decodifica Base64 in dati binari + try: + pdf_data = base64.b64decode(result_base64) + except base64.binascii.Error as e: + logger.error("Error decoding Base64: %s", e) + raise + # Definisci il percorso della cartella di output utilizzando `suggested_name` + output_dir = Path(parameters["outputFileDirectory"]) / suggested_name + + # Crea la cartella se non esiste + try: + output_dir.mkdir(parents=True, exist_ok=True) + logger.info(f"Cartella di output creata o già esistente: {output_dir}") + except IOError as e: + logger.error("Error creating output directory: %s", e) + raise + + output_path = output_dir / "cover_letter_tailored.pdf" + try: + with open(output_path, "wb") as file: + file.write(pdf_data) + logger.info(f"CV salvato in: {output_path}") + except IOError as e: + logger.error("Error writing file: %s", e) + raise + except Exception as e: + logger.exception(f"An error occurred while creating the CV: {e}") + raise + + +def create_resume_pdf_job_tailored(parameters: dict, llm_api_key: str): + """ + Logic to create a CV. + """ + try: + logger.info("Generating a CV based on provided parameters.") + + # Carica il resume in testo semplice + with open(parameters["uploads"]["plainTextResume"], "r", encoding="utf-8") as file: + plain_text_resume = file.read() + + style_manager = StyleManager() + style_manager.choose_style() + questions = [inquirer.Text('job_url', message="Please enter the URL of the job description:")] + answers = inquirer.prompt(questions) + job_url = answers.get('job_url') resume_generator = ResumeGenerator() resume_object = Resume(plain_text_resume) driver = init_browser() @@ -243,23 +309,32 @@ def create_cv(parameters: dict, llm_api_key: str): output_path=Path("data_folder/output"), ) resume_facade.set_driver(driver) - result_base64 = resume_facade.create_cover_letter("Software engineer with Java experience") + resume_facade.link_to_job(job_url) + result_base64, suggested_name = resume_facade.create_resume_pdf_job_tailored() - # Decode Base64 to binary data + # Decodifica Base64 in dati binari try: pdf_data = base64.b64decode(result_base64) except base64.binascii.Error as e: logger.error("Error decoding Base64: %s", e) raise - # Define the output path - output_path = Path(parameters["outputFileDirectory"]) / "resume.pdf" + # Definisci il percorso della cartella di output utilizzando `suggested_name` + output_dir = Path(parameters["outputFileDirectory"]) / suggested_name - # Write binary data to the PDF file + # Crea la cartella se non esiste + try: + output_dir.mkdir(parents=True, exist_ok=True) + logger.info(f"Cartella di output creata o già esistente: {output_dir}") + except IOError as e: + logger.error("Error creating output directory: %s", e) + raise + + output_path = output_dir / "resume_tailored.pdf" try: with open(output_path, "wb") as file: file.write(pdf_data) - logger.info(f"CV saved to {output_path}") + logger.info(f"CV salvato in: {output_path}") except IOError as e: logger.error("Error writing file: %s", e) raise @@ -268,24 +343,67 @@ def create_cv(parameters: dict, llm_api_key: str): raise -def create_cover_letter(parameters: dict, llm_api_key: str): +def create_resume_pdf(parameters: dict, llm_api_key: str): """ - Logic to create a cover letter. + Logic to create a CV. """ - # try: - # logger.info("Generating a cover letter based on provided parameters.") - # # Example implementation for generating the letter - # cover_letter_generator = CoverLetterGenerator(llm_api_key) - # result = cover_letter_generator.generate(parameters) - # output_path = Path(parameters["outputFileDirectory"]) / "cover_letter.docx" - # with open(output_path, "w", encoding="utf-8") as file: - # file.write(result) - # logger.info(f"Cover letter saved to {output_path}") - # except Exception as e: - # logger.exception(f"An error occurred while creating the cover letter: {e}") - # raise - pass + try: + logger.info("Generating a CV based on provided parameters.") + + # Carica il resume in testo semplice + with open(parameters["uploads"]["plainTextResume"], "r", encoding="utf-8") as file: + plain_text_resume = file.read() + + style_manager = StyleManager() + style_manager.choose_style() + questions = [inquirer.Text('job_url', message="Please enter the URL of the job description:")] + answers = inquirer.prompt(questions) + job_url = answers.get('job_url') + resume_generator = ResumeGenerator() + resume_object = Resume(plain_text_resume) + driver = init_browser() + resume_generator.set_resume_object(resume_object) + resume_facade = ResumeFacade( + api_key=llm_api_key, + style_manager=style_manager, + resume_generator=resume_generator, + resume_object=resume_object, + output_path=Path("data_folder/output"), + ) + resume_facade.set_driver(driver) + resume_facade.link_to_job(job_url) + result_base64, suggested_name = resume_facade.create_resume_pdf() + + # Decodifica Base64 in dati binari + try: + pdf_data = base64.b64decode(result_base64) + except base64.binascii.Error as e: + logger.error("Error decoding Base64: %s", e) + raise + # Definisci il percorso della cartella di output utilizzando `suggested_name` + output_dir = Path(parameters["outputFileDirectory"]) / suggested_name + + # Crea la cartella se non esiste + try: + output_dir.mkdir(parents=True, exist_ok=True) + logger.info(f"Cartella di output creata o già esistente: {output_dir}") + except IOError as e: + logger.error("Error creating output directory: %s", e) + raise + + output_path = output_dir / "resume.pdf" + try: + with open(output_path, "wb") as file: + file.write(pdf_data) + logger.info(f"CV salvato in: {output_path}") + except IOError as e: + logger.error("Error writing file: %s", e) + raise + except Exception as e: + logger.exception(f"An error occurred while creating the CV: {e}") + raise + def handle_inquiries(selected_actions: List[str], parameters: dict, llm_api_key: str): """ Decide which function to call based on the selected user actions. @@ -296,12 +414,18 @@ def handle_inquiries(selected_actions: List[str], parameters: dict, llm_api_key: """ try: if selected_actions: - if "Create Cover Letter" in selected_actions: - logger.info("Creating a cover letter...") + if "Generate Resume" in selected_actions: + logger.info("Crafting a standout professional resume...") + create_resume_pdf(parameters, llm_api_key) + + if "Generate Resume Tailored for Job Description" in selected_actions: + logger.info("Customizing your resume to enhance your job application...") + create_resume_pdf_job_tailored(parameters, llm_api_key) + + if "Generate Tailored Cover Letter for Job Description" in selected_actions: + logger.info("Designing a personalized cover letter to enhance your job application...") create_cover_letter(parameters, llm_api_key) - if "Create CV" in selected_actions: - logger.info("Creating a CV...") - create_cv(parameters, llm_api_key) + else: logger.warning("No actions selected. Nothing to execute.") except Exception as e: @@ -320,8 +444,9 @@ def prompt_user_action() -> str: 'action', message="Select the action you want to perform:", choices=[ - "Create Cover Letter", - "Create CV", + "Generate Resume", + "Generate Resume Tailored for Job Description", + "Generate Tailored Cover Letter for Job Description", ], ), ] diff --git a/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_job_parser.py b/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_job_parser.py deleted file mode 100644 index 51762363c..000000000 --- a/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_job_parser.py +++ /dev/null @@ -1,181 +0,0 @@ -import os -import tempfile -import textwrap -import time -from src.ai_hawk.libs.resume_and_cover_builder.utils import LoggerChatModel -from langchain_core.output_parsers import StrOutputParser -from langchain_core.prompts import ChatPromptTemplate, PromptTemplate -from langchain_openai import ChatOpenAI -from dotenv import load_dotenv -from concurrent.futures import ThreadPoolExecutor, as_completed -from loguru import logger -from pathlib import Path -from langchain_core.prompt_values import StringPromptValue -from langchain_core.runnables import RunnablePassthrough -from langchain_text_splitters import TokenTextSplitter -from langchain_community.embeddings import OpenAIEmbeddings -from langchain_community.vectorstores import FAISS -from lib_resume_builder_AIHawk.config import global_config -from langchain_community.document_loaders import TextLoader -import logging -import re # Per la parsing regex, soprattutto in `parse_wait_time_from_error_message` -from requests.exceptions import HTTPError as HTTPStatusError # Gestione degli errori HTTP -import openai - -# Carica le variabili d'ambiente dal file .env -load_dotenv() - -# Configura il file di log -log_folder = 'log/resume/gpt_resume' -if not os.path.exists(log_folder): - os.makedirs(log_folder) -log_path = Path(log_folder).resolve() -logger.add(log_path / "gpt_resume.log", rotation="1 day", compression="zip", retention="7 days", level="DEBUG") - - -class LLMResumer: - def __init__(self, openai_api_key, strings): - self.llm_cheap = LoggerChatModel( - ChatOpenAI( - model_name="gpt-4o-mini", openai_api_key=openai_api_key, temperature=0.4 - ) - ) - self.strings = strings - self.llm_embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key) # Inizializza gli embeddings - - @staticmethod - def _preprocess_template_string(template: str) -> str: - """ - Preprocessa la stringa del template rimuovendo gli spazi bianchi iniziali e l'indentazione. - Args: - template (str): La stringa del template da preprocessare. - Returns: - str: La stringa del template preprocessata. - """ - return textwrap.dedent(template) - - def get_job_description_from_url(self, url_job_description): - from lib_resume_builder_AIHawk.utils import create_driver_selenium - driver = create_driver_selenium() - driver.get(url_job_description) - time.sleep(3) - body_element = driver.find_element("tag name", "body") - response = body_element.get_attribute("outerHTML") - driver.quit() - with tempfile.NamedTemporaryFile(delete=False, suffix=".html", mode="w", encoding="utf-8") as temp_file: - temp_file.write(response) - temp_file_path = temp_file.name - try: - loader = TextLoader(temp_file_path, encoding="utf-8", autodetect_encoding=True) - document = loader.load() - finally: - os.remove(temp_file_path) - text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=50) - all_splits = text_splitter.split_documents(document) - vectorstore = FAISS.from_documents(documents=all_splits, embedding=self.llm_embeddings) - prompt = PromptTemplate( - template=""" - You are an expert job description analyst. Your role is to meticulously analyze and interpret job descriptions. - After analyzing the job description, answer the following question in a clear, and informative manner. - - Question: {question} - Job Description: {context} - Answer: - """, - input_variables=["question", "context"] - ) - def format_docs(docs): - return "\n\n".join(doc.page_content for doc in docs) - context_formatter = vectorstore.as_retriever() | format_docs - question_passthrough = RunnablePassthrough() - chain_job_description = prompt | self.llm_cheap | StrOutputParser() - summarize_prompt_template = self._preprocess_template_string(self.strings.summarize_prompt_template) - prompt_summarize = ChatPromptTemplate.from_template(summarize_prompt_template) - chain_summarize = prompt_summarize | self.llm_cheap | StrOutputParser() - qa_chain = ( - { - "context": context_formatter, - "question": question_passthrough, - } - | chain_job_description - | (lambda output: {"text": output}) - | chain_summarize - ) - result = qa_chain.invoke("Provide, full job description") - self.job_description = result - - def extract_company_name(self): - """ - Estrae il nome dell'azienda dalla descrizione del lavoro. - Returns: - str: Il nome dell'azienda estratto. - """ - return self._extract_information("What is the company name in this job description?") - - def extract_role(self): - """ - Estrae il ruolo/titolo ricercato dalla descrizione del lavoro. - Returns: - str: Il ruolo/titolo estratto. - """ - return self._extract_information("What is the role or title being sought in this job description?") - - def extract_location(self): - """ - Estrae la località dalla descrizione del lavoro. - Returns: - str: La località estratta. - """ - return self._extract_information("What is the location mentioned in this job description?") - - def extract_recruiter_email(self): - """ - Estrae l'email del recruiter dalla descrizione del lavoro. - Returns: - str: L'email del recruiter estratta. - """ - return self._extract_information("What is the recruiter's email address in this job description?") - - def _extract_information(self, question): - """ - Metodo generico per estrarre informazioni specifiche basate sulla domanda fornita. - Args: - question (str): La domanda da porre al LLM per l'estrazione. - Returns: - str: L'informazione estratta. - """ - if not hasattr(self, 'job_description'): - raise ValueError("Job description not found. Please run get_job_description_from_url first.") - - prompt = PromptTemplate( - template=""" - You are an expert in extracting specific information from job descriptions. - Carefully read the job description below and provide a clear and concise answer to the question. - - Job Description: {job_description} - - Question: {question} - Answer: - """, - input_variables=["job_description", "question"] - ) - - chain = prompt | self.llm_cheap | StrOutputParser() - result = chain.invoke({ - "job_description": self.job_description, - "question": question - }) - return result.strip() - - def extract_all_details(self): - """ - Estrae il nome dell'azienda, il ruolo, la località e l'email del recruiter dalla descrizione del lavoro. - Returns: - dict: Un dizionario contenente tutti i dettagli estratti. - """ - details = {} - details['company_name'] = self.extract_company_name() - details['role'] = self.extract_role() - details['location'] = self.extract_location() - details['recruiter_email'] = self.extract_recruiter_email() - return details diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/__init__.py b/src/ai_hawk/libs/resume_and_cover_builder/resume_style/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/job.py b/src/job.py index 13ac84ba9..7a48e8df0 100644 --- a/src/job.py +++ b/src/job.py @@ -3,8 +3,7 @@ @dataclass class Job: - id: str = "" - title: str = "" + role: str = "" company: str = "" location: str = "" link: str = "" @@ -12,31 +11,18 @@ class Job: description: str = "" summarize_job_description: str = "" recruiter_link: str = "" - # TODO: to move these properties to JobApplication resume_path: str = "" cover_letter_path: str = "" - def set_summarize_job_description(self, summarize_job_description): - logger.debug(f"Setting summarized job description: {summarize_job_description}") - self.summarize_job_description = summarize_job_description - - def set_job_description(self, description): - logger.debug(f"Setting job description: {description}") - self.description = description - - def set_recruiter_link(self, recruiter_link): - logger.debug(f"Setting recruiter link: {recruiter_link}") - self.recruiter_link = recruiter_link - def formatted_job_information(self): """ Formats the job information as a markdown string. """ - logger.debug(f"Formatting job information for job: {self.title} at {self.company}") + logger.debug(f"Formatting job information for job: {self.role} at {self.company}") job_information = f""" # Job Description ## Job Information - - Position: {self.title} + - Position: {self.role} - At: {self.company} - Location: {self.location} - Recruiter Profile: {self.recruiter_link or 'Not available'} diff --git a/src/job_application.py b/src/job_application.py deleted file mode 100644 index ad3fe0047..000000000 --- a/src/job_application.py +++ /dev/null @@ -1,19 +0,0 @@ -from src.job import Job - -class JobApplication: - - def __init__(self, job: Job): - self.job :Job = job - self.application = [] - self.resume_path = "" - self.cover_letter_path = "" - - def save_application_data(self, application_questions: dict): - self.application.append(application_questions) - - def set_resume_path(self, resume_path: str): - self.resume_path = resume_path - - def set_cover_letter_path(self, cv_path: str): - self.cover_letter_path = cv_path - diff --git a/src/ai_hawk/libs/llm_manager.py b/src/libs/llm_manager.py similarity index 100% rename from src/ai_hawk/libs/llm_manager.py rename to src/libs/llm_manager.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/__init__.py b/src/libs/resume_and_cover_builder/__init__.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/__init__.py rename to src/libs/resume_and_cover_builder/__init__.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/config.py b/src/libs/resume_and_cover_builder/config.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/config.py rename to src/libs/resume_and_cover_builder/config.py diff --git a/src/ai_hawk/__init__.py b/src/libs/resume_and_cover_builder/cover_letter_prompt/__init__.py similarity index 100% rename from src/ai_hawk/__init__.py rename to src/libs/resume_and_cover_builder/cover_letter_prompt/__init__.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt/strings_feder-cr.py b/src/libs/resume_and_cover_builder/cover_letter_prompt/strings_feder-cr.py similarity index 97% rename from src/ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt/strings_feder-cr.py rename to src/libs/resume_and_cover_builder/cover_letter_prompt/strings_feder-cr.py index 5113fa6fe..9568da2df 100644 --- a/src/ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt/strings_feder-cr.py +++ b/src/libs/resume_and_cover_builder/cover_letter_prompt/strings_feder-cr.py @@ -1,4 +1,4 @@ -from src.ai_hawk.libs.resume_and_cover_builder.template_base import prompt_cover_letter_template +from src.libs.resume_and_cover_builder.template_base import prompt_cover_letter_template cover_letter_template = """ diff --git a/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_cover_letter_from_job.py b/src/libs/resume_and_cover_builder/llm/llm_generate_cover_letter_from_job.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_cover_letter_from_job.py rename to src/libs/resume_and_cover_builder/llm/llm_generate_cover_letter_from_job.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume.py b/src/libs/resume_and_cover_builder/llm/llm_generate_resume.py similarity index 99% rename from src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume.py rename to src/libs/resume_and_cover_builder/llm/llm_generate_resume.py index b324d362d..6b0e44122 100644 --- a/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume.py +++ b/src/libs/resume_and_cover_builder/llm/llm_generate_resume.py @@ -4,7 +4,7 @@ # app/libs/resume_and_cover_builder/gpt_resume.py import os import textwrap -from src.ai_hawk.libs.resume_and_cover_builder.utils import LoggerChatModel +from src.libs.resume_and_cover_builder.utils import LoggerChatModel from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate from langchain_openai import ChatOpenAI diff --git a/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume_from_job.py b/src/libs/resume_and_cover_builder/llm/llm_generate_resume_from_job.py similarity index 96% rename from src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume_from_job.py rename to src/libs/resume_and_cover_builder/llm/llm_generate_resume_from_job.py index 8ab861a61..1cff92064 100644 --- a/src/ai_hawk/libs/resume_and_cover_builder/llm/llm_generate_resume_from_job.py +++ b/src/libs/resume_and_cover_builder/llm/llm_generate_resume_from_job.py @@ -3,8 +3,8 @@ """ # app/libs/resume_and_cover_builder/llm_generate_resume_from_job.py import os -from src.ai_hawk.libs.resume_and_cover_builder.llm.llm_generate_resume import LLMResumer -from src.ai_hawk.libs.resume_and_cover_builder.utils import LoggerChatModel +from src.libs.resume_and_cover_builder.llm.llm_generate_resume import LLMResumer +from src.libs.resume_and_cover_builder.utils import LoggerChatModel from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate from langchain_openai import ChatOpenAI diff --git a/src/libs/resume_and_cover_builder/llm/llm_job_parser.py b/src/libs/resume_and_cover_builder/llm/llm_job_parser.py new file mode 100644 index 000000000..08302ff56 --- /dev/null +++ b/src/libs/resume_and_cover_builder/llm/llm_job_parser.py @@ -0,0 +1,208 @@ +import os +import tempfile +import textwrap +import time +import re # For email validation +from src.libs.resume_and_cover_builder.utils import LoggerChatModel +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompts import ChatPromptTemplate, PromptTemplate +from langchain_openai import ChatOpenAI +from dotenv import load_dotenv +from concurrent.futures import ThreadPoolExecutor, as_completed +from loguru import logger +from pathlib import Path +from langchain_core.prompt_values import StringPromptValue +from langchain_core.runnables import RunnablePassthrough +from langchain_text_splitters import TokenTextSplitter +from langchain_community.embeddings import OpenAIEmbeddings +from langchain_community.vectorstores import FAISS +from lib_resume_builder_AIHawk.config import global_config +from langchain_community.document_loaders import TextLoader +from requests.exceptions import HTTPError as HTTPStatusError # HTTP error handling +import openai + +# Load environment variables from the .env file +load_dotenv() + +# Configure the log file +log_folder = 'log/resume/gpt_resume' +if not os.path.exists(log_folder): + os.makedirs(log_folder) +log_path = Path(log_folder).resolve() +logger.add(log_path / "gpt_resume.log", rotation="1 day", compression="zip", retention="7 days", level="DEBUG") + + +class LLMParser: + def __init__(self, openai_api_key): + self.llm = LoggerChatModel( + ChatOpenAI( + model_name="gpt-4o-mini", openai_api_key=openai_api_key, temperature=0.4 + ) + ) + self.llm_embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key) # Initialize embeddings + self.vectorstore = None # Will be initialized after document loading + + @staticmethod + def _preprocess_template_string(template: str) -> str: + """ + Preprocess the template string by removing leading whitespaces and indentation. + Args: + template (str): The template string to preprocess. + Returns: + str: The preprocessed template string. + """ + return textwrap.dedent(template) + + def set_body_html(self, body_html): + """ + Retrieves the job description from HTML, processes it, and initializes the vectorstore. + Args: + body_html (str): The HTML content to process. + """ + + # Save the HTML content to a temporary file + with tempfile.NamedTemporaryFile(delete=False, suffix=".html", mode="w", encoding="utf-8") as temp_file: + temp_file.write(body_html) + temp_file_path = temp_file.name + try: + loader = TextLoader(temp_file_path, encoding="utf-8", autodetect_encoding=True) + document = loader.load() + logger.debug("Document successfully loaded.") + except Exception as e: + logger.error(f"Error during document loading: {e}") + raise + finally: + os.remove(temp_file_path) + logger.debug(f"Temporary file removed: {temp_file_path}") + + # Split the text into chunks + text_splitter = TokenTextSplitter(chunk_size=500, chunk_overlap=50) + all_splits = text_splitter.split_documents(document) + logger.debug(f"Text split into {len(all_splits)} fragments.") + + # Create the vectorstore using FAISS + try: + self.vectorstore = FAISS.from_documents(documents=all_splits, embedding=self.llm_embeddings) + logger.debug("Vectorstore successfully initialized.") + except Exception as e: + logger.error(f"Error during vectorstore creation: {e}") + raise + + def _retrieve_context(self, query: str, top_k: int = 3) -> str: + """ + Retrieves the most relevant text fragments using the retriever. + Args: + query (str): The search query. + top_k (int): Number of fragments to retrieve. + Returns: + str: Concatenated text fragments. + """ + if not self.vectorstore: + raise ValueError("Vectorstore not initialized. Run extract_job_description first.") + + retriever = self.vectorstore.as_retriever() + retrieved_docs = retriever.get_relevant_documents(query)[:top_k] + context = "\n\n".join(doc.page_content for doc in retrieved_docs) + logger.debug(f"Context retrieved for query '{query}': {context[:200]}...") # Log the first 200 characters + return context + + def _extract_information(self, question: str, retrieval_query: str) -> str: + """ + Generic method to extract specific information using the retriever and LLM. + Args: + question (str): The question to ask the LLM for extraction. + retrieval_query (str): The query to use for retrieving relevant context. + Returns: + str: The extracted information. + """ + context = self._retrieve_context(retrieval_query) + + prompt = ChatPromptTemplate.from_template( + template=""" + You are an expert in extracting specific information from job descriptions. + Carefully read the job description context below and provide a clear and concise answer to the question. + + Context: {context} + + Question: {question} + Answer: + """ + ) + + formatted_prompt = prompt.format(context=context, question=question) + logger.debug(f"Formatted prompt for extraction: {formatted_prompt[:200]}...") # Log the first 200 characters + + try: + chain = prompt | self.llm | StrOutputParser() + result = chain.invoke({"context": context, "question": question}) + extracted_info = result.strip() + logger.debug(f"Extracted information: {extracted_info}") + return extracted_info + except Exception as e: + logger.error(f"Error during information extraction: {e}") + return "" + + def extract_job_description(self) -> str: + """ + Extracts the company name from the job description. + Returns: + str: The extracted job description. + """ + question = "What is the job description of the company?" + retrieval_query = "Job description" + logger.debug("Starting job description extraction.") + return self._extract_information(question, retrieval_query) + + def extract_company_name(self) -> str: + """ + Extracts the company name from the job description. + Returns: + str: The extracted company name. + """ + question = "What is the company's name?" + retrieval_query = "Company name" + logger.debug("Starting company name extraction.") + return self._extract_information(question, retrieval_query) + + def extract_role(self) -> str: + """ + Extracts the sought role/title from the job description. + Returns: + str: The extracted role/title. + """ + question = "What is the role or title sought in this job description?" + retrieval_query = "Job title" + logger.debug("Starting role/title extraction.") + return self._extract_information(question, retrieval_query) + + def extract_location(self) -> str: + """ + Extracts the location from the job description. + Returns: + str: The extracted location. + """ + question = "What is the location mentioned in this job description?" + retrieval_query = "Location" + logger.debug("Starting location extraction.") + return self._extract_information(question, retrieval_query) + + def extract_recruiter_email(self) -> str: + """ + Extracts the recruiter's email from the job description. + Returns: + str: The extracted recruiter's email. + """ + question = "What is the recruiter's email address in this job description?" + retrieval_query = "Recruiter email" + logger.debug("Starting recruiter email extraction.") + email = self._extract_information(question, retrieval_query) + + # Validate the extracted email using regex + email_regex = r'[\w\.-]+@[\w\.-]+\.\w+' + if re.match(email_regex, email): + logger.debug("Valid recruiter's email.") + return email + else: + logger.warning("Invalid or not found recruiter's email.") + return "" + diff --git a/src/ai_hawk/libs/resume_and_cover_builder/module_loader.py b/src/libs/resume_and_cover_builder/module_loader.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/module_loader.py rename to src/libs/resume_and_cover_builder/module_loader.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_facade.py b/src/libs/resume_and_cover_builder/resume_facade.py similarity index 56% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_facade.py rename to src/libs/resume_and_cover_builder/resume_facade.py index 39491c407..0119814d8 100644 --- a/src/ai_hawk/libs/resume_and_cover_builder/resume_facade.py +++ b/src/libs/resume_and_cover_builder/resume_facade.py @@ -1,13 +1,15 @@ """ -This module contains the FacadeManager class, which is responsible for managing the interaction between the user and the other components of the application. +This module contains the FacadeManager class, which is responsible for managing the interaction between the user and other components of the application. """ # app/libs/resume_and_cover_builder/manager_facade.py -import logging -import os - +import hashlib import inquirer from pathlib import Path +from loguru import logger + +from src.libs.resume_and_cover_builder.llm.llm_job_parser import LLMParser +from src.job import Job from src.utils.chrome_utils import HTML_to_PDF from .config import global_config @@ -33,7 +35,7 @@ def __init__(self, api_key, style_manager, resume_generator, resume_object, outp self.style_manager = style_manager self.resume_generator = resume_generator self.resume_generator.set_resume_object(resume_object) - self.selected_style = None # Proprietà per memorizzare lo stile selezionato + self.selected_style = None # Property to store the selected style def set_driver(self, driver): self.driver = driver @@ -76,36 +78,88 @@ def choose_style(self) -> None: formatted_choices = self.style_manager.format_choices(styles) selected_choice = self.prompt_user(formatted_choices, "Which style would you like to adopt?") self.selected_style = selected_choice.split(' (')[0] + + def link_to_job(self, job_url): + self.driver.get(job_url) + self.driver.implicitly_wait(10) + body_element = self.driver.find_element("tag name", "body") + body_element = body_element.get_attribute("outerHTML") + self.llm_job_parser = LLMParser(openai_api_key=global_config.API_KEY) + self.llm_job_parser.set_body_html(body_element) + + self.job = Job() + self.job.role = self.llm_job_parser.extract_role() + self.job.company = self.llm_job_parser.extract_company_name() + self.job.description = self.llm_job_parser.extract_job_description() + self.job.location = self.llm_job_parser.extract_location() + self.job.link = job_url + logger.info(f"Extracting job details from URL: {job_url}") - def create_resume_pdf(self, job_description_text=None) -> bytes: + + def create_resume_pdf_job_tailored(self) -> tuple[bytes, str]: """ Create a resume PDF using the selected style and the given job description text. Args: + job_url (str): The job URL to generate the hash for. job_description_text (str): The job description text to include in the resume. Returns: - bytes: The PDF content as bytes. + tuple: A tuple containing the PDF content as bytes and the unique filename. """ if self.selected_style is None: - raise ValueError("Devi scegliere uno stile prima di generare il PDF.") + raise ValueError("You must choose a style before generating the PDF.") style_path = self.style_manager.get_style_path(self.selected_style) - if job_description_text is None: - html_resume = self.resume_generator.create_resume(style_path) - else: - html_resume = self.resume_generator.create_resume_job_description_text(style_path, job_description_text) + html_resume = self.resume_generator.create_resume_job_description_text(style_path, self.job.description) + + # Generate a unique name using the job URL hash + suggested_name = hashlib.md5(self.job.link.encode()).hexdigest()[:10] + result = HTML_to_PDF(html_resume, self.driver) self.driver.quit() - return result + return result, suggested_name - def create_cover_letter(self, job_description_text: str) -> None: + + + def create_resume_pdf(self) -> tuple[bytes, str]: + """ + Create a resume PDF using the selected style and the given job description text. + Args: + job_url (str): The job URL to generate the hash for. + job_description_text (str): The job description text to include in the resume. + Returns: + tuple: A tuple containing the PDF content as bytes and the unique filename. """ - Create a cover letter based on the given job description text and format. + + if self.selected_style is None: + raise ValueError("You must choose a style before generating the PDF.") + + style_path = self.style_manager.get_style_path(self.selected_style) + html_resume = self.resume_generator.create_resume(style_path) + suggested_name = hashlib.md5(self.job.link.encode()).hexdigest()[:10] + result = HTML_to_PDF(html_resume, self.driver) + self.driver.quit() + return result, suggested_name + + def create_cover_letter(self) -> tuple[bytes, str]: + """ + Create a cover letter based on the given job description text and job URL. Args: + job_url (str): The job URL to generate the hash for. job_description_text (str): The job description text to include in the cover letter. + Returns: + tuple: A tuple containing the PDF content as bytes and the unique filename. """ + if self.selected_style is None: + raise ValueError("You must choose a style before generating the PDF.") + style_path = self.style_manager.get_style_path() - cover_letter_html = self.resume_generator.create_cover_letter_job_description(style_path, job_description_text) + cover_letter_html = self.resume_generator.create_cover_letter_job_description(style_path, self.job.description) + + # Generate a unique name using the job URL hash + suggested_name = hashlib.md5(self.job.link.encode()).hexdigest()[:10] + + result = HTML_to_PDF(cover_letter_html, self.driver) self.driver.quit() - return result + return result, suggested_name \ No newline at end of file diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_generator.py b/src/libs/resume_and_cover_builder/resume_generator.py similarity index 85% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_generator.py rename to src/libs/resume_and_cover_builder/resume_generator.py index 6cd5a5816..8844b2876 100644 --- a/src/ai_hawk/libs/resume_and_cover_builder/resume_generator.py +++ b/src/libs/resume_and_cover_builder/resume_generator.py @@ -4,9 +4,9 @@ # app/libs/resume_and_cover_builder/resume_generator.py from string import Template from typing import Any -from src.ai_hawk.libs.resume_and_cover_builder.llm.llm_generate_resume import LLMResumer -from src.ai_hawk.libs.resume_and_cover_builder.llm.llm_generate_resume_from_job import LLMResumeJobDescription -from src.ai_hawk.libs.resume_and_cover_builder.llm.llm_generate_cover_letter_from_job import LLMCoverLetterJobDescription +from src.libs.resume_and_cover_builder.llm.llm_generate_resume import LLMResumer +from src.libs.resume_and_cover_builder.llm.llm_generate_resume_from_job import LLMResumeJobDescription +from src.libs.resume_and_cover_builder.llm.llm_generate_cover_letter_from_job import LLMCoverLetterJobDescription from .module_loader import load_module from .config import global_config @@ -41,4 +41,7 @@ def create_cover_letter_job_description(self, style_path: str, job_description_t gpt_answerer.set_job_description_from_text(job_description_text) cover_letter_html = gpt_answerer.generate_cover_letter() template = Template(global_config.html_template) - return template.substitute(body=cover_letter_html, style_path=style_path) \ No newline at end of file + return template.substitute(body=cover_letter_html, style_path=style_path) + + + \ No newline at end of file diff --git a/src/ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt/__init__.py b/src/libs/resume_and_cover_builder/resume_job_description_prompt/__init__.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/cover_letter_prompt/__init__.py rename to src/libs/resume_and_cover_builder/resume_job_description_prompt/__init__.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_job_description_prompt/strings_feder-cr.py b/src/libs/resume_and_cover_builder/resume_job_description_prompt/strings_feder-cr.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_job_description_prompt/strings_feder-cr.py rename to src/libs/resume_and_cover_builder/resume_job_description_prompt/strings_feder-cr.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_job_description_prompt/__init__.py b/src/libs/resume_and_cover_builder/resume_prompt/__init__.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_job_description_prompt/__init__.py rename to src/libs/resume_and_cover_builder/resume_prompt/__init__.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_prompt/strings_feder-cr.py b/src/libs/resume_and_cover_builder/resume_prompt/strings_feder-cr.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_prompt/strings_feder-cr.py rename to src/libs/resume_and_cover_builder/resume_prompt/strings_feder-cr.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_prompt/__init__.py b/src/libs/resume_and_cover_builder/resume_style/__init__.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_prompt/__init__.py rename to src/libs/resume_and_cover_builder/resume_style/__init__.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_cloyola.css b/src/libs/resume_and_cover_builder/resume_style/style_cloyola.css similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_cloyola.css rename to src/libs/resume_and_cover_builder/resume_style/style_cloyola.css diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_josylad_blue.css b/src/libs/resume_and_cover_builder/resume_style/style_josylad_blue.css similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_josylad_blue.css rename to src/libs/resume_and_cover_builder/resume_style/style_josylad_blue.css diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_josylad_grey.css b/src/libs/resume_and_cover_builder/resume_style/style_josylad_grey.css similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_josylad_grey.css rename to src/libs/resume_and_cover_builder/resume_style/style_josylad_grey.css diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_krishnavalliappan.css b/src/libs/resume_and_cover_builder/resume_style/style_krishnavalliappan.css similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_krishnavalliappan.css rename to src/libs/resume_and_cover_builder/resume_style/style_krishnavalliappan.css diff --git a/src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_samodum_bold.css b/src/libs/resume_and_cover_builder/resume_style/style_samodum_bold.css similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/resume_style/style_samodum_bold.css rename to src/libs/resume_and_cover_builder/resume_style/style_samodum_bold.css diff --git a/src/ai_hawk/libs/resume_and_cover_builder/style_manager.py b/src/libs/resume_and_cover_builder/style_manager.py similarity index 98% rename from src/ai_hawk/libs/resume_and_cover_builder/style_manager.py rename to src/libs/resume_and_cover_builder/style_manager.py index 54a317052..d6aa23e80 100644 --- a/src/ai_hawk/libs/resume_and_cover_builder/style_manager.py +++ b/src/libs/resume_and_cover_builder/style_manager.py @@ -20,7 +20,7 @@ def __init__(self): project_root = current_file.parent.parent.parent.parent # Adatta se la struttura cambia # Imposta la directory degli stili in modo robusto - self.styles_directory = project_root / "ai_hawk" / "libs" / "resume_and_cover_builder" / "resume_style" + self.styles_directory = project_root / "src" / "libs" / "resume_and_cover_builder" / "resume_style" logging.debug(f"Project root determinato come: {project_root}") logging.debug(f"Directory degli stili impostata su: {self.styles_directory}") diff --git a/src/ai_hawk/libs/resume_and_cover_builder/template_base.py b/src/libs/resume_and_cover_builder/template_base.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/template_base.py rename to src/libs/resume_and_cover_builder/template_base.py diff --git a/src/ai_hawk/libs/resume_and_cover_builder/utils.py b/src/libs/resume_and_cover_builder/utils.py similarity index 100% rename from src/ai_hawk/libs/resume_and_cover_builder/utils.py rename to src/libs/resume_and_cover_builder/utils.py diff --git a/src/ai_hawk/resume_schemas/job_application_profile.py b/src/resume_schemas/job_application_profile.py similarity index 100% rename from src/ai_hawk/resume_schemas/job_application_profile.py rename to src/resume_schemas/job_application_profile.py diff --git a/src/ai_hawk/resume_schemas/resume.py b/src/resume_schemas/resume.py similarity index 100% rename from src/ai_hawk/resume_schemas/resume.py rename to src/resume_schemas/resume.py