Skip to content

Commit

Permalink
test: Test ds refactor ll (#523)
Browse files Browse the repository at this point in the history
* fix bugs to former scenario

* fix a bug caused by the change to coding in rdloop

* fix the bug when feedback gets no hypothesis

* fix trace structure

* change all trace hist when merging hypothesis to experiments

* ignore some errors in ruff

* fix kaggle scenario bugs

* refine one line

* another bug

* another small bug

* fix ui bugs

* change kaggle train.py path

---------

Co-authored-by: Xu Yang <[email protected]>
  • Loading branch information
SunsetWolf and peteryang1 authored Jan 17, 2025
1 parent e572aa1 commit ae0ec76
Show file tree
Hide file tree
Showing 19 changed files with 134 additions and 102 deletions.
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ src = ["rdagent"]
[tool.ruff.lint]
ignore = [
# https://docs.astral.sh/ruff/rules/#pydocstyle-d
"ANN101",
"ANN401",
"D",
"ERA001",
Expand All @@ -92,7 +91,7 @@ ignore = [
"S101",
"S301",
"T20",
"TCH003",
"TC003",
"TD",
]
select = ["ALL"]
Expand Down
4 changes: 2 additions & 2 deletions rdagent/app/kaggle/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from rdagent.components.workflow.conf import BasePropSetting
from rdagent.components.workflow.rd_loop import RDLoop
from rdagent.core.developer import Developer
from rdagent.core.exception import FactorEmptyError, ModelEmptyError
from rdagent.core.exception import CoderError, FactorEmptyError, ModelEmptyError
from rdagent.core.proposal import (
Experiment2Feedback,
Hypothesis2Experiment,
Expand Down Expand Up @@ -115,7 +115,7 @@ def running(self, prev_out: dict[str, Any]):

return exp

skip_loop_error = (ModelEmptyError, FactorEmptyError)
skip_loop_error = (ModelEmptyError, FactorEmptyError, CoderError)


def main(path=None, step_n=None, competition=None):
Expand Down
11 changes: 10 additions & 1 deletion rdagent/app/qlib_rd_loop/factor_from_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ def extract_hypothesis_and_exp_from_reports(report_file_path: str) -> Tuple[Qlib

report_content = "\n".join(docs_dict.values())
hypothesis = generate_hypothesis(factor_result, report_content)
exp.hypothesis = hypothesis
return exp, hypothesis


Expand Down Expand Up @@ -128,7 +129,9 @@ def propose_hypo_exp(self, prev_out: dict[str, Any]):
if exp is None:
continue
self.valid_pdf_file_count += 1
exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]]
exp.based_experiments = [QlibFactorExperiment(sub_tasks=[], hypothesis=hypothesis)] + [
t[0] for t in self.trace.hist if t[1]
]
exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp]
exp.sub_tasks = exp.sub_tasks[: FACTOR_FROM_REPORT_PROP_SETTING.max_factors_per_exp]
logger.log_object(hypothesis, tag="hypothesis generation")
Expand All @@ -143,6 +146,12 @@ def propose(self, prev_out: dict[str, Any]):
# Workflow step: hand back the experiment held in self.current_loop_exp.
# prev_out is accepted but not read by this step.
def exp_gen(self, prev_out: dict[str, Any]):
return self.current_loop_exp

# Workflow step: pass the experiment stored under prev_out["exp_gen"] to
# self.coder.develop, log the resulting sub-workspace list under the
# "coder result" tag, and return the developed experiment.
# NOTE(review): indentation is lost in this view; the log call presumably sits
# inside the logger.tag("d") context — confirm against the real file.
def coding(self, prev_out: dict[str, Any]):
with logger.tag("d"): # develop
exp = self.coder.develop(prev_out["exp_gen"])
logger.log_object(exp.sub_workspace_list, tag="coder result")
return exp


def main(report_folder=None, path=None, step_n=None):
"""
Expand Down
2 changes: 1 addition & 1 deletion rdagent/components/coder/factor_coder/eva_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def evaluate(
**kwargs,
):
factor_information = target_task.get_task_information()
code = implementation.code
code = implementation.all_codes

system_prompt = (
Environment(undefined=StrictUndefined)
Expand Down
22 changes: 16 additions & 6 deletions rdagent/components/workflow/rd_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
Experiment2Feedback,
Hypothesis,
Hypothesis2Experiment,
HypothesisFeedback,
HypothesisGen,
Trace,
)
Expand Down Expand Up @@ -74,9 +75,18 @@ def running(self, prev_out: dict[str, Any]):
return exp

def feedback(self, prev_out: dict[str, Any]):
feedback = self.summarizer.generate_feedback(
prev_out["running"], prev_out["direct_exp_gen"]["propose"], self.trace
)
with logger.tag("ef"): # evaluate and feedback
logger.log_object(feedback, tag="feedback")
self.trace.hist.append((prev_out["direct_exp_gen"]["propose"], prev_out["running"], feedback))
e = prev_out.get(self.EXCEPTION_KEY, None)
if e is not None:
feedback = HypothesisFeedback(
observations="Error occurred in loop, skip this loop",
hypothesis_evaluation="",
new_hypothesis="",
reason="",
decision=False,
)
self.trace.hist.append((prev_out["direct_exp_gen"]["exp_gen"], feedback))
else:
feedback = self.summarizer.generate_feedback(prev_out["running"], self.trace)
with logger.tag("ef"): # evaluate and feedback
logger.log_object(feedback, tag="feedback")
self.trace.hist.append((prev_out["running"], feedback))
86 changes: 43 additions & 43 deletions rdagent/core/evolving_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,50 +59,50 @@ def multistep_evolve(
filter_final_evo: bool = False,
) -> EvolvableSubjects:
for evo_loop_id in tqdm(range(self.max_loop), "Implementing"):
with logger.tag(f"evo_loop_{evo_loop_id}"):
# 1. knowledge self-evolving
if self.knowledge_self_gen and self.rag is not None:
self.rag.generate_knowledge(self.evolving_trace)
# 2. RAG
queried_knowledge = None
if self.with_knowledge and self.rag is not None:
# TODO: Putting the evolving trace in here doesn't actually work
queried_knowledge = self.rag.query(evo, self.evolving_trace)

# 3. evolve
evo = self.evolving_strategy.evolve(
evo=evo,
evolving_trace=self.evolving_trace,
queried_knowledge=queried_knowledge,
# with logger.tag(f"evo_loop_{evo_loop_id}"):
# 1. knowledge self-evolving
if self.knowledge_self_gen and self.rag is not None:
self.rag.generate_knowledge(self.evolving_trace)
# 2. RAG
queried_knowledge = None
if self.with_knowledge and self.rag is not None:
# TODO: Putting the evolving trace in here doesn't actually work
queried_knowledge = self.rag.query(evo, self.evolving_trace)

# 3. evolve
evo = self.evolving_strategy.evolve(
evo=evo,
evolving_trace=self.evolving_trace,
queried_knowledge=queried_knowledge,
)
# TODO: Due to design issues, we have chosen to ignore this mypy error.
logger.log_object(evo.sub_workspace_list, tag="evolving code") # type: ignore[attr-defined]
for sw in evo.sub_workspace_list: # type: ignore[attr-defined]
logger.info(f"evolving code workspace: {sw}")

# 4. Pack evolve results
es = EvoStep(evo, queried_knowledge)

# 5. Evaluation
if self.with_feedback:
es.feedback = (
# TODO: Due to the irregular design of rdagent.core.evaluation.Evaluator,
# it fails mypy's test here, so we'll ignore this error for now.
eva
if isinstance(eva, Feedback)
else eva.evaluate(evo, queried_knowledge=queried_knowledge) # type: ignore[arg-type, call-arg]
)
# TODO: Due to design issues, we have chosen to ignore this mypy error.
logger.log_object(evo.sub_workspace_list, tag="evolving code") # type: ignore[attr-defined]
for sw in evo.sub_workspace_list: # type: ignore[attr-defined]
logger.info(f"evolving code workspace: {sw}")

# 4. Pack evolve results
es = EvoStep(evo, queried_knowledge)

# 5. Evaluation
if self.with_feedback:
es.feedback = (
# TODO: Due to the irregular design of rdagent.core.evaluation.Evaluator,
# it fails mypy's test here, so we'll ignore this error for now.
eva
if isinstance(eva, Feedback)
else eva.evaluate(evo, queried_knowledge=queried_knowledge) # type: ignore[arg-type, call-arg]
)
logger.log_object(es.feedback, tag="evolving feedback")

# 6. update trace
self.evolving_trace.append(es)

# 7. check if all tasks are completed
if self.with_feedback:
all_completed = all(es.feedback) if isinstance(es.feedback, list) else es.feedback
if all_completed:
logger.info("All tasks in evolving subject have been completed.")
break
logger.log_object(es.feedback, tag="evolving feedback")

# 6. update trace
self.evolving_trace.append(es)

# 7. check if all tasks are completed
if self.with_feedback:
all_completed = all(es.feedback) if isinstance(es.feedback, list) else es.feedback
if all_completed:
logger.info("All tasks in evolving subject have been completed.")
break

if self.with_feedback and filter_final_evo:
evo = self.filter_evolvable_subjects_by_feedback(evo, self.evolving_trace[-1].feedback)
Expand Down
2 changes: 2 additions & 0 deletions rdagent/log/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ def iter_msg(self, watch: bool = False) -> Generator[Message, None, None]:
msg_l.append(m)

for file in self.path.glob("**/*.pkl"):
if file.name == "debug_llm.pkl":
continue
tag = ".".join(file.relative_to(self.path).as_posix().replace("/", ".").split(".")[:-3])
pid = file.parent.name

Expand Down
2 changes: 1 addition & 1 deletion rdagent/log/ui/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ def hypothesis_hover_text(h: Hypothesis, d: bool = False):
hover_texts = [
hypothesis_hover_text(state.hypotheses[int(i[6:])], state.h_decisions[int(i[6:])])
for i in df.index
if i != "alpha158"
if i != "alpha158" and i != "Baseline"
]
if state.alpha158_metrics is not None:
hover_texts = ["Baseline: alpha158"] + hover_texts
Expand Down
4 changes: 2 additions & 2 deletions rdagent/scenarios/data_mining/proposal/model_proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict, b
else "No previous hypothesis and feedback available since it's the first round."
)

experiment_list: List[ModelExperiment] = [t[1] for t in trace.hist]
experiment_list: List[ModelExperiment] = [t[0] for t in trace.hist]

model_list = []
for experiment in experiment_list:
Expand Down Expand Up @@ -117,5 +117,5 @@ def convert_response(self, response: str, hypothesis: Hypothesis, trace: Trace)
)
)
exp = DMModelExperiment(tasks, hypothesis=hypothesis)
exp.based_experiments = [t[1] for t in trace.hist if t[2]]
exp.based_experiments = [t[0] for t in trace.hist if t[1]]
return exp
9 changes: 9 additions & 0 deletions rdagent/scenarios/data_science/proposal/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ hypothesis_model: # It is deprecated now, please refer to direct_exp_gen
Please generate the output using the following format and specifications:
{{ hypothesis_output_format }}
hypothesis_and_feedback: |-
{% for experiment, feedback in hist %}
Hypothesis {{ loop.index }}
The experiment is design driven by hypothesis : {{ experiment.hypothesis }}
Observation on the result with the hypothesis: {{ feedback.observations }}
Feedback on the original hypothesis: {{ feedback.hypothesis_evaluation }}
Did changing to this hypothesis work? (focus on the change): {{ feedback.decision }}
{% endfor %}
task_gen: # It is deprecated now, please refer to direct_exp_gen
system: |-
{% if hypothesis is not none %}
Expand Down
4 changes: 2 additions & 2 deletions rdagent/scenarios/kaggle/developer/feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,15 @@ def generate_feedback(self, exp: Experiment, trace: Trace) -> HypothesisFeedback
]
else:
current_sub_exps_to_code = {
sub_ws.target_task.get_task_information(): sub_ws.code for sub_ws in exp.sub_workspace_list
sub_ws.target_task.get_task_information(): sub_ws.all_codes for sub_ws in exp.sub_workspace_list
}
current_sub_exps_to_code_str = json.dumps(current_sub_exps_to_code, indent=2)
current_result = exp.result
current_sub_results = exp.sub_results

last_hypothesis_and_feedback = None
if trace.hist and len(trace.hist) > 0:
last_hypothesis_and_feedback = (trace.hist[-1][0], trace.hist[-1][2])
last_hypothesis_and_feedback = (trace.hist[-1][0].hypothesis, trace.hist[-1][1])

# Prepare render dictionary
render_dict = {
Expand Down
2 changes: 1 addition & 1 deletion rdagent/scenarios/kaggle/experiment/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def background(self) -> str:
background_template = prompt_dict["kg_background"]

train_script = (
Path(__file__).parent / f"{KAGGLE_IMPLEMENT_SETTING.competition}_template" / "train.py"
Path(__file__).parent / "templates" / KAGGLE_IMPLEMENT_SETTING.competition / "train.py"
).read_text()

background_prompt = (
Expand Down
4 changes: 2 additions & 2 deletions rdagent/scenarios/kaggle/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ KG_hypothesis_gen_RAG: |-
{% endif %}
hypothesis_and_feedback: |-
{% for hypothesis, experiment, feedback in trace.hist[-10:] %}
Hypothesis {{ loop.index }}: {{ hypothesis }}
{% for experiment, feedback in trace.hist[-10:] %}
Hypothesis {{ loop.index }}: {{ experiment.hypothesis }}
Observation on the result with the hypothesis: {{ feedback.observations }}
Feedback on the original hypothesis: {{ feedback.hypothesis_evaluation }}
Did changing to this hypothesis work? (focus on the change): {{ feedback.decision }}
Expand Down
12 changes: 6 additions & 6 deletions rdagent/scenarios/kaggle/proposal/proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,11 +276,11 @@ def prepare_context(self, trace: Trace) -> Tuple[dict, bool]:

hypothesis_specification = f"Hypothesis should avoid being too general and vague, and should be specific and actionable. For example, hypothesis like 'tune a model' is too general, while hypothesis like 'increase the learning rate to 0.1 of the lightgbm model will improve the performance' is specific and actionable."
if len(trace.hist) > 0:
sota_features = str(trace.hist[-1][1].based_experiments[-1].experiment_workspace.data_description)
sota_features = str(trace.hist[-1][0].based_experiments[-1].experiment_workspace.data_description)
sota_models = json.dumps(
trace.hist[-1][1].based_experiments[-1].experiment_workspace.model_description, indent=2
trace.hist[-1][0].based_experiments[-1].experiment_workspace.model_description, indent=2
)
sota_result = trace.hist[-1][1].based_experiments[-1].result
sota_result = trace.hist[-1][0].based_experiments[-1].result
hypothesis_specification += f"\nYour hypothesis should based on current SOTA solution. The user will conduct experiments based on the SOTA solution to test whether your hypothesis is right on this specific ecompetition. \n\nSOTA Features: {sota_features}\n\nSOTA Models: {sota_models}\n\nSOTA Result: {sota_result}"
if self.scen.if_action_choosing_based_on_UCB:
hypothesis_specification += (
Expand Down Expand Up @@ -340,7 +340,7 @@ def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict, b
else "No previous hypothesis and feedback available since it's the first round."
)

experiment_list: List[ModelExperiment] = [t[1] for t in trace.hist]
experiment_list: List[ModelExperiment] = [t[0] for t in trace.hist]

model_list = []
for experiment in experiment_list:
Expand Down Expand Up @@ -384,7 +384,7 @@ def convert_feature_experiment(self, response: str, hypothesis: Hypothesis, trac
sub_tasks=tasks,
based_experiments=(
[KGFactorExperiment(sub_tasks=[], source_feature_size=trace.scen.input_shape[-1])]
+ [t[1] for t in trace.hist if t[2]]
+ [t[0] for t in trace.hist if t[1]]
),
hypothesis=hypothesis,
)
Expand All @@ -400,7 +400,7 @@ def convert_model_experiment(self, response: str, hypothesis: Hypothesis, trace:
)

based_experiments = [KGModelExperiment(sub_tasks=[], source_feature_size=trace.scen.input_shape[-1])] + [
t[1] for t in trace.hist if t[2]
t[0] for t in trace.hist if t[1]
]
model_type = response_dict.get("model_type", "Model type not provided")
if model_type in KG_MODEL_MAPPING:
Expand Down
4 changes: 2 additions & 2 deletions rdagent/scenarios/qlib/prompts.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
hypothesis_and_feedback: |-
{% for hypothesis, experiment, feedback in trace.hist[-10:] %}
Hypothesis {{ loop.index }}: {{ hypothesis }}
{% for experiment, feedback in trace.hist[-10:] %}
Hypothesis {{ loop.index }}: {{ experiment.hypothesis }}
Corresponding Code (that leads to the difference in performance): {{experiment.sub_workspace_list[0].file_dict.get("model.py")}}
Observation on the result with the hypothesis: {{ feedback.observations }}
Feedback on the original hypothesis: {{ feedback.hypothesis_evaluation }}
Expand Down
4 changes: 2 additions & 2 deletions rdagent/scenarios/qlib/proposal/factor_proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict |
else "No previous hypothesis and feedback available since it's the first round."
)

experiment_list: List[FactorExperiment] = [t[1] for t in trace.hist]
experiment_list: List[FactorExperiment] = [t[0] for t in trace.hist]

factor_list = []
for experiment in experiment_list:
Expand Down Expand Up @@ -98,7 +98,7 @@ def convert_response(self, response: str, hypothesis: Hypothesis, trace: Trace)
)

exp = QlibFactorExperiment(tasks, hypothesis=hypothesis)
exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in trace.hist if t[2]]
exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[0] for t in trace.hist if t[1]]

unique_tasks = []

Expand Down
4 changes: 2 additions & 2 deletions rdagent/scenarios/qlib/proposal/model_proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def prepare_context(self, hypothesis: Hypothesis, trace: Trace) -> Tuple[dict, b
else "No previous hypothesis and feedback available since it's the first round."
)

experiment_list: List[ModelExperiment] = [t[1] for t in trace.hist]
experiment_list: List[ModelExperiment] = [t[0] for t in trace.hist]

model_list = []
for experiment in experiment_list:
Expand Down Expand Up @@ -102,5 +102,5 @@ def convert_response(self, response: str, hypothesis: Hypothesis, trace: Trace)
)
)
exp = QlibModelExperiment(tasks, hypothesis=hypothesis)
exp.based_experiments = [t[1] for t in trace.hist if t[2]]
exp.based_experiments = [t[0] for t in trace.hist if t[1]]
return exp
Loading

0 comments on commit ae0ec76

Please sign in to comment.