From 05c6ee42ce958d7a5077e9661ea475fa6e830279 Mon Sep 17 00:00:00 2001
From: Rishabh <134101578+GitHoobar@users.noreply.github.com>
Date: Thu, 6 Feb 2025 18:28:20 +0530
Subject: [PATCH 1/8] test

---
 camel/verifiers/__init__.py                   |  20 ++
 camel/verifiers/base_verifier.py              | 117 ++++++++++++
 camel/verifiers/code_verifier.py              |  15 ++
 camel/verifiers/math_verifier.py              | 180 ++++++++++++++++++
 camel/verifiers/router/verification_router.py |  99 ++++++++++
 camel/verifiers/types.py                      |  84 ++++++++
 examples/verifiers/math_example.py            |  76 ++++++++
 7 files changed, 591 insertions(+)
 create mode 100644 camel/verifiers/__init__.py
 create mode 100644 camel/verifiers/base_verifier.py
 create mode 100644 camel/verifiers/code_verifier.py
 create mode 100644 camel/verifiers/math_verifier.py
 create mode 100644 camel/verifiers/router/verification_router.py
 create mode 100644 camel/verifiers/types.py
 create mode 100644 examples/verifiers/math_example.py

diff --git a/camel/verifiers/__init__.py b/camel/verifiers/__init__.py
new file mode 100644
index 0000000000..76c1391c63
--- /dev/null
+++ b/camel/verifiers/__init__.py
@@ -0,0 +1,20 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+from .base_verifier import BaseVerifier
+from .code_verifier import CodeVerifier
+from .math_verifier import MathVerifier
+from .router.verification_router import DomainVerifier
+
+__all__ = ['BaseVerifier', 'MathVerifier', 'CodeVerifier', 'DomainVerifier']
diff --git a/camel/verifiers/base_verifier.py b/camel/verifiers/base_verifier.py
new file mode 100644
index 0000000000..d4ba52d73d
--- /dev/null
+++ b/camel/verifiers/base_verifier.py
@@ -0,0 +1,117 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+"""Base verifier class that all domain-specific verifiers inherit from."""
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional
+
+from datasets import Dataset
+
+
+class BaseVerifier(ABC):
+    """Abstract base class for all verifiers.
+
+    This class defines the interface that all domain-specific verifiers
+    must implement.
+    It provides common functionality and enforces a consistent verification
+    pattern.
+    """
+
+    def __init__(self, criteria: Optional[Dict[str, Any]] = None) -> None:
+        """Initialize the verifier.
+ + Args: + criteria: Optional dictionary of verification criteria that + override defaults + """ + self.criteria = criteria or {} + + @abstractmethod + def verify( + self, + data: Dataset, + criteria: Optional[Dict[str, Any]] = None, + ) -> Dataset: + """Verify the provided data. + + Args: + data: Dataset containing items to verify + criteria: Optional verification criteria for this specific call + + Returns: + Dataset with verification results added + + Note: + The returned dataset should include at minimum a 'correct' column + indicating whether each item passed verification. + """ + raise NotImplementedError + + def _calculate_score( + self, + details: Dict[str, Any], + weights: Optional[Dict[str, float]] = None, + ) -> float: + """Calculate overall verification score from component scores. + + Args: + details: Dictionary of component verification results + weights: Optional weights for each component + + Returns: + Float between 0 and 1 representing overall score + """ + if not details: + return 0.0 + + weights = weights or {k: 1.0 for k in details.keys()} + total_weight = sum(weights[k] for k in details.keys() if k in weights) + + if total_weight == 0: + return 0.0 + + weighted_sum = sum( + details[k] * weights[k] + for k in details.keys() + if k in weights and isinstance(details[k], (int, float)) + ) + + return weighted_sum / total_weight + + def _format_feedback( + self, details: Dict[str, Any], threshold: float = 0.7 + ) -> str: + """Format verification details into human-readable feedback. + + Args: + details: Dictionary of verification details + threshold: Score threshold for passing + + Returns: + Formatted feedback string + """ + feedback = [] + + for key, value in details.items(): + if isinstance(value, (int, float)): + status = "PASS" if value >= threshold else "FAIL" + feedback.append(f"{key}: {value:.2f} [{status}]") + else: + feedback.append(f"{key}: {value}") + + return "\n".join(feedback) + + def __repr__(self) -> str: + """Return string representation of the verifier.""" + return f"{self.__class__.__name__}(criteria={self.criteria})" diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py new file mode 100644 index 0000000000..8b37dc896d --- /dev/null +++ b/camel/verifiers/code_verifier.py @@ -0,0 +1,15 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +class CodeVerifier: + print() diff --git a/camel/verifiers/math_verifier.py b/camel/verifiers/math_verifier.py new file mode 100644 index 0000000000..00d383af4d --- /dev/null +++ b/camel/verifiers/math_verifier.py @@ -0,0 +1,180 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + + +import re +from typing import Any, ClassVar, Dict, Optional + +from datasets import Dataset + +from camel.agents import ChatAgent +from camel.responses import ChatAgentResponse +from camel.verifiers.base_verifier import BaseVerifier +from camel.verifiers.types import VerificationResult + + +class MathVerifier(BaseVerifier): + """Verifier for mathematical problems using LLM.""" + + DEFAULT_CRITERIA: ClassVar[Dict[str, Any]] = { + "numerical_tolerance": 1e-6, + "verify_steps": True, + } + + def __init__( + self, + criteria: Optional[Dict[str, Any]] = None, + agent: Optional[ChatAgent] = None, + ) -> None: + """Initialize the verifier. + + Args: + criteria: Optional verification criteria + agent: ChatAgent instance for verification + """ + super().__init__(criteria) + self.agent = agent + + def verify( + self, data: Dataset, criteria: Optional[Dict[str, Any]] = None + ) -> Dataset: + """Verify mathematical solutions in the dataset.""" + criteria = {**self.DEFAULT_CRITERIA, **(criteria or {})} + + def verify_single(example): + result = self._verify_solution( + question=example["question"], + solution=example["solution"], + answer=example.get("answer"), + criteria=criteria, + ) + + example["verification_result"] = result.dict() + example["correct"] = result.passed + return example + + return data.map(verify_single) + + def _verify_solution( + self, + question: str, + solution: str, + answer: Optional[str] = None, + criteria: Optional[Dict[str, Any]] = None, + ) -> VerificationResult: + """Verify a single mathematical solution using LLM.""" + try: + # Extract boxed answers + solution_value = self._extract_boxed_answer(solution) + answer_value = ( + self._extract_boxed_answer(answer) if answer else None + ) + + if not solution_value: + return VerificationResult( + score=0.0, + passed=False, + details={"error": "No \\boxed{} answer found"}, + feedback="Solution must include a \\boxed{} answer", + error=None, + ) + + # Construct prompt for LLM + prompt = self._construct_verification_prompt( + question=question, solution=solution_value, answer=answer_value + ) + + if self.agent is None: + raise ValueError("ChatAgent not initialized") + + # Get LLM response + response: ChatAgentResponse = self.agent.step(prompt) + verification_result = self._parse_llm_response( + response.msgs[0].content + ) + + return VerificationResult( + score=verification_result["score"], + passed=verification_result["passed"], + details=verification_result["details"], + feedback=verification_result["feedback"], + error=None, + ) + + except Exception as e: + return VerificationResult( + score=0.0, + passed=False, + details={"error": str(e)}, + feedback=f"Verification failed: {e!s}", + error=str(e), + ) + + def _extract_boxed_answer(self, text: Optional[str]) -> Optional[str]: + """Extract answer from \\boxed{} notation.""" + if not text: + return None + + boxed_pattern = r'\\boxed\s*{\s*([^}]+)\s*}' + match = re.search(boxed_pattern, text) + return match.group(1).strip() if match else None + + def _construct_verification_prompt( + self, 
question: str, solution: str, answer: Optional[str] + ) -> str: + """Construct prompt for LLM verification.""" + prompt = ( + "Please verify this mathematical solution.\n\n" + f"Question: {question}\n" + f"Student's solution: {solution}\n" + ) + + if answer: + prompt += f"Correct answer: {answer}\n" + + prompt += ( + "\nPlease verify if the solution is correct and provide feedback " + "in the following JSON format:\n" + "{\n" + ' "score": ,\n' + ' "passed": ,\n' + ' "details": {},\n' + ' "feedback": ""\n' + "}" + ) + + return prompt + + def _parse_llm_response(self, response: str) -> Dict[str, Any]: + """Parse LLM response into verification result.""" + try: + # Basic parsing in practice you might want more robust JSON parsing + import json + + result = json.loads(response) + + # Ensure required fields + required_fields = ["score", "passed", "details", "feedback"] + if not all(field in result for field in required_fields): + raise ValueError("Missing required fields in LLM response") + + return result + + except Exception as e: + return { + "score": 0.0, + "passed": False, + "details": {"error": f"Failed to parse LLM response: {e!s}"}, + "feedback": "Error in LLM verification", + } diff --git a/camel/verifiers/router/verification_router.py b/camel/verifiers/router/verification_router.py new file mode 100644 index 0000000000..36c704f49f --- /dev/null +++ b/camel/verifiers/router/verification_router.py @@ -0,0 +1,99 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""Verifier module for routing to domain-specific verifiers.""" + +from typing import Any, ClassVar, Dict, Optional, Type, Union + +from datasets import Dataset + +from camel.verifiers.base_verifier import BaseVerifier +from camel.verifiers.math_verifier import MathVerifier + + +class DomainVerifier: + """Main verifier class that routes to domain-specific verifiers.""" + + VERIFIERS: ClassVar[Dict[str, Type[BaseVerifier]]] = { + "math": MathVerifier, + } + + @classmethod + def verify( + cls, + domain: str, + data: Union[Dataset, Dict[str, Any]], + criteria: Optional[Dict[str, Any]] = None, + ) -> Dataset: + """Verify data using appropriate domain-specific verifier. 
+ + Args: + domain: Domain identifier ("math", "code", etc) + data: Data to verify (Dataset or dict) + criteria: Optional verification criteria + + Returns: + Verified dataset with results + + Raises: + ValueError: If domain is not supported and strict_mode is True + """ + # Convert dict to dataset if needed + if isinstance(data, dict): + data = Dataset.from_dict(data) + + # Get appropriate verifier + verifier_cls = cls.VERIFIERS.get(domain) + if verifier_cls is None: + if criteria and criteria.get("strict_mode", False): + raise ValueError(f"Unsupported domain: {domain}") + # Default to marking everything as correct if no specific verifier + return data.add_column("correct", [True] * len(data)) + + # Create verifier instance and verify + verifier = verifier_cls(criteria=criteria) + verified_data = verifier.verify(data) + + # Filter to only correct results if specified + if criteria and criteria.get("filter_incorrect", False): + verified_data = verified_data.filter(lambda x: x["correct"]) + + return verified_data + + @classmethod + def get_supported_domains(cls) -> list[str]: + """Get list of supported verification domains. + + Returns: + List of domain identifiers that have registered verifiers + """ + return list(cls.VERIFIERS.keys()) + + @classmethod + def register_verifier( + cls, domain: str, verifier_cls: Type[BaseVerifier] + ) -> None: + """Register a new domain verifier. + + Args: + domain: Domain identifier + verifier_cls: Verifier class to register + + Raises: + ValueError: If domain is already registered + """ + if domain in cls.VERIFIERS: + raise ValueError( + f"Domain {domain} already has a registered verifier" + ) + cls.VERIFIERS[domain] = verifier_cls diff --git a/camel/verifiers/types.py b/camel/verifiers/types.py new file mode 100644 index 0000000000..271776dbf2 --- /dev/null +++ b/camel/verifiers/types.py @@ -0,0 +1,84 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= +"""Type definitions for verification results and metrics.""" + +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field + + +class VerificationMetrics(BaseModel): + """Metrics used in verification process.""" + + name: str = Field(..., description="Name of the metric") + value: float = Field( + ..., ge=0, le=1, description="Metric value between 0 and 1" + ) + weight: float = Field( + 1.0, ge=0, description="Weight of this metric in overall score" + ) + threshold: float = Field( + 0.7, ge=0, le=1, description="Passing threshold for this metric" + ) + + class Config: + """Pydantic configuration.""" + + frozen = True + + +class VerificationResult(BaseModel): + """Results from verification process.""" + + score: float = Field( + ..., + ge=0, + le=1, + description="Overall verification score between 0 and 1", + ) + passed: bool = Field( + ..., description="Whether verification passed overall threshold" + ) + details: Dict[str, Any] = Field( + default_factory=dict, description="Detailed verification results" + ) + metrics: List[VerificationMetrics] = Field( + default_factory=list, description="List of individual metrics" + ) + feedback: str = Field(..., description="Human-readable feedback message") + error: Optional[str] = Field( + None, description="Error message if verification failed" + ) + + def dict(self, *args, **kwargs) -> Dict[str, Any]: + """Convert to dictionary, handling nested models.""" + d = super().dict(*args, **kwargs) + if "metrics" in d: + d["metrics"] = [m.dict() for m in self.metrics] + return d + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "VerificationResult": + """Create from dictionary, handling nested models.""" + if "metrics" in data: + data["metrics"] = [ + VerificationMetrics(**m) if isinstance(m, dict) else m + for m in data["metrics"] + ] + return cls(**data) + + class Config: + """Pydantic configuration.""" + + frozen = True diff --git a/examples/verifiers/math_example.py b/examples/verifiers/math_example.py new file mode 100644 index 0000000000..2eb897812a --- /dev/null +++ b/examples/verifiers/math_example.py @@ -0,0 +1,76 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= +"""Example usage of math verifier with Gurobi.""" + +import os +import sys + +from camel.verifiers import DomainVerifier + +# Add project root to Python path if not installed +project_root = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../..") +) +if project_root not in sys.path: + sys.path.append(project_root) + + +def main(): + # Example problems + data = { + "question": [ + "Solve: 2x + 3 = 7", + "Maximize: z = 3x + 4y subject to: x + y <= 10, x >= 0, y >= 0", + ], + "solution": ["x = 2", "x = 6, y = 4"], + "answer": [ + "2", + "36", # Optimal objective value + ], + } + + verified_data = DomainVerifier.verify( + domain="math", + data=data, + criteria={ + "numerical_tolerance": 1e-8, + "verify_steps": True, + "check_feasibility": True, + "verify_optimality": True, + }, + ) + + # Print detailed results + for item in verified_data: + print("\nVerification Results:") + print("-" * 50) + print(f"Question: {item['question']}") + print(f"Solution: {item['solution']}") + print(f"Expected Answer: {item['answer']}") + print(f"Correct: {item['correct']}") + + result = item['verification_result'] + print("\nDetails:") + print(f"Score: {result['score']:.2f}") + print(f"Passed: {result['passed']}") + print(f"Feedback: {result['feedback']}") + + if 'details' in result: + print("\nComponent Scores:") + for key, value in result['details'].items(): + print(f"- {key}: {value}") + + +if __name__ == "__main__": + main() From 7b6ebfa5380ad11acb684f58cf88e3200aa74d4d Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Fri, 7 Feb 2025 06:35:27 +0530 Subject: [PATCH 2/8] added code verifier and examples --- camel/verifiers/__init__.py | 5 +- camel/verifiers/base_verifier.py | 117 ------- camel/verifiers/code_verifier.py | 264 ++++++++++++++- camel/verifiers/math_verifier.py | 180 ---------- camel/verifiers/router/verification_router.py | 99 ------ camel/verifiers/types.py | 84 ----- examples/verifiers/code_example.py | 187 +++++++++++ examples/verifiers/math_example.py | 76 ----- examples/verifiers/math_program_example.py | 308 ++++++++++++++++++ 9 files changed, 759 insertions(+), 561 deletions(-) delete mode 100644 camel/verifiers/base_verifier.py delete mode 100644 camel/verifiers/math_verifier.py delete mode 100644 camel/verifiers/router/verification_router.py delete mode 100644 camel/verifiers/types.py create mode 100644 examples/verifiers/code_example.py delete mode 100644 examples/verifiers/math_example.py create mode 100644 examples/verifiers/math_program_example.py diff --git a/camel/verifiers/__init__.py b/camel/verifiers/__init__.py index 76c1391c63..691346bc19 100644 --- a/camel/verifiers/__init__.py +++ b/camel/verifiers/__init__.py @@ -12,9 +12,6 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -from .base_verifier import BaseVerifier from .code_verifier import CodeVerifier -from .math_verifier import MathVerifier -from .router.verification_router import DomainVerifier -__all__ = ['BaseVerifier', 'MathVerifier', 'CodeVerifier', 'DomainVerifier'] +__all__ = ['CodeVerifier'] diff --git a/camel/verifiers/base_verifier.py b/camel/verifiers/base_verifier.py deleted file mode 100644 index d4ba52d73d..0000000000 --- a/camel/verifiers/base_verifier.py +++ /dev/null @@ -1,117 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -"""Base verifier class that all domain-specific verifiers inherit from.""" - -from abc import ABC, abstractmethod -from typing import Any, Dict, Optional - -from datasets import Dataset - - -class BaseVerifier(ABC): - """Abstract base class for all verifiers. - - This class defines the interface that all domain-specific verifiers - must implement. - It provides common functionality and enforces a consistent verification - pattern. - """ - - def __init__(self, criteria: Optional[Dict[str, Any]] = None) -> None: - """Initialize the verifier. - - Args: - criteria: Optional dictionary of verification criteria that - override defaults - """ - self.criteria = criteria or {} - - @abstractmethod - def verify( - self, - data: Dataset, - criteria: Optional[Dict[str, Any]] = None, - ) -> Dataset: - """Verify the provided data. - - Args: - data: Dataset containing items to verify - criteria: Optional verification criteria for this specific call - - Returns: - Dataset with verification results added - - Note: - The returned dataset should include at minimum a 'correct' column - indicating whether each item passed verification. - """ - raise NotImplementedError - - def _calculate_score( - self, - details: Dict[str, Any], - weights: Optional[Dict[str, float]] = None, - ) -> float: - """Calculate overall verification score from component scores. - - Args: - details: Dictionary of component verification results - weights: Optional weights for each component - - Returns: - Float between 0 and 1 representing overall score - """ - if not details: - return 0.0 - - weights = weights or {k: 1.0 for k in details.keys()} - total_weight = sum(weights[k] for k in details.keys() if k in weights) - - if total_weight == 0: - return 0.0 - - weighted_sum = sum( - details[k] * weights[k] - for k in details.keys() - if k in weights and isinstance(details[k], (int, float)) - ) - - return weighted_sum / total_weight - - def _format_feedback( - self, details: Dict[str, Any], threshold: float = 0.7 - ) -> str: - """Format verification details into human-readable feedback. 
- - Args: - details: Dictionary of verification details - threshold: Score threshold for passing - - Returns: - Formatted feedback string - """ - feedback = [] - - for key, value in details.items(): - if isinstance(value, (int, float)): - status = "PASS" if value >= threshold else "FAIL" - feedback.append(f"{key}: {value:.2f} [{status}]") - else: - feedback.append(f"{key}: {value}") - - return "\n".join(feedback) - - def __repr__(self) -> str: - """Return string representation of the verifier.""" - return f"{self.__class__.__name__}(criteria={self.criteria})" diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index 8b37dc896d..90932e3c33 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -11,5 +11,267 @@ # See the License for the specific language governing permissions and # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + +from typing import Any, Dict, List, Optional, Union + +from datasets import Dataset + +from camel.interpreters import ( + BaseInterpreter, + SubprocessInterpreter, +) + + class CodeVerifier: - print() + r"""Verifier for code solutions. + + This verifier checks code solutions by: + 1. Validating syntax + 2. Running test cases + 3. Verifying outputs against expected results + """ + + def __init__( + self, + interpreter: str = "subprocess", + require_confirmation: bool = False, + ) -> None: + r"""Initialize the code verifier. + + Args: + interpreter (str, optional): Type of interpreter to use. + (default: :obj:`"subprocess"`) + require_confirmation (bool, optional): Whether to require user + confirmation before execution. (default: :obj:`False`) + """ + self.interpreter = self._get_interpreter( + interpreter, require_confirmation + ) + + def _get_interpreter( + self, + interpreter_type: str, + require_confirmation: bool, + ) -> BaseInterpreter: + r"""Initialize appropriate interpreter based on type. + + Args: + interpreter_type (str): Type of interpreter to use + require_confirmation (bool): Whether to require confirmation + + Returns: + BaseInterpreter: Configured interpreter instance + + Raises: + ValueError: If interpreter type is not supported + """ + if interpreter_type == "subprocess": + return SubprocessInterpreter( + require_confirm=require_confirmation, + print_stdout=False, + print_stderr=True, + ) + raise ValueError(f"Unsupported interpreter type: {interpreter_type}") + + def verify( + self, + data: Union[Dataset, Dict[str, Any]], + criteria: Optional[Dict[str, Any]] = None, + ) -> Dataset: + r"""Verify code solutions. + + Args: + data (Union[Dataset, Dict[str, Any]]): Data containing code to + verify + + criteria (Optional[Dict[str, Any]], optional): Optional + verification criteria for this specific call. + (default: :obj:`None`) + + Returns: + Dataset: Dataset with verification results added + """ + if isinstance(data, dict): + data = Dataset.from_dict(data) + + def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: + r"""Verify a single code example. 
+ + Args: + example (Dict[str, Any]): Example containing code to verify + + Returns: + Dict[str, Any]: Example with verification results added + """ + code = example.get("code", "") + language = example.get("language", "python") + test_cases = example.get("test_cases", []) + + # Check syntax first + try: + if language == "python": + compile(code, '', 'exec') + except SyntaxError as e: + return self._handle_syntax_error(example, e) + + try: + return self._run_test_cases( + example, code, language, test_cases + ) + except Exception as e: + return self._handle_execution_error(example, e) + + return data.map(verify_single) + + def _prepare_test_code( + self, + code: str, + test_case: Dict[str, Any], + ) -> str: + r"""Prepare code with test case inputs and assertions. + + Args: + code (str): Original code to test + test_case (Dict[str, Any]): Test case configuration + + Returns: + str: Complete test code with assertions + """ + full_code = [code] + + # Add test case setup + test_setup = [ + f"{k} = {v!r}" for k, v in test_case.get("inputs", {}).items() + ] + if test_setup: + full_code.extend(test_setup) + + # Add test assertions + test_assertions = [] + for expr, expected in test_case.get("expected", {}).items(): + test_assertions.append( + f""" +result = {expr} +if result != {expected!r}: + raise AssertionError( + f"Test failed:\\n Expression: {expr}\\n " + f"Expected: {expected!r}\\n Got: {{result}}" + ) +print(f"Test passed: {{result}}") +""" + ) + + if test_assertions: + full_code.extend(test_assertions) + + return "\n".join(full_code) + + def _handle_syntax_error( + self, + example: Dict[str, Any], + error: SyntaxError, + ) -> Dict[str, Any]: + r"""Handle syntax errors in code verification. + + Args: + example (Dict[str, Any]): The example being verified + error (SyntaxError): The syntax error that occurred + + Returns: + Dict[str, Any]: Updated example with error information + """ + return { + **example, + "verification_result": { + "passed": False, + "test_results": [], + "error": f"Syntax error: {error!s}", + "details": { + "type": "syntax_error", + "line": error.lineno, + "offset": error.offset, + "text": error.text, + }, + }, + } + + def _handle_execution_error( + self, + example: Dict[str, Any], + error: Exception, + ) -> Dict[str, Any]: + r"""Handle execution errors in code verification. + + Args: + example (Dict[str, Any]): The example being verified + error (Exception): The execution error that occurred + + Returns: + Dict[str, Any]: Updated example with error information + """ + example["verification_result"] = { + "passed": False, + "test_results": [], + "error": str(error), + "details": { + "type": "execution_error", + "message": str(error), + }, + } + return example + + def _run_test_cases( + self, + example: Dict[str, Any], + code: str, + language: str, + test_cases: List[Dict[str, Any]], + ) -> Dict[str, Any]: + r"""Run test cases for code verification. 
+ + Args: + example (Dict[str, Any]): The example being verified + code (str): The code to test + language (str): Programming language of the code + test_cases (List[Dict[str, Any]]): List of test cases to run + + Returns: + Dict[str, Any]: Updated example with test results + """ + test_results = [] + test_details = [] + + if test_cases: + for i, test_case in enumerate(test_cases): + test_code = self._prepare_test_code(code, test_case) + try: + output = self.interpreter.run(test_code, language) + test_results.append(True) + test_details.append( + { + "test_case": i + 1, + "status": "passed", + "output": output, + } + ) + except Exception as e: + test_results.append(False) + test_details.append( + { + "test_case": i + 1, + "status": "failed", + "error": str(e), + } + ) + + example["verification_result"] = { + "passed": all(test_results) if test_results else True, + "test_results": test_results, + "error": None, + "details": { + "test_count": len(test_results), + "tests": test_details, + }, + } + + return example diff --git a/camel/verifiers/math_verifier.py b/camel/verifiers/math_verifier.py deleted file mode 100644 index 00d383af4d..0000000000 --- a/camel/verifiers/math_verifier.py +++ /dev/null @@ -1,180 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= - - -import re -from typing import Any, ClassVar, Dict, Optional - -from datasets import Dataset - -from camel.agents import ChatAgent -from camel.responses import ChatAgentResponse -from camel.verifiers.base_verifier import BaseVerifier -from camel.verifiers.types import VerificationResult - - -class MathVerifier(BaseVerifier): - """Verifier for mathematical problems using LLM.""" - - DEFAULT_CRITERIA: ClassVar[Dict[str, Any]] = { - "numerical_tolerance": 1e-6, - "verify_steps": True, - } - - def __init__( - self, - criteria: Optional[Dict[str, Any]] = None, - agent: Optional[ChatAgent] = None, - ) -> None: - """Initialize the verifier. 
- - Args: - criteria: Optional verification criteria - agent: ChatAgent instance for verification - """ - super().__init__(criteria) - self.agent = agent - - def verify( - self, data: Dataset, criteria: Optional[Dict[str, Any]] = None - ) -> Dataset: - """Verify mathematical solutions in the dataset.""" - criteria = {**self.DEFAULT_CRITERIA, **(criteria or {})} - - def verify_single(example): - result = self._verify_solution( - question=example["question"], - solution=example["solution"], - answer=example.get("answer"), - criteria=criteria, - ) - - example["verification_result"] = result.dict() - example["correct"] = result.passed - return example - - return data.map(verify_single) - - def _verify_solution( - self, - question: str, - solution: str, - answer: Optional[str] = None, - criteria: Optional[Dict[str, Any]] = None, - ) -> VerificationResult: - """Verify a single mathematical solution using LLM.""" - try: - # Extract boxed answers - solution_value = self._extract_boxed_answer(solution) - answer_value = ( - self._extract_boxed_answer(answer) if answer else None - ) - - if not solution_value: - return VerificationResult( - score=0.0, - passed=False, - details={"error": "No \\boxed{} answer found"}, - feedback="Solution must include a \\boxed{} answer", - error=None, - ) - - # Construct prompt for LLM - prompt = self._construct_verification_prompt( - question=question, solution=solution_value, answer=answer_value - ) - - if self.agent is None: - raise ValueError("ChatAgent not initialized") - - # Get LLM response - response: ChatAgentResponse = self.agent.step(prompt) - verification_result = self._parse_llm_response( - response.msgs[0].content - ) - - return VerificationResult( - score=verification_result["score"], - passed=verification_result["passed"], - details=verification_result["details"], - feedback=verification_result["feedback"], - error=None, - ) - - except Exception as e: - return VerificationResult( - score=0.0, - passed=False, - details={"error": str(e)}, - feedback=f"Verification failed: {e!s}", - error=str(e), - ) - - def _extract_boxed_answer(self, text: Optional[str]) -> Optional[str]: - """Extract answer from \\boxed{} notation.""" - if not text: - return None - - boxed_pattern = r'\\boxed\s*{\s*([^}]+)\s*}' - match = re.search(boxed_pattern, text) - return match.group(1).strip() if match else None - - def _construct_verification_prompt( - self, question: str, solution: str, answer: Optional[str] - ) -> str: - """Construct prompt for LLM verification.""" - prompt = ( - "Please verify this mathematical solution.\n\n" - f"Question: {question}\n" - f"Student's solution: {solution}\n" - ) - - if answer: - prompt += f"Correct answer: {answer}\n" - - prompt += ( - "\nPlease verify if the solution is correct and provide feedback " - "in the following JSON format:\n" - "{\n" - ' "score": ,\n' - ' "passed": ,\n' - ' "details": {},\n' - ' "feedback": ""\n' - "}" - ) - - return prompt - - def _parse_llm_response(self, response: str) -> Dict[str, Any]: - """Parse LLM response into verification result.""" - try: - # Basic parsing in practice you might want more robust JSON parsing - import json - - result = json.loads(response) - - # Ensure required fields - required_fields = ["score", "passed", "details", "feedback"] - if not all(field in result for field in required_fields): - raise ValueError("Missing required fields in LLM response") - - return result - - except Exception as e: - return { - "score": 0.0, - "passed": False, - "details": {"error": f"Failed to parse LLM 
response: {e!s}"}, - "feedback": "Error in LLM verification", - } diff --git a/camel/verifiers/router/verification_router.py b/camel/verifiers/router/verification_router.py deleted file mode 100644 index 36c704f49f..0000000000 --- a/camel/verifiers/router/verification_router.py +++ /dev/null @@ -1,99 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -"""Verifier module for routing to domain-specific verifiers.""" - -from typing import Any, ClassVar, Dict, Optional, Type, Union - -from datasets import Dataset - -from camel.verifiers.base_verifier import BaseVerifier -from camel.verifiers.math_verifier import MathVerifier - - -class DomainVerifier: - """Main verifier class that routes to domain-specific verifiers.""" - - VERIFIERS: ClassVar[Dict[str, Type[BaseVerifier]]] = { - "math": MathVerifier, - } - - @classmethod - def verify( - cls, - domain: str, - data: Union[Dataset, Dict[str, Any]], - criteria: Optional[Dict[str, Any]] = None, - ) -> Dataset: - """Verify data using appropriate domain-specific verifier. - - Args: - domain: Domain identifier ("math", "code", etc) - data: Data to verify (Dataset or dict) - criteria: Optional verification criteria - - Returns: - Verified dataset with results - - Raises: - ValueError: If domain is not supported and strict_mode is True - """ - # Convert dict to dataset if needed - if isinstance(data, dict): - data = Dataset.from_dict(data) - - # Get appropriate verifier - verifier_cls = cls.VERIFIERS.get(domain) - if verifier_cls is None: - if criteria and criteria.get("strict_mode", False): - raise ValueError(f"Unsupported domain: {domain}") - # Default to marking everything as correct if no specific verifier - return data.add_column("correct", [True] * len(data)) - - # Create verifier instance and verify - verifier = verifier_cls(criteria=criteria) - verified_data = verifier.verify(data) - - # Filter to only correct results if specified - if criteria and criteria.get("filter_incorrect", False): - verified_data = verified_data.filter(lambda x: x["correct"]) - - return verified_data - - @classmethod - def get_supported_domains(cls) -> list[str]: - """Get list of supported verification domains. - - Returns: - List of domain identifiers that have registered verifiers - """ - return list(cls.VERIFIERS.keys()) - - @classmethod - def register_verifier( - cls, domain: str, verifier_cls: Type[BaseVerifier] - ) -> None: - """Register a new domain verifier. 
- - Args: - domain: Domain identifier - verifier_cls: Verifier class to register - - Raises: - ValueError: If domain is already registered - """ - if domain in cls.VERIFIERS: - raise ValueError( - f"Domain {domain} already has a registered verifier" - ) - cls.VERIFIERS[domain] = verifier_cls diff --git a/camel/verifiers/types.py b/camel/verifiers/types.py deleted file mode 100644 index 271776dbf2..0000000000 --- a/camel/verifiers/types.py +++ /dev/null @@ -1,84 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -"""Type definitions for verification results and metrics.""" - -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, Field - - -class VerificationMetrics(BaseModel): - """Metrics used in verification process.""" - - name: str = Field(..., description="Name of the metric") - value: float = Field( - ..., ge=0, le=1, description="Metric value between 0 and 1" - ) - weight: float = Field( - 1.0, ge=0, description="Weight of this metric in overall score" - ) - threshold: float = Field( - 0.7, ge=0, le=1, description="Passing threshold for this metric" - ) - - class Config: - """Pydantic configuration.""" - - frozen = True - - -class VerificationResult(BaseModel): - """Results from verification process.""" - - score: float = Field( - ..., - ge=0, - le=1, - description="Overall verification score between 0 and 1", - ) - passed: bool = Field( - ..., description="Whether verification passed overall threshold" - ) - details: Dict[str, Any] = Field( - default_factory=dict, description="Detailed verification results" - ) - metrics: List[VerificationMetrics] = Field( - default_factory=list, description="List of individual metrics" - ) - feedback: str = Field(..., description="Human-readable feedback message") - error: Optional[str] = Field( - None, description="Error message if verification failed" - ) - - def dict(self, *args, **kwargs) -> Dict[str, Any]: - """Convert to dictionary, handling nested models.""" - d = super().dict(*args, **kwargs) - if "metrics" in d: - d["metrics"] = [m.dict() for m in self.metrics] - return d - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VerificationResult": - """Create from dictionary, handling nested models.""" - if "metrics" in data: - data["metrics"] = [ - VerificationMetrics(**m) if isinstance(m, dict) else m - for m in data["metrics"] - ] - return cls(**data) - - class Config: - """Pydantic configuration.""" - - frozen = True diff --git a/examples/verifiers/code_example.py b/examples/verifiers/code_example.py new file mode 100644 index 0000000000..517c88c534 --- /dev/null +++ b/examples/verifiers/code_example.py @@ -0,0 +1,187 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + +from pprint import pprint + +from datasets import Dataset + +from camel.verifiers import CodeVerifier + + +def main(): + print("\nExample 1: Basic Function Test") + verifier = CodeVerifier(require_confirmation=False) + result = verifier.verify( + { + "code": ["def add(a, b): return a + b"], + "language": ["python"], + "test_cases": [ + [ + {"inputs": {"a": 1, "b": 2}, "expected": {"add(a, b)": 3}}, + { + "inputs": {"a": -1, "b": 1}, + "expected": {"add(a, b)": 0}, + }, + ] + ], + } + ) + pprint(result[0]["verification_result"]) + + # Example 2: Multiple Solutions + print("\nExample 2: Multiple Solutions") + data = Dataset.from_dict( + { + "code": [ + "def factorial(n): return 1 if n <= 1 else n * factorial(n-1)", + "def factorial(n): return n * factorial(n-1) if n > 1 else 1", + ], + "language": ["python", "python"], + "test_cases": [ + [{"inputs": {"n": 5}, "expected": {"factorial(n)": 120}}], + [{"inputs": {"n": 5}, "expected": {"factorial(n)": 120}}], + ], + } + ) + results = verifier.verify(data) + for i, result in enumerate(results): + print(f"Solution {i+1} result:", result["verification_result"]) + + # Example 3: Using subprocess interpreter + print("\nExample 3: External Imports") + verifier = CodeVerifier(interpreter="subprocess") + result = verifier.verify( + { + "code": [ + """ +import numpy as np +def process_array(): + arr = np.array([1, 2, 3]) + return arr.mean() + """ + ], + "language": ["python"], + "test_cases": [ + [{"inputs": {}, "expected": {"process_array()": 2.0}}] + ], + } + ) + print("Result:", result[0]["verification_result"]) + + # Example 4: Syntax Error + print("\nExample 4: Syntax Error") + result = verifier.verify( + { + "code": ["def broken_function(x: return x"], # Syntax error + "language": ["python"], + } + ) + print("Result:", result[0]["verification_result"]) + + +if __name__ == "__main__": + main() + + +""" +Example Output: + +Example 1: Basic Function Test +Map: 100%|██████████| 1/1 [00:00<00:00, 14.90 examples/s] +{ + 'details': { + 'test_count': 2, + 'tests': [ + { + 'output': 'Test passed: 3\n', + 'status': 'passed', + 'test_case': 1 + }, + { + 'output': 'Test passed: 0\n', + 'status': 'passed', + 'test_case': 2 + } + ] + }, + 'error': None, + 'passed': True, + 'test_results': [True, True] +} + +Example 2: Multiple Solutions +Map: 100%|██████████| 2/2 [00:00<00:00, 25.12 examples/s] +Solution 1 result: { + 'details': { + 'test_count': 1, + 'tests': [ + { + 'output': 'Test passed: 120\n', + 'status': 'passed', + 'test_case': 1 + } + ] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} +Solution 2 result: { + 'details': { + 'test_count': 1, + 'tests': [ + { + 'output': 'Test passed: 120\n', + 'status': 'passed', + 'test_case': 1 + } + ] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Example 3: External Imports +Map: 100%|██████████| 1/1 [00:00<00:00, 3.33 examples/s] +Result: { + 'details': { + 'test_count': 1, + 'tests': [ + { + 'output': 'Test passed: 2.0\n', + 'status': 'passed', + 'test_case': 1 + } + ] + }, + 
'error': None, + 'passed': True, + 'test_results': [True] +} + +Example 4: Syntax Error +Map: 100%|██████████| 1/1 [00:00<00:00, 661.88 examples/s] +Result: { + 'details': { + 'line': 1, + 'offset': 24, + 'text': 'def broken_function(x: return x\n', + 'type': 'syntax_error' + }, + 'error': 'Syntax error: invalid syntax (, line 1)', + 'passed': False, + 'test_results': [] +} +""" diff --git a/examples/verifiers/math_example.py b/examples/verifiers/math_example.py deleted file mode 100644 index 2eb897812a..0000000000 --- a/examples/verifiers/math_example.py +++ /dev/null @@ -1,76 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -"""Example usage of math verifier with Gurobi.""" - -import os -import sys - -from camel.verifiers import DomainVerifier - -# Add project root to Python path if not installed -project_root = os.path.abspath( - os.path.join(os.path.dirname(__file__), "../..") -) -if project_root not in sys.path: - sys.path.append(project_root) - - -def main(): - # Example problems - data = { - "question": [ - "Solve: 2x + 3 = 7", - "Maximize: z = 3x + 4y subject to: x + y <= 10, x >= 0, y >= 0", - ], - "solution": ["x = 2", "x = 6, y = 4"], - "answer": [ - "2", - "36", # Optimal objective value - ], - } - - verified_data = DomainVerifier.verify( - domain="math", - data=data, - criteria={ - "numerical_tolerance": 1e-8, - "verify_steps": True, - "check_feasibility": True, - "verify_optimality": True, - }, - ) - - # Print detailed results - for item in verified_data: - print("\nVerification Results:") - print("-" * 50) - print(f"Question: {item['question']}") - print(f"Solution: {item['solution']}") - print(f"Expected Answer: {item['answer']}") - print(f"Correct: {item['correct']}") - - result = item['verification_result'] - print("\nDetails:") - print(f"Score: {result['score']:.2f}") - print(f"Passed: {result['passed']}") - print(f"Feedback: {result['feedback']}") - - if 'details' in result: - print("\nComponent Scores:") - for key, value in result['details'].items(): - print(f"- {key}: {value}") - - -if __name__ == "__main__": - main() diff --git a/examples/verifiers/math_program_example.py b/examples/verifiers/math_program_example.py new file mode 100644 index 0000000000..1868e6cbd0 --- /dev/null +++ b/examples/verifiers/math_program_example.py @@ -0,0 +1,308 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + +from camel.verifiers import CodeVerifier + + +def main(): + verifier = CodeVerifier(interpreter="subprocess") + + # Example 1: Matrix Operations + matrix_test = { + "code": [ + """ +import numpy as np + +def matrix_multiply(A, B): + return np.dot(A, B) +""" + ], + "language": ["python"], + "test_cases": [ + [ + { + "inputs": {"A": [[1, 2], [3, 4]], "B": [[5, 6], [7, 8]]}, + "expected": { + "np.allclose(matrix_multiply(A, B), " + "np.array([[19, 22], [43, 50]]))": True + }, + } + ] + ], + } + + print("\nTesting Matrix Operations:") + result = verifier.verify(matrix_test) + print(result[0]["verification_result"]) + + # Example 2: Linear System Solver + linear_system_test = { + "code": [ + """ +import numpy as np + +def solve_linear_system(A, b): + return np.linalg.solve(A, b) +""" + ], + "language": ["python"], + "test_cases": [ + [ + { + "inputs": {"A": [[2, 1], [1, 3]], "b": [4, 5]}, + "expected": { + "np.allclose(solve_linear_system(A, b), " + "np.array([1.4, 1.2]))": True + }, + } + ] + ], + } + + print("\nTesting Linear System Solver:") + result = verifier.verify(linear_system_test) + print(result[0]["verification_result"]) + + # Example 3: Eigenvalue Decomposition + eigenvalue_test = { + "code": [ + """ +import numpy as np + +def compute_eigendecomposition(A): + eigenvalues, eigenvectors = np.linalg.eig(A) + # Sort by eigenvalues to ensure consistent order + idx = eigenvalues.argsort() + return eigenvalues[idx], eigenvectors[:, idx] +""" + ], + "language": ["python"], + "test_cases": [ + [ + { + "inputs": {"A": [[4, -1], [2, 1]]}, + "expected": { + "np.allclose(compute_eigendecomposition(A)[0], " + "np.array([2., 3.]))": True + }, + } + ] + ], + } + + print("\nTesting Eigenvalue Decomposition:") + result = verifier.verify(eigenvalue_test) + print(result[0]["verification_result"]) + + # Example 4: Singular Value Decomposition + svd_test = { + "code": [ + """ +import numpy as np + +def compute_svd(A): + U, s, Vh = np.linalg.svd(A) + return s # Return singular values +""" + ], + "language": ["python"], + "test_cases": [ + [ + { + "inputs": {"A": [[1, 2], [3, 4], [5, 6]]}, + "expected": { + "np.allclose(compute_svd(A), " + "np.array([9.52551809, 0.51430058]))": True + }, + } + ] + ], + } + + print("\nTesting SVD:") + result = verifier.verify(svd_test) + print(result[0]["verification_result"]) + + # Example 5: Optimization (Minimization) + optimization_test = { + "code": [ + """ +import numpy as np +from scipy.optimize import minimize + +def optimize_quadratic(x0): + # Minimize f(x,y) = x^2 + y^2 + 2x + 4y + 4 + def objective(x): + return x[0]**2 + x[1]**2 + 2*x[0] + 4*x[1] + 4 + + result = minimize(objective, x0, method='BFGS') + return result.x +""" + ], + "language": ["python"], + "test_cases": [ + [ + { + "inputs": {"x0": [0, 0]}, + "expected": { + "np.allclose(optimize_quadratic(x0), " + "np.array([-1., -2.]))": True + }, + } + ] + ], + } + + print("\nTesting Optimization:") + result = verifier.verify(optimization_test) + print(result[0]["verification_result"]) + + # Example 6: Numerical Integration + integration_test = { + "code": [ + """ +import numpy as np +from scipy import integrate + +def integrate_function(a, b): + # Integrate sin(x^2) from a to b + def f(x): + return np.sin(x**2) + + result, _ = integrate.quad(f, a, b) + return result +""" + ], + "language": ["python"], + "test_cases": [ + [ + 
{ + "inputs": {"a": 0, "b": 1}, + "expected": { + "np.allclose(integrate_function(a, b), " + "0.3102683017233811)": True + }, + } + ] + ], + } + + print("\nTesting Numerical Integration:") + result = verifier.verify(integration_test) + print(result[0]["verification_result"]) + + +if __name__ == "__main__": + main() + + +""" +Example Output: + +Testing Matrix Operations: +Map: 100%|██████████| 1/1 [00:00<00:00, 8.49 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Testing Linear System Solver: +Map: 100%|██████████| 1/1 [00:00<00:00, 6.27 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Testing Eigenvalue Decomposition: +Map: 100%|██████████| 1/1 [00:00<00:00, 11.11 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Testing SVD: +Map: 100%|██████████| 1/1 [00:00<00:00, 10.40 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Testing Optimization: +Map: 100%|██████████| 1/1 [00:00<00:00, 2.97 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Testing Numerical Integration: +Map: 100%|██████████| 1/1 [00:00<00:00, 3.61 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} +""" From e5f7a64c3d7559e5f4012bbc6b5e9ebe4c7deb64 Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Sat, 8 Feb 2025 09:32:21 +0530 Subject: [PATCH 3/8] tweaks --- camel/verifiers/code_verifier.py | 7 ++- examples/verifiers/code_example.py | 86 +++++++++++++++++++++++++++--- 2 files changed, 84 insertions(+), 9 deletions(-) diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index 90932e3c33..8d40a224bd 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -121,7 +121,12 @@ def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: except Exception as e: return self._handle_execution_error(example, e) - return data.map(verify_single) + # For Parallelization + num_proc = min(4, len(data)) + + return data.map( + verify_single, num_proc=num_proc, desc="Verifying code" + ) def _prepare_test_code( self, diff --git a/examples/verifiers/code_example.py b/examples/verifiers/code_example.py index 517c88c534..7007bb0fcf 100644 --- a/examples/verifiers/code_example.py +++ b/examples/verifiers/code_example.py @@ -79,8 +79,43 @@ def process_array(): ) print("Result:", result[0]["verification_result"]) - # Example 4: Syntax Error - print("\nExample 4: Syntax Error") + # Example 4: Multi-threaded verification + print("\nExample 4: Multi-threaded verification") + verifier = CodeVerifier(interpreter="subprocess") + result = verifier.verify( + { + "code": [ + """ + def square(x): + 
return x * x + """, + """ + def cube(x): + return x * x * x + """, + """ + def double(x): + return x + x + """, + """ + def half(x): + return x / 2 + """, + ], + "language": ["python"] * 4, + "test_cases": [ + [{"inputs": {"x": 4}, "expected": {"square(x)": 16}}], + [{"inputs": {"x": 3}, "expected": {"cube(x)": 27}}], + [{"inputs": {"x": 5}, "expected": {"double(x)": 10}}], + [{"inputs": {"x": 8}, "expected": {"half(x)": 4.0}}], + ], + } + ) + for i, result in enumerate(results): + print(f"\nFunction {i+1} result:", result["verification_result"]) + + # Example 5: Syntax Error + print("\nExample 5: Syntax Error") result = verifier.verify( { "code": ["def broken_function(x: return x"], # Syntax error @@ -98,14 +133,14 @@ def process_array(): Example Output: Example 1: Basic Function Test -Map: 100%|██████████| 1/1 [00:00<00:00, 14.90 examples/s] +Verifying code: 100%|██████████| 1/1 [00:00<00:00, 16.84 examples/s] { 'details': { 'test_count': 2, 'tests': [ { 'output': 'Test passed: 3\n', - 'status': 'passed', + 'status': 'passed', 'test_case': 1 }, { @@ -121,7 +156,7 @@ def process_array(): } Example 2: Multiple Solutions -Map: 100%|██████████| 2/2 [00:00<00:00, 25.12 examples/s] +Verifying code (num_proc=2):100%|██████████| 2/2 [00:00<00:00,15.64 examples/s] Solution 1 result: { 'details': { 'test_count': 1, @@ -154,7 +189,7 @@ def process_array(): } Example 3: External Imports -Map: 100%|██████████| 1/1 [00:00<00:00, 3.33 examples/s] +Verifying code: 100%|██████████| 1/1 [00:00<00:00, 9.29 examples/s] Result: { 'details': { 'test_count': 1, @@ -171,8 +206,43 @@ def process_array(): 'test_results': [True] } -Example 4: Syntax Error -Map: 100%|██████████| 1/1 [00:00<00:00, 661.88 examples/s] +Example 4: Multi-threaded verification +Verifying code(num_proc=4):100%|██████████| 4/4 [00:00<00:00, 35.86 examples/s] + +Function 1 result: { + 'details': { + 'test_count': 1, + 'tests': [ + { + 'output': 'Test passed: 120\n', + 'status': 'passed', + 'test_case': 1 + } + ] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Function 2 result: { + 'details': { + 'test_count': 1, + 'tests': [ + { + 'output': 'Test passed: 120\n', + 'status': 'passed', + 'test_case': 1 + } + ] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Example 5: Syntax Error +Verifying code: 100%|██████████| 1/1 [00:00<00:00, 504.24 examples/s] Result: { 'details': { 'line': 1, From 605b4caeb74c582b72d8488bc8508e0cc6b8a49f Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Sun, 9 Feb 2025 00:10:06 +0530 Subject: [PATCH 4/8] updates --- camel/verifiers/code_verifier.py | 32 ++-------------------- examples/verifiers/code_example.py | 10 +++++-- examples/verifiers/math_program_example.py | 4 ++- 3 files changed, 12 insertions(+), 34 deletions(-) diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index 8d40a224bd..954f836f80 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -18,7 +18,6 @@ from camel.interpreters import ( BaseInterpreter, - SubprocessInterpreter, ) @@ -33,7 +32,7 @@ class CodeVerifier: def __init__( self, - interpreter: str = "subprocess", + interpreter: BaseInterpreter, require_confirmation: bool = False, ) -> None: r"""Initialize the code verifier. @@ -44,34 +43,7 @@ def __init__( require_confirmation (bool, optional): Whether to require user confirmation before execution. 
(default: :obj:`False`) """ - self.interpreter = self._get_interpreter( - interpreter, require_confirmation - ) - - def _get_interpreter( - self, - interpreter_type: str, - require_confirmation: bool, - ) -> BaseInterpreter: - r"""Initialize appropriate interpreter based on type. - - Args: - interpreter_type (str): Type of interpreter to use - require_confirmation (bool): Whether to require confirmation - - Returns: - BaseInterpreter: Configured interpreter instance - - Raises: - ValueError: If interpreter type is not supported - """ - if interpreter_type == "subprocess": - return SubprocessInterpreter( - require_confirm=require_confirmation, - print_stdout=False, - print_stderr=True, - ) - raise ValueError(f"Unsupported interpreter type: {interpreter_type}") + self.interpreter = interpreter def verify( self, diff --git a/examples/verifiers/code_example.py b/examples/verifiers/code_example.py index 7007bb0fcf..60e58894f0 100644 --- a/examples/verifiers/code_example.py +++ b/examples/verifiers/code_example.py @@ -16,12 +16,14 @@ from datasets import Dataset +from camel.interpreters import SubprocessInterpreter from camel.verifiers import CodeVerifier def main(): print("\nExample 1: Basic Function Test") - verifier = CodeVerifier(require_confirmation=False) + interpreter = SubprocessInterpreter(require_confirm=False) + verifier = CodeVerifier(interpreter=interpreter) result = verifier.verify( { "code": ["def add(a, b): return a + b"], @@ -60,7 +62,8 @@ def main(): # Example 3: Using subprocess interpreter print("\nExample 3: External Imports") - verifier = CodeVerifier(interpreter="subprocess") + interpreter = SubprocessInterpreter(require_confirm=False) + verifier = CodeVerifier(interpreter=interpreter) result = verifier.verify( { "code": [ @@ -81,7 +84,8 @@ def process_array(): # Example 4: Multi-threaded verification print("\nExample 4: Multi-threaded verification") - verifier = CodeVerifier(interpreter="subprocess") + interpreter = SubprocessInterpreter() + verifier = CodeVerifier(interpreter=interpreter) result = verifier.verify( { "code": [ diff --git a/examples/verifiers/math_program_example.py b/examples/verifiers/math_program_example.py index 1868e6cbd0..b99647c0ec 100644 --- a/examples/verifiers/math_program_example.py +++ b/examples/verifiers/math_program_example.py @@ -12,11 +12,13 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +from camel.interpreters import SubprocessInterpreter from camel.verifiers import CodeVerifier def main(): - verifier = CodeVerifier(interpreter="subprocess") + interpreter = SubprocessInterpreter(require_confirm=False) + verifier = CodeVerifier(interpreter=interpreter) # Example 1: Matrix Operations matrix_test = { From cd456d426eeaa72128371431080611c68eed5052 Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Mon, 10 Feb 2025 07:22:56 +0530 Subject: [PATCH 5/8] added logging --- camel/verifiers/code_verifier.py | 41 +++++++++++++++++++++++++++--- examples/verifiers/code_example.py | 32 ++++++++--------------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index 954f836f80..d725fed5ed 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -12,6 +12,7 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= +import os from typing import Any, Dict, List, Optional, Union from datasets import Dataset @@ -19,6 +20,9 @@ from camel.interpreters import ( BaseInterpreter, ) +from camel.logger import logging + +logger = logging.getLogger(__name__) class CodeVerifier: @@ -38,12 +42,15 @@ def __init__( r"""Initialize the code verifier. Args: - interpreter (str, optional): Type of interpreter to use. - (default: :obj:`"subprocess"`) + interpreter (BaseInterpreter): The interpreter instance to use for + code execution require_confirmation (bool, optional): Whether to require user confirmation before execution. (default: :obj:`False`) """ self.interpreter = interpreter + logger.info( + "Initialized CodeVerifier with interpreter %s", interpreter + ) def verify( self, @@ -66,6 +73,8 @@ def verify( if isinstance(data, dict): data = Dataset.from_dict(data) + logger.info("Starting verification of %d examples", len(data)) + def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: r"""Verify a single code example. @@ -79,11 +88,18 @@ def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: language = example.get("language", "python") test_cases = example.get("test_cases", []) + logger.debug( + "Verifying code in %s with %d test cases", + language, + len(test_cases), + ) + # Check syntax first try: if language == "python": compile(code, '', 'exec') except SyntaxError as e: + logger.warning("Syntax error in code: %s", e) return self._handle_syntax_error(example, e) try: @@ -91,10 +107,13 @@ def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: example, code, language, test_cases ) except Exception as e: + logger.error("Execution error: %s", e) return self._handle_execution_error(example, e) # For Parallelization - num_proc = min(4, len(data)) + default_cpus = max(1, min(8, (os.cpu_count() or 1) // 2)) + num_proc = min(default_cpus, len(data)) + logger.info("Using %d processes for parallel verification", num_proc) return data.map( verify_single, num_proc=num_proc, desc="Verifying code" @@ -114,6 +133,9 @@ def _prepare_test_code( Returns: str: Complete test code with assertions """ + logger.debug( + "Preparing test code with inputs: %s", test_case.get("inputs") + ) full_code = [code] # Add test case setup @@ -157,6 +179,9 @@ def _handle_syntax_error( Returns: Dict[str, Any]: Updated example with error information """ + logger.warning( + "Handling syntax error: %s at line %d", error, error.lineno + ) return { **example, "verification_result": { @@ -186,6 +211,7 @@ def _handle_execution_error( Returns: Dict[str, Any]: Updated example with error information """ + logger.error("Handling execution error: %s", error) example["verification_result"] = { "passed": False, "test_results": [], @@ -219,7 +245,9 @@ def _run_test_cases( test_details = [] if test_cases: + logger.info("Running %d test cases", len(test_cases)) for i, test_case in enumerate(test_cases): + logger.debug("Running test case %d", i + 1) test_code = self._prepare_test_code(code, test_case) try: output = self.interpreter.run(test_code, language) @@ -231,6 +259,7 @@ def _run_test_cases( "output": output, } ) + logger.debug("Test case %d passed", i + 1) except Exception as e: test_results.append(False) test_details.append( @@ -240,9 +269,13 @@ def _run_test_cases( "error": str(e), } ) + logger.warning("Test case %d failed: %s", i + 1, e) + + passed = all(test_results) if test_results else True + logger.info("All test cases %s", "passed" if passed else "failed") example["verification_result"] = { - "passed": all(test_results) if 
test_results else True, + "passed": passed, "test_results": test_results, "error": None, "details": { diff --git a/examples/verifiers/code_example.py b/examples/verifiers/code_example.py index 60e58894f0..71cfe59bfc 100644 --- a/examples/verifiers/code_example.py +++ b/examples/verifiers/code_example.py @@ -84,34 +84,22 @@ def process_array(): # Example 4: Multi-threaded verification print("\nExample 4: Multi-threaded verification") - interpreter = SubprocessInterpreter() + interpreter = SubprocessInterpreter(require_confirm=False) verifier = CodeVerifier(interpreter=interpreter) - result = verifier.verify( + results = verifier.verify( { "code": [ - """ - def square(x): - return x * x - """, - """ - def cube(x): - return x * x * x - """, - """ - def double(x): - return x + x - """, - """ - def half(x): - return x / 2 - """, + "def square(x):\n return x * x\nresult = square(4)", + "def cube(x):\n return x * x * x\nresult = cube(3)", + "def double(x):\n return x + x\nresult = double(5)", + "def half(x):\n return x / 2\nresult = half(8)", ], "language": ["python"] * 4, "test_cases": [ - [{"inputs": {"x": 4}, "expected": {"square(x)": 16}}], - [{"inputs": {"x": 3}, "expected": {"cube(x)": 27}}], - [{"inputs": {"x": 5}, "expected": {"double(x)": 10}}], - [{"inputs": {"x": 8}, "expected": {"half(x)": 4.0}}], + [{"inputs": {}, "expected": {"result": 16}}], + [{"inputs": {}, "expected": {"result": 27}}], + [{"inputs": {}, "expected": {"result": 10}}], + [{"inputs": {}, "expected": {"result": 4.0}}], ], } ) From 58090c5a363f9d49180ddb9923a81ba6ff71aa63 Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Mon, 10 Feb 2025 07:45:08 +0530 Subject: [PATCH 6/8] language and test case validation --- camel/verifiers/code_verifier.py | 60 +++++++++++++++++++++++------- examples/verifiers/code_example.py | 33 ++++++++++++++++ 2 files changed, 80 insertions(+), 13 deletions(-) diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index d725fed5ed..3c589bd048 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -13,13 +13,11 @@ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= import os -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Union from datasets import Dataset -from camel.interpreters import ( - BaseInterpreter, -) +from camel.interpreters import BaseInterpreter, InterpreterError from camel.logger import logging logger = logging.getLogger(__name__) @@ -52,21 +50,13 @@ def __init__( "Initialized CodeVerifier with interpreter %s", interpreter ) - def verify( - self, - data: Union[Dataset, Dict[str, Any]], - criteria: Optional[Dict[str, Any]] = None, - ) -> Dataset: + def verify(self, data: Union[Dataset, Dict[str, Any]]) -> Dataset: r"""Verify code solutions. Args: data (Union[Dataset, Dict[str, Any]]): Data containing code to verify - criteria (Optional[Dict[str, Any]], optional): Optional - verification criteria for this specific call. - (default: :obj:`None`) - Returns: Dataset: Dataset with verification results added """ @@ -86,8 +76,32 @@ def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: """ code = example.get("code", "") language = example.get("language", "python") + + # Validate language is supported by interpreter + supported_languages = self.interpreter.supported_code_types() + if language not in supported_languages: + logger.warning( + "Language %s not supported by interpreter %s. 
" + "Supported languages: %s", + language, + self.interpreter.__class__.__name__, + supported_languages, + ) + return self._handle_execution_error( + example, + InterpreterError(f"Language {language} not supported"), + ) + test_cases = example.get("test_cases", []) + try: + self._validate_test_cases(test_cases) + except ValueError as e: + logger.warning("Invalid test cases: %s", e) + return self._handle_execution_error( + example, ValueError(f"Invalid test cases: {e!s}") + ) + logger.debug( "Verifying code in %s with %d test cases", language, @@ -165,6 +179,26 @@ def _prepare_test_code( return "\n".join(full_code) + def _validate_test_cases(self, test_cases: List[Dict[str, Any]]) -> None: + """Validate test cases structure. + + Args: + test_cases (List[Dict[str, Any]]): List of test cases to validate + + Raises: + ValueError: If test cases are malformed + """ + if not isinstance(test_cases, list): + raise ValueError("Test cases must be provided as a list") + + for i, test_case in enumerate(test_cases): + if not isinstance(test_case, dict): + raise ValueError(f"Test case {i} must be a dictionary") + if not test_case.get("expected"): + raise ValueError( + f"Test case {i} must contain 'expected' results" + ) + def _handle_syntax_error( self, example: Dict[str, Any], diff --git a/examples/verifiers/code_example.py b/examples/verifiers/code_example.py index 71cfe59bfc..a7b70a2fe4 100644 --- a/examples/verifiers/code_example.py +++ b/examples/verifiers/code_example.py @@ -116,6 +116,18 @@ def process_array(): ) print("Result:", result[0]["verification_result"]) + # Example 6: Test Case Validation + print("\nExample 6: Test Case Validation") + # Invalid test case (not a list) + result = verifier.verify( + { + "code": ["def add(a, b): return a + b"], + "language": ["python"], + "test_cases": {"not": "a list"}, # Invalid: not a list + } + ) + print("Invalid test case (not a list):", result[0]["verification_result"]) + if __name__ == "__main__": main() @@ -246,4 +258,25 @@ def process_array(): 'passed': False, 'test_results': [] } + +Example 6: Test Case Validation +Verifying code: 0%| | 0/1 [00:00 Date: Mon, 10 Feb 2025 07:54:47 +0530 Subject: [PATCH 7/8] add base verifier --- camel/verifiers/__init__.py | 3 +- camel/verifiers/base_verifier.py | 53 ++++++++++++++++++++++++++++++++ camel/verifiers/code_verifier.py | 4 ++- 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 camel/verifiers/base_verifier.py diff --git a/camel/verifiers/__init__.py b/camel/verifiers/__init__.py index 691346bc19..f1e7011d0e 100644 --- a/camel/verifiers/__init__.py +++ b/camel/verifiers/__init__.py @@ -12,6 +12,7 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +from .base_verifier import BaseVerifier from .code_verifier import CodeVerifier -__all__ = ['CodeVerifier'] +__all__ = ['BaseVerifier', 'CodeVerifier'] diff --git a/camel/verifiers/base_verifier.py b/camel/verifiers/base_verifier.py new file mode 100644 index 0000000000..10eabf366e --- /dev/null +++ b/camel/verifiers/base_verifier.py @@ -0,0 +1,53 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + +from abc import ABC, abstractmethod +from typing import Any, Dict + + +class BaseVerifier(ABC): + """Base class for verifiers. + + Provides a common interface and structure for all verifiers + in the CAMEL library. + """ + + def __init__(self, **kwargs: Any) -> None: + """Initializes the verifier. + + Args: + **kwargs: Keyword arguments for verifier configuration. + These could be logging settings, paths, etc. + """ + self.config = kwargs + + @abstractmethod + def verify(self, *args: Any, **kwargs: Any) -> Dict[str, Any]: + """Performs the verification. + + This method MUST be implemented by subclasses. + + Args: + *args: Positional arguments specific to the verifier. + **kwargs: Keyword arguments specific to the verifier. + + Returns: + A dictionary containing the verification results. The keys + and values in this dictionary will depend on the specific + verifier. Common keys might include: + - "success": bool (whether the verification passed) + - "errors": list (list of error messages) + - "details": dict (additional details about the verification) + """ + pass diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index 3c589bd048..fbf79bc3d5 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -19,11 +19,12 @@ from camel.interpreters import BaseInterpreter, InterpreterError from camel.logger import logging +from camel.verifiers import BaseVerifier logger = logging.getLogger(__name__) -class CodeVerifier: +class CodeVerifier(BaseVerifier): r"""Verifier for code solutions. This verifier checks code solutions by: @@ -45,6 +46,7 @@ def __init__( require_confirmation (bool, optional): Whether to require user confirmation before execution. (default: :obj:`False`) """ + super().__init__() self.interpreter = interpreter logger.info( "Initialized CodeVerifier with interpreter %s", interpreter From abc44b9ab7d810b747dd7c19ce21b57dc2a224e5 Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Fri, 14 Feb 2025 09:48:25 +0530 Subject: [PATCH 8/8] changes --- camel/verifiers/__init__.py | 3 +- camel/verifiers/base_verifier.py | 53 -------------------------------- camel/verifiers/code_verifier.py | 3 +- 3 files changed, 2 insertions(+), 57 deletions(-) delete mode 100644 camel/verifiers/base_verifier.py diff --git a/camel/verifiers/__init__.py b/camel/verifiers/__init__.py index f1e7011d0e..691346bc19 100644 --- a/camel/verifiers/__init__.py +++ b/camel/verifiers/__init__.py @@ -12,7 +12,6 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -from .base_verifier import BaseVerifier from .code_verifier import CodeVerifier -__all__ = ['BaseVerifier', 'CodeVerifier'] +__all__ = ['CodeVerifier'] diff --git a/camel/verifiers/base_verifier.py b/camel/verifiers/base_verifier.py deleted file mode 100644 index 10eabf366e..0000000000 --- a/camel/verifiers/base_verifier.py +++ /dev/null @@ -1,53 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. 
All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= - -from abc import ABC, abstractmethod -from typing import Any, Dict - - -class BaseVerifier(ABC): - """Base class for verifiers. - - Provides a common interface and structure for all verifiers - in the CAMEL library. - """ - - def __init__(self, **kwargs: Any) -> None: - """Initializes the verifier. - - Args: - **kwargs: Keyword arguments for verifier configuration. - These could be logging settings, paths, etc. - """ - self.config = kwargs - - @abstractmethod - def verify(self, *args: Any, **kwargs: Any) -> Dict[str, Any]: - """Performs the verification. - - This method MUST be implemented by subclasses. - - Args: - *args: Positional arguments specific to the verifier. - **kwargs: Keyword arguments specific to the verifier. - - Returns: - A dictionary containing the verification results. The keys - and values in this dictionary will depend on the specific - verifier. Common keys might include: - - "success": bool (whether the verification passed) - - "errors": list (list of error messages) - - "details": dict (additional details about the verification) - """ - pass diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index fbf79bc3d5..ea34ed2edd 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -19,12 +19,11 @@ from camel.interpreters import BaseInterpreter, InterpreterError from camel.logger import logging -from camel.verifiers import BaseVerifier logger = logging.getLogger(__name__) -class CodeVerifier(BaseVerifier): +class CodeVerifier: r"""Verifier for code solutions. This verifier checks code solutions by:
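
For readers following the series end to end: after PATCH 8/8 the public surface is a standalone CodeVerifier that takes a BaseInterpreter instance and a Dataset-or-dict payload with parallel "code", "language", and "test_cases" columns. The sketch below is a minimal usage example assembled from the examples in the patches above (SubprocessInterpreter, CodeVerifier, the input schema, and the "verification_result" output key all appear in this series); the add() function and its concrete test values are illustrative only, and the assertion mechanics are inferred from _prepare_test_code rather than confirmed here.

from camel.interpreters import SubprocessInterpreter
from camel.verifiers import CodeVerifier

# As of PATCH 4/8 the verifier takes an interpreter instance, not a name string.
interpreter = SubprocessInterpreter(require_confirm=False)
verifier = CodeVerifier(interpreter=interpreter)

# One code sample with one test case. "inputs" appear to be bound as variables
# before each "expected" key is checked against its value (see _prepare_test_code
# in the patches above); the concrete values here are made up for illustration.
result = verifier.verify(
    {
        "code": ["def add(a, b): return a + b"],
        "language": ["python"],
        "test_cases": [
            [{"inputs": {"a": 1, "b": 2}, "expected": {"add(a, b)": 3}}]
        ],
    }
)
print(result[0]["verification_result"])  # e.g. {'passed': True, 'test_results': [True], ...}

Malformed payloads (unsupported language, test_cases that are not a list, or a test case without an "expected" entry) are reported through the same "verification_result" field rather than raised, as Example 6 in PATCH 6/8 demonstrates.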