From 05c6ee42ce958d7a5077e9661ea475fa6e830279 Mon Sep 17 00:00:00 2001
From: Rishabh <134101578+GitHoobar@users.noreply.github.com>
Date: Thu, 6 Feb 2025 18:28:20 +0530
Subject: [PATCH 1/8] test

---
 camel/verifiers/__init__.py                   |  20 ++
 camel/verifiers/base_verifier.py              | 117 ++++++++++++
 camel/verifiers/code_verifier.py              |  15 ++
 camel/verifiers/math_verifier.py              | 180 ++++++++++++++++++
 camel/verifiers/router/verification_router.py |  99 ++++++++++
 camel/verifiers/types.py                      |  84 ++++++++
 examples/verifiers/math_example.py            |  76 ++++++++
 7 files changed, 591 insertions(+)
 create mode 100644 camel/verifiers/__init__.py
 create mode 100644 camel/verifiers/base_verifier.py
 create mode 100644 camel/verifiers/code_verifier.py
 create mode 100644 camel/verifiers/math_verifier.py
 create mode 100644 camel/verifiers/router/verification_router.py
 create mode 100644 camel/verifiers/types.py
 create mode 100644 examples/verifiers/math_example.py

diff --git a/camel/verifiers/__init__.py b/camel/verifiers/__init__.py
new file mode 100644
index 0000000000..76c1391c63
--- /dev/null
+++ b/camel/verifiers/__init__.py
@@ -0,0 +1,20 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+from .base_verifier import BaseVerifier
+from .code_verifier import CodeVerifier
+from .math_verifier import MathVerifier
+from .router.verification_router import DomainVerifier
+
+__all__ = ['BaseVerifier', 'MathVerifier', 'CodeVerifier', 'DomainVerifier']
diff --git a/camel/verifiers/base_verifier.py b/camel/verifiers/base_verifier.py
new file mode 100644
index 0000000000..d4ba52d73d
--- /dev/null
+++ b/camel/verifiers/base_verifier.py
@@ -0,0 +1,117 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+"""Base verifier class that all domain-specific verifiers inherit from."""
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional
+
+from datasets import Dataset
+
+
+class BaseVerifier(ABC):
+    """Abstract base class for all verifiers.
+
+    This class defines the interface that all domain-specific verifiers
+    must implement.
+    It provides common functionality and enforces a consistent verification
+    pattern.
+    """
+
+    def __init__(self, criteria: Optional[Dict[str, Any]] = None) -> None:
+        """Initialize the verifier.
+ + Args: + criteria: Optional dictionary of verification criteria that + override defaults + """ + self.criteria = criteria or {} + + @abstractmethod + def verify( + self, + data: Dataset, + criteria: Optional[Dict[str, Any]] = None, + ) -> Dataset: + """Verify the provided data. + + Args: + data: Dataset containing items to verify + criteria: Optional verification criteria for this specific call + + Returns: + Dataset with verification results added + + Note: + The returned dataset should include at minimum a 'correct' column + indicating whether each item passed verification. + """ + raise NotImplementedError + + def _calculate_score( + self, + details: Dict[str, Any], + weights: Optional[Dict[str, float]] = None, + ) -> float: + """Calculate overall verification score from component scores. + + Args: + details: Dictionary of component verification results + weights: Optional weights for each component + + Returns: + Float between 0 and 1 representing overall score + """ + if not details: + return 0.0 + + weights = weights or {k: 1.0 for k in details.keys()} + total_weight = sum(weights[k] for k in details.keys() if k in weights) + + if total_weight == 0: + return 0.0 + + weighted_sum = sum( + details[k] * weights[k] + for k in details.keys() + if k in weights and isinstance(details[k], (int, float)) + ) + + return weighted_sum / total_weight + + def _format_feedback( + self, details: Dict[str, Any], threshold: float = 0.7 + ) -> str: + """Format verification details into human-readable feedback. + + Args: + details: Dictionary of verification details + threshold: Score threshold for passing + + Returns: + Formatted feedback string + """ + feedback = [] + + for key, value in details.items(): + if isinstance(value, (int, float)): + status = "PASS" if value >= threshold else "FAIL" + feedback.append(f"{key}: {value:.2f} [{status}]") + else: + feedback.append(f"{key}: {value}") + + return "\n".join(feedback) + + def __repr__(self) -> str: + """Return string representation of the verifier.""" + return f"{self.__class__.__name__}(criteria={self.criteria})" diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py new file mode 100644 index 0000000000..8b37dc896d --- /dev/null +++ b/camel/verifiers/code_verifier.py @@ -0,0 +1,15 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +class CodeVerifier: + print() diff --git a/camel/verifiers/math_verifier.py b/camel/verifiers/math_verifier.py new file mode 100644 index 0000000000..00d383af4d --- /dev/null +++ b/camel/verifiers/math_verifier.py @@ -0,0 +1,180 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + + +import re +from typing import Any, ClassVar, Dict, Optional + +from datasets import Dataset + +from camel.agents import ChatAgent +from camel.responses import ChatAgentResponse +from camel.verifiers.base_verifier import BaseVerifier +from camel.verifiers.types import VerificationResult + + +class MathVerifier(BaseVerifier): + """Verifier for mathematical problems using LLM.""" + + DEFAULT_CRITERIA: ClassVar[Dict[str, Any]] = { + "numerical_tolerance": 1e-6, + "verify_steps": True, + } + + def __init__( + self, + criteria: Optional[Dict[str, Any]] = None, + agent: Optional[ChatAgent] = None, + ) -> None: + """Initialize the verifier. + + Args: + criteria: Optional verification criteria + agent: ChatAgent instance for verification + """ + super().__init__(criteria) + self.agent = agent + + def verify( + self, data: Dataset, criteria: Optional[Dict[str, Any]] = None + ) -> Dataset: + """Verify mathematical solutions in the dataset.""" + criteria = {**self.DEFAULT_CRITERIA, **(criteria or {})} + + def verify_single(example): + result = self._verify_solution( + question=example["question"], + solution=example["solution"], + answer=example.get("answer"), + criteria=criteria, + ) + + example["verification_result"] = result.dict() + example["correct"] = result.passed + return example + + return data.map(verify_single) + + def _verify_solution( + self, + question: str, + solution: str, + answer: Optional[str] = None, + criteria: Optional[Dict[str, Any]] = None, + ) -> VerificationResult: + """Verify a single mathematical solution using LLM.""" + try: + # Extract boxed answers + solution_value = self._extract_boxed_answer(solution) + answer_value = ( + self._extract_boxed_answer(answer) if answer else None + ) + + if not solution_value: + return VerificationResult( + score=0.0, + passed=False, + details={"error": "No \\boxed{} answer found"}, + feedback="Solution must include a \\boxed{} answer", + error=None, + ) + + # Construct prompt for LLM + prompt = self._construct_verification_prompt( + question=question, solution=solution_value, answer=answer_value + ) + + if self.agent is None: + raise ValueError("ChatAgent not initialized") + + # Get LLM response + response: ChatAgentResponse = self.agent.step(prompt) + verification_result = self._parse_llm_response( + response.msgs[0].content + ) + + return VerificationResult( + score=verification_result["score"], + passed=verification_result["passed"], + details=verification_result["details"], + feedback=verification_result["feedback"], + error=None, + ) + + except Exception as e: + return VerificationResult( + score=0.0, + passed=False, + details={"error": str(e)}, + feedback=f"Verification failed: {e!s}", + error=str(e), + ) + + def _extract_boxed_answer(self, text: Optional[str]) -> Optional[str]: + """Extract answer from \\boxed{} notation.""" + if not text: + return None + + boxed_pattern = r'\\boxed\s*{\s*([^}]+)\s*}' + match = re.search(boxed_pattern, text) + return match.group(1).strip() if match else None + + def _construct_verification_prompt( + self, 
question: str, solution: str, answer: Optional[str] + ) -> str: + """Construct prompt for LLM verification.""" + prompt = ( + "Please verify this mathematical solution.\n\n" + f"Question: {question}\n" + f"Student's solution: {solution}\n" + ) + + if answer: + prompt += f"Correct answer: {answer}\n" + + prompt += ( + "\nPlease verify if the solution is correct and provide feedback " + "in the following JSON format:\n" + "{\n" + ' "score": ,\n' + ' "passed": ,\n' + ' "details": {},\n' + ' "feedback": ""\n' + "}" + ) + + return prompt + + def _parse_llm_response(self, response: str) -> Dict[str, Any]: + """Parse LLM response into verification result.""" + try: + # Basic parsing in practice you might want more robust JSON parsing + import json + + result = json.loads(response) + + # Ensure required fields + required_fields = ["score", "passed", "details", "feedback"] + if not all(field in result for field in required_fields): + raise ValueError("Missing required fields in LLM response") + + return result + + except Exception as e: + return { + "score": 0.0, + "passed": False, + "details": {"error": f"Failed to parse LLM response: {e!s}"}, + "feedback": "Error in LLM verification", + } diff --git a/camel/verifiers/router/verification_router.py b/camel/verifiers/router/verification_router.py new file mode 100644 index 0000000000..36c704f49f --- /dev/null +++ b/camel/verifiers/router/verification_router.py @@ -0,0 +1,99 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +"""Verifier module for routing to domain-specific verifiers.""" + +from typing import Any, ClassVar, Dict, Optional, Type, Union + +from datasets import Dataset + +from camel.verifiers.base_verifier import BaseVerifier +from camel.verifiers.math_verifier import MathVerifier + + +class DomainVerifier: + """Main verifier class that routes to domain-specific verifiers.""" + + VERIFIERS: ClassVar[Dict[str, Type[BaseVerifier]]] = { + "math": MathVerifier, + } + + @classmethod + def verify( + cls, + domain: str, + data: Union[Dataset, Dict[str, Any]], + criteria: Optional[Dict[str, Any]] = None, + ) -> Dataset: + """Verify data using appropriate domain-specific verifier. 
+ + Args: + domain: Domain identifier ("math", "code", etc) + data: Data to verify (Dataset or dict) + criteria: Optional verification criteria + + Returns: + Verified dataset with results + + Raises: + ValueError: If domain is not supported and strict_mode is True + """ + # Convert dict to dataset if needed + if isinstance(data, dict): + data = Dataset.from_dict(data) + + # Get appropriate verifier + verifier_cls = cls.VERIFIERS.get(domain) + if verifier_cls is None: + if criteria and criteria.get("strict_mode", False): + raise ValueError(f"Unsupported domain: {domain}") + # Default to marking everything as correct if no specific verifier + return data.add_column("correct", [True] * len(data)) + + # Create verifier instance and verify + verifier = verifier_cls(criteria=criteria) + verified_data = verifier.verify(data) + + # Filter to only correct results if specified + if criteria and criteria.get("filter_incorrect", False): + verified_data = verified_data.filter(lambda x: x["correct"]) + + return verified_data + + @classmethod + def get_supported_domains(cls) -> list[str]: + """Get list of supported verification domains. + + Returns: + List of domain identifiers that have registered verifiers + """ + return list(cls.VERIFIERS.keys()) + + @classmethod + def register_verifier( + cls, domain: str, verifier_cls: Type[BaseVerifier] + ) -> None: + """Register a new domain verifier. + + Args: + domain: Domain identifier + verifier_cls: Verifier class to register + + Raises: + ValueError: If domain is already registered + """ + if domain in cls.VERIFIERS: + raise ValueError( + f"Domain {domain} already has a registered verifier" + ) + cls.VERIFIERS[domain] = verifier_cls diff --git a/camel/verifiers/types.py b/camel/verifiers/types.py new file mode 100644 index 0000000000..271776dbf2 --- /dev/null +++ b/camel/verifiers/types.py @@ -0,0 +1,84 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= +"""Type definitions for verification results and metrics.""" + +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field + + +class VerificationMetrics(BaseModel): + """Metrics used in verification process.""" + + name: str = Field(..., description="Name of the metric") + value: float = Field( + ..., ge=0, le=1, description="Metric value between 0 and 1" + ) + weight: float = Field( + 1.0, ge=0, description="Weight of this metric in overall score" + ) + threshold: float = Field( + 0.7, ge=0, le=1, description="Passing threshold for this metric" + ) + + class Config: + """Pydantic configuration.""" + + frozen = True + + +class VerificationResult(BaseModel): + """Results from verification process.""" + + score: float = Field( + ..., + ge=0, + le=1, + description="Overall verification score between 0 and 1", + ) + passed: bool = Field( + ..., description="Whether verification passed overall threshold" + ) + details: Dict[str, Any] = Field( + default_factory=dict, description="Detailed verification results" + ) + metrics: List[VerificationMetrics] = Field( + default_factory=list, description="List of individual metrics" + ) + feedback: str = Field(..., description="Human-readable feedback message") + error: Optional[str] = Field( + None, description="Error message if verification failed" + ) + + def dict(self, *args, **kwargs) -> Dict[str, Any]: + """Convert to dictionary, handling nested models.""" + d = super().dict(*args, **kwargs) + if "metrics" in d: + d["metrics"] = [m.dict() for m in self.metrics] + return d + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "VerificationResult": + """Create from dictionary, handling nested models.""" + if "metrics" in data: + data["metrics"] = [ + VerificationMetrics(**m) if isinstance(m, dict) else m + for m in data["metrics"] + ] + return cls(**data) + + class Config: + """Pydantic configuration.""" + + frozen = True diff --git a/examples/verifiers/math_example.py b/examples/verifiers/math_example.py new file mode 100644 index 0000000000..2eb897812a --- /dev/null +++ b/examples/verifiers/math_example.py @@ -0,0 +1,76 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= +"""Example usage of math verifier with Gurobi.""" + +import os +import sys + +from camel.verifiers import DomainVerifier + +# Add project root to Python path if not installed +project_root = os.path.abspath( + os.path.join(os.path.dirname(__file__), "../..") +) +if project_root not in sys.path: + sys.path.append(project_root) + + +def main(): + # Example problems + data = { + "question": [ + "Solve: 2x + 3 = 7", + "Maximize: z = 3x + 4y subject to: x + y <= 10, x >= 0, y >= 0", + ], + "solution": ["x = 2", "x = 6, y = 4"], + "answer": [ + "2", + "36", # Optimal objective value + ], + } + + verified_data = DomainVerifier.verify( + domain="math", + data=data, + criteria={ + "numerical_tolerance": 1e-8, + "verify_steps": True, + "check_feasibility": True, + "verify_optimality": True, + }, + ) + + # Print detailed results + for item in verified_data: + print("\nVerification Results:") + print("-" * 50) + print(f"Question: {item['question']}") + print(f"Solution: {item['solution']}") + print(f"Expected Answer: {item['answer']}") + print(f"Correct: {item['correct']}") + + result = item['verification_result'] + print("\nDetails:") + print(f"Score: {result['score']:.2f}") + print(f"Passed: {result['passed']}") + print(f"Feedback: {result['feedback']}") + + if 'details' in result: + print("\nComponent Scores:") + for key, value in result['details'].items(): + print(f"- {key}: {value}") + + +if __name__ == "__main__": + main() From 7b6ebfa5380ad11acb684f58cf88e3200aa74d4d Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Fri, 7 Feb 2025 06:35:27 +0530 Subject: [PATCH 2/8] added code verifier and examples --- camel/verifiers/__init__.py | 5 +- camel/verifiers/base_verifier.py | 117 ------- camel/verifiers/code_verifier.py | 264 ++++++++++++++- camel/verifiers/math_verifier.py | 180 ---------- camel/verifiers/router/verification_router.py | 99 ------ camel/verifiers/types.py | 84 ----- examples/verifiers/code_example.py | 187 +++++++++++ examples/verifiers/math_example.py | 76 ----- examples/verifiers/math_program_example.py | 308 ++++++++++++++++++ 9 files changed, 759 insertions(+), 561 deletions(-) delete mode 100644 camel/verifiers/base_verifier.py delete mode 100644 camel/verifiers/math_verifier.py delete mode 100644 camel/verifiers/router/verification_router.py delete mode 100644 camel/verifiers/types.py create mode 100644 examples/verifiers/code_example.py delete mode 100644 examples/verifiers/math_example.py create mode 100644 examples/verifiers/math_program_example.py diff --git a/camel/verifiers/__init__.py b/camel/verifiers/__init__.py index 76c1391c63..691346bc19 100644 --- a/camel/verifiers/__init__.py +++ b/camel/verifiers/__init__.py @@ -12,9 +12,6 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -from .base_verifier import BaseVerifier from .code_verifier import CodeVerifier -from .math_verifier import MathVerifier -from .router.verification_router import DomainVerifier -__all__ = ['BaseVerifier', 'MathVerifier', 'CodeVerifier', 'DomainVerifier'] +__all__ = ['CodeVerifier'] diff --git a/camel/verifiers/base_verifier.py b/camel/verifiers/base_verifier.py deleted file mode 100644 index d4ba52d73d..0000000000 --- a/camel/verifiers/base_verifier.py +++ /dev/null @@ -1,117 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -"""Base verifier class that all domain-specific verifiers inherit from.""" - -from abc import ABC, abstractmethod -from typing import Any, Dict, Optional - -from datasets import Dataset - - -class BaseVerifier(ABC): - """Abstract base class for all verifiers. - - This class defines the interface that all domain-specific verifiers - must implement. - It provides common functionality and enforces a consistent verification - pattern. - """ - - def __init__(self, criteria: Optional[Dict[str, Any]] = None) -> None: - """Initialize the verifier. - - Args: - criteria: Optional dictionary of verification criteria that - override defaults - """ - self.criteria = criteria or {} - - @abstractmethod - def verify( - self, - data: Dataset, - criteria: Optional[Dict[str, Any]] = None, - ) -> Dataset: - """Verify the provided data. - - Args: - data: Dataset containing items to verify - criteria: Optional verification criteria for this specific call - - Returns: - Dataset with verification results added - - Note: - The returned dataset should include at minimum a 'correct' column - indicating whether each item passed verification. - """ - raise NotImplementedError - - def _calculate_score( - self, - details: Dict[str, Any], - weights: Optional[Dict[str, float]] = None, - ) -> float: - """Calculate overall verification score from component scores. - - Args: - details: Dictionary of component verification results - weights: Optional weights for each component - - Returns: - Float between 0 and 1 representing overall score - """ - if not details: - return 0.0 - - weights = weights or {k: 1.0 for k in details.keys()} - total_weight = sum(weights[k] for k in details.keys() if k in weights) - - if total_weight == 0: - return 0.0 - - weighted_sum = sum( - details[k] * weights[k] - for k in details.keys() - if k in weights and isinstance(details[k], (int, float)) - ) - - return weighted_sum / total_weight - - def _format_feedback( - self, details: Dict[str, Any], threshold: float = 0.7 - ) -> str: - """Format verification details into human-readable feedback. 
- - Args: - details: Dictionary of verification details - threshold: Score threshold for passing - - Returns: - Formatted feedback string - """ - feedback = [] - - for key, value in details.items(): - if isinstance(value, (int, float)): - status = "PASS" if value >= threshold else "FAIL" - feedback.append(f"{key}: {value:.2f} [{status}]") - else: - feedback.append(f"{key}: {value}") - - return "\n".join(feedback) - - def __repr__(self) -> str: - """Return string representation of the verifier.""" - return f"{self.__class__.__name__}(criteria={self.criteria})" diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index 8b37dc896d..90932e3c33 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -11,5 +11,267 @@ # See the License for the specific language governing permissions and # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + +from typing import Any, Dict, List, Optional, Union + +from datasets import Dataset + +from camel.interpreters import ( + BaseInterpreter, + SubprocessInterpreter, +) + + class CodeVerifier: - print() + r"""Verifier for code solutions. + + This verifier checks code solutions by: + 1. Validating syntax + 2. Running test cases + 3. Verifying outputs against expected results + """ + + def __init__( + self, + interpreter: str = "subprocess", + require_confirmation: bool = False, + ) -> None: + r"""Initialize the code verifier. + + Args: + interpreter (str, optional): Type of interpreter to use. + (default: :obj:`"subprocess"`) + require_confirmation (bool, optional): Whether to require user + confirmation before execution. (default: :obj:`False`) + """ + self.interpreter = self._get_interpreter( + interpreter, require_confirmation + ) + + def _get_interpreter( + self, + interpreter_type: str, + require_confirmation: bool, + ) -> BaseInterpreter: + r"""Initialize appropriate interpreter based on type. + + Args: + interpreter_type (str): Type of interpreter to use + require_confirmation (bool): Whether to require confirmation + + Returns: + BaseInterpreter: Configured interpreter instance + + Raises: + ValueError: If interpreter type is not supported + """ + if interpreter_type == "subprocess": + return SubprocessInterpreter( + require_confirm=require_confirmation, + print_stdout=False, + print_stderr=True, + ) + raise ValueError(f"Unsupported interpreter type: {interpreter_type}") + + def verify( + self, + data: Union[Dataset, Dict[str, Any]], + criteria: Optional[Dict[str, Any]] = None, + ) -> Dataset: + r"""Verify code solutions. + + Args: + data (Union[Dataset, Dict[str, Any]]): Data containing code to + verify + + criteria (Optional[Dict[str, Any]], optional): Optional + verification criteria for this specific call. + (default: :obj:`None`) + + Returns: + Dataset: Dataset with verification results added + """ + if isinstance(data, dict): + data = Dataset.from_dict(data) + + def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: + r"""Verify a single code example. 
+ + Args: + example (Dict[str, Any]): Example containing code to verify + + Returns: + Dict[str, Any]: Example with verification results added + """ + code = example.get("code", "") + language = example.get("language", "python") + test_cases = example.get("test_cases", []) + + # Check syntax first + try: + if language == "python": + compile(code, '', 'exec') + except SyntaxError as e: + return self._handle_syntax_error(example, e) + + try: + return self._run_test_cases( + example, code, language, test_cases + ) + except Exception as e: + return self._handle_execution_error(example, e) + + return data.map(verify_single) + + def _prepare_test_code( + self, + code: str, + test_case: Dict[str, Any], + ) -> str: + r"""Prepare code with test case inputs and assertions. + + Args: + code (str): Original code to test + test_case (Dict[str, Any]): Test case configuration + + Returns: + str: Complete test code with assertions + """ + full_code = [code] + + # Add test case setup + test_setup = [ + f"{k} = {v!r}" for k, v in test_case.get("inputs", {}).items() + ] + if test_setup: + full_code.extend(test_setup) + + # Add test assertions + test_assertions = [] + for expr, expected in test_case.get("expected", {}).items(): + test_assertions.append( + f""" +result = {expr} +if result != {expected!r}: + raise AssertionError( + f"Test failed:\\n Expression: {expr}\\n " + f"Expected: {expected!r}\\n Got: {{result}}" + ) +print(f"Test passed: {{result}}") +""" + ) + + if test_assertions: + full_code.extend(test_assertions) + + return "\n".join(full_code) + + def _handle_syntax_error( + self, + example: Dict[str, Any], + error: SyntaxError, + ) -> Dict[str, Any]: + r"""Handle syntax errors in code verification. + + Args: + example (Dict[str, Any]): The example being verified + error (SyntaxError): The syntax error that occurred + + Returns: + Dict[str, Any]: Updated example with error information + """ + return { + **example, + "verification_result": { + "passed": False, + "test_results": [], + "error": f"Syntax error: {error!s}", + "details": { + "type": "syntax_error", + "line": error.lineno, + "offset": error.offset, + "text": error.text, + }, + }, + } + + def _handle_execution_error( + self, + example: Dict[str, Any], + error: Exception, + ) -> Dict[str, Any]: + r"""Handle execution errors in code verification. + + Args: + example (Dict[str, Any]): The example being verified + error (Exception): The execution error that occurred + + Returns: + Dict[str, Any]: Updated example with error information + """ + example["verification_result"] = { + "passed": False, + "test_results": [], + "error": str(error), + "details": { + "type": "execution_error", + "message": str(error), + }, + } + return example + + def _run_test_cases( + self, + example: Dict[str, Any], + code: str, + language: str, + test_cases: List[Dict[str, Any]], + ) -> Dict[str, Any]: + r"""Run test cases for code verification. 
+ + Args: + example (Dict[str, Any]): The example being verified + code (str): The code to test + language (str): Programming language of the code + test_cases (List[Dict[str, Any]]): List of test cases to run + + Returns: + Dict[str, Any]: Updated example with test results + """ + test_results = [] + test_details = [] + + if test_cases: + for i, test_case in enumerate(test_cases): + test_code = self._prepare_test_code(code, test_case) + try: + output = self.interpreter.run(test_code, language) + test_results.append(True) + test_details.append( + { + "test_case": i + 1, + "status": "passed", + "output": output, + } + ) + except Exception as e: + test_results.append(False) + test_details.append( + { + "test_case": i + 1, + "status": "failed", + "error": str(e), + } + ) + + example["verification_result"] = { + "passed": all(test_results) if test_results else True, + "test_results": test_results, + "error": None, + "details": { + "test_count": len(test_results), + "tests": test_details, + }, + } + + return example diff --git a/camel/verifiers/math_verifier.py b/camel/verifiers/math_verifier.py deleted file mode 100644 index 00d383af4d..0000000000 --- a/camel/verifiers/math_verifier.py +++ /dev/null @@ -1,180 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= - - -import re -from typing import Any, ClassVar, Dict, Optional - -from datasets import Dataset - -from camel.agents import ChatAgent -from camel.responses import ChatAgentResponse -from camel.verifiers.base_verifier import BaseVerifier -from camel.verifiers.types import VerificationResult - - -class MathVerifier(BaseVerifier): - """Verifier for mathematical problems using LLM.""" - - DEFAULT_CRITERIA: ClassVar[Dict[str, Any]] = { - "numerical_tolerance": 1e-6, - "verify_steps": True, - } - - def __init__( - self, - criteria: Optional[Dict[str, Any]] = None, - agent: Optional[ChatAgent] = None, - ) -> None: - """Initialize the verifier. 
- - Args: - criteria: Optional verification criteria - agent: ChatAgent instance for verification - """ - super().__init__(criteria) - self.agent = agent - - def verify( - self, data: Dataset, criteria: Optional[Dict[str, Any]] = None - ) -> Dataset: - """Verify mathematical solutions in the dataset.""" - criteria = {**self.DEFAULT_CRITERIA, **(criteria or {})} - - def verify_single(example): - result = self._verify_solution( - question=example["question"], - solution=example["solution"], - answer=example.get("answer"), - criteria=criteria, - ) - - example["verification_result"] = result.dict() - example["correct"] = result.passed - return example - - return data.map(verify_single) - - def _verify_solution( - self, - question: str, - solution: str, - answer: Optional[str] = None, - criteria: Optional[Dict[str, Any]] = None, - ) -> VerificationResult: - """Verify a single mathematical solution using LLM.""" - try: - # Extract boxed answers - solution_value = self._extract_boxed_answer(solution) - answer_value = ( - self._extract_boxed_answer(answer) if answer else None - ) - - if not solution_value: - return VerificationResult( - score=0.0, - passed=False, - details={"error": "No \\boxed{} answer found"}, - feedback="Solution must include a \\boxed{} answer", - error=None, - ) - - # Construct prompt for LLM - prompt = self._construct_verification_prompt( - question=question, solution=solution_value, answer=answer_value - ) - - if self.agent is None: - raise ValueError("ChatAgent not initialized") - - # Get LLM response - response: ChatAgentResponse = self.agent.step(prompt) - verification_result = self._parse_llm_response( - response.msgs[0].content - ) - - return VerificationResult( - score=verification_result["score"], - passed=verification_result["passed"], - details=verification_result["details"], - feedback=verification_result["feedback"], - error=None, - ) - - except Exception as e: - return VerificationResult( - score=0.0, - passed=False, - details={"error": str(e)}, - feedback=f"Verification failed: {e!s}", - error=str(e), - ) - - def _extract_boxed_answer(self, text: Optional[str]) -> Optional[str]: - """Extract answer from \\boxed{} notation.""" - if not text: - return None - - boxed_pattern = r'\\boxed\s*{\s*([^}]+)\s*}' - match = re.search(boxed_pattern, text) - return match.group(1).strip() if match else None - - def _construct_verification_prompt( - self, question: str, solution: str, answer: Optional[str] - ) -> str: - """Construct prompt for LLM verification.""" - prompt = ( - "Please verify this mathematical solution.\n\n" - f"Question: {question}\n" - f"Student's solution: {solution}\n" - ) - - if answer: - prompt += f"Correct answer: {answer}\n" - - prompt += ( - "\nPlease verify if the solution is correct and provide feedback " - "in the following JSON format:\n" - "{\n" - ' "score": ,\n' - ' "passed": ,\n' - ' "details": {},\n' - ' "feedback": ""\n' - "}" - ) - - return prompt - - def _parse_llm_response(self, response: str) -> Dict[str, Any]: - """Parse LLM response into verification result.""" - try: - # Basic parsing in practice you might want more robust JSON parsing - import json - - result = json.loads(response) - - # Ensure required fields - required_fields = ["score", "passed", "details", "feedback"] - if not all(field in result for field in required_fields): - raise ValueError("Missing required fields in LLM response") - - return result - - except Exception as e: - return { - "score": 0.0, - "passed": False, - "details": {"error": f"Failed to parse LLM 
response: {e!s}"}, - "feedback": "Error in LLM verification", - } diff --git a/camel/verifiers/router/verification_router.py b/camel/verifiers/router/verification_router.py deleted file mode 100644 index 36c704f49f..0000000000 --- a/camel/verifiers/router/verification_router.py +++ /dev/null @@ -1,99 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -"""Verifier module for routing to domain-specific verifiers.""" - -from typing import Any, ClassVar, Dict, Optional, Type, Union - -from datasets import Dataset - -from camel.verifiers.base_verifier import BaseVerifier -from camel.verifiers.math_verifier import MathVerifier - - -class DomainVerifier: - """Main verifier class that routes to domain-specific verifiers.""" - - VERIFIERS: ClassVar[Dict[str, Type[BaseVerifier]]] = { - "math": MathVerifier, - } - - @classmethod - def verify( - cls, - domain: str, - data: Union[Dataset, Dict[str, Any]], - criteria: Optional[Dict[str, Any]] = None, - ) -> Dataset: - """Verify data using appropriate domain-specific verifier. - - Args: - domain: Domain identifier ("math", "code", etc) - data: Data to verify (Dataset or dict) - criteria: Optional verification criteria - - Returns: - Verified dataset with results - - Raises: - ValueError: If domain is not supported and strict_mode is True - """ - # Convert dict to dataset if needed - if isinstance(data, dict): - data = Dataset.from_dict(data) - - # Get appropriate verifier - verifier_cls = cls.VERIFIERS.get(domain) - if verifier_cls is None: - if criteria and criteria.get("strict_mode", False): - raise ValueError(f"Unsupported domain: {domain}") - # Default to marking everything as correct if no specific verifier - return data.add_column("correct", [True] * len(data)) - - # Create verifier instance and verify - verifier = verifier_cls(criteria=criteria) - verified_data = verifier.verify(data) - - # Filter to only correct results if specified - if criteria and criteria.get("filter_incorrect", False): - verified_data = verified_data.filter(lambda x: x["correct"]) - - return verified_data - - @classmethod - def get_supported_domains(cls) -> list[str]: - """Get list of supported verification domains. - - Returns: - List of domain identifiers that have registered verifiers - """ - return list(cls.VERIFIERS.keys()) - - @classmethod - def register_verifier( - cls, domain: str, verifier_cls: Type[BaseVerifier] - ) -> None: - """Register a new domain verifier. 
- - Args: - domain: Domain identifier - verifier_cls: Verifier class to register - - Raises: - ValueError: If domain is already registered - """ - if domain in cls.VERIFIERS: - raise ValueError( - f"Domain {domain} already has a registered verifier" - ) - cls.VERIFIERS[domain] = verifier_cls diff --git a/camel/verifiers/types.py b/camel/verifiers/types.py deleted file mode 100644 index 271776dbf2..0000000000 --- a/camel/verifiers/types.py +++ /dev/null @@ -1,84 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -"""Type definitions for verification results and metrics.""" - -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, Field - - -class VerificationMetrics(BaseModel): - """Metrics used in verification process.""" - - name: str = Field(..., description="Name of the metric") - value: float = Field( - ..., ge=0, le=1, description="Metric value between 0 and 1" - ) - weight: float = Field( - 1.0, ge=0, description="Weight of this metric in overall score" - ) - threshold: float = Field( - 0.7, ge=0, le=1, description="Passing threshold for this metric" - ) - - class Config: - """Pydantic configuration.""" - - frozen = True - - -class VerificationResult(BaseModel): - """Results from verification process.""" - - score: float = Field( - ..., - ge=0, - le=1, - description="Overall verification score between 0 and 1", - ) - passed: bool = Field( - ..., description="Whether verification passed overall threshold" - ) - details: Dict[str, Any] = Field( - default_factory=dict, description="Detailed verification results" - ) - metrics: List[VerificationMetrics] = Field( - default_factory=list, description="List of individual metrics" - ) - feedback: str = Field(..., description="Human-readable feedback message") - error: Optional[str] = Field( - None, description="Error message if verification failed" - ) - - def dict(self, *args, **kwargs) -> Dict[str, Any]: - """Convert to dictionary, handling nested models.""" - d = super().dict(*args, **kwargs) - if "metrics" in d: - d["metrics"] = [m.dict() for m in self.metrics] - return d - - @classmethod - def from_dict(cls, data: Dict[str, Any]) -> "VerificationResult": - """Create from dictionary, handling nested models.""" - if "metrics" in data: - data["metrics"] = [ - VerificationMetrics(**m) if isinstance(m, dict) else m - for m in data["metrics"] - ] - return cls(**data) - - class Config: - """Pydantic configuration.""" - - frozen = True diff --git a/examples/verifiers/code_example.py b/examples/verifiers/code_example.py new file mode 100644 index 0000000000..517c88c534 --- /dev/null +++ b/examples/verifiers/code_example.py @@ -0,0 +1,187 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + +from pprint import pprint + +from datasets import Dataset + +from camel.verifiers import CodeVerifier + + +def main(): + print("\nExample 1: Basic Function Test") + verifier = CodeVerifier(require_confirmation=False) + result = verifier.verify( + { + "code": ["def add(a, b): return a + b"], + "language": ["python"], + "test_cases": [ + [ + {"inputs": {"a": 1, "b": 2}, "expected": {"add(a, b)": 3}}, + { + "inputs": {"a": -1, "b": 1}, + "expected": {"add(a, b)": 0}, + }, + ] + ], + } + ) + pprint(result[0]["verification_result"]) + + # Example 2: Multiple Solutions + print("\nExample 2: Multiple Solutions") + data = Dataset.from_dict( + { + "code": [ + "def factorial(n): return 1 if n <= 1 else n * factorial(n-1)", + "def factorial(n): return n * factorial(n-1) if n > 1 else 1", + ], + "language": ["python", "python"], + "test_cases": [ + [{"inputs": {"n": 5}, "expected": {"factorial(n)": 120}}], + [{"inputs": {"n": 5}, "expected": {"factorial(n)": 120}}], + ], + } + ) + results = verifier.verify(data) + for i, result in enumerate(results): + print(f"Solution {i+1} result:", result["verification_result"]) + + # Example 3: Using subprocess interpreter + print("\nExample 3: External Imports") + verifier = CodeVerifier(interpreter="subprocess") + result = verifier.verify( + { + "code": [ + """ +import numpy as np +def process_array(): + arr = np.array([1, 2, 3]) + return arr.mean() + """ + ], + "language": ["python"], + "test_cases": [ + [{"inputs": {}, "expected": {"process_array()": 2.0}}] + ], + } + ) + print("Result:", result[0]["verification_result"]) + + # Example 4: Syntax Error + print("\nExample 4: Syntax Error") + result = verifier.verify( + { + "code": ["def broken_function(x: return x"], # Syntax error + "language": ["python"], + } + ) + print("Result:", result[0]["verification_result"]) + + +if __name__ == "__main__": + main() + + +""" +Example Output: + +Example 1: Basic Function Test +Map: 100%|██████████| 1/1 [00:00<00:00, 14.90 examples/s] +{ + 'details': { + 'test_count': 2, + 'tests': [ + { + 'output': 'Test passed: 3\n', + 'status': 'passed', + 'test_case': 1 + }, + { + 'output': 'Test passed: 0\n', + 'status': 'passed', + 'test_case': 2 + } + ] + }, + 'error': None, + 'passed': True, + 'test_results': [True, True] +} + +Example 2: Multiple Solutions +Map: 100%|██████████| 2/2 [00:00<00:00, 25.12 examples/s] +Solution 1 result: { + 'details': { + 'test_count': 1, + 'tests': [ + { + 'output': 'Test passed: 120\n', + 'status': 'passed', + 'test_case': 1 + } + ] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} +Solution 2 result: { + 'details': { + 'test_count': 1, + 'tests': [ + { + 'output': 'Test passed: 120\n', + 'status': 'passed', + 'test_case': 1 + } + ] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Example 3: External Imports +Map: 100%|██████████| 1/1 [00:00<00:00, 3.33 examples/s] +Result: { + 'details': { + 'test_count': 1, + 'tests': [ + { + 'output': 'Test passed: 2.0\n', + 'status': 'passed', + 'test_case': 1 + } + ] + }, + 
'error': None, + 'passed': True, + 'test_results': [True] +} + +Example 4: Syntax Error +Map: 100%|██████████| 1/1 [00:00<00:00, 661.88 examples/s] +Result: { + 'details': { + 'line': 1, + 'offset': 24, + 'text': 'def broken_function(x: return x\n', + 'type': 'syntax_error' + }, + 'error': 'Syntax error: invalid syntax (, line 1)', + 'passed': False, + 'test_results': [] +} +""" diff --git a/examples/verifiers/math_example.py b/examples/verifiers/math_example.py deleted file mode 100644 index 2eb897812a..0000000000 --- a/examples/verifiers/math_example.py +++ /dev/null @@ -1,76 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -"""Example usage of math verifier with Gurobi.""" - -import os -import sys - -from camel.verifiers import DomainVerifier - -# Add project root to Python path if not installed -project_root = os.path.abspath( - os.path.join(os.path.dirname(__file__), "../..") -) -if project_root not in sys.path: - sys.path.append(project_root) - - -def main(): - # Example problems - data = { - "question": [ - "Solve: 2x + 3 = 7", - "Maximize: z = 3x + 4y subject to: x + y <= 10, x >= 0, y >= 0", - ], - "solution": ["x = 2", "x = 6, y = 4"], - "answer": [ - "2", - "36", # Optimal objective value - ], - } - - verified_data = DomainVerifier.verify( - domain="math", - data=data, - criteria={ - "numerical_tolerance": 1e-8, - "verify_steps": True, - "check_feasibility": True, - "verify_optimality": True, - }, - ) - - # Print detailed results - for item in verified_data: - print("\nVerification Results:") - print("-" * 50) - print(f"Question: {item['question']}") - print(f"Solution: {item['solution']}") - print(f"Expected Answer: {item['answer']}") - print(f"Correct: {item['correct']}") - - result = item['verification_result'] - print("\nDetails:") - print(f"Score: {result['score']:.2f}") - print(f"Passed: {result['passed']}") - print(f"Feedback: {result['feedback']}") - - if 'details' in result: - print("\nComponent Scores:") - for key, value in result['details'].items(): - print(f"- {key}: {value}") - - -if __name__ == "__main__": - main() diff --git a/examples/verifiers/math_program_example.py b/examples/verifiers/math_program_example.py new file mode 100644 index 0000000000..1868e6cbd0 --- /dev/null +++ b/examples/verifiers/math_program_example.py @@ -0,0 +1,308 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + +from camel.verifiers import CodeVerifier + + +def main(): + verifier = CodeVerifier(interpreter="subprocess") + + # Example 1: Matrix Operations + matrix_test = { + "code": [ + """ +import numpy as np + +def matrix_multiply(A, B): + return np.dot(A, B) +""" + ], + "language": ["python"], + "test_cases": [ + [ + { + "inputs": {"A": [[1, 2], [3, 4]], "B": [[5, 6], [7, 8]]}, + "expected": { + "np.allclose(matrix_multiply(A, B), " + "np.array([[19, 22], [43, 50]]))": True + }, + } + ] + ], + } + + print("\nTesting Matrix Operations:") + result = verifier.verify(matrix_test) + print(result[0]["verification_result"]) + + # Example 2: Linear System Solver + linear_system_test = { + "code": [ + """ +import numpy as np + +def solve_linear_system(A, b): + return np.linalg.solve(A, b) +""" + ], + "language": ["python"], + "test_cases": [ + [ + { + "inputs": {"A": [[2, 1], [1, 3]], "b": [4, 5]}, + "expected": { + "np.allclose(solve_linear_system(A, b), " + "np.array([1.4, 1.2]))": True + }, + } + ] + ], + } + + print("\nTesting Linear System Solver:") + result = verifier.verify(linear_system_test) + print(result[0]["verification_result"]) + + # Example 3: Eigenvalue Decomposition + eigenvalue_test = { + "code": [ + """ +import numpy as np + +def compute_eigendecomposition(A): + eigenvalues, eigenvectors = np.linalg.eig(A) + # Sort by eigenvalues to ensure consistent order + idx = eigenvalues.argsort() + return eigenvalues[idx], eigenvectors[:, idx] +""" + ], + "language": ["python"], + "test_cases": [ + [ + { + "inputs": {"A": [[4, -1], [2, 1]]}, + "expected": { + "np.allclose(compute_eigendecomposition(A)[0], " + "np.array([2., 3.]))": True + }, + } + ] + ], + } + + print("\nTesting Eigenvalue Decomposition:") + result = verifier.verify(eigenvalue_test) + print(result[0]["verification_result"]) + + # Example 4: Singular Value Decomposition + svd_test = { + "code": [ + """ +import numpy as np + +def compute_svd(A): + U, s, Vh = np.linalg.svd(A) + return s # Return singular values +""" + ], + "language": ["python"], + "test_cases": [ + [ + { + "inputs": {"A": [[1, 2], [3, 4], [5, 6]]}, + "expected": { + "np.allclose(compute_svd(A), " + "np.array([9.52551809, 0.51430058]))": True + }, + } + ] + ], + } + + print("\nTesting SVD:") + result = verifier.verify(svd_test) + print(result[0]["verification_result"]) + + # Example 5: Optimization (Minimization) + optimization_test = { + "code": [ + """ +import numpy as np +from scipy.optimize import minimize + +def optimize_quadratic(x0): + # Minimize f(x,y) = x^2 + y^2 + 2x + 4y + 4 + def objective(x): + return x[0]**2 + x[1]**2 + 2*x[0] + 4*x[1] + 4 + + result = minimize(objective, x0, method='BFGS') + return result.x +""" + ], + "language": ["python"], + "test_cases": [ + [ + { + "inputs": {"x0": [0, 0]}, + "expected": { + "np.allclose(optimize_quadratic(x0), " + "np.array([-1., -2.]))": True + }, + } + ] + ], + } + + print("\nTesting Optimization:") + result = verifier.verify(optimization_test) + print(result[0]["verification_result"]) + + # Example 6: Numerical Integration + integration_test = { + "code": [ + """ +import numpy as np +from scipy import integrate + +def integrate_function(a, b): + # Integrate sin(x^2) from a to b + def f(x): + return np.sin(x**2) + + result, _ = integrate.quad(f, a, b) + return result +""" + ], + "language": ["python"], + "test_cases": [ + [ + 
{ + "inputs": {"a": 0, "b": 1}, + "expected": { + "np.allclose(integrate_function(a, b), " + "0.3102683017233811)": True + }, + } + ] + ], + } + + print("\nTesting Numerical Integration:") + result = verifier.verify(integration_test) + print(result[0]["verification_result"]) + + +if __name__ == "__main__": + main() + + +""" +Example Output: + +Testing Matrix Operations: +Map: 100%|██████████| 1/1 [00:00<00:00, 8.49 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Testing Linear System Solver: +Map: 100%|██████████| 1/1 [00:00<00:00, 6.27 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Testing Eigenvalue Decomposition: +Map: 100%|██████████| 1/1 [00:00<00:00, 11.11 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Testing SVD: +Map: 100%|██████████| 1/1 [00:00<00:00, 10.40 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Testing Optimization: +Map: 100%|██████████| 1/1 [00:00<00:00, 2.97 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Testing Numerical Integration: +Map: 100%|██████████| 1/1 [00:00<00:00, 3.61 examples/s] +{ + 'details': { + 'test_count': 1, + 'tests': [{ + 'output': 'Test passed: True\n', + 'status': 'passed', + 'test_case': 1 + }] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} +""" From e5f7a64c3d7559e5f4012bbc6b5e9ebe4c7deb64 Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Sat, 8 Feb 2025 09:32:21 +0530 Subject: [PATCH 3/8] tweaks --- camel/verifiers/code_verifier.py | 7 ++- examples/verifiers/code_example.py | 86 +++++++++++++++++++++++++++--- 2 files changed, 84 insertions(+), 9 deletions(-) diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index 90932e3c33..8d40a224bd 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -121,7 +121,12 @@ def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: except Exception as e: return self._handle_execution_error(example, e) - return data.map(verify_single) + # For Parallelization + num_proc = min(4, len(data)) + + return data.map( + verify_single, num_proc=num_proc, desc="Verifying code" + ) def _prepare_test_code( self, diff --git a/examples/verifiers/code_example.py b/examples/verifiers/code_example.py index 517c88c534..7007bb0fcf 100644 --- a/examples/verifiers/code_example.py +++ b/examples/verifiers/code_example.py @@ -79,8 +79,43 @@ def process_array(): ) print("Result:", result[0]["verification_result"]) - # Example 4: Syntax Error - print("\nExample 4: Syntax Error") + # Example 4: Multi-threaded verification + print("\nExample 4: Multi-threaded verification") + verifier = CodeVerifier(interpreter="subprocess") + result = verifier.verify( + { + "code": [ + """ + def square(x): + 
return x * x + """, + """ + def cube(x): + return x * x * x + """, + """ + def double(x): + return x + x + """, + """ + def half(x): + return x / 2 + """, + ], + "language": ["python"] * 4, + "test_cases": [ + [{"inputs": {"x": 4}, "expected": {"square(x)": 16}}], + [{"inputs": {"x": 3}, "expected": {"cube(x)": 27}}], + [{"inputs": {"x": 5}, "expected": {"double(x)": 10}}], + [{"inputs": {"x": 8}, "expected": {"half(x)": 4.0}}], + ], + } + ) + for i, result in enumerate(results): + print(f"\nFunction {i+1} result:", result["verification_result"]) + + # Example 5: Syntax Error + print("\nExample 5: Syntax Error") result = verifier.verify( { "code": ["def broken_function(x: return x"], # Syntax error @@ -98,14 +133,14 @@ def process_array(): Example Output: Example 1: Basic Function Test -Map: 100%|██████████| 1/1 [00:00<00:00, 14.90 examples/s] +Verifying code: 100%|██████████| 1/1 [00:00<00:00, 16.84 examples/s] { 'details': { 'test_count': 2, 'tests': [ { 'output': 'Test passed: 3\n', - 'status': 'passed', + 'status': 'passed', 'test_case': 1 }, { @@ -121,7 +156,7 @@ def process_array(): } Example 2: Multiple Solutions -Map: 100%|██████████| 2/2 [00:00<00:00, 25.12 examples/s] +Verifying code (num_proc=2):100%|██████████| 2/2 [00:00<00:00,15.64 examples/s] Solution 1 result: { 'details': { 'test_count': 1, @@ -154,7 +189,7 @@ def process_array(): } Example 3: External Imports -Map: 100%|██████████| 1/1 [00:00<00:00, 3.33 examples/s] +Verifying code: 100%|██████████| 1/1 [00:00<00:00, 9.29 examples/s] Result: { 'details': { 'test_count': 1, @@ -171,8 +206,43 @@ def process_array(): 'test_results': [True] } -Example 4: Syntax Error -Map: 100%|██████████| 1/1 [00:00<00:00, 661.88 examples/s] +Example 4: Multi-threaded verification +Verifying code(num_proc=4):100%|██████████| 4/4 [00:00<00:00, 35.86 examples/s] + +Function 1 result: { + 'details': { + 'test_count': 1, + 'tests': [ + { + 'output': 'Test passed: 120\n', + 'status': 'passed', + 'test_case': 1 + } + ] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Function 2 result: { + 'details': { + 'test_count': 1, + 'tests': [ + { + 'output': 'Test passed: 120\n', + 'status': 'passed', + 'test_case': 1 + } + ] + }, + 'error': None, + 'passed': True, + 'test_results': [True] +} + +Example 5: Syntax Error +Verifying code: 100%|██████████| 1/1 [00:00<00:00, 504.24 examples/s] Result: { 'details': { 'line': 1, From 605b4caeb74c582b72d8488bc8508e0cc6b8a49f Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Sun, 9 Feb 2025 00:10:06 +0530 Subject: [PATCH 4/8] updates --- camel/verifiers/code_verifier.py | 32 ++-------------------- examples/verifiers/code_example.py | 10 +++++-- examples/verifiers/math_program_example.py | 4 ++- 3 files changed, 12 insertions(+), 34 deletions(-) diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index 8d40a224bd..954f836f80 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -18,7 +18,6 @@ from camel.interpreters import ( BaseInterpreter, - SubprocessInterpreter, ) @@ -33,7 +32,7 @@ class CodeVerifier: def __init__( self, - interpreter: str = "subprocess", + interpreter: BaseInterpreter, require_confirmation: bool = False, ) -> None: r"""Initialize the code verifier. @@ -44,34 +43,7 @@ def __init__( require_confirmation (bool, optional): Whether to require user confirmation before execution. 
(default: :obj:`False`) """ - self.interpreter = self._get_interpreter( - interpreter, require_confirmation - ) - - def _get_interpreter( - self, - interpreter_type: str, - require_confirmation: bool, - ) -> BaseInterpreter: - r"""Initialize appropriate interpreter based on type. - - Args: - interpreter_type (str): Type of interpreter to use - require_confirmation (bool): Whether to require confirmation - - Returns: - BaseInterpreter: Configured interpreter instance - - Raises: - ValueError: If interpreter type is not supported - """ - if interpreter_type == "subprocess": - return SubprocessInterpreter( - require_confirm=require_confirmation, - print_stdout=False, - print_stderr=True, - ) - raise ValueError(f"Unsupported interpreter type: {interpreter_type}") + self.interpreter = interpreter def verify( self, diff --git a/examples/verifiers/code_example.py b/examples/verifiers/code_example.py index 7007bb0fcf..60e58894f0 100644 --- a/examples/verifiers/code_example.py +++ b/examples/verifiers/code_example.py @@ -16,12 +16,14 @@ from datasets import Dataset +from camel.interpreters import SubprocessInterpreter from camel.verifiers import CodeVerifier def main(): print("\nExample 1: Basic Function Test") - verifier = CodeVerifier(require_confirmation=False) + interpreter = SubprocessInterpreter(require_confirm=False) + verifier = CodeVerifier(interpreter=interpreter) result = verifier.verify( { "code": ["def add(a, b): return a + b"], @@ -60,7 +62,8 @@ def main(): # Example 3: Using subprocess interpreter print("\nExample 3: External Imports") - verifier = CodeVerifier(interpreter="subprocess") + interpreter = SubprocessInterpreter(require_confirm=False) + verifier = CodeVerifier(interpreter=interpreter) result = verifier.verify( { "code": [ @@ -81,7 +84,8 @@ def process_array(): # Example 4: Multi-threaded verification print("\nExample 4: Multi-threaded verification") - verifier = CodeVerifier(interpreter="subprocess") + interpreter = SubprocessInterpreter() + verifier = CodeVerifier(interpreter=interpreter) result = verifier.verify( { "code": [ diff --git a/examples/verifiers/math_program_example.py b/examples/verifiers/math_program_example.py index 1868e6cbd0..b99647c0ec 100644 --- a/examples/verifiers/math_program_example.py +++ b/examples/verifiers/math_program_example.py @@ -12,11 +12,13 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +from camel.interpreters import SubprocessInterpreter from camel.verifiers import CodeVerifier def main(): - verifier = CodeVerifier(interpreter="subprocess") + interpreter = SubprocessInterpreter(require_confirm=False) + verifier = CodeVerifier(interpreter=interpreter) # Example 1: Matrix Operations matrix_test = { From cd456d426eeaa72128371431080611c68eed5052 Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Mon, 10 Feb 2025 07:22:56 +0530 Subject: [PATCH 5/8] added logging --- camel/verifiers/code_verifier.py | 41 +++++++++++++++++++++++++++--- examples/verifiers/code_example.py | 32 ++++++++--------------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index 954f836f80..d725fed5ed 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -12,6 +12,7 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. 
========= +import os from typing import Any, Dict, List, Optional, Union from datasets import Dataset @@ -19,6 +20,9 @@ from camel.interpreters import ( BaseInterpreter, ) +from camel.logger import logging + +logger = logging.getLogger(__name__) class CodeVerifier: @@ -38,12 +42,15 @@ def __init__( r"""Initialize the code verifier. Args: - interpreter (str, optional): Type of interpreter to use. - (default: :obj:`"subprocess"`) + interpreter (BaseInterpreter): The interpreter instance to use for + code execution require_confirmation (bool, optional): Whether to require user confirmation before execution. (default: :obj:`False`) """ self.interpreter = interpreter + logger.info( + "Initialized CodeVerifier with interpreter %s", interpreter + ) def verify( self, @@ -66,6 +73,8 @@ def verify( if isinstance(data, dict): data = Dataset.from_dict(data) + logger.info("Starting verification of %d examples", len(data)) + def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: r"""Verify a single code example. @@ -79,11 +88,18 @@ def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: language = example.get("language", "python") test_cases = example.get("test_cases", []) + logger.debug( + "Verifying code in %s with %d test cases", + language, + len(test_cases), + ) + # Check syntax first try: if language == "python": compile(code, '', 'exec') except SyntaxError as e: + logger.warning("Syntax error in code: %s", e) return self._handle_syntax_error(example, e) try: @@ -91,10 +107,13 @@ def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: example, code, language, test_cases ) except Exception as e: + logger.error("Execution error: %s", e) return self._handle_execution_error(example, e) # For Parallelization - num_proc = min(4, len(data)) + default_cpus = max(1, min(8, (os.cpu_count() or 1) // 2)) + num_proc = min(default_cpus, len(data)) + logger.info("Using %d processes for parallel verification", num_proc) return data.map( verify_single, num_proc=num_proc, desc="Verifying code" @@ -114,6 +133,9 @@ def _prepare_test_code( Returns: str: Complete test code with assertions """ + logger.debug( + "Preparing test code with inputs: %s", test_case.get("inputs") + ) full_code = [code] # Add test case setup @@ -157,6 +179,9 @@ def _handle_syntax_error( Returns: Dict[str, Any]: Updated example with error information """ + logger.warning( + "Handling syntax error: %s at line %d", error, error.lineno + ) return { **example, "verification_result": { @@ -186,6 +211,7 @@ def _handle_execution_error( Returns: Dict[str, Any]: Updated example with error information """ + logger.error("Handling execution error: %s", error) example["verification_result"] = { "passed": False, "test_results": [], @@ -219,7 +245,9 @@ def _run_test_cases( test_details = [] if test_cases: + logger.info("Running %d test cases", len(test_cases)) for i, test_case in enumerate(test_cases): + logger.debug("Running test case %d", i + 1) test_code = self._prepare_test_code(code, test_case) try: output = self.interpreter.run(test_code, language) @@ -231,6 +259,7 @@ def _run_test_cases( "output": output, } ) + logger.debug("Test case %d passed", i + 1) except Exception as e: test_results.append(False) test_details.append( @@ -240,9 +269,13 @@ def _run_test_cases( "error": str(e), } ) + logger.warning("Test case %d failed: %s", i + 1, e) + + passed = all(test_results) if test_results else True + logger.info("All test cases %s", "passed" if passed else "failed") example["verification_result"] = { - "passed": all(test_results) if 
test_results else True, + "passed": passed, "test_results": test_results, "error": None, "details": { diff --git a/examples/verifiers/code_example.py b/examples/verifiers/code_example.py index 60e58894f0..71cfe59bfc 100644 --- a/examples/verifiers/code_example.py +++ b/examples/verifiers/code_example.py @@ -84,34 +84,22 @@ def process_array(): # Example 4: Multi-threaded verification print("\nExample 4: Multi-threaded verification") - interpreter = SubprocessInterpreter() + interpreter = SubprocessInterpreter(require_confirm=False) verifier = CodeVerifier(interpreter=interpreter) - result = verifier.verify( + results = verifier.verify( { "code": [ - """ - def square(x): - return x * x - """, - """ - def cube(x): - return x * x * x - """, - """ - def double(x): - return x + x - """, - """ - def half(x): - return x / 2 - """, + "def square(x):\n return x * x\nresult = square(4)", + "def cube(x):\n return x * x * x\nresult = cube(3)", + "def double(x):\n return x + x\nresult = double(5)", + "def half(x):\n return x / 2\nresult = half(8)", ], "language": ["python"] * 4, "test_cases": [ - [{"inputs": {"x": 4}, "expected": {"square(x)": 16}}], - [{"inputs": {"x": 3}, "expected": {"cube(x)": 27}}], - [{"inputs": {"x": 5}, "expected": {"double(x)": 10}}], - [{"inputs": {"x": 8}, "expected": {"half(x)": 4.0}}], + [{"inputs": {}, "expected": {"result": 16}}], + [{"inputs": {}, "expected": {"result": 27}}], + [{"inputs": {}, "expected": {"result": 10}}], + [{"inputs": {}, "expected": {"result": 4.0}}], ], } ) From 58090c5a363f9d49180ddb9923a81ba6ff71aa63 Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Mon, 10 Feb 2025 07:45:08 +0530 Subject: [PATCH 6/8] language and test case validation --- camel/verifiers/code_verifier.py | 60 +++++++++++++++++++++++------- examples/verifiers/code_example.py | 33 ++++++++++++++++ 2 files changed, 80 insertions(+), 13 deletions(-) diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index d725fed5ed..3c589bd048 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -13,13 +13,11 @@ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= import os -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Union from datasets import Dataset -from camel.interpreters import ( - BaseInterpreter, -) +from camel.interpreters import BaseInterpreter, InterpreterError from camel.logger import logging logger = logging.getLogger(__name__) @@ -52,21 +50,13 @@ def __init__( "Initialized CodeVerifier with interpreter %s", interpreter ) - def verify( - self, - data: Union[Dataset, Dict[str, Any]], - criteria: Optional[Dict[str, Any]] = None, - ) -> Dataset: + def verify(self, data: Union[Dataset, Dict[str, Any]]) -> Dataset: r"""Verify code solutions. Args: data (Union[Dataset, Dict[str, Any]]): Data containing code to verify - criteria (Optional[Dict[str, Any]], optional): Optional - verification criteria for this specific call. - (default: :obj:`None`) - Returns: Dataset: Dataset with verification results added """ @@ -86,8 +76,32 @@ def verify_single(example: Dict[str, Any]) -> Dict[str, Any]: """ code = example.get("code", "") language = example.get("language", "python") + + # Validate language is supported by interpreter + supported_languages = self.interpreter.supported_code_types() + if language not in supported_languages: + logger.warning( + "Language %s not supported by interpreter %s. 
" + "Supported languages: %s", + language, + self.interpreter.__class__.__name__, + supported_languages, + ) + return self._handle_execution_error( + example, + InterpreterError(f"Language {language} not supported"), + ) + test_cases = example.get("test_cases", []) + try: + self._validate_test_cases(test_cases) + except ValueError as e: + logger.warning("Invalid test cases: %s", e) + return self._handle_execution_error( + example, ValueError(f"Invalid test cases: {e!s}") + ) + logger.debug( "Verifying code in %s with %d test cases", language, @@ -165,6 +179,26 @@ def _prepare_test_code( return "\n".join(full_code) + def _validate_test_cases(self, test_cases: List[Dict[str, Any]]) -> None: + """Validate test cases structure. + + Args: + test_cases (List[Dict[str, Any]]): List of test cases to validate + + Raises: + ValueError: If test cases are malformed + """ + if not isinstance(test_cases, list): + raise ValueError("Test cases must be provided as a list") + + for i, test_case in enumerate(test_cases): + if not isinstance(test_case, dict): + raise ValueError(f"Test case {i} must be a dictionary") + if not test_case.get("expected"): + raise ValueError( + f"Test case {i} must contain 'expected' results" + ) + def _handle_syntax_error( self, example: Dict[str, Any], diff --git a/examples/verifiers/code_example.py b/examples/verifiers/code_example.py index 71cfe59bfc..a7b70a2fe4 100644 --- a/examples/verifiers/code_example.py +++ b/examples/verifiers/code_example.py @@ -116,6 +116,18 @@ def process_array(): ) print("Result:", result[0]["verification_result"]) + # Example 6: Test Case Validation + print("\nExample 6: Test Case Validation") + # Invalid test case (not a list) + result = verifier.verify( + { + "code": ["def add(a, b): return a + b"], + "language": ["python"], + "test_cases": {"not": "a list"}, # Invalid: not a list + } + ) + print("Invalid test case (not a list):", result[0]["verification_result"]) + if __name__ == "__main__": main() @@ -246,4 +258,25 @@ def process_array(): 'passed': False, 'test_results': [] } + +Example 6: Test Case Validation +Verifying code: 0%| | 0/1 [00:00 Date: Mon, 10 Feb 2025 07:54:47 +0530 Subject: [PATCH 7/8] add base verifier --- camel/verifiers/__init__.py | 3 +- camel/verifiers/base_verifier.py | 53 ++++++++++++++++++++++++++++++++ camel/verifiers/code_verifier.py | 4 ++- 3 files changed, 58 insertions(+), 2 deletions(-) create mode 100644 camel/verifiers/base_verifier.py diff --git a/camel/verifiers/__init__.py b/camel/verifiers/__init__.py index 691346bc19..f1e7011d0e 100644 --- a/camel/verifiers/__init__.py +++ b/camel/verifiers/__init__.py @@ -12,6 +12,7 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +from .base_verifier import BaseVerifier from .code_verifier import CodeVerifier -__all__ = ['CodeVerifier'] +__all__ = ['BaseVerifier', 'CodeVerifier'] diff --git a/camel/verifiers/base_verifier.py b/camel/verifiers/base_verifier.py new file mode 100644 index 0000000000..10eabf366e --- /dev/null +++ b/camel/verifiers/base_verifier.py @@ -0,0 +1,53 @@ +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= + +from abc import ABC, abstractmethod +from typing import Any, Dict + + +class BaseVerifier(ABC): + """Base class for verifiers. + + Provides a common interface and structure for all verifiers + in the CAMEL library. + """ + + def __init__(self, **kwargs: Any) -> None: + """Initializes the verifier. + + Args: + **kwargs: Keyword arguments for verifier configuration. + These could be logging settings, paths, etc. + """ + self.config = kwargs + + @abstractmethod + def verify(self, *args: Any, **kwargs: Any) -> Dict[str, Any]: + """Performs the verification. + + This method MUST be implemented by subclasses. + + Args: + *args: Positional arguments specific to the verifier. + **kwargs: Keyword arguments specific to the verifier. + + Returns: + A dictionary containing the verification results. The keys + and values in this dictionary will depend on the specific + verifier. Common keys might include: + - "success": bool (whether the verification passed) + - "errors": list (list of error messages) + - "details": dict (additional details about the verification) + """ + pass diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index 3c589bd048..fbf79bc3d5 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -19,11 +19,12 @@ from camel.interpreters import BaseInterpreter, InterpreterError from camel.logger import logging +from camel.verifiers import BaseVerifier logger = logging.getLogger(__name__) -class CodeVerifier: +class CodeVerifier(BaseVerifier): r"""Verifier for code solutions. This verifier checks code solutions by: @@ -45,6 +46,7 @@ def __init__( require_confirmation (bool, optional): Whether to require user confirmation before execution. (default: :obj:`False`) """ + super().__init__() self.interpreter = interpreter logger.info( "Initialized CodeVerifier with interpreter %s", interpreter From abc44b9ab7d810b747dd7c19ce21b57dc2a224e5 Mon Sep 17 00:00:00 2001 From: Rishabh <134101578+GitHoobar@users.noreply.github.com> Date: Fri, 14 Feb 2025 09:48:25 +0530 Subject: [PATCH 8/8] changes --- camel/verifiers/__init__.py | 3 +- camel/verifiers/base_verifier.py | 53 -------------------------------- camel/verifiers/code_verifier.py | 3 +- 3 files changed, 2 insertions(+), 57 deletions(-) delete mode 100644 camel/verifiers/base_verifier.py diff --git a/camel/verifiers/__init__.py b/camel/verifiers/__init__.py index f1e7011d0e..691346bc19 100644 --- a/camel/verifiers/__init__.py +++ b/camel/verifiers/__init__.py @@ -12,7 +12,6 @@ # limitations under the License. # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= -from .base_verifier import BaseVerifier from .code_verifier import CodeVerifier -__all__ = ['BaseVerifier', 'CodeVerifier'] +__all__ = ['CodeVerifier'] diff --git a/camel/verifiers/base_verifier.py b/camel/verifiers/base_verifier.py deleted file mode 100644 index 10eabf366e..0000000000 --- a/camel/verifiers/base_verifier.py +++ /dev/null @@ -1,53 +0,0 @@ -# ========= Copyright 2023-2024 @ CAMEL-AI.org. 
All Rights Reserved. ========= -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= - -from abc import ABC, abstractmethod -from typing import Any, Dict - - -class BaseVerifier(ABC): - """Base class for verifiers. - - Provides a common interface and structure for all verifiers - in the CAMEL library. - """ - - def __init__(self, **kwargs: Any) -> None: - """Initializes the verifier. - - Args: - **kwargs: Keyword arguments for verifier configuration. - These could be logging settings, paths, etc. - """ - self.config = kwargs - - @abstractmethod - def verify(self, *args: Any, **kwargs: Any) -> Dict[str, Any]: - """Performs the verification. - - This method MUST be implemented by subclasses. - - Args: - *args: Positional arguments specific to the verifier. - **kwargs: Keyword arguments specific to the verifier. - - Returns: - A dictionary containing the verification results. The keys - and values in this dictionary will depend on the specific - verifier. Common keys might include: - - "success": bool (whether the verification passed) - - "errors": list (list of error messages) - - "details": dict (additional details about the verification) - """ - pass diff --git a/camel/verifiers/code_verifier.py b/camel/verifiers/code_verifier.py index fbf79bc3d5..ea34ed2edd 100644 --- a/camel/verifiers/code_verifier.py +++ b/camel/verifiers/code_verifier.py @@ -19,12 +19,11 @@ from camel.interpreters import BaseInterpreter, InterpreterError from camel.logger import logging -from camel.verifiers import BaseVerifier logger = logging.getLogger(__name__) -class CodeVerifier(BaseVerifier): +class CodeVerifier: r"""Verifier for code solutions. This verifier checks code solutions by:
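
For readers following the series end to end: after PATCH 8/8 the public surface is a standalone CodeVerifier that takes a BaseInterpreter instance and a Dataset-or-dict payload with parallel "code", "language", and "test_cases" columns. The sketch below is a minimal usage example assembled from the examples in the patches above (SubprocessInterpreter, CodeVerifier, the input schema, and the "verification_result" output key all appear in this series); the add() function and its concrete test values are illustrative only, and the assertion mechanics are inferred from _prepare_test_code rather than confirmed here.

from camel.interpreters import SubprocessInterpreter
from camel.verifiers import CodeVerifier

# As of PATCH 4/8 the verifier takes an interpreter instance, not a name string.
interpreter = SubprocessInterpreter(require_confirm=False)
verifier = CodeVerifier(interpreter=interpreter)

# One code sample with one test case. "inputs" appear to be bound as variables
# before each "expected" key is checked against its value (see _prepare_test_code
# in the patches above); the concrete values here are made up for illustration.
result = verifier.verify(
    {
        "code": ["def add(a, b): return a + b"],
        "language": ["python"],
        "test_cases": [
            [{"inputs": {"a": 1, "b": 2}, "expected": {"add(a, b)": 3}}]
        ],
    }
)
print(result[0]["verification_result"])  # e.g. {'passed': True, 'test_results': [True], ...}

Malformed payloads (unsupported language, test_cases that are not a list, or a test case without an "expected" entry) are reported through the same "verification_result" field rather than raised, as Example 6 in PATCH 6/8 demonstrates.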