modelscope
diff --git a/‎gen_data.py
+68 b/‎gen_data.py
+68
diff --git a/‎math_operations_dataset.jsonl
+1,000 b/‎math_operations_dataset.jsonl
+1,000
diff --git a/‎math_tool.py
+179 b/‎math_tool.py
+179
diff --git a/‎swift/plugin/orm.py
+60 b/‎swift/plugin/orm.py
+60
@@ -0,0 +1,68 @@
+"""
+Dataset Generator for Custom Mathematical Operations
+This module generates a dataset of custom mathematical expressions and their results.
+"""
+
+import random
+import json
+import re
+from typing import Dict, List
+from math_tool import parse_expression, SYMBOL_TO_OPERATION, OPERATION_DEFINITIONS
+
+def generate_safe_expression(min_ops: int = 1, max_ops: int = 6, rng=None) -> str:
+    """Build a random expression whose operands are small enough to evaluate safely.
+
+    The expression starts with an integer in [1, 20] followed by a random
+    number of ``<operator><operand>`` pairs. Operators are drawn uniformly
+    from ``@``, ``&``, ``$`` and ``^``.
+
+    Args:
+        min_ops: Minimum number of operator/operand pairs (inclusive).
+        max_ops: Maximum number of operator/operand pairs (inclusive).
+        rng: Optional ``random.Random``-like object providing ``randint`` and
+            ``choice``. Defaults to the module-level ``random`` generator;
+            pass a seeded instance for reproducible output.
+
+    Returns:
+        The expression as a string, e.g. ``"11@2&1$4^2"``.
+
+    Raises:
+        ValueError: If ``min_ops`` is greater than ``max_ops`` (raised by ``randint``).
+    """
+    rng = random if rng is None else rng
+
+    # Start with a moderate number
+    parts = [str(rng.randint(1, 20))]
+
+    # Append between min_ops and max_ops operator/operand pairs
+    for _ in range(rng.randint(min_ops, max_ops)):
+        op = rng.choice(['@', '&', '$', '^'])
+
+        # The right operand of @ is used as an exponent, so keep it small
+        operand = rng.randint(1, 3) if op == '@' else rng.randint(1, 10)
+
+        parts.append(op + str(operand))
+
+    return "".join(parts)
+
+def generate_dataset(num_samples: int = 1000, output_file: str = "math_operations_dataset.jsonl") -> None:
+    """
+    Generate random custom-operator expressions with their results and save them as JSONL.
+
+    Each line of the output file is a JSON object of the form
+    ``{"messages": [{"role": "user", "content": <prompt>}], "response": <result>}``.
+    Expressions whose evaluation or serialization fails are reported and
+    skipped, so the file may contain fewer than ``num_samples`` lines.
+
+    Args:
+        num_samples: Number of expressions to attempt
+        output_file: Path of the JSONL file to (over)write
+
+    Raises:
+        OSError: If the output file cannot be opened or written.
+    """
+    written = 0
+    with open(output_file, 'w', encoding='utf-8') as f:
+        for _ in range(num_samples):
+            # Generate a safe expression
+            expression = generate_safe_expression()
+
+            # Evaluate and serialize; only a failure of this sample is tolerated
+            try:
+                result = parse_expression(expression)
+                data_entry = {"messages": [{"role": "user", "content": f"Calculate the result of the expression: {expression}"}],"response":result}
+                line = json.dumps(data_entry)
+            except (ArithmeticError, TypeError, ValueError) as e:
+                print(f"Skipping problematic expression {expression}: {e}")
+                continue
+
+            # Write outside the try block so I/O errors are not mistaken for bad expressions
+            f.write(line + '\n')
+            written += 1
+
+    # Report the number of lines actually written, not the number attempted
+    print(f"Generated dataset with {written} samples and saved to {output_file}")
+
+# NOTE: there is no `if __name__ == "__main__"` guard, so this call runs whenever
+# the module is executed or imported and (over)writes the default output file.
+generate_dataset()
@@ -0,0 +1,179 @@
+"""
+Custom Mathematical Operations Tool Module
+This module provides a unified tool call interface for various mathematical operations.
+"""
+
+import json
+import re
+import math
+from typing import Tuple, Optional, Any, Dict
+
+# Define the custom mathematical operations with overflow protection
+def operation_at(a: int, b: int) -> float:
+    """@ operation: a to the power of b, plus the product a*b.
+
+    Overflow protection: when b exceeds 100, or when a exceeds 20 while b
+    exceeds 10, the power is not computed and the simplified value
+    ``a * b * 2`` is returned instead. The same simplified value is used if
+    the exact computation raises OverflowError.
+    """
+    try:
+        operands_too_large = b > 100 or (a > 20 and b > 10)
+        if not operands_too_large:
+            return a ** b + a * b
+    except OverflowError:
+        # Exact value is not representable; fall through to the approximation
+        pass
+    return a * b * 2  # Simplified approximation
+
+def operation_amp(a: int, b: int) -> float:
+    """& operation: the mean of a and b scaled by the absolute gap between them."""
+    return (a + b) / 2 * abs(a - b)
+
+def operation_dollar(a: int, b: int) -> float:
+    """$ operation: descending sum of b terms starting at a.
+
+    Computes a + (a-1) + (a-2) + ... + (a-b+1). If b is not positive or is
+    greater than a, a is returned unchanged. A fractional b is truncated to
+    a whole number of terms before either summation strategy is chosen, so
+    both strategies agree on the number of terms.
+
+    Raises:
+        OverflowError, ValueError: If b is infinite or NaN and passes the
+            range guard (it cannot be truncated to an integer).
+    """
+    if b <= 0 or b > a:
+        return a
+
+    # b <= a is guaranteed here, so b alone determines the number of terms
+    terms = int(b)
+
+    # For many terms, use the arithmetic sequence sum: n/2 * (first + last)
+    if terms > 1000:
+        last = a - terms + 1
+        return terms * (a + last) / 2
+
+    return sum(a - i for i in range(terms))
+
+def operation_caret(a: int, b: int) -> float:
+    """^ operation: product for two even operands, sum for two odd operands, otherwise a minus b."""
+    both_even = a % 2 == 0 and b % 2 == 0
+    if both_even:
+        return a * b
+
+    both_odd = a % 2 == 1 and b % 2 == 1
+    return a + b if both_odd else a - b
+
+class TOOL_CALL:
+    """Base interface for tool-call handlers; subclasses override ``__call__``."""
+
+    def __call__(self, completion: str) -> Tuple[Any, bool, Optional[float]]:
+        """Handle one completion string.
+
+        The base implementation does nothing useful and always raises
+        NotImplementedError.
+        """
+        raise NotImplementedError
+
+class MathOperation_Tool(TOOL_CALL):
+    """Unified tool for handling all mathematical operations.
+
+    A completion is accepted only if, after stripping surrounding whitespace,
+    it consists of a ``<think>...</think>`` section, a single newline, and a
+    ``<tool>...</tool>`` section whose content is a JSON object with the keys
+    ``"tool"`` (a registered operation name), ``"a"`` and ``"b"``.
+    """
+
+    # Strict completion format, compiled once for all calls
+    _COMPLETION_RE = re.compile(r'^<think>(.*?)</think>\n<tool>(.*?)</tool>$', re.DOTALL)
+
+    # Returned for every malformed or unsupported tool call
+    _FAILURE = ("", True, 0)
+
+    def __init__(self):
+        # Operation name (as used in the JSON "tool" field) -> implementation
+        self.operations = {
+            "at_operation": operation_at,
+            "amp_operation": operation_amp,
+            "dollar_operation": operation_dollar,
+            "caret_operation": operation_caret
+        }
+
+    def __call__(self, completion: str) -> Tuple[str, bool, float]:
+        """Parse a completion, run the requested operation and format its result.
+
+        Args:
+            completion: Raw model output to inspect.
+
+        Returns:
+            ``(text, flag, score)``. On success ``text`` is the result wrapped
+            in ``<result>`` tags, ``flag`` is False and ``score`` is 0.2. On
+            any failure the tuple is ``("", True, 0)``.
+            NOTE(review): ``flag`` presumably tells the caller to stop and
+            ``score`` is presumably a partial reward - confirm with the caller.
+        """
+        try:
+            # Check for required strict format
+            match = self._COMPLETION_RE.match(completion.strip())
+            if not match:
+                return self._FAILURE
+
+            # Parse JSON from tool content
+            try:
+                tool_data = json.loads(match.group(2).strip())
+            except json.JSONDecodeError:
+                return self._FAILURE
+
+            # Check if JSON has required fields
+            if not isinstance(tool_data, dict) or any(key not in tool_data for key in ("tool", "a", "b")):
+                return self._FAILURE
+
+            # Check if the requested operation exists
+            operation_func = self.operations.get(tool_data["tool"])
+            if operation_func is None:
+                return self._FAILURE
+
+            # Execute operation; operands that cannot be converted count as a failure
+            try:
+                a, b = float(tool_data["a"]), float(tool_data["b"])
+                result = operation_func(a, b)
+                return f"<result>\n{result}\n</result>", False, 0.2
+            except (ValueError, TypeError):
+                return self._FAILURE
+
+        except Exception as e:
+            # Boundary handler: a tool failure must never propagate to the caller
+            print(f"Error in MathOperation_Tool: {e}")
+            return self._FAILURE
+
+# Parser for expressions with overflow protection
+def parse_expression(expression: str) -> float:
+    """
+    Evaluate a custom mathematical expression strictly from left to right.
+    Supports operations: @, &, $, ^
+    Example: "11@2&1$44^2"
+
+    The first number seeds the running value; each later number is combined
+    with it using the most recently seen operator. Characters that are
+    neither digits nor operators are skipped by the tokenizer, a number that
+    arrives before any operator has been seen is ignored (except the first),
+    and None is returned when the expression contains no number at all.
+
+    Includes overflow protection for large numbers
+    """
+    operator_symbols = ('@', '&', '$', '^')
+
+    # Simplified approximations used when the exact operation overflows
+    approximations = {
+        '@': lambda value, operand: value * operand * 2,
+        '&': lambda value, operand: value * operand,
+        '$': lambda value, operand: value + operand,
+        '^': max,
+    }
+
+    accumulated = None
+    pending_op = None
+
+    # Tokenize the expression - find all numbers and operators
+    for token in re.findall(r'(\d+|\@|\&|\$|\^)', expression):
+        if token in operator_symbols:
+            pending_op = token
+            continue
+
+        try:
+            num = int(token)
+            if accumulated is None:
+                accumulated = num
+            elif pending_op == '@':
+                # Limit very large inputs for @ operation
+                oversized = accumulated > 10000 or num > 100
+                accumulated = accumulated * num * 2 if oversized else operation_at(accumulated, num)
+            elif pending_op == '&':
+                accumulated = operation_amp(accumulated, num)
+            elif pending_op == '$':
+                accumulated = operation_dollar(accumulated, num)
+            elif pending_op == '^':
+                accumulated = operation_caret(accumulated, num)
+        except (OverflowError, ValueError):
+            # Handle overflow by using a simplified calculation
+            approximate = approximations.get(pending_op)
+            if approximate is not None:
+                accumulated = approximate(accumulated, num)
+
+    return accumulated
+
+# Map each operator symbol to the operation name under which
+# MathOperation_Tool registers the matching function in `self.operations`.
+SYMBOL_TO_OPERATION = {
+    '@': 'at_operation',
+    '&': 'amp_operation',
+    '$': 'dollar_operation',
+    '^': 'caret_operation'
+}
+
+# Operation definitions for reference (human-readable formulas keyed by symbol).
+# NOTE: inside the "@" formula, `a^b` means ordinary exponentiation (operation_at
+# computes a ** b); it is not the custom `^` operator defined in the last entry.
+OPERATION_DEFINITIONS = {
+    "@": "a@b = (a^b) + (a*b)",
+    "&": "a&b = ((a+b)/2) * |a-b|",
+    "$": "a$b = a + (a-1) + (a-2) + ... + (a-b+1)",
+    "^": "a^b = a*b if both even, a+b if both odd, a-b otherwise"
+}
@@ -286,6 +286,65 @@ def __call__(self, completions, **kwargs) -> List[float]:
         matches = [re.match(pattern, content, re.DOTALL | re.MULTILINE) for content in completions]
         return [1.0 if match else 0.0 for match in matches]
 
+import re
+from typing import List
+
+class SimpleReward(ORM):
+    """Reward combining a strict output-format check with answer correctness.
+
+    A completion must consist of ``<think>...</think>``, a newline, and
+    ``<answer>...</answer>``. The first number found inside the answer
+    section is compared with the expected value.
+    """
+
+    # Group 1 captures the reasoning, group 2 the answer text
+    _FORMAT_RE = re.compile(r'^<think>(.*?)</think>\n<answer>(.*?)</answer>$', re.DOTALL | re.MULTILINE)
+
+    # First numeric literal: optional sign, optional decimals, optional exponent (e.g. 1.5e+20)
+    _NUMBER_RE = re.compile(r'-?\d+\.?\d*(?:[eE][+-]?\d+)?')
+
+    def __call__(self, completions, **kwargs) -> List[float]:
+        """
+        Score each completion on format and on the numeric answer it contains.
+
+        Args:
+            completions: List of completion strings to evaluate
+            kwargs: Additional arguments; the 'response' key holds the expected
+                answers. A list with one entry per completion is matched
+                element-wise; a list of any other length or a single value is
+                broadcast from its first element; a missing key means 0.
+
+        Returns:
+            List of scores: 0.0 for incorrect format, 0.2 for correct format but wrong answer,
+            1.2 for correct format and correct answer
+        """
+        expected_answers = self._expected_answers(kwargs.get("response"), len(completions))
+        return [self._score(content, expected) for content, expected in zip(completions, expected_answers)]
+
+    @staticmethod
+    def _expected_answers(responses, count):
+        """Return one expected answer per completion, broadcasting when needed."""
+        if isinstance(responses, (list, tuple)):
+            if len(responses) == count:
+                return list(responses)
+            # Length mismatch: fall back to the first entry (or 0 if empty)
+            responses = responses[0] if responses else 0
+        elif responses is None:
+            responses = 0
+        return [responses] * count
+
+    @classmethod
+    def _score(cls, content, expected_answer) -> float:
+        """Score a single completion against its expected answer."""
+        match = cls._FORMAT_RE.match(content)
+        if not match:
+            # Incorrect format
+            return 0.0
+
+        # The answer is the second capture group; the first is the <think> text
+        number_match = cls._NUMBER_RE.search(match.group(2).strip())
+        if number_match is None:
+            # No numeric value found in the answer
+            return 0.2
+
+        try:
+            # Compare with expected answer with some tolerance for floating point
+            if abs(float(number_match.group()) - float(expected_answer)) < 1e-6:
+                return 1.2
+        except (ValueError, TypeError):
+            # Either side could not be interpreted as a number
+            pass
+
+        # Correct format but wrong answer
+        return 0.2
 
 class ReActFormat(ORM):
 
@@ -384,4 +443,5 @@ def __call__(self, completions, **kwargs) -> List[float]:
     'react_format': ReActFormat,
     'cosine': CosineReward,
     'repetition': RepetitionPenalty,
+    'simplereward':SimpleReward
 }