modelscope
diff --git a/math_operations_dataset.jsonl b/math_operations_dataset.jsonl
-1,000
diff --git a/math_tool.py b/math_tool.py
-179
diff --git a/swift/plugin/orm.py b/swift/plugin/orm.py
+1-61
diff --git a/swift/plugin/tool_call.py b/swift/plugin/tool_call.py
+4-97
diff --git a/swift/trainers/arguments.py b/swift/trainers/arguments.py
+4-1
diff --git a/swift/trainers/rlhf_arguments.py b/swift/trainers/rlhf_arguments.py
+1-2
@@ -286,65 +286,6 @@ def __call__(self, completions, **kwargs) -> List[float]:
         matches = [re.match(pattern, content, re.DOTALL | re.MULTILINE) for content in completions]
         return [1.0 if match else 0.0 for match in matches]
 
-import re
-from typing import List
-
-class SimpleReward(ORM):
-
-    def __call__(self, completions, **kwargs) -> List[float]:
-        """
-        Reward function that checks if the completion has a specific format
-        and compares the extracted answer with the expected answer.
-        
-        Args:
-            completions: List of completion strings to evaluate
-            kwargs: Additional arguments, should include 'answer' key with expected float answer
-            
-        Returns:
-            List of scores: 0.0 for incorrect format, 0.2 for correct format but wrong answer,
-            1.2 for correct format and correct answer
-        """
-        # Format pattern to match <think>...</think><answer>...</answer>
-        format_pattern = r'^<think>(.*?)</think>\n<answer>(.*?)</answer>$'
-        scores = []
-        expected_answer = kwargs.get("response", [0])[0]  # Default to 0.0 if not provided
-        
-        for content in completions:
-            # Check if the format matches
-            match = re.match(format_pattern, content, re.DOTALL | re.MULTILINE)
-            
-            if not match:
-                # Incorrect format
-                scores.append(0.0)
-            else:
-                # Extract the answer
-                extracted_answer_str = match.group(1).strip()
-                
-                # Try to convert the extracted answer to float
-                try:
-                    # Extract numeric part from the answer
-                    # This regex finds all numbers (including decimals) in the string
-                    number_matches = re.findall(r'-?\d+\.?\d*', extracted_answer_str)
-                    
-                    if number_matches:
-                        # Take the first number found in the answer
-                        extracted_answer = float(number_matches[0])
-                        
-                        # Compare with expected answer with some tolerance for floating point
-                        if abs(extracted_answer - expected_answer) < 1e-6:
-                            # Correct format and correct answer
-                            scores.append(1.2)
-                        else:
-                            # Correct format but wrong answer
-                            scores.append(0.2)
-                    else:
-                        # No numeric value found in the answer
-                        scores.append(0.2)
-                except (ValueError, TypeError):
-                    # Failed to convert to float
-                    scores.append(0.2)
-        
-        return scores
 
 class ReActFormat(ORM):
 
@@ -442,6 +383,5 @@ def __call__(self, completions, **kwargs) -> List[float]:
     'format': Format,
     'react_format': ReActFormat,
     'cosine': CosineReward,
-    'repetition': RepetitionPenalty,
-    'simplereward':SimpleReward
+    'repetition': RepetitionPenalty
 }
@@ -1,102 +1,9 @@
-from typing import Union, Tuple, Optional
-
+from typing import Tuple,Any, Optional
 class TOOL_CALL:
-
-    def __call__(self, completion:str) -> Tuple[str, bool, Optional[int]]:
+    def __call__(self, completion: str) -> Tuple[Any, bool, Optional[float]]:
         raise NotImplementedError
 
 
-"""
-Search module for RL training loop.
-This module provides functions to search through vectorized documents and retrieve question-answer pairs.
-"""
-
-import json
-import re
-from typing import Tuple, Optional
-import traceback
-
-# Load the vectorstore when module is imported
-try:
-    vectorstore = load_vectorstore()
-    if vectorstore is None:
-        print("Warning: FAISS vectorstore could not be loaded.")
-except Exception as e:
-    print(f"Error loading vectorstore: {e}")
-    vectorstore = None
-
-def search(query: str, results: int = 5):
-    """
-    Search for relevant chunks using similarity search.
-    
-    Args:
-        query: The search query
-        return_type: Return as string or list (default: str)
-        results: Number of results to return (default: 5)
-        
-    Returns:
-        Results as string or list depending on return_type
-    """
-    if vectorstore is None:
-        raise ValueError("Vectorstore not loaded. Please ensure FAISS index exists.")
-        
-    search_results = vectorstore.similarity_search(query, k=results)
+tools = {
 
-    result_dict = {}
-    for idx, result in enumerate(search_results, start=1):
-        result_dict[idx] = result.page_content
-    
-    result_json = json.dumps(result_dict,indent=2,ensure_ascii=False)
-    return f"<result>\n{result_json}\n</result>"
-
-class TOOL_CALL:
-    def __call__(self, completion: str) -> Tuple[str, bool, Optional[float]]:
-        raise NotImplementedError
-
-class Search_Tool(TOOL_CALL):
-    def __call__(self, completion: str) -> Tuple[str, bool, Optional[float]]:
-        """
-        Checks if the completion strictly follows the format <think>xxx</think><tool_call>xxx</tool_call>
-        and if the tool_call contains valid JSON with "tool" and "arg" fields.
-        
-        Args:
-            completion: The text completion to check
-            
-        Returns:
-            Tuple containing:
-            - search result or empty string
-            - boolean indicating if there was an error
-            - score (0.2 if successful, 0 if error)
-        """
-        try:
-            # Check for required strict format using regex
-            pattern = r'^<think>(.*?)</think><tool_call>(.*?)</tool_call>$'
-            match = re.match(pattern, completion.strip(), re.DOTALL)
-            
-            if not match:
-                return "", True, 0
-                
-            tool_content = match.group(2).strip()
-            
-            # Parse JSON from tool_call content
-            try:
-                tool_data = json.loads(tool_content)
-            except json.JSONDecodeError:
-                return "", True, 0
-                
-            # Check if JSON has required fields
-            if not isinstance(tool_data, dict) or "tool" not in tool_data or "arg" not in tool_data:
-                return "", True, 0
-                
-            # Check if the tool is "search"
-            if tool_data["tool"] != "search":
-                return "", True, 0
-                
-            # Execute search with the provided argument
-            search_result = search(tool_data["arg"])
-            return search_result, False, 0.2
-            
-        except Exception as e:
-            print(f"Error in Search_Tool: {e}")
-            traceback.print_exc()
-            return "", True, 0
+}
@@ -2,7 +2,7 @@
 import os
 from dataclasses import dataclass
 from functools import wraps
-from typing import Any, Dict, Literal, Optional, Union
+from typing import Any, Dict, Literal, Optional, Union, Callable
 
 import torch
 import torch.utils.checkpoint
@@ -104,6 +104,9 @@ class GRPOArgumentsMixin:
     offload_optimizer: bool = False
     offload_model: bool = False
     gc_collect_after_offload: bool = False
+    is_reward_tool_call:bool = True  # whether to additionally compute a separate format score for each tool call
+    tool_call_weight:float = 1.0  # NOTE(review): presumably the weight applied to the tool-call score - confirm against the trainer
+    tool_call:str = None  # NOTE(review): presumably a key of `tools` in swift/plugin/tool_call.py - confirm; the None default suggests Optional[str]
 
 
 @dataclass
 
@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import List, Optional
+from typing import List, Optional,Callable
 
 from trl import CPOConfig as HfCPOConfig
 from trl import DPOConfig as HfDPOConfig
@@ -45,7 +45,6 @@ class PPOConfig(SwiftArgumentsMixin, HfPPOConfig):
 @dataclass
 class GRPOConfig(GRPOArgumentsMixin, SwiftArgumentsMixin, HfGRPOConfig):
     stop_words: List[str] = field(default_factory=list)
-    is_reward_tool_call = True #是否额外单独计算每个tool call的format得分
 
     def __post_init__(self):
         from swift.llm.argument.base_args.model_args import ModelArguments