@@ -106,23 +106,39 @@ class chat_formatter {
106
106
107
107
std::string operator () (const std::string & role, const std::string & content, [[maybe_unused]] bool use_toolcalls = false ) {
108
108
109
- common_chat_msg new_msg;
110
- new_msg.role = role;
111
- new_msg.content = content;
112
-
113
- common_chat_params cparams;
114
109
common_chat_templates_inputs cinputs;
110
+ cinputs.use_jinja = params_.use_jinja ;
111
+ cinputs.add_generation_prompt = (role == "user");
115
112
#ifdef LLAMA_USE_TOOLCALL
116
113
if (tc_client_ != nullptr && use_toolcalls) {
117
114
cinputs.tool_choice = common_chat_tool_choice_parse_oaicompat (tc_client_->tool_choice ());
118
115
cinputs.tools = common_chat_tools_parse_oaicompat (tc_client_->tool_list ());
119
116
}
120
117
#endif
121
- bool add_ass = role == "user";
122
- auto formatted =
123
- common_chat_format_single (chat_templates_, chat_msgs_, new_msg, add_ass, params_.use_jinja ,
124
- &cinputs, &cparams);
118
+ for (const auto & msg : chat_msgs_) {
119
+ cinputs.messages .push_back (common_chat_msg (msg));
120
+ }
121
+
122
+ common_chat_msg new_msg = common_chat_parse (content, *chat_format_);
123
+ new_msg.role = role;
124
+
125
+ if (! new_msg.tool_calls .empty ()) {
126
+ nlohmann::json result_array = nlohmann::json::array ();
127
+ for (const auto & tc : new_msg.tool_calls ) {
128
+ toolcall::result_set res = tc_client_->call (tc.name , tc.arguments , tc.id );
129
+ if (! res.empty ()) {
130
+ for (const auto & r : res) {
131
+ result_array.push_back (r.data );
132
+ }
133
+ }
134
+ }
135
+ new_msg.content += result_array.dump (-1 );
136
+ }
137
+
138
+ cinputs.messages .push_back (new_msg);
139
+ common_chat_params cparams = common_chat_templates_apply (chat_templates_, cinputs);
125
140
141
+ auto formatted = cparams.prompt ;
126
142
chat_msgs_.push_back (new_msg);
127
143
LOG_DBG("formatted: '%s'\n", formatted.c_str());
128
144
@@ -145,42 +161,6 @@ class chat_formatter {
145
161
#endif
146
162
};
147
163
148
- #ifdef LLAMA_USE_TOOLCALL
149
- static bool call_tool (common_chat_format chat_format, const std::string & assistant_msg, llama_context * ctx,
150
- toolcall::client::ptr tc_client, std::vector<llama_token> & embd_inp)
151
- {
152
- bool tool_was_called = false ;
153
- common_chat_msg msg = common_chat_parse (assistant_msg, chat_format);
154
- if (! msg.tool_calls .empty ()) {
155
- for (const auto & tc : msg.tool_calls ) {
156
- nlohmann::json tc_oai_json {
157
- {"type", "function"},
158
- {"function", {
159
- {"name", tc.name},
160
- {"arguments", tc.arguments},
161
- }},
162
- {"id", tc.id},
163
- };
164
- toolcall::result_set res = tc_client->call (tc_oai_json);
165
- if (! res.empty ()) {
166
- std::string toolcall_result_str;
167
- for (const auto & r : res) {
168
- toolcall_result_str += ("\n" + r.data); // Although more complex results can be
169
- // returned (resources, images, etc.),
170
- // for now simply append the data. Later
171
- // on support for specific models may
172
- // allow for unpacking Base64 data.
173
- }
174
- auto toolcall_result_tok = common_tokenize (ctx, toolcall_result_str, false , true );
175
- embd_inp.insert (embd_inp.end (), toolcall_result_tok.begin (), toolcall_result_tok.end ());
176
- }
177
- tool_was_called = true ;
178
- }
179
- }
180
- return tool_was_called;
181
- }
182
- #endif
183
-
184
164
int main (int argc, char ** argv) {
185
165
common_params params;
186
166
g_params = &params;
@@ -943,16 +923,6 @@ int main(int argc, char ** argv) {
943
923
}
944
924
}
945
925
946
- #ifdef LLAMA_USE_TOOLCALL
947
- if ((tc_client && n_past > 0 ) && (waiting_for_first_input || is_interacting)) {
948
- size_t last_len = embd_inp.size ();
949
- bool was_toolcall = call_tool (chat_format, assistant_ss.str (), ctx, tc_client, embd_inp);
950
- if (was_toolcall && last_len < embd_inp.size ()) {
951
- LOG("%s", common_token_to_piece(ctx, embd_inp[last_len]).c_str());
952
- }
953
- }
954
- #endif
955
-
956
926
if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
957
927
LOG_DBG("waiting for user input\n");
958
928
0 commit comments