@@ -1114,7 +1114,7 @@ struct llama_mlock {
suggest = false;
}

- fprintf(stderr, "warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
+ LLAMA_LOG_WARN( "warning: failed to mlock %zu-byte buffer (after previously locking %zu bytes): %s\n%s",
size, this->size, errmsg, suggest ? MLOCK_SUGGESTION : "");
return false;
}
@@ -1123,7 +1123,7 @@ struct llama_mlock {
static void raw_unlock(void * addr, size_t size) {
if (munlock(addr, size)) {
- fprintf(stderr, "warning: failed to munlock buffer: %s\n", std::strerror(errno));
+ LLAMA_LOG_WARN( "warning: failed to munlock buffer: %s\n", std::strerror(errno));
}
}
#elif defined(_WIN32)
@@ -1141,7 +1141,7 @@ struct llama_mlock {
return true;
}
if (tries == 2) {
- fprintf(stderr, "warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
+ LLAMA_LOG_WARN( "warning: failed to VirtualLock %zu-byte buffer (after previously locking %zu bytes): %s\n",
len, size, llama_format_win_err(GetLastError()).c_str());
return false;
}
@@ -1150,7 +1150,7 @@ struct llama_mlock {
// set size and try again.
SIZE_T min_ws_size, max_ws_size;
if (!GetProcessWorkingSetSize(GetCurrentProcess(), &min_ws_size, &max_ws_size)) {
- fprintf(stderr, "warning: GetProcessWorkingSetSize failed: %s\n",
+ LLAMA_LOG_WARN( "warning: GetProcessWorkingSetSize failed: %s\n",
llama_format_win_err(GetLastError()).c_str());
return false;
}
@@ -1163,7 +1163,7 @@ struct llama_mlock {
min_ws_size += increment;
max_ws_size += increment;
if (!SetProcessWorkingSetSize(GetCurrentProcess(), min_ws_size, max_ws_size)) {
- fprintf(stderr, "warning: SetProcessWorkingSetSize failed: %s\n",
+ LLAMA_LOG_WARN( "warning: SetProcessWorkingSetSize failed: %s\n",
llama_format_win_err(GetLastError()).c_str());
return false;
}
@@ -1172,7 +1172,7 @@ struct llama_mlock {
static void raw_unlock(void * ptr, size_t len) {
if (!VirtualUnlock(ptr, len)) {
- fprintf(stderr, "warning: failed to VirtualUnlock buffer: %s\n",
+ LLAMA_LOG_WARN( "warning: failed to VirtualUnlock buffer: %s\n",
llama_format_win_err(GetLastError()).c_str());
}
}
@@ -1184,7 +1184,7 @@ struct llama_mlock {
}

bool raw_lock(const void * addr, size_t len) const {
- fprintf(stderr, "warning: mlock not supported on this system\n");
+ LLAMA_LOG_WARN( "warning: mlock not supported on this system\n");
return false;
}
@@ -2085,13 +2085,13 @@ namespace GGUFMeta {
__func__, override_type_to_str(override->tag), override->key);
switch (override->tag) {
case LLAMA_KV_OVERRIDE_BOOL: {
- printf ("%s\n", override->bool_value ? "true" : "false");
+ LLAMA_LOG_INFO ("%s\n", override->bool_value ? "true" : "false");
} break;
case LLAMA_KV_OVERRIDE_INT: {
- printf ("%" PRId64 "\n", override->int_value);
+ LLAMA_LOG_INFO ("%" PRId64 "\n", override->int_value);
} break;
case LLAMA_KV_OVERRIDE_FLOAT: {
- printf ("%.6f\n", override->float_value);
+ LLAMA_LOG_INFO ("%.6f\n", override->float_value);
} break;
default:
// Shouldn't be possible to end up here, but just in case...
@@ -6993,7 +6993,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
if (match + special_token.length() > raw_text_base_offset + raw_text_base_length) break;

#ifdef PRETOKENIZERDEBUG
- fprintf(stderr, "FF: (%ld %ld %ld) '%s'\n", raw_text->length(), raw_text_base_offset, raw_text_base_length, raw_text->substr(raw_text_base_offset, raw_text_base_length).c_str());
+ LLAMA_LOG_WARN( "FF: (%ld %ld %ld) '%s'\n", raw_text->length(), raw_text_base_offset, raw_text_base_length, raw_text->substr(raw_text_base_offset, raw_text_base_length).c_str());
#endif
auto source = std::distance(buffer.begin(), it);
@@ -7006,7 +7006,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
buffer.emplace_after(it, (*raw_text), left_reminder_offset, left_reminder_length);

#ifdef PRETOKENIZERDEBUG
- fprintf(stderr, "FL: (%ld %ld) '%s'\n", left_reminder_offset, left_reminder_length, raw_text->substr(left_reminder_offset, left_reminder_length).c_str());
+ LLAMA_LOG_WARN( "FL: (%ld %ld) '%s'\n", left_reminder_offset, left_reminder_length, raw_text->substr(left_reminder_offset, left_reminder_length).c_str());
#endif
it++;
}
@@ -7022,7 +7022,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
buffer.emplace_after(it, (*raw_text), right_reminder_offset, right_reminder_length);

#ifdef PRETOKENIZERDEBUG
- fprintf(stderr, "FR: (%ld %ld) '%s'\n", right_reminder_offset, right_reminder_length, raw_text->substr(right_reminder_offset, right_reminder_length).c_str());
+ LLAMA_LOG_WARN( "FR: (%ld %ld) '%s'\n", right_reminder_offset, right_reminder_length, raw_text->substr(right_reminder_offset, right_reminder_length).c_str());
#endif

it++;
@@ -7038,7 +7038,7 @@ static void tokenizer_st_partition(const llama_vocab & vocab, std::forward_list<
raw_text_base_length = right_reminder_length;

#ifdef PRETOKENIZERDEBUG
- fprintf(stderr, "RR: (%ld %ld) '%s'\n", raw_text_base_offset, raw_text_base_length, raw_text->substr(raw_text_base_offset, raw_text_base_length).c_str());
+ LLAMA_LOG_WARN( "RR: (%ld %ld) '%s'\n", raw_text_base_offset, raw_text_base_length, raw_text->substr(raw_text_base_offset, raw_text_base_length).c_str());
#endif
} else {
if (source == 0) {
@@ -7095,7 +7095,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
}

#ifdef PRETOKENIZERDEBUG
- fprintf(stderr," TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
+ LLAMA_LOG_WARN(" TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
#endif
llm_tokenizer_spm tokenizer(vocab);
llama_escape_whitespace(raw_text);
@@ -7116,7 +7116,7 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
auto raw_text = fragment.raw_text.substr(fragment.offset, fragment.length);

#ifdef PRETOKENIZERDEBUG
- fprintf(stderr," TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
+ LLAMA_LOG_WARN(" TT: (%ld %ld %ld) '%s'\n", raw_text.length(), fragment.offset, fragment.length, raw_text.c_str());
#endif
llm_tokenizer_bpe tokenizer(vocab);
tokenizer.tokenize(raw_text, output);
@@ -8641,7 +8641,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
if (params->imatrix) {
imatrix_data = static_cast<const std::unordered_map<std::string, std::vector<float>>*>(params->imatrix);
if (imatrix_data) {
- printf ("================================ Have weights data with %d entries\n",int(imatrix_data->size()));
+ LLAMA_LOG_INFO ("================================ Have weights data with %d entries\n",int(imatrix_data->size()));
}
}
@@ -8764,23 +8764,23 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
if (imatrix_data) {
auto it = imatrix_data->find(tensor->name);
if (it == imatrix_data->end()) {
- printf ("\n====== %s: did not find weights for %s\n", __func__, tensor->name);
+ LLAMA_LOG_INFO ("\n====== %s: did not find weights for %s\n", __func__, tensor->name);
} else {
if (it->second.size() == (size_t)tensor->ne[0]) {
imatrix = it->second.data();
} else {
- printf ("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
+ LLAMA_LOG_INFO ("\n====== %s: imatrix size %d is different from tensor size %d for %s\n", __func__,
int(it->second.size()), int(tensor->ne[0]), tensor->name);
}
}
}
if ((new_type == GGML_TYPE_IQ2_XXS ||
new_type == GGML_TYPE_IQ2_XS ||
(new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
- fprintf(stderr, "\n\n============================================================\n");
- fprintf(stderr, "Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
- fprintf(stderr, "The result will be garbage, so bailing out\n");
- fprintf(stderr, "============================================================\n\n");
+ LLAMA_LOG_ERROR( "\n\n============================================================\n");
+ LLAMA_LOG_ERROR( "Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
+ LLAMA_LOG_ERROR( "The result will be garbage, so bailing out\n");
+ LLAMA_LOG_ERROR( "============================================================\n\n");
throw std::runtime_error(format("Missing importance matrix for tensor %s in a very low-bit quantization", tensor->name));
}
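For context: the `LLAMA_LOG_WARN` / `LLAMA_LOG_INFO` / `LLAMA_LOG_ERROR` calls introduced above route formatted messages through llama.cpp's leveled log handler instead of writing to `stderr`/`stdout` directly, so an embedding application can capture, redirect, or silence library output. The standalone sketch below shows the general shape such logging macros typically have; it is a hedged illustration under assumed names (`example_log_*`, `EXAMPLE_LOG_*`), not the actual llama.cpp implementation.

```cpp
#include <cstdarg>
#include <cstdio>

// Hypothetical log levels and callback type for the sketch.
enum example_log_level { EXAMPLE_LOG_LEVEL_INFO, EXAMPLE_LOG_LEVEL_WARN, EXAMPLE_LOG_LEVEL_ERROR };
typedef void (*example_log_callback)(example_log_level level, const char * text, void * user_data);

// Default sink: behave like the old fprintf(stderr, ...) calls.
static void example_log_default(example_log_level /*level*/, const char * text, void * /*user_data*/) {
    fputs(text, stderr);
    fflush(stderr);
}

static example_log_callback g_log_callback  = example_log_default;
static void *               g_log_user_data = nullptr;

// Applications redirect all library output by installing their own callback.
static void example_log_set(example_log_callback cb, void * user_data) {
    g_log_callback  = cb ? cb : example_log_default;
    g_log_user_data = user_data;
}

// Format into a buffer, then hand the finished line to the active callback.
static void example_log_internal(example_log_level level, const char * fmt, ...) {
    char buf[1024];
    va_list args;
    va_start(args, fmt);
    vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);
    g_log_callback(level, buf, g_log_user_data);
}

// Leveled convenience macros analogous in spirit to LLAMA_LOG_INFO/WARN/ERROR.
#define EXAMPLE_LOG_INFO(...)  example_log_internal(EXAMPLE_LOG_LEVEL_INFO,  __VA_ARGS__)
#define EXAMPLE_LOG_WARN(...)  example_log_internal(EXAMPLE_LOG_LEVEL_WARN,  __VA_ARGS__)
#define EXAMPLE_LOG_ERROR(...) example_log_internal(EXAMPLE_LOG_LEVEL_ERROR, __VA_ARGS__)

int main() {
    // Default behavior: the message goes to stderr, as the replaced fprintf calls did.
    EXAMPLE_LOG_WARN("warning: failed to mlock %zu-byte buffer\n", (size_t) 4096);

    // An embedding application can install its own sink instead.
    example_log_set([](example_log_level, const char * text, void *) { fputs(text, stdout); }, nullptr);
    EXAMPLE_LOG_INFO("now routed through the custom callback\n");
    return 0;
}
```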