-
Notifications
You must be signed in to change notification settings - Fork 538
/
Copy pathsentencepiece.patch
2339 lines (2041 loc) · 89.1 KB
/
sentencepiece.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
diff --git a/src/bpe_model.cc b/src/bpe_model.cc
index 22cd115..97e0bda 100644
--- a/src/bpe_model.cc
+++ b/src/bpe_model.cc
@@ -21,7 +21,7 @@
#include "bpe_model.h"
#include "freelist.h"
-#include "third_party/absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_map.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/bpe_model_trainer.cc b/src/bpe_model_trainer.cc
index 964d44e..64878cd 100644
--- a/src/bpe_model_trainer.cc
+++ b/src/bpe_model_trainer.cc
@@ -18,7 +18,8 @@
#include <vector>
#include "bpe_model_trainer.h"
-#include "third_party/absl/container/flat_hash_set.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/status/status.h"
#include "util.h"
namespace sentencepiece {
@@ -171,7 +172,7 @@ void Trainer::UpdateActiveSymbols() {
active_symbols_.insert(symbols.begin(), symbols.begin() + size);
}
-util::Status Trainer::Train() {
+absl::Status Trainer::Train() {
RETURN_IF_ERROR(status());
CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces());
diff --git a/src/bpe_model_trainer.h b/src/bpe_model_trainer.h
index e011a37..a17e580 100644
--- a/src/bpe_model_trainer.h
+++ b/src/bpe_model_trainer.h
@@ -20,7 +20,8 @@
#include <vector>
#include "sentencepiece_model.pb.h"
-#include "third_party/absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/status/status.h"
#include "trainer_interface.h"
namespace sentencepiece {
@@ -35,7 +36,7 @@ class Trainer : public TrainerInterface {
: TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec,
denormalizer_spec) {}
- util::Status Train() override;
+ absl::Status Train() override;
private:
// Symbol represents a character or symbol bigram.
diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc
index 173eb9c..2a43c3a 100644
--- a/src/bpe_model_trainer_test.cc
+++ b/src/bpe_model_trainer_test.cc
@@ -20,8 +20,8 @@
#include "sentencepiece_processor.h"
#include "sentencepiece_trainer.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_join.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/builder.cc b/src/builder.cc
index 378aaa0..fd8edf8 100644
--- a/src/builder.cc
+++ b/src/builder.cc
@@ -18,10 +18,11 @@
#include "builder.h"
#include "filesystem.h"
-#include "third_party/absl/strings/str_join.h"
-#include "third_party/absl/strings/str_replace.h"
-#include "third_party/absl/strings/str_split.h"
-#include "third_party/absl/strings/strip.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_replace.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/strip.h"
+#include "absl/status/status.h"
#ifdef ENABLE_NFKC_COMPILE
#include <unicode/errorcode.h>
@@ -36,7 +37,7 @@
#include "normalization_rule.h"
#include "normalizer.h"
-#include "third_party/darts_clone/darts.h"
+#include "include/darts.h"
#include "util.h"
namespace sentencepiece {
@@ -145,7 +146,7 @@ Builder::Chars Normalize(const Builder::CharsMap &chars_map,
} // namespace
// static
-util::Status Builder::CompileCharsMap(const CharsMap &chars_map,
+absl::Status Builder::CompileCharsMap(const CharsMap &chars_map,
std::string *output) {
CHECK_OR_RETURN(output);
CHECK_OR_RETURN(!chars_map.empty());
@@ -212,7 +213,7 @@ util::Status Builder::CompileCharsMap(const CharsMap &chars_map,
}
// static
-util::Status Builder::DecompileCharsMap(absl::string_view blob,
+absl::Status Builder::DecompileCharsMap(absl::string_view blob,
Builder::CharsMap *chars_map) {
CHECK_OR_RETURN(chars_map);
chars_map->clear();
@@ -265,7 +266,7 @@ util::Status Builder::DecompileCharsMap(absl::string_view blob,
}
// static
-util::Status Builder::GetPrecompiledCharsMap(const std::string &name,
+absl::Status Builder::GetPrecompiledCharsMap(const std::string &name,
std::string *output) {
CHECK_OR_RETURN(output);
@@ -282,12 +283,12 @@ util::Status Builder::GetPrecompiledCharsMap(const std::string &name,
return util::OkStatus();
}
}
- return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC)
+ return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC)
<< "No precompiled charsmap is found: " << name;
}
// static
-util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
+absl::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
LOG(INFO) << "Running BuildNFKCMap";
@@ -345,7 +346,7 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) {
return util::OkStatus();
}
-util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
+absl::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
LOG(INFO) << "Running BuildNmtNFKCMap";
@@ -420,7 +421,7 @@ util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) {
}
// static
-util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) {
+absl::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
for (auto &c : *chars_map) {
std::vector<char32> trg;
@@ -445,7 +446,7 @@ util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) {
}
// static
-util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) {
+absl::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
CharsMap nfkc_map;
RETURN_IF_ERROR(Builder::BuildNFKCMap(&nfkc_map));
@@ -460,7 +461,7 @@ util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) {
}
// static
-util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) {
+absl::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) {
#ifdef ENABLE_NFKC_COMPILE
CharsMap nfkc_map;
RETURN_IF_ERROR(Builder::BuildNmtNFKCMap(&nfkc_map));
@@ -475,7 +476,7 @@ util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) {
}
// static
-util::Status Builder::LoadCharsMap(absl::string_view filename,
+absl::Status Builder::LoadCharsMap(absl::string_view filename,
CharsMap *chars_map) {
LOG(INFO) << "Loading mapping file: " << filename.data();
CHECK_OR_RETURN(chars_map);
@@ -510,7 +511,7 @@ util::Status Builder::LoadCharsMap(absl::string_view filename,
}
// static
-util::Status Builder::SaveCharsMap(absl::string_view filename,
+absl::Status Builder::SaveCharsMap(absl::string_view filename,
const Builder::CharsMap &chars_map) {
auto output = filesystem::NewWritableFile(filename);
RETURN_IF_ERROR(output->status());
@@ -540,7 +541,7 @@ util::Status Builder::SaveCharsMap(absl::string_view filename,
}
// static
-util::Status Builder::RemoveRedundantMap(CharsMap *chars_map) {
+absl::Status Builder::RemoveRedundantMap(CharsMap *chars_map) {
CHECK_OR_RETURN(chars_map);
CharsMap new_chars_map;
diff --git a/src/builder.h b/src/builder.h
index 49d2884..8ad872c 100644
--- a/src/builder.h
+++ b/src/builder.h
@@ -22,7 +22,8 @@
#include "common.h"
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
+#include "absl/status/status.h"
namespace sentencepiece {
namespace normalizer {
@@ -43,15 +44,15 @@ class Builder {
// String-to-string mapping.
using CharsMap = std::map<Chars, Chars>;
- static util::Status CompileCharsMap(const CharsMap &chars_map,
+ static absl::Status CompileCharsMap(const CharsMap &chars_map,
std::string *output);
// Decompiles `blob` into `chars_map`.
- static util::Status DecompileCharsMap(absl::string_view blob,
+ static absl::Status DecompileCharsMap(absl::string_view blob,
CharsMap *chars_map);
// Returns a pre-compiled binary index with `name`.
- static util::Status GetPrecompiledCharsMap(const std::string &name,
+ static absl::Status GetPrecompiledCharsMap(const std::string &name,
std::string *output);
// Makes a normalization mapping based on NFKC.
@@ -89,30 +90,30 @@ class Builder {
// normalizer is the goal of SentencePiece.
//
// TODO(taku): Make NFC, NFD, and NFKD mapping if necessary.
- static util::Status BuildNFKCMap(CharsMap *chars_map);
+ static absl::Status BuildNFKCMap(CharsMap *chars_map);
// Makes an NFKC-based mapping with NMT specific modifications around
// whitespaces.
- static util::Status BuildNmtNFKCMap(CharsMap *chars_map);
+ static absl::Status BuildNmtNFKCMap(CharsMap *chars_map);
// Merge Unicode case folding mapping into `chars_map`.
- static util::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map);
+ static absl::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map);
// Makes NFKC with Unicode case folding.
- static util::Status BuildNFKC_CFMap(CharsMap *chars_map);
+ static absl::Status BuildNFKC_CFMap(CharsMap *chars_map);
// Makes NMT NFKC with Unicode case folding.
- static util::Status BuildNmtNFKC_CFMap(CharsMap *chars_map);
+ static absl::Status BuildNmtNFKC_CFMap(CharsMap *chars_map);
// Builds Chars map save in `filename`.
// Format:
// src_uchar1 src_uchar2 ... <tab> trg_uchar1 trg_uchar2...
// (src|trg)_ucharX must be a hex of Unicode code point.
- static util::Status LoadCharsMap(absl::string_view filename,
+ static absl::Status LoadCharsMap(absl::string_view filename,
CharsMap *chars_map);
// Saves Chars map to `filename` as TSV.
- static util::Status SaveCharsMap(absl::string_view filename,
+ static absl::Status SaveCharsMap(absl::string_view filename,
const CharsMap &chars_map);
private:
@@ -121,7 +122,7 @@ class Builder {
// Removes redundant rules from `chars_map`.
// When char_maps have "aa" => "bb" and "a" => "b", the first
// rule is not necessary since the second rule can cover the first rule.
- static util::Status RemoveRedundantMap(CharsMap *chars_map);
+ static absl::Status RemoveRedundantMap(CharsMap *chars_map);
};
} // namespace normalizer
} // namespace sentencepiece
diff --git a/src/builder_test.cc b/src/builder_test.cc
index 4acb7b3..1dee5c7 100644
--- a/src/builder_test.cc
+++ b/src/builder_test.cc
@@ -18,7 +18,7 @@
#include "normalizer.h"
#include "sentencepiece_trainer.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
+#include "absl/strings/str_cat.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/char_model_trainer.cc b/src/char_model_trainer.cc
index f438d78..4f4c603 100644
--- a/src/char_model_trainer.cc
+++ b/src/char_model_trainer.cc
@@ -16,12 +16,13 @@
#include "char_model.h"
#include "char_model_trainer.h"
+#include "absl/status/status.h"
#include "util.h"
namespace sentencepiece {
namespace character {
-util::Status Trainer::Train() {
+absl::Status Trainer::Train() {
RETURN_IF_ERROR(status());
CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces());
diff --git a/src/char_model_trainer.h b/src/char_model_trainer.h
index e563819..a5d021c 100644
--- a/src/char_model_trainer.h
+++ b/src/char_model_trainer.h
@@ -17,6 +17,7 @@
#include "sentencepiece_model.pb.h"
#include "trainer_interface.h"
+#include "absl/status/status.h"
namespace sentencepiece {
namespace character {
@@ -30,7 +31,7 @@ class Trainer : public TrainerInterface {
: TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec,
denormalizer_spec) {}
- util::Status Train() override;
+ absl::Status Train() override;
};
} // namespace character
} // namespace sentencepiece
diff --git a/src/char_model_trainer_test.cc b/src/char_model_trainer_test.cc
index 8c2e4b7..e8b4979 100644
--- a/src/char_model_trainer_test.cc
+++ b/src/char_model_trainer_test.cc
@@ -19,8 +19,8 @@
#include "filesystem.h"
#include "sentencepiece_processor.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_join.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/common.h b/src/common.h
index 7595634..339f831 100644
--- a/src/common.h
+++ b/src/common.h
@@ -146,6 +146,7 @@ inline const char *BaseName(const char *path) {
} // namespace logging
} // namespace sentencepiece
+#ifndef LOG
#define LOG(severity) \
(::sentencepiece::logging::GetMinLogLevel() > \
::sentencepiece::logging::LOG_##severity) \
@@ -156,6 +157,7 @@ inline const char *BaseName(const char *path) {
std::cerr << ::sentencepiece::logging::BaseName(__FILE__) << "(" \
<< __LINE__ << ") " \
<< "LOG(" << #severity << ") "
+#endif // LOG
#define CHECK(condition) \
(condition) ? 0 \
diff --git a/src/compile_charsmap_main.cc b/src/compile_charsmap_main.cc
index c5a5188..e5db1d7 100644
--- a/src/compile_charsmap_main.cc
+++ b/src/compile_charsmap_main.cc
@@ -22,8 +22,9 @@
#include "filesystem.h"
#include "init.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/flags/flag.h"
+#include "absl/strings/string_view.h"
+#include "absl/status/status.h"
using sentencepiece::normalizer::Builder;
@@ -160,7 +161,7 @@ int main(int argc, char **argv) {
const std::vector<std::pair<
std::string,
- std::function<sentencepiece::util::Status(Builder::CharsMap *)>>>
+ std::function<absl::Status(Builder::CharsMap *)>>>
kRuleList = {{"nfkc", Builder::BuildNFKCMap},
{"nmt_nfkc", Builder::BuildNmtNFKCMap},
{"nfkc_cf", Builder::BuildNFKC_CFMap},
diff --git a/src/error.cc b/src/error.cc
index a226d98..ab4675d 100644
--- a/src/error.cc
+++ b/src/error.cc
@@ -20,8 +20,8 @@
#ifdef _USE_EXTERNAL_ABSL
// Naive workaround to define minloglevel on external absl package.
// We want to define them in other cc file.
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/flags/parse.h"
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
ABSL_FLAG(int32, minloglevel, 0,
"Messages logged at a lower level than this don't actually.");
#endif
diff --git a/src/filesystem.cc b/src/filesystem.cc
index 833c8f7..9a1b6c9 100644
--- a/src/filesystem.cc
+++ b/src/filesystem.cc
@@ -15,7 +15,8 @@
#include <iostream>
#include "filesystem.h"
-#include "third_party/absl/memory/memory.h"
+#include "absl/status/status.h"
+#include "absl/memory/memory.h"
#include "util.h"
#if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE)
@@ -36,7 +37,7 @@ class PosixReadableFile : public ReadableFile {
is_binary ? std::ios::binary | std::ios::in
: std::ios::in)) {
if (!*is_)
- status_ = util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC)
+ status_ = util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC)
<< "\"" << filename.data() << "\": " << util::StrError(errno);
}
@@ -44,7 +45,7 @@ class PosixReadableFile : public ReadableFile {
if (is_ != &std::cin) delete is_;
}
- util::Status status() const { return status_; }
+ absl::Status status() const { return status_; }
bool ReadLine(std::string *line) {
return static_cast<bool>(std::getline(*is_, *line));
@@ -61,7 +62,7 @@ class PosixReadableFile : public ReadableFile {
}
private:
- util::Status status_;
+ absl::Status status_;
std::istream *is_;
};
@@ -75,7 +76,7 @@ class PosixWritableFile : public WritableFile {
: std::ios::out)) {
if (!*os_)
status_ =
- util::StatusBuilder(util::StatusCode::kPermissionDenied, GTL_LOC)
+ util::StatusBuilder(absl::StatusCode::kPermissionDenied, GTL_LOC)
<< "\"" << filename.data() << "\": " << util::StrError(errno);
}
@@ -83,7 +84,7 @@ class PosixWritableFile : public WritableFile {
if (os_ != &std::cout) delete os_;
}
- util::Status status() const { return status_; }
+ absl::Status status() const { return status_; }
bool Write(absl::string_view text) {
os_->write(text.data(), text.size());
@@ -93,7 +94,7 @@ class PosixWritableFile : public WritableFile {
bool WriteLine(absl::string_view text) { return Write(text) && Write("\n"); }
private:
- util::Status status_;
+ absl::Status status_;
std::ostream *os_;
};
diff --git a/src/filesystem.h b/src/filesystem.h
index e572b4b..6e8e305 100644
--- a/src/filesystem.h
+++ b/src/filesystem.h
@@ -23,7 +23,8 @@
#include "common.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
+#include "absl/status/status.h"
namespace sentencepiece {
namespace filesystem {
@@ -33,7 +34,7 @@ class ReadableFile {
explicit ReadableFile(absl::string_view filename, bool is_binary = false) {}
virtual ~ReadableFile() {}
- virtual util::Status status() const = 0;
+ virtual absl::Status status() const = 0;
virtual bool ReadLine(std::string *line) = 0;
virtual bool ReadAll(std::string *line) = 0;
};
@@ -44,7 +45,7 @@ class WritableFile {
explicit WritableFile(absl::string_view filename, bool is_binary = false) {}
virtual ~WritableFile() {}
- virtual util::Status status() const = 0;
+ virtual absl::Status status() const = 0;
virtual bool Write(absl::string_view text) = 0;
virtual bool WriteLine(absl::string_view text) = 0;
};
diff --git a/src/filesystem_test.cc b/src/filesystem_test.cc
index 790e756..39ece99 100644
--- a/src/filesystem_test.cc
+++ b/src/filesystem_test.cc
@@ -14,7 +14,7 @@
#include "filesystem.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
+#include "absl/strings/str_cat.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/init.h b/src/init.h
index 090a2d9..acfda8a 100644
--- a/src/init.h
+++ b/src/init.h
@@ -16,8 +16,8 @@
#define INIT_H_
#include "common.h"
-#include "third_party/absl/flags/flag.h"
-#include "third_party/absl/flags/parse.h"
+#include "absl/flags/flag.h"
+#include "absl/flags/parse.h"
ABSL_DECLARE_FLAG(int32, minloglevel);
diff --git a/src/model_factory.cc b/src/model_factory.cc
index be99501..040c00c 100644
--- a/src/model_factory.cc
+++ b/src/model_factory.cc
@@ -15,7 +15,7 @@
#include "bpe_model.h"
#include "char_model.h"
#include "model_factory.h"
-#include "third_party/absl/memory/memory.h"
+#include "absl/memory/memory.h"
#include "unigram_model.h"
#include "word_model.h"
diff --git a/src/model_interface.cc b/src/model_interface.cc
index c49be1e..22c6378 100644
--- a/src/model_interface.cc
+++ b/src/model_interface.cc
@@ -16,8 +16,8 @@
#include "model_interface.h"
#include "sentencepiece_model.pb.h"
-#include "third_party/absl/memory/memory.h"
-#include "third_party/absl/strings/str_format.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_format.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/model_interface.h b/src/model_interface.h
index aef5b53..c7858fb 100644
--- a/src/model_interface.h
+++ b/src/model_interface.h
@@ -25,9 +25,10 @@
#include "normalizer.h"
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/container/flat_hash_map.h"
-#include "third_party/absl/strings/string_view.h"
-#include "third_party/darts_clone/darts.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/strings/string_view.h"
+#include "absl/status/status.h"
+#include "include/darts.h"
#include "util.h"
namespace sentencepiece {
@@ -69,7 +70,7 @@ class ModelInterface {
// Returns Status.
// Encode/Decode functions are valid only when status is OK.
- virtual util::Status status() const { return status_; }
+ virtual absl::Status status() const { return status_; }
virtual const ModelProto &model_proto() const { return *model_proto_; }
@@ -82,7 +83,7 @@ class ModelInterface {
// normally users do not need to call this function. This function is provided
// just in case that a user want to manually choose which encoder version to
// use.
- virtual util::Status SetEncoderVersion(EncoderVersion encoder_version) {
+ virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version) {
encoder_version_ = encoder_version;
return util::OkStatus();
}
@@ -261,7 +262,7 @@ class ModelInterface {
EncoderVersion encoder_version_ = EncoderVersion::kOptimized;
// status.
- util::Status status_;
+ absl::Status status_;
};
} // namespace sentencepiece
#endif // MODEL_INTERFACE_H_
diff --git a/src/model_interface_test.cc b/src/model_interface_test.cc
index 69ee4e6..26a1e05 100644
--- a/src/model_interface_test.cc
+++ b/src/model_interface_test.cc
@@ -15,7 +15,7 @@
#include "model_factory.h"
#include "model_interface.h"
#include "testharness.h"
-#include "third_party/absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_map.h"
#include "util.h"
namespace sentencepiece {
diff --git a/src/normalizer.cc b/src/normalizer.cc
index 100b875..c553906 100644
--- a/src/normalizer.cc
+++ b/src/normalizer.cc
@@ -18,11 +18,12 @@
#include <vector>
#include "common.h"
-#include "third_party/absl/memory/memory.h"
-#include "third_party/absl/strings/match.h"
-#include "third_party/absl/strings/string_view.h"
-#include "third_party/absl/strings/strip.h"
-#include "third_party/darts_clone/darts.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/match.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/strip.h"
+#include "absl/status/status.h"
+#include "include/darts.h"
#include "util.h"
namespace sentencepiece {
@@ -71,7 +72,7 @@ void Normalizer::Init() {
}
}
-util::Status Normalizer::Normalize(absl::string_view input,
+absl::Status Normalizer::Normalize(absl::string_view input,
std::string *normalized,
std::vector<size_t> *norm_to_orig) const {
norm_to_orig->clear();
@@ -274,7 +275,7 @@ std::string Normalizer::EncodePrecompiledCharsMap(
}
// static
-util::Status Normalizer::DecodePrecompiledCharsMap(
+absl::Status Normalizer::DecodePrecompiledCharsMap(
absl::string_view blob, absl::string_view *trie_blob,
absl::string_view *normalized, std::string *buffer) {
uint32 trie_blob_size = 0;
diff --git a/src/normalizer.h b/src/normalizer.h
index 622bbd2..21d1385 100644
--- a/src/normalizer.h
+++ b/src/normalizer.h
@@ -24,8 +24,9 @@
#include "common.h"
#include "sentencepiece_model.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/strings/string_view.h"
-#include "third_party/darts_clone/darts.h"
+#include "absl/strings/string_view.h"
+#include "absl/status/status.h"
+#include "include/darts.h"
#include "util.h"
namespace sentencepiece {
@@ -75,7 +76,7 @@ class Normalizer {
// Returns Status.
// Normalizes function is valid only when status is OK.
- virtual util::Status status() const { return status_; }
+ virtual absl::Status status() const { return status_; }
// Normalizes a plain utf8 string into an internal representation for
// Sentencepiece model. |norm_to_orig| stores the byte-alignment from
@@ -86,7 +87,7 @@ class Normalizer {
// - Adds a prefix space.
// - Replaces a space with a meta symbol.
// - Removing heading, tailing and other redundant spaces.
- virtual util::Status Normalize(absl::string_view input,
+ virtual absl::Status Normalize(absl::string_view input,
std::string *normalized,
std::vector<size_t> *norm_to_orig) const;
@@ -121,7 +122,7 @@ class Normalizer {
absl::string_view normalized);
// Decodes blob into trie_blob and normalized string.
- static util::Status DecodePrecompiledCharsMap(absl::string_view blob,
+ static absl::Status DecodePrecompiledCharsMap(absl::string_view blob,
absl::string_view *trie_blob,
absl::string_view *normalized,
std::string *buffer = nullptr);
@@ -153,7 +154,7 @@ class Normalizer {
#endif
// Normalizer's status.
- util::Status status_;
+ absl::Status status_;
};
} // namespace normalizer
} // namespace sentencepiece
diff --git a/src/pretokenizer_for_training.cc b/src/pretokenizer_for_training.cc
index 049658e..8021511 100644
--- a/src/pretokenizer_for_training.cc
+++ b/src/pretokenizer_for_training.cc
@@ -14,7 +14,7 @@
#include <string>
#include "pretokenizer_for_training.h"
-#include "third_party/absl/strings/str_replace.h"
+#include "absl/strings/str_replace.h"
namespace sentencepiece {
namespace pretokenizer {
diff --git a/src/pretokenizer_for_training.h b/src/pretokenizer_for_training.h
index 2d3bc82..b4a6de3 100644
--- a/src/pretokenizer_for_training.h
+++ b/src/pretokenizer_for_training.h
@@ -21,7 +21,8 @@
#include "common.h"
#include "sentencepiece.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/strings/string_view.h"
+#include "absl/strings/string_view.h"
+#include "absl/status/status.h"
namespace sentencepiece {
namespace pretokenizer {
@@ -30,7 +31,7 @@ class PretokenizerForTrainingInterface {
public:
PretokenizerForTrainingInterface() {}
virtual ~PretokenizerForTrainingInterface() {}
- virtual util::Status status() const = 0;
+ virtual absl::Status status() const = 0;
// Puts kUPPBoundaryStr before and after the pre-tokenizer's segmentation
// when there are no spaces between these tokens.
diff --git a/src/pretokenizer_for_training_test.cc b/src/pretokenizer_for_training_test.cc
index 80f4787..de89fe3 100644
--- a/src/pretokenizer_for_training_test.cc
+++ b/src/pretokenizer_for_training_test.cc
@@ -13,8 +13,9 @@
// limitations under the License.!
#include "pretokenizer_for_training.h"
#include "testharness.h"
-#include "third_party/absl/strings/str_cat.h"
+#include "absl/strings/str_cat.h"
#include "trainer_interface.h"
+#include "absl/status/status.h"
namespace sentencepiece {
namespace pretokenizer {
@@ -28,7 +29,7 @@ class MockPretokenizer : public PretokenizerForTrainingInterface {
return spt_;
}
- util::Status status() const override { return util::OkStatus(); }
+ absl::Status status() const override { return util::OkStatus(); }
void SetOutput(const SentencePieceText &spt) { spt_ = spt; }
diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc
index 1e4e7a0..78ae527 100644
--- a/src/sentencepiece_processor.cc
+++ b/src/sentencepiece_processor.cc
@@ -23,14 +23,15 @@
#include "normalizer.h"
#include "sentencepiece.pb.h"
#include "sentencepiece_processor.h"
-#include "third_party/absl/memory/memory.h"
-#include "third_party/absl/strings/numbers.h"
-#include "third_party/absl/strings/str_cat.h"
-#include "third_party/absl/strings/str_join.h"
-#include "third_party/absl/strings/str_replace.h"
-#include "third_party/absl/strings/str_split.h"
-#include "third_party/absl/strings/string_view.h"
-#include "third_party/absl/strings/strip.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/str_replace.h"
+#include "absl/strings/str_split.h"
+#include "absl/strings/string_view.h"
+#include "absl/strings/strip.h"
+#include "absl/status/status.h"
#include "unigram_model.h"
#include "util.h"
@@ -52,7 +53,7 @@ const char kReplacementCharacter[] = "\xef\xbf\xbd";
SentencePieceProcessor::SentencePieceProcessor() {}
SentencePieceProcessor::~SentencePieceProcessor() {}
-util::Status SentencePieceProcessor::Load(absl::string_view filename) {
+absl::Status SentencePieceProcessor::Load(absl::string_view filename) {
auto model_proto = absl::make_unique<ModelProto>();
RETURN_IF_ERROR(io::LoadModelProto(filename, model_proto.get()));
return Load(std::move(model_proto));
@@ -62,13 +63,13 @@ void SentencePieceProcessor::LoadOrDie(absl::string_view filename) {
CHECK_OK(Load(filename));
}
-util::Status SentencePieceProcessor::Load(const ModelProto &model_proto) {
+absl::Status SentencePieceProcessor::Load(const ModelProto &model_proto) {
auto model_proto_copy = absl::make_unique<ModelProto>();
*model_proto_copy = model_proto;
return Load(std::move(model_proto_copy));
}
-util::Status SentencePieceProcessor::LoadFromSerializedProto(
+absl::Status SentencePieceProcessor::LoadFromSerializedProto(
absl::string_view serialized) {
auto model_proto = absl::make_unique<ModelProto>();
CHECK_OR_RETURN(
@@ -76,7 +77,7 @@ util::Status SentencePieceProcessor::LoadFromSerializedProto(
return Load(std::move(model_proto));
}
-util::Status SentencePieceProcessor::Load(
+absl::Status SentencePieceProcessor::Load(
std::unique_ptr<ModelProto> model_proto) {
model_proto_ = std::move(model_proto);
model_ = ModelFactory::Create(*model_proto_);
@@ -117,7 +118,7 @@ util::Status SentencePieceProcessor::Load(
return util::OkStatus();
}
-util::Status SentencePieceProcessor::SetEncoderVersion(
+absl::Status SentencePieceProcessor::SetEncoderVersion(
EncoderVersion encoder_version) {
return model_->SetEncoderVersion(encoder_version);
}
@@ -126,17 +127,17 @@ EncoderVersion SentencePieceProcessor::GetEncoderVersion() const {
return model_->GetEncoderVersion();
}
-util::Status SentencePieceProcessor::SetEncodeExtraOptions(
+absl::Status SentencePieceProcessor::SetEncodeExtraOptions(
absl::string_view extra_options) {
return ParseExtraOptions(extra_options, &encode_extra_options_);
}
-util::Status SentencePieceProcessor::SetDecodeExtraOptions(
+absl::Status SentencePieceProcessor::SetDecodeExtraOptions(
absl::string_view extra_options) {
return ParseExtraOptions(extra_options, &decode_extra_options_);
}
-util::Status SentencePieceProcessor::status() const {
+absl::Status SentencePieceProcessor::status() const {
CHECK_OR_RETURN(model_) << "Model is not initialized.";
CHECK_OR_RETURN(normalizer_) << "Normalizer is not initialized.";
RETURN_IF_ERROR(model_->status());
@@ -144,7 +145,7 @@ util::Status SentencePieceProcessor::status() const {
return util::OkStatus();
}
-util::Status SentencePieceProcessor::SetVocabulary(
+absl::Status SentencePieceProcessor::SetVocabulary(
const std::vector<std::string> &valid_vocab) {
RETURN_IF_ERROR(status());
@@ -174,7 +175,7 @@ util::Status SentencePieceProcessor::SetVocabulary(
return util::OkStatus();
}
-util::Status SentencePieceProcessor::ResetVocabulary() {
+absl::Status SentencePieceProcessor::ResetVocabulary() {
RETURN_IF_ERROR(status());
for (auto &piece : *(model_proto_->mutable_pieces())) {
if (piece.type() == ModelProto::SentencePiece::UNUSED)
@@ -184,7 +185,7 @@ util::Status SentencePieceProcessor::ResetVocabulary() {
return util::OkStatus();
}
-util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename,
+absl::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename,
int threshold) {
auto input = filesystem::NewReadableFile(filename);
RETURN_IF_ERROR(input->status());
@@ -221,7 +222,7 @@ util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename,
//////////////////////////////////////////////////////////////
// Simple API.
-util::Status SentencePieceProcessor::Encode(
+absl::Status SentencePieceProcessor::Encode(
absl::string_view input, std::vector<std::string> *pieces) const {
CHECK_OR_RETURN_STATUS_STL(pieces);
@@ -234,7 +235,7 @@ util::Status SentencePieceProcessor::Encode(
return util::OkStatus();
}
-util::Status SentencePieceProcessor::Encode(absl::string_view input,
+absl::Status SentencePieceProcessor::Encode(absl::string_view input,
std::vector<int> *ids) const {
CHECK_OR_RETURN_STATUS_STL(ids);
@@ -247,7 +248,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input,
return util::OkStatus();
}
-util::Status SentencePieceProcessor::Decode(
+absl::Status SentencePieceProcessor::Decode(
const std::vector<std::string> &pieces, std::string *detokenized) const {
CHECK_OR_RETURN_STATUS_STL(detokenized);
@@ -258,7 +259,7 @@ util::Status SentencePieceProcessor::Decode(
return util::OkStatus();
}
-util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
+absl::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
std::string *detokenized) const {
CHECK_OR_RETURN_STATUS_STL(detokenized);
@@ -269,7 +270,7 @@ util::Status SentencePieceProcessor::Decode(const std::vector<int> &ids,
return util::OkStatus();
}
-util::Status SentencePieceProcessor::NBestEncode(
+absl::Status SentencePieceProcessor::NBestEncode(
absl::string_view input, int nbest_size,
std::vector<std::vector<std::string>> *pieces) const {
CHECK_OR_RETURN_STATUS_STL(pieces);
@@ -287,7 +288,7 @@ util::Status SentencePieceProcessor::NBestEncode(
return util::OkStatus();
}
-util::Status SentencePieceProcessor::NBestEncode(
+absl::Status SentencePieceProcessor::NBestEncode(
absl::string_view input, int nbest_size,
std::vector<std::vector<int>> *ids) const {
CHECK_OR_RETURN_STATUS_STL(ids);
@@ -305,7 +306,7 @@ util::Status SentencePieceProcessor::NBestEncode(
return util::OkStatus();
}
-util::Status SentencePieceProcessor::SampleEncode(
+absl::Status SentencePieceProcessor::SampleEncode(
absl::string_view input, int nbest_size, float alpha,
std::vector<std::string> *pieces) const {
CHECK_OR_RETURN_STATUS_STL(pieces);
@@ -319,7 +320,7 @@ util::Status SentencePieceProcessor::SampleEncode(
return util::OkStatus();
}
-util::Status SentencePieceProcessor::SampleEncode(absl::string_view input,
+absl::Status SentencePieceProcessor::SampleEncode(absl::string_view input,
int nbest_size, float alpha,
std::vector<int> *ids) const {
CHECK_OR_RETURN_STATUS_STL(ids);
@@ -333,7 +334,7 @@ util::Status SentencePieceProcessor::SampleEncode(absl::string_view input,
return util::OkStatus();
}
-util::Status SentencePieceProcessor::PopulateSentencePieceText(
+absl::Status SentencePieceProcessor::PopulateSentencePieceText(
absl::string_view input, absl::string_view normalized,
const std::vector<size_t> &norm_to_orig, const EncodeResult &result,
SentencePieceText *spt) const {
@@ -424,7 +425,7 @@ util::Status SentencePieceProcessor::PopulateSentencePieceText(
return util::OkStatus();
} // namespace sentencepiece
-util::Status SentencePieceProcessor::Encode(absl::string_view input,
+absl::Status SentencePieceProcessor::Encode(absl::string_view input,
SentencePieceText *spt) const {
CHECK_OR_RETURN_STATUS_PROTO(spt);
@@ -439,7 +440,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input,
return util::OkStatus();
}
-util::Status SentencePieceProcessor::NBestEncode(
+absl::Status SentencePieceProcessor::NBestEncode(
absl::string_view input, int nbest_size,
NBestSentencePieceText *nbest_spt) const {
CHECK_OR_RETURN_STATUS_PROTO(nbest_spt);