|
#include <cassert>
#include <chrono>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include <re2/filtered_re2.h>
#include <re2/re2.h>
#include <re2/set.h>
| 7 | + |
| 8 | +using namespace std::chrono; |
| 9 | +using namespace std::literals; |
| 10 | + |
/// Streams a vector as a bracketed, comma-separated list, e.g. "[1, 2, 3]".
/// An empty vector is written as "[]". Each element is written with its own
/// operator<<. Returns the same stream to allow chaining.
template<typename T>
std::ostream& operator<< (std::ostream& out, const std::vector<T>& v) {
    out << "[";
    for (auto it = v.begin(); it != v.end(); ++it) {
        // Separator goes before every element except the first.
        if (it != v.begin()) {
            out << ", ";
        }
        out << *it;
    }
    out << "]";
    return out;
}
| 26 | + |
| 27 | +int main(const int argc, const char* argv[]) { |
| 28 | + if (argc < 4) { |
| 29 | + std::cerr << "error: ./bench regexes user_agents repetitions [quiet]" << std::endl; |
| 30 | + return 1; |
| 31 | + } |
| 32 | + bool quiet = argc == 5; |
| 33 | + |
| 34 | + std::ifstream regexes_f(argv[1]); |
| 35 | + |
| 36 | + re2::RE2::Options opt; |
| 37 | + re2::FilteredRE2 f(3); |
| 38 | + int id; |
| 39 | + |
| 40 | + std::string line; |
| 41 | + |
| 42 | + auto start = steady_clock::now(); |
| 43 | + while(std::getline(regexes_f, line)) { |
| 44 | + re2::RE2::ErrorCode c; |
| 45 | + if((c = f.Add(line, opt, &id))) { |
| 46 | + std::cerr << "invalid regex " << line << std::endl; |
| 47 | + return 1; |
| 48 | + } |
| 49 | + } |
| 50 | + std::vector<std::string> to_match; |
| 51 | + f.Compile(&to_match); |
| 52 | + std::chrono::duration<float> diff = steady_clock::now() - start; |
| 53 | + std::cerr << f.NumRegexps() << " regexes " |
| 54 | + << to_match.size() << " atoms" |
| 55 | + << " in " << diff.count() << "s" |
| 56 | + << std::endl; |
| 57 | + |
| 58 | + opt.set_literal(true); |
| 59 | + opt.set_case_sensitive(false); |
| 60 | + start = steady_clock::now(); |
| 61 | + re2::RE2::Set s(opt, RE2::UNANCHORED); |
| 62 | + for(auto const &atom: to_match) { |
| 63 | + // can't fail since literals |
| 64 | + assert(s.Add(atom, NULL) != -1); |
| 65 | + } |
| 66 | + assert(s.Compile()); |
| 67 | + diff = steady_clock::now() - start; |
| 68 | + std::cerr << "\tprefilter built in " << diff.count() << "s" << std::endl; |
| 69 | + |
| 70 | + start = steady_clock::now(); |
| 71 | + std::vector<std::string> user_agents; |
| 72 | + std::ifstream user_agents_f(argv[2]); |
| 73 | + while(std::getline(user_agents_f, line)) { |
| 74 | + user_agents.push_back(line); |
| 75 | + } |
| 76 | + diff = steady_clock::now() - start; |
| 77 | + std::cerr << user_agents.size() |
| 78 | + << " user agents in " |
| 79 | + << diff.count() << "s" |
| 80 | + << std::endl; |
| 81 | + |
| 82 | + int repetitions = std::stoi(argv[3]); |
| 83 | + std::vector<int> matching; |
| 84 | + for(int x = 0; x < repetitions; ++x) { |
| 85 | + for(size_t i = 0; i < user_agents.size(); ++i) { |
| 86 | + auto& ua = user_agents[i]; |
| 87 | + matching.clear(); |
| 88 | + int n = s.Match(ua, &matching); |
| 89 | + if (n) { |
| 90 | + n = f.FirstMatch(ua, matching); |
| 91 | + } else { |
| 92 | + n = -1; |
| 93 | + } |
| 94 | + if (!quiet) { |
| 95 | + if (n != -1) { |
| 96 | + std::cout << std::setw(3) << n; |
| 97 | + } |
| 98 | + std::cout << std::endl; |
| 99 | + } |
| 100 | + } |
| 101 | + } |
| 102 | + |
| 103 | + return 0; |
| 104 | +} |
0 commit comments