From ee7f2eb027c272427519b8a9d02b62a7411bf344 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Mon, 25 Aug 2025 15:21:43 +0100 Subject: Add an ingestor performance test Generates a random long file of random data and then ingests it. --- test/perf-ingest.cpp | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 test/perf-ingest.cpp (limited to 'test/perf-ingest.cpp') diff --git a/test/perf-ingest.cpp b/test/perf-ingest.cpp new file mode 100644 index 0000000..6de3dae --- /dev/null +++ b/test/perf-ingest.cpp @@ -0,0 +1,115 @@ +#include +#include + +#include "test-util.hpp" +#include +#include +#include + +#include + +namespace { + const std::filesystem::path TMP_LOG = std::format("/tmp/webstat-perf-{}.log", getpid()); + + const struct LogFile { + SPECIAL_MEMBERS_DELETE(LogFile); + + LogFile() + { + std::random_device randDevice; + std::mt19937 generator(randDevice()); + + struct Strings { + std::vector vhosts; + std::vector ips; + std::vector paths; + std::vector qss; + std::vector refs; + std::vector uas; + }; + + Strings strings; + + auto genIp = [&generator]() { + static std::uniform_int_distribution octetDistrib { + 0, std::numeric_limits::max()}; + return std::format("{}.{}.{}.{}", octetDistrib(generator), octetDistrib(generator), + octetDistrib(generator), octetDistrib(generator) + + ); + }; + auto getStrGen = [&generator](size_t minLen, size_t maxLen) { + return [minLen, maxLen, &generator]() { + std::uniform_int_distribution charDistrib {'a', 'z'}; + std::uniform_int_distribution lenDistrib {minLen, maxLen}; + std::string out; + std::generate_n(std::back_inserter(out), lenDistrib(generator), [&generator, &charDistrib]() { + return charDistrib(generator); + }); + return out; + }; + }; + + for (auto & [out, count, stringGenerator] : std::initializer_list< + std::tuple &, size_t, std::function>> { + {strings.vhosts, 4, getStrGen(6, 20)}, + {strings.ips, 4, genIp}, + {strings.paths, 100, getStrGen(1, 50)}, + {strings.qss, 100, getStrGen(1, 50)}, + {strings.refs, 50, getStrGen(10, 50)}, + {strings.uas, 10, getStrGen(50, 70)}, + }) { + std::generate_n(std::back_inserter(out), count, stringGenerator); + } + strings.qss.emplace_back(""); + strings.refs.emplace_back("-"); + strings.uas.emplace_back("-"); + + constexpr size_t MISC_MIN = 1000; + constexpr size_t MISC_MAX = 10000; + constexpr uint64_t TICK_START = 1755710158296508; + std::uniform_int_distribution tickDistrib {MISC_MIN, MISC_MAX}; + std::uniform_int_distribution sizeDistrib {MISC_MIN, MISC_MAX}; + std::uniform_int_distribution durationDistrib {MISC_MIN, MISC_MAX}; + uint64_t tick = TICK_START; + auto randomString = [&generator](auto & stringSet) { + std::uniform_int_distribution choiceDistrib {0, stringSet.size() - 1}; + return stringSet[choiceDistrib(generator)]; + }; + + std::ofstream logfile {TMP_LOG}; + constexpr size_t LOG_LINES = 10000; + for (size_t line = 0; line < LOG_LINES; ++line) { + std::println(logfile, R"LOG({} {} {} GET "/{}" "?{}" HTTP/1.1 200 {} {} "{}" "{}")LOG", + randomString(strings.vhosts), randomString(strings.ips), tick += tickDistrib(generator), + randomString(strings.paths), randomString(strings.qss), sizeDistrib(generator), + durationDistrib(generator), randomString(strings.refs), randomString(strings.uas)); + } + } + + ~LogFile() + { + std::filesystem::remove(TMP_LOG); + } + } LOG_FILE; + + void + setup(const benchmark::State &) + { + static const WebStat::MockDB mockdb; + } + + void + doIngestFile(benchmark::State & state) + { + WebStat::Ingestor ingestor {"perf-hostname", DB::MockDatabase::openConnectionTo("webstat")}; + for (auto loop : state) { + WebStat::FilePtr logFile {fopen(TMP_LOG.c_str(), "r")}; + ingestor.ingestLog(logFile.get()); + } + } +} + +BENCHMARK(doIngestFile)->Setup(setup); + +BENCHMARK_MAIN(); -- cgit v1.2.3