From b2416925f8845b70ed25fb4ec7cde8ef11e8c239 Mon Sep 17 00:00:00 2001 From: Dan Goodliffe Date: Thu, 21 Aug 2025 20:39:52 +0100 Subject: Initial commit; basic Apache log parsing --- test/test-ingest.cpp | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) create mode 100644 test/test-ingest.cpp (limited to 'test/test-ingest.cpp') diff --git a/test/test-ingest.cpp b/test/test-ingest.cpp new file mode 100644 index 0000000..f7eab19 --- /dev/null +++ b/test/test-ingest.cpp @@ -0,0 +1,166 @@ +#define BOOST_TEST_MODULE ingest +#include +#include + +#include + +/* Helper aliases for the tests below. NOTE(review): the include targets and template argument lists are blank in this copy of the patch, so the exact types behind these aliases and behind the scn::scan calls cannot be confirmed from here. */using ScanValues = std::remove_cvref_t()->values())>; +template using ParseData = std::tuple; +/* Closure type of a lambda that evaluates Deleter(obj) on its argument and returns the result. */template +using DeleteWith = decltype([](auto obj) { + return Deleter(obj); +}); +/* unique_ptr alias. NOTE(review): FilePtr is not referenced by any test case visible in this patch - confirm it is still needed. */using FilePtr = std::unique_ptr>; + +/* Stream inserters for optional and tuple values, presumably so BOOST_CHECK_EQUAL can print them in failure reports. NOTE(review): these overloads are declared inside namespace std, which the standard does not allow for user code - confirm this is intentional. */namespace std { + /* Writes the contained value when the optional is engaged and writes nothing otherwise; returns strm. */template + ostream & + operator<<(ostream & strm, const std::optional & value) + { + if (value) { + strm << *value; + } + return strm; + } + + /* Writes each tuple element followed by a newline, in element order, via std::apply and a comma fold; returns the stream from the last insertion. NOTE(review): an empty tuple would make the fold yield void - confirm that case never occurs. */template + ostream & + operator<<(ostream & strm, const std::tuple & values) + { + return std::apply( + [&strm](auto &&... 
elems) -> decltype(auto) { + return ((strm << elems << '\n'), ...); + }, + values); + } +} + +/* Each double-quoted input must scan successfully with the format {} and yield the text between the quotes. */BOOST_DATA_TEST_CASE(QuotedStringsGood, + boost::unit_test::data::make>({ + {R"("")", ""}, + {R"("-")", "-"}, + {R"(".")", "."}, + {R"("/url/path")", "/url/path"}, + }), + input, expected) +{ + const auto result = scn::scan(input, "{}"); + BOOST_REQUIRE(result); + BOOST_CHECK_EQUAL(result->value(), expected); +} + +/* Empty input and text without surrounding double quotes must fail to scan. */BOOST_DATA_TEST_CASE(QuotedStringsBad, + boost::unit_test::data::make({ + R"()", + R"(-)", + R"(word)", + R"(/url/path)", + }), + input) +{ + BOOST_REQUIRE(!scn::scan(input, "{}")); +} + +/* A quoted empty string scans to nullopt; a quoted string that starts with ? scans to the text after the ? (which may be empty). */BOOST_DATA_TEST_CASE(QueryStringsGood, + boost::unit_test::data::make>({ + {R"("")", std::nullopt}, + {R"("?")", ""}, + {R"("?something")", "something"}, + {R"("?some=thing")", "some=thing"}, + {R"("?some=thing&other=thing")", "some=thing&other=thing"}, + }), + input, expected) +{ + const auto result = scn::scan(input, "{}"); + BOOST_REQUIRE(result); + BOOST_CHECK_EQUAL(result->value(), expected); +} + +/* Must fail to scan: empty input, quoted text that does not start with ?, and unquoted text. */BOOST_DATA_TEST_CASE(QueryStringsBad, + boost::unit_test::data::make({ + R"()", + R"("-")", + R"(".")", + R"(-)", + R"(word)", + R"(/url/path)", + }), + input) +{ + BOOST_REQUIRE(!scn::scan(input, "{}")); +} + +/* Attaches the Boost.Test timeout decorator with argument 1 to the test case that follows. */BOOST_TEST_DECORATOR(*boost::unit_test::timeout(1)) + +/* decode rewrites the string in place: a backslash-x escape consumes exactly two hex digits and becomes that character (any further digits stay literal), and backslash-t becomes a tab; text without escapes is unchanged. */BOOST_DATA_TEST_CASE(CLFStringsDecode, + boost::unit_test::data::make>({ + {"", ""}, + {"plain", "plain"}, + {R"(hex\x41)", "hexA"}, + {R"(hex\x4141)", "hexA41"}, + {R"(hex\x41\x41)", "hexAA"}, + {R"(hex\t\x41)", "hex\tA"}, + }), + input, expected) +{ + std::string value {input}; + scn::scanner::decode(value); + BOOST_CHECK_EQUAL(value, expected); +} + +/* Declares the following test case as dependent on CLFStringsDecode. */BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("CLFStringsDecode")) + +/* Quoted text scans to its unquoted content (a quoted empty string gives an empty string), except a quoted single dash, which scans to nullopt. */BOOST_DATA_TEST_CASE(CLFStringsGood, + boost::unit_test::data::make>({ + {R"("")", ""}, + {R"("-")", std::nullopt}, + {R"("?")", "?"}, + {R"(".")", "."}, + {R"("something")", "something"}, + {R"("https://google.com")", "https://google.com"}, + {R"("Mozilla/5.0 (Windows NT 10.0; Win64; x64) 
AppleWebKit/537.36")", + R"(Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36)"}, + }), + input, expected) +{ + const auto result = scn::scan(input, "{}"); + BOOST_REQUIRE(result); + BOOST_CHECK_EQUAL(result->value(), expected); +} + +/* Empty input and unquoted text must fail to scan. */BOOST_DATA_TEST_CASE(CLFStringsBad, + boost::unit_test::data::make({ + R"()", + R"(-)", + R"(word)", + R"(/url/path)", + }), + input) +{ + BOOST_REQUIRE(!scn::scan(input, "{}")); +} + +/* ExtractFields is declared dependent on the three field-level Good test cases named in these decorators. */BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("QuotedStringsGood")) +BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("QueryStringsGood")) +BOOST_TEST_DECORATOR(*boost::unit_test::depends_on("CLFStringsGood")) + +/* Whole log lines are passed to WebStat::Ingestor::scanLogLine; the scan must succeed and values() must equal the expected tuple. In the rows below a quoted single dash and a quoted empty query string both correspond to nullopt in the expected tuple. */BOOST_DATA_TEST_CASE(ExtractFields, + boost::unit_test::data::make>({ + {R"LOG(git.randomdan.homeip.net 98.82.40.168 1755561576768318 GET "/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest" "?h=gentoobrowse-api-0.9.1&id=a2ed3fd30333721accd4b697bfcb6cc4165c7714" HTTP/1.1 200 1884 107791 "-" "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36")LOG", + {"git.randomdan.homeip.net", "98.82.40.168", 1755561576768318, "GET", + R"(/repo/gentoobrowse-api/commit/gentoobrowse-api/unittests/fixtures/756569aa764177340726dd3d40b41d89b11b20c7/app-crypt/pdfcrack/Manifest)", + R"(h=gentoobrowse-api-0.9.1&id=a2ed3fd30333721accd4b697bfcb6cc4165c7714)", "HTTP/1.1", + 200, 1884, 107791, std::nullopt, + R"(Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot) Chrome/119.0.6045.214 Safari/537.36)"}}, + {R"LOG(www.randomdan.homeip.net 43.128.84.166 1755561575973204 GET "/app-dicts/myspell-et/Manifest" "" HTTP/1.1 200 312 10369 "https://google.com" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36")LOG", + 
{"www.randomdan.homeip.net", "43.128.84.166", 1755561575973204, "GET", + "/app-dicts/myspell-et/Manifest", std::nullopt, "HTTP/1.1", 200, 312, 10369, + "https://google.com", + R"(Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36)"}}, + }), + input, expected) +{ + const auto result = WebStat::Ingestor::scanLogLine(input); + BOOST_REQUIRE(result); + BOOST_CHECK_EQUAL(result->values(), expected); +} -- cgit v1.2.3