From 01ccd4e00d0990bd191ccc07a22895a68df65634 Mon Sep 17 00:00:00 2001
From: Dan Goodliffe
Date: Sun, 27 Jun 2021 18:51:59 +0100
Subject: Basic type mapper

---
NOTE(review): this patch was recovered from a whitespace-collapsed copy in
which everything between angle brackets had been stripped. Template arguments
and standard-library includes were reconstructed from usage; the project and
Boost include names (compileTimeFormatter.h, output/pq/typeMapper.h,
boost/test/...) are inferred - confirm against the repository. Blob and commit
hashes are kept as they appeared and no longer match the edited content.

 lib/output/pq/typeMapper.cpp |  77 ++++++++++++++++++++++++
 lib/output/pq/typeMapper.h   |  56 ++++++++++++++++++
 test/Jamfile.jam             |   2 +-
 test/test-mapping.cpp        | 127 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 261 insertions(+), 1 deletion(-)
 create mode 100644 lib/output/pq/typeMapper.cpp
 create mode 100644 lib/output/pq/typeMapper.h
 create mode 100644 test/test-mapping.cpp

diff --git a/lib/output/pq/typeMapper.cpp b/lib/output/pq/typeMapper.cpp
new file mode 100644
index 0000000..5583319
--- /dev/null
+++ b/lib/output/pq/typeMapper.cpp
@@ -0,0 +1,77 @@
+#include "typeMapper.h"
+#include <compileTimeFormatter.h>
+#include <stdexcept>
+
+namespace MyGrate::Output::Pq {
+	TypeMapper::RegexMapper::RegexMapper(std::optional<ObsRegex> t, std::optional<ObsRegex> c, std::string tt) :
+		typeMatch {std::move(t)}, columnMatch {std::move(c)}, targetType {std::move(tt)}
+	{
+	}
+
+	// Yields targetType (or typeName itself when targetType is empty) when every
+	// regex that is set matches its whole input; otherwise yields nothing.
+	std::optional<std::string_view>
+	TypeMapper::RegexMapper::map(std::string_view typeName, std::string_view columnName) const
+	{
+		if ((!typeMatch || std::regex_match(typeName.begin(), typeName.end(), *typeMatch))
+				&& (!columnMatch || std::regex_match(columnName.begin(), columnName.end(), *columnMatch))) {
+			if (targetType.empty()) {
+				return {typeName};
+			}
+			return {targetType};
+		}
+		return {};
+	}
+
+	// Registers the built-in type-name rules. Order matters: map() returns the
+	// first rule that matches, so tinyint(1) must precede the general tinyint rule.
+	TypeMapper::TypeMapper()
+	{
+		const auto add = [this](auto r, auto t) {
+			mappings.emplace_back(std::make_unique<RegexMapper>(r, std::nullopt, t));
+		};
+		// https://dev.mysql.com/doc/refman/8.0/en/numeric-types.html
+		add(R"(bit\(\d+\))"_r, "");
+		add(R"(tinyint\(1\)( (un)?signed)?)"_r, "boolean");
+		add(R"((tiny|small)int(\(\d+\))?( (un)?signed)?)"_r, "smallint");
+		add(R"((medium)?int(\(\d+\))?( (un)?signed)?)"_r, "int");
+		add(R"(bigint(\(\d+\))?( (un)?signed)?)"_r, "bigint");
+		add(R"((numeric|decimal)(\(\d+(,\d+)?\))?)"_r, "");
+		add(R"(float(\(\d+,\d+\))?)"_r, "float");
+		add(R"((real|double( precision)?)(\(\d+,\d+\))?)"_r, "double");
+		// https://dev.mysql.com/doc/refman/8.0/en/date-and-time-types.html
+		add(R"(date)"_r, "date");
+		add(R"(datetime(\(\d+\))?)"_r, "timestamp without time zone"); // drops precision
+		add(R"(timestamp(\(\d+\))?)"_r, "timestamp without time zone"); // drops precision
+		add(R"(time(\(\d+\))?)"_r, "time without time zone"); // drops precision
+		add(R"(year)"_r, "smallint");
+		// https://dev.mysql.com/doc/refman/8.0/en/string-types.html
+		add(R"((var)?binary(\(\d+\))?)"_r, "bytea");
+		add(R"((var)?char(\(\d+\))?)"_r, "");
+		add(R"((tiny||medium|long)blob(\(\d+\))?)"_r, "bytea");
+		add(R"((tiny||medium|long)text(\(\d+\))?)"_r, "text");
+		add(R"(enum\(.*\))"_r, "text"); // not really
+		// set
+		// https://dev.mysql.com/doc/refman/8.0/en/json.html
+		add(R"(json)"_r, "json");
+	}
+
+	// Returns the result of the first rule that maps type t / column n and
+	// throws std::runtime_error when no rule matches.
+	std::string_view
+	TypeMapper::map(std::string_view t, std::string_view n) const
+	{
+		for (const auto & m : mappings) {
+			if (const auto rt {m->map(t, n)}) {
+				return *rt;
+			}
+		}
+		throw std::runtime_error(scprintf<"No mapping for %? (%?)">(n, t));
+	}
+
+	// Builds a case-insensitive ObsRegex from exactly the len characters of the literal.
+	TypeMapper::ObsRegex operator""_r(const char * input, std::size_t len)
+	{
+		return MyGrate::Output::Pq::TypeMapper::ObsRegex {std::string {input, len}, std::regex::icase};
+	}
+}
diff --git a/lib/output/pq/typeMapper.h b/lib/output/pq/typeMapper.h
new file mode 100644
index 0000000..7005e4f
--- /dev/null
+++ b/lib/output/pq/typeMapper.h
@@ -0,0 +1,56 @@
+#ifndef MYGRATE_OUTPUT_PQ_TYPEMAPPER_H
+#define MYGRATE_OUTPUT_PQ_TYPEMAPPER_H
+
+#include <memory>
+#include <optional>
+#include <regex>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace MyGrate::Output::Pq {
+	// Maps a source type name (and column name) to a target type name using an ordered rule list.
+	class TypeMapper {
+	public:
+		// std::regex that also keeps its pattern text in src.
+		class ObsRegex : public std::regex {
+		public:
+			explicit ObsRegex(const std::string & src, std::regex_constants::syntax_option_type sot = {}) :
+				std::regex {src, sot}, src {src}
+			{
+			}
+			const std::string src;
+		};
+
+		// A single mapping rule; map() yields a target type or nothing.
+		struct Mapping {
+			// Rules are owned and destroyed through MappingPtr (a pointer to this base).
+			virtual ~Mapping() = default;
+			virtual std::optional<std::string_view> map(std::string_view t, std::string_view n) const = 0;
+		};
+		using MappingPtr = std::unique_ptr<Mapping>;
+
+		// Rule matching the type name and/or column name against optional regexes.
+		struct RegexMapper : public Mapping {
+			RegexMapper(std::optional<ObsRegex>, std::optional<ObsRegex>, std::string);
+
+			std::optional<std::string_view> map(std::string_view t, std::string_view n) const override;
+
+			std::optional<ObsRegex> typeMatch;
+			std::optional<ObsRegex> columnMatch;
+			std::string targetType;
+		};
+
+		TypeMapper();
+
+		// First matching rule wins; throws std::runtime_error when none match.
+		std::string_view map(std::string_view t, std::string_view n) const;
+
+	private:
+		std::vector<MappingPtr> mappings;
+	};
+
+	TypeMapper::ObsRegex operator""_r(const char * input, std::size_t);
+}
+
+#endif
diff --git a/test/Jamfile.jam b/test/Jamfile.jam
index 55dcadf..d0b0e70 100644
--- a/test/Jamfile.jam
+++ b/test/Jamfile.jam
@@ -18,4 +18,4 @@ run test-streams.cpp ;
 run test-misc.cpp ;
 run test-mysql.cpp : : : testdb ;
 run test-postgresql.cpp : -- : ../db/schema.sql : testdb ;
-run test-e2e.cpp : -- : ../db/schema.sql : testdb ;
+run test-mapping.cpp : : : testdb testdb ;
diff --git a/test/test-mapping.cpp b/test/test-mapping.cpp
new file mode 100644
index 0000000..0ecc62f
--- /dev/null
+++ b/test/test-mapping.cpp
@@ -0,0 +1,127 @@
+#define BOOST_TEST_MODULE TypeMappers
+#include <boost/test/data/test_case.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include "helpers.h"
+#include <compileTimeFormatter.h>
+#include <output/pq/typeMapper.h>
+
+using M = MyGrate::Output::Pq::TypeMapper::RegexMapper;
+
+namespace std {
+	ostream &
+	operator<<(ostream & s, const MyGrate::Output::Pq::TypeMapper::ObsRegex & r)
+	{
+		return s << r.src;
+	}
+	ostream &
+	operator<<(ostream & s, const M & m)
+	{
+		return MyGrate::scprintf<"{ type: %?, column: %?, target: %? }">(s, m.typeMatch, m.columnMatch, m.targetType);
+	}
+}
+
+using regex_mapping_test_data = std::tuple<M, std::string_view, std::string_view, std::optional<std::string_view>>;
+using namespace MyGrate::Output::Pq;
+BOOST_DATA_TEST_CASE(regex_mapper,
+		boost::unit_test::data::make<regex_mapping_test_data>({
+				// Types
+				{{R"(int\(\d+\))"_r, {}, "int"}, "int(10)", "any", "int"},
+				{{R"(int(\(\d+\))?)"_r, {}, "int"}, "int", "any", "int"},
+				{{R"(int\(\d+\))"_r, {}, "int"}, "int(1)", "any", "int"},
+				{{R"(int\(\d+\))"_r, {}, "int"}, "varchar(1)", "any", {}},
+				{{R"(varchar\(\d+\))"_r, {}, ""}, "varchar(1)", "any", "varchar(1)"},
+				{{R"(varchar\(\d+\))"_r, {}, ""}, "varchar(10)", "any", "varchar(10)"},
+				{{R"(varchar\(\d+\))"_r, {}, ""}, "int(10)", "any", {}},
+				{{R"(varchar\(\d{4,}\))"_r, {}, "text"}, "varchar(1024)", "any", "text"},
+				// Names
+				{{{}, R"(.*\.id)"_r, "bigint"}, "int(10)", "all.id", "bigint"},
+				{{{}, R"(.*\.name)"_r, "text"}, "int(10)", "all.name", "text"},
+		}),
+		m, inType, inCol, exp)
+{
+	BOOST_CHECK_EQUAL(m.map(inType, inCol), exp);
+}
+
+using mapping_test_data = std::tuple<std::string_view, std::string_view>;
+BOOST_DATA_TEST_CASE(default_type_mapping,
+		boost::unit_test::data::make<mapping_test_data>({
+				// https://dev.mysql.com/doc/refman/8.0/en/numeric-types.html
+				{"bit(10)", "bit(10)"},
+				{"tinyint", "smallint"},
+				{"tinyint(1)", "boolean"},
+				{"tinyint(2)", "smallint"},
+				{"smallint", "smallint"},
+				{"smallint(1)", "smallint"},
+				{"smallint(2)", "smallint"},
+				{"mediumint", "int"},
+				{"mediumint(5)", "int"},
+				{"int", "int"},
+				{"int(10)", "int"},
+				{"bigint", "bigint"},
+				{"bigint(10)", "bigint"},
+				{"tinyint unsigned", "smallint"},
+				{"tinyint(1) unsigned", "boolean"},
+				{"tinyint(2) unsigned", "smallint"},
+				{"smallint unsigned", "smallint"},
+				{"smallint(1) unsigned", "smallint"},
+				{"smallint(2) unsigned", "smallint"},
+				{"mediumint unsigned", "int"},
+				{"mediumint(5) unsigned", "int"},
+				{"int unsigned", "int"},
+				{"int(10) unsigned", "int"},
+				{"bigint unsigned", "bigint"},
+				{"bigint(10) unsigned", "bigint"},
+				{"decimal", "decimal"},
+				{"decimal(1,2)", "decimal(1,2)"},
+				{"decimal(1)", "decimal(1)"},
+				{"numeric", "numeric"},
+				{"numeric(1,2)", "numeric(1,2)"},
+				{"numeric(1)", "numeric(1)"},
+				{"float", "float"},
+				{"float(1,2)", "float"},
+				{"real", "double"},
+				{"real(1,2)", "double"},
+				{"double", "double"},
+				{"double(1,2)", "double"},
+				{"double precision", "double"},
+				{"double precision(1,2)", "double"},
+				// https://dev.mysql.com/doc/refman/8.0/en/date-and-time-types.html
+				{"date", "date"},
+				{"datetime", "timestamp without time zone"},
+				{"datetime(1)", "timestamp without time zone"},
+				{"timestamp", "timestamp without time zone"},
+				{"timestamp(1)", "timestamp without time zone"},
+				{"time", "time without time zone"},
+				{"time(1)", "time without time zone"},
+				{"year", "smallint"},
+				// https://dev.mysql.com/doc/refman/8.0/en/string-types.html
+				{"binary", "bytea"},
+				{"binary(10)", "bytea"},
+				{"varbinary", "bytea"},
+				{"varbinary(10)", "bytea"},
+				{"char", "char"},
+				{"char(10)", "char(10)"},
+				{"varchar", "varchar"},
+				{"varchar(10)", "varchar(10)"},
+				{"blob", "bytea"},
+				{"blob(100)", "bytea"},
+				{"tinyblob", "bytea"},
+				{"mediumblob", "bytea"},
+				{"longblob", "bytea"},
+				{"text", "text"},
+				{"text(100)", "text"},
+				{"tinytext", "text"},
+				{"mediumtext", "text"},
+				{"longtext", "text"},
+				// enum
+				{"enum('a','b')", "text"},
+				// set
+				// https://dev.mysql.com/doc/refman/8.0/en/json.html
+				{"json", "json"},
+		}),
+		inType, exp)
+{
+	TypeMapper tm;
+	BOOST_CHECK_EQUAL(tm.map(inType, {}), exp);
+}
-- 
cgit v1.2.3