Lightweight 0.20260213.0
Loading...
Searching...
No Matches
DataGenerator.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0
2
3#pragma once
4
5#include "Entities.hpp"
6
7#include <Lightweight/DataMapper/DataMapper.hpp>
8
9#include <cstddef>
10#include <cstdint>
11#include <random>
12#include <string>
13#include <vector>
14
15/// @file DataGenerator.hpp
16/// @brief Data generation utilities for creating a 500MB+ test database.
17///
18/// All generation functions are deterministic when using the same seed,
19/// allowing reproducible test runs across all supported database backends.
20
21namespace LargeDb
22{
23
24/// @brief Configuration for database generation.
25struct GeneratorConfig
26{
27 uint64_t seed = 42; ///< Random seed for deterministic generation
28
29 // Entity counts
30 size_t userCount = 2000;
31 size_t categoryCount = 200;
32 size_t productCount = 2000;
33 size_t productImageCount = 4000; ///< ~2 images per product
34 size_t orderCount = 10000;
35 size_t orderItemCount = 30000; ///< ~3 items per order
36 size_t reviewCount = 8000;
37 size_t tagCount = 500;
38 size_t productTagCount = 6000; ///< ~3 tags per product
39 size_t activityLogCount = 50000;
40 size_t systemAuditLogCount = 5000;
41 size_t articleCount = 500;
42
43 // Field size targets (in bytes)
44 size_t userBioSize = 500;
45 size_t userAvatarSize = 10240;
46 size_t categoryDescriptionSize = 2048;
47 size_t productLongDescriptionSize = 8192;
48 size_t productSpecsSize = 2048;
49 size_t productImageSize = 51200;
50 size_t productThumbnailSize = 5120;
51 size_t reviewContentSize = 2048;
52 size_t activityLogJsonSize = 1024;
53 size_t systemAuditContextSize = 5120;
54 size_t systemAuditStackTraceSize = 3072;
55 size_t articleContentSize = 15360;
56 size_t articleFeaturedImageSize = 20480;
57};
58
59/// @brief Creates a scaled-down configuration for faster testing.
60/// @param scaleFactor Factor to scale down counts (e.g., 0.1 for 10% of full size)
61/// @return Scaled configuration
62GeneratorConfig CreateScaledConfig(double scaleFactor);
63
64/// @brief Seeded random number generator wrapper for deterministic generation.
65class SeededRandom
66{
67 public:
68 explicit SeededRandom(uint64_t seed);
69
70 /// @brief Generates a random integer in range [min, max].
71 int64_t NextInt(int64_t min, int64_t max);
72
73 /// @brief Generates a random double in range [min, max).
74 double NextDouble(double min, double max);
75
76 /// @brief Picks a random element from a vector; @p items must not be empty.
77 template <typename T>
78 T const& Pick(std::vector<T> const& items)
79 {
80 return items[static_cast<size_t>(NextInt(0, static_cast<int64_t>(items.size()) - 1))];
81 }
82
83 /// @brief Generates a random boolean with given probability of true.
84 bool NextBool(double probabilityTrue = 0.5);
85
86 /// @brief Generates random text of approximately the given size.
87 std::string GenerateText(size_t targetSize);
88
89 /// @brief Generates pseudo-random binary data of the given size.
90 std::vector<uint8_t> GenerateBinaryData(size_t targetSize);
91
92 /// @brief Generates a pseudo-random JSON object of approximately the given size.
93 std::string GenerateJson(size_t targetSize);
94
95 /// @brief Generates a random email address.
96 std::string GenerateEmail(int64_t userId);
97
98 /// @brief Generates a random product name.
99 std::string GenerateProductName(int64_t productId);
100
101 /// @brief Generates random address JSON.
102 std::string GenerateAddressJson();
103
104 private:
105 std::mt19937_64 m_generator;
106};
107
108/// @brief Creates all tables for the large test database schema.
109/// @param dm DataMapper instance connected to the target database.
110void CreateSchema(Light::DataMapper& dm);
111
112/// @brief Drops all tables from the large test database schema.
113/// @param dm DataMapper instance connected to the target database.
114void DropSchema(Light::DataMapper& dm);
115
116/// @brief Populates the database with generated data according to config.
117/// @param dm DataMapper instance connected to the target database.
118/// @param config Generation configuration.
119/// @param progressCallback Optional callback for progress reporting (0.0 to 1.0).
120void PopulateDatabase(Light::DataMapper& dm,
121 GeneratorConfig const& config = {},
122 std::function<void(double, std::string_view)> progressCallback = {});
123
124/// @brief Gets the expected total size of generated data based on config.
125/// @param config Generation configuration.
126/// @return Expected raw data size in bytes.
127size_t GetExpectedDataSize(GeneratorConfig const& config);
128
129/// @brief Batch insert helper for efficient data loading.
130template <typename Entity, typename Generator>
131size_t BatchInsert(Light::DataMapper& dm, size_t count, Generator&& generator, size_t batchSize = 1000);
132
133} // namespace LargeDb
GeneratorConfig CreateScaledConfig(double scaleFactor)
Creates a scaled-down configuration for faster testing.
void PopulateDatabase(Light::DataMapper &dm, GeneratorConfig const &config={}, std::function< void(double, std::string_view)> progressCallback={})
Populates the database with generated data according to config.
size_t GetExpectedDataSize(GeneratorConfig const &config)
Gets the expected total size of generated data based on config.
void CreateSchema(Light::DataMapper &dm)
Creates all tables for the large test database schema.
void DropSchema(Light::DataMapper &dm)
Drops all tables from the large test database schema.
size_t BatchInsert(Light::DataMapper &dm, size_t count, Generator &&generator, size_t batchSize=1000)
Batch insert helper for efficient data loading.
Entity definitions for the large test database schema.
Seeded random number generator wrapper for deterministic generation.
std::string GenerateProductName(int64_t productId)
Generates a random product name.
double NextDouble(double min, double max)
Generates a random double in range [min, max).
std::string GenerateEmail(int64_t userId)
Generates a random email address.
int64_t NextInt(int64_t min, int64_t max)
Generates a random integer in range [min, max].
std::string GenerateText(size_t targetSize)
Generates random text of approximately the given size.
T const & Pick(std::vector< T > const &items)
Picks a random element from a vector.
std::string GenerateAddressJson()
Generates random address JSON.
std::string GenerateJson(size_t targetSize)
Generates a pseudo-random JSON object of approximately the given size.
std::vector< uint8_t > GenerateBinaryData(size_t targetSize)
Generates pseudo-random binary data of the given size.
bool NextBool(double probabilityTrue=0.5)
Generates a random boolean with given probability of true.
Main API for mapping records to and from the database using high level C++ syntax.
Configuration for database generation.
size_t orderItemCount
~3 items per order
uint64_t seed
Random seed for deterministic generation.
size_t productImageCount
~2 images per product
size_t productTagCount
~3 tags per product