Lightweight 0.20260617.0
Loading...
Searching...
No Matches
SqlBackup.hpp
1// SPDX-License-Identifier: Apache-2.0
2#pragma once
3
#include "../Api.hpp"
#include "../SqlConnectInfo.hpp"
#include "../SqlQuery/MigrationPlan.hpp"
#include "../SqlSchema.hpp"

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <map>
#include <optional>
#include <string>
#include <string_view>
#include <vector>
16
17namespace Lightweight::SqlBackup
18{
19
/// Compression methods supported for ZIP entries.
///
/// The enumerator values correspond to the ZIP compression method IDs used by libzip
/// (the ZIP_CM_* constants named on each enumerator below).
/// Not every method is necessarily available at runtime; that depends on how libzip was
/// compiled. Use IsCompressionMethodSupported() to check availability.
// NOLINTNEXTLINE(performance-enum-size) - Values must match libzip ZIP_CM_* constants
enum class CompressionMethod : std::int32_t
{
    Store = 0,    ///< No compression (ZIP_CM_STORE)
    Deflate = 8,  ///< Deflate compression (ZIP_CM_DEFLATE) - most compatible
    Bzip2 = 12,   ///< Bzip2 compression (ZIP_CM_BZIP2)
    Lzma = 14,    ///< LZMA compression (ZIP_CM_LZMA)
    Zstd = 93,    ///< Zstandard compression (ZIP_CM_ZSTD)
    Xz = 95,      ///< XZ compression (ZIP_CM_XZ)
};
35
36/// Configuration for backup operations including compression and chunking.
38{
39 /// The compression method to use.
40 CompressionMethod method = CompressionMethod::Deflate;
41
42 /// The compression level (0-9).
43 /// - For Deflate: 1 = fastest, 9 = best compression, 6 = default
44 /// - For Bzip2: 1-9 (block size in 100k units)
45 /// - For Zstd: maps to zstd levels
46 /// - For Store: ignored
47 std::uint32_t level = 6;
48
49 /// The target size in bytes for each chunk before flushing.
50 /// Chunks are flushed when the buffer exceeds this size.
51 /// Default: 10 MB.
52 std::size_t chunkSizeBytes = 10 * 1024 * 1024;
53
54 /// Target rows per chunk window. Tables with a single numeric primary key are split into
55 /// windows of about this many keys (subject to a per-table window cap) that are backed up
56 /// in parallel by multiple workers.
57 std::size_t rowsPerChunk = 100'000;
58
59 /// Uncompressed bytes each worker accumulates in its private temp archive before sealing it
60 /// (the compression of that archive runs in the worker thread at that point, overlapped with
61 /// the network-bound fetch). Bounds worker memory at about jobs x workerArchiveBytes and
62 /// determines how many temp archives the finalize merge opens. Default: 256 MB.
63 std::size_t workerArchiveBytes = 256ULL * 1024 * 1024;
64
65 /// If true, only export schema metadata without backing up table data.
66 bool schemaOnly = false;
67
68 /// Deprecated no-op. Previously bypassed the MS SQL Server single-worker clamp; that clamp has
69 /// been removed (all databases now back up multi-threaded), so this flag no longer has any
70 /// effect. Retained temporarily to avoid an API/ABI break; pending removal.
72};
73
/// Configuration for restore operations including memory management.
///
/// A value of 0 on the size-like members selects an automatic or database-default
/// behavior, as described on each member.
struct RestoreSettings
{
    /// Batch size for insert operations (rows per batch).
    /// Default: 0 (auto-calculated based on available memory).
    std::size_t batchSize = 0;

    /// Maximum rows before an intermediate commit within a chunk.
    /// Helps reduce transaction log / WAL memory accumulation.
    /// Default: 10000. Set to 0 to disable intermediate commits.
    std::size_t maxRowsPerCommit = 10000;

    /// Database page cache size in KB (used for SQLite PRAGMA cache_size,
    /// could be extended for other DBMS memory hints).
    /// Default: 65536 (64MB). Set to 0 to use database default.
    std::size_t cacheSizeKB = 65536;

    /// Memory limit in bytes (0 = auto-detect from system).
    std::size_t memoryLimitBytes = 0;

    /// If true, only recreate schema without importing data.
    bool schemaOnly = false;
};
97
98/// Returns available system memory in bytes.
99LIGHTWEIGHT_API std::size_t GetAvailableSystemMemory() noexcept;
100
101/// Calculates optimal restore settings based on available memory.
102///
103/// @param availableMemory Available system memory in bytes.
104/// @param concurrency Number of concurrent restore workers.
105/// @return RestoreSettings optimized for the given memory constraints.
106LIGHTWEIGHT_API RestoreSettings CalculateRestoreSettings(std::size_t availableMemory, unsigned concurrency);
107
108/// Checks if a compression method is supported by the current libzip installation.
109///
110/// @param method The compression method to check.
111/// @return true if the method is available for both compression and decompression.
112LIGHTWEIGHT_API bool IsCompressionMethodSupported(CompressionMethod method) noexcept;
113
114/// Returns a list of all compression methods that are supported by the current libzip installation.
115LIGHTWEIGHT_API std::vector<CompressionMethod> GetSupportedCompressionMethods() noexcept;
116
117/// Returns the human-readable name of a compression method.
118LIGHTWEIGHT_API std::string_view CompressionMethodName(CompressionMethod method) noexcept;
119
/// Configuration for retry behavior on transient errors during backup/restore operations.
///
/// The defaults describe an exponential backoff: 500 ms initial delay, doubled after each
/// failed attempt, capped at 30 s, with at most 3 retries.
struct RetrySettings
{
    /// Maximum number of retry attempts for transient errors.
    unsigned maxRetries = 3;

    /// Initial delay between retry attempts.
    std::chrono::milliseconds initialDelay { 500 };

    /// Multiplier applied to delay after each failed attempt (exponential backoff).
    double backoffMultiplier = 2.0;

    /// Maximum delay between retry attempts.
    std::chrono::milliseconds maxDelay { 30000 };
};
135
136/// Information about a table being backed up.
138{
139 /// The list of columns in the table in SQL format.
140 std::string fields;
141
142 /// The list of columns in the table.
143 std::vector<bool> isBinaryColumn;
144
145 /// The list of columns in the table.
146 std::vector<SqlColumnDeclaration> columns;
147
148 /// The list of foreign key constraints in the table.
149 std::vector<SqlSchema::ForeignKeyConstraint> foreignKeys;
150
151 /// The indexes on the table (excluding primary key index).
152 std::vector<SqlSchema::IndexDefinition> indexes;
153
154 /// The number of rows in the table.
155 size_t rowCount = 0;
156};
157
/// Progress information for backup/restore operations status updates.
struct Progress
{
    /// The state of an individual backup/restore operation.
    enum class State : std::uint8_t
    {
        Started,
        InProgress,
        Finished,
        Error,
        Warning
    };

    /// The state of an individual backup/restore operation.
    /// Value-initialized so a default-constructed Progress never carries an indeterminate state.
    State state {};

    /// The name of the table being backed up / restored.
    std::string tableName;

    /// The current number of rows processed.
    size_t currentRows {};

    /// The total number of rows to be processed, if known.
    std::optional<size_t> totalRows;

    /// A message associated with the progress update.
    std::string message;
};
186
187/// The interface for progress updates.
189{
190 virtual ~ProgressManager() = default;
191 /// Default constructor.
192 ProgressManager() = default;
193 /// Default copy constructor.
195 /// Default copy assignment operator.
197 /// Default move constructor.
199 /// Default move assignment operator.
201
202 /// Gets called when the progress of an individual backup/restore operation changes.
203 virtual void Update(Progress const& p) = 0;
204
205 /// Gets called when all backup/restore operations are finished.
206 virtual void AllDone() = 0;
207
208 /// Sets the maximum length of a table name.
209 /// This is used to align the output of the progress manager.
210 virtual void SetMaxTableNameLength(size_t /*len*/) {}
211
212 /// Returns the number of errors encountered during the operation.
213 [[nodiscard]] virtual size_t ErrorCount() const noexcept
214 {
215 return 0;
216 }
217
218 /// Sets the total number of items to be processed (for ETA calculation).
219 /// @param totalItems Total number of items (rows) to process across all tables.
220 virtual void SetTotalItems(size_t totalItems)
221 {
222 (void) totalItems;
223 }
224
225 /// Adds to the total number of items for progressive ETA calculation.
226 /// This is called as row counts become available during parallel counting.
227 /// @param additionalItems Number of additional items (rows) to add to the total.
228 virtual void AddTotalItems(size_t additionalItems)
229 {
230 (void) additionalItems;
231 }
232
233 /// Called when items are processed (for rate and ETA calculation).
234 /// @param count Number of items (rows) just processed.
235 virtual void OnItemsProcessed(size_t count)
236 {
237 (void) count;
238 }
239};
240
241/// Base class for progress managers that tracks errors automatically.
243{
244 public:
245 void Update(Progress const& progress) override
246 {
247 if (progress.state == Progress::State::Error)
248 ++_errorCount;
249 }
250
251 [[nodiscard]] size_t ErrorCount() const noexcept override
252 {
253 return _errorCount;
254 }
255
256 private:
257 size_t _errorCount = 0;
258};
259
260struct NullProgressManager: ErrorTrackingProgressManager
261{
262 void Update(Progress const& progress) override
263 {
265 }
266 void AllDone() override {}
267};
268
269/// Backs up the database to a file.
270///
271/// @param outputFile the output file.
272/// @param connectionString the connection string used to connect to the database.
273/// @param concurrency the number of concurrent jobs.
274/// @param progress the progress manager to use for progress updates.
275/// @param schema the database schema to backup (optional).
276/// @param tableFilter comma-separated table filter patterns (default: "*" for all tables).
277/// Supports glob wildcards (* and ?) and schema.table notation.
278/// Examples: "Users,Products", "*_log", "dbo.Users", "sales.*"
279/// @param retrySettings configuration for retry behavior on transient errors.
280/// @param backupSettings configuration for compression method, level, and chunk size.
281LIGHTWEIGHT_API void Backup(std::filesystem::path const& outputFile,
282 SqlConnectionString const& connectionString,
283 unsigned concurrency,
284 ProgressManager& progress,
285 std::string const& schema = {},
286 std::string const& tableFilter = "*",
287 RetrySettings const& retrySettings = {},
288 BackupSettings const& backupSettings = {});
289
290/// Restores the database from a file.
291///
292/// @param inputFile the input file.
293/// @param connectionString the connection string used to connect to the database.
294/// @param concurrency the number of concurrent jobs.
295/// @param progress the progress manager to use for progress updates.
296/// @param schema the database schema to restore into (optional, overrides backup metadata).
297/// @param tableFilter comma-separated table filter patterns (default: "*" for all tables).
298/// Supports glob wildcards (* and ?) and schema.table notation.
299/// Examples: "Users,Products", "*_log", "dbo.Users", "sales.*"
300/// @param retrySettings configuration for retry behavior on transient errors.
301LIGHTWEIGHT_API void Restore(std::filesystem::path const& inputFile,
302 SqlConnectionString const& connectionString,
303 unsigned concurrency,
304 ProgressManager& progress,
305 std::string const& schema = {},
306 std::string const& tableFilter = "*",
307 RetrySettings const& retrySettings = {});
308
309/// Restores the database from a file with explicit memory management settings.
310///
311/// @param inputFile the input file.
312/// @param connectionString the connection string used to connect to the database.
313/// @param concurrency the number of concurrent jobs.
314/// @param progress the progress manager to use for progress updates.
315/// @param schema the database schema to restore into (optional, overrides backup metadata).
316/// @param tableFilter comma-separated table filter patterns (default: "*" for all tables).
317/// @param retrySettings configuration for retry behavior on transient errors.
318/// @param restoreSettings configuration for memory management during restore.
319LIGHTWEIGHT_API void Restore(std::filesystem::path const& inputFile,
320 SqlConnectionString const& connectionString,
321 unsigned concurrency,
322 ProgressManager& progress,
323 std::string const& schema,
324 std::string const& tableFilter,
325 RetrySettings const& retrySettings,
326 RestoreSettings const& restoreSettings);
327
328/// Creates the metadata JSON content.
329///
330/// @param connectionString the connection string used to connect to the database.
331/// @param tables the list of tables to backup.
332/// @param schema the database schema used for these tables (optional).
333LIGHTWEIGHT_API std::string CreateMetadata(SqlConnectionString const& connectionString,
334 SqlSchema::TableList const& tables,
335 std::string const& schema = {});
336
337/// Parses the metadata JSON content and returns a map of table info.
338///
339/// @param metadataJson content of the metadata.json file.
340LIGHTWEIGHT_API std::map<std::string, TableInfo> ParseSchema(std::string_view metadataJson,
341 ProgressManager* progress = nullptr);
342
343} // namespace Lightweight::SqlBackup
Base class for progress managers that tracks errors automatically.
size_t ErrorCount() const noexcept override
Returns the number of errors encountered during the operation.
void Update(Progress const &progress) override
Gets called when the progress of an individual backup/restore operation changes.
Configuration for backup operations including compression and chunking.
Definition SqlBackup.hpp:38
CompressionMethod method
The compression method to use.
Definition SqlBackup.hpp:40
bool schemaOnly
If true, only export schema metadata without backing up table data.
Definition SqlBackup.hpp:66
The interface for progress updates.
virtual void SetTotalItems(size_t totalItems)
ProgressManager()=default
Default constructor.
virtual void Update(Progress const &p)=0
Gets called when the progress of an individual backup/restore operation changes.
virtual void SetMaxTableNameLength(size_t)
virtual size_t ErrorCount() const noexcept
Returns the number of errors encountered during the operation.
ProgressManager & operator=(ProgressManager const &)=default
Default copy assignment operator.
virtual void OnItemsProcessed(size_t count)
ProgressManager(ProgressManager &&)=default
Default move constructor.
ProgressManager & operator=(ProgressManager &&)=default
Default move assignment operator.
virtual void AddTotalItems(size_t additionalItems)
virtual void AllDone()=0
Gets called when all backup/restore operations are finished.
ProgressManager(ProgressManager const &)=default
Default copy constructor.
Progress information for backup/restore operations status updates.
size_t currentRows
The current number of rows processed.
State state
The state of an individual backup/restore operation.
std::string message
A message associated with the progress update.
std::optional< size_t > totalRows
The total number of rows to be processed, if known.
std::string tableName
The name of the table being backed up / restored.
State
The state of an individual backup/restore operation.
Configuration for restore operations including memory management.
Definition SqlBackup.hpp:76
std::size_t memoryLimitBytes
Memory limit in bytes (0 = auto-detect from system).
Definition SqlBackup.hpp:92
bool schemaOnly
If true, only recreate schema without importing data.
Definition SqlBackup.hpp:95
Configuration for retry behavior on transient errors during backup/restore operations.
Information about a table being backed up.
size_t rowCount
The number of rows in the table.
std::vector< SqlColumnDeclaration > columns
The list of columns in the table.
std::vector< SqlSchema::IndexDefinition > indexes
The indexes on the table (excluding primary key index).
std::vector< bool > isBinaryColumn
Per-column flags marking which columns are binary.
std::vector< SqlSchema::ForeignKeyConstraint > foreignKeys
The list of foreign key constraints in the table.
std::string fields
The list of columns in the table in SQL format.