Lightweight 0.20260617.0
Loading...
Searching...
No Matches
ChunkPlanner.hpp
1// SPDX-License-Identifier: Apache-2.0
2#pragma once
3
4#include "../Api.hpp"
5#include "../SqlSchema.hpp"
6#include "../SqlServerType.hpp"
7
8#include <atomic>
9#include <cstddef>
10#include <cstdint>
11#include <deque>
12#include <functional>
13#include <optional>
14#include <string>
15#include <utility>
16#include <vector>
17
18namespace Lightweight::SqlBackup::detail
19{
20
/// Selects the SQL shape used to express a chunk's row window.
enum class ChunkStrategy : std::uint8_t
{
    /// Row-position window via OFFSET/FETCH covering rows [offset, offset + limit).
    /// The query requires a total ORDER BY.
    Offset = 0,
    /// Key-range window via `pk >= lo AND pk <= hi`. Only for tables with a single numeric
    /// primary key.
    PrimaryKeyRange = 1,
};
29
/// Per-table rendezvous point for the chunks of one table when several workers process them
/// concurrently. Instances are owned by the ChunkPlan, which keeps their addresses stable;
/// chunks refer to them by pointer.
struct TableBackupState
{
    /// Number of this table's chunks that have not completed yet. The worker whose decrement
    /// brings this to 0 is the one that reports Finished.
    std::atomic<std::size_t> remainingChunks { 0 };
    /// Rows processed so far for the whole table, accumulated across all workers; feeds
    /// currentRows in progress updates.
    std::atomic<std::size_t> processedRows { 0 };
    /// Latch ensuring that the first chunk (and only the first) fires Started.
    std::atomic<bool> started { false };
    /// Raised as soon as any chunk of this table fails; the Finished progress event is then
    /// suppressed.
    std::atomic<bool> failed { false };
    /// Total row count of the table. PK-range tables: plan-time estimate (pkMax - pkMin + 1).
    /// OFFSET tables: stored by the worker after its COUNT(*).
    std::atomic<std::size_t> totalRows { 0 };
};
45
46/// One unit of backup work: a bounded row-range of a single table. Multiple chunks of the
47/// same table can be processed concurrently. The chunk index is assigned at plan time so
48/// produced chunk filenames are unique without inter-worker coordination.
49struct Chunk
50{
51 /// The table this chunk reads from (pointer into the caller-owned table list; must outlive the chunk).
52 SqlSchema::Table const* table = nullptr;
53 /// How the window is expressed.
54 ChunkStrategy strategy = ChunkStrategy::Offset;
55 /// Stable per-table window index (0-based), assigned at plan time, used to derive a unique
56 /// chunk filename. PK-range tables get one chunk per window; OFFSET tables have a single
57 /// seed chunk with index 0.
58 uint32_t windowIndex = 0;
59 /// OFFSET-strategy resume cursor (rows to skip). Valid when strategy == Offset.
60 size_t offset = 0;
61 /// PrimaryKeyRange window bounds [lo, hi]. Valid when strategy == PrimaryKeyRange.
62 int64_t lo = 0;
63 int64_t hi = 0;
64 /// Primary-key column name for PrimaryKeyRange strategy; empty otherwise.
65 std::string pkColumn;
66 /// Informational: true if the table has at least one LOB column (varchar(max)/text/binary).
67 /// NOTE: dispatch routes on `arrayFetchable` (which already excludes LOB tables), not on this
68 /// field directly. Kept for observability / potential future use; not consulted by the fetch path.
69 bool hasLob = false;
70 /// True if every column of the table is in the "array-fetch-safe" type set (integer family,
71 /// Real, Bool, Varchar/Char, Decimal, NVarchar/NChar) AND the table has no LOB column. Such
72 /// chunks use the bulk RowArrayCursor read path (one SQLFetchScroll per block instead of one
73 /// SQLGetData per cell). Tables containing temporal (Date/Time/DateTime/Timestamp), Guid, or
74 /// binary/LOB columns are NOT array-fetchable yet and keep the proven single-row path (their
75 /// array representation is pending P6 follow-up tasks).
76 bool arrayFetchable = false;
77 /// Shared progress/completion state of this chunk's table (owned by the ChunkPlan). Never
78 /// null for chunks produced by PlanChunks — the workers dereference it unconditionally.
79 TableBackupState* state = nullptr;
80};
81
82/// Queries the inclusive [MIN(pk), MAX(pk)] bounds of @p table's @p pkColumn at plan time,
83/// or std::nullopt if the table is empty. Injected so PlanChunks stays database-free in tests.
84using PkBoundsFunction =
85 std::function<std::optional<std::pair<int64_t, int64_t>>(SqlSchema::Table const& table, std::string const& pkColumn)>;
86
87/// The planned chunk work-list plus the per-table shared state the chunks point into.
88struct ChunkPlan
89{
90 /// Flat chunk work-list, in table order then window order. Single-numeric-PK tables contribute
91 /// one PrimaryKeyRange chunk per window; all other tables contribute one OFFSET seed chunk.
92 std::vector<Chunk> chunks;
93 /// One state per table that has chunks; deque for stable addresses (chunks hold pointers,
94 /// and std::deque moves without relocating elements).
95 std::deque<TableBackupState> tableStates;
96 /// Single-numeric-PK tables with no rows: no chunks; caller reports them Finished(0).
97 std::vector<SqlSchema::Table const*> emptyTables;
98};
99
100/// Plans the chunk work-list for a set of tables. Tables with a single numeric primary key are
101/// split into one PrimaryKeyRange chunk per key window (bounds via @p pkBounds, window width via
102/// CappedWindowWidth) so multiple workers can process one table concurrently; all other tables get
103/// a single OFFSET seed chunk. The returned plan owns the per-table states the chunks point into
104/// and must outlive the workers.
105///
106/// @param tables The tables to back up (must outlive the returned plan — chunks hold pointers).
107/// @param rowsPerChunk Target rows per chunk window.
108/// @param pkBounds Plan-time MIN/MAX query for a table's primary-key column.
109/// @param serverType The DBMS being backed up (gates per-DBMS array-fetch admissions, see
110/// TableIsArrayFetchable).
111/// @return The chunk plan (work-list + per-table states + empty-table list).
112[[nodiscard]] LIGHTWEIGHT_API ChunkPlan PlanChunks(std::vector<SqlSchema::Table> const& tables,
113 size_t rowsPerChunk,
114 PkBoundsFunction const& pkBounds,
115 SqlServerType serverType);
116
117/// Returns true if @p table has a column whose type cannot be fixed-stride array-bound
118/// (varchar(max)/text/nvarchar(max)/binary/varbinary/image LOBs). Such tables use the
119/// single-row fallback fetch path.
120[[nodiscard]] LIGHTWEIGHT_API bool TableHasLobColumn(SqlSchema::Table const& table);
121
122/// Returns true if @p table can be read through the bulk RowArrayCursor path while staying
123/// byte-identical to the trusted single-row decode path.
124///
125/// A table is array-fetchable iff it has no LOB column AND every column type is in the
126/// "safe" set whose array-cursor representation matches the single-row path exactly:
127/// - integer family: Integer / Bigint / Smallint / Tinyint (read as int64),
128/// - Real (read as double),
129/// - Bool (read as 0/1 int64 -> bool),
130/// - Varchar / Char (read as text),
131/// - Decimal (already string / MSSQL-CONVERTed),
132/// - NVarchar / NChar (P6: SQL_C_WCHAR -> ToUtf8, the same
133/// conversion as the single-row
134/// u16string read),
135/// - Time, on PostgreSQL/MSSQL only (P6: both paths read TIME as driver
136/// text via SQL_C_CHAR, preserving
137/// fractional seconds; SQLite's
138/// single-row path uses the native
139/// SqlTime struct instead, so Time
140/// stays single-row there),
141/// - Date / DateTime / Timestamp (P6: native SQL_DATE_STRUCT /
142/// SQL_TIMESTAMP_STRUCT array binds,
143/// formatted via the same std::format
144/// as the single-row SqlDate /
145/// SqlDateTime reads).
146///
147/// Guid, Text, and binary/LOB columns are still EXCLUDED: Guid array decode is pending a P6
148/// follow-up task, and Text/binary are LOBs that cannot be fixed-stride array-bound at all.
149/// Tables with any such column keep the proven single-row path.
150///
151/// @param table The table to classify.
152/// @param serverType The DBMS being backed up (gates the per-DBMS admissions above).
153[[nodiscard]] LIGHTWEIGHT_API bool TableIsArrayFetchable(SqlSchema::Table const& table, SqlServerType serverType);
154
155/// Returns the name of the table's sole primary-key column if it is a single numeric (integer-family)
156/// PK suitable for range partitioning; std::nullopt otherwise (no PK, composite PK, or non-numeric PK).
157[[nodiscard]] LIGHTWEIGHT_API std::optional<std::string> SingleNumericPrimaryKey(SqlSchema::Table const& table);
158
159/// Computes the inclusive primary-key window list [lo, hi] covering [pkMin, pkMax] with at most
160/// @p rowsPerChunk keys per window. Windows are contiguous and disjoint (next lo = previous hi + 1),
161/// so every key in [pkMin, pkMax] is covered exactly once. Returns empty if pkMin > pkMax.
162/// Guards against int64 overflow when pkMax is near INT64_MAX.
163///
164/// @param pkMin Inclusive lower bound of the key range to cover.
165/// @param pkMax Inclusive upper bound of the key range to cover.
166/// @param rowsPerChunk Maximum number of keys per window (clamped to at least 1).
167/// @return The ordered list of closed [lo, hi] windows; empty if pkMin > pkMax.
168[[nodiscard]] LIGHTWEIGHT_API std::vector<std::pair<int64_t, int64_t>> PlanPrimaryKeyWindows(int64_t pkMin,
169 int64_t pkMax,
170 int64_t rowsPerChunk);
171
/// Upper bound on the number of parallel windows a single table is split into at plan time.
/// It caps plan-time memory for sparse key spaces (e.g. snowflake-style int64 IDs), where
/// span / rowsPerChunk would otherwise explode; sparse tables get proportionally wider windows
/// instead.
///
/// Declared `inline` so that every translation unit including this header refers to the same
/// single definition, rather than each receiving its own internal-linkage copy.
inline constexpr std::int64_t MaxWindowsPerTable = 1024;
176
177/// Computes the per-window key width for splitting [pkMin, pkMax] into at most
178/// @p maxWindowsPerTable windows of at least @p rowsPerChunk keys each:
179/// windowCount = clamp(ceil(span / rowsPerChunk), 1, maxWindowsPerTable); width = ceil(span / windowCount).
180/// Wrap-safe over the full int64 range (span arithmetic is done in uint64).
181///
182/// @param pkMin Inclusive lower bound of the key range.
183/// @param pkMax Inclusive upper bound of the key range.
184/// @param rowsPerChunk Target keys per window (clamped to at least 1).
185/// @param maxWindowsPerTable Maximum number of windows (clamped to at least 1).
186/// @return The window width in keys; @p rowsPerChunk if pkMin > pkMax.
187[[nodiscard]] LIGHTWEIGHT_API int64_t CappedWindowWidth(int64_t pkMin,
188 int64_t pkMax,
189 int64_t rowsPerChunk,
190 int64_t maxWindowsPerTable);
191
192} // namespace Lightweight::SqlBackup::detail