Lightweight 0.20260617.0
Loading...
Searching...
No Matches
WorkerChunkArchive.hpp
1// SPDX-License-Identifier: Apache-2.0
2#pragma once
3
4#include "../Api.hpp"
5#include "SqlBackup.hpp"
6
7#include <cstddef>
8#include <cstdint>
9#include <deque>
10#include <filesystem>
11#include <set>
12#include <string>
13#include <string_view>
14#include <vector>
15
16typedef struct zip zip_t; // NOLINT(modernize-use-using) - mirrors libzip's own C typedef
17
18namespace Lightweight::SqlBackup::detail
19{
20
21/// One backup worker's rotating set of compressed temp archives.
22///
23/// libzip defers all compression work to zip_close, so the only way to compress chunks DURING
24/// the (network-bound) export phase is to actually close archives as we go: chunks are added to a
25/// per-worker temp zip, and once it has swallowed @c rotationBytes of uncompressed input the
26/// archive is sealed (zip_close runs the compression in the worker thread) and a fresh one is
27/// started. The finalize phase later raw-merges the sealed archives' entries into the final
28/// backup archive without recompressing (see SqlBackup.cpp).
29///
30/// Single-threaded use: each worker owns exactly one instance; no internal locking.
31class LIGHTWEIGHT_API WorkerChunkArchive
32{
33 public:
34 /// Constructs the archive set (no file is created until the first Add).
35 ///
36 /// @param directory Existing temp directory the archives are created in.
37 /// @param workerId Stable worker index, used in the archive file names.
38 /// @param rotationBytes Uncompressed input bytes per archive before it is sealed (clamped to >= 1).
39 /// @param method Compression method applied to every entry.
40 /// @param level Compression level applied to every entry.
41 WorkerChunkArchive(std::filesystem::path directory,
42 unsigned workerId,
43 std::size_t rotationBytes,
44 CompressionMethod method,
45 std::uint32_t level);
46
47 /// Discards a still-open current archive (error path); sealed archives stay on disk.
48 ~WorkerChunkArchive() noexcept;
49
50 WorkerChunkArchive(WorkerChunkArchive const&) = delete;
51 WorkerChunkArchive& operator=(WorkerChunkArchive const&) = delete;
52 WorkerChunkArchive(WorkerChunkArchive&&) = delete;
53 WorkerChunkArchive& operator=(WorkerChunkArchive&&) = delete;
54
55 /// Adds @p data under @p entryName, overwriting a same-named entry in the current archive.
56 /// Seals and rotates first if the current archive already holds >= rotationBytes of input.
57 /// @param entryName The final backup archive entry name (kept verbatim through the merge).
58 /// @param data The uncompressed chunk bytes.
59 void Add(std::string const& entryName, std::string_view data);
60
61 /// Deletes @p entryName from the current archive if present there; otherwise records a
62 /// tombstone so the finalize merge skips the name from earlier sealed archives.
63 /// @param entryName The entry name to remove.
64 void Remove(std::string const& entryName);
65
66 /// Seals the current archive (zip_close — the compression happens here, in the calling
67 /// worker thread). No-op when no entries are pending. Idempotent.
68 void Seal();
69
70 /// The sealed archive paths in rotation order (complete after the final Seal()).
71 [[nodiscard]] std::vector<std::filesystem::path> const& SealedArchives() const noexcept
72 {
73 return m_sealed;
74 }
75
76 /// Entry names removed after their archive was sealed; the merge must skip these.
77 [[nodiscard]] std::set<std::string> const& Tombstones() const noexcept
78 {
79 return m_tombstones;
80 }
81
82 private:
83 void OpenNextArchive();
84
85 std::filesystem::path m_directory;
86 unsigned m_workerId;
87 std::size_t m_rotationBytes;
88 CompressionMethod m_method;
89 std::uint32_t m_level;
90
91 zip_t* m_current = nullptr;
92 std::filesystem::path m_currentPath;
93 std::size_t m_currentInputBytes = 0;
94 std::set<std::string> m_currentNames; // names present in the (open) current archive
95 // Backing store for the current archive's entries: libzip defers reading sources until
96 // zip_close, so the chunk bytes must outlive every Add until Seal(). Owned here and handed
97 // to libzip as non-owning buffers (freep=0); released once Seal() has closed the archive.
98 //
99 // MUST be a std::deque, not a std::vector: libzip retains a raw pointer into each element
100 // (emplace_back's returned reference). std::vector reallocates on growth and moves its
101 // elements, which for small-string-optimized entries relocates the character data itself and
102 // dangles libzip's pointer — silently corrupting small chunks. std::deque keeps element
103 // references valid across emplace_back, so the pointers stay live until Seal()/clear().
104 std::deque<std::string> m_currentBuffers;
105 unsigned m_rotationIndex = 0;
106
107 std::vector<std::filesystem::path> m_sealed;
108 std::set<std::string> m_tombstones;
109};
110
111} // namespace Lightweight::SqlBackup::detail