Skip to content

Commit 00996cd

Browse files
authored
Serializer/deserializer for encrypted backup files (#14912)
1 parent 0c1ebc0 commit 00996cd

File tree

10 files changed

+1484
-5
lines changed

10 files changed

+1484
-5
lines changed

ydb/core/backup/common/encryption.cpp

Lines changed: 873 additions & 0 deletions
Large diffs are not rendered by default.

ydb/core/backup/common/encryption.h

Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
#pragma once
2+
3+
#include <ydb/library/yverify_stream/yverify_stream.h>
4+
5+
#include <util/generic/buffer.h>
6+
#include <util/generic/maybe.h>
7+
#include <util/generic/string.h>
8+
9+
#include <vector>
10+
11+
namespace NKikimr::NBackup {
12+
13+
// Backup file type.
//
// Identifies the kind of a file stored in a backup item folder.
// Requirements:
//   * Must be different for all files in one backup item folder.
//   * Must be one byte size.
// Values repeat between different item kinds (table, topic, view, ...);
// the uniqueness requirement above applies only within one backup item folder.
enum class EBackupFileType : unsigned char {
    // All items
    Metadata = 0,
    Permissions = 1,

    // Table
    TableSchema = 10,
    TableData = 11,

    // Topic
    TopicDescription = 10,
    TopicCreate = 11,

    // Coordination node
    CoordinationNodeCreate = 10,
    CoordinationNodeCreateRateLimiter = 11,

    // Incomplete
    Incomplete = 10,
    IncompleteCsv = 11,

    // Directory
    DirectoryEmpty = 10,

    // View
    ViewCreate = 10,

    // Database
    Database = 10,

    // User
    UserCreate = 10,

    // Group
    GroupCreate = 10,
    GroupAlter = 11,

    // Replication
    AsyncReplicationCreate = 10,

    // External data source
    ExternalDataSourceCreate = 10,

    // External table
    ExternalTableCreate = 10,
};

// Enforce the "one byte" requirement at compile time.
static_assert(sizeof(EBackupFileType) == 1, "EBackupFileType must be one byte in size");
62+
63+
struct TEncryptionIV {
64+
TEncryptionIV() = default; // Uninitialized IV
65+
TEncryptionIV(const TEncryptionIV&) = default;
66+
TEncryptionIV(TEncryptionIV&&) = default;
67+
68+
TEncryptionIV& operator=(const TEncryptionIV&) = default;
69+
TEncryptionIV& operator=(TEncryptionIV&&) = default;
70+
71+
// Generate new random IV
72+
static TEncryptionIV Generate();
73+
74+
// Combine IV for backup item file
75+
// base: base IV for the whole backup
76+
// backupItemNumber: unique backup item number within backup
77+
// shardNumber (only for sharded backup items such as tables): datashard number
78+
static TEncryptionIV Combine(const TEncryptionIV& base, EBackupFileType fileType, uint32_t backupItemNumber, uint32_t shardNumber);
79+
80+
// Combine IV for specific chunk
81+
// fileIV: IV for backup item file got by Combine() function
82+
static TEncryptionIV CombineForChunk(const TEncryptionIV& fileIV, uint32_t chunkNumber);
83+
84+
static TEncryptionIV FromBinaryString(const TString& s);
85+
86+
operator bool() const {
87+
return !IV.empty();
88+
}
89+
90+
bool operator!() const {
91+
return IV.empty();
92+
}
93+
94+
bool operator==(const TEncryptionIV& iv) const {
95+
return IV == iv.IV;
96+
}
97+
98+
bool operator!=(const TEncryptionIV& iv) const {
99+
return IV != iv.IV;
100+
}
101+
102+
size_t Size() const {
103+
return IV.size();
104+
}
105+
106+
const unsigned char* Ptr() const {
107+
Y_VERIFY(!IV.empty());
108+
return &IV[0];
109+
}
110+
111+
TString GetHexString() const;
112+
TString GetBinaryString() const;
113+
114+
std::vector<unsigned char> IV;
115+
116+
// Proper size for ciphers used in backups
117+
static constexpr size_t SIZE = 12;
118+
119+
static constexpr size_t FILE_TYPE_OFFSET = 0;
120+
static constexpr size_t FILE_TYPE_SIZE = 1;
121+
122+
static constexpr size_t BACKUP_ITEM_OFFSET = FILE_TYPE_OFFSET + FILE_TYPE_SIZE;
123+
static constexpr size_t BACKUP_ITEM_SIZE = 3;
124+
static constexpr uint32_t MAX_BACKUP_ITEM_NUMBER = (1 << (8 * BACKUP_ITEM_SIZE));
125+
126+
static constexpr size_t SHARD_NUMBER_OFFSET = BACKUP_ITEM_OFFSET + BACKUP_ITEM_SIZE;
127+
static constexpr size_t SHARD_NUMBER_SIZE = 4;
128+
129+
static constexpr size_t CHUNK_NUMBER_OFFSET = SHARD_NUMBER_OFFSET + SHARD_NUMBER_SIZE;
130+
static constexpr size_t CHUNK_NUMBER_SIZE = 4;
131+
132+
static_assert(CHUNK_NUMBER_OFFSET + CHUNK_NUMBER_SIZE == SIZE);
133+
};
134+
135+
struct TEncryptionKey {
136+
TEncryptionKey() = default; // Uninitialized
137+
TEncryptionKey(const TEncryptionKey&) = default;
138+
TEncryptionKey(TEncryptionKey&&) = default;
139+
explicit TEncryptionKey(const TString& bytes)
140+
: Key(reinterpret_cast<const unsigned char*>(bytes.data()), reinterpret_cast<const unsigned char*>(bytes.data() + bytes.size()))
141+
{
142+
}
143+
144+
TEncryptionKey& operator=(const TEncryptionKey&) = default;
145+
TEncryptionKey& operator=(TEncryptionKey&&) = default;
146+
147+
operator bool() const {
148+
return !Key.empty();
149+
}
150+
151+
bool operator!() const {
152+
return Key.empty();
153+
}
154+
155+
size_t Size() const {
156+
return Key.size();
157+
}
158+
159+
const unsigned char* Ptr() const {
160+
Y_VERIFY(!Key.empty());
161+
return &Key[0];
162+
}
163+
164+
TString GetBinaryString() const;
165+
166+
std::vector<unsigned char> Key;
167+
};
168+
169+
// Class that writes encrypted file
170+
// Has streaming interface
171+
class TEncryptedFileSerializer {
172+
public:
173+
TEncryptedFileSerializer(TEncryptedFileSerializer&&) = default;
174+
TEncryptedFileSerializer(TString algorithm, TEncryptionKey key, TEncryptionIV iv);
175+
~TEncryptedFileSerializer();
176+
177+
// Streaming interface
178+
// File consists of blocks that contain MAC
179+
// Block size should not be too big
180+
// because whole block must be read before usage.
181+
TBuffer AddBlock(TStringBuf data, bool last);
182+
183+
// Helper that serializes the whole file at one time
184+
static TBuffer EncryptFile(TString algorithm, TEncryptionKey key, TEncryptionIV iv, TStringBuf data);
185+
186+
private:
187+
class TImpl;
188+
std::unique_ptr<TImpl> Impl;
189+
};
190+
191+
// Class that reads encrypted file
192+
// Has streaming interface
193+
class TEncryptedFileDeserializer {
194+
TEncryptedFileDeserializer();
195+
196+
public:
197+
TEncryptedFileDeserializer(TEncryptedFileDeserializer&&) = default;
198+
TEncryptedFileDeserializer(TEncryptionKey key); // Decrypt file with key. Take IV from file header.
199+
TEncryptedFileDeserializer(TEncryptionKey key, TEncryptionIV expectedIV); // Decrypt file with key. Check that IV in header is equal to expectedIV
200+
~TEncryptedFileDeserializer();
201+
202+
// Adds buffer with input data.
203+
void AddData(TBuffer data, bool last);
204+
205+
// Decrypts next block from previously added data.
206+
// Throws in case of error.
207+
// Returns Nothing if not enough data added.
208+
// Returns buffer with data in normal case.
209+
TMaybe<TBuffer> GetNextBlock();
210+
211+
// Store state
212+
TString GetState() const;
213+
214+
// Restore from state
215+
// State includes secret key
216+
static TEncryptedFileDeserializer RestoreFromState(const TString& state);
217+
218+
// Get file IV.
219+
// Must be called after data added enough for the file header.
220+
TEncryptionIV GetIV() const;
221+
222+
// Get input bytes read
223+
size_t GetProcessedInputBytes() const;
224+
225+
// Helper that deserializes the whole file at one time
226+
static std::pair<TBuffer, TEncryptionIV> DecryptFile(TEncryptionKey key, TBuffer data);
227+
static TBuffer DecryptFile(TEncryptionKey key, TEncryptionIV expectedIV, TBuffer data);
228+
229+
private:
230+
class TImpl;
231+
std::unique_ptr<TImpl> Impl;
232+
};
233+
234+
} // NKikimr::NBackup

0 commit comments

Comments
 (0)