Save Memory on Large Repository Metadata Blob Writes #74313
The diff below is against the Azure repository plugin's blob container retry tests.

```diff
@@ -15,6 +15,7 @@
 import fixture.azure.AzureHttpHandler;
 import org.elasticsearch.cluster.metadata.RepositoryMetadata;
 import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.core.SuppressForbidden;
 import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.blobstore.BlobContainer;
```
```diff
@@ -58,6 +59,7 @@
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
```
```diff
@@ -172,7 +174,7 @@ int getMaxReadRetries(String clientName) {
             .put(MAX_SINGLE_PART_UPLOAD_SIZE_SETTING.getKey(), new ByteSizeValue(1, ByteSizeUnit.MB))
             .build());

-        return new AzureBlobContainer(BlobPath.EMPTY, new AzureBlobStore(repositoryMetadata, service));
+        return new AzureBlobContainer(BlobPath.EMPTY, new AzureBlobStore(repositoryMetadata, service, BigArrays.NON_RECYCLING_INSTANCE));
     }

     public void testReadNonexistentBlobThrowsNoSuchFileException() {
```
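Threading a `BigArrays` instance into the `AzureBlobStore` constructor hints at the PR's core idea: buffer the streamed bytes into fixed-size parts and upload each full part as one block, rather than materializing the whole blob in memory first. Below is a minimal sketch of that buffering technique with hypothetical names (it is not the class this PR adds; a plain `byte[]` stands in for what would presumably be a BigArrays-backed, circuit-breaker-tracked buffer):

```java
import java.io.IOException;
import java.io.OutputStream;

/**
 * Sketch only: an OutputStream that buffers into one fixed-size chunk and
 * ships each full chunk as its own block upload, so memory use stays bounded
 * by the chunk size no matter how large the blob is.
 */
abstract class ChunkedUploadOutputStream extends OutputStream {

    private final byte[] buffer; // one chunk of memory, reused for every block
    private int position;

    ChunkedUploadOutputStream(int chunkSize) {
        this.buffer = new byte[chunkSize];
    }

    @Override
    public void write(int b) throws IOException {
        buffer[position++] = (byte) b;
        if (position == buffer.length) {
            flushChunk();
        }
    }

    @Override
    public void close() throws IOException {
        if (position > 0) {
            flushChunk(); // upload the trailing, partially filled chunk
        }
        commitUpload(); // e.g. an Azure Put Block List over all uploaded block ids
    }

    private void flushChunk() throws IOException {
        uploadChunk(buffer, position); // e.g. one Azure Put Block request
        position = 0;
    }

    /** Upload one chunk as an individual block/part. */
    protected abstract void uploadChunk(byte[] chunk, int length) throws IOException;

    /** Commit all uploaded blocks/parts into the final blob. */
    protected abstract void commitUpload() throws IOException;
}
```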
```diff
@@ -391,6 +393,83 @@ public void testWriteLargeBlob() throws Exception {
         assertThat(blocks.isEmpty(), is(true));
     }

+    public void testWriteLargeBlobStreaming() throws Exception {
```

> **Review comment:** These tests could be dried up more against the existing large blob write tests, but this is already quite a complicated PR since it touches all the plugins, so I figured it best to clean up more in a follow-up and not touch too much of the existing code. (I did dry things up in the production code a little more to make it easier to follow that this doesn't introduce much new interaction with the SDKs, though.)
```diff
+        final int maxRetries = randomIntBetween(2, 5);
+
+        final int blobSize = (int) ByteSizeUnit.MB.toBytes(10);
+        final byte[] data = randomBytes(blobSize);
```

> **Review comment:** Maybe we could generate fewer random bytes and repeat them instead?
>
> **Reply:** ++ Certainly, let's do that in a follow-up though; then we can dry this up with the other test that already uses a large array as well in one go?
>
> **Reply:** Sure
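The follow-up agreed on above might look something like this sketch (not part of this diff; `randomBytes` is the test helper already used above, everything else is made up for illustration): tile a small random seed buffer up to the target size instead of generating `blobSize` fully random bytes.

```java
// Hypothetical helper: build a large test payload by repeating a small
// random buffer, so only seedSize bytes of randomness are generated.
static byte[] randomRepeatedBytes(int blobSize, int seedSize) {
    final byte[] seed = randomBytes(seedSize); // small, e.g. a few KB
    final byte[] data = new byte[blobSize];
    for (int offset = 0; offset < blobSize; offset += seed.length) {
        System.arraycopy(seed, 0, data, offset, Math.min(seed.length, blobSize - offset));
    }
    return data;
}
```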
```diff
+        int nbBlocks = (int) Math.ceil((double) data.length / (double) ByteSizeUnit.MB.toBytes(1));
+
+        final int nbErrors = 2; // we want all requests to fail at least once
+        final AtomicInteger countDownUploads = new AtomicInteger(nbErrors * nbBlocks);
+        final AtomicLong bytesReceived = new AtomicLong(0L);
+        final CountDown countDownComplete = new CountDown(nbErrors);
+
+        final Map<String, BytesReference> blocks = new ConcurrentHashMap<>();
+        httpServer.createContext("/account/container/write_large_blob", exchange -> {
+
+            if ("PUT".equals(exchange.getRequestMethod())) {
+                final Map<String, String> params = new HashMap<>();
+                RestUtils.decodeQueryString(exchange.getRequestURI().getRawQuery(), 0, params);
+
+                // Put Block requests carry a "blockid" query parameter; accept only every
+                // other attempt so each block upload fails at least once and is retried
+                final String blockId = params.get("blockid");
+                assert Strings.hasText(blockId) == false || AzureFixtureHelper.assertValidBlockId(blockId);
+
+                if (Strings.hasText(blockId) && (countDownUploads.decrementAndGet() % 2 == 0)) {
+                    final BytesReference blockData = Streams.readFully(exchange.getRequestBody());
+                    blocks.put(blockId, blockData);
+                    bytesReceived.addAndGet(blockData.length());
+                    exchange.sendResponseHeaders(RestStatus.CREATED.getStatus(), -1);
+                    exchange.close();
+                    return;
+                }
+
+                // Put Block List commits the uploaded blocks: reassemble them in the
+                // requested order and verify the result matches the original payload
+                final String complete = params.get("comp");
+                if ("blocklist".equals(complete) && (countDownComplete.countDown())) {
+                    final String blockList = Streams.copyToString(new InputStreamReader(exchange.getRequestBody(), UTF_8));
+                    final List<String> blockUids = Arrays.stream(blockList.split("<Latest>"))
+                        .filter(line -> line.contains("</Latest>"))
+                        .map(line -> line.substring(0, line.indexOf("</Latest>")))
+                        .collect(Collectors.toList());
+
+                    final ByteArrayOutputStream blob = new ByteArrayOutputStream();
+                    for (String blockUid : blockUids) {
+                        BytesReference block = blocks.remove(blockUid);
+                        assert block != null;
+                        block.writeTo(blob);
+                    }
+                    assertArrayEquals(data, blob.toByteArray());
+                    exchange.getResponseHeaders().add("x-ms-request-server-encrypted", "false");
+                    exchange.sendResponseHeaders(RestStatus.CREATED.getStatus(), -1);
+                    exchange.close();
+                    return;
+                }
+            }
+
+            // simulate a retryable failure for all remaining requests: either a random
+            // server error or an abruptly closed connection
+            if (randomBoolean()) {
+                Streams.readFully(exchange.getRequestBody());
+                AzureHttpHandler.sendError(exchange, randomFrom(RestStatus.INTERNAL_SERVER_ERROR, RestStatus.SERVICE_UNAVAILABLE));
+            }
+            exchange.close();
+        });
```
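For reference, the Put Block List body that the `blocklist` branch above parses is a small XML document listing the uploaded block IDs in commit order (per the Azure Blob Storage REST API), roughly of the form:

```xml
<?xml version="1.0" encoding="utf-8"?>
<BlockList>
  <Latest>AAAAAA==</Latest>
  <Latest>AQAAAA==</Latest>
</BlockList>
```

Splitting on `<Latest>` and cutting each piece at `</Latest>`, as the handler does, therefore recovers the block IDs in the order the client wants them committed, which is the order the fixture uses to reassemble and verify the blob. (The IDs shown are placeholders; the client generates its own base64 block IDs.)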
```diff
+
+        final BlobContainer blobContainer = createBlobContainer(maxRetries);
+        blobContainer.writeBlob("write_large_blob", false, randomBoolean(), out -> {
+            int outstanding = data.length;
+            while (outstanding > 0) {
+                if (randomBoolean()) {
+                    int toWrite = Math.toIntExact(Math.min(randomIntBetween(64, data.length), outstanding));
+                    out.write(data, data.length - outstanding, toWrite);
+                    outstanding -= toWrite;
+                } else {
+                    out.write(data[data.length - outstanding]);
+                    outstanding--;
+                }
+            }
+        });
+        assertEquals(blobSize, bytesReceived.get());
+    }
+
     public void testRetryUntilFail() throws Exception {
         final int maxRetries = randomIntBetween(2, 5);
         final AtomicInteger requestsReceived = new AtomicInteger(0);
```
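The write path above goes through a consumer-based `writeBlob` overload: the caller receives an `OutputStream` and streams into it instead of handing over a fully materialized payload. Its shape, as inferred from the call site (a sketch, not the authoritative declaration; the real method lives on `BlobContainer`, and parameter names are guesses):

```java
import java.io.IOException;
import java.io.OutputStream;
import org.elasticsearch.core.CheckedConsumer; // import path varies across versions

// Inferred shape of the streaming write API exercised by the test.
interface StreamingWriter {
    void writeBlob(String blobName, boolean failIfAlreadyExists, boolean atomic,
                   CheckedConsumer<OutputStream, IOException> writer) throws IOException;
}
```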
> **Review comment:** Those [dangling, uncommitted blocks] are garbage collected after a week according to the API docs (https://docs.microsoft.com/en-us/rest/api/storageservices/put-block#remarks).
>
> **Reply:** Ah thanks, I didn't know that; I'll add a comment to that effect :) Seems like you can't even delete those dangling blocks if you wanted to.
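The comment promised in that reply might read something like this (hypothetical wording, grounded only in the linked docs):

```java
// We make no attempt to clean up stale blocks from failed uploads: Azure
// garbage-collects uncommitted blocks after a week, see
// https://docs.microsoft.com/en-us/rest/api/storageservices/put-block#remarks
```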