Skip to content

S3 URI Parser #3874

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Apr 6, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/next-release/feature-AmazonS3-92ece24.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"category": "Amazon S3",
"contributor": "",
"type": "feature",
"description": "Adding feature for parsing S3 URIs"
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,14 @@
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import software.amazon.awssdk.annotations.Immutable;
import software.amazon.awssdk.annotations.SdkInternalApi;
import software.amazon.awssdk.annotations.SdkPublicApi;
Expand Down Expand Up @@ -62,8 +66,11 @@
import software.amazon.awssdk.services.s3.internal.endpoints.UseGlobalEndpointResolver;
import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.GetUrlRequest;
import software.amazon.awssdk.services.s3.parsing.S3Uri;
import software.amazon.awssdk.utils.AttributeMap;
import software.amazon.awssdk.utils.StringUtils;
import software.amazon.awssdk.utils.Validate;
import software.amazon.awssdk.utils.http.SdkHttpUtils;

/**
* Utilities for working with Amazon S3 objects. An instance of this class can be created by:
Expand Down Expand Up @@ -94,7 +101,7 @@
@SdkPublicApi
public final class S3Utilities {
private static final String SERVICE_NAME = "s3";

private static final Pattern ENDPOINT_PATTERN = Pattern.compile("^(.+\\.)?s3[.-]([a-z0-9-]+)\\.");
private final Region region;
private final URI endpoint;
private final S3Configuration s3Configuration;
Expand Down Expand Up @@ -251,6 +258,162 @@ public URL getUrl(GetUrlRequest getUrlRequest) {
}
}

/**
* Returns a parsed {@link S3Uri} with which a user can easily retrieve the bucket, key, region, style, and query
* parameters of the URI. Only path-style and virtual-hosted-style URI parsing is supported, including CLI-style
* URIs, e.g., "s3://bucket/key". AccessPoints and Outposts URI parsing is not supported. If you work with object keys
* and/or query parameters with special characters, they must be URL-encoded, e.g., replace " " with "%20". If you work with
* virtual-hosted-style URIs with bucket names that contain a dot, i.e., ".", the dot must not be URL-encoded. Encoded
* buckets, keys, and query parameters will be returned decoded.
*
* <p>
* For more information on path-style and virtual-hosted-style URIs, see <a href=
* "https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-bucket-intro.html"
* >Methods for accessing a bucket</a>.
*
* @param uri The URI to be parsed
* @return Parsed {@link S3Uri}
*
* <p><b>Example Usage</b>
* <p>
* {@snippet :
* S3Client s3Client = S3Client.create();
* S3Utilities s3Utilities = s3Client.utilities();
* String uriString = "https://myBucket.s3.us-west-1.amazonaws.com/doc.txt?versionId=abc123";
* URI uri = URI.create(uriString);
* S3Uri s3Uri = s3Utilities.parseUri(uri);
*
* String bucket = s3Uri.bucket().orElse(null); // "myBucket"
* String key = s3Uri.key().orElse(null); // "doc.txt"
* Region region = s3Uri.region().orElse(null); // Region.US_WEST_1
* boolean isPathStyle = s3Uri.isPathStyle(); // false
* String versionId = s3Uri.firstMatchingRawQueryParameter("versionId").orElse(null); // "abc123"
*}
*/
public S3Uri parseUri(URI uri) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a blocker, but we may want to support string, which we will encode for them in the future depending on customer ask.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I tried using preprocessUrlStr() from v1 to encode but it runs into issues in cases like:

  • "." in bucket name for virtual hosted style, URI.getHost() doesn't work properly since it only retrieves the portion to the left of the first "."
  • query parameters get parsed as part of the key, so the key is incorrect and query parameters are empty, due to the "?" getting encoded

Seems to be pretty complicated to fix these...

// Fail fast on a null URI and on access point / Outposts URIs, which this parser does not support.
validateUri(uri);

// CLI-style URIs ("s3://bucket/key") are recognized by their scheme (compared case-insensitively)
// and carry no endpoint host, so they are parsed separately.
if ("s3".equalsIgnoreCase(uri.getScheme())) {
return parseAwsCliStyleUri(uri);
}

// Everything else must be an S3 endpoint URI, in either path-style or virtual-hosted-style form.
return parseStandardUri(uri);
}

/**
 * Parses a URI that targets an S3 endpoint host, in either path-style or virtual-hosted-style form.
 *
 * <p>The host is matched against {@code ENDPOINT_PATTERN}: group 1 is the optional prefix in front of
 * the {@code s3} label (the bucket plus a trailing dot), group 2 is the label that follows {@code s3}.
 *
 * @param uri the URI to parse; must have a host
 * @return the parsed {@link S3Uri}
 * @throws IllegalArgumentException if the URI has no host or the host does not match the endpoint pattern
 */
private S3Uri parseStandardUri(URI uri) {
    String host = uri.getHost();
    if (host == null) {
        throw new IllegalArgumentException("Invalid S3 URI: no hostname: " + uri);
    }

    Matcher endpointMatcher = ENDPOINT_PATTERN.matcher(host);
    boolean looksLikeS3Endpoint = endpointMatcher.find();
    if (!looksLikeS3Endpoint) {
        throw new IllegalArgumentException("Invalid S3 URI: hostname does not appear to be a valid S3 endpoint: " + uri);
    }

    S3Uri.Builder uriBuilder = S3Uri.builder().uri(uri);
    addRegionIfNeeded(uriBuilder, endpointMatcher.group(2));
    addQueryParamsIfNeeded(uriBuilder, uri);

    // A non-empty prefix in front of "s3" means the bucket lives in the host name (virtual-hosted style);
    // otherwise the bucket is the first path segment (path style).
    boolean bucketInHost = !StringUtils.isEmpty(endpointMatcher.group(1));
    return bucketInHost
           ? parseVirtualHostedStyleUri(uriBuilder, uri, endpointMatcher)
           : parsePathStyleUri(uriBuilder, uri);
}

/**
 * Sets the region on the builder unless the captured host label is the literal {@code "amazonaws"},
 * which is what the endpoint pattern captures when no region label follows {@code s3}
 * (for example {@code s3.amazonaws.com}).
 *
 * @param builder the builder to update
 * @param region  the host label captured after {@code s3}
 * @return the builder, with the region applied when one was present
 */
private S3Uri.Builder addRegionIfNeeded(S3Uri.Builder builder, String region) {
    boolean hostHasNoRegion = "amazonaws".equals(region);
    return hostHasNoRegion ? builder : builder.region(Region.of(region));
}

/**
 * Copies the URI's query parameters onto the builder when the URI has a query component.
 *
 * @param builder the builder to update
 * @param uri     the URI whose query string is inspected
 * @return the builder, with query parameters applied when the URI has a query
 */
private S3Uri.Builder addQueryParamsIfNeeded(S3Uri.Builder builder, URI uri) {
    if (uri.getQuery() == null) {
        // No query component: leave the builder untouched.
        return builder;
    }
    return builder.queryParams(SdkHttpUtils.uriParams(uri));
}

/**
 * Completes a path-style {@link S3Uri}, where the bucket is the first path segment and the key is
 * everything after the slash that follows it.
 *
 * <p>Bucket and key both stay {@code null} for an empty or {@code "/"} path. The key stays
 * {@code null} when nothing follows the bucket (e.g. {@code "/bucket"} or {@code "/bucket/"}).
 *
 * @param builder the builder already holding the URI, region and query parameters
 * @param uri     the URI whose path is split into bucket and key
 * @return the built path-style {@link S3Uri}
 */
private S3Uri parsePathStyleUri(S3Uri.Builder builder, URI uri) {
    String path = uri.getPath();
    String bucketName = null;
    String objectKey = null;

    boolean pathNamesBucket = !(StringUtils.isEmpty(path) || "/".equals(path));
    if (pathNamesBucket) {
        // Search from index 1 to skip the leading slash.
        int separator = path.indexOf('/', 1);

        if (separator < 0) {
            // Bucket only, no trailing slash, e.g., "https://s3.amazonaws.com/bucket"
            bucketName = path.substring(1);
        } else {
            bucketName = path.substring(1, separator);
            // Only treat the remainder as a key when something follows the separator.
            if (separator + 1 < path.length()) {
                objectKey = path.substring(separator + 1);
            }
        }
    }

    return builder.key(objectKey)
                  .bucket(bucketName)
                  .isPathStyle(true)
                  .build();
}

/**
 * Completes a virtual-hosted-style {@link S3Uri}, where the bucket is the host prefix captured by
 * group 1 of the endpoint matcher and the key is the path without its leading slash.
 *
 * @param builder the builder already holding the URI, region and query parameters
 * @param uri     the URI whose path supplies the key
 * @param matcher the endpoint matcher that has already matched the URI's host
 * @return the built {@link S3Uri}; the key is {@code null} when the path is empty or {@code "/"}
 */
private S3Uri parseVirtualHostedStyleUri(S3Uri.Builder builder, URI uri, Matcher matcher) {
    // Group 1 is the bucket followed by the dot that separates it from "s3"; drop that dot.
    String bucketWithDot = matcher.group(1);
    String bucketName = bucketWithDot.substring(0, bucketWithDot.length() - 1);

    String path = uri.getPath();
    boolean pathNamesKey = !StringUtils.isEmpty(path) && !"/".equals(path);
    String objectKey = pathNamesKey ? path.substring(1) : null;

    return builder.key(objectKey)
                  .bucket(bucketName)
                  .build();
}

/**
 * Parses a CLI-style URI such as {@code s3://bucket/key}. The authority is taken as the bucket and
 * the path without its leading slash as the key. The result has a {@code null} region, is not
 * path-style, and carries an empty query-parameter map.
 *
 * @param uri the {@code s3://} URI to parse
 * @return the parsed {@link S3Uri}; the key is {@code null} when the path is empty or just {@code "/"}
 * @throws IllegalArgumentException if the URI has no authority, i.e. no bucket
 */
private S3Uri parseAwsCliStyleUri(URI uri) {
    String bucketName = uri.getAuthority();
    if (bucketName == null) {
        throw new IllegalArgumentException("Invalid S3 URI: bucket not included: " + uri);
    }

    String path = uri.getPath();
    // A path of "" or "/" carries no key.
    String objectKey = path.length() > 1 ? path.substring(1) : null;

    // Typed locals keep the builder calls bound to the same overloads as before.
    Region noRegion = null;
    Map<String, List<String>> noQueryParams = new HashMap<>();

    return S3Uri.builder()
                .uri(uri)
                .bucket(bucketName)
                .key(objectKey)
                .region(noRegion)
                .isPathStyle(false)
                .queryParams(noQueryParams)
                .build();
}

/**
 * Rejects input that the URI parser cannot handle: a {@code null} URI, and URIs addressed to
 * S3 access point or S3 on Outposts endpoints.
 *
 * <p>Only the endpoint part of the URI (host, or the raw authority when no host could be parsed)
 * is inspected. The markers are part of the endpoint host name, so a regular bucket URI whose key
 * or query string happens to contain {@code ".s3-accesspoint"} or {@code ".s3-outposts"} is accepted.
 *
 * @param uri the URI to validate
 * @throws IllegalArgumentException if the endpoint is an access point or Outposts endpoint
 */
private void validateUri(URI uri) {
    Validate.paramNotNull(uri, "uri");

    // getHost() is null when the authority is not a valid server-based host (for example, it
    // contains an underscore); fall back to the raw authority so such URIs are still checked.
    String host = uri.getHost();
    String endpoint = host != null ? host : uri.getAuthority();
    if (endpoint == null) {
        // No endpoint at all; the parsing methods report the missing host/bucket.
        return;
    }

    if (endpoint.contains(".s3-accesspoint")) {
        throw new IllegalArgumentException("AccessPoints URI parsing is not supported: " + uri);
    }

    if (endpoint.contains(".s3-outposts")) {
        throw new IllegalArgumentException("Outposts URI parsing is not supported: " + uri);
    }
}

private Region resolveRegionForGetUrl(GetUrlRequest getUrlRequest) {
if (getUrlRequest.region() == null && this.region == null) {
throw new IllegalArgumentException("Region should be provided either in GetUrlRequest object or S3Utilities object");
Expand Down
Loading