Skip to content

Commit d035d15

Browse files
author
Bhavay Pahuja
committed
HADOOP-14837: Glacier read restored objects support
1 parent 4f0f5a5 commit d035d15

File tree

9 files changed

+327
-3
lines changed

9 files changed

+327
-3
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1519,6 +1519,12 @@ private Constants() {
15191519
*/
15201520
public static final int DEFAULT_PREFETCH_MAX_BLOCKS_COUNT = 4;
15211521

1522+
/**
1523+
* Option controlling how S3A handles objects in Glacier storage classes.
1524+
* Value = {@value}
1525+
*/
1526+
public static final String READ_RESTORED_GLACIER_OBJECTS = "fs.s3a.glacier.read.restored.objects";
1527+
15221528
/**
15231529
* The bucket region header.
15241530
*/

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Listing.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ public class Listing extends AbstractStoreOperation {
7676

7777
private static final Logger LOG = S3AFileSystem.LOG;
7878
private final boolean isCSEEnabled;
79+
private final S3ObjectStorageClassFilter s3ObjectStorageClassFilter;
7980

8081
static final FileStatusAcceptor ACCEPT_ALL_BUT_S3N =
8182
new AcceptAllButS3nDirs();
@@ -87,6 +88,7 @@ public Listing(ListingOperationCallbacks listingOperationCallbacks,
8788
super(storeContext);
8889
this.listingOperationCallbacks = listingOperationCallbacks;
8990
this.isCSEEnabled = storeContext.isCSEEnabled();
91+
this.s3ObjectStorageClassFilter = storeContext.getS3ObjectsStorageClassFilter();
9092
}
9193

9294
/**
@@ -462,7 +464,8 @@ private boolean buildNextStatusBatch(S3ListResult objects) {
462464
LOG.debug("{}: {}", keyPath, stringify(s3Object));
463465
}
464466
// Skip over keys that are ourselves and old S3N _$folder$ files
465-
if (acceptor.accept(keyPath, s3Object) && filter.accept(keyPath)) {
467+
// Apply the Glacier storage class filter selected by the fs.s3a.glacier.read.restored.objects option
468+
if ( s3ObjectStorageClassFilter.getFilter().apply(s3Object) && acceptor.accept(keyPath, s3Object) && filter.accept(keyPath)) {
466469
S3AFileStatus status = createFileStatus(keyPath, s3Object,
467470
listingOperationCallbacks.getDefaultBlockSize(keyPath),
468471
getStoreContext().getUsername(),

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
441441
*/
442442
private boolean isCSEEnabled;
443443

444+
private S3ObjectStorageClassFilter s3ObjectStorageClassFilter;
445+
444446
/**
445447
* Bucket AccessPoint.
446448
*/
@@ -577,6 +579,12 @@ public void initialize(URI name, Configuration originalConf)
577579

578580
s3aInternals = createS3AInternals();
579581

582+
s3ObjectStorageClassFilter = Optional.ofNullable(conf.get(READ_RESTORED_GLACIER_OBJECTS))
583+
.map(String::trim)
584+
.map(String::toUpperCase)
585+
.map(S3ObjectStorageClassFilter::valueOf)
586+
.orElse(S3ObjectStorageClassFilter.READ_ALL);
587+
580588
// look for encryption data
581589
// DT Bindings may override this
582590
setEncryptionSecrets(
@@ -5658,6 +5666,7 @@ public StoreContext createStoreContext() {
56585666
.setContextAccessors(new ContextAccessorsImpl())
56595667
.setAuditor(getAuditor())
56605668
.setEnableCSE(isCSEEnabled)
5669+
.setS3ObjectStorageClassFilter(s3ObjectStorageClassFilter)
56615670
.build();
56625671
}
56635672

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.s3a;
20+
21+
import org.apache.hadoop.thirdparty.com.google.common.collect.Sets;
22+
import java.util.Set;
23+
import java.util.function.Function;
24+
import software.amazon.awssdk.services.s3.model.ObjectStorageClass;
25+
import software.amazon.awssdk.services.s3.model.S3Object;
26+
27+
28+
/**
29+
* <pre>
30+
* {@link S3ObjectStorageClassFilter} filters S3 objects based on the {@code fs.s3a.glacier.read.restored.objects} option configured in {@link S3AFileSystem}.
31+
* The config can have 3 values:
32+
* {@code READ_ALL}: (default) No filtering is applied. Retrieval of Glacier files which have not been restored will fail with InvalidObjectStateException: The operation is not valid for the object's storage class.
33+
* {@code SKIP_ALL_GLACIER}: If this value is set then this will ignore any S3 Objects which are tagged with Glacier storage classes and retrieve the others.
34+
* {@code READ_RESTORED_GLACIER_OBJECTS}: The restore status of each Glacier object is checked. Objects whose restore has completed are read like normal S3 objects; all other Glacier objects are ignored, as they have not been retrieved from S3 Glacier.
35+
* </pre>
36+
*/
37+
public enum S3ObjectStorageClassFilter {
38+
READ_ALL(o -> true),
39+
SKIP_ALL_GLACIER(S3ObjectStorageClassFilter::isNotGlacierObject),
40+
READ_RESTORED_GLACIER_OBJECTS(S3ObjectStorageClassFilter::isCompletedRestoredObject);
41+
42+
private static final Set<ObjectStorageClass> GLACIER_STORAGE_CLASSES = Sets.newHashSet(ObjectStorageClass.GLACIER, ObjectStorageClass.DEEP_ARCHIVE);
43+
44+
private final Function<S3Object, Boolean> filter;
45+
46+
S3ObjectStorageClassFilter(Function<S3Object, Boolean> filter) {
47+
this.filter = filter;
48+
}
49+
50+
private static boolean isNotGlacierObject(S3Object object) {
51+
return !GLACIER_STORAGE_CLASSES.contains(object.storageClass());
52+
}
53+
54+
private static boolean isGlacierObject(S3Object object) {
55+
return GLACIER_STORAGE_CLASSES.contains(object.storageClass());
56+
}
57+
58+
private static boolean isCompletedRestoredObject(S3Object object) {
59+
if(isGlacierObject(object)) {
60+
return object.restoreStatus() != null && !object.restoreStatus().isRestoreInProgress();
61+
}
62+
return true;
63+
}
64+
65+
public Function<S3Object, Boolean> getFilter() {
66+
return filter;
67+
}
68+
69+
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import software.amazon.awssdk.services.s3.model.ListObjectsV2Request;
4343
import software.amazon.awssdk.services.s3.model.MetadataDirective;
4444
import software.amazon.awssdk.services.s3.model.ObjectIdentifier;
45+
import software.amazon.awssdk.services.s3.model.OptionalObjectAttributes;
4546
import software.amazon.awssdk.services.s3.model.PutObjectRequest;
4647
import software.amazon.awssdk.services.s3.model.ServerSideEncryption;
4748
import software.amazon.awssdk.services.s3.model.StorageClass;
@@ -609,6 +610,7 @@ public ListObjectsV2Request.Builder newListObjectsV2RequestBuilder(
609610
final ListObjectsV2Request.Builder requestBuilder = ListObjectsV2Request.builder()
610611
.bucket(bucket)
611612
.maxKeys(maxKeys)
613+
.optionalObjectAttributes(OptionalObjectAttributes.RESTORE_STATUS) // Optional Attribute to get the Restored Status of the Glacier Objects
612614
.prefix(key);
613615

614616
if (delimiter != null) {

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContext.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.util.concurrent.CompletableFuture;
2626
import java.util.concurrent.ExecutorService;
2727

28+
import org.apache.hadoop.fs.s3a.S3ObjectStorageClassFilter;
2829
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService;
2930

3031
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors;
@@ -117,6 +118,8 @@ public class StoreContext implements ActiveThreadSpanSource<AuditSpan> {
117118
/** Is client side encryption enabled? */
118119
private final boolean isCSEEnabled;
119120

121+
private final S3ObjectStorageClassFilter s3ObjectStorageClassFilter;
122+
120123
/**
121124
* Instantiate.
122125
*/
@@ -137,7 +140,8 @@ public class StoreContext implements ActiveThreadSpanSource<AuditSpan> {
137140
final boolean useListV1,
138141
final ContextAccessors contextAccessors,
139142
final AuditSpanSource<AuditSpanS3A> auditor,
140-
final boolean isCSEEnabled) {
143+
final boolean isCSEEnabled,
144+
final S3ObjectStorageClassFilter s3ObjectStorageClassFilter) {
141145
this.fsURI = fsURI;
142146
this.bucket = bucket;
143147
this.configuration = configuration;
@@ -158,6 +162,7 @@ public class StoreContext implements ActiveThreadSpanSource<AuditSpan> {
158162
this.contextAccessors = contextAccessors;
159163
this.auditor = auditor;
160164
this.isCSEEnabled = isCSEEnabled;
165+
this.s3ObjectStorageClassFilter = s3ObjectStorageClassFilter;
161166
}
162167

163168
public URI getFsURI() {
@@ -411,4 +416,12 @@ public RequestFactory getRequestFactory() {
411416
public boolean isCSEEnabled() {
412417
return isCSEEnabled;
413418
}
419+
420+
/**
421+
* Return the S3ObjectStorageClassFilter object for S3A, whose value is set according to the config {@code fs.s3a.glacier.read.restored.objects}
422+
* @return {@link S3ObjectStorageClassFilter} object
423+
*/
424+
public S3ObjectStorageClassFilter getS3ObjectsStorageClassFilter() {
425+
return s3ObjectStorageClassFilter;
426+
}
414427
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContextBuilder.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.hadoop.fs.s3a.Invoker;
2626
import org.apache.hadoop.fs.s3a.S3AInputPolicy;
2727
import org.apache.hadoop.fs.s3a.S3AStorageStatistics;
28+
import org.apache.hadoop.fs.s3a.S3ObjectStorageClassFilter;
2829
import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A;
2930
import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext;
3031
import org.apache.hadoop.fs.store.audit.AuditSpanSource;
@@ -69,6 +70,8 @@ public class StoreContextBuilder {
6970

7071
private boolean isCSEEnabled;
7172

73+
private S3ObjectStorageClassFilter s3ObjectStorageClassFilter;
74+
7275
public StoreContextBuilder setFsURI(final URI fsURI) {
7376
this.fsURI = fsURI;
7477
return this;
@@ -175,6 +178,12 @@ public StoreContextBuilder setEnableCSE(
175178
return this;
176179
}
177180

181+
public StoreContextBuilder setS3ObjectStorageClassFilter(
182+
S3ObjectStorageClassFilter value) {
183+
s3ObjectStorageClassFilter = value;
184+
return this;
185+
}
186+
178187
public StoreContext build() {
179188
return new StoreContext(fsURI,
180189
bucket,
@@ -192,6 +201,7 @@ public StoreContext build() {
192201
useListV1,
193202
contextAccessors,
194203
auditor,
195-
isCSEEnabled);
204+
isCSEEnabled,
205+
s3ObjectStorageClassFilter);
196206
}
197207
}

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1285,6 +1285,20 @@ The switch to turn S3A auditing on or off.
12851285
</description>
12861286
</property>
12871287

1288+
<!--
1289+
The switch to control how S3A handles glacier storage classes.
1290+
-->
1291+
<property>
1292+
<name>fs.s3a.glacier.read.restored.objects</name>
1293+
<value>READ_ALL</value>
1294+
<description>
1295+
The config can have 3 values:
1296+
1297+
* READ_ALL: (default) No filtering is applied. Retrieval of Glacier files which have not been restored will fail with InvalidObjectStateException: The operation is not valid for the object's storage class.
1298+
* SKIP_ALL_GLACIER: If this value is set then this will ignore any S3 Objects which are tagged with Glacier storage classes and retrieve the others.
1299+
* READ_RESTORED_GLACIER_OBJECTS: The restore status of each Glacier object is checked. Objects whose restore has completed are read like normal S3 objects; all other Glacier objects are ignored, as they have not been retrieved from S3 Glacier.
1300+
</description>
1301+
</property>
12881302
```
12891303
## <a name="retry_and_recovery"></a>Retry and Recovery
12901304

0 commit comments

Comments
 (0)