
Commit dfeeda2

applying review comments of Imran v2.0
1 parent: 0d6ed51 · commit: dfeeda2

File tree: 5 files changed, +81 −13 lines


common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java
Lines changed: 2 additions & 2 deletions

```diff
@@ -177,7 +177,7 @@ private static void assertCleanedUp(TestShuffleDataContext dataContext) {
   }
 
   private static TestShuffleDataContext initDataContext(boolean withFilesToKeep)
-    throws IOException {
+      throws IOException {
     TestShuffleDataContext dataContext = new TestShuffleDataContext(10, 5);
     dataContext.create();
     if (withFilesToKeep) {
@@ -193,7 +193,7 @@ private static void createFilesToKeep(TestShuffleDataContext dataContext) throws
     dataContext.insertSortShuffleData(rand.nextInt(1000), rand.nextInt(1000), new byte[][] {
         "ABC".getBytes(StandardCharsets.UTF_8),
         "DEF".getBytes(StandardCharsets.UTF_8)});
-    dataContext.insertCachedRddData();
+    dataContext.insertCachedRddData(12, 34, new byte[] { 42 });
   }
 
   private static void createRemovableTestFiles(TestShuffleDataContext dataContext)
```
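Note that the three-argument call here is behavior-preserving: the old zero-argument helper (see TestShuffleDataContext.java below) always wrote a file named "rdd_12_34" containing the single byte 42, so passing 12, 34 and { 42 } explicitly keeps this suite's fixture byte-for-byte identical. For comparison:

```java
// Equivalence of the old and new helper calls (dataContext is a TestShuffleDataContext):

// Before this commit: block id and payload were hard-coded inside the helper.
dataContext.insertCachedRddData();                           // wrote "rdd_12_34" with { 42 }

// After: the caller names the block and supplies its payload; this call is equivalent.
dataContext.insertCachedRddData(12, 34, new byte[] { 42 });
```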

common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java
Lines changed: 68 additions & 5 deletions

```diff
@@ -17,10 +17,12 @@
 
 package org.apache.spark.network.shuffle;
 
+import java.io.File;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
@@ -31,6 +33,8 @@
 
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Sets;
+import org.apache.spark.network.buffer.FileSegmentManagedBuffer;
+import org.apache.spark.network.server.OneForOneStreamManager;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -52,6 +56,11 @@ public class ExternalShuffleIntegrationSuite {
   private static final String APP_ID = "app-id";
   private static final String SORT_MANAGER = "org.apache.spark.shuffle.sort.SortShuffleManager";
 
+  private static final int RDD_ID = 1;
+  private static final int SPLIT_INDEX_VALID_BLOCK = 0;
+  private static final int SPLIT_INDEX_MISSING_FILE = 1;
+  private static final int SPLIT_INDEX_CORRUPT_LENGTH = 2;
+
   // Executor 0 is sort-based
   static TestShuffleDataContext dataContext0;
 
@@ -60,6 +69,8 @@ public class ExternalShuffleIntegrationSuite {
   static TransportConf conf;
   static TransportContext transportContext;
 
+  static byte[] exec0RddBlock = new byte[123];
+
   static byte[][] exec0Blocks = new byte[][] {
     new byte[123],
     new byte[12345],
@@ -81,13 +92,36 @@ public static void beforeAll() throws IOException {
     for (byte[] block: exec1Blocks) {
       rand.nextBytes(block);
     }
+    rand.nextBytes(exec0RddBlock);
 
     dataContext0 = new TestShuffleDataContext(2, 5);
     dataContext0.create();
     dataContext0.insertSortShuffleData(0, 0, exec0Blocks);
-
-    conf = new TransportConf("shuffle", MapConfigProvider.EMPTY);
-    handler = new ExternalShuffleBlockHandler(conf, null);
+    dataContext0.insertCachedRddData(RDD_ID, SPLIT_INDEX_VALID_BLOCK, exec0RddBlock);
+
+    HashMap<String, String> config = new HashMap<>();
+    config.put("spark.shuffle.io.maxRetries", "0");
+    conf = new TransportConf("shuffle", new MapConfigProvider(config));
+    handler = new ExternalShuffleBlockHandler(
+      new OneForOneStreamManager(),
+      new ExternalShuffleBlockResolver(conf, null) {
+        @Override
+        public ManagedBuffer getRddBlockData(String appId, String execId, int rddId, int splitIdx) {
+          ManagedBuffer res;
+          if (rddId == RDD_ID) {
+            switch (splitIdx) {
+              case SPLIT_INDEX_CORRUPT_LENGTH:
+                res = new FileSegmentManagedBuffer(conf, new File("missing.file"), 0, 12);
+                break;
+              default:
+                res = super.getRddBlockData(appId, execId, rddId, splitIdx);
+            }
+          } else {
+            res = super.getRddBlockData(appId, execId, rddId, splitIdx);
+          }
+          return res;
+        }
+      });
     transportContext = new TransportContext(conf, handler);
     server = transportContext.createServer();
   }
@@ -199,9 +233,38 @@ public void testRegisterInvalidExecutor() throws Exception {
   @Test
   public void testFetchWrongBlockId() throws Exception {
     registerExecutor("exec-1", dataContext0.createExecutorInfo(SORT_MANAGER));
-    FetchResult execFetch = fetchBlocks("exec-1", new String[] { "rdd_1_0_0" });
+    FetchResult execFetch = fetchBlocks("exec-1", new String[] { "broadcast_1" });
+    assertTrue(execFetch.successBlocks.isEmpty());
+    assertEquals(Sets.newHashSet("broadcast_1"), execFetch.failedBlocks);
+  }
+
+  @Test
+  public void testFetchValidRddBlock() throws Exception {
+    registerExecutor("exec-1", dataContext0.createExecutorInfo(SORT_MANAGER));
+    String validBlockId = "rdd_" + RDD_ID + "_" + SPLIT_INDEX_VALID_BLOCK;
+    FetchResult execFetch = fetchBlocks("exec-1", new String[] { validBlockId });
+    assertTrue(execFetch.failedBlocks.isEmpty());
+    assertEquals(Sets.newHashSet(validBlockId), execFetch.successBlocks);
+    assertBuffersEqual(new NioManagedBuffer(ByteBuffer.wrap(exec0RddBlock)),
+      execFetch.buffers.get(0));
+  }
+
+  @Test
+  public void testFetchDeletedRddBlock() throws Exception {
+    registerExecutor("exec-1", dataContext0.createExecutorInfo(SORT_MANAGER));
+    String missingBlockId = "rdd_" + RDD_ID + "_" + SPLIT_INDEX_MISSING_FILE;
+    FetchResult execFetch = fetchBlocks("exec-1", new String[] { missingBlockId });
+    assertTrue(execFetch.successBlocks.isEmpty());
+    assertEquals(Sets.newHashSet(missingBlockId), execFetch.failedBlocks);
+  }
+
+  @Test
+  public void testFetchCorruptRddBlock() throws Exception {
+    registerExecutor("exec-1", dataContext0.createExecutorInfo(SORT_MANAGER));
+    String corruptBlockId = "rdd_" + RDD_ID + "_" + SPLIT_INDEX_CORRUPT_LENGTH;
+    FetchResult execFetch = fetchBlocks("exec-1", new String[] { corruptBlockId });
     assertTrue(execFetch.successBlocks.isEmpty());
-    assertEquals(Sets.newHashSet("rdd_1_0_0"), execFetch.failedBlocks);
+    assertEquals(Sets.newHashSet(corruptBlockId), execFetch.failedBlocks);
   }
 
   @Test
```
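A note on the corrupt-block setup above: FileSegmentManagedBuffer only records a (file, offset, length) triple, so handing out a segment over a nonexistent file with a claimed length of 12 is accepted silently and fails only when the server materializes the bytes. The suite also pins spark.shuffle.io.maxRetries to 0, presumably so a failed fetch surfaces immediately instead of being retried. A standalone sketch of that failure mode (this class is illustrative, not part of the commit):

```java
import java.io.File;
import java.io.IOException;

import org.apache.spark.network.buffer.FileSegmentManagedBuffer;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.util.MapConfigProvider;
import org.apache.spark.network.util.TransportConf;

public class CorruptSegmentSketch {
  public static void main(String[] args) {
    TransportConf conf = new TransportConf("shuffle", MapConfigProvider.EMPTY);

    // Metadata claims 12 bytes at offset 0, but the backing file does not exist.
    ManagedBuffer corrupt = new FileSegmentManagedBuffer(conf, new File("missing.file"), 0, 12);

    try {
      corrupt.nioByteBuffer();  // opening the missing file throws here, not at construction
    } catch (IOException e) {
      // The shuffle server reports this as a failed fetch for the corrupt block id.
      System.out.println("fetch fails: " + e.getMessage());
    }
  }
}
```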

common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
Lines changed: 8 additions & 5 deletions

```diff
@@ -111,17 +111,20 @@ public void insertTempShuffleData() throws IOException {
     insertFile(filename);
   }
 
-  public void insertCachedRddData() throws IOException {
-    String filename = "rdd_12_34";
-    insertFile(filename);
+  public void insertCachedRddData(int rddId, int splitId, byte[] block) throws IOException {
+    String blockId = "rdd_" + rddId + "_" + splitId;
+    insertFile(blockId, block);
   }
-
   private void insertFile(String filename) throws IOException {
+    insertFile(filename, new byte[] { 42 });
+  }
+
+  private void insertFile(String filename, byte[] block) throws IOException {
     OutputStream dataStream = null;
     try {
       dataStream = new FileOutputStream(
         ExternalShuffleBlockResolver.getFile(localDirs, subDirsPerLocalDir, filename));
-      dataStream.write(42);
+      dataStream.write(block);
     } finally {
       Closeables.close(dataStream, false);
     }
```
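For context, insertFile delegates on-disk placement to ExternalShuffleBlockResolver.getFile, which hashes the block name across the context's local directories and their subdirectories; that is why a test can write a block under a name like "rdd_1_0" and the shuffle service can later find it from the same (localDirs, subDirsPerLocalDir) pair. A simplified sketch of that placement, modeled on the resolver (the exact hash function Spark uses may differ):

```java
import java.io.File;

public class BlockPlacementSketch {
  // Modeled on ExternalShuffleBlockResolver.getFile: hash the block name into one
  // of the local dirs, then into a fixed-size set of subdirectories within it.
  public static File resolveFile(String[] localDirs, int subDirsPerLocalDir, String filename) {
    int hash = filename.hashCode() & Integer.MAX_VALUE;             // keep the hash non-negative
    String localDir = localDirs[hash % localDirs.length];           // pick a local dir
    int subDirId = (hash / localDirs.length) % subDirsPerLocalDir;  // then a subdir within it
    return new File(new File(localDir, String.format("%02x", subDirId)), filename);
  }

  public static void main(String[] args) {
    // Deterministic for a given name: writer and reader resolve the same path.
    System.out.println(resolveFile(new String[] {"/tmp/dir0", "/tmp/dir1"}, 5, "rdd_1_0"));
  }
}
```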

core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala
Lines changed: 1 addition & 1 deletion

```diff
@@ -225,7 +225,7 @@ class BlockManagerMaster(
   /**
    * Find out if the executor has cached blocks which are not available via the external shuffle
    * service.
-   * This method does not consider broadcast blocks, since they are not reported the master.
+   * This method does not consider broadcast blocks, since they are not reported to the master.
    */
   def hasExclusiveCachedBlocks(executorId: String): Boolean = {
     driverEndpoint.askSync[Boolean](HasExclusiveCachedBlocks(executorId))
```

core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala
Lines changed: 2 additions & 0 deletions

```diff
@@ -357,6 +357,8 @@ class BlockManagerMasterEndpoint(
   }
 
   private def externalShuffleServiceIdOnHost(blockManagerId: BlockManagerId): BlockManagerId = {
+    // we need to keep the executor ID of the original executor to let the shuffle service know
+    // which local directories should be used to look for the file
     BlockManagerId(blockManagerId.executorId, blockManagerId.host, externalShuffleServicePort)
   }
 
```
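The added comment is the crux of the change: the external shuffle service indexes each executor's registered local directories by application and executor id, so the BlockManagerId returned here must keep the original executorId even though its port now points at the shuffle service. A hypothetical sketch of the server-side lookup this enables (the class and key format are illustrative, reusing resolveFile from the sketch above; only ExecutorShuffleInfo and FileSegmentManagedBuffer are real classes):

```java
import java.io.File;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.spark.network.buffer.FileSegmentManagedBuffer;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo;
import org.apache.spark.network.util.TransportConf;

public class RddBlockLookupSketch {
  private final TransportConf conf;
  // Filled in when each executor registers; keyed by "appId_execId" (illustrative format).
  private final ConcurrentHashMap<String, ExecutorShuffleInfo> executors = new ConcurrentHashMap<>();

  public RddBlockLookupSketch(TransportConf conf) {
    this.conf = conf;
  }

  public ManagedBuffer getRddBlockData(String appId, String execId, int rddId, int splitIdx) {
    // Without the original executor id there is no way to locate the right local dirs.
    ExecutorShuffleInfo executor = executors.get(appId + "_" + execId);
    if (executor == null) {
      throw new RuntimeException("Executor is not registered: " + execId);
    }
    File file = BlockPlacementSketch.resolveFile(
      executor.localDirs, executor.subDirsPerLocalDir, "rdd_" + rddId + "_" + splitIdx);
    return new FileSegmentManagedBuffer(conf, file, 0, file.length());
  }
}
```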
