@@ -17,13 +17,12 @@

package org.apache.spark.network.buffer;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.zip.Adler32;
Review comment (Contributor): It would be better to abstract out the checksum functions into some class, so it's easier to change in the future.

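Purely as an illustration of that suggestion (hypothetical, not part of this patch), such an abstraction could be a small helper that owns the choice of algorithm, so Adler32 could later be swapped without touching the buffer classes:

import java.io.InputStream;
import java.util.zip.Adler32;
import java.util.zip.CheckedInputStream;
import java.util.zip.Checksum;

// Hypothetical helper, sketched only to illustrate the review suggestion above;
// the class name and its placement are made up.
public final class ShuffleChecksums {
  private ShuffleChecksums() {}

  // Single place that decides which checksum algorithm shuffle data uses.
  public static Checksum newChecksum() {
    return new Adler32();
  }

  // Wraps a stream so the checksum is updated as bytes are read from it.
  public static InputStream checkedStream(InputStream in, Checksum checksum) {
    return new CheckedInputStream(in, checksum);
  }
}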
import java.util.zip.CheckedInputStream;
import java.util.zip.Checksum;

import com.google.common.base.Objects;
import com.google.common.io.ByteStreams;
@@ -92,12 +91,27 @@ public ByteBuffer nioByteBuffer() throws IOException {
}

@Override
public InputStream createInputStream() throws IOException {
public InputStream createInputStream(boolean checksum) throws IOException {
Review comment (Member): So, is this only for testing? Because it's otherwise very expensive to compute, requiring two passes over the file. If so, should it not be in some package-private method only, and not exposed?

Reply (Contributor Author): This is a good question. Actually, we already have a checksum along with compression; we could move the decompression a little earlier to detect the corruption in the block fetcher. Will try that soon.

FileInputStream is = null;
try {
is = new FileInputStream(file);
ByteStreams.skipFully(is, offset);
return new LimitedInputStream(is, length);
if (checksum) {
Checksum ck = new Adler32();
DataInputStream din = new DataInputStream(new CheckedInputStream(is, ck));
ByteStreams.skipFully(din, length - 8);
long sum = ck.getValue();
long expected = din.readLong();
if (sum != expected) {
throw new IOException("Checksum does not match " + sum + "!=" + expected);
}
is.close();
is = new FileInputStream(file);
ByteStreams.skipFully(is, offset);
return new LimitedInputStream(is, length - 8);
} else {
return new LimitedInputStream(is, length);
}
} catch (IOException e) {
try {
if (is != null) {
@@ -51,7 +51,7 @@ public abstract class ManagedBuffer {
* necessarily check for the length of bytes read, so the caller is responsible for making sure
* it does not go over the limit.
*/
public abstract InputStream createInputStream() throws IOException;
public abstract InputStream createInputStream(boolean checksum) throws IOException;

/**
* Increment the reference count by one if applicable.
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.zip.Adler32;

import com.google.common.base.Objects;
import io.netty.buffer.ByteBuf;
@@ -46,7 +47,21 @@ public ByteBuffer nioByteBuffer() throws IOException {
}

@Override
public InputStream createInputStream() throws IOException {
public InputStream createInputStream(boolean checksum) throws IOException {
if (checksum) {
Adler32 adler = new Adler32();
long size = size();
buf.markReaderIndex();
for (int i = 0; i < size - 8; i++) {
adler.update(buf.readByte());
}
long sum = buf.readLong();
if (adler.getValue() != sum) {
throw new IOException("Checksum does not match " + adler.getValue() + "!=" + sum);
}
buf.resetReaderIndex();
buf.writerIndex(buf.writerIndex() - 8);
}
return new ByteBufInputStream(buf);
}

@@ -20,6 +20,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.zip.Adler32;

import com.google.common.base.Objects;
import io.netty.buffer.ByteBufInputStream;
@@ -46,7 +47,23 @@ public ByteBuffer nioByteBuffer() throws IOException {
}

@Override
public InputStream createInputStream() throws IOException {
public InputStream createInputStream(boolean checksum) throws IOException {
if (checksum) {
Adler32 adler = new Adler32();
int position = buf.position();
int limit = buf.limit() - 8;
buf.position(limit);
long sum = buf.getLong();
buf.position(position);
// simplify this after dropping Java 7 support
for (int i = buf.position(); i < limit; i++) {
adler.update(buf.get(i));
}
if (sum != adler.getValue()) {
throw new IOException("Checksum does not match: " + adler.getValue() + "!=" + sum);
}
buf.limit(limit);
}
return new ByteBufInputStream(Unpooled.wrappedBuffer(buf));
}

@@ -59,8 +59,8 @@ public ByteBuffer nioByteBuffer() throws IOException {
}

@Override
public InputStream createInputStream() throws IOException {
return underlying.createInputStream();
public InputStream createInputStream(boolean checksum) throws IOException {
return underlying.createInputStream(checksum);
}

@Override
@@ -17,20 +17,17 @@

package org.apache.spark.network.sasl;

import static org.junit.Assert.*;
import static org.mockito.Mockito.*;

import javax.security.sasl.SaslException;
import java.io.File;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import javax.security.sasl.SaslException;

import com.google.common.collect.Lists;
import com.google.common.io.ByteStreams;
@@ -62,6 +59,9 @@
import org.apache.spark.network.util.SystemPropertyConfigProvider;
import org.apache.spark.network.util.TransportConf;

import static org.junit.Assert.*;
import static org.mockito.Mockito.*;

/**
* Jointly tests SparkSaslClient and SparkSaslServer, as both are black boxes.
*/
@@ -296,7 +296,7 @@ public Void answer(InvocationOnMock invocation) {
verify(callback, times(1)).onSuccess(anyInt(), any(ManagedBuffer.class));
verify(callback, never()).onFailure(anyInt(), any(Throwable.class));

byte[] received = ByteStreams.toByteArray(response.get().createInputStream());
byte[] received = ByteStreams.toByteArray(response.get().createInputStream(false));
assertTrue(Arrays.equals(data, received));
} finally {
file.delete();
@@ -24,14 +24,15 @@

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.io.CharStreams;
import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo;
import org.apache.spark.network.util.SystemPropertyConfigProvider;
import org.apache.spark.network.util.TransportConf;
import org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.AppExecId;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

import org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.AppExecId;
import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo;
import org.apache.spark.network.util.SystemPropertyConfigProvider;
import org.apache.spark.network.util.TransportConf;

import static org.junit.Assert.*;

public class ExternalShuffleBlockResolverSuite {
@@ -98,14 +99,14 @@ public void testSortShuffleBlocks() throws IOException {
dataContext.createExecutorInfo(SORT_MANAGER));

InputStream block0Stream =
resolver.getBlockData("app0", "exec0", "shuffle_0_0_0").createInputStream();
resolver.getBlockData("app0", "exec0", "shuffle_0_0_0").createInputStream(false);
String block0 = CharStreams.toString(
new InputStreamReader(block0Stream, StandardCharsets.UTF_8));
block0Stream.close();
assertEquals(sortBlock0, block0);

InputStream block1Stream =
resolver.getBlockData("app0", "exec0", "shuffle_0_0_1").createInputStream();
resolver.getBlockData("app0", "exec0", "shuffle_0_0_1").createInputStream(false);
String block1 = CharStreams.toString(
new InputStreamReader(block1Stream, StandardCharsets.UTF_8));
block1Stream.close();
@@ -139,7 +139,7 @@ public void write(Iterator<Product2<K, V>> records) throws IOException {
final File file = tempShuffleBlockIdPlusFile._2();
final BlockId blockId = tempShuffleBlockIdPlusFile._1();
partitionWriters[i] =
blockManager.getDiskWriter(blockId, file, serInstance, fileBufferSize, writeMetrics);
blockManager.getDiskWriter(blockId, file, serInstance, fileBufferSize, writeMetrics, true);
}
// Creating the file to write to and creating a disk writer both involve interacting with
// the disk, and can take a long time in aggregate when we open many files, so should be
@@ -173,7 +173,9 @@ private void writeSortedFile(boolean isLastFile) throws IOException {
final SerializerInstance ser = DummySerializerInstance.INSTANCE;

final DiskBlockObjectWriter writer =
blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse);
blockManager.getDiskWriter(blockId, file, ser, fileBufferSizeBytes, writeMetricsToUse,
// only generate the checksum when this is the only spill file
isLastFile && spills.isEmpty());

int currentPartition = -1;
while (sortedRecords.hasNext()) {
@@ -21,6 +21,7 @@
import java.io.*;
import java.nio.channels.FileChannel;
import java.util.Iterator;
import java.util.zip.Adler32;

import scala.Option;
import scala.Product2;
@@ -35,7 +36,10 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.spark.*;
import org.apache.spark.Partitioner;
import org.apache.spark.ShuffleDependency;
import org.apache.spark.SparkConf;
import org.apache.spark.TaskContext;
import org.apache.spark.annotation.Private;
import org.apache.spark.executor.ShuffleWriteMetrics;
import org.apache.spark.io.CompressionCodec;
@@ -49,6 +53,7 @@
import org.apache.spark.shuffle.IndexShuffleBlockResolver;
import org.apache.spark.shuffle.ShuffleWriter;
import org.apache.spark.storage.BlockManager;
import org.apache.spark.storage.ChecksumOutputStream;
import org.apache.spark.storage.TimeTrackingOutputStream;
import org.apache.spark.unsafe.Platform;
import org.apache.spark.util.Utils;
@@ -75,6 +80,7 @@ public class UnsafeShuffleWriter<K, V> extends ShuffleWriter<K, V> {
private final SparkConf sparkConf;
private final boolean transferToEnabled;
private final int initialSortBufferSize;
private final boolean checksum;

@Nullable private MapStatus mapStatus;
@Nullable private ShuffleExternalSorter sorter;
@@ -108,8 +114,8 @@ public UnsafeShuffleWriter(
if (numPartitions > SortShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE()) {
throw new IllegalArgumentException(
"UnsafeShuffleWriter can only be used for shuffles with at most " +
SortShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE() +
" reduce partitions");
SortShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE() +
" reduce partitions");
}
this.blockManager = blockManager;
this.shuffleBlockResolver = shuffleBlockResolver;
@@ -124,7 +130,9 @@ public UnsafeShuffleWriter(
this.sparkConf = sparkConf;
this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true);
this.initialSortBufferSize = sparkConf.getInt("spark.shuffle.sort.initialBufferSize",
DEFAULT_INITIAL_SORT_BUFFER_SIZE);
DEFAULT_INITIAL_SORT_BUFFER_SIZE);
this.checksum = sparkConf.getBoolean("spark.shuffle.checksum", true);

open();
}

@@ -289,7 +297,7 @@ private long[] mergeSpills(SpillInfo[] spills, File outputFile) throws IOException {
// Compression is disabled or we are using an IO compression codec that supports
// decompression of concatenated compressed streams, so we can perform a fast spill merge
// that doesn't need to interpret the spilled bytes.
if (transferToEnabled) {
if (transferToEnabled && !checksum) {
logger.debug("Using transferTo-based fast merge");
partitionLengths = mergeSpillsWithTransferTo(spills, outputFile);
} else {
@@ -346,8 +354,11 @@ private long[] mergeSpillsWithFileStream(
}
for (int partition = 0; partition < numPartitions; partition++) {
final long initialFileLength = outputFile.length();
mergedFileOutputStream =
new TimeTrackingOutputStream(writeMetrics, new FileOutputStream(outputFile, true));
OutputStream fos = new FileOutputStream(outputFile, true);
if (checksum) {
fos = new ChecksumOutputStream(fos, new Adler32());
}
mergedFileOutputStream = new TimeTrackingOutputStream(writeMetrics, fos);
if (compressionCodec != null) {
mergedFileOutputStream = compressionCodec.compressedOutputStream(mergedFileOutputStream);
}
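The merge path above is gated on the "spark.shuffle.checksum" flag read in the UnsafeShuffleWriter constructor. A minimal, illustrative sketch of toggling it (only the config key defined in this patch is assumed):

import org.apache.spark.SparkConf;

// Illustrative only: disabling the checksum re-enables the transferTo-based fast merge,
// since mergeSpills() now takes that path only when transferToEnabled && !checksum.
SparkConf conf = new SparkConf().set("spark.shuffle.checksum", "false"); // defaults to true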
@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.storage;

import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.zip.Checksum;

/**
* An output stream that generates a checksum for the written data and writes the checksum as a
* long at the end of the stream.
*/
public class ChecksumOutputStream extends FilterOutputStream {
private Checksum cksum;
private boolean closed;

public ChecksumOutputStream(OutputStream out, Checksum cksum) {
super(out);
cksum.reset();
this.cksum = cksum;
this.closed = false;
}

public void write(int b) throws IOException {
out.write(b);
cksum.update(b);
}

public void write(byte[] b) throws IOException {
write(b, 0, b.length);
}

public void write(byte[] b, int off, int len) throws IOException {
out.write(b, off, len);
cksum.update(b, off, len);
}

public void close() throws IOException {
flush();
if (!closed) {
closed = true;
ByteBuffer buffer = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN);
buffer.putLong(cksum.getValue());
out.write(buffer.array());
out.close();
}
}
}
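For context on how this trailer is consumed, here is a minimal roundtrip sketch (illustrative only; the temp file and payload are made up): it writes through ChecksumOutputStream and then verifies the trailing long the same way FileSegmentManagedBuffer.createInputStream(true) does above.

import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.Adler32;
import java.util.zip.CheckedInputStream;
import java.util.zip.Checksum;

import org.apache.spark.storage.ChecksumOutputStream;

// Illustrative roundtrip only, not part of the patch.
public class ChecksumRoundTrip {
  public static void main(String[] args) throws IOException {
    File file = File.createTempFile("shuffle", ".data");
    byte[] payload = "some shuffle bytes".getBytes(StandardCharsets.UTF_8);

    // Write: the payload, followed by an 8-byte big-endian Adler32 value appended on close().
    try (OutputStream out = new ChecksumOutputStream(new FileOutputStream(file), new Adler32())) {
      out.write(payload);
    }

    // Read: checksum everything except the trailing 8 bytes, then compare with the trailer.
    Checksum ck = new Adler32();
    try (DataInputStream in =
        new DataInputStream(new CheckedInputStream(new FileInputStream(file), ck))) {
      byte[] data = new byte[(int) (file.length() - 8)];
      in.readFully(data);
      long sum = ck.getValue();       // checksum over the payload only
      long expected = in.readLong();  // trailer written by ChecksumOutputStream
      if (sum != expected) {
        throw new IOException("Checksum does not match " + sum + "!=" + expected);
      }
    }
    file.delete();
  }
}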