Commit 915cbc9

Stephen O'Donnell authored and jojochuang committed
HDFS-14706. Checksums are not checked if block meta file is less than 7 bytes. Contributed by Stephen O'Donnell.
Signed-off-by: Wei-Chiu Chuang <[email protected]>
1 parent d207aba commit 915cbc9
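
For context on the 7-byte figure in the commit title: a block meta file begins with a fixed-size header made up of a 2-byte version, a 1-byte checksum type and a 4-byte bytesPerChecksum value, which is exactly what the new test added in this commit writes with writeShort/writeByte/writeInt. The standalone sketch below is illustrative only and not part of the patch (the class name is made up); it simply shows where the 7 bytes come from.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class MetaHeaderSketch {
  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    out.writeShort(1);   // 2 bytes: metadata version
    out.writeByte(1);    // 1 byte: checksum type
    out.writeInt(512);   // 4 bytes: bytes per checksum
    out.flush();
    // A meta file shorter than this cannot hold a complete header, which is
    // the case this commit turns into an explicit error instead of silently
    // skipping checksum verification.
    System.out.println("header length = " + bytes.size());  // prints 7
  }
}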

File tree: 7 files changed, 275 additions and 14 deletions

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java

Lines changed: 24 additions & 4 deletions
@@ -143,17 +143,27 @@ public static DataChecksum newDataChecksum(Type type, int bytesPerChecksum ) {
    * Creates a DataChecksum from HEADER_LEN bytes from arr[offset].
    * @return DataChecksum of the type in the array or null in case of an error.
    */
-  public static DataChecksum newDataChecksum( byte bytes[], int offset ) {
+  public static DataChecksum newDataChecksum(byte[] bytes, int offset)
+      throws IOException {
     if (offset < 0 || bytes.length < offset + getChecksumHeaderSize()) {
-      return null;
+      throw new InvalidChecksumSizeException("Could not create DataChecksum "
+          + " from the byte array of length " + bytes.length
+          + " and offset "+ offset);
     }

     // like readInt():
     int bytesPerChecksum = ( (bytes[offset+1] & 0xff) << 24 ) |
                            ( (bytes[offset+2] & 0xff) << 16 ) |
                            ( (bytes[offset+3] & 0xff) << 8 )  |
                            ( (bytes[offset+4] & 0xff) );
-    return newDataChecksum( Type.valueOf(bytes[offset]), bytesPerChecksum );
+    DataChecksum csum = newDataChecksum(mapByteToChecksumType(bytes[offset]),
+        bytesPerChecksum);
+    if (csum == null) {
+      throw new InvalidChecksumSizeException(("Could not create DataChecksum "
+          + " from the byte array of length " + bytes.length
+          + " and bytesPerCheckSum of "+ bytesPerChecksum));
+    }
+    return csum;
   }

   /**
@@ -164,13 +174,23 @@ public static DataChecksum newDataChecksum( DataInputStream in )
       throws IOException {
     int type = in.readByte();
     int bpc = in.readInt();
-    DataChecksum summer = newDataChecksum(Type.valueOf(type), bpc );
+    DataChecksum summer = newDataChecksum(mapByteToChecksumType(type), bpc);
     if ( summer == null ) {
       throw new InvalidChecksumSizeException("Could not create DataChecksum "
           + "of type " + type + " with bytesPerChecksum " + bpc);
     }
     return summer;
   }
+
+  private static Type mapByteToChecksumType(int type)
+      throws InvalidChecksumSizeException{
+    try {
+      return Type.valueOf(type);
+    } catch (IllegalArgumentException e) {
+      throw new InvalidChecksumSizeException("The value "+type+" does not map"+
+          " to a valid checksum Type");
+    }
+  }

   /**
    * Writes the checksum header to the output stream <i>out</i>.
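
The user-visible change in this file: the array-based newDataChecksum no longer returns null when the header bytes are too short or name an unknown checksum type; it throws InvalidChecksumSizeException (an IOException subclass) instead, so callers cannot silently ignore a bad header. A minimal caller-side sketch, assuming the patched hadoop-common is on the classpath (the class name below is made up):

import java.io.IOException;

import org.apache.hadoop.util.DataChecksum;

public class ShortChecksumHeaderSketch {
  public static void main(String[] args) {
    // Fewer bytes than DataChecksum's header needs (type byte + 4-byte int).
    byte[] tooShort = new byte[3];
    try {
      DataChecksum.newDataChecksum(tooShort, 0);
    } catch (IOException e) {
      // Before this patch the call returned null; now the error is typed.
      System.out.println("rejected: " + e.getMessage());
    }
  }
}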

hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockMetadataHeader.java

Lines changed: 26 additions & 6 deletions
@@ -35,6 +35,7 @@

 import com.google.common.annotations.VisibleForTesting;

+import org.apache.hadoop.util.InvalidChecksumSizeException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

@@ -119,13 +120,19 @@ public static BlockMetadataHeader preadHeader(FileChannel fc)
     ByteBuffer buf = ByteBuffer.wrap(arr);

     while (buf.hasRemaining()) {
-      if (fc.read(buf, 0) <= 0) {
-        throw new EOFException("unexpected EOF while reading " +
-            "metadata file header");
+      if (fc.read(buf, buf.position()) <= 0) {
+        throw new CorruptMetaHeaderException("EOF while reading header from "+
+            "the metadata file. The meta file may be truncated or corrupt");
       }
     }
     short version = (short)((arr[0] << 8) | (arr[1] & 0xff));
-    DataChecksum dataChecksum = DataChecksum.newDataChecksum(arr, 2);
+    DataChecksum dataChecksum;
+    try {
+      dataChecksum = DataChecksum.newDataChecksum(arr, 2);
+    } catch (InvalidChecksumSizeException e) {
+      throw new CorruptMetaHeaderException("The block meta file header is "+
+          "corrupt", e);
+    }
     return new BlockMetadataHeader(version, dataChecksum);
   }

@@ -136,7 +143,14 @@ public static BlockMetadataHeader preadHeader(FileChannel fc)
    */
   public static BlockMetadataHeader readHeader(DataInputStream in)
       throws IOException {
-    return readHeader(in.readShort(), in);
+    try {
+      return readHeader(in.readShort(), in);
+    } catch (EOFException eof) {
+      // The attempt to read the header threw EOF, indicating there are not
+      // enough bytes in the meta file for the header.
+      throw new CorruptMetaHeaderException("EOF while reading header from meta"+
+          ". The meta file may be truncated or corrupt", eof);
+    }
   }

   /**
@@ -170,7 +184,13 @@ public static BlockMetadataHeader readHeader(RandomAccessFile raf)
   // Version is already read.
   private static BlockMetadataHeader readHeader(short version,
       DataInputStream in) throws IOException {
-    DataChecksum checksum = DataChecksum.newDataChecksum(in);
+    DataChecksum checksum = null;
+    try {
+      checksum = DataChecksum.newDataChecksum(in);
+    } catch (InvalidChecksumSizeException e) {
+      throw new CorruptMetaHeaderException("The block meta file header is "+
+          "corrupt", e);
+    }
     return new BlockMetadataHeader(version, checksum);
   }

CorruptMetaHeaderException.java (new file)

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.datanode;
+
+import java.io.IOException;
+
+/**
+ * Exception object that is thrown when the block metadata file is corrupt.
+ */
+public class CorruptMetaHeaderException extends IOException {
+
+  CorruptMetaHeaderException(String msg) {
+    super(msg);
+  }
+
+  CorruptMetaHeaderException(String msg, Throwable cause) {
+    super(msg, cause);
+  }
+
+}

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java

Lines changed: 11 additions & 2 deletions
@@ -326,13 +326,22 @@ class BlockSender implements java.io.Closeable {
         // storage. The header is important for determining the checksum
         // type later when lazy persistence copies the block to non-transient
         // storage and computes the checksum.
+        int expectedHeaderSize = BlockMetadataHeader.getHeaderSize();
         if (!replica.isOnTransientStorage() &&
-            metaIn.getLength() >= BlockMetadataHeader.getHeaderSize()) {
+            metaIn.getLength() >= expectedHeaderSize) {
           checksumIn = new DataInputStream(new BufferedInputStream(
               metaIn, IO_FILE_BUFFER_SIZE));
-
+
           csum = BlockMetadataHeader.readDataChecksum(checksumIn, block);
           keepMetaInOpen = true;
+        } else if (!replica.isOnTransientStorage() &&
+            metaIn.getLength() < expectedHeaderSize) {
+          LOG.warn("The meta file length {} is less than the expected " +
+              "header length {}, indicating the meta file is corrupt",
+              metaIn.getLength(), expectedHeaderSize);
+          throw new CorruptMetaHeaderException("The meta file length "+
+              metaIn.getLength()+" is less than the expected length "+
+              expectedHeaderSize);
         }
       } else {
         LOG.warn("Could not find metadata file for " + block);

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java

Lines changed: 1 addition & 2 deletions
@@ -208,7 +208,6 @@
 import org.apache.hadoop.util.Daemon;
 import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.apache.hadoop.util.GenericOptionsParser;
-import org.apache.hadoop.util.InvalidChecksumSizeException;
 import org.apache.hadoop.util.JvmPauseMonitor;
 import org.apache.hadoop.util.ServicePlugin;
 import org.apache.hadoop.util.StringUtils;
@@ -3474,7 +3473,7 @@ private void handleVolumeFailures(Set<FsVolumeSpi> unhealthyVolumes) {
   void handleBadBlock(ExtendedBlock block, IOException e, boolean fromScanner) {

     boolean isBadBlock = fromScanner || (e instanceof DiskFileCorruptException
-        || e instanceof InvalidChecksumSizeException);
+        || e instanceof CorruptMetaHeaderException);

     if (!isBadBlock) {
       return;
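
What the one-line swap in handleBadBlock changes: the DataNode now keys off CorruptMetaHeaderException instead of InvalidChecksumSizeException, matching the wrapping done in BlockMetadataHeader above, when deciding whether to flag a replica itself; a client that cannot even read the header never gets far enough to verify checksums and report the block. Below is a self-contained approximation of that predicate using stand-in exception types rather than the real Hadoop classes:

import java.io.IOException;

public class BadBlockPredicateSketch {
  // Stand-ins for DiskFileCorruptException and CorruptMetaHeaderException.
  static class DiskFileCorruptStandIn extends IOException { }
  static class CorruptMetaHeaderStandIn extends IOException { }

  // Mirrors the isBadBlock check in the hunk above: only scanner reports and
  // these two corruption types are escalated; other IOExceptions (for example
  // network errors) are not reported as corrupt replicas.
  static boolean isBadBlock(IOException e, boolean fromScanner) {
    return fromScanner
        || e instanceof DiskFileCorruptStandIn
        || e instanceof CorruptMetaHeaderStandIn;
  }

  public static void main(String[] args) {
    System.out.println(isBadBlock(new CorruptMetaHeaderStandIn(), false));      // true
    System.out.println(isBadBlock(new IOException("connection reset"), false)); // false
  }
}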

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java

Lines changed: 12 additions & 0 deletions
@@ -647,6 +647,12 @@ public void readBlock(final ExtendedBlock block,
             dnR, block, remoteAddress, ioe);
         incrDatanodeNetworkErrors();
       }
+      // Normally the client reports a bad block to the NN. However if the
+      // meta file is corrupt or an disk error occurs (EIO), then the client
+      // never gets a chance to do validation, and hence will never report
+      // the block as bad. For some classes of IO exception, the DN should
+      // report the block as bad, via the handleBadBlock() method
+      datanode.handleBadBlock(block, ioe, false);
       throw ioe;
     } finally {
       IOUtils.closeStream(blockSender);
@@ -1118,6 +1124,12 @@ public void copyBlock(final ExtendedBlock block,
       isOpSuccess = false;
       LOG.info("opCopyBlock {} received exception {}", block, ioe.toString());
       incrDatanodeNetworkErrors();
+      // Normally the client reports a bad block to the NN. However if the
+      // meta file is corrupt or an disk error occurs (EIO), then the client
+      // never gets a chance to do validation, and hence will never report
+      // the block as bad. For some classes of IO exception, the DN should
+      // report the block as bad, via the handleBadBlock() method
+      datanode.handleBadBlock(block, ioe, false);
       throw ioe;
     } finally {
       dataXceiverServer.balanceThrottler.release();
TestCorruptMetadataFile.java (new file)

Lines changed: 165 additions & 0 deletions
@@ -0,0 +1,165 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.server.datanode;
+
+import com.google.common.base.Supplier;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.*;
+import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.RandomAccessFile;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests to ensure that a block is not read successfully from a datanode
+ * when it has a corrupt metadata file.
+ */
+public class TestCorruptMetadataFile {
+
+  private MiniDFSCluster cluster;
+  private MiniDFSCluster.Builder clusterBuilder;
+  private Configuration conf;
+
+  @Before
+  public void setUp() throws Exception {
+    conf = new HdfsConfiguration();
+    // Reduce block acquire retries as we only have 1 DN and it allows the
+    // test to run faster
+    conf.setInt(
+        HdfsClientConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY, 1);
+    clusterBuilder = new MiniDFSCluster.Builder(conf).numDataNodes(1);
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    if (cluster != null) {
+      cluster.shutdown();
+      cluster = null;
+    }
+  }
+
+  @Test(timeout=60000)
+  public void testReadBlockFailsWhenMetaIsCorrupt() throws Exception {
+    cluster = clusterBuilder.build();
+    cluster.waitActive();
+    FileSystem fs = cluster.getFileSystem();
+    DataNode dn0 = cluster.getDataNodes().get(0);
+    Path filePath = new Path("test.dat");
+    FSDataOutputStream out = fs.create(filePath, (short) 1);
+    out.write(1);
+    out.hflush();
+    out.close();
+
+    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
+    File metadataFile = cluster.getBlockMetadataFile(0, block);
+
+    // First ensure we can read the file OK
+    FSDataInputStream in = fs.open(filePath);
+    in.readByte();
+    in.close();
+
+    // Now truncate the meta file, and ensure the data is not read OK
+    RandomAccessFile raFile = new RandomAccessFile(metadataFile, "rw");
+    raFile.setLength(0);
+
+    FSDataInputStream intrunc = fs.open(filePath);
+    LambdaTestUtils.intercept(BlockMissingException.class,
+        () -> intrunc.readByte());
+    intrunc.close();
+
+    // Write 11 bytes to the file, but an invalid header
+    raFile.write("12345678901".getBytes());
+    assertEquals(11, raFile.length());
+
+    FSDataInputStream ininvalid = fs.open(filePath);
+    LambdaTestUtils.intercept(BlockMissingException.class,
+        () -> ininvalid.readByte());
+    ininvalid.close();
+
+    GenericTestUtils.waitFor(new Supplier<Boolean>() {
+      @Override
+      public Boolean get() {
+        return cluster.getNameNode().getNamesystem()
+            .getBlockManager().getCorruptBlocks() == 1;
+      }
+    }, 100, 5000);
+
+    raFile.close();
+  }
+
+  /**
+   * This test create a sample block meta file and then attempts to load it
+   * using BlockMetadataHeader to ensure it can load a valid file and that it
+   * throws a CorruptMetaHeaderException when the header is invalid.
+   * @throws Exception
+   */
+  @Test
+  public void testBlockMetaDataHeaderPReadHandlesCorruptMetaFile()
+      throws Exception {
+    File testDir = GenericTestUtils.getTestDir();
+    RandomAccessFile raFile = new RandomAccessFile(
+        new File(testDir, "metafile"), "rw");
+
+    // Write a valid header into the file
+    // Version
+    raFile.writeShort((short)1);
+    // Checksum type
+    raFile.writeByte(1);
+    // Bytes per checksum
+    raFile.writeInt(512);
+    // We should be able to get the header with no exceptions
+    BlockMetadataHeader header =
+        BlockMetadataHeader.preadHeader(raFile.getChannel());
+
+    // Now truncate the meta file to zero and ensure an exception is raised
+    raFile.setLength(0);
+    LambdaTestUtils.intercept(CorruptMetaHeaderException.class,
+        () -> BlockMetadataHeader.preadHeader(raFile.getChannel()));
+
+    // Now write a partial valid header to sure an exception is thrown
+    // if the header cannot be fully read
+    // Version
+    raFile.writeShort((short)1);
+    // Checksum type
+    raFile.writeByte(1);
+
+    LambdaTestUtils.intercept(CorruptMetaHeaderException.class,
+        () -> BlockMetadataHeader.preadHeader(raFile.getChannel()));
+
+    // Finally write the expected 7 bytes, but invalid data
+    raFile.setLength(0);
+    raFile.write("1234567".getBytes());
+
+    LambdaTestUtils.intercept(CorruptMetaHeaderException.class,
+        () -> BlockMetadataHeader.preadHeader(raFile.getChannel()));
+
+    raFile.close();
+  }
+}
