
Commit 82c7dbd

HBASE-27649 WALPlayer does not properly dedupe overridden cell versions (#5047)

Signed-off-by: Duo Zhang <[email protected]>

1 parent: bc31e68

10 files changed: +426 -58 lines changed


hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java

Lines changed: 5 additions & 0 deletions

@@ -352,6 +352,11 @@ public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo) throws IOExcep
   public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo, TableName table)
     throws IOException {
     ArrayList<BackupImage> ancestors = getAncestors(backupInfo);
+    return filterAncestorsForTable(ancestors, table);
+  }
+
+  public static ArrayList<BackupImage> filterAncestorsForTable(ArrayList<BackupImage> ancestors,
+    TableName table) {
     ArrayList<BackupImage> tableAncestors = new ArrayList<>();
     for (BackupImage image : ancestors) {
       if (image.hasTable(table)) {
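
The new static filterAncestorsForTable factors the per-table filtering out of getAncestors so the incremental backup client can reuse it. A minimal sketch of a call site, with illustrative names (only the method itself comes from this patch):

    // Narrow the full ancestor chain to a single table; the last element of the
    // filtered list is expected to be that table's most recent full backup image.
    ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo);
    ArrayList<BackupImage> forTable =
      BackupManager.filterAncestorsForTable(ancestors, TableName.valueOf("ns:t1"));
    BackupImage lastFull = forTable.get(forTable.size() - 1);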

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java

Lines changed: 58 additions & 7 deletions

@@ -18,11 +18,13 @@
 package org.apache.hadoop.hbase.backup.impl;
 
 import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.JOB_NAME_CONF_KEY;
+import static org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
 
 import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -40,17 +42,26 @@
 import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
 import org.apache.hadoop.hbase.mapreduce.WALPlayer;
+import org.apache.hadoop.hbase.mob.MobConstants;
+import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
+import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
-import org.apache.hadoop.util.Tool;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
+
 /**
  * Incremental backup implementation. See the {@link #execute() execute} method.
  */
@@ -276,10 +287,48 @@ public void execute() throws IOException {
 
     // case INCREMENTAL_COPY:
     try {
+      // todo: need to add an abstraction to encapsulate and DRY this up
+      ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo);
+      Map<TableName, List<RegionInfo>> regionsByTable = new HashMap<>();
+      List<ImmutableBytesWritable> splits = new ArrayList<>();
+      for (TableName table : backupInfo.getTables()) {
+        ArrayList<BackupImage> ancestorsForTable =
+          BackupManager.filterAncestorsForTable(ancestors, table);
+
+        BackupImage backupImage = ancestorsForTable.get(ancestorsForTable.size() - 1);
+        if (backupImage.getType() != BackupType.FULL) {
+          throw new RuntimeException("No full backup found in ancestors for table " + table);
+        }
+
+        String lastFullBackupId = backupImage.getBackupId();
+        Path backupRootDir = new Path(backupInfo.getBackupRootDir());
+
+        FileSystem backupFs = backupRootDir.getFileSystem(conf);
+        Path tableInfoPath =
+          BackupUtils.getTableInfoPath(backupFs, backupRootDir, lastFullBackupId, table);
+        SnapshotProtos.SnapshotDescription snapshotDesc =
+          SnapshotDescriptionUtils.readSnapshotInfo(backupFs, tableInfoPath);
+        SnapshotManifest manifest =
+          SnapshotManifest.open(conf, backupFs, tableInfoPath, snapshotDesc);
+        List<RegionInfo> regionInfos = new ArrayList<>(manifest.getRegionManifests().size());
+        for (SnapshotProtos.SnapshotRegionManifest regionManifest : manifest.getRegionManifests()) {
+          HBaseProtos.RegionInfo regionInfo = regionManifest.getRegionInfo();
+          RegionInfo regionInfoObj = ProtobufUtil.toRegionInfo(regionInfo);
+          // scanning meta doesnt return mob regions, so skip them here too so we keep parity
+          if (Bytes.equals(regionInfoObj.getStartKey(), MobConstants.MOB_REGION_NAME_BYTES)) {
+            continue;
+          }
+
+          regionInfos.add(regionInfoObj);
+          splits.add(new ImmutableBytesWritable(HFileOutputFormat2
+            .combineTableNameSuffix(table.getName(), regionInfoObj.getStartKey())));
+        }
+        regionsByTable.put(table, regionInfos);
+      }
       // copy out the table and region info files for each table
-      BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
+      BackupUtils.copyTableRegionInfo(conn, backupInfo, regionsByTable, conf);
       // convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
-      convertWALsToHFiles();
+      convertWALsToHFiles(splits);
       incrementalCopyHFiles(new String[] { getBulkOutputDir().toString() },
         backupInfo.getBackupRootDir());
     } catch (Exception e) {
@@ -359,7 +408,7 @@ protected void deleteBulkLoadDirectory() throws IOException {
     }
   }
 
-  protected void convertWALsToHFiles() throws IOException {
+  protected void convertWALsToHFiles(List<ImmutableBytesWritable> splits) throws IOException {
     // get incremental backup file list and prepare parameters for DistCp
     List<String> incrBackupFileList = backupInfo.getIncrBackupFileList();
     // Get list of tables in incremental backup set
@@ -375,7 +424,7 @@ protected void convertWALsToHFiles() throws IOException {
         LOG.warn("Table " + table + " does not exists. Skipping in WAL converter");
       }
     }
-    walToHFiles(incrBackupFileList, tableList);
+    walToHFiles(incrBackupFileList, tableList, splits);
 
   }
 
@@ -385,8 +434,9 @@ protected boolean tableExists(TableName table, Connection conn) throws IOExcepti
     }
   }
 
-  protected void walToHFiles(List<String> dirPaths, List<String> tableList) throws IOException {
-    Tool player = new WALPlayer();
+  protected void walToHFiles(List<String> dirPaths, List<String> tableList,
+    List<ImmutableBytesWritable> splits) throws IOException {
+    WALPlayer player = new WALPlayer();
 
     // Player reads all files in arbitrary directory structure and creates
     // a Map task for each file. We use ';' as separator
@@ -401,6 +451,7 @@ protected void walToHFiles(List<String> dirPaths, List<String> tableList) throws
     conf.set(JOB_NAME_CONF_KEY, jobname);
     String[] playerArgs = { dirs, StringUtils.join(tableList, ",") };
 
+    player.setSplits(splits);
    try {
       player.setConf(conf);
       int result = player.run(playerArgs);
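
The behavioural change behind HBASE-27649 is that walToHFiles now hands WALPlayer an explicit split list instead of letting the bulk-load job run without region-aligned boundaries. A condensed sketch of the resulting flow, assuming setSplits is the setter this change adds to WALPlayer (error handling shown is illustrative):

    // Condensed: configure the player, pass the precomputed splits, run the job.
    WALPlayer player = new WALPlayer();
    player.setSplits(splits);   // region boundaries taken from the last full backup
    player.setConf(conf);       // conf already carries the bulk-output dir and job name
    int result = player.run(new String[] { dirs, StringUtils.join(tableList, ",") });
    if (result != 0) {
      throw new IOException("WAL Player failed");   // illustrative failure handling
    }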

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java

Lines changed: 48 additions & 11 deletions

@@ -40,7 +40,6 @@
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.BackupInfo;
@@ -54,6 +53,7 @@
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.master.region.MasterRegionFactory;
+import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
 import org.apache.hadoop.hbase.tool.BulkLoadHFiles;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -122,7 +122,8 @@ private BackupUtils() {
    * @param conf configuration
    * @throws IOException exception
    */
-  public static void copyTableRegionInfo(Connection conn, BackupInfo backupInfo, Configuration conf)
+  public static void copyTableRegionInfo(Connection conn, BackupInfo backupInfo,
+    Map<TableName, List<RegionInfo>> lastFullBackupForTable, Configuration conf)
     throws IOException {
     Path rootDir = CommonFSUtils.getRootDir(conf);
     FileSystem fs = rootDir.getFileSystem(conf);
@@ -147,20 +148,56 @@ public static void copyTableRegionInfo(Connection conn, BackupInfo backupInfo, C
         LOG.debug("Attempting to copy table info for:" + table + " target: " + target
           + " descriptor: " + orig);
         LOG.debug("Finished copying tableinfo.");
-        List<RegionInfo> regions = MetaTableAccessor.getTableRegions(conn, table);
-        // For each region, write the region info to disk
-        LOG.debug("Starting to write region info for table " + table);
-        for (RegionInfo regionInfo : regions) {
-          Path regionDir = FSUtils
-            .getRegionDirFromTableDir(new Path(backupInfo.getTableBackupDir(table)), regionInfo);
-          regionDir = new Path(backupInfo.getTableBackupDir(table), regionDir.getName());
-          writeRegioninfoOnFilesystem(conf, targetFs, regionDir, regionInfo);
-        }
+        copyTableRegionInfosFromParent(table, targetFs, backupInfo,
+          lastFullBackupForTable.get(table), conf);
         LOG.debug("Finished writing region info for table " + table);
       }
     }
   }
 
+  private static void copyTableRegionInfosFromParent(TableName table, FileSystem targetFs,
+    BackupInfo backupInfo, List<RegionInfo> lastFullBackupForTable, Configuration conf)
+    throws IOException {
+    for (RegionInfo regionInfo : lastFullBackupForTable) {
+      Path regionDir =
+        FSUtils.getRegionDirFromTableDir(new Path(backupInfo.getTableBackupDir(table)), regionInfo);
+      regionDir = new Path(backupInfo.getTableBackupDir(table), regionDir.getName());
+      writeRegioninfoOnFilesystem(conf, targetFs, regionDir, regionInfo);
+    }
+  }
+
+  /**
+   * Returns value represent path for:
+   * ""/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot/
+   * snapshot_1396650097621_namespace_table" this path contains .snapshotinfo, .tabledesc (0.96 and
+   * 0.98) this path contains .snapshotinfo, .data.manifest (trunk)
+   * @param tableName table name
+   * @return path to table info
+   * @throws IOException exception
+   */
+  public static Path getTableInfoPath(FileSystem fs, Path backupRootPath, String backupId,
+    TableName tableName) throws IOException {
+    Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
+    Path tableInfoPath = null;
+
+    // can't build the path directly as the timestamp values are different
+    FileStatus[] snapshots = fs.listStatus(tableSnapShotPath,
+      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
+    for (FileStatus snapshot : snapshots) {
+      tableInfoPath = snapshot.getPath();
+      // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest";
+      if (tableInfoPath.getName().endsWith("data.manifest")) {
+        break;
+      }
+    }
+    return tableInfoPath;
+  }
+
+  static Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backupId) {
+    return new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
+      HConstants.SNAPSHOT_DIR_NAME);
+  }
+
   /**
    * Write the .regioninfo file on-disk.
    */
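
getTableInfoPath is now a public static utility so both the incremental backup client and RestoreTool resolve a full backup's snapshot directory through the same code. A minimal sketch of a call site, with a placeholder backup id (the surrounding calls mirror how the patch uses the result):

    // Hypothetical caller: locate the snapshot manifest dir for a table inside a
    // full backup, then open the manifest to read region info or the table descriptor.
    FileSystem fs = backupRootPath.getFileSystem(conf);
    Path tableInfoPath =
      BackupUtils.getTableInfoPath(fs, backupRootPath, "backup_1700000000000", tableName);
    SnapshotProtos.SnapshotDescription desc =
      SnapshotDescriptionUtils.readSnapshotInfo(fs, tableInfoPath);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, tableInfoPath, desc);
    TableDescriptor tableDescriptor = manifest.getTableDescriptor();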

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/RestoreTool.java

Lines changed: 9 additions & 33 deletions

@@ -145,13 +145,13 @@ void modifyTableSync(Connection conn, TableDescriptor desc) throws IOException {
    * the future
    * @param conn HBase connection
    * @param tableBackupPath backup path
-   * @param logDirs : incremental backup folders, which contains WAL
-   * @param tableNames : source tableNames(table names were backuped)
-   * @param newTableNames : target tableNames(table names to be restored to)
+   * @param hfileDirs incremental backup folders, which contains hfiles to bulkload
+   * @param tableNames source tableNames(table names were backuped)
+   * @param newTableNames target tableNames(table names to be restored to)
    * @param incrBackupId incremental backup Id
    * @throws IOException exception
    */
-  public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] logDirs,
+  public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] hfileDirs,
     TableName[] tableNames, TableName[] newTableNames, String incrBackupId) throws IOException {
     try (Admin admin = conn.getAdmin()) {
       if (tableNames.length != newTableNames.length) {
@@ -202,7 +202,7 @@ public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[
       }
       RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
 
-      restoreService.run(logDirs, tableNames, restoreRootDir, newTableNames, false);
+      restoreService.run(hfileDirs, tableNames, restoreRootDir, newTableNames, false);
     }
   }
 
@@ -225,39 +225,14 @@ Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backu
       HConstants.SNAPSHOT_DIR_NAME);
   }
 
-  /**
-   * Returns value represent path for:
-   * ""/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot/
-   * snapshot_1396650097621_namespace_table" this path contains .snapshotinfo, .tabledesc (0.96 and
-   * 0.98) this path contains .snapshotinfo, .data.manifest (trunk)
-   * @param tableName table name
-   * @return path to table info
-   * @throws IOException exception
-   */
-  Path getTableInfoPath(TableName tableName) throws IOException {
-    Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
-    Path tableInfoPath = null;
-
-    // can't build the path directly as the timestamp values are different
-    FileStatus[] snapshots = fs.listStatus(tableSnapShotPath,
-      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
-    for (FileStatus snapshot : snapshots) {
-      tableInfoPath = snapshot.getPath();
-      // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest";
-      if (tableInfoPath.getName().endsWith("data.manifest")) {
-        break;
-      }
-    }
-    return tableInfoPath;
-  }
-
   /**
    * Get table descriptor
    * @param tableName is the table backed up
   * @return {@link TableDescriptor} saved in backup image of the table
   */
   TableDescriptor getTableDesc(TableName tableName) throws IOException {
-    Path tableInfoPath = this.getTableInfoPath(tableName);
+    Path tableInfoPath = BackupUtils.getTableInfoPath(fs, backupRootPath, backupId, tableName);
+    ;
     SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, tableInfoPath);
     SnapshotManifest manifest = SnapshotManifest.open(conf, fs, tableInfoPath, desc);
     TableDescriptor tableDescriptor = manifest.getTableDescriptor();
@@ -307,7 +282,8 @@ private void createAndRestoreTable(Connection conn, TableName tableName, TableNa
       tableDescriptor = manifest.getTableDescriptor();
     } else {
       tableDescriptor = getTableDesc(tableName);
-      snapshotMap.put(tableName, getTableInfoPath(tableName));
+      snapshotMap.put(tableName,
+        BackupUtils.getTableInfoPath(fs, backupRootPath, backupId, tableName));
     }
     if (tableDescriptor == null) {
       LOG.debug("Found no table descriptor in the snapshot dir, previous schema would be lost");
