Commit 6b672cc

Revert "HBASE-27649 WALPlayer does not properly dedupe overridden cell versions (#5047)" (#5057)
This reverts commit 82c7dbd.
1 parent 82c7dbd commit 6b672cc

File tree

10 files changed: +58 −426 lines changed

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/BackupManager.java

Lines changed: 0 additions & 5 deletions
@@ -352,11 +352,6 @@ public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo) throws IOExcep
   public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo, TableName table)
     throws IOException {
     ArrayList<BackupImage> ancestors = getAncestors(backupInfo);
-    return filterAncestorsForTable(ancestors, table);
-  }
-
-  public static ArrayList<BackupImage> filterAncestorsForTable(ArrayList<BackupImage> ancestors,
-    TableName table) {
     ArrayList<BackupImage> tableAncestors = new ArrayList<>();
     for (BackupImage image : ancestors) {
       if (image.hasTable(table)) {
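Note: the diff viewer cuts this hunk off inside the restored loop. A hedged sketch of how the reinstated getAncestors(BackupInfo, TableName) plausibly completes after the revert; the lines below the visible context are reconstructed for illustration, not quoted from this commit:

  public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo, TableName table)
    throws IOException {
    ArrayList<BackupImage> ancestors = getAncestors(backupInfo);
    ArrayList<BackupImage> tableAncestors = new ArrayList<>();
    for (BackupImage image : ancestors) {
      if (image.hasTable(table)) {
        // keep every ancestor image that covers this table ...
        tableAncestors.add(image);
        // ... and stop once the chain reaches that table's last full backup
        if (image.getType() == BackupType.FULL) {
          break;
        }
      }
    }
    return tableAncestors;
  }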

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/impl/IncrementalTableBackupClient.java

Lines changed: 7 additions & 58 deletions
@@ -18,13 +18,11 @@
 package org.apache.hadoop.hbase.backup.impl;
 
 import static org.apache.hadoop.hbase.backup.BackupRestoreConstants.JOB_NAME_CONF_KEY;
-import static org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
 
 import java.io.IOException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -42,26 +40,17 @@
 import org.apache.hadoop.hbase.backup.util.BackupUtils;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Connection;
-import org.apache.hadoop.hbase.client.RegionInfo;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
-import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
 import org.apache.hadoop.hbase.mapreduce.WALPlayer;
-import org.apache.hadoop.hbase.mob.MobConstants;
-import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
-import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
+import org.apache.hadoop.util.Tool;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
-import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
-
 /**
  * Incremental backup implementation. See the {@link #execute() execute} method.
  */
@@ -287,48 +276,10 @@ public void execute() throws IOException {
 
     // case INCREMENTAL_COPY:
     try {
-      // todo: need to add an abstraction to encapsulate and DRY this up
-      ArrayList<BackupImage> ancestors = backupManager.getAncestors(backupInfo);
-      Map<TableName, List<RegionInfo>> regionsByTable = new HashMap<>();
-      List<ImmutableBytesWritable> splits = new ArrayList<>();
-      for (TableName table : backupInfo.getTables()) {
-        ArrayList<BackupImage> ancestorsForTable =
-          BackupManager.filterAncestorsForTable(ancestors, table);
-
-        BackupImage backupImage = ancestorsForTable.get(ancestorsForTable.size() - 1);
-        if (backupImage.getType() != BackupType.FULL) {
-          throw new RuntimeException("No full backup found in ancestors for table " + table);
-        }
-
-        String lastFullBackupId = backupImage.getBackupId();
-        Path backupRootDir = new Path(backupInfo.getBackupRootDir());
-
-        FileSystem backupFs = backupRootDir.getFileSystem(conf);
-        Path tableInfoPath =
-          BackupUtils.getTableInfoPath(backupFs, backupRootDir, lastFullBackupId, table);
-        SnapshotProtos.SnapshotDescription snapshotDesc =
-          SnapshotDescriptionUtils.readSnapshotInfo(backupFs, tableInfoPath);
-        SnapshotManifest manifest =
-          SnapshotManifest.open(conf, backupFs, tableInfoPath, snapshotDesc);
-        List<RegionInfo> regionInfos = new ArrayList<>(manifest.getRegionManifests().size());
-        for (SnapshotProtos.SnapshotRegionManifest regionManifest : manifest.getRegionManifests()) {
-          HBaseProtos.RegionInfo regionInfo = regionManifest.getRegionInfo();
-          RegionInfo regionInfoObj = ProtobufUtil.toRegionInfo(regionInfo);
-          // scanning meta doesnt return mob regions, so skip them here too so we keep parity
-          if (Bytes.equals(regionInfoObj.getStartKey(), MobConstants.MOB_REGION_NAME_BYTES)) {
-            continue;
-          }
-
-          regionInfos.add(regionInfoObj);
-          splits.add(new ImmutableBytesWritable(HFileOutputFormat2
-            .combineTableNameSuffix(table.getName(), regionInfoObj.getStartKey())));
-        }
-        regionsByTable.put(table, regionInfos);
-      }
       // copy out the table and region info files for each table
-      BackupUtils.copyTableRegionInfo(conn, backupInfo, regionsByTable, conf);
+      BackupUtils.copyTableRegionInfo(conn, backupInfo, conf);
       // convert WAL to HFiles and copy them to .tmp under BACKUP_ROOT
-      convertWALsToHFiles(splits);
+      convertWALsToHFiles();
       incrementalCopyHFiles(new String[] { getBulkOutputDir().toString() },
         backupInfo.getBackupRootDir());
     } catch (Exception e) {
@@ -408,7 +359,7 @@ protected void deleteBulkLoadDirectory() throws IOException {
     }
   }
 
-  protected void convertWALsToHFiles(List<ImmutableBytesWritable> splits) throws IOException {
+  protected void convertWALsToHFiles() throws IOException {
     // get incremental backup file list and prepare parameters for DistCp
     List<String> incrBackupFileList = backupInfo.getIncrBackupFileList();
     // Get list of tables in incremental backup set
@@ -424,7 +375,7 @@ protected void convertWALsToHFiles(List<ImmutableBytesWritable> splits) throws I
         LOG.warn("Table " + table + " does not exists. Skipping in WAL converter");
       }
     }
-    walToHFiles(incrBackupFileList, tableList, splits);
+    walToHFiles(incrBackupFileList, tableList);
 
   }

@@ -434,9 +385,8 @@ protected boolean tableExists(TableName table, Connection conn) throws IOExcepti
     }
   }
 
-  protected void walToHFiles(List<String> dirPaths, List<String> tableList,
-    List<ImmutableBytesWritable> splits) throws IOException {
-    WALPlayer player = new WALPlayer();
+  protected void walToHFiles(List<String> dirPaths, List<String> tableList) throws IOException {
+    Tool player = new WALPlayer();
 
     // Player reads all files in arbitrary directory structure and creates
     // a Map task for each file. We use ';' as separator
@@ -451,7 +401,6 @@ protected void walToHFiles(List<String> dirPaths, List<String> tableList,
     conf.set(JOB_NAME_CONF_KEY, jobname);
     String[] playerArgs = { dirs, StringUtils.join(tableList, ",") };
 
-    player.setSplits(splits);
     try {
       player.setConf(conf);
       int result = player.run(playerArgs);
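Net effect of the changes above: the backup client goes back to driving WALPlayer as a plain org.apache.hadoop.util.Tool, without injecting bulk-load splits. A minimal, hedged sketch of the restored conversion step, condensed from these hunks; the bulk-output wiring and the job-name key are assumptions about surrounding code that this diff does not show:

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
import org.apache.hadoop.util.Tool;

public final class WalToHFilesSketch {
  // Runs WALPlayer over the incremental WAL directories so it writes HFiles under
  // bulkOutputDir. bulkOutputDir and jobName stand in for fields/helpers of
  // IncrementalTableBackupClient (e.g. getBulkOutputDir()).
  static void walToHFiles(Configuration conf, List<String> dirPaths, List<String> tableList,
    Path bulkOutputDir, String jobName) throws Exception {
    Tool player = new WALPlayer();
    // WALPlayer creates one map task per input file; ';' separates directories
    // because WAL file names themselves contain ','.
    String dirs = String.join(";", dirPaths);
    conf.set(WALPlayer.BULK_OUTPUT_CONF_KEY, bulkOutputDir.toString()); // assumed wiring
    conf.set("mapreduce.job.name", jobName); // what JOB_NAME_CONF_KEY is assumed to resolve to
    player.setConf(conf);
    int result = player.run(new String[] { dirs, String.join(",", tableList) });
    if (result != 0) {
      throw new IOException("WAL Player failed");
    }
  }
}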

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/BackupUtils.java

Lines changed: 11 additions & 48 deletions
@@ -40,6 +40,7 @@
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.backup.BackupInfo;
@@ -53,7 +54,6 @@
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.TableDescriptor;
 import org.apache.hadoop.hbase.master.region.MasterRegionFactory;
-import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
 import org.apache.hadoop.hbase.tool.BulkLoadHFiles;
 import org.apache.hadoop.hbase.util.CommonFSUtils;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -122,8 +122,7 @@ private BackupUtils() {
    * @param conf configuration
    * @throws IOException exception
    */
-  public static void copyTableRegionInfo(Connection conn, BackupInfo backupInfo,
-    Map<TableName, List<RegionInfo>> lastFullBackupForTable, Configuration conf)
+  public static void copyTableRegionInfo(Connection conn, BackupInfo backupInfo, Configuration conf)
     throws IOException {
     Path rootDir = CommonFSUtils.getRootDir(conf);
     FileSystem fs = rootDir.getFileSystem(conf);
@@ -148,56 +147,20 @@ public static void copyTableRegionInfo(Connection conn, BackupInfo backupInfo,
         LOG.debug("Attempting to copy table info for:" + table + " target: " + target
           + " descriptor: " + orig);
         LOG.debug("Finished copying tableinfo.");
-        copyTableRegionInfosFromParent(table, targetFs, backupInfo,
-          lastFullBackupForTable.get(table), conf);
+        List<RegionInfo> regions = MetaTableAccessor.getTableRegions(conn, table);
+        // For each region, write the region info to disk
+        LOG.debug("Starting to write region info for table " + table);
+        for (RegionInfo regionInfo : regions) {
+          Path regionDir = FSUtils
+            .getRegionDirFromTableDir(new Path(backupInfo.getTableBackupDir(table)), regionInfo);
+          regionDir = new Path(backupInfo.getTableBackupDir(table), regionDir.getName());
+          writeRegioninfoOnFilesystem(conf, targetFs, regionDir, regionInfo);
+        }
         LOG.debug("Finished writing region info for table " + table);
       }
     }
   }
 
-  private static void copyTableRegionInfosFromParent(TableName table, FileSystem targetFs,
-    BackupInfo backupInfo, List<RegionInfo> lastFullBackupForTable, Configuration conf)
-    throws IOException {
-    for (RegionInfo regionInfo : lastFullBackupForTable) {
-      Path regionDir =
-        FSUtils.getRegionDirFromTableDir(new Path(backupInfo.getTableBackupDir(table)), regionInfo);
-      regionDir = new Path(backupInfo.getTableBackupDir(table), regionDir.getName());
-      writeRegioninfoOnFilesystem(conf, targetFs, regionDir, regionInfo);
-    }
-  }
-
-  /**
-   * Returns value represent path for:
-   * ""/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot/
-   * snapshot_1396650097621_namespace_table" this path contains .snapshotinfo, .tabledesc (0.96 and
-   * 0.98) this path contains .snapshotinfo, .data.manifest (trunk)
-   * @param tableName table name
-   * @return path to table info
-   * @throws IOException exception
-   */
-  public static Path getTableInfoPath(FileSystem fs, Path backupRootPath, String backupId,
-    TableName tableName) throws IOException {
-    Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
-    Path tableInfoPath = null;
-
-    // can't build the path directly as the timestamp values are different
-    FileStatus[] snapshots = fs.listStatus(tableSnapShotPath,
-      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
-    for (FileStatus snapshot : snapshots) {
-      tableInfoPath = snapshot.getPath();
-      // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest";
-      if (tableInfoPath.getName().endsWith("data.manifest")) {
-        break;
-      }
-    }
-    return tableInfoPath;
-  }
-
-  static Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backupId) {
-    return new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
-      HConstants.SNAPSHOT_DIR_NAME);
-  }
-
   /**
    * Write the .regioninfo file on-disk.
    */
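The practical difference in this hunk: region metadata written into the backup image now comes from hbase:meta at backup time instead of from the previous full backup's snapshot manifest. A hedged, self-contained sketch of the restored per-table loop (writeRegioninfoOnFilesystem is the private BackupUtils helper referenced above, so the write itself is only noted in a comment):

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.util.FSUtils;

public final class RegionInfoCopySketch {
  // Lists the table's live regions from hbase:meta and computes, for each one, the
  // directory under the backup image where its .regioninfo file is written.
  static void copyRegionInfos(Connection conn, TableName table, String tableBackupDir)
    throws IOException {
    List<RegionInfo> regions = MetaTableAccessor.getTableRegions(conn, table);
    for (RegionInfo regionInfo : regions) {
      Path regionDir = FSUtils.getRegionDirFromTableDir(new Path(tableBackupDir), regionInfo);
      regionDir = new Path(tableBackupDir, regionDir.getName());
      // BackupUtils.copyTableRegionInfo then calls its private
      // writeRegioninfoOnFilesystem(conf, targetFs, regionDir, regionInfo) for this directory.
    }
  }
}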

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/util/RestoreTool.java

Lines changed: 33 additions & 9 deletions
@@ -145,13 +145,13 @@ void modifyTableSync(Connection conn, TableDescriptor desc) throws IOException {
    * the future
    * @param conn HBase connection
    * @param tableBackupPath backup path
-   * @param hfileDirs incremental backup folders, which contains hfiles to bulkload
-   * @param tableNames source tableNames(table names were backuped)
-   * @param newTableNames target tableNames(table names to be restored to)
+   * @param logDirs : incremental backup folders, which contains WAL
+   * @param tableNames : source tableNames(table names were backuped)
+   * @param newTableNames : target tableNames(table names to be restored to)
    * @param incrBackupId incremental backup Id
    * @throws IOException exception
    */
-  public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] hfileDirs,
+  public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] logDirs,
     TableName[] tableNames, TableName[] newTableNames, String incrBackupId) throws IOException {
     try (Admin admin = conn.getAdmin()) {
       if (tableNames.length != newTableNames.length) {
@@ -202,7 +202,7 @@ public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[
       }
       RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
 
-      restoreService.run(hfileDirs, tableNames, restoreRootDir, newTableNames, false);
+      restoreService.run(logDirs, tableNames, restoreRootDir, newTableNames, false);
     }
   }

@@ -225,14 +225,39 @@ Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backu
       HConstants.SNAPSHOT_DIR_NAME);
   }
 
+  /**
+   * Returns value represent path for:
+   * ""/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot/
+   * snapshot_1396650097621_namespace_table" this path contains .snapshotinfo, .tabledesc (0.96 and
+   * 0.98) this path contains .snapshotinfo, .data.manifest (trunk)
+   * @param tableName table name
+   * @return path to table info
+   * @throws IOException exception
+   */
+  Path getTableInfoPath(TableName tableName) throws IOException {
+    Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
+    Path tableInfoPath = null;
+
+    // can't build the path directly as the timestamp values are different
+    FileStatus[] snapshots = fs.listStatus(tableSnapShotPath,
+      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
+    for (FileStatus snapshot : snapshots) {
+      tableInfoPath = snapshot.getPath();
+      // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest";
+      if (tableInfoPath.getName().endsWith("data.manifest")) {
+        break;
+      }
+    }
+    return tableInfoPath;
+  }
+
   /**
    * Get table descriptor
    * @param tableName is the table backed up
    * @return {@link TableDescriptor} saved in backup image of the table
    */
   TableDescriptor getTableDesc(TableName tableName) throws IOException {
-    Path tableInfoPath = BackupUtils.getTableInfoPath(fs, backupRootPath, backupId, tableName);
-    ;
+    Path tableInfoPath = this.getTableInfoPath(tableName);
     SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, tableInfoPath);
     SnapshotManifest manifest = SnapshotManifest.open(conf, fs, tableInfoPath, desc);
     TableDescriptor tableDescriptor = manifest.getTableDescriptor();
@@ -282,8 +307,7 @@ private void createAndRestoreTable(Connection conn, TableName tableName, TableNa
       tableDescriptor = manifest.getTableDescriptor();
     } else {
       tableDescriptor = getTableDesc(tableName);
-      snapshotMap.put(tableName,
-        BackupUtils.getTableInfoPath(fs, backupRootPath, backupId, tableName));
+      snapshotMap.put(tableName, getTableInfoPath(tableName));
     }
     if (tableDescriptor == null) {
       LOG.debug("Found no table descriptor in the snapshot dir, previous schema would be lost");
