-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-28564 Refactor direct interactions of Reference file creations to SFT interface #5939
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -681,8 +681,9 @@ private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv en | |
| // table dir. In case of failure, the proc would go through this again, already existing | ||
| // region dirs and split files would just be ignored, new split files should get created. | ||
| int nbFiles = 0; | ||
| final Map<String, Collection<StoreFileInfo>> files = | ||
| new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount()); | ||
| final Map<String, Pair<Collection<StoreFileInfo>, StoreFileTracker>> files = | ||
| new HashMap<String, Pair<Collection<StoreFileInfo>, StoreFileTracker>>( | ||
| htd.getColumnFamilyCount()); | ||
| for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) { | ||
| String family = cfd.getNameAsString(); | ||
| StoreFileTracker tracker = | ||
|
|
@@ -705,7 +706,7 @@ private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv en | |
| } | ||
| if (filteredSfis == null) { | ||
| filteredSfis = new ArrayList<StoreFileInfo>(sfis.size()); | ||
| files.put(family, filteredSfis); | ||
| files.put(family, new Pair(filteredSfis, tracker)); | ||
| } | ||
| filteredSfis.add(sfi); | ||
| nbFiles++; | ||
|
|
@@ -728,10 +729,12 @@ private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv en | |
| final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles); | ||
|
|
||
| // Split each store file. | ||
| for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) { | ||
| for (Map.Entry<String, Pair<Collection<StoreFileInfo>, StoreFileTracker>> e : files | ||
| .entrySet()) { | ||
| byte[] familyName = Bytes.toBytes(e.getKey()); | ||
| final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName); | ||
| final Collection<StoreFileInfo> storeFiles = e.getValue(); | ||
| Pair<Collection<StoreFileInfo>, StoreFileTracker> storeFilesAndTracker = e.getValue(); | ||
| final Collection<StoreFileInfo> storeFiles = storeFilesAndTracker.getFirst(); | ||
| if (storeFiles != null && storeFiles.size() > 0) { | ||
| final Configuration storeConfiguration = | ||
| StoreUtils.createStoreConfiguration(env.getMasterConfiguration(), htd, hcd); | ||
|
|
@@ -742,8 +745,9 @@ private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv en | |
| // is running in a regionserver's Store context, or we might not be able | ||
| // to read the hfiles. | ||
| storeFileInfo.setConf(storeConfiguration); | ||
| StoreFileSplitter sfs = new StoreFileSplitter(regionFs, familyName, | ||
| new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED)); | ||
| StoreFileSplitter sfs = | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: Assign the value of .getSecond() to a local variable of type |
||
| new StoreFileSplitter(regionFs, storeFilesAndTracker.getSecond(), familyName, | ||
| new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED)); | ||
| futures.add(threadPool.submit(sfs)); | ||
| } | ||
| } | ||
|
|
@@ -809,19 +813,19 @@ private void assertSplitResultFilesCount(final FileSystem fs, | |
| } | ||
| } | ||
|
|
||
| private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) | ||
| throws IOException { | ||
| private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, StoreFileTracker tracker, | ||
| byte[] family, HStoreFile sf) throws IOException { | ||
| if (LOG.isDebugEnabled()) { | ||
| LOG.debug("pid=" + getProcId() + " splitting started for store file: " + sf.getPath() | ||
| + " for region: " + getParentRegion().getShortNameToLog()); | ||
| } | ||
|
|
||
| final byte[] splitRow = getSplitRow(); | ||
| final String familyName = Bytes.toString(family); | ||
| final Path path_first = | ||
| regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow, false, splitPolicy); | ||
| final Path path_second = | ||
| regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow, true, splitPolicy); | ||
| final Path path_first = regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow, | ||
| false, splitPolicy, tracker); | ||
| final Path path_second = regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow, | ||
| true, splitPolicy, tracker); | ||
| if (LOG.isDebugEnabled()) { | ||
| LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + sf.getPath() | ||
| + " for region: " + getParentRegion().getShortNameToLog()); | ||
|
|
@@ -837,22 +841,25 @@ private class StoreFileSplitter implements Callable<Pair<Path, Path>> { | |
| private final HRegionFileSystem regionFs; | ||
| private final byte[] family; | ||
| private final HStoreFile sf; | ||
| private final StoreFileTracker tracker; | ||
|
|
||
| /** | ||
| * Constructor that takes what it needs to split | ||
| * @param regionFs the file system | ||
| * @param family Family that contains the store file | ||
| * @param sf which file | ||
| */ | ||
| public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) { | ||
| public StoreFileSplitter(HRegionFileSystem regionFs, StoreFileTracker tracker, byte[] family, | ||
| HStoreFile sf) { | ||
| this.regionFs = regionFs; | ||
| this.sf = sf; | ||
| this.family = family; | ||
| this.tracker = tracker; | ||
| } | ||
|
|
||
| @Override | ||
| public Pair<Path, Path> call() throws IOException { | ||
| return splitStoreFile(regionFs, family, sf); | ||
| return splitStoreFile(regionFs, tracker, family, sf); | ||
| } | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,6 +35,8 @@ | |
| import org.apache.hadoop.hbase.MetaTableAccessor; | ||
| import org.apache.hadoop.hbase.ScheduledChore; | ||
| import org.apache.hadoop.hbase.TableName; | ||
| import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; | ||
| import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; | ||
| import org.apache.hadoop.hbase.client.Connection; | ||
| import org.apache.hadoop.hbase.client.ConnectionFactory; | ||
| import org.apache.hadoop.hbase.client.Get; | ||
|
|
@@ -50,6 +52,8 @@ | |
| import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; | ||
| import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; | ||
| import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; | ||
| import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; | ||
| import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; | ||
| import org.apache.hadoop.hbase.util.Bytes; | ||
| import org.apache.hadoop.hbase.util.CommonFSUtils; | ||
| import org.apache.hadoop.hbase.util.Pair; | ||
|
|
@@ -422,7 +426,16 @@ private static Pair<Boolean, Boolean> checkRegionReferences(MasterServices servi | |
| try { | ||
| HRegionFileSystem regionFs = HRegionFileSystem | ||
| .openRegionFromFileSystem(services.getConfiguration(), fs, tabledir, region, true); | ||
| boolean references = regionFs.hasReferences(tableDescriptor); | ||
| ColumnFamilyDescriptor[] families = tableDescriptor.getColumnFamilies(); | ||
| boolean references = false; | ||
| for (ColumnFamilyDescriptor cfd : families) { | ||
| StoreFileTracker sft = StoreFileTrackerFactory.create(services.getConfiguration(), | ||
| tableDescriptor, ColumnFamilyDescriptorBuilder.of(cfd.getNameAsString()), regionFs); | ||
| references = references || sft.hasReferences(); | ||
| if (references) { | ||
| break; | ||
| } | ||
| } | ||
|
Comment on lines
+429
to
+438
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should also be moved to HRegionFileSystem.hasReferences(), like other methods did? Additional question: Is it guaranteed that we don't have more than one SFT instance for the same store at any point in time? Because the file based SFT impl relies on the fact it's the single instance manipulating its meta files. If we have multiple instances of SFT for the same store, it could lead to inconsistencies, where one of the instances update the meta file, than the others would be looking at an outdated state of the meta files.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
When we introduce virtual links to Link/Ref files using SFT, hasReferences() abstraction also should move to SFT layer and there is only one reference left of HRegionFileSystem.hasReferences() which will be eventually removed in HBASE-28861
This should only be true for WRITE mode of SFT instance right? there could be multiple instances of READ only mode SFT for the same store. I have created HBASE-28863 for introducing cache primarily for READ only mode
Is this part not already handled based on timestamp? Or Flush and Compaction both are using SFT instance from StoreEngine due to that same reason?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
How would the "READ_ONLY" SFTs know that the "WRITER" had rotated the file? Specially if its on a different process, like here? Other master based processes such as split take it for granted since the region would be closed, but I don't think it's the case here.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was looking at the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
load of SFT every time does a backedfile.load() again right, at that time it should know the latest manifest and load that one again; it does not return the store files it cached now |
||
| return new Pair<>(Boolean.TRUE, references); | ||
| } catch (IOException e) { | ||
| LOG.error("Error trying to determine if region {} has references, assuming it does", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess in the future we would be moving mergeStoreFile to the SFT interface itself?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes in future if we have split/merge as well in SFT layer based on the impl (FILE, DEFAULT) we can commit all the ref/link files at once for FSFT based implementations