-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-7251][Spark Core] Perform sequential scan when iterating over entries in BytesToBytesMap #5836
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-7251][Spark Core] Perform sequential scan when iterating over entries in BytesToBytesMap #5836
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -173,18 +173,35 @@ public BytesToBytesMap( | |
| public Iterator<Location> iterator() { | ||
| return new Iterator<Location>() { | ||
|
|
||
| private int nextPos = bitset.nextSetBit(0); | ||
| private int cur = 0; | ||
|
|
||
| private int pageCur = 0; | ||
|
|
||
| private MemoryBlock currentPage; | ||
|
|
||
| private long addr; | ||
|
|
||
| @Override | ||
| public boolean hasNext() { | ||
| return nextPos != -1; | ||
| return cur != size; | ||
| } | ||
|
|
||
| @Override | ||
| public Location next() { | ||
| final int pos = nextPos; | ||
| nextPos = bitset.nextSetBit(nextPos + 1); | ||
| return loc.with(pos, 0, true); | ||
| if (currentPage == null) { | ||
| currentPage = dataPages.get(pageCur++); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should never use inline increment operations like this. When I see this, I have to take time to remember whether |
||
| addr = currentPage.getBaseOffset(); | ||
| } | ||
| long keySize = PlatformDependent.UNSAFE.getLong(memoryManager.getPage(addr), addr); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you're calling This call isn't necessary, though, since we happen to have the |
||
| if (keySize == -1L) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. -1 should be lifted into a static constant named
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This branch was never taken in the unit tests but was exercised by my stress tester, which explains why I was able to find a bug even though the tests passed. This is why I'm loving IntelliJ's code coverage metrics for this Tungsten stuff: insisting on 100% line coverage really helps to prevent bugs like this. |
||
| currentPage = dataPages.get(pageCur++); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here; do not ever use prefix / postfix increments when passing arguments to calculations. |
||
| addr = currentPage.getBaseOffset(); | ||
| } | ||
| loc.with(addr, true); | ||
| addr += keySize + 8; | ||
| addr += PlatformDependent.UNSAFE.getLong(memoryManager.getPage(addr), addr) + 8; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line is confusing; can the first half of this addition be derived from a call to |
||
| cur++; | ||
| return loc; | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -291,6 +308,14 @@ Location with(int pos, int keyHashcode, boolean isDefined) { | |
| return this; | ||
| } | ||
|
|
||
| Location with(long fullKeyAddress, boolean isDefined) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This method is only ever called when |
||
| this.isDefined = isDefined; | ||
| if (isDefined) { | ||
| updateAddressesAndSizes(fullKeyAddress); | ||
| } | ||
| return this; | ||
| } | ||
|
|
||
| /** | ||
| * Returns true if the key is defined at this position, and false otherwise. | ||
| */ | ||
|
|
@@ -380,7 +405,13 @@ public void putNewKey( | |
| bitset.set(pos); | ||
|
|
||
| // If there's not enough space in the current page, allocate a new page: | ||
| if (currentDataPage == null || PAGE_SIZE_BYTES - pageCursor < requiredSize) { | ||
| if (currentDataPage == null || PAGE_SIZE_BYTES - pageCursor - 8 < requiredSize) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There's an assertion on line 403 that needs to be updated to reserve 8 extra bytes for the final length. |
||
| if (currentDataPage != null) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The fact that we have a check for
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, I guess we'll still end up allocating a new block. I'll add a comment to make this more clear. |
||
| final Object pageBaseObject = currentDataPage.getBaseObject(); | ||
| final long pageBaseOffset = currentDataPage.getBaseOffset(); | ||
| final long lengthOffsetInPage = pageBaseOffset + pageCursor; | ||
| PlatformDependent.UNSAFE.putLong(pageBaseObject, lengthOffsetInPage, -1L); | ||
| } | ||
| MemoryBlock newPage = memoryManager.allocatePage(PAGE_SIZE_BYTES); | ||
| dataPages.add(newPage); | ||
| pageCursor = 0; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a confusing name; we shouldn't trade off brevity for clarity like this.