Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -252,16 +252,24 @@ private boolean isFarAway(TextPosition previous, TextPosition current) {
return Math.abs(Xgap) > XspaceThreshold && Math.abs(Ygap) > YspaceThreshold;
}

private boolean isUnwantedText(TextPosition previousTextPosition, TextPosition textPosition) {
private boolean isUnwantedText(TextPosition previousTextPosition, TextPosition textPosition,
Map<Float, TextPosition> lastPositionMap, float fontSize) {
// This indicates that the text is at the start of the line, so it is needed.
if (textPosition == null || previousTextPosition == null) {
return false;
}
// We use the font size to identify titles. Blank characters don't have a font size, so we discard them.
// The space will be added back in the final result, but not in this method.
if (StringUtil.isBlank(textPosition.getUnicode())) {
return true;
}
// The title usually don't in the bottom 10% of a page.
return (textPosition.getPageHeight() - textPosition.getYDirAdj())
< (textPosition.getPageHeight() * 0.1);
// Titles are generally not located in the bottom 10% of a page.
if ((textPosition.getPageHeight() - textPosition.getYDirAdj()) < (textPosition.getPageHeight() * 0.1)) {
return true;
}
// Characters in a title typically remain close together,
// so a distant character is unlikely to be part of the title.
return lastPositionMap.containsKey(fontSize) && isFarAway(lastPositionMap.get(fontSize), textPosition);
}

private Optional<String> findLargestFontText(List<TextPosition> textPositions) {
Expand All @@ -271,8 +279,7 @@ private Optional<String> findLargestFontText(List<TextPosition> textPositions) {
for (TextPosition textPosition : textPositions) {
float fontSize = textPosition.getFontSizeInPt();
// Exclude unwanted text based on heuristics
if (isUnwantedText(previousTextPosition, textPosition) ||
(lastPositionMap.containsKey(fontSize) && isFarAway(lastPositionMap.get(fontSize), textPosition))) {
if (isUnwantedText(previousTextPosition, textPosition, lastPositionMap, fontSize)) {
continue;
}
fontSizeTextMap.putIfAbsent(fontSize, new StringBuilder());
Expand Down