Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve

### Fixed

- We fixed an issue where the "Normalize page numbers" formatter did not replace en-dashes or em-dashes with a hyphen-minus sign. [#7239](https://github.com/JabRef/jabref/issues/7239)
- We fixed an issue with the style of highlighted check boxes while searching in preferences. [#7226](https://github.com/JabRef/jabref/issues/7226)
- We fixed an issue where the option "Move file to file directory" was disabled in the entry editor for all files. [#7194](https://github.com/JabRef/jabref/issues/7194)
- We fixed an issue where application dialogs were opening in the wrong display when using multiple screens. [#7273](https://github.com/JabRef/jabref/pull/7273)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,41 @@
import java.util.regex.Pattern;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.formatter.casechanger.UnprotectTermsFormatter;
import org.jabref.logic.l10n.Localization;

import com.google.common.base.Strings;

/**
* This class includes sensible defaults for consistent formatting of BibTeX page numbers.
*
* <p>
* Format page numbers, separated either by commas or double-hyphens.
* Converts the range number format of the <code>pages</code> field to page_number--page_number.
* Removes unwanted literals except letters, numbers and -+ signs.
* Keeps the existing String if the resulting field does not match the expected Regex.
* <p>
* From BibTeX manual:
* One or more page numbers or range of numbers, such as 42--111 or 7,41,73--97 or 43+
* (the '+' in this last example indicates pages following that don't form a simple range).
* To make it easier to maintain Scribe-compatible databases, the standard styles convert
* a single dash (as in 7-33) to the double dash used in TEX to denote number ranges (as in 7--33).
* <p>
* Examples:
*
* <ul>
* <li><code>1-2 -> 1--2</code></li>
* <li><code>1---2 -> 1--2</code></li>
* <li><code>1&ndash;2 -> 1--2</code></li>
* <li><code>1,2,3 -> 1,2,3</code></li>
* <li><code>{1}-{2} -> 1--2</code></li>
* <li><code>43+ -> 43+</code></li>
* <li>Invalid -> Invalid</li>
* </ul>
*/
public class NormalizePagesFormatter extends Formatter {

// "startpage" and "endpage" are named groups. See http://stackoverflow.com/a/415635/873282 for a documentation
private static final Pattern PAGES_DETECT_PATTERN = Pattern.compile("\\A(?<startpage>(\\d+:)?\\d+)(?:-{1,2}(?<endpage>(\\d+:)?\\d+))?\\Z");
private static final Pattern EM_EN_DASH_PATTERN = Pattern.compile("\u2013|\u2014");
private static final Pattern DASHES_DETECT_PATTERN = Pattern.compile("[ ]*-+[ ]*");

private static final String REJECT_LITERALS = "[^a-zA-Z0-9,\\-\\+,:]";
private static final String PAGES_REPLACE_PATTERN = "${startpage}--${endpage}";
private static final String SINGLE_PAGE_REPLACE_PATTERN = "$1";
private final Formatter unprotectTermsFormatter = new UnprotectTermsFormatter();

@Override
public String getName() {
Expand All @@ -37,44 +51,31 @@ public String getKey() {
return "normalize_page_numbers";
}

/**
* Format page numbers, separated either by commas or double-hyphens.
* Converts the range number format of the <code>pages</code> field to page_number--page_number.
* Removes unwanted literals except letters, numbers and -+ signs.
* Keeps the existing String if the resulting field does not match the expected Regex.
*
* <example>
* 1-2 -> 1--2
* 1,2,3 -> 1,2,3
* {1}-{2} -> 1--2
* 43+ -> 43+
* Invalid -> Invalid
* </example>
*/
@Override
public String format(String value) {
Objects.requireNonNull(value);

if (value.isEmpty()) {
// nothing to do
return value;
}

value = value.trim();

// Remove pages prefix
String cleanValue = value.replace("pp.", "").replace("p.", "");
// remove unwanted literals including en dash, em dash, and whitespace
cleanValue = cleanValue.replaceAll("\u2013|\u2014", "-").replaceAll(REJECT_LITERALS, "");
// try to find pages pattern
Matcher matcher = PAGES_DETECT_PATTERN.matcher(cleanValue);
if (matcher.matches()) {
// replace
if (Strings.isNullOrEmpty(matcher.group("endpage"))) {
return matcher.replaceFirst(SINGLE_PAGE_REPLACE_PATTERN);
} else {
return matcher.replaceFirst(PAGES_REPLACE_PATTERN);
value = value.replace("pp.", "").replace("p.", "").trim();

// replace em and en dashes by --
value = EM_EN_DASH_PATTERN.matcher(value).replaceAll("--");

Matcher matcher = DASHES_DETECT_PATTERN.matcher(value);
if (matcher.find() && matcher.start() >= 0) {
String fixedValue = matcher.replaceFirst("--");
if (matcher.find()) {
// multiple occurrences --> better do no replacement
return value;
}
return unprotectTermsFormatter.format(fixedValue);
}
// no replacement

return value;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@
import org.jabref.logic.protectedterms.ProtectedTermsLoader;
import org.jabref.logic.util.strings.StringLengthComparator;

/**
* Adds {} brackets around acronyms, month names and countries to preserve their case.
*
* Related formatter: {@link org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter}
*/
public class ProtectTermsFormatter extends Formatter {

private final ProtectedTermsLoader protectedTermsLoader;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.jabref.logic.formatter.casechanger;

import java.util.Objects;

import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.l10n.Localization;

/**
 * Strips all {} braces from a text, provided that the braces are balanced.
 * <p>
 * Balance is checked with a plain nesting counter over the individual characters; a preceding
 * backslash is not treated specially. If the braces turn out to be unbalanced, the text is left untouched.
 * <p>
 * Related formatter: {@link ProtectTermsFormatter}
 */
public class UnprotectTermsFormatter extends Formatter {

    @Override
    public String getName() {
        return Localization.lang("Unprotect terms");
    }

    @Override
    public String getKey() {
        return "unprotect_terms";
    }

    /**
     * Removes every opening and closing brace from the given text.
     *
     * @param text the text to process; must not be null
     * @return the text without any braces, or the unmodified input if a closing brace appears
     *         before its opening counterpart or if an opening brace is never closed
     * @throws NullPointerException if {@code text} is null
     */
    @Override
    public String format(String text) {
        // A similar brace-counting implementation is noted at
        // org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter.hasNegativeBraceCount
        Objects.requireNonNull(text);
        if (text.isEmpty()) {
            return text;
        }

        StringBuilder unbraced = new StringBuilder();
        int depth = 0;
        // Scanning stops early as soon as the depth turns negative, i.e., a '}' without a matching '{' was seen
        for (int position = 0; (position < text.length()) && (depth >= 0); position++) {
            char current = text.charAt(position);
            switch (current) {
                case '{':
                    depth++;
                    break;
                case '}':
                    depth--;
                    break;
                default:
                    // everything that is not a brace is kept as is
                    unbraced.append(current);
                    break;
            }
        }

        // A non-zero depth signals unbalanced braces; in that case, the input is handed back unchanged
        return (depth == 0) ? unbraced.toString() : text;
    }

    @Override
    public String getDescription() {
        return Localization.lang("Removes all balanced {} braces around words.");
    }

    @Override
    public String getExampleInput() {
        return "{In} {CDMA}";
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import org.jabref.logic.cleanup.Formatter;
import org.jabref.logic.l10n.Localization;

/**
* Converts all characters of the given string to upper case, but does not change words starting with "{"
*/
public class UpperCaseFormatter extends Formatter {

@Override
Expand All @@ -15,9 +18,6 @@ public String getKey() {
return "upper_case";
}

/**
* Converts all characters of the given string to upper case, but does not change words starting with "{"
*/
@Override
public String format(String input) {
Title title = new Title(input);
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/l10n/JabRef_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -1430,6 +1430,7 @@ Add\ enclosing\ braces=Add enclosing braces
Add\ braces\ encapsulating\ the\ complete\ field\ content.=Add braces encapsulating the complete field content.
Remove\ enclosing\ braces=Remove enclosing braces
Removes\ braces\ encapsulating\ the\ complete\ field\ content.=Removes braces encapsulating the complete field content.
Removes\ all\ balanced\ {}\ braces\ around\ words.=Removes all balanced {} braces around words.
Shorten\ DOI=Shorten DOI
Shortens\ DOI\ to\ more\ human\ readable\ form.=Shortens DOI to more human readable form.
Sentence\ case=Sentence case
Expand Down Expand Up @@ -2283,5 +2284,6 @@ Regular\ expression=Regular expression

Error\ importing.\ See\ the\ error\ log\ for\ details.=Error importing. See the error log for details.

Unprotect\ terms=Unprotect terms
Error\ connecting\ to\ Writer\ document=Error connecting to Writer document
You\ need\ to\ open\ Writer\ with\ a\ document\ before\ connecting=You need to open Writer with a document before connecting
Loading