diff --git a/CHANGELOG.md b/CHANGELOG.md index e3c47001910..0944491f94d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# - We fixed wrong hotkey being displayed at "automatically file links" in the entry editor - We fixed an issue where metadata syncing with local and shared database were unstable. It will also fix syncing groups and sub-groups in database. [#2284](https://github.com/JabRef/jabref/issues/2284) - Renaming files now truncates the filename to not exceed the limit of 255 chars [#2622](https://github.com/JabRef/jabref/issues/2622) +- We improved the handling of hyphens in names. [#2775](https://github.com/JabRef/jabref/issues/2775) ### Removed - We removed support for LatexEditor, as it is not under active development. [#3199](https://github.com/JabRef/jabref/issues/3199) diff --git a/src/main/java/org/jabref/model/entry/AuthorListParser.java b/src/main/java/org/jabref/model/entry/AuthorListParser.java index 234371d4a98..bd9093b348c 100644 --- a/src/main/java/org/jabref/model/entry/AuthorListParser.java +++ b/src/main/java/org/jabref/model/entry/AuthorListParser.java @@ -32,25 +32,6 @@ public class AuthorListParser { // Constant HashSet containing names of TeX special characters private static final Set TEX_NAMES = new HashSet<>(); - /** the raw bibtex author/editor field */ - private String original; - - /** index of the start in original, for example to point to 'abc' in 'abc xyz', tokenStart=2 */ - private int tokenStart; - - /** index of the end in original, for example to point to 'abc' in 'abc xyz', tokenEnd=5 */ - private int tokenEnd; - - /** end of token abbreviation (always: tokenStart < tokenAbbr <= tokenEnd), only valid if getToken returns TOKEN_WORD */ - private int tokenAbbr; - - - /** either space of dash */ - private char tokenTerm; - - /** true if upper-case token, false if lower-case */ - private boolean tokenCase; - static { 
TEX_NAMES.add("aa"); TEX_NAMES.add("ae"); @@ -66,6 +47,32 @@ public class AuthorListParser { TEX_NAMES.add("j"); } + /** + * the raw bibtex author/editor field + */ + private String original; + /** + * index of the start in original, for example to point to 'abc' in 'abc xyz', tokenStart=2 + */ + private int tokenStart; + /** + * index of the end in original, for example to point to 'abc' in 'abc xyz', tokenEnd=5 + */ + private int tokenEnd; + /** + * end of token abbreviation (always: tokenStart < tokenAbbrEnd <= tokenEnd), only valid if getToken returns + * TOKEN_WORD + */ + private int tokenAbbrEnd; + /** + * either space or dash + */ + private char tokenTerm; + /** + * true if upper-case token, false if lower-case + */ + private boolean tokenCase; + /** * Parses the String containing person names and returns a list of person information. * @@ -121,7 +128,7 @@ private Optional getAuthor() { break; case TOKEN_WORD: tokens.add(original.substring(tokenStart, tokenEnd)); - tokens.add(original.substring(tokenStart, tokenAbbr)); + tokens.add(original.substring(tokenStart, tokenAbbrEnd)); tokens.add(tokenTerm); tokens.add(tokenCase); if (commaFirst >= 0) { @@ -137,6 +144,13 @@ private Optional getAuthor() { // We are in a first name which contained a hyphen break; } + + int thisTermToken = previousTermToken + TOKEN_GROUP_LENGTH; + if ((thisTermToken >= 0) && tokens.get(thisTermToken).equals('-')) { + // We are in a name which contained a hyphen + break; + } + vonStart = tokens.size() - TOKEN_GROUP_LENGTH; break; } @@ -194,14 +208,16 @@ private Optional getAuthor() { firstPartStart = 0; } } - } else { // commas are present: it affects only 'first part' and - // 'junior part' + } else { + // commas are present: it affects only 'first part' and 'junior part' firstPartEnd = tokens.size(); - if (commaSecond < 0) { // one comma + if (commaSecond < 0) { + // one comma if (commaFirst < firstPartEnd) { firstPartStart = commaFirst; } - } else { // two or more commas + } else { + 
// two or more commas if (commaSecond < firstPartEnd) { firstPartStart = commaSecond; } @@ -342,7 +358,7 @@ private int getToken() { tokenEnd++; return TOKEN_AND; } - tokenAbbr = -1; + tokenAbbrEnd = -1; tokenTerm = ' '; tokenCase = true; int bracesLevel = 0; @@ -353,8 +369,9 @@ private int getToken() { if (c == '{') { bracesLevel++; } - if (firstLetterIsFound && (tokenAbbr < 0) && ((bracesLevel == 0) || (c == '{'))) { - tokenAbbr = tokenEnd; + + if (firstLetterIsFound && (tokenAbbrEnd < 0) && ((bracesLevel == 0) || (c == '{'))) { + tokenAbbrEnd = tokenEnd; } if ((c == '}') && (bracesLevel > 0)) { bracesLevel--; @@ -388,8 +405,8 @@ private int getToken() { } tokenEnd++; } - if (tokenAbbr < 0) { - tokenAbbr = tokenEnd; + if (tokenAbbrEnd < 0) { + tokenAbbrEnd = tokenEnd; } if ((tokenEnd < original.length()) && (original.charAt(tokenEnd) == '-')) { tokenTerm = '-'; diff --git a/src/test/java/org/jabref/model/entry/AuthorListTest.java b/src/test/java/org/jabref/model/entry/AuthorListTest.java index 7b234f41bb7..3926e212f54 100644 --- a/src/test/java/org/jabref/model/entry/AuthorListTest.java +++ b/src/test/java/org/jabref/model/entry/AuthorListTest.java @@ -7,6 +7,10 @@ public class AuthorListTest { + public static int size(String bibtex) { + return AuthorList.parse(bibtex).getNumberOfAuthors(); + } + @Test public void testFixAuthorNatbib() { Assert.assertEquals("", AuthorList.fixAuthorNatbib("")); @@ -286,10 +290,6 @@ public void testFixAuthorForAlphabetization() { .fixAuthorForAlphabetization("John von Neumann and John Smith and de Black Brown, Jr., Peter")); } - public static int size(String bibtex) { - return AuthorList.parse(bibtex).getNumberOfAuthors(); - } - @Test public void testSize() { @@ -625,6 +625,25 @@ public void parseNameWithHyphenInLastName() throws Exception { Assert.assertEquals(new AuthorList(expected), AuthorList.parse("Firstname Bailey-Jones")); } + @Test + public void parseNameWithHyphenInLastNameWithInitials() throws Exception { + Author 
expected = new Author("E. S.", "E. S.", null, "El-{M}allah", null); + Assert.assertEquals(new AuthorList(expected), AuthorList.parse("E. S. El-{M}allah")); + } + + @Test + public void parseNameWithHyphenInLastNameWithEscaped() throws Exception { + Author expected = new Author("E. S.", "E. S.", null, "{K}ent-{B}oswell", null); + Assert.assertEquals(new AuthorList(expected), AuthorList.parse("E. S. {K}ent-{B}oswell")); + } + + @Test + public void parseNameWithHyphenInLastNameWhenLastNameGivenFirst() throws Exception { + // TODO: Fix abbreviation to be "A." + Author expected = new Author("ʿAbdallāh", "ʿ.", null, "al-Ṣāliḥ", null); + Assert.assertEquals(new AuthorList(expected), AuthorList.parse("al-Ṣāliḥ, ʿAbdallāh")); + } + @Test public void parseNameWithBraces() throws Exception { Author expected = new Author("H{e}lene", "H.", null, "Fiaux", null);