Skip to content

Commit 461e368

Browse files
Yubo-CaoSiedlerchr
authored and
Krishna Kumar Parthasarathy
committed
Add support for LTWA (issue JabRef#12276) (JabRef#12880)
* Implement LTWA abbreviation * Create LTWA resource download in the gradle tasks * Connected the LTWA logic with the GUI * Updated the CHANGELOG.md * Fix according to Trag * Reimplement with Antlr * Fix errors from previous PR * Use Optional as Trag suggested * Fix the locale translation issue --------- Co-authored-by: Christoph <[email protected]>
1 parent 8d53f7f commit 461e368

File tree

18 files changed

+1006
-29
lines changed

18 files changed

+1006
-29
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Note that this project **does not** adhere to [Semantic Versioning](https://semv
2828
- We added a new functionality where author names having multiple spaces in-between will be considered as separate user block as it does for " and ". [#12701](https://github.com/JabRef/jabref/issues/12701)
2929
- We enhanced support for parsing XMP metadata from PDF files. [#12829](https://github.com/JabRef/jabref/issues/12829)
3030
- We added a "Preview" header in the JStyles tab in the "Select style" dialog, to make it consistent with the CSL styles tab. [#12838](https://github.com/JabRef/jabref/pull/12838)
31+
- We added an "LTWA" abbreviation feature in the "Quality > Abbreviate journal names > LTWA" menu. [#12273](https://github.com/JabRef/jabref/issues/12273)
3132
- We added path validation to file directories in library properties dialog. [#11840](https://github.com/JabRef/jabref/issues/11840)
3233

3334
### Changed

build.gradle

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -459,14 +459,15 @@ processResources {
459459
tasks.register('generateSource') {
460460
dependsOn("generateBstGrammarSource",
461461
"generateSearchGrammarSource",
462+
"generateLtwaGrammarSource",
462463
"generateCitaviSource")
463464
group = 'JabRef'
464465
description 'Generates all necessary (Java) source files.'
465466
}
466467

467468
tasks.register("generateBstGrammarSource", JavaExec) {
468469
group = "JabRef"
469-
description = 'Generates BstLexer.java and BstParser.java from the Bst.g grammar file using antlr4.'
470+
description = 'Generates BstLexer.java and BstParser.java from the Bst.g4 grammar file using antlr4.'
470471
classpath = configurations.antlr4
471472
mainClass = "org.antlr.v4.Tool"
472473
javaLauncher.set(javaToolchains.launcherFor(java.toolchain))
@@ -478,7 +479,7 @@ tasks.register("generateBstGrammarSource", JavaExec) {
478479

479480
tasks.register("generateSearchGrammarSource", JavaExec) {
480481
group = 'JabRef'
481-
description = "Generates java files for Search.g antlr4."
482+
description = "Generates java files for Search.g4 antlr4."
482483
classpath = configurations.antlr4
483484
mainClass = "org.antlr.v4.Tool"
484485
javaLauncher.set(javaToolchains.launcherFor(java.toolchain))
@@ -488,6 +489,18 @@ tasks.register("generateSearchGrammarSource", JavaExec) {
488489
args = ["-o","src-gen/main/java/org/jabref/search" , "-visitor", "-no-listener", "-package", "org.jabref.search", "$projectDir/src/main/antlr4/org/jabref/search/Search.g4"]
489490
}
490491

492+
tasks.register("generateLtwaGrammarSource", JavaExec) {
493+
group = "JabRef"
494+
description = 'Generates LtwaLexer.java and LtwaParser.java from the Ltwa.g4 grammar file using antlr4.'
495+
classpath = configurations.antlr4
496+
mainClass = "org.antlr.v4.Tool"
497+
javaLauncher.set(javaToolchains.launcherFor(java.toolchain))
498+
499+
inputs.dir('src/main/antlr4/org/jabref/ltwa/')
500+
outputs.dir("src-gen/main/java/org/jabref/logic/journals/ltwa/")
501+
args = ["-o", "src-gen/main/java/org/jabref/logic/journals/ltwa/", "-no-visitor", "-listener", "-package", "org.jabref.logic.journals.ltwa", "$projectDir/src/main/antlr4/org/jabref/ltwa/Ltwa.g4"]
502+
}
503+
491504
tasks.register("generateJournalListMV", JavaExec) {
492505
group = "JabRef"
493506
description = "Converts the comma-separated journal abbreviation file to a H2 MVStore"
@@ -502,6 +515,42 @@ tasks.register("generateJournalListMV", JavaExec) {
502515
jar.dependsOn("generateJournalListMV")
503516
compileTestJava.dependsOn("generateJournalListMV")
504517

518+
// Downloads the LTWA CSV into build/resources/main/journals.
// The task is skipped entirely when the file is already present, so the action
// itself does not need to re-check for existence.
tasks.register('downloadLtwaFile') {
    group = "JabRef"
    description = "Downloads the LTWA file for journal abbreviations"

    // Single definition of the target path, shared by the guard and the action.
    def ltwaCsvFile = file("build/resources/main/journals/ltwa_20210702.csv")

    onlyIf {
        !ltwaCsvFile.exists()
    }
    doLast {
        def ltwaUrl = "https://www.issn.org/wp-content/uploads/2021/07/ltwa_20210702.csv"

        mkdir(ltwaCsvFile.parentFile)
        ant.get(src: ltwaUrl, dest: ltwaCsvFile, verbose: true)
        logger.lifecycle("Downloaded LTWA file to ${ltwaCsvFile}")
    }
}
538+
539+
tasks.register('generateLtwaListMV', JavaExec) {
540+
group = "JabRef"
541+
description = "Converts the LTWA CSV file to a H2 MVStore"
542+
classpath = sourceSets.main.runtimeClasspath
543+
mainClass = "org.jabref.cli.LtwaListMvGenerator"
544+
javaLauncher.set(javaToolchains.launcherFor(java.toolchain))
545+
dependsOn('downloadLtwaFile')
546+
onlyIf {
547+
!file("build/resources/main/journals/ltwa-list.mv").exists()
548+
}
549+
}
550+
551+
jar.dependsOn("generateLtwaListMV")
552+
compileTestJava.dependsOn("generateLtwaListMV")
553+
505554
tasks.register('generateCitaviSource', XjcTask) {
506555
group = 'JabRef'
507556
description = "Generates java files for the citavi importer."
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
grammar Ltwa;
2+
options { caseInsensitive=true; }
3+
4+
@lexer::members {
5+
private boolean isNextBoundary() {
6+
int lookAhead = _input.LA(1);
7+
return lookAhead == EOF ||
8+
lookAhead == ' ' || lookAhead == '\t' || lookAhead == '\r' || lookAhead == '\n' ||
9+
lookAhead == '-' || lookAhead == '\u2013' || lookAhead == '\u2014' ||
10+
lookAhead == '_' || lookAhead == '.' || lookAhead == ',' ||
11+
lookAhead == ':' || lookAhead == ';' || lookAhead == '!' ||
12+
lookAhead == '|' || lookAhead == '=' || lookAhead == '+' ||
13+
lookAhead == '*' || lookAhead == '\\' || lookAhead == '/' ||
14+
lookAhead == '"' || lookAhead == '(' || lookAhead == ')' ||
15+
lookAhead == '&' || lookAhead == '#' || lookAhead == '%' ||
16+
lookAhead == '@' || lookAhead == '$' || lookAhead == '?';
17+
}
18+
19+
private boolean isNotHyphenated() {
20+
int lookAhead = _input.LA(1);
21+
return lookAhead == EOF || lookAhead != '-';
22+
}
23+
}
24+
25+
fragment COMMON_ABBR_FRAGMENT options { caseInsensitive=false; }: 'St' | 'Mr' | 'Ms' | 'Mrs' | 'Mx' | 'Dr' | 'Prof' | 'vs';
26+
fragment PART_ABBR_FRAGMENT: 'ser' | 'sect' | 'sec';
27+
28+
ABBREVIATION
29+
: (COMMON_ABBR_FRAGMENT | PART_ABBR_FRAGMENT) '.' {isNextBoundary()}?
30+
| ([A-Z] '.')+ {isNextBoundary()}?;
31+
32+
PART: ('series' | 'serie' | 'part' | 'section' | 'série' | 'supplemento' | 'chapter' | 'parte') {isNextBoundary()}?;
33+
34+
ORDINAL options { caseInsensitive=false; }: ([IVXivx]+ | [A-Z]) {isNextBoundary()}?;
35+
36+
ARTICLE: ('l\'' | 'd\'' | 'dell\'' | 'nell\'')
37+
| ('a' | 'an' | 'the'
38+
| 'der' | 'die' | 'das' | 'des' | 'dem' | 'den'
39+
| 'el' | 'la' | 'los' | 'las' | 'un' | 'una' | 'unos' | 'unas'
40+
| 'le' | 'la' | 'les' | 'un' | 'une' | 'des' | 'du' | 'de la' | 'au' | 'aux'
41+
| 'dell' | 'nell') {isNextBoundary()}? {isNotHyphenated()}?;
42+
43+
STOPWORD: ('a' | 'an' | 'the' | 'and' | 'but' | 'or' | 'for' | 'nor' | 'so' | 'yet' | 'though'
44+
| 'when' | 'whenever' | 'where' | 'whereas' | 'wherever' | 'while' | 'about' | 'afore'
45+
| 'after' | 'ago' | 'along' | 'amid' | 'among' | 'amongst' | 'apropos' | 'as' | 'at'
46+
| 'atop' | 'by' | 'ca' | 'circa' | 'from' | 'hence' | 'in' | 'into'
47+
| 'like' | 'of' | 'off' | 'on' | 'onto' | 'ontop' | 'out' | 'over' | 'per' | 'since'
48+
| 'than' | 'til' | 'till' | 'to' | 'unlike' | 'until' | 'unto' | 'up' | 'upon' | 'upside'
49+
| 'versus' | 'via' | 'vis-a-vis' | 'vs' | 'with' | 'within' | 'für' | 'und' | 'aus'
50+
| 'zu' | 'zur' | 'im' | 'de' | 'et' | 'y' | 'del' | 'en' | 'di' | 'e' | 'da' | 'delle'
51+
| 'della' | 'sue' | 'el' | 'do' | 'og' | 'i' | 'voor' | 'van' | 'dell\'' | 'dell' | 'ed'
52+
| 'för' | 'tot' | 'vir' | 'o' | 'its' | 'sul') {isNextBoundary()}? {isNotHyphenated()}?;
53+
54+
HYPHEN: '-';
55+
56+
SYMBOLS: [.,;!?&+=*#%@$] | '\'';
57+
58+
fragment LETTER: 'A'..'Z' | 'À'..'Ö' | 'ø'..'ÿ' | '\u0100'..'\u017F' | '\u4E00'..'\u9FFF';
59+
60+
WORD: (LETTER+ '\'' + [a-z]) {isNextBoundary()}? // e.g., Shi'a, parent's
61+
| (LETTER+ '\'') {isNextBoundary()}? // Word ending with apostrophe, e.g., Parents' (plural possessive)
62+
| LETTER + ('.' + LETTER+)+ {isNextBoundary()}? // e.g., Humana.Mente
63+
| (LETTER | [0-9])+ {isNextBoundary()}?; // Regular word
64+
65+
WS: [ \t\r\n]+ -> skip;
66+
67+
// Parser rules
68+
title
69+
: singleWordTitle EOF #SingleWordTitleFull
70+
| stopwordPlusAny EOF #StopwordPlusTitleFull
71+
| anyPlusSymbols EOF #AnyPlusSymbolsFull
72+
| normalTitle EOF #NormalTitleFull
73+
;
74+
75+
singleWordTitle
76+
: (WORD | STOPWORD | PART | ORDINAL | ABBREVIATION)
77+
;
78+
79+
stopwordPlusAny
80+
: STOPWORD (WORD | PART | ORDINAL | ABBREVIATION)
81+
;
82+
83+
anyPlusSymbols
84+
: (WORD | STOPWORD | PART | ORDINAL | ABBREVIATION) SYMBOLS
85+
;
86+
87+
normalTitle
88+
: titleElement+
89+
;
90+
91+
titleElement
92+
: article #ArticleElement
93+
| stopword #StopwordElement
94+
| symbols #SymbolsElement
95+
| ordinal #OrdinalElement
96+
| word #WordElement
97+
| hyphen #HyphenElement
98+
| part #PartElement
99+
| abbreviation #AbbreviationElement
100+
;
101+
102+
// Rules for each token type
103+
article : ARTICLE;
104+
stopword : STOPWORD;
105+
symbols : SYMBOLS;
106+
ordinal : ORDINAL;
107+
word : WORD;
108+
hyphen : HYPHEN;
109+
part : PART;
110+
abbreviation : ABBREVIATION;
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
package org.jabref.cli;
2+
3+
import java.io.IOException;
4+
import java.net.URI;
5+
import java.net.URISyntaxException;
6+
import java.nio.file.Files;
7+
import java.nio.file.Path;
8+
import java.nio.file.StandardOpenOption;
9+
import java.util.List;
10+
import java.util.stream.Collectors;
11+
import java.util.stream.Stream;
12+
13+
import org.jabref.logic.journals.ltwa.LtwaEntry;
14+
import org.jabref.logic.journals.ltwa.LtwaTsvParser;
15+
import org.jabref.logic.journals.ltwa.NormalizeUtils;
16+
import org.jabref.logic.journals.ltwa.PrefixTree;
17+
18+
import org.h2.mvstore.MVMap;
19+
import org.h2.mvstore.MVStore;
20+
import org.slf4j.Logger;
21+
import org.slf4j.LoggerFactory;
22+
23+
/**
24+
* CLI tool for downloading the LTWA CSV file and converting it to an MVStore file.
25+
*/
26+
public class LtwaListMvGenerator {
27+
28+
private static final Logger LOGGER = LoggerFactory.getLogger(LtwaListMvGenerator.class);
29+
private static final String LTWA_URL = "https://www.issn.org/wp-content/uploads/2021/07/ltwa_20210702.csv";
30+
31+
public static void main(String[] args) {
32+
try {
33+
Path tempCsvFile = downloadLtwaFile();
34+
Path outputDir = Path.of("build", "resources", "main", "journals");
35+
Files.createDirectories(outputDir);
36+
Path outputFile = outputDir.resolve("ltwa-list.mv");
37+
38+
generateMvStore(tempCsvFile, outputFile);
39+
40+
// Delete temp file
41+
Files.deleteIfExists(tempCsvFile);
42+
43+
LOGGER.info("LTWA MVStore file generated successfully at {}.", outputFile);
44+
} catch (IOException e) {
45+
LOGGER.error("Error generating LTWA MVStore file.", e);
46+
} catch (URISyntaxException e) {
47+
LOGGER.error("Invalid URL for LTWA file (this should never happen).", e);
48+
}
49+
}
50+
51+
/**
52+
* Downloads the LTWA CSV file from the specified URL.
53+
*
54+
* @return Path to the downloaded file
55+
* @throws IOException If an I/O error occurs
56+
*/
57+
private static Path downloadLtwaFile() throws IOException, URISyntaxException {
58+
LOGGER.info("Downloading LTWA file from {}.", LtwaListMvGenerator.LTWA_URL);
59+
var in = new URI(LTWA_URL).toURL().openStream();
60+
var path = Files.writeString(
61+
Files.createTempFile("ltwa", ".csv"),
62+
new String(in.readAllBytes()),
63+
StandardOpenOption.CREATE,
64+
StandardOpenOption.TRUNCATE_EXISTING);
65+
in.close();
66+
return path;
67+
}
68+
69+
/**
70+
* Generates an MVStore file from the LTWA CSV file.
71+
*
72+
* @param inputFile Path to the LTWA CSV file
73+
* @param outputFile Path where the MVStore file will be written
74+
* @throws IOException If an I/O error occurs
75+
*/
76+
private static void generateMvStore(Path inputFile, Path outputFile) throws IOException {
77+
LOGGER.info("Parsing LTWA file...");
78+
LtwaTsvParser parser = new LtwaTsvParser(inputFile);
79+
List<LtwaEntry> entries = parser.parse();
80+
81+
LOGGER.info("Found {} LTWA entries", entries.size());
82+
83+
try (MVStore store = new MVStore.Builder()
84+
.fileName(outputFile.toString())
85+
.compressHigh()
86+
.open()) {
87+
MVMap<String, List<LtwaEntry>> prefixMap = store.openMap("Prefixes");
88+
MVMap<String, List<LtwaEntry>> suffixMap = store.openMap("Suffixes");
89+
String inflection = Character.toString(PrefixTree.WILD_CARD).repeat(3) + " ";
90+
91+
entries.forEach(entry ->
92+
NormalizeUtils.normalize(entry.word())
93+
.map(String::toLowerCase)
94+
.map(word -> word.replace(" ", inflection))
95+
.ifPresent(word -> {
96+
if (word.startsWith("-")) {
97+
String key = word.substring(1);
98+
suffixMap.computeIfAbsent(key, k ->
99+
Stream.<LtwaEntry>builder().build().collect(Collectors.toList())
100+
).add(entry);
101+
} else {
102+
String key = word.endsWith("-") ? word.substring(0, word.length() - 1) : word;
103+
prefixMap.computeIfAbsent(key, k ->
104+
Stream.<LtwaEntry>builder().build().collect(Collectors.toList())
105+
).add(entry);
106+
}
107+
})
108+
);
109+
110+
LOGGER.info("Stored {} prefixes and {} suffixes", prefixMap.size(), suffixMap.size());
111+
}
112+
}
113+
}

src/main/java/org/jabref/gui/actions/StandardActions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ public enum StandardActions implements Action {
113113
ABBREVIATE_DEFAULT(Localization.lang("default"), Localization.lang("Abbreviate journal names of the selected entries (DEFAULT abbreviation)"), KeyBinding.ABBREVIATE),
114114
ABBREVIATE_DOTLESS(Localization.lang("dotless"), Localization.lang("Abbreviate journal names of the selected entries (DOTLESS abbreviation)")),
115115
ABBREVIATE_SHORTEST_UNIQUE(Localization.lang("shortest unique"), Localization.lang("Abbreviate journal names of the selected entries (SHORTEST UNIQUE abbreviation)")),
116+
ABBREVIATE_LTWA(Localization.lang("LTWA"), Localization.lang("Abbreviate journal names of the selected entries (LTWA)")),
116117
UNABBREVIATE(Localization.lang("Unabbreviate journal names"), Localization.lang("Unabbreviate journal names of the selected entries"), KeyBinding.UNABBREVIATE),
117118

118119
MANAGE_CUSTOM_EXPORTS(Localization.lang("Manage custom exports")),

src/main/java/org/jabref/gui/frame/MainMenu.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,8 @@ private void createMenu() {
265265
factory.createSubMenu(StandardActions.ABBREVIATE,
266266
factory.createMenuItem(StandardActions.ABBREVIATE_DEFAULT, new AbbreviateAction(StandardActions.ABBREVIATE_DEFAULT, frame::getCurrentLibraryTab, dialogService, stateManager, preferences.getJournalAbbreviationPreferences(), abbreviationRepository, taskExecutor, undoManager)),
267267
factory.createMenuItem(StandardActions.ABBREVIATE_DOTLESS, new AbbreviateAction(StandardActions.ABBREVIATE_DOTLESS, frame::getCurrentLibraryTab, dialogService, stateManager, preferences.getJournalAbbreviationPreferences(), abbreviationRepository, taskExecutor, undoManager)),
268-
factory.createMenuItem(StandardActions.ABBREVIATE_SHORTEST_UNIQUE, new AbbreviateAction(StandardActions.ABBREVIATE_SHORTEST_UNIQUE, frame::getCurrentLibraryTab, dialogService, stateManager, preferences.getJournalAbbreviationPreferences(), abbreviationRepository, taskExecutor, undoManager))),
268+
factory.createMenuItem(StandardActions.ABBREVIATE_SHORTEST_UNIQUE, new AbbreviateAction(StandardActions.ABBREVIATE_SHORTEST_UNIQUE, frame::getCurrentLibraryTab, dialogService, stateManager, preferences.getJournalAbbreviationPreferences(), abbreviationRepository, taskExecutor, undoManager)),
269+
factory.createMenuItem(StandardActions.ABBREVIATE_LTWA, new AbbreviateAction(StandardActions.ABBREVIATE_LTWA, frame::getCurrentLibraryTab, dialogService, stateManager, preferences.getJournalAbbreviationPreferences(), abbreviationRepository, taskExecutor, undoManager))),
269270

270271
factory.createMenuItem(StandardActions.UNABBREVIATE, new AbbreviateAction(StandardActions.UNABBREVIATE, frame::getCurrentLibraryTab, dialogService, stateManager, preferences.getJournalAbbreviationPreferences(), abbreviationRepository, taskExecutor, undoManager))
271272
);

src/main/java/org/jabref/gui/journals/AbbreviateAction.java

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,10 +60,16 @@ public AbbreviateAction(StandardActions action,
6060
this.undoManager = undoManager;
6161

6262
switch (action) {
63-
case ABBREVIATE_DEFAULT -> abbreviationType = AbbreviationType.DEFAULT;
64-
case ABBREVIATE_DOTLESS -> abbreviationType = AbbreviationType.DOTLESS;
65-
case ABBREVIATE_SHORTEST_UNIQUE -> abbreviationType = AbbreviationType.SHORTEST_UNIQUE;
66-
default -> LOGGER.debug("Unknown action: {}", action.name());
63+
case ABBREVIATE_DEFAULT ->
64+
abbreviationType = AbbreviationType.DEFAULT;
65+
case ABBREVIATE_DOTLESS ->
66+
abbreviationType = AbbreviationType.DOTLESS;
67+
case ABBREVIATE_SHORTEST_UNIQUE ->
68+
abbreviationType = AbbreviationType.SHORTEST_UNIQUE;
69+
case ABBREVIATE_LTWA ->
70+
abbreviationType = AbbreviationType.LTWA;
71+
default ->
72+
LOGGER.debug("Unknown action: {}", action.name());
6773
}
6874

6975
this.executable.bind(ActionHelper.needsEntriesSelected(stateManager));
@@ -73,7 +79,8 @@ public AbbreviateAction(StandardActions action,
7379
public void execute() {
7480
if ((action == StandardActions.ABBREVIATE_DEFAULT)
7581
|| (action == StandardActions.ABBREVIATE_DOTLESS)
76-
|| (action == StandardActions.ABBREVIATE_SHORTEST_UNIQUE)) {
82+
|| (action == StandardActions.ABBREVIATE_SHORTEST_UNIQUE)
83+
|| (action == StandardActions.ABBREVIATE_LTWA)) {
7784
dialogService.notify(Localization.lang("Abbreviating..."));
7885
stateManager.getActiveDatabase().ifPresent(_ ->
7986
BackgroundTask.wrap(() -> abbreviate(stateManager.getActiveDatabase().get(), stateManager.getSelectedEntries()))

src/main/java/org/jabref/gui/journals/AbbreviationType.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,15 @@
22

33
/**
44
* Defines the different abbreviation types that JabRef can operate with.
5+
*
6+
* DEFAULT: Default abbreviation type, which is the standard behavior.
7+
* DOTLESS: Abbreviation type that does not include dots in the abbreviation.
8+
* SHORTEST_UNIQUE: Abbreviation type that generates the shortest unique abbreviation.
9+
* LTWA: Abbreviation type that uses the LTWA (List of Title Word Abbreviations)/ISO4 method.
510
*/
611
public enum AbbreviationType {
712
DEFAULT,
813
DOTLESS,
9-
SHORTEST_UNIQUE
14+
SHORTEST_UNIQUE,
15+
LTWA
1016
}

0 commit comments

Comments
 (0)