Skip to content

Commit

Permalink
Merge pull request #807 from kermitt2/follow-up-761
Browse files Browse the repository at this point in the history
Review date handling with header consolidation and date serialization in references
  • Loading branch information
kermitt2 authored Aug 18, 2021
2 parents d134e1f + 0bd69f0 commit e450e4f
Show file tree
Hide file tree
Showing 6 changed files with 381 additions and 158 deletions.
188 changes: 91 additions & 97 deletions grobid-core/src/main/java/org/grobid/core/data/BiblioItem.java
Original file line number Diff line number Diff line change
Expand Up @@ -906,6 +906,10 @@ public void setNormalizedPublicationDate(Date theDate) {
this.normalized_publication_date = theDate;
}

/**
 * Merges the given date into the current normalized publication date.
 * When a normalized publication date is already present, the combination is
 * delegated to Date.merge(current, theDate) and the result replaces the current value.
 *
 * @param theDate the date to merge in; a null value leaves the current date unchanged
 */
public void mergeNormalizedPublicationDate(Date theDate) {
    if (theDate == null) {
        // nothing to merge: keep whatever is currently stored
        return;
    }
    if (this.normalized_publication_date == null) {
        // no date stored yet: adopt the incoming one, exactly as
        // setNormalizedPublicationDate would, instead of dereferencing null in Date.merge
        this.normalized_publication_date = theDate;
        return;
    }
    this.normalized_publication_date = Date.merge(this.normalized_publication_date, theDate);
}

/**
 * Stores the editors string after passing it through StringUtils.normalizeSpace.
 *
 * @param theEditors the raw editors string
 */
public void setEditors(String theEditors) {
    final String normalizedEditors = StringUtils.normalizeSpace(theEditors);
    this.editors = normalizedEditors;
}
Expand Down Expand Up @@ -1982,10 +1986,23 @@ public String toBibTeX(String id, GrobidAnalysisConfig config) {

// dates
if (normalized_publication_date != null) {
String isoDate = TEIFormatter.toISOString(normalized_publication_date);
String isoDate = Date.toISOString(normalized_publication_date);
if (isoDate != null) {
bibtex.add(" date = {" + isoDate + "}");
}
if (normalized_publication_date.getYear() >= 0) {
bibtex.add(" year = {" + normalized_publication_date.getYear() + "}");

if (normalized_publication_date.getMonth() >= 0) {
bibtex.add(" month = {" + normalized_publication_date.getMonth() + "}");

if (normalized_publication_date.getDay() >= 0) {
bibtex.add(" day = {" + normalized_publication_date.getDay() + "}");
}
}
}
} else if (publication_date != null) {
bibtex.add(" year = {" + publication_date + "}");
}

// address
Expand Down Expand Up @@ -2475,40 +2492,22 @@ else if (bookTitle == null) {
}

if (normalized_publication_date != null) {
if ((normalized_publication_date.getDay() != -1) ||
(normalized_publication_date.getMonth() != -1) ||
(normalized_publication_date.getYear() != -1)) {
int year = normalized_publication_date.getYear();
int month = normalized_publication_date.getMonth();
int day = normalized_publication_date.getDay();

if (year != -1) {
String when = "";
if (year <= 9)
when += "000" + year;
else if (year <= 99)
when += "00" + year;
else if (year <= 999)
when += "0" + year;
else
when += year;
if (month != -1) {
if (month <= 9)
when += "-0" + month;
else
when += "-" + month;
if (day != -1) {
if (day <= 9)
when += "-0" + day;
else
when += "-" + day;
}
}
if (normalized_publication_date.getYear() != -1) {
String when = Date.toISOString(normalized_publication_date);
if (when != null) {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
}
} else if (this.getYear() != null) {
String when = "";
Expand Down Expand Up @@ -2537,7 +2536,15 @@ else if (this.getYear().length() == 4)
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
} else {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
Expand Down Expand Up @@ -2688,40 +2695,22 @@ else if (this.getYear().length() == 4)

// date
if (normalized_publication_date != null) {
if ((normalized_publication_date.getDay() != -1) ||
(normalized_publication_date.getMonth() != -1) ||
(normalized_publication_date.getYear() != -1)) {
int year = normalized_publication_date.getYear();
int month = normalized_publication_date.getMonth();
int day = normalized_publication_date.getDay();

if (year != -1) {
String when = "";
if (year <= 9)
when += "000" + year;
else if (year <= 99)
when += "00" + year;
else if (year <= 999)
when += "0" + year;
else
when += year;
if (month != -1) {
if (month <= 9)
when += "-0" + month;
else
when += "-" + month;
if (day != -1) {
if (day <= 9)
when += "-0" + day;
else
when += "-" + day;
}
}
if (normalized_publication_date.getYear() != -1) {
String when = Date.toISOString(normalized_publication_date);
if (when != null) {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
}
} else if (this.getYear() != null) {
String when = "";
Expand Down Expand Up @@ -2750,7 +2739,15 @@ else if (this.getYear().length() == 4)
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
} else {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
Expand Down Expand Up @@ -2819,40 +2816,22 @@ else if (this.getYear().length() == 4)
}
// date
if (normalized_publication_date != null) {
if ((normalized_publication_date.getDay() != -1) |
(normalized_publication_date.getMonth() != -1) |
(normalized_publication_date.getYear() != -1)) {
int year = normalized_publication_date.getYear();
int month = normalized_publication_date.getMonth();
int day = normalized_publication_date.getDay();

if (year != -1) {
String when = "";
if (year <= 9)
when += "000" + year;
else if (year <= 99)
when += "00" + year;
else if (year <= 999)
when += "0" + year;
else
when += year;
if (month != -1) {
if (month <= 9)
when += "-0" + month;
else
when += "-" + month;
if (day != -1) {
if (day <= 9)
when += "-0" + day;
else
when += "-" + day;
}
}
if (normalized_publication_date.getYear() != -1) {
String when = Date.toISOString(normalized_publication_date);
if (when != null) {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
}
} else if (this.getYear() != null) {
String when = "";
Expand Down Expand Up @@ -2881,7 +2860,15 @@ else if (this.getYear().length() == 4)
tei.append("\t");
}
tei.append("<date type=\"published\" when=\"");
tei.append(when + "\" />\n");
tei.append(when + "\"");

if (publication_date != null && publication_date.length() > 0) {
tei.append(">");
tei.append(TextUtilities.HTMLEncode(publication_date) );
tei.append("</date>\n");
} else {
tei.append(" />\n");
}
} else {
for (int i = 0; i < indent + 3; i++) {
tei.append("\t");
Expand Down Expand Up @@ -4390,10 +4377,17 @@ public static void correct(BiblioItem bib, BiblioItem bibo) {
bib.setSubmissionDate(bibo.getSubmissionDate());
if (bibo.getDownloadDate() != null)
bib.setDownloadDate(bibo.getDownloadDate());
if (bibo.getYear() != null)

if (bibo.getNormalizedPublicationDate() != null) {
if (bib.getNormalizedPublicationDate() != null) {
bib.mergeNormalizedPublicationDate(bibo.getNormalizedPublicationDate());
}
else {
bib.setNormalizedPublicationDate(bibo.getNormalizedPublicationDate());
}
}
if (bibo.getYear() != null)
bib.setYear(bibo.getYear());
if (bibo.getNormalizedPublicationDate() != null)
bib.setNormalizedPublicationDate(bibo.getNormalizedPublicationDate());
if (bibo.getMonth() != null)
bib.setMonth(bibo.getMonth());
if (bibo.getDay() != null)
Expand Down
91 changes: 82 additions & 9 deletions grobid-core/src/main/java/org/grobid/core/data/Date.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@
* 19.10.2010 < 20.10.2010
* 1999 < 10.2000
* 10.1999 < 2000
* which is not the same as a comparison in term of the time flow only.
* For comparing dates in term of strict time flow, please use java.util.Date + java.util.Calendar
*
* which is not the same as a comparison based only on time flow.
* For comparing dates by strict time flow, please use java.util.Date + java.util.Calendar
*/
public class Date implements Comparable {
public class Date implements Comparable<Date> {
private int day = -1;
private int month = -1;
private int year = -1;
Expand All @@ -22,6 +21,19 @@ public class Date implements Comparable {
private String monthString = null;
private String yearString = null;

/**
 * Creates a date whose fields all keep their declared initial values.
 */
public Date() {
}

/**
 * Copy constructor: creates a date carrying the same numeric components,
 * raw string and per-component strings as the given date.
 *
 * @param fromDate the date to copy from
 */
public Date(Date fromDate) {
    // textual forms
    this.rawDate = fromDate.rawDate;
    this.yearString = fromDate.yearString;
    this.monthString = fromDate.monthString;
    this.dayString = fromDate.dayString;

    // numeric components
    this.year = fromDate.year;
    this.month = fromDate.month;
    this.day = fromDate.day;
}

/**
 * Returns the numeric day of this date; the field is initialized to -1.
 */
public int getDay() {
    return this.day;
}
Expand Down Expand Up @@ -125,11 +137,7 @@ public int compareTo(Date another) {

return EQUAL;
}

/**
 * Untyped comparison entry point: casts the argument to Date and delegates
 * to compareTo(Date).
 */
public int compareTo(Object another) {
    final Date other = (Date) another;
    return compareTo(other);
}


public boolean isNotNull() {
return (rawDate != null) ||
(dayString != null) ||
Expand All @@ -144,6 +152,71 @@ public boolean isAmbiguous() {
return false;
}

/**
 * Serializes the numeric components of a date as "YYYY", "YYYY-MM" or "YYYY-MM-DD".
 * A component equal to -1 is considered absent; serialization stops at the first
 * absent component, and an empty string is returned when the year is absent.
 * Years are left-padded with zeros to four digits, months and days to two digits.
 *
 * @param date the date to serialize
 * @return the serialized date, or an empty string when the year is -1
 */
public static String toISOString(Date date) {
    final int y = date.getYear();
    final int m = date.getMonth();
    final int d = date.getDay();

    if (y == -1) {
        return "";
    }

    final StringBuilder iso = new StringBuilder();

    // zero-pad the year up to four digits
    final String yearPad;
    if (y <= 9) {
        yearPad = "000";
    } else if (y <= 99) {
        yearPad = "00";
    } else if (y <= 999) {
        yearPad = "0";
    } else {
        yearPad = "";
    }
    iso.append(yearPad).append(y);

    if (m == -1) {
        return iso.toString();
    }
    iso.append(m <= 9 ? "-0" : "-").append(m);

    if (d == -1) {
        return iso.toString();
    }
    iso.append(d <= 9 ? "-0" : "-").append(d);

    return iso.toString();
}

/**
 * Return a new date instance by merging the date information from a first date with
 * the date information from a second date.
 * The merging follows the year, month, day sequence. If the years
 * for instance clash, the merging is stopped and the first date is kept.
 * When the first date has no year, the second date is used as a whole.
 *
 * Examples of merging:
 * "2010" "2010-10" -> "2010-10"
 * "2010" "2010-10-27" -> "2010-10-27"
 * "2010-10" "2010-10-27" -> "2010-10-27"
 * "2010-10-27" "2010-10" -> "2010-10-27"
 * "2011-10" "2010-10-27" -> "2011-10"
 * "2010" "2016-10-27" -> "2010"
 * "2011" "2010" -> "2011"
 *
 * @param date1 the date taking precedence, may be null
 * @param date2 the date used to complete date1, may be null
 * @return a new Date instance (never one of the arguments), or null when both
 *         arguments are null
 */
public static Date merge(Date date1, Date date2) {
    // a missing side contributes nothing: return a copy of the other one
    if (date1 == null) {
        return (date2 == null) ? null : new Date(date2);
    }
    if (date2 == null) {
        return new Date(date1);
    }

    if (date1.getYear() == -1) {
        return new Date(date2);
    }

    if (date1.getYear() == date2.getYear()) {
        // same year: the second date wins if it adds a month
        if (date1.getMonth() == -1 && date2.getMonth() != -1) {
            return new Date(date2);
        }
        if (date1.getMonth() == date2.getMonth()) {
            // same month: the second date wins if it adds a day
            if (date1.getDay() == -1 && date2.getDay() != -1) {
                return new Date(date2);
            }
        }
    }

    // clash or nothing to add: keep the first date
    return new Date(date1);
}

public String toString() {
String theDate = "";
if (day != -1) {
Expand Down
Loading

0 comments on commit e450e4f

Please sign in to comment.