-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #33 from lfoppiano/extended-api
Extends API
- Loading branch information
Showing
21 changed files
with
985 additions
and
167 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
33 changes: 33 additions & 0 deletions
33
resources/features-engineering/superconductors/scibert/sentence-splitter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
## requires Blingfire https://github.com/Microsoft/BlingFire | ||
|
||
import argparse | ||
import os | ||
from pathlib import Path | ||
|
||
from blingfire import text_to_sentences | ||
|
||
if __name__ == '__main__':
    # Command-line entry point: read the input file line by line, split each
    # line into sentences with BlingFire and write one sentence per output line.
    parser = argparse.ArgumentParser(
        description="Sentence segmentation")

    parser.add_argument("--input", help="Input file", required=True)
    parser.add_argument("--output", help="Output file", required=True)

    args = parser.parse_args()

    # Named *_path so the built-in input() is not shadowed.
    input_path = Path(args.input)
    output_path = Path(args.output)

    if input_path.is_file():
        # Explicit UTF-8 keeps the result independent of the platform's default
        # encoding. The input is opened first so that a failure to read it does
        # not leave a freshly truncated output file behind.
        with open(input_path, 'r', encoding='utf-8') as fi, \
                open(output_path, 'w', encoding='utf-8') as fo:
            for line in fi:
                # text_to_sentences returns the sentences separated by "\n".
                for sentence in text_to_sentences(line).split("\n"):
                    fo.write(sentence + "\n")
    else:
        # The given input is not an existing regular file: show the usage.
        parser.print_help()
75 changes: 75 additions & 0 deletions
75
src/main/java/org/grobid/core/data/ChemicalComposition.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package org.grobid.core.data; | ||
|
||
import com.fasterxml.jackson.annotation.JsonInclude; | ||
import com.fasterxml.jackson.annotation.JsonProperty; | ||
import org.apache.commons.lang3.ArrayUtils; | ||
|
||
import java.util.Map; | ||
import java.util.StringJoiner; | ||
|
||
@JsonInclude(JsonInclude.Include.NON_EMPTY) | ||
public class ChemicalComposition { | ||
private Map<String, String> elements; | ||
|
||
@JsonProperty("elements_vars") | ||
private Map<String, Object> elementsVars; | ||
|
||
@JsonProperty("amounts_vars") | ||
private Map<String, Object> amountsVars; | ||
|
||
private String formula; | ||
|
||
@JsonProperty("oxygen_deficiency") | ||
private Map<String, String> oxygenDeficency; | ||
|
||
public Map<String, String> getElements() { | ||
return elements; | ||
} | ||
|
||
public void setElements(Map<String, String> elements) { | ||
this.elements = elements; | ||
} | ||
|
||
public Map<String, Object> getAmountsVars() { | ||
return amountsVars; | ||
} | ||
|
||
public void setAmountsVars(Map<String, Object> amountsVars) { | ||
this.amountsVars = amountsVars; | ||
} | ||
|
||
public Map<String, Object> getElementsVars() { | ||
return elementsVars; | ||
} | ||
|
||
public void setElementsVars(Map<String, Object> elementsVars) { | ||
this.elementsVars = elementsVars; | ||
} | ||
|
||
public String getFormula() { | ||
return formula; | ||
} | ||
|
||
public void setFormula(String formula) { | ||
this.formula = formula; | ||
} | ||
|
||
public Map<String, String> getOxygenDeficency() { | ||
return oxygenDeficency; | ||
} | ||
|
||
public void setOxygenDeficency(Map<String, String> oxygenDeficency) { | ||
this.oxygenDeficency = oxygenDeficency; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return new StringJoiner(", ", ChemicalComposition.class.getSimpleName() + "[", "]") | ||
.add("elements=" + ArrayUtils.toString(elements)) | ||
.add("elementsVars=" + ArrayUtils.toString(elementsVars)) | ||
.add("amountsVars=" + ArrayUtils.toString(amountsVars)) | ||
.add("formula='" + formula + "'") | ||
.add("oxygenDeficency=" + ArrayUtils.toString(oxygenDeficency)) | ||
.toString(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
package org.grobid.core.data; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
/**
 * A chemical formula: the raw text it was written as, plus a map describing
 * its composition.
 *
 * <p>The composition map is never {@code null}: when no map (or a
 * {@code null} map) is supplied, an empty mutable map is used instead.
 */
public class Formula {

    private String rawValue;
    private Map<String, String> formulaComposition = new HashMap<>();

    /**
     * Creates a formula with an empty composition.
     *
     * @param rawValue the formula text
     */
    public Formula(String rawValue) {
        this.rawValue = rawValue;
    }

    /**
     * Creates a formula with the given composition.
     *
     * @param rawValue           the formula text
     * @param formulaComposition the composition map, stored by reference;
     *                           {@code null} is replaced by an empty map
     */
    public Formula(String rawValue, Map<String, String> formulaComposition) {
        this.rawValue = rawValue;
        this.formulaComposition = emptyIfNull(formulaComposition);
    }

    /** @return the composition map; never {@code null} */
    public Map<String, String> getFormulaComposition() {
        return formulaComposition;
    }

    /**
     * Replaces the composition map.
     *
     * @param formulaComposition the new map, stored by reference;
     *                           {@code null} is replaced by an empty map
     */
    public void setFormulaComposition(Map<String, String> formulaComposition) {
        this.formulaComposition = emptyIfNull(formulaComposition);
    }

    /** @return the formula text (may be {@code null}) */
    public String getRawValue() {
        return rawValue;
    }

    /** @param rawValue the formula text to store */
    public void setRawValue(String rawValue) {
        this.rawValue = rawValue;
    }

    /** Keeps the "composition is never null" invariant established by the field initialiser. */
    private static Map<String, String> emptyIfNull(Map<String, String> composition) {
        return composition != null ? composition : new HashMap<>();
    }
}
Oops, something went wrong.