diff --git a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/BaseTemplateBean.java b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/BaseTemplateBean.java index e136775d1dd..2ed4a6ca721 100644 --- a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/BaseTemplateBean.java +++ b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/BaseTemplateBean.java @@ -17,22 +17,18 @@ import javax.persistence.Column; import javax.persistence.MappedSuperclass; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; - /** * This bean contains properties common for Template and Process. */ @MappedSuperclass public abstract class BaseTemplateBean extends BaseBean { - @GenericField @Column(name = "title") protected String title; @Column(name = "creationDate") protected Date creationDate; - @GenericField @Column(name = "sortHelperStatus") private String sortHelperStatus; diff --git a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Batch.java b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Batch.java index 93fb527049d..1577c6b0e39 100644 --- a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Batch.java +++ b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Batch.java @@ -27,9 +27,6 @@ import javax.persistence.ManyToMany; import javax.persistence.Table; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; import org.kitodo.data.database.enums.BatchType; import org.kitodo.data.database.persistence.BatchDAO; @@ -41,7 +38,6 @@ * multi-journal binding unit. 
*/ @Entity -@Indexed(index = "kitodo-batch") @Table(name = "batch") public class Batch extends BaseBean { @@ -49,7 +45,6 @@ public class Batch extends BaseBean { * The batch title. Using titles for batches is optional, the field may be * {@code null}. If so, the ID will be shown to the user instead. */ - @GenericField @Column(name = "title") private String title; diff --git a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Client.java b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Client.java index 62b3f0fcecd..efdc9cf95df 100644 --- a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Client.java +++ b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Client.java @@ -25,14 +25,12 @@ import javax.persistence.OneToMany; import javax.persistence.Table; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; import org.kitodo.data.database.persistence.ClientDAO; @Entity @Table(name = "client") public class Client extends BaseBean { - @GenericField @Column(name = "name") private String name; diff --git a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Comment.java b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Comment.java index 17c03f09dcb..d30bfcc748c 100644 --- a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Comment.java +++ b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Comment.java @@ -17,12 +17,12 @@ import javax.persistence.Entity; import javax.persistence.EnumType; import javax.persistence.Enumerated; +import javax.persistence.FetchType; import javax.persistence.ForeignKey; import javax.persistence.JoinColumn; import javax.persistence.ManyToOne; import javax.persistence.Table; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; import org.kitodo.data.database.enums.CommentType; @Entity @@ -32,7 +32,6 @@ public class Comment extends BaseBean { * 
The field message holds the comment message. */ @Column(name = "message", columnDefinition = "longtext") - @GenericField private String message; /** @@ -40,7 +39,6 @@ public class Comment extends BaseBean { */ @Column(name = "type") @Enumerated(EnumType.STRING) - @GenericField private CommentType type; /** @@ -65,21 +63,21 @@ public class Comment extends BaseBean { /** * This field contains information about user, which create the comment. */ - @ManyToOne + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "user_id", foreignKey = @ForeignKey(name = "FK_comment_user_id")) private User author; /** * This field contains information about the currentTask, when the comment is created. */ - @ManyToOne + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "currentTask_id", foreignKey = @ForeignKey(name = "FK_comment_currentTask_id")) private Task currentTask; /** * This field contains information about the correctionTask, where the user can correct the error. */ - @ManyToOne + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "correctionTask_id", foreignKey = @ForeignKey(name = "FK_comment_correctionTask_id")) private Task correctionTask; @@ -87,7 +85,7 @@ public class Comment extends BaseBean { /** * The field process holds the process of the comment. 
*/ - @ManyToOne + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "process_id", foreignKey = @ForeignKey(name = "FK_comment_process_id")) private Process process; diff --git a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Docket.java b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Docket.java index d8506ecf97f..cb7f4d78006 100644 --- a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Docket.java +++ b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Docket.java @@ -15,37 +15,26 @@ import javax.persistence.Column; import javax.persistence.Entity; +import javax.persistence.FetchType; import javax.persistence.ForeignKey; import javax.persistence.JoinColumn; import javax.persistence.ManyToOne; import javax.persistence.Table; -import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; - @Entity -@Indexed(index = "kitodo-docket") @Table(name = "docket") public class Docket extends BaseBean { - @GenericField @Column(name = "title") private String title; - @GenericField @Column(name = "file") private String file; - @GenericField @Column(name = "active") private Boolean active = true; - @ManyToOne - @IndexedEmbedded(includePaths = {"id", "name"}) - @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "client_id", foreignKey = @ForeignKey(name = "FK_docket_client_id")) private Client client; diff --git a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Filter.java b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Filter.java index 
90ea446254f..3fe328e4734 100644 --- a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Filter.java +++ b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Filter.java @@ -21,28 +21,20 @@ import javax.persistence.ManyToOne; import javax.persistence.Table; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; - /** * Filter bean. */ @Entity -@Indexed(index = "kitodo-filter") @Table(name = "filter") public class Filter extends BaseBean { - @GenericField @Column(name = "value", columnDefinition = "longtext") private String value; - @GenericField @Column(name = "creationDate") private Date creationDate; @ManyToOne - @IndexedEmbedded(includePaths = {"id"}) @JoinColumn(name = "user_id", foreignKey = @ForeignKey(name = "FK_filter_user_id")) private User user; diff --git a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Folder.java b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Folder.java index 63f831e8321..961fa32ef4a 100644 --- a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Folder.java +++ b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Folder.java @@ -29,7 +29,6 @@ import javax.persistence.Table; import javax.persistence.Transient; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; import org.kitodo.api.imagemanagement.ImageManagementInterface; import org.kitodo.config.ConfigMain; import org.kitodo.data.database.enums.LinkingMode; @@ -105,7 +104,6 @@ public class Folder extends BaseBean { * contents of this folder will be linked. 
*/ @Column(name = "fileGroup") - @GenericField private String fileGroup; /** @@ -132,14 +130,12 @@ public class Folder extends BaseBean { * @see org.kitodo.config.xml.fileformats.FileFormatsConfig */ @Column(name = "mimeType") - @GenericField private String mimeType = "image/jpeg"; /** * The path to the folder in the process directory of each processes. */ @Column(name = "path") - @GenericField private String path = ""; /** @@ -155,7 +151,6 @@ public class Folder extends BaseBean { * replaced before concatenation. */ @Column(name = "urlStructure") - @GenericField private String urlStructure; /** diff --git a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/IndexingKeyworder.java b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/IndexingKeyworder.java new file mode 100644 index 00000000000..e3ecb3b47d5 --- /dev/null +++ b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/IndexingKeyworder.java @@ -0,0 +1,498 @@ +/* + * (c) Kitodo. Key to digital objects e. V. + * + * This file is part of the Kitodo project. + * + * It is licensed under GNU General Public License version 3 or later. + * + * For the full copyright and license information, please read the + * GPL3-License.txt file that was distributed with this source code. 
+ */ + +package org.kitodo.data.database.beans; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.kitodo.config.KitodoConfig; +import org.kitodo.data.database.enums.TaskStatus; + +/** + * Prepares the search keywords for a process or task. 
+ */ +class IndexingKeyworder { + private static final Logger logger = LogManager.getLogger(IndexingKeyworder.class); + + private static final String PSEUDOWORD_TASK_AUTOMATIC = "automatic"; + private static final String PSEUDOWORD_TASK_DONE = "closed"; + private static final String PSEUDOWORD_TASK_DONE_PROCESSING_USER = "closeduser"; + private static final String ANY_METADATA_MARKER = "mdWrap"; + private static final char VALUE_SEPARATOR = 'q'; + + private static final Pattern TITLE_GROUPS_PATTERN = Pattern.compile("[\\p{IsLetter}\\p{Digit}]+"); + private static final Pattern METADATA_PATTERN = Pattern.compile("name=\"([^\"]+)\">([^<]*)<", Pattern.DOTALL); + private static final Pattern METADATA_SECTIONS_PATTERN = + Pattern.compile("(.*?)(.*?)", Pattern.DOTALL); + private static final Pattern RULESET_LABEL_PATTERN = Pattern.compile("]*>([^<]+)", Pattern.DOTALL); + + private static final Map>> rulesetCache = new HashMap<>(); + + private Set titleKeywords = Collections.emptySet(); + private Set projectKeywords = Collections.emptySet(); + private Set batchKeywords = Collections.emptySet(); + private Set taskKeywords = Collections.emptySet(); + private Set taskPseudoKeywords = Collections.emptySet(); + private Set metadataKeywords = Collections.emptySet(); + private Set metadataPseudoKeywords = Collections.emptySet(); + private String processId = null; + private Set commentKeywords = Collections.emptySet(); + + public IndexingKeyworder(Process process) { + this.titleKeywords = filterMinLength(initTitleKeywords(process.getTitle())); + String projectTitle = Objects.nonNull(process.getProject()) ? 
process.getProject().getTitle() : ""; + this.projectKeywords = filterMinLength(initSimpleKeywords(projectTitle)); + this.batchKeywords = filterMinLength(initBatchKeywords(process.getBatches())); + var taskKeywords = initTaskKeywords(process.getTasksUnmodified()); + this.taskKeywords = filterMinLength(taskKeywords.getLeft()); + this.taskPseudoKeywords = filterMinLength(taskKeywords.getRight()); + var metadataKeywords = initMetadataKeywords(process); + this.metadataKeywords = filterMinLength(metadataKeywords.getLeft()); + this.metadataPseudoKeywords = filterMinLength(metadataKeywords.getRight()); + this.processId = process.getId().toString(); + this.commentKeywords = filterMinLength(initCommentKeywords(process.getComments())); + if (logger.isTraceEnabled()) { + traceLogKeywords("indexterms.log"); + } + } + + /** + * Creates search terms for process titles. To do this, the process title is + * separated into groups of consecutive characters and numbers, and these + * are generated in their entirety and starting from the front or from the + * back. The latter is more common, as the last four digits of a PPN are + * used for the search and the hit rate is excellent. + * + * @param processTitle + * the title of the process + * @return keywords + */ + private static Set initTitleKeywords(String processTitle) { + Set tokens = new HashSet<>(); + Matcher matcher = TITLE_GROUPS_PATTERN.matcher(processTitle); + while (matcher.find()) { + String normalized = normalize(matcher.group()); + final int length = normalized.length(); + // starting from the beginning + for (int end = 1; end <= length; end++) { + tokens.add(normalized.substring(0, end)); + } + // ending at the end + for (int beginning = length - 1; beginning >= 0; beginning--) { + tokens.add(normalized.substring(beginning, length)); + } + } + return tokens; + } + + /** + * Makes the keywords for searching by a string. These are just single + * words. 
+ * + * @param input + * the input string + * @return keywords + */ + private static final Set initSimpleKeywords(String input) { + Set tokens = new HashSet<>(); + for (String term : splitValues(input)) { + tokens.add(normalize(term)); + } + return tokens; + } + + /** + * Generates the search terms by batch. Note that batch title can be + * optional. Batch ID is not indexed because ID search is done via the + * database. + * + * @param batches + * batches containing the process + * @return batch search terms + */ + private static final Set initBatchKeywords(Collection batches) { + if (batches.isEmpty()) { + return Collections.emptySet(); + } + Set tokens = new HashSet<>(); + for (Batch batch : batches) { + String optionalTitle = batch.getTitle(); + if (StringUtils.isNotBlank(optionalTitle)) { + tokens.addAll(initSimpleKeywords(optionalTitle)); + } + } + return tokens; + } + + /** + * Generates all search terms by task and pseudo search terms. + * + * @param tasks + * tasks for the words to be generated + * @return search terms and pseudo search terms + */ + private static final Pair, Set> initTaskKeywords(Collection tasks) { + Set taskKeywords = new HashSet<>(); + Set taskPseudoKeywords = new HashSet<>(); + for (Task task : tasks) { + for (String token : splitValues(task.getTitle())) { + String term = normalize(token); + taskKeywords.add(term); + if (task.isTypeAutomatic()) { + taskKeywords.add(PSEUDOWORD_TASK_AUTOMATIC + VALUE_SEPARATOR + term); + } + TaskStatus taskStatus = task.getProcessingStatus(); + if (Objects.isNull(taskStatus)) { + continue; + } + if (Objects.equals(taskStatus, TaskStatus.DONE)) { + taskPseudoKeywords.add(PSEUDOWORD_TASK_DONE); + taskPseudoKeywords.add(PSEUDOWORD_TASK_DONE + VALUE_SEPARATOR + term); + User closedUser = task.getProcessingUser(); + if (Objects.isNull(closedUser)) { + continue; + } + if (StringUtils.isNotBlank(closedUser.getName())) { + taskPseudoKeywords.add(PSEUDOWORD_TASK_DONE_PROCESSING_USER + VALUE_SEPARATOR + normalize( 
+ closedUser.getName())); + } + if (StringUtils.isNotBlank(closedUser.getSurname())) { + taskPseudoKeywords.add(PSEUDOWORD_TASK_DONE_PROCESSING_USER + VALUE_SEPARATOR + normalize( + closedUser.getSurname())); + } + } else { + String taskKeyword = taskStatus.toString().toLowerCase(); + taskPseudoKeywords.add(taskKeyword); + taskPseudoKeywords.add(taskKeyword + VALUE_SEPARATOR + term); + } + } + } + return Pair.of(taskKeywords, taskPseudoKeywords); + } + + /** + * Generates all metadata keywords and pseudowords for metadata of a METS + * file KITODO-metadata. + * + * @param process + * process of the METS file + * @param placeDebug + * what the metadata is read for, only for debug log messages. + * Can be null if logging is disabled on the debug level. + * @return metadata keywords, and metadata pseudo keywords + */ + private static final Pair, Set> initMetadataKeywords(Process process) { + final Pair, Set> emptyResult = Pair.of(Collections.emptySet(), Collections.emptySet()); + try { + String processId = Integer.toString(process.getId()); + Path path = Paths.get(KitodoConfig.getKitodoDataDirectory(), processId, "meta.xml"); + if (!Files.isReadable(path)) { + logger.info((Files.exists(path) ? 
"File not readable for indexing: " + : "Missing metadata file for indexing: ") + path); + return emptyResult; + } + logger.debug("Indexing {} in process {} \"{}\"", path, process.getId(), process.getTitle()); + String metaXml = FileUtils.readFileToString(path.toFile(), StandardCharsets.UTF_8); + if (!metaXml.contains(ANY_METADATA_MARKER)) { + return emptyResult; + } + Set metadataKeywords = new HashSet<>(); + Set metadataPseudoKeywords = new HashSet<>(); + Map> rulesetLabelMap = getRulesetLabelMap(process.getRuleset().getFile()); + Matcher metadataSectionsMatcher = METADATA_SECTIONS_PATTERN.matcher(metaXml); + while (metadataSectionsMatcher.find()) { + Matcher keyMatcher = METADATA_PATTERN.matcher(metadataSectionsMatcher.group(1)); + while (keyMatcher.find()) { + String key = normalize(keyMatcher.group(1)); + String valueString = keyMatcher.group(2); + for (String singleValue : splitValues(valueString)) { + String value = normalize(singleValue); + metadataKeywords.add(value); + metadataPseudoKeywords.add(key + VALUE_SEPARATOR + value); + metadataPseudoKeywords.add(key); + for (String label : rulesetLabelMap.getOrDefault(key, Collections.emptyList())) { + metadataPseudoKeywords.add(label + VALUE_SEPARATOR + value); + metadataPseudoKeywords.add(label); + } + } + } + } + return Pair.of(metadataKeywords, metadataPseudoKeywords); + } catch (IOException | RuntimeException e) { + logger.catching(e instanceof FileNotFoundException ? Level.INFO : Level.WARN, e); + return emptyResult; + } + } + + /** + * Returns a map for ruleset key translations. A cache is used, but if there + * is nothing in cache, the ruleset is parsed and the map is created. Since + * Kitodo-DataEditor is not available here, we have to do this directly, and + * it also increases performance massively. 
+ * + * @param file + * indicates a ruleset + * @return a map + */ + private static Map> getRulesetLabelMap(String file) { + Map> rulesetLabelMap = rulesetCache.get(file); + if (Objects.nonNull(rulesetLabelMap)) { + return rulesetLabelMap; + } + try { + File rulesetFile = Paths.get(KitodoConfig.getParameter("directory.rulesets"), file).toFile(); + logger.debug("Reading {} ...", rulesetFile); + String ruleset = FileUtils.readFileToString(rulesetFile, StandardCharsets.UTF_8); + rulesetLabelMap = new HashMap<>(); + Matcher keysMatcher = RULESET_KEY_PATTERN.matcher(ruleset); + while (keysMatcher.find()) { + String key = normalize(keysMatcher.group(1)); + Matcher labelMatcher = RULESET_LABEL_PATTERN.matcher(keysMatcher.group(2)); + Set labels = new HashSet<>(); + while (labelMatcher.find()) { + labels.add(normalize(labelMatcher.group(1))); + } + rulesetLabelMap.put(key, labels); + if (logger.isTraceEnabled()) { + logger.trace("- {} -> {}", key, String.join(", ", labels)); + } + } + rulesetCache.put(file, rulesetLabelMap); + return rulesetLabelMap; + } catch (IOException | RuntimeException e) { + logger.catching(Level.WARN, e); + return Collections.emptyMap(); + } + } + + /** + * Creates the keywords for searching in correction messages. + * + * @param comments + * the comments of a process + * @return keywords + */ + private static final Set initCommentKeywords(List comments) { + Set tokens = new HashSet<>(); + for (Comment comment : comments) { + String message = comment.getMessage(); + if (StringUtils.isNotBlank(message)) { + tokens.addAll(initSimpleKeywords(message)); + } + } + return tokens; + } + + /** + * Converts the string to lowercase and removes special characters. + * + * @param string + * string to clean + * @return clean string in lowercase + */ + private static String normalize(String string) { + return string.toLowerCase().replaceAll("[\0-/:-`{-¿]", ""); + } + + /** + * Splits the values ​​of a string at special characters. 
Groups of letters + * and numbers written together are not split. + * + * @param value + * string to split + * @return groups + */ + private static List splitValues(String value) { + String initializedValue = value != null ? value : ""; + return Arrays.asList(initializedValue.split("[ ,\\-._]+")); + } + + /** + * Filter minimum-length tokens. Only tokens at least three characters long + * should be indexed, because you'll never search for tokens that are too + * short anyway, but it would bloat the index a lot. + * + * @param tokens + * input set, is changed! + * @return input set + */ + private static Set filterMinLength(Set tokens) { + for (Iterator iterator = tokens.iterator(); iterator.hasNext();) { + if (iterator.next().length() < 3) { + iterator.remove(); + } + } + return tokens; + } + + /** + * Returns the search keywords for the free search. These are search + * keywords for title terms, the title of the project, names of assigned + * batches, task names, and metadata. For searching the metadata, there are + * both the bare terms and pseudowords to particularly powerfully search the + * various metadata keys. + * + *

+ * Suppose there is metadata with the key "TitleDocMain" and a value + * containing the string "Berlin, Charlottenburg". In the ruleset, + * "TitleDocMain" is translated as "Main title" and "Hauptsachtitel". This + * produces the following pseudo search terms in addition to the search term + * "berlin": + *

    + *
  • {@code titledocmainqberlin} - for search "TitleDocMain:berlin"
  • + *
  • {@code maintitleqberlin} - for search "Main title:berlin"
  • + *
  • {@code hauptsachtitelqberlin} - for search + * "Hauptsachtitel:berlin"
  • + *
  • and, formed according to the same scheme, {@code charlottenburg} and + * its pseudowords. + *
+ * This means that if a user searches for "Main title:Berlin, + * Charlottenburg", the index has to search for: + * {@code maintitleqberlin maintitleqcharlottenburg}. + * + * @return search keywords for the free search + */ + public String getSearch() { + Set freeKeywords = new HashSet<>(); + freeKeywords.addAll(titleKeywords); + freeKeywords.addAll(projectKeywords); + freeKeywords.addAll(batchKeywords); + freeKeywords.addAll(taskKeywords); + freeKeywords.addAll(metadataKeywords); + freeKeywords.addAll(metadataPseudoKeywords); + if (Objects.nonNull(processId)) { + freeKeywords.add(processId); + } + freeKeywords.addAll(commentKeywords); + return String.join(" ", freeKeywords); + } + + /** + * Returns the search keywords for the title search. The title is sequenced + * in a meaningful way to achieve meaningful hits even with the substring + * search. + * + *

+ * A process with title "PineSeve_313539383" would be searchable by every + * prefix and suffix (three or more characters) of each group, for example: + * pin, pine, pineseve, eve, seve, 313, 3135, 313539383, 383, 9383. + * + * @return search keywords for the title + */ + public String getSearchTitle() { + return String.join(" ", titleKeywords); + } + + /** + * Returns the search keywords for the project name search. These are the + * words from the project name in normalized form. + * + * @return search keywords for the project + */ + public String getSearchProject() { + return String.join(" ", projectKeywords); + } + + /** + * Returns the search keywords for searching for processes assigned to a + * batch. The same splitting criteria apply as for the project name. + * + * @return search keywords for batches + */ + public String getSearchBatch() { + return String.join(" ", batchKeywords); + } + + /** + * Returns search keywords for finding tasks. This uses pseudowords to + * particularly powerfully search the various task states. + * + *

+ * Given an automated task called "OCR" and the task is running, it + * generates the tokens: {@code ocr}, {@code automaticqocr}, + * {@code inworkqocr}. + * + *

+ * If a task "Quality Assurance" is finished and it was processed by John + * Doe, the token results: {@code quality}, {@code assurance}, + * {@code closedqquality}, {@code closedqassurance}, + * {@code closeduserqjohn}, {@code closeduserqdoe}. + * + * @return search keywords for tasks + */ + public String getSearchTask() { + Set allTaskKeywords = new HashSet<>(); + allTaskKeywords.addAll(taskKeywords); + allTaskKeywords.addAll(taskPseudoKeywords); + return String.join(" ", allTaskKeywords); + } + + /** + * Writes the keywords to an output file in the metadata directory of the + * process. This is only done if the log level for this class is set to + * TRACE. + * + * @param keywordsLogfile + * file name (without path) + */ + private final void traceLogKeywords(String keywordsLogfile) { + try { + File log = Paths.get(KitodoConfig.getKitodoDataDirectory(), processId, keywordsLogfile).toFile(); + traceLogCollection("[titleKeywords]", titleKeywords, log, false); + traceLogCollection("[projectKeywords]", projectKeywords, log, true); + traceLogCollection("[batchKeywords]", batchKeywords, log, true); + traceLogCollection("[taskKeywords]", taskKeywords, log, true); + traceLogCollection("[taskPseudoKeywords]", taskPseudoKeywords, log, true); + traceLogCollection("[metadataKeywords]", metadataKeywords, log, true); + traceLogCollection("[metadataPseudoKeywords]", metadataPseudoKeywords, log, true); + traceLogCollection("[processId]", Objects.nonNull(processId) ? 
Collections.singletonList(processId) + : Collections.emptyList(), log, true); + traceLogCollection("[commentKeywords]", commentKeywords, log, true); + logger.trace("Keywords logged to {}", log); + } catch (RuntimeException | IOException e) { + logger.catching(Level.TRACE, e); + } + } + + private static final void traceLogCollection(String caption, Collection tokens, File log, boolean append) + throws IOException { + FileUtils.write(log, caption.concat(System.lineSeparator()), StandardCharsets.UTF_8, append); + FileUtils.writeLines(log, StandardCharsets.UTF_8.toString(), new TreeSet<>(tokens), true); + FileUtils.write(log, System.lineSeparator(), StandardCharsets.UTF_8, true); + } +} diff --git a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Process.java b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Process.java index a8c476ffc45..487d1280244 100644 --- a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Process.java +++ b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Process.java @@ -13,15 +13,16 @@ import java.net.URI; import java.util.ArrayList; +import java.util.Collection; import java.util.Date; import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.stream.Collectors; import javax.persistence.CascadeType; import javax.persistence.Column; import javax.persistence.Entity; +import javax.persistence.FetchType; import javax.persistence.ForeignKey; import javax.persistence.JoinColumn; import javax.persistence.JoinTable; @@ -40,9 +41,7 @@ import org.hibernate.annotations.LazyCollectionOption; import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed; -import 
org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; import org.kitodo.data.database.converter.ProcessConverter; import org.kitodo.data.database.enums.CorrectionComments; @@ -55,100 +54,74 @@ @Table(name = "process") public class Process extends BaseTemplateBean { - @GenericField @Column(name = "sortHelperImages") private Integer sortHelperImages; - @GenericField @Column(name = "sortHelperArticles") private Integer sortHelperArticles; - @GenericField @Column(name = "sortHelperMetadata") private Integer sortHelperMetadata; - @GenericField @Column(name = "sortHelperDocstructs") private Integer sortHelperDocstructs; - @FullTextField @Column(name = "wikiField", columnDefinition = "longtext") private String wikiField = ""; - @GenericField @Column(name = "processBaseUri") private String processBaseUri; - @GenericField @Column(name = "ordering") private Integer ordering; - @ManyToOne - @IndexedEmbedded(includePaths = {"title", "id"}) - @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "docket_id", foreignKey = @ForeignKey(name = "FK_process_docket_id")) private Docket docket; @ManyToOne - @IndexedEmbedded(includePaths = {"title", "active", "id", "client.id"}) @JoinColumn(name = "project_id", foreignKey = @ForeignKey(name = "FK_process_project_id")) private Project project; - @ManyToOne - @IndexedEmbedded(includePaths = {"title", "id"}) - @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "ruleset_id", foreignKey = @ForeignKey(name = "FK_process_ruleset_id")) private Ruleset ruleset; - @ManyToOne - @IndexedEmbedded(includePaths = {"title", "id"}) + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "template_id", foreignKey = @ForeignKey(name = "FK_process_template_id")) private Template template; - @ManyToOne - 
@IndexedEmbedded(includePaths = {"title", "id"}) + @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "parent_id", foreignKey = @ForeignKey(name = "FK_process_parent_id")) private Process parent; - @OneToMany(mappedBy = "parent", cascade = CascadeType.PERSIST) + @OneToMany(mappedBy = "parent", cascade = CascadeType.PERSIST, fetch = FetchType.LAZY) private List children; @Transient private boolean hasChildren = true; - @LazyCollection(LazyCollectionOption.FALSE) @OneToMany(mappedBy = "process", cascade = CascadeType.ALL, orphanRemoval = true) - @IndexedEmbedded(includePaths = {"title", "id"}) @OrderBy("ordering") private List tasks; @LazyCollection(LazyCollectionOption.FALSE) @OneToMany(mappedBy = "process", cascade = CascadeType.PERSIST, orphanRemoval = true) - @IndexedEmbedded(includePaths = {"message"}) private List comments; - @LazyCollection(LazyCollectionOption.FALSE) @ManyToMany(cascade = CascadeType.ALL) - @IndexedEmbedded(includePaths = {"id", "title", "value"}) - @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @JoinTable(name = "process_x_property", joinColumns = { @JoinColumn(name = "process_id", foreignKey = @ForeignKey(name = "FK_process_x_property_process_id")) }, inverseJoinColumns = { @JoinColumn(name = "property_id", foreignKey = @ForeignKey(name = "FK_process_x_property_property_id")) }) private List properties; - @LazyCollection(LazyCollectionOption.FALSE) @ManyToMany(cascade = CascadeType.ALL) - @IndexedEmbedded(includePaths = {"id", "title", "value"}) - @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @JoinTable(name = "template_x_property", joinColumns = { @JoinColumn(name = "process_id", foreignKey = @ForeignKey(name = "FK_template_x_property_process_id")) }, inverseJoinColumns = { @JoinColumn(name = "property_id", foreignKey = @ForeignKey(name = "FK_template_x_property_property_id")) }) private List templates; - @LazyCollection(LazyCollectionOption.FALSE) @ManyToMany(cascade = CascadeType.ALL) - 
@IndexedEmbedded(includePaths = {"id", "title", "value"}) - @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.SHALLOW) @JoinTable(name = "workpiece_x_property", joinColumns = { @JoinColumn(name = "process_id", foreignKey = @ForeignKey(name = "FK_workpiece_x_property_process_id")) }, inverseJoinColumns = { @@ -157,15 +130,12 @@ public class Process extends BaseTemplateBean { @LazyCollection(LazyCollectionOption.FALSE) @ManyToMany(mappedBy = "processes") - @IndexedEmbedded(includePaths = {"title", "id"}) private List batches = new ArrayList<>(); @Column(name = "exported") - @GenericField private boolean exported; @Column(name = "inChoiceListShown") - @GenericField Boolean inChoiceListShown; @Column(name = "ocrd_workflow_id") @@ -189,6 +159,9 @@ public class Process extends BaseTemplateBean { @Transient private String baseType; + @Transient + private transient IndexingKeyworder indexingKeyworder; + /** * Constructor. */ @@ -511,6 +484,15 @@ public List getTasks() { return this.tasks; } + /** + * Returns the tasks of the process without forced initialization. + * + * @return the task list + */ + Collection getTasksUnmodified() { + return this.tasks; + } + /** * Sets the task list of this process. * @@ -1052,4 +1034,76 @@ public void setHasComments(boolean hasComments) { public String toString() { return title + " [" + id + "]"; } + + /** + * When indexing, outputs the index keywords for free search. + * + * @return the index keywords for free search + */ + @Transient + @FullTextField(name = "search") + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO) + public String getKeywordsForFreeSearch() { + return initializeKeywords().getSearch(); + } + + /** + * When indexing, outputs the index keywords for searching in title. 
+ * + * @return the index keywords for searching in title + */ + @Transient + @FullTextField(name = "searchTitle") + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO) + public String getKeywordsForSearchingInTitle() { + return initializeKeywords().getSearchTitle(); + } + + /** + * When indexing, outputs the index keywords for searching by project name. + * + * @return the index keywords for searching by project name + */ + @Transient + @FullTextField(name = "searchProject") + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO) + public String getKeywordsForSearchingByProjectName() { + return initializeKeywords().getSearchProject(); + } + + /** + * When indexing, outputs the index keywords for searching for assignment to + * batches. + * + * @return the index keywords for searching for assignment to batches + */ + @Transient + @FullTextField(name = "searchBatch") + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO) + public String getKeywordsForAssignmentToBatches() { + return initializeKeywords().getSearchBatch(); + } + + /** + * When indexing, outputs the index keywords for searching for task + * information. 
+ * + * @return the index keywords for searching for task information + */ + @Transient + @FullTextField(name = "searchTask") + @IndexingDependency(reindexOnUpdate = ReindexOnUpdate.NO) + public String getKeywordsForSearchingForTaskInformation() { + return initializeKeywords().getSearchTask(); + } + + private IndexingKeyworder initializeKeywords() { + if (this.indexingKeyworder == null) { + IndexingKeyworder indexingKeyworder = new IndexingKeyworder(this); + this.indexingKeyworder = indexingKeyworder; + return indexingKeyworder; + } else { + return indexingKeyworder; + } + } } diff --git a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Project.java b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Project.java index ef12b210d93..b249a3b552c 100644 --- a/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Project.java +++ b/Kitodo-DataManagement/src/main/java/org/kitodo/data/database/beans/Project.java @@ -25,6 +25,7 @@ import javax.persistence.Entity; import javax.persistence.EnumType; import javax.persistence.Enumerated; +import javax.persistence.FetchType; import javax.persistence.ForeignKey; import javax.persistence.JoinColumn; import javax.persistence.ManyToMany; @@ -39,91 +40,65 @@ import org.hibernate.LazyInitializationException; import org.hibernate.annotations.LazyCollection; import org.hibernate.annotations.LazyCollectionOption; -import org.hibernate.search.mapper.pojo.automaticindexing.ReindexOnUpdate; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.FullTextField; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.GenericField; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.Indexed; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexedEmbedded; -import org.hibernate.search.mapper.pojo.mapping.definition.annotation.IndexingDependency; import org.kitodo.data.database.enums.PreviewHoverMode; import 
org.kitodo.data.database.persistence.ProjectDAO; @Entity -@Indexed(index = "kitodo-project") @Table(name = "project") public class Project extends BaseBean implements Comparable { - @GenericField @Column(name = "title", nullable = false, unique = true) private String title; - @FullTextField @Column(name = "dmsImportRootPath") private String dmsImportRootPath; - @FullTextField @Column(name = "metsRightsOwner") private String metsRightsOwner = ""; - @FullTextField @Column(name = "metsRightsOwnerLogo") private String metsRightsOwnerLogo = ""; - @FullTextField @Column(name = "metsRightsOwnerSite") private String metsRightsOwnerSite = ""; - @FullTextField @Column(name = "metsRightsOwnerMail") private String metsRightsOwnerMail = ""; - @FullTextField @Column(name = "metsDigiprovReference") private String metsDigiprovReference = ""; - @FullTextField @Column(name = "metsDigiprovPresentation") private String metsDigiprovPresentation = ""; - @FullTextField @Column(name = "metsPointerPath") private String metsPointerPath = ""; - @FullTextField @Column(name = "metsPurl") private String metsPurl = ""; - @FullTextField @Column(name = "metsContentId") private String metsContentIDs = ""; @Column(name = "startDate") - @GenericField private Date startDate; @Column(name = "endDate") - @GenericField private Date endDate; @Column(name = "numberOfPages") - @GenericField private Integer numberOfPages; @Column(name = "numberOfVolumes") - @GenericField private Integer numberOfVolumes; @Column(name = "active") - @GenericField private Boolean active = true; - @LazyCollection(LazyCollectionOption.FALSE) @ManyToMany(mappedBy = "projects", cascade = CascadeType.PERSIST) - @IndexedEmbedded(includePaths = {"surname", "name", "id", "login"}) private List users; @OneToMany(mappedBy = "project", cascade = CascadeType.ALL, orphanRemoval = true) - @IndexedEmbedded(includePaths = {"id", "title"}) private List processes; @Transient @@ -131,27 +106,22 @@ public class Project extends BaseBean implements 
Comparable { @LazyCollection(LazyCollectionOption.FALSE) @ManyToMany(mappedBy = "projects", cascade = CascadeType.PERSIST) - @IndexedEmbedded(includePaths = {"id", "title"}) private List