diff --git a/dev/checkstyle/suppressions.xml b/dev/checkstyle/suppressions.xml index 4047c8265f4..9529cc9846e 100644 --- a/dev/checkstyle/suppressions.xml +++ b/dev/checkstyle/suppressions.xml @@ -18,7 +18,7 @@ information: Portions Copyright [yyyy] [name of copyright owner] CDDL HEADER END -Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2018, 2002, Oracle and/or its affiliates. All rights reserved. Portions Copyright (c) 2018-2020, Chris Fraire . --> @@ -43,7 +43,7 @@ Portions Copyright (c) 2018-2020, Chris Fraire . |Context\.java|HistoryContext\.java|Suggester\.java| |ProjectHelperTestBase\.java|SearchHelper\.java" /> - + diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java index e7f4961b3be..23b9df56109 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java @@ -300,6 +300,8 @@ public final class Configuration { private int connectTimeout = -1; // connect timeout in seconds private int apiTimeout = -1; // API timeout in seconds + private boolean historyBasedReindex; + /* * types of handling history for remote SCM repositories: * ON - index history and display it in webapp @@ -576,6 +578,7 @@ public Configuration() { setTagsEnabled(false); //setUserPage("http://www.myserver.org/viewProfile.jspa?username="); // Set to empty string so we can append it to the URL unconditionally later. + setHistoryBasedReindex(true); setUserPageSuffix(""); setWebappLAF("default"); // webappCtags is default(boolean) @@ -1412,6 +1415,14 @@ public void setApiTimeout(int apiTimeout) { this.apiTimeout = apiTimeout; } + public boolean isHistoryBasedReindex() { + return historyBasedReindex; + } + + public void setHistoryBasedReindex(boolean flag) { + historyBasedReindex = flag; + } + /** * Write the current configuration to a file. * @@ -1524,4 +1535,45 @@ private static Configuration decodeObject(InputStream in) throws IOException { return conf; } + + public static class ConfigurationException extends Exception { + static final long serialVersionUID = -1; + + public ConfigurationException(String message) { + super(message); + } + } + + /** + * Check if configuration is populated and self-consistent. + * @throws ConfigurationException on error + */ + public void checkConfiguration() throws ConfigurationException { + + if (getSourceRoot() == null) { + throw new ConfigurationException("Source root is not specified."); + } + + if (getDataRoot() == null) { + throw new ConfigurationException("Data root is not specified."); + } + + if (!new File(getSourceRoot()).canRead()) { + throw new ConfigurationException("Source root directory '" + getSourceRoot() + "' must be readable."); + } + + if (!new File(getDataRoot()).canWrite()) { + throw new ConfigurationException("Data root directory '" + getDataRoot() + "' must be writable."); + } + + if (!isHistoryEnabled() && isHistoryBasedReindex()) { + LOGGER.log(Level.INFO, "History based reindex is on, however history is off. " + + "History has to be enabled for history based reindex."); + } + + if (!isHistoryCache() && isHistoryBasedReindex()) { + LOGGER.log(Level.INFO, "History based reindex is on, however history cache is off. " + + "History cache has to be enabled for history based reindex."); + } + } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java index 761cab92c58..e86e7d94dde 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2018, Chris Fraire . */ package org.opengrok.indexer.configuration; @@ -34,6 +34,7 @@ import java.util.logging.Logger; import java.util.regex.PatternSyntaxException; +import org.jetbrains.annotations.VisibleForTesting; import org.opengrok.indexer.logger.LoggerFactory; import org.opengrok.indexer.util.ClassUtil; import org.opengrok.indexer.util.ForbiddenSymlinkException; @@ -99,6 +100,11 @@ public class Project implements Comparable, Nameable, Serializable { */ private boolean indexed = false; + /** + * This flag sets per-project reindex based on traversing SCM history. + */ + private Boolean historyBasedReindex = null; + /** * Set of groups which match this project. */ @@ -289,6 +295,28 @@ public void setMergeCommitsEnabled(boolean flag) { this.mergeCommitsEnabled = flag; } + /** + * @return true if this project handles renamed files. + */ + public boolean isHistoryBasedReindex() { + return historyBasedReindex != null && historyBasedReindex; + } + + /** + * @param flag true if project should handle renamed files, false otherwise. + */ + public void setHistoryBasedReindex(boolean flag) { + this.historyBasedReindex = flag; + } + + @VisibleForTesting + public void clearProperties() { + historyBasedReindex = null; + mergeCommitsEnabled = null; + historyEnabled = null; + handleRenamedFiles = null; + } + /** * Return groups where this project belongs. * @@ -436,6 +464,10 @@ public final void completeWithDefaults() { if (reviewPattern == null) { setReviewPattern(env.getReviewPattern()); } + + if (historyBasedReindex == null) { + setHistoryBasedReindex(env.isHistoryBasedReindex()); + } } /** @@ -476,8 +508,7 @@ public static Project getProject(String path) { * Get the project for a specific file. * * @param file the file to lookup - * @return the project that this file belongs to (or null if the file - * doesn't belong to a project) + * @return the project that this file belongs to (or {@code null} if the file doesn't belong to a project) */ public static Project getProject(File file) { Project ret = null; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java index 285fd08c9d8..03f97afc3f3 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java @@ -36,6 +36,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -62,8 +63,10 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.NamedThreadFactory; +import org.jetbrains.annotations.VisibleForTesting; import org.opengrok.indexer.authorization.AuthorizationFramework; import org.opengrok.indexer.authorization.AuthorizationStack; +import org.opengrok.indexer.history.FileCollector; import org.opengrok.indexer.history.HistoryGuru; import org.opengrok.indexer.history.RepositoryInfo; import org.opengrok.indexer.index.IndexDatabase; @@ -137,6 +140,12 @@ public List getSubFiles() { private final List subFiles = new ArrayList<>(); + /** + * Maps project name to FileCollector object. This is used to pass the list of files acquired when + * generating history cache in the first phase of indexing to the second phase of indexing. + */ + private final Map fileCollectorMap = new HashMap<>(); + /** * Creates a new instance of RuntimeEnvironment. Private to ensure a * singleton anti-pattern. @@ -465,7 +474,7 @@ public List getProjectList() { /** * Get project map. * - * @return a Map with all of the projects + * @return a Map with all the projects */ public Map getProjects() { return syncReadConfiguration(Configuration::getProjects); @@ -1417,6 +1426,27 @@ public void setConnectTimeout(int connectTimeout) { syncWriteConfiguration(connectTimeout, Configuration::setConnectTimeout); } + public boolean isHistoryBasedReindex() { + return syncReadConfiguration(Configuration::isHistoryBasedReindex); + } + + public void setHistoryBasedReindex(boolean flag) { + syncWriteConfiguration(flag, Configuration::setHistoryBasedReindex); + } + + public FileCollector getFileCollector(String name) { + return fileCollectorMap.get(name); + } + + public void setFileCollector(String name, FileCollector fileCollector) { + fileCollectorMap.put(name, fileCollector); + } + + @VisibleForTesting + public void clearFileCollector() { + fileCollectorMap.clear(); + } + /** * Read an configuration file and set it as the current configuration. * @@ -1491,7 +1521,8 @@ public void writeConfiguration(String host) throws IOException, InterruptedExcep * Project with some repository information is considered as a repository * otherwise it is just a simple project. */ - private void generateProjectRepositoriesMap() throws IOException { + @VisibleForTesting + public void generateProjectRepositoriesMap() throws IOException { repository_map.clear(); for (RepositoryInfo r : getRepositories()) { Project proj; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/ChangesetVisitor.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/ChangesetVisitor.java new file mode 100644 index 00000000000..f2b18da6b3e --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/ChangesetVisitor.java @@ -0,0 +1,33 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.history; + +import java.util.function.Consumer; + +public abstract class ChangesetVisitor implements Consumer { + boolean consumeMergeChangesets; + + protected ChangesetVisitor(boolean consumeMergeChangesets) { + this.consumeMergeChangesets = consumeMergeChangesets; + } +} \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java new file mode 100644 index 00000000000..bcb8281680d --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java @@ -0,0 +1,66 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.history; + +import java.util.Collection; +import java.util.SortedSet; +import java.util.TreeSet; + +/** + * This class is meant to collect files that were touched in some way by SCM update. + * The visitor argument contains the files separated based on the type of modification performed, + * however the consumer of this class is not interested in this classification. + * This is because when incrementally indexing a bunch of changesets, + * in one changeset a file may be deleted, only to be re-added in the next changeset etc. + */ +public class FileCollector extends ChangesetVisitor { + private final SortedSet files; + + /** + * Assumes comparing in the same way as {@code org.opengrok.indexer.index.IndexDatabase#FILENAME_COMPARATOR}. + */ + public FileCollector(boolean consumeMergeChangesets) { + super(consumeMergeChangesets); + files = new TreeSet<>(); + } + + public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { + if (changesetInfo.renamedFiles != null) { + files.addAll(changesetInfo.renamedFiles); + } + if (changesetInfo.files != null) { + files.addAll(changesetInfo.files); + } + if (changesetInfo.deletedFiles != null) { + files.addAll(changesetInfo.deletedFiles); + } + } + + public SortedSet getFiles() { + return files; + } + + void addFiles(Collection files) { + this.files.addAll(files); + } +} \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java index bd11ad41bd7..59ff50da1e6 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java @@ -43,6 +43,7 @@ import java.io.Writer; import java.nio.file.Files; import java.nio.file.NoSuchFileException; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; @@ -62,6 +63,7 @@ import io.micrometer.core.instrument.Counter; import io.micrometer.core.instrument.MeterRegistry; +import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.TestOnly; import org.opengrok.indexer.Metrics; import org.opengrok.indexer.configuration.PathAccepter; @@ -745,32 +747,26 @@ private String getRepositoryCachedRevPath(Repository repository) { * @param rev latest revision which has been just indexed */ private void storeLatestCachedRevision(Repository repository, String rev) { - Writer writer = null; - - try { - writer = new BufferedWriter(new OutputStreamWriter( - new FileOutputStream(getRepositoryCachedRevPath(repository)))); + Path newPath = Path.of(getRepositoryCachedRevPath(repository)); + try (Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newPath.toFile())))) { writer.write(rev); } catch (IOException ex) { LOGGER.log(Level.WARNING, String.format("Cannot write latest cached revision to file for repository %s", repository), ex); - } finally { - try { - if (writer != null) { - writer.close(); - } - } catch (IOException ex) { - LOGGER.log(Level.WARNING, "Cannot close file", ex); - } } } @Override + @Nullable public String getLatestCachedRevision(Repository repository) { + return getCachedRevision(repository, getRepositoryCachedRevPath(repository)); + } + + @Nullable + private String getCachedRevision(Repository repository, String revPath) { String rev; BufferedReader input; - String revPath = getRepositoryCachedRevPath(repository); if (revPath == null) { LOGGER.log(Level.WARNING, "no rev path for repository {0}", repository); return null; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index b900ce28a70..d6ac0222dcf 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2017, 2020, Chris Fraire . * Portions Copyright (c) 2019, Krystof Tulinger . */ @@ -33,7 +33,6 @@ import java.util.Date; import java.util.HashSet; import java.util.List; -import java.util.ArrayList; import java.util.HashMap; import java.util.Map; import java.util.Scanner; @@ -92,7 +91,7 @@ * Access to a Git repository. * */ -public class GitRepository extends RepositoryWithPerPartesHistory { +public class GitRepository extends RepositoryWithHistoryTraversal { private static final Logger LOGGER = LoggerFactory.getLogger(GitRepository.class); @@ -476,66 +475,44 @@ public History getHistory(File file, String sinceRevision, String tillRevision) return getHistory(file, sinceRevision, tillRevision, null); } - public History getHistory(File file, String sinceRevision, String tillRevision, - Integer numCommits) throws HistoryException { + public void traverseHistory(File file, String sinceRevision, String tillRevision, + Integer numCommits, List visitors) throws HistoryException { if (numCommits != null && numCommits <= 0) { - return null; + throw new HistoryException("invalid number of commits to retrieve"); } - final List entries = new ArrayList<>(); - final Set renamedFiles = new HashSet<>(); - boolean isDirectory = file.isDirectory(); try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName()); RevWalk walk = new RevWalk(repository)) { - if (sinceRevision != null) { - walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision))); - } - - if (tillRevision != null) { - walk.markStart(walk.lookupCommit(repository.resolve(tillRevision))); - } else { - walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD))); - } - - String relativePath = RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(file); - if (!getDirectoryNameRelative().equals(relativePath)) { - if (isHandleRenamedFiles()) { - Config config = repository.getConfig(); - config.setBoolean("diff", null, "renames", true); - org.eclipse.jgit.diff.DiffConfig dc = config.get(org.eclipse.jgit.diff.DiffConfig.KEY); - FollowFilter followFilter = FollowFilter.create(getGitFilePath(getRepoRelativePath(file)), dc); - walk.setTreeFilter(followFilter); - } else { - walk.setTreeFilter(AndTreeFilter.create( - PathFilter.create(getGitFilePath(getRepoRelativePath(file))), - TreeFilter.ANY_DIFF)); - } - } + setupWalk(file, sinceRevision, tillRevision, repository, walk); int num = 0; for (RevCommit commit : walk) { - if (commit.getParentCount() > 1 && !isMergeCommitsEnabled()) { - continue; - } - - HistoryEntry historyEntry = new HistoryEntry(commit.getId().abbreviate(GIT_ABBREV_LEN).name(), - commit.getAuthorIdent().getWhen(), - commit.getAuthorIdent().getName() + - " <" + commit.getAuthorIdent().getEmailAddress() + ">", - commit.getFullMessage(), true); + CommitInfo commitInfo = new CommitInfo(commit.getId().abbreviate(GIT_ABBREV_LEN).name(), + commit.getAuthorIdent().getWhen(), commit.getAuthorIdent().getName(), + commit.getAuthorIdent().getEmailAddress(), commit.getFullMessage()); + + for (ChangesetVisitor visitor : visitors) { + // Even though the repository itself is set (not) to consume the merge changesets, + // it should be up to the visitor to have the say. This is because of the history based reindex. + if (commit.getParentCount() > 1 && !visitor.consumeMergeChangesets) { + continue; + } - if (isDirectory) { - SortedSet files = new TreeSet<>(); - getFilesForCommit(renamedFiles, files, commit, repository); - historyEntry.setFiles(files); + if (isDirectory) { + SortedSet files = new TreeSet<>(); + final Set renamedFiles = new HashSet<>(); + final Set deletedFiles = new HashSet<>(); + getFilesForCommit(renamedFiles, files, deletedFiles, commit, repository); + visitor.accept(new ChangesetInfo(commitInfo, files, renamedFiles, deletedFiles)); + } else { + visitor.accept(new ChangesetInfo(commitInfo)); + } } - entries.add(historyEntry); - if (numCommits != null && ++num >= numCommits) { break; } @@ -543,46 +520,62 @@ public History getHistory(File file, String sinceRevision, String tillRevision, } catch (IOException | ForbiddenSymlinkException e) { throw new HistoryException(String.format("failed to get history for ''%s''", file), e); } + } - History result = new History(entries, renamedFiles); + private void setupWalk(File file, String sinceRevision, String tillRevision, Repository repository, RevWalk walk) + throws IOException, ForbiddenSymlinkException { - // Assign tags to changesets they represent - // We don't need to check if this repository supports tags, - // because we know it :-) - if (RuntimeEnvironment.getInstance().isTagsEnabled()) { - assignTagsInHistory(result); + if (sinceRevision != null) { + walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision))); } - return result; + if (tillRevision != null) { + walk.markStart(walk.lookupCommit(repository.resolve(tillRevision))); + } else { + walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD))); + } + + String relativePath = RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(file); + if (!getDirectoryNameRelative().equals(relativePath)) { + if (isHandleRenamedFiles()) { + Config config = repository.getConfig(); + config.setBoolean("diff", null, "renames", true); + org.eclipse.jgit.diff.DiffConfig dc = config.get(org.eclipse.jgit.diff.DiffConfig.KEY); + FollowFilter followFilter = FollowFilter.create(getGitFilePath(getRepoRelativePath(file)), dc); + walk.setTreeFilter(followFilter); + } else { + walk.setTreeFilter(AndTreeFilter.create( + PathFilter.create(getGitFilePath(getRepoRelativePath(file))), + TreeFilter.ANY_DIFF)); + } + } } /** - * Accumulate list of changed files and renamed files (if enabled) for given commit. - * @param renamedFiles result containing the renamed files in this commit - * @param files result containing changed files in this commit + * Accumulate list of changed/deleted/renamed files for given commit. + * @param renamedFiles output: renamed files in this commit (if renamed file handling is enabled) + * @param changedFiles output: changed files in this commit + * @param deletedFiles output: deleted files in this commit * @param commit RevCommit object * @param repository repository object * @throws IOException on error traversing the commit tree */ - private void getFilesForCommit(Set renamedFiles, SortedSet files, RevCommit commit, + private void getFilesForCommit(Set renamedFiles, SortedSet changedFiles, Set deletedFiles, + RevCommit commit, Repository repository) throws IOException { - int numParents = commit.getParentCount(); - - if (numParents == 1) { - getFiles(repository, commit.getParent(0), commit, files, renamedFiles); - } else if (numParents == 0) { // first commit + if (commit.getParentCount() == 0) { // first commit - add all files try (TreeWalk treeWalk = new TreeWalk(repository)) { treeWalk.addTree(commit.getTree()); treeWalk.setRecursive(true); while (treeWalk.next()) { - files.add(getNativePath(getDirectoryNameRelative()) + File.separator + + changedFiles.add(getNativePath(getDirectoryNameRelative()) + File.separator + getNativePath(treeWalk.getPathString())); } } } else { - getFiles(repository, commit.getParent(0), commit, files, renamedFiles); + getFilesBetweenCommits(repository, commit.getParent(0), commit, changedFiles, renamedFiles, deletedFiles); } } @@ -595,17 +588,18 @@ private static String getNativePath(String path) { } /** - * Assemble list of files that changed between 2 commits. + * Assemble list of changed/deleted/renamed files between a commit and its parent. * @param repository repository object * @param oldCommit parent commit - * @param newCommit new commit (the mehotd assumes oldCommit is its parent) - * @param files set of files that changed (excludes renamed files) - * @param renamedFiles set of renamed files (if renamed handling is enabled) + * @param newCommit new commit (the method assumes oldCommit is its parent) + * @param changedFiles output: set of changedFiles that changed (excludes renamed changedFiles) + * @param renamedFiles output: set of renamed files (if renamed handling is enabled) + * @param deletedFiles output: set of deleted files * @throws IOException on I/O problem */ - private void getFiles(org.eclipse.jgit.lib.Repository repository, - RevCommit oldCommit, RevCommit newCommit, - Set files, Set renamedFiles) + private void getFilesBetweenCommits(org.eclipse.jgit.lib.Repository repository, + RevCommit oldCommit, RevCommit newCommit, + Set changedFiles, Set renamedFiles, Set deletedFiles) throws IOException { OutputStream outputStream = NullOutputStream.INSTANCE; @@ -619,18 +613,42 @@ private void getFiles(org.eclipse.jgit.lib.Repository repository, prepareTreeParser(repository, newCommit)); for (DiffEntry diff : diffs) { - if (diff.getChangeType() != DiffEntry.ChangeType.DELETE) { - if (files != null) { - files.add(getNativePath(getDirectoryNameRelative()) + File.separator + - getNativePath(diff.getNewPath())); + String newPath = getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getNewPath()); + + handleDiff(changedFiles, renamedFiles, deletedFiles, diff, newPath); + } + } + } + + private void handleDiff(Set changedFiles, Set renamedFiles, Set deletedFiles, + DiffEntry diff, String newPath) { + + switch (diff.getChangeType()) { + case DELETE: + if (deletedFiles != null) { + // newPath would be "/dev/null" + String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getOldPath()); + deletedFiles.add(oldPath); + } + break; + case RENAME: + if (isHandleRenamedFiles()) { + renamedFiles.add(newPath); + if (deletedFiles != null) { + String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getOldPath()); + deletedFiles.add(oldPath); } } - - if (diff.getChangeType() == DiffEntry.ChangeType.RENAME && isHandleRenamedFiles()) { - renamedFiles.add(getNativePath(getDirectoryNameRelative()) + File.separator + - getNativePath(diff.getNewPath())); + break; + default: + if (changedFiles != null) { + // Added files (ChangeType.ADD) are treated as changed. + changedFiles.add(newPath); } - } + break; } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java index 735affeb1b2..d2fab8cc7a5 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java @@ -112,15 +112,14 @@ History get(File file, @Nullable Repository repository, boolean withFiles) boolean hasCacheForFile(File file) throws HistoryException; /** - * Get the revision identifier for the latest cached revision in a - * repository. + * Get the revision identifier for the latest cached revision in a repository. * * @param repository the repository whose latest revision to return - * @return a string representing the latest revision in the cache, or - * {@code null} if it is unknown + * @return a string representing the latest revision in the cache, + * or {@code null} if it is unknown + * @throws HistoryException on error */ - String getLatestCachedRevision(Repository repository) - throws HistoryException; + String getLatestCachedRevision(Repository repository) throws HistoryException; /** * Get the last modified times for all files and subdirectories in the @@ -130,8 +129,7 @@ String getLatestCachedRevision(Repository repository) * @param repository the repository in which the directory lives * @return a map from file names to modification times */ - Map getLastModifiedTimes( - File directory, Repository repository) + Map getLastModifiedTimes(File directory, Repository repository) throws HistoryException; /** diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java new file mode 100644 index 00000000000..1d527cfd5be --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java @@ -0,0 +1,68 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.history; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +class HistoryCollector extends ChangesetVisitor { + List entries; + Set renamedFiles; + + HistoryCollector(boolean consumeMergeChangesets) { + super(consumeMergeChangesets); + entries = new ArrayList<>(); + renamedFiles = new HashSet<>(); + } + + public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { + RepositoryWithHistoryTraversal.CommitInfo commit = changesetInfo.commit; + + // TODO: add a test for this + String author; + if (commit.authorEmail != null) { + author = commit.authorName + " <" + commit.authorEmail + ">"; + } else { + author = commit.authorName; + } + + HistoryEntry historyEntry = new HistoryEntry(commit.revision, + commit.date, author, + commit.message, true); + + if (changesetInfo.renamedFiles != null) { + renamedFiles.addAll(changesetInfo.renamedFiles); + } + if (changesetInfo.files != null) { + historyEntry.setFiles(changesetInfo.files); + } + if (changesetInfo.renamedFiles != null) { + // TODO: hack + historyEntry.getFiles().addAll(changesetInfo.renamedFiles); + } + + entries.add(historyEntry); + } +} \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java index 09b3162a28a..32fbc3d8434 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java @@ -47,6 +47,7 @@ import java.util.stream.Collectors; import org.jetbrains.annotations.Nullable; +import org.jetbrains.annotations.VisibleForTesting; import org.opengrok.indexer.configuration.CommandTimeoutType; import org.opengrok.indexer.configuration.Configuration.RemoteSCM; import org.opengrok.indexer.configuration.PathAccepter; @@ -128,10 +129,9 @@ public static HistoryGuru getInstance() { } /** - * Return whether or not a cache should be used for the history log. + * Return whether cache should be used for the history log. * - * @return {@code true} if the history cache has been enabled and - * initialized, {@code false} otherwise + * @return {@code true} if the history cache has been enabled and initialized, {@code false} otherwise */ private boolean useCache() { return historyCache != null; @@ -285,6 +285,10 @@ public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryExc return repository.getLastHistoryEntry(file, ui); } + public History getHistory(File file, boolean withFiles, boolean ui) throws HistoryException { + return getHistory(file, withFiles, ui, true); + } + /** * Get the history for the specified file. The history cache is tried first, then the repository. * @@ -292,10 +296,12 @@ public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryExc * @param withFiles whether the returned history should contain a * list of files touched by each changeset (the file list may be skipped if false, but it doesn't have to) * @param ui called from the webapp + * @param fallback fall back to fetching the history from the repository + * if it cannot be retrieved from history cache * @return history for the file * @throws HistoryException on error when accessing the history */ - public History getHistory(File file, boolean withFiles, boolean ui) throws HistoryException { + public History getHistory(File file, boolean withFiles, boolean ui, boolean fallback) throws HistoryException { final File dir = file.isDirectory() ? file : file.getParentFile(); final Repository repository = getRepository(dir); @@ -306,7 +312,7 @@ public History getHistory(File file, boolean withFiles, boolean ui) throws Histo History history; try { - history = getHistoryFromCache(file, repository, withFiles, true); + history = getHistoryFromCache(file, repository, withFiles, fallback); if (history != null) { return history; } @@ -779,7 +785,7 @@ private List getReposFromString(Collection repositories) { return repos; } - Repository getRepository(File file) { + public Repository getRepository(File file) { return repositoryLookup.getRepository(file.toPath(), repositoryRoots.keySet(), repositories, PathUtils::getRelativeToCanonical); } @@ -907,7 +913,8 @@ public void invalidateRepositories(Collection repos, C "history.repositories.invalidate"); } - private void clear() { + @VisibleForTesting + public void clear() { repositoryRoots.clear(); repositories.clear(); repositoryLookup.clear(); diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialHistoryParser.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialHistoryParser.java index 71804e10c7b..8ef31b20d31 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialHistoryParser.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialHistoryParser.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2017, Chris Fraire . */ package org.opengrok.indexer.history; @@ -33,10 +33,8 @@ import java.text.ParseException; import java.util.ArrayList; import java.util.Date; -import java.util.HashSet; import java.util.Iterator; import java.util.List; -import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import org.opengrok.indexer.configuration.RuntimeEnvironment; @@ -54,31 +52,30 @@ class MercurialHistoryParser implements Executor.StreamHandler { /** Prefix which identifies lines with the description of a commit. */ private static final String DESC_PREFIX = "description: "; - private List entries = new ArrayList<>(); + private List entries = new ArrayList<>(); private final MercurialRepository repository; private final String mydir; private boolean isDir; - private final Set renamedFiles = new HashSet<>(); + private final List visitors; - MercurialHistoryParser(MercurialRepository repository) { + MercurialHistoryParser(MercurialRepository repository, List visitors) { this.repository = repository; + this.visitors = visitors; mydir = repository.getDirectoryName() + File.separator; } /** * Parse the history for the specified file or directory. If a changeset is - * specified, only return the history from the changeset right after the - * specified one. + * specified, only return the history from the changeset right after the specified one. * * @param file the file or directory to get history for * @param sinceRevision the changeset right before the first one to fetch, or * {@code null} if all changesets should be fetched * @param tillRevision end revision or {@code null} * @param numCommits number of revisions to get - * @return history for the specified file or directory * @throws HistoryException if an error happens when parsing the history */ - History parse(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { + void parse(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { isDir = file.isDirectory(); try { Executor executor = repository.getHistoryLogExecutor(file, sinceRevision, tillRevision, false, @@ -86,21 +83,18 @@ History parse(File file, String sinceRevision, String tillRevision, Integer numC int status = executor.exec(true, this); if (status != 0) { - throw new HistoryException("Failed to get history for: \"" + - file.getAbsolutePath() + + throw new HistoryException("Failed to get history for: \"" + file.getAbsolutePath() + "\" Exit code: " + status); } } catch (IOException e) { - throw new HistoryException("Failed to get history for: \"" + - file.getAbsolutePath() + "\"", e); + throw new HistoryException("Failed to get history for: \"" + file.getAbsolutePath() + "\"", e); } - // If a changeset to start from is specified, remove that changeset - // from the list, since only the ones following it should be returned. - // Also check that the specified changeset was found, otherwise throw - // an exception. + // If a changeset to start from is specified, remove that changeset from the list, + // since only the ones following it should be returned. + // Also check that the specified changeset was found, otherwise throw an exception. if (sinceRevision != null) { - repository.removeAndVerifyOldestChangeset(entries, sinceRevision); + removeAndVerifyOldestChangeset(entries, sinceRevision); } // See getHistoryLogExecutor() for explanation. @@ -108,13 +102,44 @@ History parse(File file, String sinceRevision, String tillRevision, Integer numC removeChangesets(entries, tillRevision); } - return new History(entries, renamedFiles); + // The visitors are fed with the ChangesetInfo instances here (as opposed to in parse()), + // because of the above manipulations with the entries. + for (RepositoryWithHistoryTraversal.ChangesetInfo info : entries) { + for (ChangesetVisitor visitor : visitors) { + visitor.accept(info); + } + } + } + + /** + * Remove the oldest changeset from a list (assuming sorted with most recent + * changeset first) and verify that it is the changeset expected to find there. + * + * @param entries a list of {@code HistoryEntry} objects + * @param revision the revision we expect the oldest entry to have + * @throws HistoryException if the oldest entry was not the one we expected + */ + private void removeAndVerifyOldestChangeset(List entries, String revision) + throws HistoryException { + + RepositoryWithHistoryTraversal.ChangesetInfo entry = entries.isEmpty() ? null : entries.remove(entries.size() - 1); + + // TODO We should check more thoroughly that the changeset is the one + // we expected it to be, since some SCMs may change the revision + // numbers so that identical revision numbers does not always mean + // identical changesets. We could for example get the cached changeset + // and compare more fields, like author and date. + if (entry == null || !revision.equals(entry.commit.revision)) { + throw new HistoryException("Cached revision '" + revision + + "' not found in the repository " + + repository.getDirectoryName()); + } } - private void removeChangesets(List entries, String tillRevision) { - for (Iterator iter = entries.listIterator(); iter.hasNext(); ) { - HistoryEntry entry = iter.next(); - if (entry.getRevision().equals(tillRevision)) { + private void removeChangesets(List entries, String tillRevision) { + for (Iterator iter = entries.listIterator(); iter.hasNext(); ) { + RepositoryWithHistoryTraversal.ChangesetInfo entry = iter.next(); + if (entry.commit.revision.equals(tillRevision)) { break; } iter.remove(); @@ -123,7 +148,7 @@ private void removeChangesets(List entries, String tillRevision) { /** * Process the output from the {@code hg log} command and collect - * {@link HistoryEntry} elements. + * {@link org.opengrok.indexer.history.RepositoryWithHistoryTraversal.ChangesetInfo} elements. * * @param input The output from the process * @throws java.io.IOException If an error occurs while reading the stream @@ -134,15 +159,14 @@ public void processStream(InputStream input) throws IOException { BufferedReader in = new BufferedReader(new InputStreamReader(input)); entries = new ArrayList<>(); String s; - HistoryEntry entry = null; + RepositoryWithHistoryTraversal.ChangesetInfo entry = null; while ((s = in.readLine()) != null) { if (s.startsWith(MercurialRepository.CHANGESET)) { - entry = new HistoryEntry(); + entry = new RepositoryWithHistoryTraversal.ChangesetInfo(new RepositoryWithHistoryTraversal.CommitInfo()); entries.add(entry); - entry.setActive(true); - entry.setRevision(s.substring(MercurialRepository.CHANGESET.length()).trim()); + entry.commit.revision = s.substring(MercurialRepository.CHANGESET.length()).trim(); } else if (s.startsWith(MercurialRepository.USER) && entry != null) { - entry.setAuthor(s.substring(MercurialRepository.USER.length()).trim()); + entry.commit.authorName = s.substring(MercurialRepository.USER.length()).trim(); } else if (s.startsWith(MercurialRepository.DATE) && entry != null) { Date date; try { @@ -154,7 +178,7 @@ public void processStream(InputStream input) throws IOException { // throw new IOException("Could not parse date: " + s, pe); } - entry.setDate(date); + entry.commit.date = date; } else if (s.startsWith(MercurialRepository.FILES) && entry != null) { String[] strings = s.split(" "); for (int ii = 1; ii < strings.length; ++ii) { @@ -162,7 +186,7 @@ public void processStream(InputStream input) throws IOException { File f = new File(mydir, strings[ii]); try { String path = env.getPathRelativeToSourceRoot(f); - entry.addFile(path.intern()); + entry.files.add(path.intern()); } catch (ForbiddenSymlinkException e) { LOGGER.log(Level.FINER, e.getMessage()); // ignore @@ -189,11 +213,11 @@ public void processStream(InputStream input) throws IOException { String[] move = part.split(" \\("); File f = new File(mydir + move[0]); if (!move[0].isEmpty() && f.exists()) { - renamedFiles.add(repository.getDirectoryNameRelative() + File.separator + move[0]); + entry.renamedFiles.add(repository.getDirectoryNameRelative() + File.separator + move[0]); } } } else if (s.startsWith(DESC_PREFIX) && entry != null) { - entry.setMessage(decodeDescription(s)); + entry.commit.message = decodeDescription(s); } else if (s.equals(MercurialRepository.END_OF_ENTRY) && entry != null) { entry = null; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialRepository.java index 3f19cf3b009..8d86a76bfd6 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialRepository.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2017, 2019, Chris Fraire . */ package org.opengrok.indexer.history; @@ -52,7 +52,7 @@ * Access to a Mercurial repository. * */ -public class MercurialRepository extends RepositoryWithPerPartesHistory { +public class MercurialRepository extends RepositoryWithHistoryTraversal { private static final Logger LOGGER = LoggerFactory.getLogger(MercurialRepository.class); @@ -584,31 +584,12 @@ History getHistory(File file, String sinceRevision, String tillRevision) throws return getHistory(file, sinceRevision, tillRevision, null); } - History getHistory(File file, String sinceRevision, String tillRevision, - Integer numCommits) throws HistoryException { + // TODO: add a test for this + public void traverseHistory(File file, String sinceRevision, String tillRevision, + Integer numCommits, List visitors) throws HistoryException { - if (numCommits != null && numCommits <= 0) { - return null; - } - - RuntimeEnvironment env = RuntimeEnvironment.getInstance(); - // Note that the filtering of revisions based on sinceRevision is done - // in the history log executor by passing appropriate options to - // the 'hg' executable. - // This is done only for directories since if getHistory() is used - // for file, the file is renamed and its complete history is fetched - // so no sinceRevision filter is needed. - // See findOriginalName() code for more details. - History result = new MercurialHistoryParser(this). + new MercurialHistoryParser(this, visitors). parse(file, sinceRevision, tillRevision, numCommits); - - // Assign tags to changesets they represent. - // We don't need to check if this repository supports tags, - // because we know it :-) - if (env.isTagsEnabled()) { - assignTagsInHistory(result); - } - return result; } /** diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java index 4b35409b11b..7bb3e3de2c5 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java @@ -97,6 +97,7 @@ public abstract class Repository extends RepositoryInfo { */ abstract boolean hasHistoryForDirectories(); + @Override public String toString() { StringBuilder stringBuilder = new StringBuilder(); stringBuilder.append("{"); @@ -115,6 +116,9 @@ public String toString() { stringBuilder.append(","); stringBuilder.append("merge="); stringBuilder.append(this.isMergeCommitsEnabled()); + stringBuilder.append(","); + stringBuilder.append("historyBased="); + stringBuilder.append(this.isHistoryBasedReindex()); } stringBuilder.append("}"); return stringBuilder.toString(); @@ -154,7 +158,7 @@ public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryExc } } - Repository() { + protected Repository() { super(); ignoredFiles = new ArrayList<>(); ignoredDirs = new ArrayList<>(); @@ -298,11 +302,9 @@ public InputStream getHistoryGet(String parent, String basename, String rev) { /** * Returns if this repository tags only files changed in last commit, i.e. - * if we need to prepare list of repository-wide tags prior to creation of - * file history entries. + * if we need to prepare list of repository-wide tags prior to creation of file history entries. * - * @return True if we need tag list creation prior to file parsing, false by - * default. + * @return True if we need tag list creation prior to file parsing, false by default. */ boolean hasFileBasedTags() { return false; @@ -390,7 +392,8 @@ protected String getRevisionForAnnotate(String historyRevision) { } protected void doCreateCache(HistoryCache cache, String sinceRevision, File directory) throws HistoryException { - finishCreateCache(cache, getHistory(directory, sinceRevision), null); + History history = getHistory(directory, sinceRevision); + finishCreateCache(cache, history, null); } /** diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java index aad15010d83..fdb8432da5d 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java @@ -79,6 +79,8 @@ public class RepositoryInfo implements Serializable { private boolean historyEnabled; @DTOElement private boolean mergeCommitsEnabled; + @DTOElement + private boolean historyBasedReindex; /** * Empty constructor to support serialization. @@ -99,6 +101,7 @@ public RepositoryInfo(RepositoryInfo orig) { this.historyEnabled = orig.historyEnabled; this.handleRenamedFiles = orig.handleRenamedFiles; this.mergeCommitsEnabled = orig.mergeCommitsEnabled; + this.historyBasedReindex = orig.historyBasedReindex; } /** @@ -129,6 +132,20 @@ public void setMergeCommitsEnabled(boolean flag) { this.mergeCommitsEnabled = flag; } + /** + * @return true if history based reindex is enabled for the repository, false otherwise + */ + public boolean isHistoryBasedReindex() { + return this.historyBasedReindex; + } + + /** + * @param flag if history based reindex should be enabled for the repository + */ + public void setHistoryBasedReindex(boolean flag) { + this.historyBasedReindex = flag; + } + /** * @return true if the repository should have history cache. */ @@ -313,12 +330,14 @@ public void fillFromProject() { setHistoryEnabled(proj.isHistoryEnabled()); setHandleRenamedFiles(proj.isHandleRenamedFiles()); setMergeCommitsEnabled(proj.isMergeCommitsEnabled()); + setHistoryBasedReindex(proj.isHistoryBasedReindex()); } else { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); setHistoryEnabled(env.isHistoryEnabled()); setHandleRenamedFiles(env.isHandleHistoryOfRenamedFiles()); setMergeCommitsEnabled(env.isMergeCommitsEnabled()); + setHistoryBasedReindex(env.isHistoryBasedReindex()); } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java new file mode 100644 index 00000000000..f7723e867a5 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -0,0 +1,204 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.history; + +import org.jetbrains.annotations.Nullable; +import org.opengrok.indexer.configuration.Project; +import org.opengrok.indexer.configuration.RuntimeEnvironment; +import org.opengrok.indexer.logger.LoggerFactory; +import org.opengrok.indexer.util.Statistics; + +import java.io.File; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.SortedSet; +import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; + +public abstract class RepositoryWithHistoryTraversal extends RepositoryWithPerPartesHistory { + + private static final Logger LOGGER = LoggerFactory.getLogger(RepositoryWithHistoryTraversal.class); + + private static final long serialVersionUID = -1L; + + public static class CommitInfo { + String revision; + Date date; + String authorName; + String authorEmail; + String message; + + CommitInfo() { + } + + CommitInfo(String revision, Date date, String authorName, String authorEmail, String message) { + this.revision = revision; + this.date = date; + this.authorName = authorName; + this.authorEmail = authorEmail; + this.message = message; + } + } + + public static class ChangesetInfo { + CommitInfo commit; + public SortedSet files; + public Set renamedFiles; + public Set deletedFiles; + + ChangesetInfo(CommitInfo commit) { + this.commit = commit; + this.files = new TreeSet<>(); + this.renamedFiles = new HashSet<>(); + this.deletedFiles = new HashSet<>(); + } + + ChangesetInfo(CommitInfo commit, SortedSet files, Set renamedFiles, Set deletedFiles) { + this.commit = commit; + this.files = files; + this.renamedFiles = renamedFiles; + this.deletedFiles = deletedFiles; + } + } + + /** + * Traverse history of given file/directory. + * @param file File object + * @param sinceRevision start revision (non-inclusive) + * @param tillRevision end revision (inclusive) + * @param numCommits maximum number of commits to traverse (use {@code null} as unlimited) + * @param visitors list of {@link ChangesetVisitor} objects + * @throws HistoryException on error + */ + public abstract void traverseHistory(File file, String sinceRevision, @Nullable String tillRevision, + Integer numCommits, List visitors) throws HistoryException; + + public History getHistory(File file, String sinceRevision, String tillRevision, + Integer numCommits) throws HistoryException { + + if (numCommits != null && numCommits <= 0) { + return null; + } + + HistoryCollector historyCollector = new HistoryCollector(isMergeCommitsEnabled()); + traverseHistory(file, sinceRevision, tillRevision, numCommits, List.of(historyCollector)); + History history = new History(historyCollector.entries, historyCollector.renamedFiles); + + // Assign tags to changesets they represent. + if (RuntimeEnvironment.getInstance().isTagsEnabled() && hasFileBasedTags()) { + assignTagsInHistory(history); + } + + return history; + } + + @Override + protected void doCreateCache(HistoryCache cache, String sinceRevision, File directory) throws HistoryException { + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + + FileCollector fileCollector = null; + Project project = Project.getProject(directory); + if (project != null && isHistoryBasedReindex() && isHistoryEnabled()) { + // The fileCollector has to go through merge changesets no matter what the configuration says + // in order to detect the files that need to be indexed. + fileCollector = new FileCollector(true); + } + + if (!env.isHistoryCachePerPartesEnabled()) { + LOGGER.log(Level.INFO, "repository {0} supports per partes history cache creation however " + + "it is disabled in the configuration. Generating history cache as whole.", this); + + HistoryCollector historyCollector = new HistoryCollector(isMergeCommitsEnabled()); + List visitors = new ArrayList<>(); + visitors.add(historyCollector); + if (fileCollector != null) { + visitors.add(fileCollector); + } + traverseHistory(directory, sinceRevision, null, null, visitors); + History history = new History(historyCollector.entries, historyCollector.renamedFiles); + + // Assign tags to changesets they represent. + if (env.isTagsEnabled() && hasFileBasedTags()) { + assignTagsInHistory(history); + } + + finishCreateCache(cache, history, null); + + updateFileCollector(fileCollector, project); + + return; + } + + // For repositories that supports this, avoid storing complete History in memory + // (which can be sizeable, at least for the initial indexing, esp. if merge changeset support is enabled), + // by splitting the work into multiple chunks. + BoundaryChangesets boundaryChangesets = new BoundaryChangesets(this); + List boundaryChangesetList = new ArrayList<>(boundaryChangesets.getBoundaryChangesetIDs(sinceRevision)); + boundaryChangesetList.add(null); // to finish the last step in the cycle below + LOGGER.log(Level.FINE, "boundary changesets: {0}", boundaryChangesetList); + int cnt = 0; + for (String tillRevision: boundaryChangesetList) { + Statistics stat = new Statistics(); + LOGGER.log(Level.FINEST, "storing history cache for revision range ({0}, {1})", + new Object[]{sinceRevision, tillRevision}); + + HistoryCollector historyCollector = new HistoryCollector(isMergeCommitsEnabled()); + List visitors = new ArrayList<>(); + visitors.add(historyCollector); + if (fileCollector != null) { + visitors.add(fileCollector); + } + traverseHistory(directory, sinceRevision, tillRevision, null, visitors); + History history = new History(historyCollector.entries, historyCollector.renamedFiles); + + // Assign tags to changesets they represent. + if (env.isTagsEnabled() && hasFileBasedTags()) { + assignTagsInHistory(history); + } + + finishCreateCache(cache, history, tillRevision); + sinceRevision = tillRevision; + stat.report(LOGGER, Level.FINE, String.format("finished chunk %d/%d of history cache for repository ''%s''", + ++cnt, boundaryChangesetList.size(), this.getDirectoryName())); + } + + updateFileCollector(fileCollector, project); + } + + private void updateFileCollector(FileCollector fileCollector, Project project) { + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + + if (project != null && fileCollector != null) { + FileCollector fileCollectorEnv = env.getFileCollector(project.getName()); + if (fileCollectorEnv == null) { + env.setFileCollector(project.getName(), fileCollector); + } else { + fileCollectorEnv.addFiles(fileCollector.getFiles()); + } + } + } +} diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java index 3a9e1d29247..0c656e0fbbb 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java @@ -74,7 +74,8 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire if (!RuntimeEnvironment.getInstance().isHistoryCachePerPartesEnabled()) { LOGGER.log(Level.INFO, "repository {0} supports per partes history cache creation however " + "it is disabled in the configuration. Generating history cache as whole.", this); - finishCreateCache(cache, getHistory(directory, sinceRevision), null); + History history = getHistory(directory, sinceRevision); + finishCreateCache(cache, history, null); return; } @@ -90,9 +91,9 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire Statistics stat = new Statistics(); LOGGER.log(Level.FINEST, "storing history cache for revision range ({0}, {1})", new Object[]{sinceRevision, tillRevision}); - finishCreateCache(cache, getHistory(directory, sinceRevision, tillRevision), tillRevision); + History history = getHistory(directory, sinceRevision, tillRevision); + finishCreateCache(cache, history, tillRevision); sinceRevision = tillRevision; - stat.report(LOGGER, Level.FINE, String.format("finished chunk %d/%d of history cache for repository ''%s''", ++cnt, boundaryChangesetList.size(), this.getDirectoryName())); } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index b5f94f95ddf..6f14d6feaa3 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -87,6 +87,8 @@ import org.apache.lucene.store.SimpleFSLockFactory; import org.apache.lucene.util.BytesRef; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.jetbrains.annotations.VisibleForTesting; import org.opengrok.indexer.analysis.AbstractAnalyzer; import org.opengrok.indexer.analysis.AnalyzerFactory; import org.opengrok.indexer.analysis.AnalyzerGuru; @@ -97,8 +99,11 @@ import org.opengrok.indexer.configuration.PathAccepter; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; +import org.opengrok.indexer.history.FileCollector; import org.opengrok.indexer.history.HistoryGuru; import org.opengrok.indexer.history.Repository; +import org.opengrok.indexer.history.RepositoryInfo; +import org.opengrok.indexer.history.RepositoryWithHistoryTraversal; import org.opengrok.indexer.logger.LoggerFactory; import org.opengrok.indexer.search.QueryBuilder; import org.opengrok.indexer.util.ForbiddenSymlinkException; @@ -113,7 +118,7 @@ import static org.opengrok.indexer.web.ApiUtils.waitForAsyncApi; /** - * This class is used to create / update the index databases. Currently we use + * This class is used to create / update the index databases. Currently, we use * one index database per project. * * @author Trond Norbye @@ -129,7 +134,7 @@ public class IndexDatabase { private static final Set REVERT_COUNTS_FIELDS; - private final Object INSTANCE_LOCK = new Object(); + private static final Object INSTANCE_LOCK = new Object(); /** * Key is canonical path; Value is the first accepted, absolute path. Map @@ -168,6 +173,8 @@ public class IndexDatabase { public static final String XREF_DIR = "xref"; public static final String SUGGESTER_DIR = "suggester"; + private final IndexDownArgsFactory indexDownArgsFactory; + /** * Create a new instance of the Index Database. Use this constructor if you * don't use any projects @@ -182,15 +189,21 @@ public IndexDatabase() throws IOException { * Create a new instance of an Index Database for a given project. * * @param project the project to create the database for - * @throws java.io.IOException if an error occurs while creating - * directories + * @param factory {@link IndexDownArgsFactory} instance + * @throws java.io.IOException if an error occurs while creating directories */ - public IndexDatabase(Project project) throws IOException { + public IndexDatabase(Project project, IndexDownArgsFactory factory) throws IOException { + indexDownArgsFactory = factory; this.project = project; lockfact = NoLockFactory.INSTANCE; initialize(); } + @VisibleForTesting + IndexDatabase(Project project) throws IOException { + this(project, new IndexDownArgsFactory()); + } + static { CHECK_FIELDS = new HashSet<>(); CHECK_FIELDS.add(QueryBuilder.TYPE); @@ -203,13 +216,13 @@ public IndexDatabase(Project project) throws IOException { } /** - * Update the index database for all of the projects. + * Update the index database for all the projects. * * @param listener where to signal the changes to the database * @throws IOException if an error occurs */ - static CountDownLatch updateAll(IndexChangedListener listener) - throws IOException { + static CountDownLatch updateAll(IndexChangedListener listener) throws IOException { + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); List dbs = new ArrayList<>(); @@ -221,8 +234,7 @@ static CountDownLatch updateAll(IndexChangedListener listener) dbs.add(new IndexDatabase()); } - IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance(). - getIndexerParallelizer(); + IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance().getIndexerParallelizer(); CountDownLatch latch = new CountDownLatch(dbs.size()); for (IndexDatabase d : dbs) { final IndexDatabase db = d; @@ -355,8 +367,7 @@ public boolean addDirectory(String dir) { private void showFileCount(String dir, IndexDownArgs args) { if (RuntimeEnvironment.getInstance().isPrintProgress()) { - LOGGER.log(Level.INFO, String.format("Need to process: %d files for %s", - args.cur_count, dir)); + LOGGER.log(Level.INFO, String.format("Need to process: %d files for %s", args.curCount, dir)); } } @@ -410,6 +421,135 @@ private void markProjectIndexed(Project project) { } } + private static List getRepositoriesForProject(Project project) { + List repositoryList = new ArrayList<>(); + + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + List repositoryInfoList = env.getProjectRepositoriesMap().get(project); + + if (repositoryInfoList != null) { + for (RepositoryInfo repositoryInfo : repositoryInfoList) { + Repository repository = HistoryGuru.getInstance().getRepository(new File(repositoryInfo.getDirectoryName())); + if (repository != null) { + repositoryList.add(repository); + } + } + } + + return repositoryList; + } + + /** + * @return whether the repositories of given project are ready for history based reindex + */ + private boolean isReadyForHistoryBasedReindex() { + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + + // So far the history based reindex does not work without projects. + if (!env.hasProjects()) { + LOGGER.log(Level.FINEST, "projects are disabled, will be indexed by directory traversal."); + return false; + } + + if (project == null) { + LOGGER.log(Level.FINEST, "no project, will be indexed by directory traversal."); + return false; + } + + // History needs to be enabled for the history cache to work (see the comment below). + if (!project.isHistoryEnabled()) { + LOGGER.log(Level.FINEST, "history is disabled, will be indexed by directory traversal."); + return false; + } + + // History cache is necessary to get the last indexed revision for given repository. + if (!env.isHistoryCache()) { + LOGGER.log(Level.FINEST, "history cache is disabled, will be indexed by directory traversal."); + return false; + } + + // Per project tunable can override the global tunable, therefore env.isHistoryBasedReindex() is not checked. + if (!project.isHistoryBasedReindex()) { + LOGGER.log(Level.FINEST, "history-based reindex is disabled, will be indexed by directory traversal."); + return false; + } + + /* + * Check that the index is present for this project. + * In case of the initial indexing, the traversal of all changesets would most likely be counterproductive, + * assuming traversal of directory tree is cheaper than getting the files from SCM history + * in such case. + */ + try { + if (getNumFiles() == 0) { + LOGGER.log(Level.FINEST, "zero number of documents for project {0}, " + + "will be indexed by directory traversal.", project); + return false; + } + } catch (IOException e) { + LOGGER.log(Level.FINEST, "failed to get number of documents for project {0}," + + "will be indexed by directory traversal.", project); + return false; + } + + // If there was no change to any of the repositories of the project, a FileCollector instance will be returned + // however the list of files therein will be empty which is legitimate situation (no change of the project). + // Only in a case where getFileCollector() returns null (hinting at something went wrong), + // the file based traversal should be done. + if (env.getFileCollector(project.getName()) == null) { + LOGGER.log(Level.FINEST, "no file collector for project {0}, will be indexed by directory traversal.", + project); + return false; + } + + List repositories = getRepositoriesForProject(project); + // Projects without repositories have to be indexed using indexDown(). + if (repositories.isEmpty()) { + LOGGER.log(Level.FINEST, "project {0} has no repositories, will be indexed by directory traversal.", + project); + return false; + } + + for (Repository repository : repositories) { + if (!isReadyForHistoryBasedReindex(repository)) { + return false; + } + } + + // Here it is assumed there are no files untracked by the repositories of this project. + return true; + } + + /** + * @param repository Repository instance + * @return true if the repository can be used for history based reindex + */ + @VisibleForTesting + boolean isReadyForHistoryBasedReindex(Repository repository) { + if (!repository.isHistoryEnabled()) { + LOGGER.log(Level.FINE, "history is disabled for {0}, " + + "the associated project {1} will be indexed using directory traversal", + new Object[]{repository, project}); + return false; + } + + if (!repository.isHistoryBasedReindex()) { + LOGGER.log(Level.FINE, "history based reindex is disabled for {0}, " + + "the associated project {1} will be indexed using directory traversal", + new Object[]{repository, project}); + return false; + } + + if (!(repository instanceof RepositoryWithHistoryTraversal)) { + LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal," + + "the project will be indexed using directory traversal.", + new Object[]{project, repository}); + return false; + } + + return true; + } + /** * Update the content of this index database. * @@ -461,7 +601,7 @@ public void update() throws IOException { dir = Util.fixPathIfWindows(dir); - String startuid = Util.path2uid(dir, ""); + String startUid = Util.path2uid(dir, ""); reader = DirectoryReader.open(indexDirectory); // open existing index countsAggregator = new NumLinesLOCAggregator(); settings = readAnalysisSettings(); @@ -492,46 +632,31 @@ public void update() throws IOException { try { if (terms != null) { uidIter = terms.iterator(); - TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid + TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startUid)); //init uid if (stat == TermsEnum.SeekStatus.END) { uidIter = null; LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?", - startuid); + startUid); } } - // The actual indexing happens in indexParallel(). + // The actual indexing happens in indexParallel(). Here we merely collect the files + // that need to be indexed and the files that should be removed. + IndexDownArgs args = indexDownArgsFactory.getIndexDownArgs(); + boolean usedHistory = getIndexDownArgs(dir, sourceRoot, args); - IndexDownArgs args = new IndexDownArgs(); - Statistics elapsed = new Statistics(); - LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir); - indexDown(sourceRoot, dir, args); - elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), - "indexer.db.directory.traversal"); - - showFileCount(dir, args); + // Traverse the trailing terms. This needs to be done before indexParallel() because + // in some cases it can add items to the args parameter. + processTrailingTerms(startUid, usedHistory, args); - args.cur_count = 0; - elapsed = new Statistics(); + args.curCount = 0; + Statistics elapsed = new Statistics(); LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir); indexParallel(dir, args); elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir), "indexer.db.directory.index"); - // Remove data for the trailing terms that indexDown() - // did not traverse. These correspond to files that have been - // removed and have higher ordering than any present files. - while (uidIter != null && uidIter.term() != null - && uidIter.term().utf8ToString().startsWith(startuid)) { - - removeFile(true); - BytesRef next = uidIter.next(); - if (next == null) { - uidIter = null; - } - } - /* * As a signifier that #Lines/LOC are comprehensively * stored so that later calculation is in deltas mode, we @@ -602,6 +727,86 @@ public void update() throws IOException { } } + private void processTrailingTerms(String startUid, boolean usedHistory, IndexDownArgs args) throws IOException { + while (uidIter != null && uidIter.term() != null + && uidIter.term().utf8ToString().startsWith(startUid)) { + + if (usedHistory) { + // Allow for forced reindex. For history based reindex the trailing terms + // correspond to the files that have not changed. Such files might need to be re-indexed + // if the index format changed. + String termPath = Util.uid2url(uidIter.term().utf8ToString()); + File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath); + boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && + checkSettings(termFile, termPath); + if (!matchOK) { + removeFile(false); + + args.curCount++; + args.works.add(new IndexFileWork(termFile, termPath)); + } + } else { + // Remove data for the trailing terms that getIndexDownArgs() + // did not traverse. These correspond to the files that have been + // removed and have higher ordering than any present files. + removeFile(true); + } + + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } + } + } + + /** + * @param dir directory path + * @param sourceRoot source root File object + * @param args {@link IndexDownArgs} instance (output) + * @return true if history was used to gather the {@code IndexDownArgs} + * @throws IOException on error + */ + @VisibleForTesting + boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws IOException { + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + boolean historyBased = isReadyForHistoryBasedReindex(); + + if (LOGGER.isLoggable(Level.INFO)) { + LOGGER.log(Level.INFO, String.format("Starting file collection using %s traversal for directory '%s'", + historyBased ? "history" : "file-system", dir)); + } + Statistics elapsed = new Statistics(); + if (historyBased) { + indexDownUsingHistory(env.getSourceRootFile(), args); + } else { + indexDown(sourceRoot, dir, args); + } + + elapsed.report(LOGGER, String.format("Done file collection for directory '%s'", dir), + "indexer.db.collection"); + + showFileCount(dir, args); + + return historyBased; + } + + /** + * Executes the first, serial stage of indexing, by going through set of files assembled from history. + * @param sourceRoot path to the source root (same as {@link RuntimeEnvironment#getSourceRootPath()}) + * @param args {@link IndexDownArgs} instance where the resulting files to be indexed will be stored + * @throws IOException on error + */ + @VisibleForTesting + void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOException { + + FileCollector fileCollector = RuntimeEnvironment.getInstance().getFileCollector(project.getName()); + + for (String path : fileCollector.getFiles()) { + File file = new File(sourceRoot, path); + processFileIncremental(args, file, path); + } + } + /** * Optimize all index databases. * @@ -732,8 +937,7 @@ private File whatXrefFile(String path, boolean compress) { private void removeXrefFile(String path) { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); File xrefFile = whatXrefFile(path, env.isCompressXref()); - PendingFileDeletion pending = new PendingFileDeletion( - xrefFile.getAbsolutePath()); + PendingFileDeletion pending = new PendingFileDeletion(xrefFile.getAbsolutePath()); completer.add(pending); } @@ -742,8 +946,8 @@ private void removeHistoryFile(String path) { } /** - * Remove a stale file (uidIter.term().text()) from the index database and - * history cache, and queue the removal of xref. + * Remove a stale file from the index database and potentially also from history cache, + * and queue the removal of the associated xref file. * * @param removeHistory if false, do not remove history cache for this file * @throws java.io.IOException if an error occurs @@ -755,6 +959,23 @@ private void removeFile(boolean removeHistory) throws IOException { listener.fileRemove(path); } + removeFileDocUid(path); + + removeXrefFile(path); + + if (removeHistory) { + removeHistoryFile(path); + } + + setDirty(); + + for (IndexChangedListener listener : listeners) { + listener.fileRemoved(path); + } + } + + private void removeFileDocUid(String path) throws IOException { + // Determine if a reversal of counts is necessary, and execute if so. if (isCountingDeltas) { postsIter = uidIter.postings(postsIter); @@ -762,28 +983,22 @@ private void removeFile(boolean removeHistory) throws IOException { // Read a limited-fields version of the document. Document doc = reader.document(postsIter.docID(), REVERT_COUNTS_FIELDS); if (doc != null) { - NullableNumLinesLOC nullableCounts = NumLinesLOCUtil.read(doc); - if (nullableCounts.getNumLines() != null && nullableCounts.getLOC() != null) { - NumLinesLOC counts = new NumLinesLOC(path, - -nullableCounts.getNumLines(), - -nullableCounts.getLOC()); - countsAggregator.register(counts); - } + decrementLOCforDoc(path, doc); break; } } } writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term())); + } - removeXrefFile(path); - if (removeHistory) { - removeHistoryFile(path); - } - - setDirty(); - for (IndexChangedListener listener : listeners) { - listener.fileRemoved(path); + private void decrementLOCforDoc(String path, Document doc) { + NullableNumLinesLOC nullableCounts = NumLinesLOCUtil.read(doc); + if (nullableCounts.getNumLines() != null && nullableCounts.getLOC() != null) { + NumLinesLOC counts = new NumLinesLOC(path, + -nullableCounts.getNumLines(), + -nullableCounts.getLOC()); + countsAggregator.register(counts); } } @@ -796,8 +1011,7 @@ private void removeFile(boolean removeHistory) throws IOException { * @throws java.io.IOException if an error occurs * @throws InterruptedException if a timeout occurs */ - private void addFile(File file, String path, Ctags ctags) - throws IOException, InterruptedException { + private void addFile(File file, String path, Ctags ctags) throws IOException, InterruptedException { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); AbstractAnalyzer fa = getAnalyzerFor(file, path); @@ -871,6 +1085,7 @@ private void addFile(File file, String path, Ctags ctags) } setDirty(); + for (IndexChangedListener listener : listeners) { listener.fileAdded(path, fa.getClass().getSimpleName()); } @@ -1207,19 +1422,33 @@ private boolean isLocal(String path) { return false; } + private void handleSymlink(String path, AcceptSymlinkRet ret) { + /* + * If ret.localRelPath is defined, then a symlink was detected but + * not "accepted" to avoid redundancy with an already-accepted + * canonical target. Set up for a deferred creation of a symlink + * within xref/. + */ + if (ret.localRelPath != null) { + File xrefPath = new File(xrefDir, path); + PendingSymlinkage psym = new PendingSymlinkage(xrefPath.getAbsolutePath(), ret.localRelPath); + completer.add(psym); + } + } + /** - * Executes the first, serial stage of indexing, recursively. + * Executes the first, serial stage of indexing, by recursively traversing the file system + * and index alongside. *

Files at least are counted, and any deleted or updated files (based on * comparison to the Lucene index) are passed to - * {@link #removeFile(boolean)}. New or updated files are noted for - * indexing. + * {@link #removeFile(boolean)}. New or updated files are noted for indexing. * @param dir the root indexDirectory to generate indexes for * @param parent path to parent directory * @param args arguments to control execution and for collecting a list of * files for indexing */ - private void indexDown(File dir, String parent, IndexDownArgs args) - throws IOException { + @VisibleForTesting + void indexDown(File dir, String parent, IndexDownArgs args) throws IOException { if (isInterrupted()) { return; @@ -1227,18 +1456,7 @@ private void indexDown(File dir, String parent, IndexDownArgs args) AcceptSymlinkRet ret = new AcceptSymlinkRet(); if (!accept(dir, ret)) { - /* - * If ret.localRelPath is defined, then a symlink was detected but - * not "accepted" to avoid redundancy with an already-accepted - * canonical target. Set up for a deferred creation of a symlink - * within xref/. - */ - if (ret.localRelPath != null) { - File xrefPath = new File(xrefDir, parent); - PendingSymlinkage psym = new PendingSymlinkage( - xrefPath.getAbsolutePath(), ret.localRelPath); - completer.add(psym); - } + handleSymlink(parent, ret); return; } @@ -1253,82 +1471,157 @@ private void indexDown(File dir, String parent, IndexDownArgs args) for (File file : files) { String path = parent + File.separator + file.getName(); if (!accept(dir, file, ret)) { - if (ret.localRelPath != null) { - // See note above about ret.localRelPath. - File xrefPath = new File(xrefDir, path); - PendingSymlinkage psym = new PendingSymlinkage( - xrefPath.getAbsolutePath(), ret.localRelPath); - completer.add(psym); - } + handleSymlink(path, ret); } else { if (file.isDirectory()) { indexDown(file, path, args); } else { - args.cur_count++; - - if (uidIter != null) { - path = Util.fixPathIfWindows(path); - String uid = Util.path2uid(path, - DateTools.timeToString(file.lastModified(), - DateTools.Resolution.MILLISECOND)); // construct uid for doc - BytesRef buid = new BytesRef(uid); - // Traverse terms that have smaller UID than the current - // file, i.e. given the ordering they positioned before the file - // or it is the file that has been modified. - while (uidIter != null && uidIter.term() != null - && uidIter.term().compareTo(emptyBR) != 0 - && uidIter.term().compareTo(buid) < 0) { - - // If the term's path matches path of currently processed file, - // it is clear that the file has been modified and thus - // removeFile() will be followed by call to addFile() in indexParallel(). - // In such case, instruct removeFile() not to remove history - // cache for the file so that incremental history cache - // generation works. - String termPath = Util.uid2url(uidIter.term().utf8ToString()); - removeFile(!termPath.equals(path)); - - BytesRef next = uidIter.next(); - if (next == null) { - uidIter = null; - } - } + processFile(args, file, path); + } + } + } + } - // If the file was not modified, probably skip to the next one. - if (uidIter != null && uidIter.term() != null && - uidIter.term().bytesEquals(buid)) { - - /* - * Possibly short-circuit to force reindexing of prior-version indexes. - */ - boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && - checkSettings(file, path); - if (!matchOK) { - removeFile(false); - } + /** + * Compared with {@link #processFile(IndexDownArgs, File, String)}, this method's file/path arguments + * represent files that have actually changed in some way, while the other method's argument represent + * files present on disk. + * @param args {@link IndexDownArgs} instance + * @param file File object + * @param path path of the file argument relative to source root (with leading slash) + * @throws IOException on error + */ + private void processFileIncremental(IndexDownArgs args, File file, String path) throws IOException { + if (uidIter != null) { + path = Util.fixPathIfWindows(path); + // Traverse terms until reaching one that matches the path of given file. + while (uidIter != null && uidIter.term() != null + && uidIter.term().compareTo(emptyBR) != 0 + && Util.uid2url(uidIter.term().utf8ToString()).compareTo(path) < 0) { + + // A file that was not changed. + /* + * Possibly short-circuit to force reindexing of prior-version indexes. + */ + String termPath = Util.uid2url(uidIter.term().utf8ToString()); + File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath); + boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && + checkSettings(termFile, termPath); + if (!matchOK) { + removeFile(false); + + args.curCount++; + args.works.add(new IndexFileWork(termFile, termPath)); + } - BytesRef next = uidIter.next(); - if (next == null) { - uidIter = null; - } + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } + } - if (matchOK) { - continue; // keep matching docs - } - } - } + if (uidIter != null && uidIter.term() != null + && Util.uid2url(uidIter.term().utf8ToString()).equals(path)) { + /* + * At this point we know that the file has corresponding term in the index + * and has changed in some way. Either it was deleted or it was changed. + */ + if (!file.exists()) { + removeFile(true); + } else { + removeFile(false); + + args.curCount++; + args.works.add(new IndexFileWork(file, path)); + } + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } + } else { + // Potentially new file. A file might be added and then deleted, + // so it is necessary to check its existence. + if (file.exists()) { + args.curCount++; args.works.add(new IndexFileWork(file, path)); } } + } else { + if (file.exists()) { + args.curCount++; + args.works.add(new IndexFileWork(file, path)); + } } } + /** + * Process a file on disk w.r.t. index. + * @param args {@link IndexDownArgs} instance + * @param file File object + * @param path path corresponding to the file parameter, relative to source root (with leading slash) + * @throws IOException on error + */ + private void processFile(IndexDownArgs args, File file, String path) throws IOException { + if (uidIter != null) { + path = Util.fixPathIfWindows(path); + String uid = Util.path2uid(path, + DateTools.timeToString(file.lastModified(), + DateTools.Resolution.MILLISECOND)); // construct uid for doc + BytesRef buid = new BytesRef(uid); + // Traverse terms that have smaller UID than the current file, + // i.e. given the ordering they positioned before the file, + // or it is the file that has been modified. + while (uidIter != null && uidIter.term() != null + && uidIter.term().compareTo(emptyBR) != 0 + && uidIter.term().compareTo(buid) < 0) { + + // If the term's path matches path of currently processed file, + // it is clear that the file has been modified and thus + // removeFile() will be followed by call to addFile() in indexParallel(). + // In such case, instruct removeFile() not to remove history + // cache for the file so that incremental history cache + // generation works. + String termPath = Util.uid2url(uidIter.term().utf8ToString()); + removeFile(!termPath.equals(path)); + + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } + } + + // If the file was not modified, probably skip to the next one. + if (uidIter != null && uidIter.term() != null && uidIter.term().bytesEquals(buid)) { + + /* + * Possibly short-circuit to force reindexing of prior-version indexes. + */ + boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && + checkSettings(file, path); + if (!matchOK) { + removeFile(false); + } + + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } + + if (matchOK) { + return; + } + } + } + + args.curCount++; + args.works.add(new IndexFileWork(file, path)); + } + /** * Executes the second, parallel stage of indexing. * @param dir the parent directory (when appended to SOURCE_ROOT) - * @param args contains a list of files to index, found during the earlier - * stage + * @param args contains a list of files to index, found during the earlier stage */ private void indexParallel(String dir, IndexDownArgs args) { @@ -1402,7 +1695,7 @@ private void indexParallel(String dir, IndexDownArgs args) { LOGGER.log(Level.SEVERE, exmsg, e); } - args.cur_count = currentCounter.intValue(); + args.curCount = currentCounter.intValue(); // Start with failureCount=worksCount, and then subtract successes. int failureCount = worksCount; @@ -1666,7 +1959,7 @@ public static Definitions getDefinitions(File file) throws ParseException, IOExc } /** - * @param file File object of a file under source root + * @param file File object for a file under source root * @return Document object for the file or {@code null} * @throws IOException on I/O error * @throws ParseException on problem with building Query @@ -1683,34 +1976,39 @@ public static Document getDocument(File file) throws IOException, ParseException // Sanitize Windows path delimiters in order not to conflict with Lucene escape character. path = path.replace("\\", "/"); - try (IndexReader ireader = getIndexReader(path)) { - if (ireader == null) { - // No index, no document.. - return null; - } + try (IndexReader indexReader = getIndexReader(path)) { + return getDocument(path, indexReader); + } + } - Document doc; - Query q = new QueryBuilder().setPath(path).build(); - IndexSearcher searcher = new IndexSearcher(ireader); - Statistics stat = new Statistics(); - TopDocs top = searcher.search(q, 1); - stat.report(LOGGER, Level.FINEST, "search via getDocument done", - "search.latency", new String[]{"category", "getdocument", - "outcome", top.totalHits.value == 0 ? "empty" : "success"}); - if (top.totalHits.value == 0) { - // No hits, no document... - return null; - } - doc = searcher.doc(top.scoreDocs[0].doc); - String foundPath = doc.get(QueryBuilder.PATH); + @Nullable + private static Document getDocument(String path, IndexReader indexReader) throws ParseException, IOException { + if (indexReader == null) { + // No index, no document.. + return null; + } - // Only use the document if we found an exact match. - if (!path.equals(foundPath)) { - return null; - } + Document doc; + Query q = new QueryBuilder().setPath(path).build(); + IndexSearcher searcher = new IndexSearcher(indexReader); + Statistics stat = new Statistics(); + TopDocs top = searcher.search(q, 1); + stat.report(LOGGER, Level.FINEST, "search via getDocument() done", + "search.latency", new String[]{"category", "getdocument", + "outcome", top.totalHits.value == 0 ? "empty" : "success"}); + if (top.totalHits.value == 0) { + // No hits, no document... + return null; + } + doc = searcher.doc(top.scoreDocs[0].doc); + String foundPath = doc.get(QueryBuilder.PATH); - return doc; + // Only use the document if we found an exact match. + if (!path.equals(foundPath)) { + return null; } + + return doc; } @Override @@ -1806,10 +2104,12 @@ private void finishWriting() throws IOException { try { writeAnalysisSettings(); + LOGGER.log(Level.FINE, "preparing to commit changes to Lucene index"); // TODO add info about which database writer.prepareCommit(); hasPendingCommit = true; int n = completer.complete(); + // TODO: add elapsed LOGGER.log(Level.FINE, "completed {0} object(s)", n); // Just before commit(), reset the `hasPendingCommit' flag, @@ -1834,8 +2134,8 @@ private void finishWriting() throws IOException { * @param path the source file path * @return {@code false} if a mismatch is detected */ - private boolean checkSettings(File file, - String path) throws IOException { + @VisibleForTesting + boolean checkSettings(File file, String path) throws IOException { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); boolean outIsXrefWriter = false; // potential xref writer @@ -1879,8 +2179,7 @@ private boolean checkSettings(File file, break; } - AnalyzerFactory fac = - AnalyzerGuru.findByFileTypeName(fileTypeName); + AnalyzerFactory fac = AnalyzerGuru.findByFileTypeName(fileTypeName); if (fac != null) { fa = fac.getAnalyzer(); } @@ -1968,22 +2267,6 @@ private boolean xrefExistsFor(String path) { return true; } - private static class IndexDownArgs { - int cur_count; - final List works = new ArrayList<>(); - } - - private static class IndexFileWork { - final File file; - final String path; - Exception exception; - - IndexFileWork(File file, String path) { - this.file = file; - this.path = path; - } - } - private static class AcceptSymlinkRet { String localRelPath; } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java new file mode 100644 index 00000000000..7a8178440b8 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.index; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +class IndexDownArgs { + int curCount; + final List works = new ArrayList<>(); +} + +class IndexFileWork { + final File file; + final String path; + Exception exception; + + IndexFileWork(File file, String path) { + this.file = file; + this.path = path; + } +} \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java new file mode 100644 index 00000000000..d37c7b90343 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.index; + +class IndexDownArgsFactory { + IndexDownArgsFactory() { + } + + public IndexDownArgs getIndexDownArgs() { + return new IndexDownArgs(); + } +} \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java index b9453f3ccf1..b3a165ee2b0 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java @@ -112,7 +112,7 @@ public final class Indexer { private static final String HELP_OPT_2 = "-?"; private static final String HELP_OPT_3 = "-h"; - private static final Indexer index = new Indexer(); + private static final Indexer indexer = new Indexer(); private static Configuration cfg = null; private static boolean checkIndex = false; private static boolean runIndex = true; @@ -149,7 +149,7 @@ public final class Indexer { private static final int WEBAPP_CONNECT_TIMEOUT = 1000; // in milliseconds public static Indexer getInstance() { - return index; + return indexer; } /** @@ -200,8 +200,7 @@ public static void main(String[] argv) { env.setIndexer(true); // Complete the configuration of repository types. - List> repositoryClasses - = RepositoryFactory.getRepositoryClasses(); + List> repositoryClasses = RepositoryFactory.getRepositoryClasses(); for (Class clazz : repositoryClasses) { // Set external repository binaries from System properties. try { @@ -278,7 +277,8 @@ public static void main(String[] argv) { System.exit(0); } - // Set updated configuration in RuntimeEnvironment. + // Set updated configuration in RuntimeEnvironment. This is called so that the tunables set + // via command line options are available. env.setConfiguration(cfg, subFilesArgs, CommandTimeoutType.INDEXER); // Let repository types to add items to ignoredNames. @@ -287,6 +287,9 @@ public static void main(String[] argv) { RepositoryFactory.initializeIgnoredNames(env); if (bareConfig) { + // Set updated configuration in RuntimeEnvironment. + env.setConfiguration(cfg, subFilesArgs, CommandTimeoutType.INDEXER); + getInstance().sendToConfigHost(env, webappURI); writeConfigToFile(env, configFilename); System.exit(0); @@ -374,6 +377,10 @@ public static void main(String[] argv) { getInstance().prepareIndexer(env, searchPaths, addProjects, createDict, runIndex, subFiles, new ArrayList<>(repositories)); + // Set updated configuration in RuntimeEnvironment. This is called so that repositories discovered + // in prepareIndexer() are stored in the Configuration used by RuntimeEnvironment. + env.setConfiguration(cfg, subFilesArgs, CommandTimeoutType.INDEXER); + // prepareIndexer() populated the list of projects so now default projects can be set. env.setDefaultProjectsFromNames(defaultProjects); @@ -408,6 +415,22 @@ public static void main(String[] argv) { } } + private static void checkConfiguration() { + if (bareConfig && (env.getConfigURI() == null || env.getConfigURI().isEmpty())) { + die("Missing webappURI setting"); + } + + if (!repositories.isEmpty() && !cfg.isHistoryEnabled()) { + die("Repositories were specified; history is off however"); + } + + try { + cfg.checkConfiguration(); + } catch (Configuration.ConfigurationException e) { + die(e.getMessage()); + } + } + /** * Parse OpenGrok Indexer options * This method was created so that it would be easier to write unit @@ -566,7 +589,20 @@ public static String[] parseOptions(String[] argv) throws ParseException { "Assign commit tags to all entries in history for all repositories.").execute(v -> cfg.setTagsEnabled(true)); - parser.on("-H", "--history", "Enable history.").execute(v -> cfg.setHistoryEnabled(true)); + // for backward compatibility + parser.on("-H", "Enable history.").execute(v -> cfg.setHistoryEnabled(true)); + + parser.on("--historyBased", "=on|off", ON_OFF, Boolean.class, + "If history based reindex is in effect, the set of files ", + "changed/deleted since the last reindex is determined from history ", + "of the repositories. This needs history, history cache and ", + "projects to be enabled. This should be much faster than the ", + "classic way of traversing the directory structure. ", + "The default is on. If you need to e.g. index files untracked by ", + "SCM, set this to off. Currently works only for Git.", + "All repositories in a project need to support this in order ", + "to be indexed using history."). + execute(v -> cfg.setHistoryBasedReindex((Boolean) v)); parser.on("--historyThreads", "=number", Integer.class, "The number of threads to use for history cache generation on repository level. " + @@ -847,8 +883,7 @@ public static String[] parseOptions(String[] argv) throws ParseException { execute(v -> cfg.setWebappCtags((Boolean) v)); }); - // Need to read the configuration file first - // so that options may be overwritten later. + // Need to read the configuration file first, so that options may be overwritten later. configure.parse(argv); LOGGER.log(Level.INFO, "Indexer options: {0}", Arrays.toString(argv)); @@ -864,33 +899,6 @@ public static String[] parseOptions(String[] argv) throws ParseException { return argv; } - private static void checkConfiguration() { - env = RuntimeEnvironment.getInstance(); - - if (bareConfig && (env.getConfigURI() == null || env.getConfigURI().isEmpty())) { - die("Missing webappURI setting"); - } - - if (!repositories.isEmpty() && !cfg.isHistoryEnabled()) { - die("Repositories were specified; history is off however"); - } - - if (cfg.getSourceRoot() == null) { - die("Please specify a SRC_ROOT with option -s !"); - } - if (cfg.getDataRoot() == null) { - die("Please specify a DATA ROOT path"); - } - - if (!new File(cfg.getSourceRoot()).canRead()) { - die("Source root '" + cfg.getSourceRoot() + "' must be readable"); - } - - if (!new File(cfg.getDataRoot()).canWrite()) { - die("Data root '" + cfg.getDataRoot() + "' must be writable"); - } - } - private static void die(String message) { System.err.println("ERROR: " + message); System.exit(1); diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/util/ClassUtil.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/util/ClassUtil.java index 07d2d98a834..3a6c6b834d2 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/util/ClassUtil.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/util/ClassUtil.java @@ -53,7 +53,7 @@ private ClassUtil() { * Mark all transient fields in {@code targetClass} as @Transient for the * XML serialization. * - * Fields marked with java transient keyword do not work becase the + * Fields marked with java transient keyword do not work because the * XMLEncoder does not take these into account. This helper marks the fields * marked with transient keyword as transient also for the XMLDecoder. * @@ -73,7 +73,7 @@ public static void remarkTransientFields(Class targetClass) { } } } catch (IntrospectionException ex) { - LOGGER.log(Level.WARNING, "An exception ocurred during remarking transient fields:", ex); + LOGGER.log(Level.WARNING, "An exception occurred during remarking transient fields:", ex); } } diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java index c5ef92fdf85..4c2e46d7436 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java @@ -748,7 +748,7 @@ private void createSvnRepository() throws Exception { assertEquals(0, svnCheckoutProcess.waitFor()); } - private void changeFileAndCommit(Git git, File file, String comment) throws Exception { + static void changeFileAndCommit(Git git, File file, String comment) throws Exception { String authorName = "Foo Bar"; String authorEmail = "foo@bar.com"; diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java index ce22568be39..e9ed9015dec 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. */ package org.opengrok.indexer.history; @@ -46,7 +46,7 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -public class RepositoryWithPerPartesHistoryTest { +class RepositoryWithPerPartesHistoryTest { private TestRepository repositories; private GitRepository gitRepository; @@ -89,7 +89,8 @@ void testChangesets() throws HistoryException { Mockito.when(gitSpyRepository.getPerPartesCount()).thenReturn(3); gitSpyRepository.createCache(cache, null); Mockito.verify(gitSpyRepository, times(3)). - getHistory(any(), stringArgumentCaptor1.capture(), stringArgumentCaptor2.capture()); + traverseHistory(any(), stringArgumentCaptor1.capture(), stringArgumentCaptor2.capture(), + isNull(), any()); List sinceRevisions = new ArrayList<>(); sinceRevisions.add(null); @@ -120,7 +121,7 @@ void testPseudoIncomingChangeset() throws Exception { gitSpyRepository.createCache(cache, historyEntries.get(1).getRevision()); Mockito.verify(gitSpyRepository, times(1)). - getHistory(any(), anyString(), isNull()); + traverseHistory(any(), anyString(), isNull(), isNull(), any()); assertEquals(historyEntries.get(0).getRevision(), cache.getLatestCachedRevision(gitSpyRepository)); History cachedHistory = cache.get(Paths.get(gitRepository.getDirectoryName(), "moved2", "renamed2.c").toFile(), gitSpyRepository, false); diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 2a8eefe99f9..1cb247660c9 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -18,79 +18,152 @@ */ /* - * Copyright (c) 2010, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2018, 2020, Chris Fraire . */ package org.opengrok.indexer.index; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import java.util.TreeSet; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.lucene.document.Document; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.search.ScoreDoc; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; +import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.api.MergeCommand; +import org.eclipse.jgit.lib.ObjectId; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import org.opengrok.indexer.analysis.Definitions; +import org.opengrok.indexer.condition.EnabledForRepository; +import org.opengrok.indexer.configuration.CommandTimeoutType; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; +import org.opengrok.indexer.history.FileCollector; +import org.opengrok.indexer.history.History; +import org.opengrok.indexer.history.HistoryEntry; import org.opengrok.indexer.history.HistoryGuru; +import org.opengrok.indexer.history.Repository; import org.opengrok.indexer.history.RepositoryFactory; +import org.opengrok.indexer.history.RepositoryInfo; +import org.opengrok.indexer.history.RepositoryWithHistoryTraversal; import org.opengrok.indexer.search.QueryBuilder; import org.opengrok.indexer.search.SearchEngine; +import org.opengrok.indexer.util.ForbiddenSymlinkException; +import org.opengrok.indexer.util.IOUtils; import org.opengrok.indexer.util.TandemPath; import org.opengrok.indexer.util.TestRepository; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.atLeast; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; +import static org.opengrok.indexer.condition.RepositoryInstalled.Type.CVS; /** * Unit tests for the {@code IndexDatabase} class. + * + * This is quite a heavy test class - it runs the indexer before each (parametrized) test, + * so it might contribute significantly to the overall test run time. */ -public class IndexDatabaseTest { +class IndexDatabaseTest { private static TestRepository repository; - @BeforeAll - public static void setUpClass() throws Exception { - RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + private Indexer indexer; + + private RuntimeEnvironment env; + + @BeforeEach + public void setUpClass() throws Exception { + env = RuntimeEnvironment.getInstance(); repository = new TestRepository(); repository.create(HistoryGuru.class.getResource("/repositories")); + // After copying the files from the archive, Git will consider the files to be changed, + // at least on Windows. This causes some tests, particularly testGetIndexDownArgs() to fail. + // To avoid this, clone the Git repository. + Path gitRepositoryRootPath = Path.of(repository.getSourceRoot(), "git"); + Path gitCheckoutPath = Path.of(repository.getSourceRoot(), "gitcheckout"); + Git git = Git.cloneRepository() + .setURI(gitRepositoryRootPath.toFile().toURI().toString()) + .setDirectory(gitCheckoutPath.toFile()) + .call(); + // The Git object has to be closed, otherwise the move below would fail on Windows with + // AccessDeniedException due to the file handle still being open. + git.close(); + IOUtils.removeRecursive(gitRepositoryRootPath); + Files.move(gitCheckoutPath, gitRepositoryRootPath); + env.setSourceRoot(repository.getSourceRoot()); env.setDataRoot(repository.getDataRoot()); env.setHistoryEnabled(true); env.setProjectsEnabled(true); RepositoryFactory.initializeIgnoredNames(env); - // Note that all tests in this class share the index created below. - // Ergo, if they need to modify it, this has to be done in such a way - // so that it does not affect other tests, no matter in which order - // the tests are run. - Indexer indexer = Indexer.getInstance(); + // Restore the project and repository information. + env.setProjects(new HashMap<>()); + HistoryGuru.getInstance().removeRepositories(List.of("/git")); + env.setRepositories(repository.getSourceRoot()); + HistoryGuru.getInstance().invalidateRepositories(env.getRepositories(), CommandTimeoutType.INDEXER); + env.generateProjectRepositoriesMap(); + + indexer = Indexer.getInstance(); indexer.prepareIndexer( env, true, true, false, null, null); + + // Reset the state of the git project w.r.t. history based reindex. + // It is the responsibility of each test that relies on the per project tunable + // to call gitProject.completeWithDefaults(). + Project gitProject = env.getProjects().get("git"); + gitProject.clearProperties(); + env.setDefaultProjectsFromNames(new TreeSet<>(Arrays.asList("/c"))); + indexer.doIndexerExecution(true, null, null); + + env.clearFileCollector(); } - @AfterAll - public static void tearDownClass() throws Exception { + @AfterEach + public void tearDownClass() throws Exception { repository.destroy(); } @Test - public void testGetDefinitions() throws Exception { + void testGetDefinitions() throws Exception { // Test that we can get definitions for one of the files in the // repository. File f1 = new File(repository.getSourceRoot() + "/git/main.c"); @@ -136,20 +209,19 @@ private void checkDataExistence(String fileName, boolean shouldExist) { * Test removal of IndexDatabase. xrefs and history index entries after * file has been removed from a repository. */ - @Test - public void testCleanupAfterIndexRemoval() throws Exception { + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testCleanupAfterIndexRemoval(boolean historyBasedReindex) throws Exception { final int origNumFiles; + env.setHistoryBasedReindex(historyBasedReindex); + String projectName = "git"; - String ppath = "/" + projectName; - Project project = new Project(projectName, ppath); + Project project = env.getProjects().get(projectName); + assertNotNull(project); IndexDatabase idb = new IndexDatabase(project); assertNotNull(idb); - // Note that the file to remove has to be different than the one used - // in {@code testGetDefinitions} because it shares the same index - // and this test is going to remove the file and therefore related - // definitions. String fileName = "header.h"; File gitRoot = new File(repository.getSourceRoot(), projectName); assertTrue(new File(gitRoot, fileName).exists()); @@ -166,7 +238,7 @@ public void testCleanupAfterIndexRemoval() throws Exception { // Remove the file and reindex using IndexDatabase directly. File file = new File(repository.getSourceRoot(), projectName + File.separator + fileName); - file.delete(); + assertTrue(file.delete()); assertFalse(file.exists(), "file " + fileName + " not removed"); idb.update(); @@ -180,7 +252,7 @@ public void testCleanupAfterIndexRemoval() throws Exception { * however it lacks the pre-requisite indexing phase. */ @Test - public void testIndexPath() throws IOException { + void testIndexPath() throws IOException { SearchEngine instance = new SearchEngine(); // Use as broad search as possible. instance.setFile("c"); @@ -195,10 +267,549 @@ public void testIndexPath() throws IOException { } @Test - public void testGetLastRev() throws IOException, ParseException { + void testGetLastRev() throws IOException, ParseException { Document doc = IndexDatabase.getDocument(Paths.get(repository.getSourceRoot(), "git", "main.c").toFile()); assertNotNull(doc); assertEquals("aa35c258", doc.get(QueryBuilder.LASTREV)); } + + static void changeFileAndCommit(Git git, File file, String comment) throws Exception { + String authorName = "Foo Bar"; + String authorEmail = "foobar@example.com"; + + try (FileOutputStream fos = new FileOutputStream(file, true)) { + fos.write(comment.getBytes(StandardCharsets.UTF_8)); + } + + git.commit().setMessage(comment).setAuthor(authorName, authorEmail).setAll(true).call(); + } + + private void addFileAndCommit(Git git, String newFileName, File repositoryRoot, String message) throws Exception { + File newFile = new File(repositoryRoot, newFileName); + if (!newFile.createNewFile()) { + throw new IOException("Could not create file " + newFile); + } + try (FileOutputStream fos = new FileOutputStream(newFile)) { + fos.write("foo bar foo bar foo bar".getBytes(StandardCharsets.UTF_8)); + } + git.add().addFilepattern(newFileName).call(); + git.commit().setMessage(message).setAuthor("foo bar", "foobar@example.com").setAll(true).call(); + } + + private void addMergeCommit(Git git, File repositoryRoot) throws Exception { + // Create and checkout a branch. + final String branchName = "mybranch"; + git.branchCreate().setName(branchName).call(); + git.checkout().setName(branchName).call(); + + // Change a file on the branch. + addFileAndCommit(git, "new.txt", repositoryRoot, "new file on a branch"); + + // Checkout the master branch again. + git.checkout().setName("master").call(); + + // Retrieve the objectId of the latest commit on the branch. + ObjectId mergeBase = git.getRepository().resolve(branchName); + + // Perform the actual merge without FastForward to see the + // actual merge-commit even though the merge is trivial. + git.merge(). + include(mergeBase). + setCommit(false). + setFastForward(MergeCommand.FastForwardMode.NO_FF). + setMessage("merge commit"). + call(); + + // Commit the merge separately so that the author can be set. + // (MergeCommand - a result of git.merge() - does not have the setAuthor() method) + git.commit().setAuthor("foo bar", "foobar@example.com").call(); + } + + /** + * Add some commits to the Git repository - change/remove/add/rename a file in separate commits, + * also add a merge commit. + * @param repositoryRoot Git repository root + */ + private void changeGitRepository(File repositoryRoot) throws Exception { + try (Git git = Git.init().setDirectory(repositoryRoot).call()) { + // This name is specifically picked to add file that would exercise the end of term traversal + // in processFileIncremental(), that is (uidIter == null). + String newFileName = "zzz.txt"; + addFileAndCommit(git, newFileName, repositoryRoot, "another new file"); + + // Add another file that is sorted behind to exercise another code path in processFileIncremental(). + // These 'z'-files are added first so their commits are not the last. This exercises the sorting + // of the files in FileCollector and the simultaneous traverse of the index and file list + // in processFileIncremental(). + newFileName = "zzzzzz.txt"; + addFileAndCommit(git, newFileName, repositoryRoot, "another new file"); + + // Change one of the pre-existing files. + File mainFile = new File(repositoryRoot, "main.c"); + assertTrue(mainFile.exists()); + changeFileAndCommit(git, mainFile, "new commit"); + + // Delete a file. + final String deletedFileName = "header.h"; + File rmFile = new File(repositoryRoot, deletedFileName); + assertTrue(rmFile.exists()); + git.rm().addFilepattern(deletedFileName).call(); + git.commit().setMessage("delete").setAuthor("foo", "foobar@example.com").setAll(true).call(); + assertFalse(rmFile.exists()); + + // Rename some file. + final String fooFileName = "Makefile"; + final String barFileName = "Makefile.renamed"; + File fooFile = new File(repositoryRoot, fooFileName); + assertTrue(fooFile.exists()); + File barFile = new File(repositoryRoot, barFileName); + assertTrue(fooFile.renameTo(barFile)); + git.add().addFilepattern(barFileName).call(); + git.rm().addFilepattern(fooFileName).call(); + git.commit().setMessage("rename").setAuthor("foo", "foobar@example.com").setAll(true).call(); + assertTrue(barFile.exists()); + assertFalse(fooFile.exists()); + + addMergeCommit(git, repositoryRoot); + } + } + + private static Stream provideParamsFortestGetIndexDownArgs() { + return Stream.of( + Arguments.of(false, false, false, false), + Arguments.of(false, false, false, true), + Arguments.of(false, false, true, false), + Arguments.of(false, false, true, true), + Arguments.of(false, true, false, false), + Arguments.of(false, true, false, true), + Arguments.of(false, true, true, false), + Arguments.of(false, true, true, true), + Arguments.of(true, false, false, false), + Arguments.of(true, false, false, true), + Arguments.of(true, false, true, false), + Arguments.of(true, false, true, true), + Arguments.of(true, true, false, false), + Arguments.of(true, true, false, true), + Arguments.of(true, true, true, false), + Arguments.of(true, true, true, true) + ); + } + + static class AddRemoveFilesListener implements IndexChangedListener { + // The file sets need to be thread safe because the methods that modify them can be called in parallel. + private final Set removedFiles = Collections.synchronizedSet(new HashSet<>()); + + private final Set addedFiles = Collections.synchronizedSet(new HashSet<>()); + + @Override + public void fileAdd(String path, String analyzer) { + addedFiles.add(path); + } + + @Override + public void fileAdded(String path, String analyzer) { + } + + @Override + public void fileRemove(String path) { + removedFiles.add(path); + } + + @Override + public void fileRemoved(String path) { + } + + @Override + public void fileUpdate(String path) { + } + + public Set getRemovedFiles() { + return removedFiles; + } + + public Set getAddedFiles() { + return addedFiles; + } + } + + /** + * Test specifically getIndexDownArgs() with IndexDatabase instance. + * This test ensures that correct set of files is discovered. + */ + @ParameterizedTest + @MethodSource("provideParamsFortestGetIndexDownArgs") + void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean historyBased, boolean perPartes) + throws Exception { + + assertTrue(env.isHistoryEnabled()); + + env.setHistoryBasedReindex(historyBased); + env.setHandleHistoryOfRenamedFiles(renamedFiles); + env.setMergeCommitsEnabled(mergeCommits); + env.setHistoryCachePerPartesEnabled(perPartes); + + IndexDownArgsFactory factory = new IndexDownArgsFactory(); + IndexDownArgsFactory spyFactory = spy(factory); + IndexDownArgs args = new IndexDownArgs(); + // In this case the getIndexDownArgs() should be called from update() just once so this will suffice. + when(spyFactory.getIndexDownArgs()).thenReturn(args); + + Project gitProject = env.getProjects().get("git"); + assertNotNull(gitProject); + gitProject.completeWithDefaults(); + IndexDatabase idbOrig = new IndexDatabase(gitProject, spyFactory); + assertNotNull(idbOrig); + IndexDatabase idb = spy(idbOrig); + + File repositoryRoot = new File(repository.getSourceRoot(), "git"); + assertTrue(repositoryRoot.isDirectory()); + changeGitRepository(repositoryRoot); + + // Re-generate the history cache so that the data is ready for history based re-index. + HistoryGuru.getInstance().clear(); + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + env.generateProjectRepositoriesMap(); + + // Check history cache w.r.t. the merge changeset. + File mergeFile = new File(repositoryRoot, "new.txt"); + History history = HistoryGuru.getInstance().getHistory(mergeFile, false, false, false); + assertNotNull(history); + assertNotNull(history.getHistoryEntries()); + boolean containsMergeCommitMessage = history.getHistoryEntries().stream(). + map(HistoryEntry::getMessage).collect(Collectors.toSet()).contains("merge commit"); + if (mergeCommits) { + assertTrue(containsMergeCommitMessage); + } else { + assertFalse(containsMergeCommitMessage); + } + + // Setup and use listener for the "removed" files. + AddRemoveFilesListener listener = new AddRemoveFilesListener(); + idb.addIndexChangedListener(listener); + idb.update(); + + verify(spyFactory).getIndexDownArgs(); + // Cannot use args.curCount to compare against because it gets reset in indexParallel() + // as it is reused in that stage of indexing. + assertNotEquals(0, args.works.size()); + // The expected data has to match the work done in changeGitRepository(). + Set expectedFileSet = new HashSet<>(); + expectedFileSet.add(Path.of("/git/Makefile.renamed")); + expectedFileSet.add(Path.of("/git/main.c")); + expectedFileSet.add(Path.of("/git/zzz.txt")); + expectedFileSet.add(Path.of("/git/zzzzzz.txt")); + expectedFileSet.add(Path.of("/git/new.txt")); + assertEquals(expectedFileSet, args.works.stream().map(v -> Path.of(v.path)).collect(Collectors.toSet())); + + assertEquals(Set.of( + Path.of("/git/header.h"), + Path.of("/git/main.c"), + Path.of("/git/Makefile") + ), listener.getRemovedFiles().stream().map(Path::of).collect(Collectors.toSet())); + + // Verify the assumption made above. + verify(idb, times(1)).getIndexDownArgs(any(), any(), any()); + + checkIndexDown(historyBased, idb); + } + + private void checkIndexDown(boolean historyBased, IndexDatabase idb) throws IOException { + // The initial index (done in setUpClass()) should use file based IndexWorkArgs discovery. + // Only the update() done in the actual test should lead to indexDownUsingHistory(), + // hence it should be called just once. + if (historyBased) { + verify(idb, times(1)).indexDownUsingHistory(any(), any()); + verify(idb, times(0)).indexDown(any(), any(), any()); + } else { + // indexDown() is recursive, so it will be called more than once. + verify(idb, times(0)).indexDownUsingHistory(any(), any()); + verify(idb, atLeast(1)).indexDown(any(), any(), any()); + } + } + + /** + * Make sure that history based reindex is not performed for projects + * where some repositories are not instances of {@code RepositoryWithHistoryTraversal} + * or have the history based reindex explicitly disabled. + * + * Instead of checking the result of the functions that make the decision, check the actual indexing. + */ + @EnabledForRepository(CVS) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testHistoryBasedReindexVsProjectWithDiverseRepos(boolean useCvs) throws Exception { + env.setHistoryBasedReindex(true); + + // Create a new project with two repositories. + String projectName = "new"; + Path projectPath = Path.of(repository.getSourceRoot(), projectName); + assertTrue(projectPath.toFile().mkdirs()); + assertTrue(projectPath.toFile().isDirectory()); + + String disabledGitRepoName = "git1"; + + if (useCvs) { + // Copy CVS repository underneath the project. + String subrepoName = "cvssubrepo"; + Path destinationPath = Path.of(repository.getSourceRoot(), projectName, subrepoName); + Path sourcePath = Path.of(repository.getSourceRoot(), "cvs_test", "cvsrepo"); + assertTrue(sourcePath.toFile().exists()); + assertTrue(destinationPath.toFile().mkdirs()); + repository.copyDirectory(sourcePath, destinationPath); + assertTrue(destinationPath.toFile().exists()); + + Repository subRepo = RepositoryFactory.getRepository(destinationPath.toFile()); + assertFalse(subRepo instanceof RepositoryWithHistoryTraversal); + } else { + // Clone Git repository underneath the project. + String cloneUrl = Path.of(repository.getSourceRoot(), "git").toFile().toURI().toString(); + Path repositoryRootPath = Path.of(repository.getSourceRoot(), projectName, disabledGitRepoName); + Git git = Git.cloneRepository() + .setURI(cloneUrl) + .setDirectory(repositoryRootPath.toFile()) + .call(); + git.close(); + assertTrue(repositoryRootPath.toFile().isDirectory()); + } + + // Clone Git repository underneath the project and make a change there. + String cloneUrl = Path.of(repository.getSourceRoot(), "git").toFile().toURI().toString(); + Path repositoryRootPath = Path.of(repository.getSourceRoot(), projectName, "git"); + Git git = Git.cloneRepository() + .setURI(cloneUrl) + .setDirectory(repositoryRootPath.toFile()) + .call(); + git.close(); + assertTrue(repositoryRootPath.toFile().isDirectory()); + changeGitRepository(repositoryRootPath.toFile()); + + // Rescan the repositories. + HistoryGuru.getInstance().clear(); + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + env.setRepositories(new ArrayList<>(HistoryGuru.getInstance().getRepositories())); + env.generateProjectRepositoriesMap(); + + // Assert the repositories were detected. + Project project = env.getProjects().get(projectName); + assertNotNull(project); + List projectRepos = env.getProjectRepositoriesMap().get(project); + assertNotNull(projectRepos); + assertEquals(2, projectRepos.size()); + + if (!useCvs) { + for (RepositoryInfo repo : projectRepos) { + if (repo.getDirectoryNameRelative().equals(disabledGitRepoName)) { + repo.setHistoryBasedReindex(false); + } + } + } + + verifyIndexDown(project, false); + } + + /** + * Make sure the files detected for a sub-repository are correctly stored in the appropriate + * {@code FileCollector} instance. + */ + @Test + void testHistoryBasedReindexWithEligibleSubRepo() throws Exception { + env.setHistoryBasedReindex(true); + + assertNull(env.getFileCollector("git")); + + Project gitProject = env.getProjects().get("git"); + assertNotNull(gitProject); + gitProject.completeWithDefaults(); + + // Create a Git repository underneath the existing git repository and make a change there. + File repositoryRoot = new File(repository.getSourceRoot(), "git"); + assertTrue(repositoryRoot.isDirectory()); + changeGitRepository(repositoryRoot); + String subRepoName = "subrepo"; + File subRepositoryRoot = new File(repositoryRoot, subRepoName); + String changedFileName = "subfile.txt"; + try (Git git = Git.init().setDirectory(subRepositoryRoot).call()) { + addFileAndCommit(git, changedFileName, subRepositoryRoot, "new file in subrepo"); + } + assertTrue(new File(subRepositoryRoot, changedFileName).exists()); + + HistoryGuru.getInstance().clear(); + + // Rescan the repositories and refresh the history cache which should also collect the files + // for the 2nd stage of indexing. + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + + // Verify the collected files. + FileCollector fileCollector = env.getFileCollector("git"); + assertNotNull(fileCollector); + assertTrue(fileCollector.getFiles().size() > 1); + assertTrue(fileCollector.getFiles(). + contains(File.separator + gitProject.getName() + + File.separator + subRepoName + + File.separator + changedFileName)); + } + + /** + * Verify project specific tunable has effect on how the indexing will be performed. + * The global history based tunable is tested in testGetIndexDownArgs(). + */ + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testHistoryBasedReindexProjectTunable(boolean historyBased) throws Exception { + env.setHistoryBasedReindex(!historyBased); + + // Make a change in the git repository. + File repositoryRoot = new File(repository.getSourceRoot(), "git"); + assertTrue(repositoryRoot.isDirectory()); + changeGitRepository(repositoryRoot); + + // The per project tunable should override the global tunable. + Project gitProject = env.getProjects().get("git"); + gitProject.setHistoryBasedReindex(historyBased); + gitProject.completeWithDefaults(); + + HistoryGuru.getInstance().clear(); + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + env.generateProjectRepositoriesMap(); + + verifyIndexDown(gitProject, historyBased); + + gitProject.setHistoryBasedReindex(true); + } + + /** + * Test history based reindex if there was no change to the repository. + */ + @Test + void testHistoryBasedReindexWithNoChange() throws Exception { + env.setHistoryBasedReindex(true); + + Project gitProject = env.getProjects().get("git"); + gitProject.completeWithDefaults(); + + HistoryGuru.getInstance().clear(); + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + env.generateProjectRepositoriesMap(); + + verifyIndexDown(gitProject, true); + } + + private void verifyIndexDown(Project gitProject, boolean historyBased) throws Exception { + // verify that indexer did not use history based reindex. + IndexDatabase idbOrig = new IndexDatabase(gitProject); + assertNotNull(idbOrig); + IndexDatabase idb = spy(idbOrig); + idb.update(); + checkIndexDown(historyBased, idb); + } + + /** + * Test forced reindex - see if removeFile() was called for all files in the repository + * even though there was no change. + */ + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testForcedReindex(boolean historyBased) throws Exception { + + env.setHistoryBasedReindex(historyBased); + + Project gitProject = env.getProjects().get("git"); + assertNotNull(gitProject); + gitProject.completeWithDefaults(); + IndexDatabase idbOrig = new IndexDatabase(gitProject); + assertNotNull(idbOrig); + IndexDatabase idb = spy(idbOrig); + + // Re-generate the history cache so that the git repository is ready for history based re-index. + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + env.generateProjectRepositoriesMap(); + + // Emulate forcing reindex from scratch. + doReturn(false).when(idb).checkSettings(any(), any()); + + // Setup and use listener for the "removed" files. + AddRemoveFilesListener listener = new AddRemoveFilesListener(); + idb.addIndexChangedListener(listener); + idb.update(); + + checkIndexDown(historyBased, idb); + + // List the files in the /git directory tree and compare that to the IndexDatabase file sets. + Path repoRoot = Path.of(repository.getSourceRoot(), "git"); + Set result; + try (Stream walk = Files.walk(repoRoot)) { + result = walk.filter(Files::isRegularFile). + filter(p -> !p.toString().contains(".git")). + collect(Collectors.toSet()); + } + Set expectedFileSet = result.stream().map(f -> { + try { + return Path.of(RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(f.toFile())); + } catch (IOException | ForbiddenSymlinkException e) { + return null; + } + }).collect(Collectors.toSet()); + assertEquals(expectedFileSet, listener.getRemovedFiles().stream().map(Path::of).collect(Collectors.toSet())); + assertEquals(expectedFileSet, listener.getAddedFiles().stream().map(Path::of).collect(Collectors.toSet())); + } + + /** + * make sure the initial indexing is made using indexDown() even though history based reindex is possible. + */ + @Test + void testInitialReindexWithHistoryBased() throws Exception { + env.setHistoryBasedReindex(true); + + // Delete the index (and all data in fact). + assertFalse(repository.getDataRoot().isEmpty()); + IOUtils.removeRecursive(Path.of(repository.getDataRoot())); + assertFalse(Path.of(repository.getDataRoot()).toFile().exists()); + + // Update the index of the project. + Project gitProject = env.getProjects().get("git"); + assertNotNull(gitProject); + IndexDatabase idbOrig = new IndexDatabase(gitProject); + assertNotNull(idbOrig); + IndexDatabase idb = spy(idbOrig); + idb.update(); + + // Check that the index for the git project was created. + Document doc = IndexDatabase.getDocument(Path.of(repository.getSourceRoot(), "git", "main.c").toFile()); + assertNotNull(doc); + + checkIndexDown(false, idb); + } + + /** + * project-less configuration should lead to file-system based reindex. + */ + @Test + void testProjectLessReindexVsHistoryBased() throws Exception { + env.setProjectsEnabled(false); + + // Make a change in the git repository. + File repositoryRoot = new File(repository.getSourceRoot(), "git"); + assertTrue(repositoryRoot.isDirectory()); + changeGitRepository(repositoryRoot); + + IndexDatabase idbOrig = new IndexDatabase(); + assertNotNull(idbOrig); + IndexDatabase idb = spy(idbOrig); + idb.update(); + + checkIndexDown(false, idb); + } } diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexerTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexerTest.java index bfeadc55ef6..a1c44181e75 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexerTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexerTest.java @@ -360,7 +360,9 @@ void testRemoveFileOnFileChange() throws Exception { } // reindex + // TODO: add parameter for running the reindex with indexDown() and truly incremental (needs Git) idb.update(); + // Make sure that the file was actually processed. assertEquals(1, listener.removedFiles.size()); assertEquals(1, listener.filesToAdd.size()); @@ -424,7 +426,6 @@ void testBug3430() throws Exception { /** * Test IndexChangedListener behavior in repository with invalid files. - * @throws Exception */ @Test void testIncrementalIndexAddRemoveFile() throws Exception { @@ -432,8 +433,15 @@ void testIncrementalIndexAddRemoveFile() throws Exception { env.setSourceRoot(repository.getSourceRoot()); env.setDataRoot(repository.getDataRoot()); + // Make the test consistent. If run in sequence with other tests, env.hasProjects() returns true. + // The same should work for standalone test run. + HashMap projects = new HashMap<>(); String ppath = "/bug3430"; Project project = new Project("bug3430", ppath); + projects.put("bug3430", project); + env.setProjectsEnabled(true); + env.setProjects(projects); + IndexDatabase idb = new IndexDatabase(project); assertNotNull(idb); MyIndexChangeListener listener = new MyIndexChangeListener(); diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java index 45c38602cbb..167980d139f 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2018, 2019, Chris Fraire . */ package org.opengrok.indexer.util; @@ -39,6 +39,7 @@ import org.jetbrains.annotations.NotNull; import org.opengrok.indexer.configuration.RuntimeEnvironment; +import static java.nio.file.StandardCopyOption.COPY_ATTRIBUTES; import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -103,7 +104,13 @@ public void create(@NotNull final URL url) throws IOException, URISyntaxExceptio } } - private void copyDirectory(Path src, Path dest) throws IOException { + /** + * Assumes the destination directory exists. + * @param src source directory + * @param dest destination directory + * @throws IOException on error + */ + public void copyDirectory(Path src, Path dest) throws IOException { try (Stream stream = Files.walk(src)) { stream.forEach(sourceFile -> { if (sourceFile.equals(src)) { @@ -111,7 +118,14 @@ private void copyDirectory(Path src, Path dest) throws IOException { } try { Path destRelativePath = getDestinationRelativePath(src, sourceFile); - Files.copy(sourceFile, dest.resolve(destRelativePath.toString()), REPLACE_EXISTING); + Path destPath = dest.resolve(destRelativePath); + if (Files.isDirectory(sourceFile)) { + if (!Files.exists(destPath)) { + Files.createDirectory(destPath); + } + return; + } + Files.copy(sourceFile, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES); } catch (Exception e) { throw new RuntimeException(e); }