From fc53bae70c239c88aca30a43e63bed2f06bdbfe8 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 22 Nov 2021 10:25:32 +0100 Subject: [PATCH 01/88] truly incremental reindex --- .../indexer/history/GitRepository.java | 118 +++++++++------ .../org/opengrok/indexer/history/History.java | 5 + .../opengrok/indexer/history/HistoryGuru.java | 2 +- .../RepositoryWithHistoryTraversal.java | 71 +++++++++ .../opengrok/indexer/index/IndexDatabase.java | 136 ++++++++++++++++-- .../org/opengrok/indexer/util/ClassUtil.java | 4 +- 6 files changed, 276 insertions(+), 60 deletions(-) create mode 100644 opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index b900ce28a70..1244efb2834 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -92,7 +92,7 @@ * Access to a Git repository. * */ -public class GitRepository extends RepositoryWithPerPartesHistory { +public class GitRepository extends RepositoryWithHistoryTraversal { private static final Logger LOGGER = LoggerFactory.getLogger(GitRepository.class); @@ -476,15 +476,56 @@ public History getHistory(File file, String sinceRevision, String tillRevision) return getHistory(file, sinceRevision, tillRevision, null); } + private static class HistoryCollector { + List entries; + Set renamedFiles; + + HistoryCollector() { + entries = new ArrayList<>(); + renamedFiles = new HashSet<>(); + } + + public void visit(ChangesetInfo changesetInfo) { + RepositoryWithHistoryTraversal.CommitInfo commit = changesetInfo.commit; + HistoryEntry historyEntry = new HistoryEntry(commit.revision, + commit.date,commit.authorName + " <" + commit.authorEmail + ">", + commit.message, true); + + if (changesetInfo.renamedFiles != null) { + renamedFiles.addAll(changesetInfo.renamedFiles); + } + if (changesetInfo.files != null) { + historyEntry.setFiles(changesetInfo.files); + } + + entries.add(historyEntry); + } + } + public History getHistory(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { - if (numCommits != null && numCommits <= 0) { - return null; + HistoryCollector historyCollector = new HistoryCollector(); + traverseHistory(file, sinceRevision, tillRevision, numCommits, historyCollector::visit); + + History result = new History(historyCollector.entries, historyCollector.renamedFiles); + + // Assign tags to changesets they represent + // We don't need to check if this repository supports tags, + // because we know it :-) + if (RuntimeEnvironment.getInstance().isTagsEnabled()) { + assignTagsInHistory(result); } - final List entries = new ArrayList<>(); - final Set renamedFiles = new HashSet<>(); + return result; + } + + public void traverseHistory(File file, String sinceRevision, String tillRevision, + Integer numCommits, Consumer visitor) throws HistoryException { + + if (numCommits != null && numCommits <= 0) { + throw new HistoryException("invalid number of commits to retrieve"); + } boolean isDirectory = file.isDirectory(); @@ -522,20 +563,19 @@ public History getHistory(File file, String sinceRevision, String tillRevision, continue; } - HistoryEntry historyEntry = new HistoryEntry(commit.getId().abbreviate(GIT_ABBREV_LEN).name(), - commit.getAuthorIdent().getWhen(), - commit.getAuthorIdent().getName() + - " <" + commit.getAuthorIdent().getEmailAddress() + ">", - commit.getFullMessage(), true); - + CommitInfo commitInfo = new CommitInfo(commit.getId().abbreviate(GIT_ABBREV_LEN).name(), + commit.getAuthorIdent().getWhen(), commit.getAuthorIdent().getName(), + commit.getAuthorIdent().getEmailAddress(), commit.getFullMessage()); if (isDirectory) { SortedSet files = new TreeSet<>(); - getFilesForCommit(renamedFiles, files, commit, repository); - historyEntry.setFiles(files); + final Set renamedFiles = new HashSet<>(); + final Set deletedFiles = new HashSet<>(); + getFilesForCommit(renamedFiles, files, deletedFiles, commit, repository); + visitor.accept(new ChangesetInfo(commitInfo, files, renamedFiles, deletedFiles)); + } else { + visitor.accept(new ChangesetInfo(commitInfo)); } - entries.add(historyEntry); - if (numCommits != null && ++num >= numCommits) { break; } @@ -543,46 +583,36 @@ public History getHistory(File file, String sinceRevision, String tillRevision, } catch (IOException | ForbiddenSymlinkException e) { throw new HistoryException(String.format("failed to get history for ''%s''", file), e); } - - History result = new History(entries, renamedFiles); - - // Assign tags to changesets they represent - // We don't need to check if this repository supports tags, - // because we know it :-) - if (RuntimeEnvironment.getInstance().isTagsEnabled()) { - assignTagsInHistory(result); - } - - return result; } /** * Accumulate list of changed files and renamed files (if enabled) for given commit. * @param renamedFiles result containing the renamed files in this commit - * @param files result containing changed files in this commit + * @param changedFiles result containing changed files in this commit * @param commit RevCommit object * @param repository repository object * @throws IOException on error traversing the commit tree */ - private void getFilesForCommit(Set renamedFiles, SortedSet files, RevCommit commit, + private void getFilesForCommit(Set renamedFiles, SortedSet changedFiles, Set deletedFiles, + RevCommit commit, Repository repository) throws IOException { int numParents = commit.getParentCount(); if (numParents == 1) { - getFiles(repository, commit.getParent(0), commit, files, renamedFiles); + getFiles(repository, commit.getParent(0), commit, changedFiles, renamedFiles, deletedFiles); } else if (numParents == 0) { // first commit try (TreeWalk treeWalk = new TreeWalk(repository)) { treeWalk.addTree(commit.getTree()); treeWalk.setRecursive(true); while (treeWalk.next()) { - files.add(getNativePath(getDirectoryNameRelative()) + File.separator + + changedFiles.add(getNativePath(getDirectoryNameRelative()) + File.separator + getNativePath(treeWalk.getPathString())); } } } else { - getFiles(repository, commit.getParent(0), commit, files, renamedFiles); + getFiles(repository, commit.getParent(0), commit, changedFiles, renamedFiles, deletedFiles); } } @@ -595,17 +625,18 @@ private static String getNativePath(String path) { } /** - * Assemble list of files that changed between 2 commits. + * Assemble list of changedFiles that changed between 2 commits. * @param repository repository object * @param oldCommit parent commit - * @param newCommit new commit (the mehotd assumes oldCommit is its parent) - * @param files set of files that changed (excludes renamed files) - * @param renamedFiles set of renamed files (if renamed handling is enabled) + * @param newCommit new commit (the method assumes oldCommit is its parent) + * @param changedFiles output: set of changedFiles that changed (excludes renamed changedFiles) + * @param renamedFiles output: set of renamed files (if renamed handling is enabled) + * @param deletedFiles output: set of deleted files * @throws IOException on I/O problem */ private void getFiles(org.eclipse.jgit.lib.Repository repository, RevCommit oldCommit, RevCommit newCommit, - Set files, Set renamedFiles) + Set changedFiles, Set renamedFiles, Set deletedFiles) throws IOException { OutputStream outputStream = NullOutputStream.INSTANCE; @@ -619,16 +650,17 @@ private void getFiles(org.eclipse.jgit.lib.Repository repository, prepareTreeParser(repository, newCommit)); for (DiffEntry diff : diffs) { - if (diff.getChangeType() != DiffEntry.ChangeType.DELETE) { - if (files != null) { - files.add(getNativePath(getDirectoryNameRelative()) + File.separator + - getNativePath(diff.getNewPath())); - } + String newPath = getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getNewPath()); + + if (diff.getChangeType() != DiffEntry.ChangeType.DELETE && changedFiles != null) { + changedFiles.add(newPath); + } else if (deletedFiles != null) { + deletedFiles.add(newPath); } if (diff.getChangeType() == DiffEntry.ChangeType.RENAME && isHandleRenamedFiles()) { - renamedFiles.add(getNativePath(getDirectoryNameRelative()) + File.separator + - getNativePath(diff.getNewPath())); + renamedFiles.add(newPath); } } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java index 88ad7ff32cf..bcf9b9b41f7 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java @@ -75,6 +75,11 @@ public History() { this.renamedFiles = renamed; } + History(List entries, Set renamed, Set deletedFiles) { + this.entries = entries; + this.renamedFiles = renamed; + } + // Needed for serialization. public Map getTags() { return tags; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java index 09b3162a28a..9376948a125 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java @@ -779,7 +779,7 @@ private List getReposFromString(Collection repositories) { return repos; } - Repository getRepository(File file) { + public Repository getRepository(File file) { return repositoryLookup.getRepository(file.toPath(), repositoryRoots.keySet(), repositories, PathUtils::getRelativeToCanonical); } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java new file mode 100644 index 00000000000..e59e027c395 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -0,0 +1,71 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + */ + +package org.opengrok.indexer.history; + +import java.io.File; +import java.util.Date; +import java.util.Set; +import java.util.SortedSet; +import java.util.function.Consumer; + +public abstract class RepositoryWithHistoryTraversal extends RepositoryWithPerPartesHistory { + private static final long serialVersionUID = -1L; + + public static class CommitInfo { + String revision; + Date date; + String authorName; + String authorEmail; + String message; + + CommitInfo(String revision, Date date, String authorName, String authorEmail, String message) { + this.revision = revision; + this.date = date; + this.authorName = authorName; + this.authorEmail = authorEmail; + this.message = message; + } + } + + public static class ChangesetInfo { + CommitInfo commit; + public SortedSet files; + public Set renamedFiles; + public Set deletedFiles; + + ChangesetInfo(CommitInfo commit) { + this.commit = commit; + } + + ChangesetInfo(CommitInfo commit, SortedSet files, Set renamedFiles, Set deletedFiles) { + this.commit = commit; + this.files = files; + this.renamedFiles = renamedFiles; + this.deletedFiles = deletedFiles; + } + } + + public abstract void traverseHistory(File file, String sinceRevision, String tillRevision, + Integer numCommits, Consumer visitor) throws HistoryException; +} diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index b5f94f95ddf..fa70a6b2baf 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -44,7 +44,9 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.SortedSet; import java.util.TreeMap; +import java.util.TreeSet; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; @@ -97,8 +99,11 @@ import org.opengrok.indexer.configuration.PathAccepter; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; +import org.opengrok.indexer.history.HistoryException; import org.opengrok.indexer.history.HistoryGuru; import org.opengrok.indexer.history.Repository; +import org.opengrok.indexer.history.RepositoryInfo; +import org.opengrok.indexer.history.RepositoryWithHistoryTraversal; import org.opengrok.indexer.logger.LoggerFactory; import org.opengrok.indexer.search.QueryBuilder; import org.opengrok.indexer.util.ForbiddenSymlinkException; @@ -410,6 +415,74 @@ private void markProjectIndexed(Project project) { } } + private static List getRepositoriesForProject(Project project) { + List repositoryList = new ArrayList<>(); + + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + List repositoryInfoList = env.getProjectRepositoriesMap().get(project); + + for (RepositoryInfo repositoryInfo : repositoryInfoList) { + Repository repository = HistoryGuru.getInstance().getRepository(new File(repositoryInfo.getDirectoryName())); + if (repository != null) { + repositoryList.add(repository); + } + } + + return repositoryList; + } + + // TODO: find a better name for the method + private static boolean isAllRepositoriesSupportDeletedFiles(Project project) { + if (project == null) { + return false; + } + if (!project.isHistoryEnabled()) { + return false; + } + if (!RuntimeEnvironment.getInstance().isHistoryCache()) { + return false; + } + + List repositories = getRepositoriesForProject(project); + // Projects without repositories have to be indexed using indexDown(). + if (repositories.isEmpty()) { + return false; + } + for (Repository repository : repositories) { + if (!(repository instanceof RepositoryWithHistoryTraversal)) { + // TODO: log + return false; + } + } + + // Here it is assumed there are no files untracked by the repositories. + return true; + } + + private static class FileCollector { + SortedSet files; + Set renamedFiles; + Set deletedFiles; + + FileCollector() { + files = new TreeSet<>(); + renamedFiles = new HashSet<>(); + deletedFiles = new HashSet<>(); + } + + public void visit(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { + if (changesetInfo.renamedFiles != null) { + renamedFiles.addAll(changesetInfo.renamedFiles); + } + if (changesetInfo.files != null) { + files.addAll(changesetInfo.files); + } + if (changesetInfo.deletedFiles != null) { + deletedFiles.addAll(changesetInfo.deletedFiles); + } + } + } + /** * Update the content of this index database. * @@ -501,19 +574,50 @@ public void update() throws IOException { } } - // The actual indexing happens in indexParallel(). - + // The actual indexing happens in indexParallel(). Here we merely collect the items + // that need to be indexed. IndexDownArgs args = new IndexDownArgs(); - Statistics elapsed = new Statistics(); - LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir); - indexDown(sourceRoot, dir, args); - elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), - "indexer.db.directory.traversal"); + // Only do this if all repositories for given project support file gathering via history traversal. + // TODO: add tunable for this (in case there are untracked files) + boolean indexDownPerformed = false; + if (isAllRepositoriesSupportDeletedFiles(project)) { + // TODO: do this for the initial index ? might not be worth it. + for (Repository repository : getRepositoriesForProject(project)) { + // TODO: need to get the changeset of the previous index run + // Traverse the history and add args to IndexDownArgs for the files/symlinks changed/deleted. + if (repository instanceof RepositoryWithHistoryTraversal) { + FileCollector fileCollector = new FileCollector(); + ((RepositoryWithHistoryTraversal) repository).traverseHistory(sourceRoot, + "from", "till", null, fileCollector::visit); + + for (String path : fileCollector.files) { + File file = new File(sourceRoot, path); + // Check that each file is present on file system to avoid problems + // in indexParallel(). + // TODO: what about deleted files ? + if (file.exists()) { + // TODO: call accept() to see if the file can be added (symlinks !) + args.works.add(new IndexFileWork(file, path)); + } + } + for (String path : fileCollector.deletedFiles) { + // TODO: removeFile() + } + } + } + } else { + Statistics elapsed = new Statistics(); + LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir); + indexDown(sourceRoot, dir, args); + indexDownPerformed = true; + elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), + "indexer.db.directory.traversal"); + } showFileCount(dir, args); args.cur_count = 0; - elapsed = new Statistics(); + Statistics elapsed = new Statistics(); LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir); indexParallel(dir, args); elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir), @@ -522,13 +626,15 @@ public void update() throws IOException { // Remove data for the trailing terms that indexDown() // did not traverse. These correspond to files that have been // removed and have higher ordering than any present files. - while (uidIter != null && uidIter.term() != null - && uidIter.term().utf8ToString().startsWith(startuid)) { + if (indexDownPerformed) { + while (uidIter != null && uidIter.term() != null + && uidIter.term().utf8ToString().startsWith(startuid)) { - removeFile(true); - BytesRef next = uidIter.next(); - if (next == null) { - uidIter = null; + removeFile(true); + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } } } @@ -554,6 +660,8 @@ public void update() throws IOException { isWithDirectoryCounts && isCountingDeltas); markProjectIndexed(project); + } catch (HistoryException e) { + // TODO } finally { reader.close(); } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/util/ClassUtil.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/util/ClassUtil.java index 07d2d98a834..3a6c6b834d2 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/util/ClassUtil.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/util/ClassUtil.java @@ -53,7 +53,7 @@ private ClassUtil() { * Mark all transient fields in {@code targetClass} as @Transient for the * XML serialization. * - * Fields marked with java transient keyword do not work becase the + * Fields marked with java transient keyword do not work because the * XMLEncoder does not take these into account. This helper marks the fields * marked with transient keyword as transient also for the XMLDecoder. * @@ -73,7 +73,7 @@ public static void remarkTransientFields(Class targetClass) { } } } catch (IntrospectionException ex) { - LOGGER.log(Level.WARNING, "An exception ocurred during remarking transient fields:", ex); + LOGGER.log(Level.WARNING, "An exception occurred during remarking transient fields:", ex); } } From 1665873b491f17818cb45ffb0274f1a92e26107a Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 29 Apr 2022 14:54:46 +0200 Subject: [PATCH 02/88] next stage - refactor - use getDocument() - collect all files from Git regardless of their nature --- .../indexer/history/GitRepository.java | 92 ++--- .../org/opengrok/indexer/history/History.java | 5 - .../opengrok/indexer/history/HistoryGuru.java | 19 +- .../RepositoryWithHistoryTraversal.java | 16 +- .../opengrok/indexer/index/IndexDatabase.java | 332 +++++++++++------- 5 files changed, 284 insertions(+), 180 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 1244efb2834..9eca65c3315 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -506,8 +506,7 @@ public History getHistory(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { HistoryCollector historyCollector = new HistoryCollector(); - traverseHistory(file, sinceRevision, tillRevision, numCommits, historyCollector::visit); - + traverseHistory(file, sinceRevision, tillRevision, numCommits, historyCollector::visit, false); History result = new History(historyCollector.entries, historyCollector.renamedFiles); // Assign tags to changesets they represent @@ -521,7 +520,7 @@ public History getHistory(File file, String sinceRevision, String tillRevision, } public void traverseHistory(File file, String sinceRevision, String tillRevision, - Integer numCommits, Consumer visitor) throws HistoryException { + Integer numCommits, Consumer visitor, boolean getAll) throws HistoryException { if (numCommits != null && numCommits <= 0) { throw new HistoryException("invalid number of commits to retrieve"); @@ -532,34 +531,13 @@ public void traverseHistory(File file, String sinceRevision, String tillRevision try (org.eclipse.jgit.lib.Repository repository = getJGitRepository(getDirectoryName()); RevWalk walk = new RevWalk(repository)) { - if (sinceRevision != null) { - walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision))); - } - - if (tillRevision != null) { - walk.markStart(walk.lookupCommit(repository.resolve(tillRevision))); - } else { - walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD))); - } - - String relativePath = RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(file); - if (!getDirectoryNameRelative().equals(relativePath)) { - if (isHandleRenamedFiles()) { - Config config = repository.getConfig(); - config.setBoolean("diff", null, "renames", true); - org.eclipse.jgit.diff.DiffConfig dc = config.get(org.eclipse.jgit.diff.DiffConfig.KEY); - FollowFilter followFilter = FollowFilter.create(getGitFilePath(getRepoRelativePath(file)), dc); - walk.setTreeFilter(followFilter); - } else { - walk.setTreeFilter(AndTreeFilter.create( - PathFilter.create(getGitFilePath(getRepoRelativePath(file))), - TreeFilter.ANY_DIFF)); - } - } + setupWalk(file, sinceRevision, tillRevision, repository, walk); int num = 0; for (RevCommit commit : walk) { - if (commit.getParentCount() > 1 && !isMergeCommitsEnabled()) { + // For truly incremental reindex merge commits have to be processed. + // TODO: maybe the same for renamed files - depends on what happens if renamed file detection is on + if (!getAll && commit.getParentCount() > 1 && !isMergeCommitsEnabled()) { continue; } @@ -585,10 +563,40 @@ public void traverseHistory(File file, String sinceRevision, String tillRevision } } + private void setupWalk(File file, String sinceRevision, String tillRevision, Repository repository, RevWalk walk) + throws IOException, ForbiddenSymlinkException { + + if (sinceRevision != null) { + walk.markUninteresting(walk.lookupCommit(repository.resolve(sinceRevision))); + } + + if (tillRevision != null) { + walk.markStart(walk.lookupCommit(repository.resolve(tillRevision))); + } else { + walk.markStart(walk.parseCommit(repository.resolve(Constants.HEAD))); + } + + String relativePath = RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(file); + if (!getDirectoryNameRelative().equals(relativePath)) { + if (isHandleRenamedFiles()) { + Config config = repository.getConfig(); + config.setBoolean("diff", null, "renames", true); + org.eclipse.jgit.diff.DiffConfig dc = config.get(org.eclipse.jgit.diff.DiffConfig.KEY); + FollowFilter followFilter = FollowFilter.create(getGitFilePath(getRepoRelativePath(file)), dc); + walk.setTreeFilter(followFilter); + } else { + walk.setTreeFilter(AndTreeFilter.create( + PathFilter.create(getGitFilePath(getRepoRelativePath(file))), + TreeFilter.ANY_DIFF)); + } + } + } + /** - * Accumulate list of changed files and renamed files (if enabled) for given commit. - * @param renamedFiles result containing the renamed files in this commit - * @param changedFiles result containing changed files in this commit + * Accumulate list of changed/deleted/renamed files for given commit. + * @param renamedFiles output: renamed files in this commit (if renamed file handling is enabled) + * @param changedFiles output: changed files in this commit + * @param deletedFiles output: deleted files in this commit * @param commit RevCommit object * @param repository repository object * @throws IOException on error traversing the commit tree @@ -597,11 +605,7 @@ private void getFilesForCommit(Set renamedFiles, SortedSet chang RevCommit commit, Repository repository) throws IOException { - int numParents = commit.getParentCount(); - - if (numParents == 1) { - getFiles(repository, commit.getParent(0), commit, changedFiles, renamedFiles, deletedFiles); - } else if (numParents == 0) { // first commit + if (commit.getParentCount() == 0) { // first commit - add all files try (TreeWalk treeWalk = new TreeWalk(repository)) { treeWalk.addTree(commit.getTree()); treeWalk.setRecursive(true); @@ -612,7 +616,7 @@ private void getFilesForCommit(Set renamedFiles, SortedSet chang } } } else { - getFiles(repository, commit.getParent(0), commit, changedFiles, renamedFiles, deletedFiles); + getFilesBetweenCommits(repository, commit.getParent(0), commit, changedFiles, renamedFiles, deletedFiles); } } @@ -625,7 +629,7 @@ private static String getNativePath(String path) { } /** - * Assemble list of changedFiles that changed between 2 commits. + * Assemble list of changed/deleted/renamed files between a commit and its parent. * @param repository repository object * @param oldCommit parent commit * @param newCommit new commit (the method assumes oldCommit is its parent) @@ -634,9 +638,9 @@ private static String getNativePath(String path) { * @param deletedFiles output: set of deleted files * @throws IOException on I/O problem */ - private void getFiles(org.eclipse.jgit.lib.Repository repository, - RevCommit oldCommit, RevCommit newCommit, - Set changedFiles, Set renamedFiles, Set deletedFiles) + private void getFilesBetweenCommits(org.eclipse.jgit.lib.Repository repository, + RevCommit oldCommit, RevCommit newCommit, + Set changedFiles, Set renamedFiles, Set deletedFiles) throws IOException { OutputStream outputStream = NullOutputStream.INSTANCE; @@ -654,6 +658,7 @@ private void getFiles(org.eclipse.jgit.lib.Repository repository, getNativePath(diff.getNewPath()); if (diff.getChangeType() != DiffEntry.ChangeType.DELETE && changedFiles != null) { + // Added files (ChangeType.ADD) are treated as changed. changedFiles.add(newPath); } else if (deletedFiles != null) { deletedFiles.add(newPath); @@ -661,6 +666,11 @@ private void getFiles(org.eclipse.jgit.lib.Repository repository, if (diff.getChangeType() == DiffEntry.ChangeType.RENAME && isHandleRenamedFiles()) { renamedFiles.add(newPath); + if (deletedFiles != null) { + String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getOldPath()); + deletedFiles.add(oldPath); + } } } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java index bcf9b9b41f7..88ad7ff32cf 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/History.java @@ -75,11 +75,6 @@ public History() { this.renamedFiles = renamed; } - History(List entries, Set renamed, Set deletedFiles) { - this.entries = entries; - this.renamedFiles = renamed; - } - // Needed for serialization. public Map getTags() { return tags; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java index 9376948a125..22b0a4256ab 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java @@ -128,10 +128,9 @@ public static HistoryGuru getInstance() { } /** - * Return whether or not a cache should be used for the history log. + * Return whether cache should be used for the history log. * - * @return {@code true} if the history cache has been enabled and - * initialized, {@code false} otherwise + * @return {@code true} if the history cache has been enabled and initialized, {@code false} otherwise */ private boolean useCache() { return historyCache != null; @@ -429,6 +428,20 @@ public Map getLastModifiedTimes(File directory) return Collections.emptyMap(); } + /** + * TODO: document + * @param repository + * @return + * @throws HistoryException + */ + public String getLatestCachedRevision(Repository repository) throws HistoryException { + if (repository != null && useCache()) { + return historyCache.getLatestCachedRevision(repository); + } + + throw new HistoryException(String.format("cannot get latest cached revision for %s", repository)); + } + /** * recursively search for repositories with a depth limit, add those found * to the internally used map. diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index e59e027c395..2272a8eda54 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -23,6 +23,8 @@ package org.opengrok.indexer.history; +import org.jetbrains.annotations.Nullable; + import java.io.File; import java.util.Date; import java.util.Set; @@ -66,6 +68,16 @@ public static class ChangesetInfo { } } - public abstract void traverseHistory(File file, String sinceRevision, String tillRevision, - Integer numCommits, Consumer visitor) throws HistoryException; + /** + * TODO: document + * @param file + * @param sinceRevision + * @param tillRevision + * @param numCommits + * @param visitor + * @param getAll + * @throws HistoryException + */ + public abstract void traverseHistory(File file, String sinceRevision, @Nullable String tillRevision, + Integer numCommits, Consumer visitor, boolean getAll) throws HistoryException; } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index fa70a6b2baf..0fab9ec6aed 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -89,6 +89,7 @@ import org.apache.lucene.store.SimpleFSLockFactory; import org.apache.lucene.util.BytesRef; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; import org.opengrok.indexer.analysis.AbstractAnalyzer; import org.opengrok.indexer.analysis.AnalyzerFactory; import org.opengrok.indexer.analysis.AnalyzerGuru; @@ -187,8 +188,7 @@ public IndexDatabase() throws IOException { * Create a new instance of an Index Database for a given project. * * @param project the project to create the database for - * @throws java.io.IOException if an error occurs while creating - * directories + * @throws java.io.IOException if an error occurs while creating directories */ public IndexDatabase(Project project) throws IOException { this.project = project; @@ -208,13 +208,13 @@ public IndexDatabase(Project project) throws IOException { } /** - * Update the index database for all of the projects. + * Update the index database for all the projects. * * @param listener where to signal the changes to the database * @throws IOException if an error occurs */ - static CountDownLatch updateAll(IndexChangedListener listener) - throws IOException { + static CountDownLatch updateAll(IndexChangedListener listener) throws IOException { + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); List dbs = new ArrayList<>(); @@ -361,7 +361,7 @@ public boolean addDirectory(String dir) { private void showFileCount(String dir, IndexDownArgs args) { if (RuntimeEnvironment.getInstance().isPrintProgress()) { LOGGER.log(Level.INFO, String.format("Need to process: %d files for %s", - args.cur_count, dir)); + args.curCount, dir)); } } @@ -431,10 +431,13 @@ private static List getRepositoriesForProject(Project project) { return repositoryList; } - // TODO: find a better name for the method - private static boolean isAllRepositoriesSupportDeletedFiles(Project project) { + /** + * @param project project + * @return whether the repositories of given project are ready for truly incremental reindex + */ + private static boolean isReadyForTrulyIncrementalReindex(Project project) { if (project == null) { - return false; + throw new IllegalArgumentException("null project"); } if (!project.isHistoryEnabled()) { return false; @@ -446,39 +449,60 @@ private static boolean isAllRepositoriesSupportDeletedFiles(Project project) { List repositories = getRepositoriesForProject(project); // Projects without repositories have to be indexed using indexDown(). if (repositories.isEmpty()) { + LOGGER.log(Level.FINEST, "project {0} has no repositories, will be indexed by directory traversal.", + project); return false; } + for (Repository repository : repositories) { if (!(repository instanceof RepositoryWithHistoryTraversal)) { - // TODO: log + LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal," + + "the project will be indexed using directory traversal.", + new Object[]{project, repository}); + return false; + } + + /* + * This check means that this method will return false in the case of initial reindex. + * In such case the traversal of all changesets would most likely be counterproductive. + */ + try { + HistoryGuru.getInstance().getLatestCachedRevision(repository); + } catch (HistoryException ex) { + LOGGER.log(Level.FINE, String.format("cannot load latest cached revision for history cache " + + "for repository %s, the project will be indexed using directory traversal.", + repository), ex); return false; } } - // Here it is assumed there are no files untracked by the repositories. + // Here it is assumed there are no files untracked by the repositories of this project. return true; } + /** + * This class is meant to collect files that were touched in some way by SCM update. + * The visitor argument contains the files separated based on the type of modification performed, + * however the consumer of this class is not interested in this classification. + * This is because when incrementally indexing a bunch of changesets, + * in one changeset a file may be deleted, only to be re-added in the next changeset etc. + */ private static class FileCollector { - SortedSet files; - Set renamedFiles; - Set deletedFiles; + Set files; FileCollector() { files = new TreeSet<>(); - renamedFiles = new HashSet<>(); - deletedFiles = new HashSet<>(); } public void visit(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { if (changesetInfo.renamedFiles != null) { - renamedFiles.addAll(changesetInfo.renamedFiles); + files.addAll(changesetInfo.renamedFiles); } if (changesetInfo.files != null) { files.addAll(changesetInfo.files); } if (changesetInfo.deletedFiles != null) { - deletedFiles.addAll(changesetInfo.deletedFiles); + files.addAll(changesetInfo.deletedFiles); } } } @@ -574,49 +598,12 @@ public void update() throws IOException { } } - // The actual indexing happens in indexParallel(). Here we merely collect the items + // The actual indexing happens in indexParallel(). Here we merely collect the files // that need to be indexed. IndexDownArgs args = new IndexDownArgs(); - // Only do this if all repositories for given project support file gathering via history traversal. - // TODO: add tunable for this (in case there are untracked files) - boolean indexDownPerformed = false; - if (isAllRepositoriesSupportDeletedFiles(project)) { - // TODO: do this for the initial index ? might not be worth it. - for (Repository repository : getRepositoriesForProject(project)) { - // TODO: need to get the changeset of the previous index run - // Traverse the history and add args to IndexDownArgs for the files/symlinks changed/deleted. - if (repository instanceof RepositoryWithHistoryTraversal) { - FileCollector fileCollector = new FileCollector(); - ((RepositoryWithHistoryTraversal) repository).traverseHistory(sourceRoot, - "from", "till", null, fileCollector::visit); - - for (String path : fileCollector.files) { - File file = new File(sourceRoot, path); - // Check that each file is present on file system to avoid problems - // in indexParallel(). - // TODO: what about deleted files ? - if (file.exists()) { - // TODO: call accept() to see if the file can be added (symlinks !) - args.works.add(new IndexFileWork(file, path)); - } - } - for (String path : fileCollector.deletedFiles) { - // TODO: removeFile() - } - } - } - } else { - Statistics elapsed = new Statistics(); - LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir); - indexDown(sourceRoot, dir, args); - indexDownPerformed = true; - elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), - "indexer.db.directory.traversal"); - } - - showFileCount(dir, args); + boolean indexDownPerformed = getIndexDownArgs(dir, sourceRoot, args); - args.cur_count = 0; + args.curCount = 0; Statistics elapsed = new Statistics(); LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir); indexParallel(dir, args); @@ -624,13 +611,13 @@ public void update() throws IOException { "indexer.db.directory.index"); // Remove data for the trailing terms that indexDown() - // did not traverse. These correspond to files that have been + // did not traverse. These correspond to the files that have been // removed and have higher ordering than any present files. if (indexDownPerformed) { while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startuid)) { - removeFile(true); + removeFile(null, true); BytesRef next = uidIter.next(); if (next == null) { uidIter = null; @@ -710,6 +697,61 @@ public void update() throws IOException { } } + private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { + + boolean indexDownPerformed = false; + Statistics elapsed = new Statistics(); + + // Only do this if all repositories for given project support file gathering via history traversal. + // TODO: introduce per project tunable for this (in case there are untracked files) + // it will be also useful for testing + // TODO: what if the index is without LOC counts ? indexDown() forces full reindex in such case + // so perhaps it should be used in such case. + // TODO: what about setup without projects ? + if (isReadyForTrulyIncrementalReindex(project)) { + LOGGER.log(Level.INFO, "Starting file collection using history cache in directory {0}", dir); + for (Repository repository : getRepositoriesForProject(project)) { + // Traverse the history and add args to IndexDownArgs for the files/symlinks changed/deleted. + FileCollector fileCollector = new FileCollector(); + // Get the list of files starting with the latest changeset in the history cache + // and ending with the newest changeset of the repository. + ((RepositoryWithHistoryTraversal) repository).traverseHistory(sourceRoot, + HistoryGuru.getInstance().getLatestCachedRevision(repository), + null, null, fileCollector::visit, true); + + for (String path : fileCollector.files) { + File file = new File(sourceRoot, path); + + // Check that each file is present on file system to avoid problems in indexParallel(). + if (file.exists()) { + AcceptSymlinkRet ret = new AcceptSymlinkRet(); + if (!accept(file.getParentFile(), file, ret)) { + handleSymlink(path, ret); + } else { + removeFile(path, false); + args.works.add(new IndexFileWork(file, path)); + args.curCount++; + } + } else { + removeFile(path, true); + } + } + } + elapsed.report(LOGGER, String.format("Done file collection of directory %s", dir), + "indexer.db.directory.collection"); + } else { + LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir); + indexDown(sourceRoot, dir, args); + indexDownPerformed = true; + elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), + "indexer.db.directory.traversal"); + } + + showFileCount(dir, args); + + return indexDownPerformed; + } + /** * Optimize all index databases. * @@ -850,13 +892,49 @@ private void removeHistoryFile(String path) { } /** - * Remove a stale file (uidIter.term().text()) from the index database and - * history cache, and queue the removal of xref. + * Remove a stale file from the index database and potentially also from history cache, + * and queue the removal of the associated xref file. * * @param removeHistory if false, do not remove history cache for this file * @throws java.io.IOException if an error occurs */ - private void removeFile(boolean removeHistory) throws IOException { + private void removeFile(@Nullable String path, boolean removeHistory) throws IOException { + if (path == null) { // indexDown() + removeFileDocUid(); + } else { + for (IndexChangedListener listener : listeners) { + listener.fileRemove(path); + } + + Document doc = null; + try { + doc = getDocument(path, reader); + } catch (ParseException e) { + LOGGER.log(Level.WARNING, String.format("could not find document for %s, " + + "the index might contain stale data as a result", path), e); + } + if (doc != null) { + decrementLOCforDoc(path, doc); + + String storedU = doc.get(QueryBuilder.U); + writer.deleteDocuments(new Term(QueryBuilder.U, storedU)); + } + } + + removeXrefFile(path); + + if (removeHistory) { + removeHistoryFile(path); + } + + setDirty(); + + for (IndexChangedListener listener : listeners) { + listener.fileRemoved(path); + } + } + + private void removeFileDocUid() throws IOException { String path = Util.uid2url(uidIter.term().utf8ToString()); for (IndexChangedListener listener : listeners) { @@ -870,28 +948,22 @@ private void removeFile(boolean removeHistory) throws IOException { // Read a limited-fields version of the document. Document doc = reader.document(postsIter.docID(), REVERT_COUNTS_FIELDS); if (doc != null) { - NullableNumLinesLOC nullableCounts = NumLinesLOCUtil.read(doc); - if (nullableCounts.getNumLines() != null && nullableCounts.getLOC() != null) { - NumLinesLOC counts = new NumLinesLOC(path, - -nullableCounts.getNumLines(), - -nullableCounts.getLOC()); - countsAggregator.register(counts); - } + decrementLOCforDoc(path, doc); break; } } } writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term())); + } - removeXrefFile(path); - if (removeHistory) { - removeHistoryFile(path); - } - - setDirty(); - for (IndexChangedListener listener : listeners) { - listener.fileRemoved(path); + private void decrementLOCforDoc(String path, Document doc) { + NullableNumLinesLOC nullableCounts = NumLinesLOCUtil.read(doc); + if (nullableCounts.getNumLines() != null && nullableCounts.getLOC() != null) { + NumLinesLOC counts = new NumLinesLOC(path, + -nullableCounts.getNumLines(), + -nullableCounts.getLOC()); + countsAggregator.register(counts); } } @@ -979,6 +1051,7 @@ private void addFile(File file, String path, Ctags ctags) } setDirty(); + for (IndexChangedListener listener : listeners) { listener.fileAdded(path, fa.getClass().getSimpleName()); } @@ -1315,19 +1388,32 @@ private boolean isLocal(String path) { return false; } + private void handleSymlink(String path, AcceptSymlinkRet ret) { + /* + * If ret.localRelPath is defined, then a symlink was detected but + * not "accepted" to avoid redundancy with an already-accepted + * canonical target. Set up for a deferred creation of a symlink + * within xref/. + */ + if (ret.localRelPath != null) { + File xrefPath = new File(xrefDir, path); + PendingSymlinkage psym = new PendingSymlinkage(xrefPath.getAbsolutePath(), ret.localRelPath); + completer.add(psym); + } + } + /** * Executes the first, serial stage of indexing, recursively. *

Files at least are counted, and any deleted or updated files (based on * comparison to the Lucene index) are passed to - * {@link #removeFile(boolean)}. New or updated files are noted for + * {@link #removeFile(String, boolean)}. New or updated files are noted for * indexing. * @param dir the root indexDirectory to generate indexes for * @param parent path to parent directory * @param args arguments to control execution and for collecting a list of * files for indexing */ - private void indexDown(File dir, String parent, IndexDownArgs args) - throws IOException { + private void indexDown(File dir, String parent, IndexDownArgs args) throws IOException { if (isInterrupted()) { return; @@ -1335,18 +1421,7 @@ private void indexDown(File dir, String parent, IndexDownArgs args) AcceptSymlinkRet ret = new AcceptSymlinkRet(); if (!accept(dir, ret)) { - /* - * If ret.localRelPath is defined, then a symlink was detected but - * not "accepted" to avoid redundancy with an already-accepted - * canonical target. Set up for a deferred creation of a symlink - * within xref/. - */ - if (ret.localRelPath != null) { - File xrefPath = new File(xrefDir, parent); - PendingSymlinkage psym = new PendingSymlinkage( - xrefPath.getAbsolutePath(), ret.localRelPath); - completer.add(psym); - } + handleSymlink(parent, ret); return; } @@ -1361,18 +1436,12 @@ private void indexDown(File dir, String parent, IndexDownArgs args) for (File file : files) { String path = parent + File.separator + file.getName(); if (!accept(dir, file, ret)) { - if (ret.localRelPath != null) { - // See note above about ret.localRelPath. - File xrefPath = new File(xrefDir, path); - PendingSymlinkage psym = new PendingSymlinkage( - xrefPath.getAbsolutePath(), ret.localRelPath); - completer.add(psym); - } + handleSymlink(path, ret); } else { if (file.isDirectory()) { indexDown(file, path, args); } else { - args.cur_count++; + args.curCount++; if (uidIter != null) { path = Util.fixPathIfWindows(path); @@ -1394,7 +1463,7 @@ private void indexDown(File dir, String parent, IndexDownArgs args) // cache for the file so that incremental history cache // generation works. String termPath = Util.uid2url(uidIter.term().utf8ToString()); - removeFile(!termPath.equals(path)); + removeFile(null, !termPath.equals(path)); BytesRef next = uidIter.next(); if (next == null) { @@ -1412,7 +1481,7 @@ private void indexDown(File dir, String parent, IndexDownArgs args) boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && checkSettings(file, path); if (!matchOK) { - removeFile(false); + removeFile(null,false); } BytesRef next = uidIter.next(); @@ -1510,7 +1579,7 @@ private void indexParallel(String dir, IndexDownArgs args) { LOGGER.log(Level.SEVERE, exmsg, e); } - args.cur_count = currentCounter.intValue(); + args.curCount = currentCounter.intValue(); // Start with failureCount=worksCount, and then subtract successes. int failureCount = worksCount; @@ -1791,34 +1860,39 @@ public static Document getDocument(File file) throws IOException, ParseException // Sanitize Windows path delimiters in order not to conflict with Lucene escape character. path = path.replace("\\", "/"); - try (IndexReader ireader = getIndexReader(path)) { - if (ireader == null) { - // No index, no document.. - return null; - } + try (IndexReader indexReader = getIndexReader(path)) { + return getDocument(path, indexReader); + } + } - Document doc; - Query q = new QueryBuilder().setPath(path).build(); - IndexSearcher searcher = new IndexSearcher(ireader); - Statistics stat = new Statistics(); - TopDocs top = searcher.search(q, 1); - stat.report(LOGGER, Level.FINEST, "search via getDocument done", - "search.latency", new String[]{"category", "getdocument", - "outcome", top.totalHits.value == 0 ? "empty" : "success"}); - if (top.totalHits.value == 0) { - // No hits, no document... - return null; - } - doc = searcher.doc(top.scoreDocs[0].doc); - String foundPath = doc.get(QueryBuilder.PATH); + @Nullable + private static Document getDocument(String path, IndexReader indexReader) throws ParseException, IOException { + if (indexReader == null) { + // No index, no document.. + return null; + } - // Only use the document if we found an exact match. - if (!path.equals(foundPath)) { - return null; - } + Document doc; + Query q = new QueryBuilder().setPath(path).build(); + IndexSearcher searcher = new IndexSearcher(indexReader); + Statistics stat = new Statistics(); + TopDocs top = searcher.search(q, 1); + stat.report(LOGGER, Level.FINEST, "search via getDocument() done", + "search.latency", new String[]{"category", "getdocument", + "outcome", top.totalHits.value == 0 ? "empty" : "success"}); + if (top.totalHits.value == 0) { + // No hits, no document... + return null; + } + doc = searcher.doc(top.scoreDocs[0].doc); + String foundPath = doc.get(QueryBuilder.PATH); - return doc; + // Only use the document if we found an exact match. + if (!path.equals(foundPath)) { + return null; } + + return doc; } @Override @@ -2077,7 +2151,7 @@ private boolean xrefExistsFor(String path) { } private static class IndexDownArgs { - int cur_count; + int curCount; final List works = new ArrayList<>(); } From 32b4cd63ed2607b1a2c5e04b2f2d52ebfc171740 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 29 Apr 2022 16:26:13 +0200 Subject: [PATCH 03/88] next chunk of changes - store and grab last revision - fix IndexDatabaseTest - refactor --- .../configuration/RuntimeEnvironment.java | 4 +- .../indexer/history/FileHistoryCache.java | 46 +++++++--- .../indexer/history/HistoryCache.java | 9 ++ .../opengrok/indexer/history/HistoryGuru.java | 6 +- .../opengrok/indexer/index/IndexDatabase.java | 88 +++++++++++-------- .../indexer/index/IndexDatabaseTest.java | 13 +-- 6 files changed, 108 insertions(+), 58 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java index 285fd08c9d8..727e303b0c9 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java @@ -62,6 +62,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.NamedThreadFactory; +import org.jetbrains.annotations.VisibleForTesting; import org.opengrok.indexer.authorization.AuthorizationFramework; import org.opengrok.indexer.authorization.AuthorizationStack; import org.opengrok.indexer.history.HistoryGuru; @@ -1491,7 +1492,8 @@ public void writeConfiguration(String host) throws IOException, InterruptedExcep * Project with some repository information is considered as a repository * otherwise it is just a simple project. */ - private void generateProjectRepositoriesMap() throws IOException { + @VisibleForTesting + public void generateProjectRepositoriesMap() throws IOException { repository_map.clear(); for (RepositoryInfo r : getRepositories()) { Project proj; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java index bd11ad41bd7..0b3a8c5b1d2 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java @@ -43,6 +43,7 @@ import java.io.Writer; import java.nio.file.Files; import java.nio.file.NoSuchFileException; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; @@ -62,6 +63,7 @@ import io.micrometer.core.instrument.Counter; import io.micrometer.core.instrument.MeterRegistry; +import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.TestOnly; import org.opengrok.indexer.Metrics; import org.opengrok.indexer.configuration.PathAccepter; @@ -739,38 +741,56 @@ private String getRepositoryCachedRevPath(Repository repository) { return histDir + File.separatorChar + LATEST_REV_FILE_NAME; } + private String getRepositoryPreviousCachedRevPath(Repository repository) { + String histDir = getRepositoryHistDataDirname(repository); + if (histDir == null) { + return null; + } + return histDir + File.separatorChar + LATEST_REV_FILE_NAME + ".prev"; + } + /** * Store latest indexed revision for the repository under data directory. * @param repository repository * @param rev latest revision which has been just indexed */ private void storeLatestCachedRevision(Repository repository, String rev) { - Writer writer = null; - + // Save the file so that it can be used by truly incremental reindex via getPreviousCachedRevision(). + Path newPath = Path.of(getRepositoryCachedRevPath(repository)); + Path oldPath = Path.of(getRepositoryPreviousCachedRevPath(repository)); try { - writer = new BufferedWriter(new OutputStreamWriter( - new FileOutputStream(getRepositoryCachedRevPath(repository)))); + if (newPath.toFile().exists()) { + Files.move(newPath, oldPath); + } + } catch (IOException e) { + LOGGER.log(Level.WARNING, String.format("cannot move %s to %s", newPath, oldPath), e); + } + + try (Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newPath.toFile())))) { writer.write(rev); } catch (IOException ex) { LOGGER.log(Level.WARNING, String.format("Cannot write latest cached revision to file for repository %s", repository), ex); - } finally { - try { - if (writer != null) { - writer.close(); - } - } catch (IOException ex) { - LOGGER.log(Level.WARNING, "Cannot close file", ex); - } } } @Override + @Nullable public String getLatestCachedRevision(Repository repository) { + return getCachedRevision(repository, getRepositoryCachedRevPath(repository)); + } + + @Override + @Nullable + public String getPreviousCachedRevision(Repository repository) { + return getCachedRevision(repository, getRepositoryPreviousCachedRevPath(repository)); + } + + @Nullable + private String getCachedRevision(Repository repository, String revPath) { String rev; BufferedReader input; - String revPath = getRepositoryCachedRevPath(repository); if (revPath == null) { LOGGER.log(Level.WARNING, "no rev path for repository {0}", repository); return null; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java index 735affeb1b2..ce3ef570e40 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java @@ -122,6 +122,15 @@ History get(File file, @Nullable Repository repository, boolean withFiles) String getLatestCachedRevision(Repository repository) throws HistoryException; + /** + * TODO: document + * @param repository + * @return + * @throws HistoryException + */ + String getPreviousCachedRevision(Repository repository) + throws HistoryException; + /** * Get the last modified times for all files and subdirectories in the * specified directory. diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java index 22b0a4256ab..37ca403502d 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java @@ -434,12 +434,12 @@ public Map getLastModifiedTimes(File directory) * @return * @throws HistoryException */ - public String getLatestCachedRevision(Repository repository) throws HistoryException { + public String getPreviousCachedRevision(Repository repository) throws HistoryException { if (repository != null && useCache()) { - return historyCache.getLatestCachedRevision(repository); + return historyCache.getPreviousCachedRevision(repository); } - throw new HistoryException(String.format("cannot get latest cached revision for %s", repository)); + throw new HistoryException(String.format("cannot get previous cached revision for %s", repository)); } /** diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 0fab9ec6aed..046d504df41 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -44,7 +44,6 @@ import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.CopyOnWriteArrayList; @@ -439,10 +438,20 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { if (project == null) { throw new IllegalArgumentException("null project"); } + + // History needs to be enabled for the history cache to work (see the comment below). if (!project.isHistoryEnabled()) { return false; } - if (!RuntimeEnvironment.getInstance().isHistoryCache()) { + + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + // History cache is necessary to get the last indexed revision for given repository. + if (!env.isHistoryCache()) { + return false; + } + + // So far the truly incremental reindex does not work without projects. + if (!env.hasProjects()) { return false; } @@ -464,10 +473,14 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { /* * This check means that this method will return false in the case of initial reindex. - * In such case the traversal of all changesets would most likely be counterproductive. + * In such case the traversal of all changesets would most likely be counterproductive, + * assuming traversal of directory tree is cheaper than reading files from SCM history + * in such case. */ try { - HistoryGuru.getInstance().getLatestCachedRevision(repository); + if (HistoryGuru.getInstance().getPreviousCachedRevision(repository) == null) { + return false; + } } catch (HistoryException ex) { LOGGER.log(Level.FINE, String.format("cannot load latest cached revision for history cache " + "for repository %s, the project will be indexed using directory traversal.", @@ -707,36 +720,9 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args // it will be also useful for testing // TODO: what if the index is without LOC counts ? indexDown() forces full reindex in such case // so perhaps it should be used in such case. - // TODO: what about setup without projects ? if (isReadyForTrulyIncrementalReindex(project)) { LOGGER.log(Level.INFO, "Starting file collection using history cache in directory {0}", dir); - for (Repository repository : getRepositoriesForProject(project)) { - // Traverse the history and add args to IndexDownArgs for the files/symlinks changed/deleted. - FileCollector fileCollector = new FileCollector(); - // Get the list of files starting with the latest changeset in the history cache - // and ending with the newest changeset of the repository. - ((RepositoryWithHistoryTraversal) repository).traverseHistory(sourceRoot, - HistoryGuru.getInstance().getLatestCachedRevision(repository), - null, null, fileCollector::visit, true); - - for (String path : fileCollector.files) { - File file = new File(sourceRoot, path); - - // Check that each file is present on file system to avoid problems in indexParallel(). - if (file.exists()) { - AcceptSymlinkRet ret = new AcceptSymlinkRet(); - if (!accept(file.getParentFile(), file, ret)) { - handleSymlink(path, ret); - } else { - removeFile(path, false); - args.works.add(new IndexFileWork(file, path)); - args.curCount++; - } - } else { - removeFile(path, true); - } - } - } + getIndexDownArgsTrulyIncrementally(sourceRoot, args); elapsed.report(LOGGER, String.format("Done file collection of directory %s", dir), "indexer.db.directory.collection"); } else { @@ -752,6 +738,36 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args return indexDownPerformed; } + private void getIndexDownArgsTrulyIncrementally(File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { + for (Repository repository : getRepositoriesForProject(project)) { + // Traverse the history and add args to IndexDownArgs for the files/symlinks changed/deleted. + FileCollector fileCollector = new FileCollector(); + // Get the list of files starting with the latest changeset in the history cache + // and ending with the newest changeset of the repository. + ((RepositoryWithHistoryTraversal) repository).traverseHistory(sourceRoot, + HistoryGuru.getInstance().getPreviousCachedRevision(repository), + null, null, fileCollector::visit, true); + + for (String path : fileCollector.files) { + File file = new File(sourceRoot, path); + + // Check that each file is present on file system to avoid problems in indexParallel(). + if (file.exists()) { + AcceptSymlinkRet ret = new AcceptSymlinkRet(); + if (!accept(file.getParentFile(), file, ret)) { + handleSymlink(path, ret); + } else { + removeFile(path, false); + args.works.add(new IndexFileWork(file, path)); + args.curCount++; + } + } else { + removeFile(path, true); + } + } + } + } + /** * Optimize all index databases. * @@ -900,7 +916,7 @@ private void removeHistoryFile(String path) { */ private void removeFile(@Nullable String path, boolean removeHistory) throws IOException { if (path == null) { // indexDown() - removeFileDocUid(); + path = removeFileDocUid(); } else { for (IndexChangedListener listener : listeners) { listener.fileRemove(path); @@ -934,7 +950,7 @@ private void removeFile(@Nullable String path, boolean removeHistory) throws IOE } } - private void removeFileDocUid() throws IOException { + private String removeFileDocUid() throws IOException { String path = Util.uid2url(uidIter.term().utf8ToString()); for (IndexChangedListener listener : listeners) { @@ -955,6 +971,8 @@ private void removeFileDocUid() throws IOException { } writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term())); + + return path; } private void decrementLOCforDoc(String path, Document doc) { @@ -1843,7 +1861,7 @@ public static Definitions getDefinitions(File file) throws ParseException, IOExc } /** - * @param file File object of a file under source root + * @param file File object for a file under source root * @return Document object for the file or {@code null} * @throws IOException on I/O error * @throws ParseException on problem with building Query diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 2a8eefe99f9..009cc6fce76 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2010, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2018, 2020, Chris Fraire . */ package org.opengrok.indexer.index; @@ -81,6 +81,7 @@ public static void setUpClass() throws Exception { env, true, true, false, null, null); env.setDefaultProjectsFromNames(new TreeSet<>(Arrays.asList("/c"))); + env.generateProjectRepositoriesMap(); indexer.doIndexerExecution(true, null, null); } @@ -90,7 +91,7 @@ public static void tearDownClass() throws Exception { } @Test - public void testGetDefinitions() throws Exception { + void testGetDefinitions() throws Exception { // Test that we can get definitions for one of the files in the // repository. File f1 = new File(repository.getSourceRoot() + "/git/main.c"); @@ -137,7 +138,7 @@ private void checkDataExistence(String fileName, boolean shouldExist) { * file has been removed from a repository. */ @Test - public void testCleanupAfterIndexRemoval() throws Exception { + void testCleanupAfterIndexRemoval() throws Exception { final int origNumFiles; String projectName = "git"; @@ -146,7 +147,7 @@ public void testCleanupAfterIndexRemoval() throws Exception { IndexDatabase idb = new IndexDatabase(project); assertNotNull(idb); - // Note that the file to remove has to be different than the one used + // Note that the file to remove has to be different from the one used // in {@code testGetDefinitions} because it shares the same index // and this test is going to remove the file and therefore related // definitions. @@ -180,7 +181,7 @@ public void testCleanupAfterIndexRemoval() throws Exception { * however it lacks the pre-requisite indexing phase. */ @Test - public void testIndexPath() throws IOException { + void testIndexPath() throws IOException { SearchEngine instance = new SearchEngine(); // Use as broad search as possible. instance.setFile("c"); @@ -195,7 +196,7 @@ public void testIndexPath() throws IOException { } @Test - public void testGetLastRev() throws IOException, ParseException { + void testGetLastRev() throws IOException, ParseException { Document doc = IndexDatabase.getDocument(Paths.get(repository.getSourceRoot(), "git", "main.c").toFile()); assertNotNull(doc); From f65b3823786e699b056940d0b2ecd776800fa081 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 29 Apr 2022 16:41:28 +0200 Subject: [PATCH 04/88] fix some nits --- .../opengrok/indexer/history/HistoryGuru.java | 7 +++---- .../RepositoryWithHistoryTraversal.java | 19 +++++++++---------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java index 37ca403502d..f050520de1e 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java @@ -429,10 +429,9 @@ public Map getLastModifiedTimes(File directory) } /** - * TODO: document - * @param repository - * @return - * @throws HistoryException + * @param repository Repository object + * @return previously cached revision (before currently running reindex) + * @throws HistoryException on error */ public String getPreviousCachedRevision(Repository repository) throws HistoryException { if (repository != null && useCache()) { diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index 2272a8eda54..74c15dc5e0b 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -16,9 +16,8 @@ * * CDDL HEADER END */ - /* - * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. */ package org.opengrok.indexer.history; @@ -69,14 +68,14 @@ public static class ChangesetInfo { } /** - * TODO: document - * @param file - * @param sinceRevision - * @param tillRevision - * @param numCommits - * @param visitor - * @param getAll - * @throws HistoryException + * Traverse history of given file/directory. + * @param file File object + * @param sinceRevision start revision (non-inclusive) + * @param tillRevision end revision (inclusive) + * @param numCommits maximum number of commits to traverse (use 0 as unlimited) + * @param visitor visitor method + * @param getAll include merge commits (even if not set to be handled) + * @throws HistoryException on error */ public abstract void traverseHistory(File file, String sinceRevision, @Nullable String tillRevision, Integer numCommits, Consumer visitor, boolean getAll) throws HistoryException; From 6b9b9644ea01a481727f24f77583b36149906655 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 29 Apr 2022 16:42:21 +0200 Subject: [PATCH 05/88] IndexDatabase grew too long --- dev/checkstyle/suppressions.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/checkstyle/suppressions.xml b/dev/checkstyle/suppressions.xml index 4047c8265f4..9529cc9846e 100644 --- a/dev/checkstyle/suppressions.xml +++ b/dev/checkstyle/suppressions.xml @@ -18,7 +18,7 @@ information: Portions Copyright [yyyy] [name of copyright owner] CDDL HEADER END -Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2018, 2002, Oracle and/or its affiliates. All rights reserved. Portions Copyright (c) 2018-2020, Chris Fraire . --> @@ -43,7 +43,7 @@ Portions Copyright (c) 2018-2020, Chris Fraire . |Context\.java|HistoryContext\.java|Suggester\.java| |ProjectHelperTestBase\.java|SearchHelper\.java" /> - + From e18d9cb08da63383970ab5072caf5a08e1e42ba2 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 29 Apr 2022 16:49:53 +0200 Subject: [PATCH 06/88] fix more style nits --- .../indexer/history/HistoryCache.java | 24 +++++++++---------- .../RepositoryWithHistoryTraversal.java | 4 ++-- .../opengrok/indexer/index/IndexDatabase.java | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java index ce3ef570e40..aa19ffd2fd7 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java @@ -112,24 +112,24 @@ History get(File file, @Nullable Repository repository, boolean withFiles) boolean hasCacheForFile(File file) throws HistoryException; /** - * Get the revision identifier for the latest cached revision in a - * repository. + * Get the revision identifier for the latest cached revision in a repository. * * @param repository the repository whose latest revision to return - * @return a string representing the latest revision in the cache, or - * {@code null} if it is unknown + * @return a string representing the latest revision in the cache, + * or {@code null} if it is unknown + * @throws HistoryException on error */ - String getLatestCachedRevision(Repository repository) - throws HistoryException; + String getLatestCachedRevision(Repository repository) throws HistoryException; /** - * TODO: document - * @param repository - * @return - * @throws HistoryException + * Get the revision identifier for the latest cached revision in a repository. + * + * @param repository Repository object + * @return a string representing the previous revision (prior to the current indexer run), + * or {@code null} if it is unknown + * @throws HistoryException on error */ - String getPreviousCachedRevision(Repository repository) - throws HistoryException; + String getPreviousCachedRevision(Repository repository) throws HistoryException; /** * Get the last modified times for all files and subdirectories in the diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index 74c15dc5e0b..6eb53eb5eaa 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -16,10 +16,10 @@ * * CDDL HEADER END */ + /* * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. */ - package org.opengrok.indexer.history; import org.jetbrains.annotations.Nullable; @@ -72,7 +72,7 @@ public static class ChangesetInfo { * @param file File object * @param sinceRevision start revision (non-inclusive) * @param tillRevision end revision (inclusive) - * @param numCommits maximum number of commits to traverse (use 0 as unlimited) + * @param numCommits maximum number of commits to traverse (use {@code null} as unlimited) * @param visitor visitor method * @param getAll include merge commits (even if not set to be handled) * @throws HistoryException on error diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 046d504df41..2f5e4f9b4a8 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -1499,7 +1499,7 @@ private void indexDown(File dir, String parent, IndexDownArgs args) throws IOExc boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && checkSettings(file, path); if (!matchOK) { - removeFile(null,false); + removeFile(null, false); } BytesRef next = uidIter.next(); From 390dbae70227c2577f78cbe897eebff8f07fa36a Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 29 Apr 2022 16:55:42 +0200 Subject: [PATCH 07/88] add missing whitespace --- .../main/java/org/opengrok/indexer/history/GitRepository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 9eca65c3315..62de8b98168 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -488,7 +488,7 @@ private static class HistoryCollector { public void visit(ChangesetInfo changesetInfo) { RepositoryWithHistoryTraversal.CommitInfo commit = changesetInfo.commit; HistoryEntry historyEntry = new HistoryEntry(commit.revision, - commit.date,commit.authorName + " <" + commit.authorEmail + ">", + commit.date, commit.authorName + " <" + commit.authorEmail + ">", commit.message, true); if (changesetInfo.renamedFiles != null) { From b2fc531d28eff4a1dcd5f766147a602861ca643b Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 29 Apr 2022 17:11:50 +0200 Subject: [PATCH 08/88] fix testXrefGeneration() --- .../src/main/java/org/opengrok/indexer/index/IndexDatabase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 2f5e4f9b4a8..460863f4ce1 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -436,7 +436,7 @@ private static List getRepositoriesForProject(Project project) { */ private static boolean isReadyForTrulyIncrementalReindex(Project project) { if (project == null) { - throw new IllegalArgumentException("null project"); + return false; } // History needs to be enabled for the history cache to work (see the comment below). From 7e740a877c95e26ff3b7a26252828d7d04fa2305 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 29 Apr 2022 22:13:52 +0200 Subject: [PATCH 09/88] avoid NPE, fix test to be consistent --- .../indexer/configuration/RuntimeEnvironment.java | 2 +- .../java/org/opengrok/indexer/index/IndexDatabase.java | 10 ++++++---- .../java/org/opengrok/indexer/index/IndexerTest.java | 10 +++++++++- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java index 727e303b0c9..5a88e0da0f6 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java @@ -466,7 +466,7 @@ public List getProjectList() { /** * Get project map. * - * @return a Map with all of the projects + * @return a Map with all the projects */ public Map getProjects() { return syncReadConfiguration(Configuration::getProjects); diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 460863f4ce1..ed3cde6700b 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -420,10 +420,12 @@ private static List getRepositoriesForProject(Project project) { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); List repositoryInfoList = env.getProjectRepositoriesMap().get(project); - for (RepositoryInfo repositoryInfo : repositoryInfoList) { - Repository repository = HistoryGuru.getInstance().getRepository(new File(repositoryInfo.getDirectoryName())); - if (repository != null) { - repositoryList.add(repository); + if (repositoryInfoList != null) { + for (RepositoryInfo repositoryInfo : repositoryInfoList) { + Repository repository = HistoryGuru.getInstance().getRepository(new File(repositoryInfo.getDirectoryName())); + if (repository != null) { + repositoryList.add(repository); + } } } diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexerTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexerTest.java index bfeadc55ef6..a1c44181e75 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexerTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexerTest.java @@ -360,7 +360,9 @@ void testRemoveFileOnFileChange() throws Exception { } // reindex + // TODO: add parameter for running the reindex with indexDown() and truly incremental (needs Git) idb.update(); + // Make sure that the file was actually processed. assertEquals(1, listener.removedFiles.size()); assertEquals(1, listener.filesToAdd.size()); @@ -424,7 +426,6 @@ void testBug3430() throws Exception { /** * Test IndexChangedListener behavior in repository with invalid files. - * @throws Exception */ @Test void testIncrementalIndexAddRemoveFile() throws Exception { @@ -432,8 +433,15 @@ void testIncrementalIndexAddRemoveFile() throws Exception { env.setSourceRoot(repository.getSourceRoot()); env.setDataRoot(repository.getDataRoot()); + // Make the test consistent. If run in sequence with other tests, env.hasProjects() returns true. + // The same should work for standalone test run. + HashMap projects = new HashMap<>(); String ppath = "/bug3430"; Project project = new Project("bug3430", ppath); + projects.put("bug3430", project); + env.setProjectsEnabled(true); + env.setProjects(projects); + IndexDatabase idb = new IndexDatabase(project); assertNotNull(idb); MyIndexChangeListener listener = new MyIndexChangeListener(); From 12365fb4573530f2de48294a6af76592ebcbeecf Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Sat, 30 Apr 2022 20:37:08 +0200 Subject: [PATCH 10/88] add global tunable --- .../indexer/configuration/Configuration.java | 11 +++++++++++ .../configuration/RuntimeEnvironment.java | 8 ++++++++ .../opengrok/indexer/index/IndexDatabase.java | 17 ++++++++++------- .../org/opengrok/indexer/index/Indexer.java | 7 +++++++ 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java index e7f4961b3be..1ebe7b6ba2d 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java @@ -300,6 +300,8 @@ public final class Configuration { private int connectTimeout = -1; // connect timeout in seconds private int apiTimeout = -1; // API timeout in seconds + private boolean trulyIncrementalReindex; + /* * types of handling history for remote SCM repositories: * ON - index history and display it in webapp @@ -576,6 +578,7 @@ public Configuration() { setTagsEnabled(false); //setUserPage("http://www.myserver.org/viewProfile.jspa?username="); // Set to empty string so we can append it to the URL unconditionally later. + setTrulyIncrementalReindex(true); setUserPageSuffix(""); setWebappLAF("default"); // webappCtags is default(boolean) @@ -1412,6 +1415,14 @@ public void setApiTimeout(int apiTimeout) { this.apiTimeout = apiTimeout; } + public boolean isTrulyIncrementalReindex() { + return trulyIncrementalReindex; + } + + public void setTrulyIncrementalReindex(boolean flag) { + trulyIncrementalReindex = flag; + } + /** * Write the current configuration to a file. * diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java index 5a88e0da0f6..e3ff098b857 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java @@ -1418,6 +1418,14 @@ public void setConnectTimeout(int connectTimeout) { syncWriteConfiguration(connectTimeout, Configuration::setConnectTimeout); } + public boolean isTrulyIncrementalReindex() { + return syncReadConfiguration(Configuration::isTrulyIncrementalReindex); + } + + public void setTrulyIncrementalReindex(boolean flag) { + syncWriteConfiguration(flag, Configuration::setTrulyIncrementalReindex); + } + /** * Read an configuration file and set it as the current configuration. * diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index ed3cde6700b..22ffa40c5bd 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -718,17 +718,18 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args Statistics elapsed = new Statistics(); // Only do this if all repositories for given project support file gathering via history traversal. - // TODO: introduce per project tunable for this (in case there are untracked files) + // TODO: introduce per project tunable for isTrulyIncrementalReindex + // (in case there are untracked files) // it will be also useful for testing // TODO: what if the index is without LOC counts ? indexDown() forces full reindex in such case // so perhaps it should be used in such case. - if (isReadyForTrulyIncrementalReindex(project)) { - LOGGER.log(Level.INFO, "Starting file collection using history cache in directory {0}", dir); + if (RuntimeEnvironment.getInstance().isTrulyIncrementalReindex() && isReadyForTrulyIncrementalReindex(project)) { + LOGGER.log(Level.INFO, "Starting file collection using history traversal in directory {0}", dir); getIndexDownArgsTrulyIncrementally(sourceRoot, args); elapsed.report(LOGGER, String.format("Done file collection of directory %s", dir), "indexer.db.directory.collection"); } else { - LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir); + LOGGER.log(Level.INFO, "Starting file collection using file-system traversal of directory {0}", dir); indexDown(sourceRoot, dir, args); indexDownPerformed = true; elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), @@ -740,7 +741,9 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args return indexDownPerformed; } - private void getIndexDownArgsTrulyIncrementally(File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { + private void getIndexDownArgsTrulyIncrementally(File sourceRoot, IndexDownArgs args) + throws HistoryException, IOException { + for (Repository repository : getRepositoriesForProject(project)) { // Traverse the history and add args to IndexDownArgs for the files/symlinks changed/deleted. FileCollector fileCollector = new FileCollector(); @@ -750,6 +753,7 @@ private void getIndexDownArgsTrulyIncrementally(File sourceRoot, IndexDownArgs a HistoryGuru.getInstance().getPreviousCachedRevision(repository), null, null, fileCollector::visit, true); + // TODO: can this be parallelized ? (esp. w.r.t. removePath() - xref removal and empty dirs, setDirty(), etc.) for (String path : fileCollector.files) { File file = new File(sourceRoot, path); @@ -900,8 +904,7 @@ private File whatXrefFile(String path, boolean compress) { private void removeXrefFile(String path) { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); File xrefFile = whatXrefFile(path, env.isCompressXref()); - PendingFileDeletion pending = new PendingFileDeletion( - xrefFile.getAbsolutePath()); + PendingFileDeletion pending = new PendingFileDeletion(xrefFile.getAbsolutePath()); completer.add(pending); } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java index b9453f3ccf1..ec442a90b56 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java @@ -794,6 +794,13 @@ public static String[] parseOptions(String[] argv) throws ParseException { } }); + parser.on("--trulyIncremental", "=on|off", ON_OFF, Boolean.class, + "If truly incremental reindex is in effect, the set of files changed/deleted since the last ", + "reindex is determined from history of the repositories. This needs history ", + "and projects to be enabled. This should be much faster than the classic way of traversing ", + "the directory structure. Currently works only for Git."). + execute(v -> cfg.setTrulyIncrementalReindex((Boolean) v)); + parser.on("-U", "--uri", "=SCHEME://webappURI:port/contextPath", "Send the current configuration to the specified web application.").execute(webAddr -> { webappURI = (String) webAddr; From 37bbf5c9f6c741d3439557b37695918243cc708c Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 2 May 2022 11:12:00 +0200 Subject: [PATCH 11/88] add notes/comments --- .../main/java/org/opengrok/indexer/index/IndexDatabase.java | 3 ++- .../src/main/java/org/opengrok/indexer/index/Indexer.java | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 22ffa40c5bd..b56d7ec5741 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -466,6 +466,7 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { } for (Repository repository : repositories) { + // Do this only if all repositories for given project support file gathering via history traversal. if (!(repository instanceof RepositoryWithHistoryTraversal)) { LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal," + "the project will be indexed using directory traversal.", @@ -474,6 +475,7 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { } /* + * Further, there needs to be history cache already present for the repositories. * This check means that this method will return false in the case of initial reindex. * In such case the traversal of all changesets would most likely be counterproductive, * assuming traversal of directory tree is cheaper than reading files from SCM history @@ -717,7 +719,6 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args boolean indexDownPerformed = false; Statistics elapsed = new Statistics(); - // Only do this if all repositories for given project support file gathering via history traversal. // TODO: introduce per project tunable for isTrulyIncrementalReindex // (in case there are untracked files) // it will be also useful for testing diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java index ec442a90b56..9cd96b59bfe 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java @@ -796,9 +796,10 @@ public static String[] parseOptions(String[] argv) throws ParseException { parser.on("--trulyIncremental", "=on|off", ON_OFF, Boolean.class, "If truly incremental reindex is in effect, the set of files changed/deleted since the last ", - "reindex is determined from history of the repositories. This needs history ", + "reindex is determined from history of the repositories. This needs history, history cache ", "and projects to be enabled. This should be much faster than the classic way of traversing ", - "the directory structure. Currently works only for Git."). + "the directory structure. The default is on. If you need to e.g. index files untracked by ", + "SCM, set this to off. Currently works only for Git."). execute(v -> cfg.setTrulyIncrementalReindex((Boolean) v)); parser.on("-U", "--uri", "=SCHEME://webappURI:port/contextPath", From b8ad14212e000cf1949e5873fc79a0d5f21ee5ee Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Thu, 5 May 2022 13:55:40 +0200 Subject: [PATCH 12/88] make it work in the basic mode - fix paths when collecting the files - change args counting in indexDown() - set configuration to make repositories visible in RuntimeEnvironment --- .../indexer/history/HistoryCache.java | 3 +- .../opengrok/indexer/index/IndexDatabase.java | 50 +++++++++++-------- .../org/opengrok/indexer/index/Indexer.java | 13 +++-- 3 files changed, 39 insertions(+), 27 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java index aa19ffd2fd7..2aeb65412e3 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java @@ -139,8 +139,7 @@ History get(File file, @Nullable Repository repository, boolean withFiles) * @param repository the repository in which the directory lives * @return a map from file names to modification times */ - Map getLastModifiedTimes( - File directory, Repository repository) + Map getLastModifiedTimes(File directory, Repository repository) throws HistoryException; /** diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index b56d7ec5741..8eb1ddafc12 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -29,6 +29,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; @@ -359,8 +360,7 @@ public boolean addDirectory(String dir) { private void showFileCount(String dir, IndexDownArgs args) { if (RuntimeEnvironment.getInstance().isPrintProgress()) { - LOGGER.log(Level.INFO, String.format("Need to process: %d files for %s", - args.curCount, dir)); + LOGGER.log(Level.INFO, String.format("Need to process: %d files for %s", args.curCount, dir)); } } @@ -486,7 +486,7 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { return false; } } catch (HistoryException ex) { - LOGGER.log(Level.FINE, String.format("cannot load latest cached revision for history cache " + + LOGGER.log(Level.FINE, String.format("cannot load previous cached revision for history cache " + "for repository %s, the project will be indexed using directory traversal.", repository), ex); return false; @@ -719,14 +719,16 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args boolean indexDownPerformed = false; Statistics elapsed = new Statistics(); + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + // TODO: introduce per project tunable for isTrulyIncrementalReindex // (in case there are untracked files) // it will be also useful for testing // TODO: what if the index is without LOC counts ? indexDown() forces full reindex in such case // so perhaps it should be used in such case. - if (RuntimeEnvironment.getInstance().isTrulyIncrementalReindex() && isReadyForTrulyIncrementalReindex(project)) { + if (env.isTrulyIncrementalReindex() && isReadyForTrulyIncrementalReindex(project)) { LOGGER.log(Level.INFO, "Starting file collection using history traversal in directory {0}", dir); - getIndexDownArgsTrulyIncrementally(sourceRoot, args); + getIndexDownArgsTrulyIncrementally(env.getSourceRootFile(), args); elapsed.report(LOGGER, String.format("Done file collection of directory %s", dir), "indexer.db.directory.collection"); } else { @@ -737,6 +739,11 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args "indexer.db.directory.traversal"); } + // TODO debug only + try (FileWriter writer = new FileWriter("/tmp/args.txt")) { + writer.write(args.works.stream().map(v -> v.path).collect(Collectors.joining("\n"))); + } + showFileCount(dir, args); return indexDownPerformed; @@ -750,9 +757,12 @@ private void getIndexDownArgsTrulyIncrementally(File sourceRoot, IndexDownArgs a FileCollector fileCollector = new FileCollector(); // Get the list of files starting with the latest changeset in the history cache // and ending with the newest changeset of the repository. - ((RepositoryWithHistoryTraversal) repository).traverseHistory(sourceRoot, - HistoryGuru.getInstance().getPreviousCachedRevision(repository), - null, null, fileCollector::visit, true); + String previousRevision = HistoryGuru.getInstance().getPreviousCachedRevision(repository); + LOGGER.log(Level.FINE, "getting list of files for truly incremental reindex since revision {0}", + previousRevision); + ((RepositoryWithHistoryTraversal) repository).traverseHistory(new File(sourceRoot, project.getPath()), + previousRevision, null, null, fileCollector::visit, true); + LOGGER.log(Level.FINE, "Done getting list of files, got {0} files", fileCollector.files.size()); // TODO: can this be parallelized ? (esp. w.r.t. removePath() - xref removal and empty dirs, setDirty(), etc.) for (String path : fileCollector.files) { @@ -1430,8 +1440,7 @@ private void handleSymlink(String path, AcceptSymlinkRet ret) { * Executes the first, serial stage of indexing, recursively. *

Files at least are counted, and any deleted or updated files (based on * comparison to the Lucene index) are passed to - * {@link #removeFile(String, boolean)}. New or updated files are noted for - * indexing. + * {@link #removeFile(String, boolean)}. New or updated files are noted for indexing. * @param dir the root indexDirectory to generate indexes for * @param parent path to parent directory * @param args arguments to control execution and for collecting a list of @@ -1465,16 +1474,14 @@ private void indexDown(File dir, String parent, IndexDownArgs args) throws IOExc if (file.isDirectory()) { indexDown(file, path, args); } else { - args.curCount++; - if (uidIter != null) { path = Util.fixPathIfWindows(path); String uid = Util.path2uid(path, DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND)); // construct uid for doc BytesRef buid = new BytesRef(uid); - // Traverse terms that have smaller UID than the current - // file, i.e. given the ordering they positioned before the file + // Traverse terms that have smaller UID than the current file, + // i.e. given the ordering they positioned before the file, // or it is the file that has been modified. while (uidIter != null && uidIter.term() != null && uidIter.term().compareTo(emptyBR) != 0 @@ -1496,8 +1503,7 @@ private void indexDown(File dir, String parent, IndexDownArgs args) throws IOExc } // If the file was not modified, probably skip to the next one. - if (uidIter != null && uidIter.term() != null && - uidIter.term().bytesEquals(buid)) { + if (uidIter != null && uidIter.term() != null && uidIter.term().bytesEquals(buid)) { /* * Possibly short-circuit to force reindexing of prior-version indexes. @@ -1519,6 +1525,7 @@ private void indexDown(File dir, String parent, IndexDownArgs args) throws IOExc } } + args.curCount++; args.works.add(new IndexFileWork(file, path)); } } @@ -1528,8 +1535,7 @@ private void indexDown(File dir, String parent, IndexDownArgs args) throws IOExc /** * Executes the second, parallel stage of indexing. * @param dir the parent directory (when appended to SOURCE_ROOT) - * @param args contains a list of files to index, found during the earlier - * stage + * @param args contains a list of files to index, found during the earlier stage */ private void indexParallel(String dir, IndexDownArgs args) { @@ -2012,10 +2018,12 @@ private void finishWriting() throws IOException { try { writeAnalysisSettings(); + LOGGER.log(Level.FINE, "preparing to commit changes to Lucene index"); // TODO add info about which database writer.prepareCommit(); hasPendingCommit = true; int n = completer.complete(); + // TODO: add elapsed LOGGER.log(Level.FINE, "completed {0} object(s)", n); // Just before commit(), reset the `hasPendingCommit' flag, @@ -2040,8 +2048,7 @@ private void finishWriting() throws IOException { * @param path the source file path * @return {@code false} if a mismatch is detected */ - private boolean checkSettings(File file, - String path) throws IOException { + private boolean checkSettings(File file, String path) throws IOException { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); boolean outIsXrefWriter = false; // potential xref writer @@ -2085,8 +2092,7 @@ private boolean checkSettings(File file, break; } - AnalyzerFactory fac = - AnalyzerGuru.findByFileTypeName(fileTypeName); + AnalyzerFactory fac = AnalyzerGuru.findByFileTypeName(fileTypeName); if (fac != null) { fa = fac.getAnalyzer(); } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java index 9cd96b59bfe..3ad28f77bee 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java @@ -200,8 +200,7 @@ public static void main(String[] argv) { env.setIndexer(true); // Complete the configuration of repository types. - List> repositoryClasses - = RepositoryFactory.getRepositoryClasses(); + List> repositoryClasses = RepositoryFactory.getRepositoryClasses(); for (Class clazz : repositoryClasses) { // Set external repository binaries from System properties. try { @@ -278,7 +277,8 @@ public static void main(String[] argv) { System.exit(0); } - // Set updated configuration in RuntimeEnvironment. + // Set updated configuration in RuntimeEnvironment. This is called so that the tunables set + // via command line options are available. env.setConfiguration(cfg, subFilesArgs, CommandTimeoutType.INDEXER); // Let repository types to add items to ignoredNames. @@ -287,6 +287,9 @@ public static void main(String[] argv) { RepositoryFactory.initializeIgnoredNames(env); if (bareConfig) { + // Set updated configuration in RuntimeEnvironment. + env.setConfiguration(cfg, subFilesArgs, CommandTimeoutType.INDEXER); + getInstance().sendToConfigHost(env, webappURI); writeConfigToFile(env, configFilename); System.exit(0); @@ -374,6 +377,10 @@ public static void main(String[] argv) { getInstance().prepareIndexer(env, searchPaths, addProjects, createDict, runIndex, subFiles, new ArrayList<>(repositories)); + // Set updated configuration in RuntimeEnvironment. This is called so that repositories discovered + // in prepareIndexer() are stored in the Configuration used by RuntimeEnvironment. + env.setConfiguration(cfg, subFilesArgs, CommandTimeoutType.INDEXER); + // prepareIndexer() populated the list of projects so now default projects can be set. env.setDefaultProjectsFromNames(defaultProjects); From 7a438aca34aa1cf06fad8ecef921d9147d764e98 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Thu, 5 May 2022 14:20:17 +0200 Subject: [PATCH 13/88] add per project property --- .../indexer/configuration/Project.java | 25 ++++++++++++++++++- .../opengrok/indexer/index/IndexDatabase.java | 14 ++++++----- 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java index 761cab92c58..9208f095143 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2018, Chris Fraire . */ package org.opengrok.indexer.configuration; @@ -99,6 +99,11 @@ public class Project implements Comparable, Nameable, Serializable { */ private boolean indexed = false; + /** + * This flag sets per-project truly incremental reindex. + */ + private Boolean trulyIncrementalReindex = null; + /** * Set of groups which match this project. */ @@ -289,6 +294,20 @@ public void setMergeCommitsEnabled(boolean flag) { this.mergeCommitsEnabled = flag; } + /** + * @return true if this project handles renamed files. + */ + public boolean isTrulyIncrementalReindex() { + return trulyIncrementalReindex != null && trulyIncrementalReindex; + } + + /** + * @param flag true if project should handle renamed files, false otherwise. + */ + public void setTrulyIncrementalReindex(boolean flag) { + this.trulyIncrementalReindex = flag; + } + /** * Return groups where this project belongs. * @@ -436,6 +455,10 @@ public final void completeWithDefaults() { if (reviewPattern == null) { setReviewPattern(env.getReviewPattern()); } + + if (trulyIncrementalReindex == null) { + setTrulyIncrementalReindex(env.isTrulyIncrementalReindex()); + } } /** diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 8eb1ddafc12..b79fe6b6ebf 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -433,7 +433,7 @@ private static List getRepositoriesForProject(Project project) { } /** - * @param project project + * @param project instance of {@link Project} * @return whether the repositories of given project are ready for truly incremental reindex */ private static boolean isReadyForTrulyIncrementalReindex(Project project) { @@ -457,6 +457,13 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { return false; } + if (!project.isTrulyIncrementalReindex()) { + return false; + } + + // TODO: what if the index is without LOC counts ? indexDown() forces full reindex in such case + // so perhaps it should be used in such case. + List repositories = getRepositoriesForProject(project); // Projects without repositories have to be indexed using indexDown(). if (repositories.isEmpty()) { @@ -721,11 +728,6 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args RuntimeEnvironment env = RuntimeEnvironment.getInstance(); - // TODO: introduce per project tunable for isTrulyIncrementalReindex - // (in case there are untracked files) - // it will be also useful for testing - // TODO: what if the index is without LOC counts ? indexDown() forces full reindex in such case - // so perhaps it should be used in such case. if (env.isTrulyIncrementalReindex() && isReadyForTrulyIncrementalReindex(project)) { LOGGER.log(Level.INFO, "Starting file collection using history traversal in directory {0}", dir); getIndexDownArgsTrulyIncrementally(env.getSourceRootFile(), args); From cab9624d5a131e7f8c03963533b7625dbdf2c9e1 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 9 May 2022 09:18:25 +0200 Subject: [PATCH 14/88] fix deleted files harvesting avoids '/dev/null' entries --- .../org/opengrok/indexer/history/GitRepository.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 62de8b98168..b19a65fd34d 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -660,8 +660,15 @@ private void getFilesBetweenCommits(org.eclipse.jgit.lib.Repository repository, if (diff.getChangeType() != DiffEntry.ChangeType.DELETE && changedFiles != null) { // Added files (ChangeType.ADD) are treated as changed. changedFiles.add(newPath); - } else if (deletedFiles != null) { - deletedFiles.add(newPath); + continue; + } + + if (diff.getChangeType() == DiffEntry.ChangeType.DELETE && deletedFiles != null) { + // newPath would be "/dev/null" + String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getOldPath()); + deletedFiles.add(oldPath); + continue; } if (diff.getChangeType() == DiffEntry.ChangeType.RENAME && isHandleRenamedFiles()) { From af69832120478658b3caa8fd8b8cf603484e2fcc Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 9 May 2022 09:22:33 +0200 Subject: [PATCH 15/88] even for truly incremental reindex the whole index has to be traversed This is necessary to allow for forced reindex from scratch. --- .../opengrok/indexer/index/IndexDatabase.java | 309 +++++++++++------- 1 file changed, 185 insertions(+), 124 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index b79fe6b6ebf..04cc3eb0deb 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -45,6 +45,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.CopyOnWriteArrayList; @@ -512,8 +513,11 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { * in one changeset a file may be deleted, only to be re-added in the next changeset etc. */ private static class FileCollector { - Set files; + SortedSet files; + /** + * Assumes comparing in the same way as {@link #FILENAME_COMPARATOR}. + */ FileCollector() { files = new TreeSet<>(); } @@ -582,7 +586,7 @@ public void update() throws IOException { dir = Util.fixPathIfWindows(dir); - String startuid = Util.path2uid(dir, ""); + String startUid = Util.path2uid(dir, ""); reader = DirectoryReader.open(indexDirectory); // open existing index countsAggregator = new NumLinesLOCAggregator(); settings = readAnalysisSettings(); @@ -613,19 +617,19 @@ public void update() throws IOException { try { if (terms != null) { uidIter = terms.iterator(); - TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid + TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startUid)); //init uid if (stat == TermsEnum.SeekStatus.END) { uidIter = null; LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?", - startuid); + startUid); } } // The actual indexing happens in indexParallel(). Here we merely collect the files - // that need to be indexed. + // that need to be indexed and the files that should be removed. IndexDownArgs args = new IndexDownArgs(); - boolean indexDownPerformed = getIndexDownArgs(dir, sourceRoot, args); + getIndexDownArgs(dir, sourceRoot, args); args.curCount = 0; Statistics elapsed = new Statistics(); @@ -634,18 +638,17 @@ public void update() throws IOException { elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir), "indexer.db.directory.index"); - // Remove data for the trailing terms that indexDown() + // Remove data for the trailing terms that getIndexDownArgs() // did not traverse. These correspond to the files that have been // removed and have higher ordering than any present files. - if (indexDownPerformed) { - while (uidIter != null && uidIter.term() != null - && uidIter.term().utf8ToString().startsWith(startuid)) { - - removeFile(null, true); - BytesRef next = uidIter.next(); - if (next == null) { - uidIter = null; - } + // TODO: reintroduce truly incremental awareness + while (uidIter != null && uidIter.term() != null + && uidIter.term().utf8ToString().startsWith(startUid)) { + + removeFile(true); + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; } } @@ -721,22 +724,20 @@ public void update() throws IOException { } } - private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { + private void getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { - boolean indexDownPerformed = false; Statistics elapsed = new Statistics(); RuntimeEnvironment env = RuntimeEnvironment.getInstance(); if (env.isTrulyIncrementalReindex() && isReadyForTrulyIncrementalReindex(project)) { LOGGER.log(Level.INFO, "Starting file collection using history traversal in directory {0}", dir); - getIndexDownArgsTrulyIncrementally(env.getSourceRootFile(), args); + indexDownUsingHistory(env.getSourceRootFile(), args); elapsed.report(LOGGER, String.format("Done file collection of directory %s", dir), "indexer.db.directory.collection"); } else { LOGGER.log(Level.INFO, "Starting file collection using file-system traversal of directory {0}", dir); indexDown(sourceRoot, dir, args); - indexDownPerformed = true; elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), "indexer.db.directory.traversal"); } @@ -745,45 +746,41 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args try (FileWriter writer = new FileWriter("/tmp/args.txt")) { writer.write(args.works.stream().map(v -> v.path).collect(Collectors.joining("\n"))); } + try (FileWriter writer = new FileWriter("/tmp/removed.txt")) { + writer.write(String.join("\n", filesToRemove)); + } showFileCount(dir, args); - - return indexDownPerformed; } - private void getIndexDownArgsTrulyIncrementally(File sourceRoot, IndexDownArgs args) - throws HistoryException, IOException { + /** + * Executes the first, serial stage of indexing, by going through set of files assembled from history. + * @param sourceRoot path to the source root (same as {@link RuntimeEnvironment#getSourceRootPath()}) + * @param args {@link IndexDownArgs} instance where the resulting files to be indexed will be stored + * @throws HistoryException TODO will be moved + * @throws IOException on error + */ + private void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { + + FileCollector fileCollector = new FileCollector(); + // TODO: get the list of files in the first stage to be more efficient + Statistics elapsed = new Statistics(); + LOGGER.log(Level.FINE, "getting list of files for truly incremental reindex in {0}", sourceRoot); for (Repository repository : getRepositoriesForProject(project)) { // Traverse the history and add args to IndexDownArgs for the files/symlinks changed/deleted. - FileCollector fileCollector = new FileCollector(); // Get the list of files starting with the latest changeset in the history cache // and ending with the newest changeset of the repository. String previousRevision = HistoryGuru.getInstance().getPreviousCachedRevision(repository); - LOGGER.log(Level.FINE, "getting list of files for truly incremental reindex since revision {0}", - previousRevision); ((RepositoryWithHistoryTraversal) repository).traverseHistory(new File(sourceRoot, project.getPath()), previousRevision, null, null, fileCollector::visit, true); - LOGGER.log(Level.FINE, "Done getting list of files, got {0} files", fileCollector.files.size()); - - // TODO: can this be parallelized ? (esp. w.r.t. removePath() - xref removal and empty dirs, setDirty(), etc.) - for (String path : fileCollector.files) { - File file = new File(sourceRoot, path); + } + elapsed.report(LOGGER, Level.FINE, + String.format("Done getting list of files, got %d files", fileCollector.files.size())); - // Check that each file is present on file system to avoid problems in indexParallel(). - if (file.exists()) { - AcceptSymlinkRet ret = new AcceptSymlinkRet(); - if (!accept(file.getParentFile(), file, ret)) { - handleSymlink(path, ret); - } else { - removeFile(path, false); - args.works.add(new IndexFileWork(file, path)); - args.curCount++; - } - } else { - removeFile(path, true); - } - } + for (String path : fileCollector.files) { + File file = new File(sourceRoot, path); + processFileIncremental(args, file, path); } } @@ -925,6 +922,8 @@ private void removeHistoryFile(String path) { HistoryGuru.getInstance().clearCacheFile(path); } + private final Set filesToRemove = new TreeSet<>(); + /** * Remove a stale file from the index database and potentially also from history cache, * and queue the removal of the associated xref file. @@ -932,29 +931,18 @@ private void removeHistoryFile(String path) { * @param removeHistory if false, do not remove history cache for this file * @throws java.io.IOException if an error occurs */ - private void removeFile(@Nullable String path, boolean removeHistory) throws IOException { - if (path == null) { // indexDown() - path = removeFileDocUid(); - } else { - for (IndexChangedListener listener : listeners) { - listener.fileRemove(path); - } - - Document doc = null; - try { - doc = getDocument(path, reader); - } catch (ParseException e) { - LOGGER.log(Level.WARNING, String.format("could not find document for %s, " + - "the index might contain stale data as a result", path), e); - } - if (doc != null) { - decrementLOCforDoc(path, doc); + private void removeFile(boolean removeHistory) throws IOException { + String path = Util.uid2url(uidIter.term().utf8ToString()); - String storedU = doc.get(QueryBuilder.U); - writer.deleteDocuments(new Term(QueryBuilder.U, storedU)); - } + for (IndexChangedListener listener : listeners) { + listener.fileRemove(path); } + // TODO: debug only + filesToRemove.add(path); + + removeFileDocUid(path); + removeXrefFile(path); if (removeHistory) { @@ -968,12 +956,7 @@ private void removeFile(@Nullable String path, boolean removeHistory) throws IOE } } - private String removeFileDocUid() throws IOException { - String path = Util.uid2url(uidIter.term().utf8ToString()); - - for (IndexChangedListener listener : listeners) { - listener.fileRemove(path); - } + private void removeFileDocUid(String path) throws IOException { // Determine if a reversal of counts is necessary, and execute if so. if (isCountingDeltas) { @@ -989,8 +972,6 @@ private String removeFileDocUid() throws IOException { } writer.deleteDocuments(new Term(QueryBuilder.U, uidIter.term())); - - return path; } private void decrementLOCforDoc(String path, Document doc) { @@ -1012,8 +993,7 @@ private void decrementLOCforDoc(String path, Document doc) { * @throws java.io.IOException if an error occurs * @throws InterruptedException if a timeout occurs */ - private void addFile(File file, String path, Ctags ctags) - throws IOException, InterruptedException { + private void addFile(File file, String path, Ctags ctags) throws IOException, InterruptedException { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); AbstractAnalyzer fa = getAnalyzerFor(file, path); @@ -1439,10 +1419,11 @@ private void handleSymlink(String path, AcceptSymlinkRet ret) { } /** - * Executes the first, serial stage of indexing, recursively. + * Executes the first, serial stage of indexing, by recursively traversing the file system + * and index alongside. *

Files at least are counted, and any deleted or updated files (based on * comparison to the Lucene index) are passed to - * {@link #removeFile(String, boolean)}. New or updated files are noted for indexing. + * {@link #removeFile(boolean)}. New or updated files are noted for indexing. * @param dir the root indexDirectory to generate indexes for * @param parent path to parent directory * @param args arguments to control execution and for collecting a list of @@ -1476,62 +1457,142 @@ private void indexDown(File dir, String parent, IndexDownArgs args) throws IOExc if (file.isDirectory()) { indexDown(file, path, args); } else { - if (uidIter != null) { - path = Util.fixPathIfWindows(path); - String uid = Util.path2uid(path, - DateTools.timeToString(file.lastModified(), - DateTools.Resolution.MILLISECOND)); // construct uid for doc - BytesRef buid = new BytesRef(uid); - // Traverse terms that have smaller UID than the current file, - // i.e. given the ordering they positioned before the file, - // or it is the file that has been modified. - while (uidIter != null && uidIter.term() != null - && uidIter.term().compareTo(emptyBR) != 0 - && uidIter.term().compareTo(buid) < 0) { - - // If the term's path matches path of currently processed file, - // it is clear that the file has been modified and thus - // removeFile() will be followed by call to addFile() in indexParallel(). - // In such case, instruct removeFile() not to remove history - // cache for the file so that incremental history cache - // generation works. - String termPath = Util.uid2url(uidIter.term().utf8ToString()); - removeFile(null, !termPath.equals(path)); - - BytesRef next = uidIter.next(); - if (next == null) { - uidIter = null; - } - } + processFile(args, file, path); + } + } + } + } - // If the file was not modified, probably skip to the next one. - if (uidIter != null && uidIter.term() != null && uidIter.term().bytesEquals(buid)) { + /** + * Compared with {@link #processFile(IndexDownArgs, File, String)}, this method's file/path arguments + * represent files that have actually changed in some way, while the other method's argument represent + * files present on disk. + * @param args {@link IndexDownArgs} instance + * @param file File object + * @param path path of the file argument relative to source root (with leading slash) + * @throws IOException on error + */ + private void processFileIncremental(IndexDownArgs args, File file, String path) throws IOException { + if (uidIter != null) { + // Traverse terms until reaching one that matches the path of given file. + while (uidIter != null && uidIter.term() != null + && uidIter.term().compareTo(emptyBR) != 0 + && Util.uid2url(uidIter.term().utf8ToString()).compareTo(path) < 0) { + + // A file that was not changed. + /* + * Possibly short-circuit to force reindexing of prior-version indexes. + */ + String termPath = Util.uid2url(uidIter.term().utf8ToString()); + File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath); + boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && + checkSettings(termFile, termPath); + if (!matchOK) { + removeFile(false); - /* - * Possibly short-circuit to force reindexing of prior-version indexes. - */ - boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && - checkSettings(file, path); - if (!matchOK) { - removeFile(null, false); - } + args.curCount++; + args.works.add(new IndexFileWork(termFile, termPath)); + } - BytesRef next = uidIter.next(); - if (next == null) { - uidIter = null; - } + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } + } - if (matchOK) { - continue; // keep matching docs - } - } - } + if (uidIter != null && uidIter.term() != null + && Util.uid2url(uidIter.term().utf8ToString()).equals(path)) { + /* + * At this point we know that the file has corresponding term in the index + * and has changed in some way. Either it was deleted or it was changed. + */ + if (!file.exists()) { + removeFile(true); + } else { + removeFile(false); args.curCount++; args.works.add(new IndexFileWork(file, path)); } + + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } + } else { + // Potentially new file. A file might be added and then deleted, + // so it is necessary to check its existence. + if (file.exists()) { + args.curCount++; + args.works.add(new IndexFileWork(file, path)); + } } } + // TODO: if uidIter is null the file should be added if exists ? + // add a test for this first + } + + /** + * Process a file on disk w.r.t. index. + * @param args {@link IndexDownArgs} instance + * @param file File object + * @param path path corresponding to the file parameter, relative to source root (with leading slash) + * @throws IOException on error + */ + private void processFile(IndexDownArgs args, File file, String path) throws IOException { + if (uidIter != null) { + path = Util.fixPathIfWindows(path); + String uid = Util.path2uid(path, + DateTools.timeToString(file.lastModified(), + DateTools.Resolution.MILLISECOND)); // construct uid for doc + BytesRef buid = new BytesRef(uid); + // Traverse terms that have smaller UID than the current file, + // i.e. given the ordering they positioned before the file, + // or it is the file that has been modified. + while (uidIter != null && uidIter.term() != null + && uidIter.term().compareTo(emptyBR) != 0 + && uidIter.term().compareTo(buid) < 0) { + + // If the term's path matches path of currently processed file, + // it is clear that the file has been modified and thus + // removeFile() will be followed by call to addFile() in indexParallel(). + // In such case, instruct removeFile() not to remove history + // cache for the file so that incremental history cache + // generation works. + String termPath = Util.uid2url(uidIter.term().utf8ToString()); + removeFile(!termPath.equals(path)); + + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } + } + + // If the file was not modified, probably skip to the next one. + if (uidIter != null && uidIter.term() != null && uidIter.term().bytesEquals(buid)) { + + /* + * Possibly short-circuit to force reindexing of prior-version indexes. + */ + boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && + checkSettings(file, path); + if (!matchOK) { + removeFile(false); + } + + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } + + if (matchOK) { + return; + } + } + } + + args.curCount++; + args.works.add(new IndexFileWork(file, path)); } /** From 660da7133be5c62799cd0903913dadd6c614fc19 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 9 May 2022 13:27:50 +0200 Subject: [PATCH 16/88] fix FileHistoryCacheTest renamed file detection was broken --- .../indexer/history/GitRepository.java | 47 ++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index b19a65fd34d..a6dba150ddd 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -657,27 +657,32 @@ private void getFilesBetweenCommits(org.eclipse.jgit.lib.Repository repository, String newPath = getNativePath(getDirectoryNameRelative()) + File.separator + getNativePath(diff.getNewPath()); - if (diff.getChangeType() != DiffEntry.ChangeType.DELETE && changedFiles != null) { - // Added files (ChangeType.ADD) are treated as changed. - changedFiles.add(newPath); - continue; - } - - if (diff.getChangeType() == DiffEntry.ChangeType.DELETE && deletedFiles != null) { - // newPath would be "/dev/null" - String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + - getNativePath(diff.getOldPath()); - deletedFiles.add(oldPath); - continue; - } - - if (diff.getChangeType() == DiffEntry.ChangeType.RENAME && isHandleRenamedFiles()) { - renamedFiles.add(newPath); - if (deletedFiles != null) { - String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + - getNativePath(diff.getOldPath()); - deletedFiles.add(oldPath); - } + // TODO: refactor + switch (diff.getChangeType()) { + case DELETE: + if (deletedFiles != null) { + // newPath would be "/dev/null" + String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getOldPath()); + deletedFiles.add(oldPath); + } + break; + case RENAME: + if (isHandleRenamedFiles()) { + renamedFiles.add(newPath); + if (deletedFiles != null) { + String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getOldPath()); + deletedFiles.add(oldPath); + } + } + break; + default: + if (changedFiles != null) { + // Added files (ChangeType.ADD) are treated as changed. + changedFiles.add(newPath); + } + break; } } } From 1a0b523d04cee74c3eed4beafda516f193f24f94 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 10 May 2022 09:57:05 +0200 Subject: [PATCH 17/88] renamed parts should be part of the changed files in HistoryEntry --- .../main/java/org/opengrok/indexer/history/GitRepository.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index a6dba150ddd..df5202b45f2 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -497,6 +497,10 @@ public void visit(ChangesetInfo changesetInfo) { if (changesetInfo.files != null) { historyEntry.setFiles(changesetInfo.files); } + if (changesetInfo.renamedFiles != null) { + // TODO: hack + historyEntry.getFiles().addAll(changesetInfo.renamedFiles); + } entries.add(historyEntry); } From 99642fe50c3699e8092bf9cf92ccdee5f71bda8e Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 10 May 2022 10:50:32 +0200 Subject: [PATCH 18/88] remove debug-only code --- .../java/org/opengrok/indexer/index/IndexDatabase.java | 8 -------- 1 file changed, 8 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 04cc3eb0deb..c004f38790d 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -742,14 +742,6 @@ private void getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) t "indexer.db.directory.traversal"); } - // TODO debug only - try (FileWriter writer = new FileWriter("/tmp/args.txt")) { - writer.write(args.works.stream().map(v -> v.path).collect(Collectors.joining("\n"))); - } - try (FileWriter writer = new FileWriter("/tmp/removed.txt")) { - writer.write(String.join("\n", filesToRemove)); - } - showFileCount(dir, args); } From 4328c1ba1254d2b3a8053134733c183aec6ec338 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 10 May 2022 10:58:40 +0200 Subject: [PATCH 19/88] remove unused import --- .../src/main/java/org/opengrok/indexer/index/IndexDatabase.java | 1 - 1 file changed, 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index c004f38790d..bad849fc0a5 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -29,7 +29,6 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; -import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStreamWriter; From f4192d95e9b72fea7f0ff587df06e506d16e3712 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 10 May 2022 11:16:52 +0200 Subject: [PATCH 20/88] handle trailing terms properly for history based reindex --- .../opengrok/indexer/index/IndexDatabase.java | 60 ++++++++++++++----- 1 file changed, 45 insertions(+), 15 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index bad849fc0a5..a2b44e4aa5e 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -628,7 +628,7 @@ public void update() throws IOException { // The actual indexing happens in indexParallel(). Here we merely collect the files // that need to be indexed and the files that should be removed. IndexDownArgs args = new IndexDownArgs(); - getIndexDownArgs(dir, sourceRoot, args); + boolean usedHistory = getIndexDownArgs(dir, sourceRoot, args); args.curCount = 0; Statistics elapsed = new Statistics(); @@ -637,19 +637,8 @@ public void update() throws IOException { elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir), "indexer.db.directory.index"); - // Remove data for the trailing terms that getIndexDownArgs() - // did not traverse. These correspond to the files that have been - // removed and have higher ordering than any present files. - // TODO: reintroduce truly incremental awareness - while (uidIter != null && uidIter.term() != null - && uidIter.term().utf8ToString().startsWith(startUid)) { - - removeFile(true); - BytesRef next = uidIter.next(); - if (next == null) { - uidIter = null; - } - } + // Traverse the trailing terms. + processTrailingTerms(startUid, usedHistory); /* * As a signifier that #Lines/LOC are comprehensively @@ -723,15 +712,54 @@ public void update() throws IOException { } } - private void getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { + private void processTrailingTerms(String startUid, boolean usedHistory) throws IOException { + while (uidIter != null && uidIter.term() != null + && uidIter.term().utf8ToString().startsWith(startUid)) { + + if (usedHistory) { + // Allow for forced reindex. For history based reindex the trailing terms + // correspond to the files that have not changed. Such files might need to be re-indexed + // if the index format changed. + String termPath = Util.uid2url(uidIter.term().utf8ToString()); + File termFile = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), termPath); + boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && + checkSettings(termFile, termPath); + if (!matchOK) { + removeFile(true); + } + } else { + // Remove data for the trailing terms that getIndexDownArgs() + // did not traverse. These correspond to the files that have been + // removed and have higher ordering than any present files. + removeFile(true); + } + + BytesRef next = uidIter.next(); + if (next == null) { + uidIter = null; + } + } + } + + /** + * @param dir directory path + * @param sourceRoot source root File object + * @param args {@link IndexDownArgs} instance (output) + * @return true if history was used to gather the {@code IndexDownArgs} + * @throws HistoryException TODO will be moved + * @throws IOException on error + */ + private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { Statistics elapsed = new Statistics(); + boolean usedHistory = false; RuntimeEnvironment env = RuntimeEnvironment.getInstance(); if (env.isTrulyIncrementalReindex() && isReadyForTrulyIncrementalReindex(project)) { LOGGER.log(Level.INFO, "Starting file collection using history traversal in directory {0}", dir); indexDownUsingHistory(env.getSourceRootFile(), args); + usedHistory = true; elapsed.report(LOGGER, String.format("Done file collection of directory %s", dir), "indexer.db.directory.collection"); } else { @@ -742,6 +770,8 @@ private void getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) t } showFileCount(dir, args); + + return usedHistory; } /** From ca2549f20bf9cd5649d51c7889b54976cad7cae9 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 10 May 2022 13:43:42 +0200 Subject: [PATCH 21/88] check if repository has history enabled --- .../java/org/opengrok/indexer/index/IndexDatabase.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index a2b44e4aa5e..8b5ee5c1dfd 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -473,6 +473,13 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { } for (Repository repository : repositories) { + if (!repository.isHistoryEnabled()) { + LOGGER.log(Level.FINE, "history is disabled for {0}, " + + "the associated project {1} will be indexed using directory traversal", + new Object[]{repository, project}); + return false; + } + // Do this only if all repositories for given project support file gathering via history traversal. if (!(repository instanceof RepositoryWithHistoryTraversal)) { LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal," + @@ -789,7 +796,6 @@ private void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws H Statistics elapsed = new Statistics(); LOGGER.log(Level.FINE, "getting list of files for truly incremental reindex in {0}", sourceRoot); for (Repository repository : getRepositoriesForProject(project)) { - // Traverse the history and add args to IndexDownArgs for the files/symlinks changed/deleted. // Get the list of files starting with the latest changeset in the history cache // and ending with the newest changeset of the repository. String previousRevision = HistoryGuru.getInstance().getPreviousCachedRevision(repository); From 2cf46988abba7145ae80c16670e64d914a53b2d3 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 10 May 2022 13:47:51 +0200 Subject: [PATCH 22/88] refactor truly incremental check for repository --- .../opengrok/indexer/index/IndexDatabase.java | 67 +++++++++++-------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 8b5ee5c1dfd..8d8e47bc4b9 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -473,41 +473,54 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { } for (Repository repository : repositories) { - if (!repository.isHistoryEnabled()) { - LOGGER.log(Level.FINE, "history is disabled for {0}, " + - "the associated project {1} will be indexed using directory traversal", - new Object[]{repository, project}); + if (isReadyForTrulyIncrementalReindex(project, repository)) { return false; } + } - // Do this only if all repositories for given project support file gathering via history traversal. - if (!(repository instanceof RepositoryWithHistoryTraversal)) { - LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal," + - "the project will be indexed using directory traversal.", - new Object[]{project, repository}); - return false; - } + // Here it is assumed there are no files untracked by the repositories of this project. + return true; + } - /* - * Further, there needs to be history cache already present for the repositories. - * This check means that this method will return false in the case of initial reindex. - * In such case the traversal of all changesets would most likely be counterproductive, - * assuming traversal of directory tree is cheaper than reading files from SCM history - * in such case. - */ - try { - if (HistoryGuru.getInstance().getPreviousCachedRevision(repository) == null) { - return false; - } - } catch (HistoryException ex) { - LOGGER.log(Level.FINE, String.format("cannot load previous cached revision for history cache " + - "for repository %s, the project will be indexed using directory traversal.", - repository), ex); + /** + * @param project Project instance + * @param repository Repository instance + * @return true if the repository can be used for history based reindex + */ + private static boolean isReadyForTrulyIncrementalReindex(Project project, Repository repository) { + if (!repository.isHistoryEnabled()) { + LOGGER.log(Level.FINE, "history is disabled for {0}, " + + "the associated project {1} will be indexed using directory traversal", + new Object[]{repository, project}); + return false; + } + + // Do this only if all repositories for given project support file gathering via history traversal. + if (!(repository instanceof RepositoryWithHistoryTraversal)) { + LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal," + + "the project will be indexed using directory traversal.", + new Object[]{project, repository}); + return false; + } + + /* + * Further, there needs to be history cache already present for the repositories. + * This check means that this method will return false in the case of initial reindex. + * In such case the traversal of all changesets would most likely be counterproductive, + * assuming traversal of directory tree is cheaper than reading files from SCM history + * in such case. + */ + try { + if (HistoryGuru.getInstance().getPreviousCachedRevision(repository) == null) { return false; } + } catch (HistoryException ex) { + LOGGER.log(Level.FINE, String.format("cannot load previous cached revision for history cache " + + "for repository %s, the project %s will be indexed using directory traversal.", + repository, project), ex); + return false; } - // Here it is assumed there are no files untracked by the repositories of this project. return true; } From 8b9069febddbde3a10f9c2a90b340c7883e4dbb9 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 10 May 2022 15:38:23 +0200 Subject: [PATCH 23/88] convert visitor pattern (use list of visitors) This is preparation for the history traversal that generates the history cache and collects the changed files at once. --- .../indexer/history/ChangesetVisitor.java | 33 +++++++++++++ .../indexer/history/GitRepository.java | 47 ++++++++++--------- .../RepositoryWithHistoryTraversal.java | 7 ++- .../opengrok/indexer/index/IndexDatabase.java | 14 +++--- 4 files changed, 69 insertions(+), 32 deletions(-) create mode 100644 opengrok-indexer/src/main/java/org/opengrok/indexer/history/ChangesetVisitor.java diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/ChangesetVisitor.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/ChangesetVisitor.java new file mode 100644 index 00000000000..f2b18da6b3e --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/ChangesetVisitor.java @@ -0,0 +1,33 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.history; + +import java.util.function.Consumer; + +public abstract class ChangesetVisitor implements Consumer { + boolean consumeMergeChangesets; + + protected ChangesetVisitor(boolean consumeMergeChangesets) { + this.consumeMergeChangesets = consumeMergeChangesets; + } +} \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index df5202b45f2..89ed190e55f 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -476,16 +476,17 @@ public History getHistory(File file, String sinceRevision, String tillRevision) return getHistory(file, sinceRevision, tillRevision, null); } - private static class HistoryCollector { + private static class HistoryCollector extends ChangesetVisitor { List entries; Set renamedFiles; - HistoryCollector() { + HistoryCollector(boolean consumeMergeChangesets) { + super(consumeMergeChangesets); entries = new ArrayList<>(); renamedFiles = new HashSet<>(); } - public void visit(ChangesetInfo changesetInfo) { + public void accept(ChangesetInfo changesetInfo) { RepositoryWithHistoryTraversal.CommitInfo commit = changesetInfo.commit; HistoryEntry historyEntry = new HistoryEntry(commit.revision, commit.date, commit.authorName + " <" + commit.authorEmail + ">", @@ -509,8 +510,8 @@ public void visit(ChangesetInfo changesetInfo) { public History getHistory(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { - HistoryCollector historyCollector = new HistoryCollector(); - traverseHistory(file, sinceRevision, tillRevision, numCommits, historyCollector::visit, false); + HistoryCollector historyCollector = new HistoryCollector(false); + traverseHistory(file, sinceRevision, tillRevision, numCommits, List.of(historyCollector)); History result = new History(historyCollector.entries, historyCollector.renamedFiles); // Assign tags to changesets they represent @@ -524,7 +525,7 @@ public History getHistory(File file, String sinceRevision, String tillRevision, } public void traverseHistory(File file, String sinceRevision, String tillRevision, - Integer numCommits, Consumer visitor, boolean getAll) throws HistoryException { + Integer numCommits, List visitors) throws HistoryException { if (numCommits != null && numCommits <= 0) { throw new HistoryException("invalid number of commits to retrieve"); @@ -539,23 +540,25 @@ public void traverseHistory(File file, String sinceRevision, String tillRevision int num = 0; for (RevCommit commit : walk) { - // For truly incremental reindex merge commits have to be processed. - // TODO: maybe the same for renamed files - depends on what happens if renamed file detection is on - if (!getAll && commit.getParentCount() > 1 && !isMergeCommitsEnabled()) { - continue; - } + for (ChangesetVisitor visitor : visitors) { + // For truly incremental reindex merge commits have to be processed. + // TODO: maybe the same for renamed files - depends on what happens if renamed file detection is on + if (!visitor.consumeMergeChangesets && commit.getParentCount() > 1 && !isMergeCommitsEnabled()) { + continue; + } - CommitInfo commitInfo = new CommitInfo(commit.getId().abbreviate(GIT_ABBREV_LEN).name(), - commit.getAuthorIdent().getWhen(), commit.getAuthorIdent().getName(), - commit.getAuthorIdent().getEmailAddress(), commit.getFullMessage()); - if (isDirectory) { - SortedSet files = new TreeSet<>(); - final Set renamedFiles = new HashSet<>(); - final Set deletedFiles = new HashSet<>(); - getFilesForCommit(renamedFiles, files, deletedFiles, commit, repository); - visitor.accept(new ChangesetInfo(commitInfo, files, renamedFiles, deletedFiles)); - } else { - visitor.accept(new ChangesetInfo(commitInfo)); + CommitInfo commitInfo = new CommitInfo(commit.getId().abbreviate(GIT_ABBREV_LEN).name(), + commit.getAuthorIdent().getWhen(), commit.getAuthorIdent().getName(), + commit.getAuthorIdent().getEmailAddress(), commit.getFullMessage()); + if (isDirectory) { + SortedSet files = new TreeSet<>(); + final Set renamedFiles = new HashSet<>(); + final Set deletedFiles = new HashSet<>(); + getFilesForCommit(renamedFiles, files, deletedFiles, commit, repository); + visitor.accept(new ChangesetInfo(commitInfo, files, renamedFiles, deletedFiles)); + } else { + visitor.accept(new ChangesetInfo(commitInfo)); + } } if (numCommits != null && ++num >= numCommits) { diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index 6eb53eb5eaa..147d45bb18c 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -26,9 +26,9 @@ import java.io.File; import java.util.Date; +import java.util.List; import java.util.Set; import java.util.SortedSet; -import java.util.function.Consumer; public abstract class RepositoryWithHistoryTraversal extends RepositoryWithPerPartesHistory { private static final long serialVersionUID = -1L; @@ -73,10 +73,9 @@ public static class ChangesetInfo { * @param sinceRevision start revision (non-inclusive) * @param tillRevision end revision (inclusive) * @param numCommits maximum number of commits to traverse (use {@code null} as unlimited) - * @param visitor visitor method - * @param getAll include merge commits (even if not set to be handled) + * @param visitors list of {@link ChangesetVisitor} objects * @throws HistoryException on error */ public abstract void traverseHistory(File file, String sinceRevision, @Nullable String tillRevision, - Integer numCommits, Consumer visitor, boolean getAll) throws HistoryException; + Integer numCommits, List visitors) throws HistoryException; } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 8d8e47bc4b9..dbdcaee00ba 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -100,6 +100,7 @@ import org.opengrok.indexer.configuration.PathAccepter; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; +import org.opengrok.indexer.history.ChangesetVisitor; import org.opengrok.indexer.history.HistoryException; import org.opengrok.indexer.history.HistoryGuru; import org.opengrok.indexer.history.Repository; @@ -473,7 +474,7 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { } for (Repository repository : repositories) { - if (isReadyForTrulyIncrementalReindex(project, repository)) { + if (!isReadyForTrulyIncrementalReindex(project, repository)) { return false; } } @@ -531,17 +532,18 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project, Reposi * This is because when incrementally indexing a bunch of changesets, * in one changeset a file may be deleted, only to be re-added in the next changeset etc. */ - private static class FileCollector { + private static class FileCollector extends ChangesetVisitor { SortedSet files; /** * Assumes comparing in the same way as {@link #FILENAME_COMPARATOR}. */ - FileCollector() { + FileCollector(boolean consumeMergeChangesets) { + super(consumeMergeChangesets); files = new TreeSet<>(); } - public void visit(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { + public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { if (changesetInfo.renamedFiles != null) { files.addAll(changesetInfo.renamedFiles); } @@ -803,7 +805,7 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args */ private void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { - FileCollector fileCollector = new FileCollector(); + FileCollector fileCollector = new FileCollector(true); // TODO: get the list of files in the first stage to be more efficient Statistics elapsed = new Statistics(); @@ -813,7 +815,7 @@ private void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws H // and ending with the newest changeset of the repository. String previousRevision = HistoryGuru.getInstance().getPreviousCachedRevision(repository); ((RepositoryWithHistoryTraversal) repository).traverseHistory(new File(sourceRoot, project.getPath()), - previousRevision, null, null, fileCollector::visit, true); + previousRevision, null, null, List.of(fileCollector)); } elapsed.report(LOGGER, Level.FINE, String.format("Done getting list of files, got %d files", fileCollector.files.size())); From 72882aefb2799b3c7ed32e188a5cef050d636b27 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 10 May 2022 15:51:36 +0200 Subject: [PATCH 24/88] remove trailing space --- .../src/main/java/org/opengrok/indexer/index/IndexDatabase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index dbdcaee00ba..7d5df975f3c 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -491,7 +491,7 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { private static boolean isReadyForTrulyIncrementalReindex(Project project, Repository repository) { if (!repository.isHistoryEnabled()) { LOGGER.log(Level.FINE, "history is disabled for {0}, " + - "the associated project {1} will be indexed using directory traversal", + "the associated project {1} will be indexed using directory traversal", new Object[]{repository, project}); return false; } From f5a0be4b323b80e6710358039a22d9b822eb3794 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 10 May 2022 16:00:15 +0200 Subject: [PATCH 25/88] move the CommitInfo construction --- .../java/org/opengrok/indexer/history/GitRepository.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 89ed190e55f..75129f72f2f 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -540,6 +540,10 @@ public void traverseHistory(File file, String sinceRevision, String tillRevision int num = 0; for (RevCommit commit : walk) { + CommitInfo commitInfo = new CommitInfo(commit.getId().abbreviate(GIT_ABBREV_LEN).name(), + commit.getAuthorIdent().getWhen(), commit.getAuthorIdent().getName(), + commit.getAuthorIdent().getEmailAddress(), commit.getFullMessage()); + for (ChangesetVisitor visitor : visitors) { // For truly incremental reindex merge commits have to be processed. // TODO: maybe the same for renamed files - depends on what happens if renamed file detection is on @@ -547,9 +551,6 @@ public void traverseHistory(File file, String sinceRevision, String tillRevision continue; } - CommitInfo commitInfo = new CommitInfo(commit.getId().abbreviate(GIT_ABBREV_LEN).name(), - commit.getAuthorIdent().getWhen(), commit.getAuthorIdent().getName(), - commit.getAuthorIdent().getEmailAddress(), commit.getFullMessage()); if (isDirectory) { SortedSet files = new TreeSet<>(); final Set renamedFiles = new HashSet<>(); From 2d8cba21b4a46ac62f101470f146f076fd0d9a4a Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 14:03:26 +0200 Subject: [PATCH 26/88] make indexDown*() testable - by moving IndexDownArgs away and introducing factory - also fix bug in index traversal for history based reindex - add parametrized test for IndexDatabase update and file changes --- .../opengrok/indexer/index/IndexDatabase.java | 65 +++--- .../opengrok/indexer/index/IndexDownArgs.java | 46 ++++ .../indexer/index/IndexDownArgsFactory.java | 32 +++ .../org/opengrok/indexer/index/Indexer.java | 4 +- .../indexer/history/FileHistoryCacheTest.java | 2 +- .../indexer/index/IndexDatabaseTest.java | 213 ++++++++++++++++-- 6 files changed, 311 insertions(+), 51 deletions(-) create mode 100644 opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java create mode 100644 opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 7d5df975f3c..d0039433a29 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -88,8 +88,10 @@ import org.apache.lucene.store.NoLockFactory; import org.apache.lucene.store.SimpleFSLockFactory; import org.apache.lucene.util.BytesRef; +import org.eclipse.jgit.util.IO; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; +import org.jetbrains.annotations.VisibleForTesting; import org.opengrok.indexer.analysis.AbstractAnalyzer; import org.opengrok.indexer.analysis.AnalyzerFactory; import org.opengrok.indexer.analysis.AnalyzerGuru; @@ -175,6 +177,8 @@ public class IndexDatabase { public static final String XREF_DIR = "xref"; public static final String SUGGESTER_DIR = "suggester"; + private final IndexDownArgsFactory indexDownArgsFactory; + /** * Create a new instance of the Index Database. Use this constructor if you * don't use any projects @@ -189,14 +193,21 @@ public IndexDatabase() throws IOException { * Create a new instance of an Index Database for a given project. * * @param project the project to create the database for + * @param factory {@link IndexDownArgsFactory} instance * @throws java.io.IOException if an error occurs while creating directories */ - public IndexDatabase(Project project) throws IOException { + public IndexDatabase(Project project, IndexDownArgsFactory factory) throws IOException { + indexDownArgsFactory = factory; this.project = project; lockfact = NoLockFactory.INSTANCE; initialize(); } + @VisibleForTesting + IndexDatabase(Project project) throws IOException { + this(project, new IndexDownArgsFactory()); + } + static { CHECK_FIELDS = new HashSet<>(); CHECK_FIELDS.add(QueryBuilder.TYPE); @@ -434,10 +445,9 @@ private static List getRepositoriesForProject(Project project) { } /** - * @param project instance of {@link Project} * @return whether the repositories of given project are ready for truly incremental reindex */ - private static boolean isReadyForTrulyIncrementalReindex(Project project) { + private boolean isReadyForTrulyIncrementalReindex() { if (project == null) { return false; } @@ -474,7 +484,7 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { } for (Repository repository : repositories) { - if (!isReadyForTrulyIncrementalReindex(project, repository)) { + if (!isReadyForTrulyIncrementalReindex(repository)) { return false; } } @@ -484,11 +494,11 @@ private static boolean isReadyForTrulyIncrementalReindex(Project project) { } /** - * @param project Project instance * @param repository Repository instance * @return true if the repository can be used for history based reindex */ - private static boolean isReadyForTrulyIncrementalReindex(Project project, Repository repository) { + @VisibleForTesting + boolean isReadyForTrulyIncrementalReindex(Repository repository) { if (!repository.isHistoryEnabled()) { LOGGER.log(Level.FINE, "history is disabled for {0}, " + "the associated project {1} will be indexed using directory traversal", @@ -649,7 +659,7 @@ public void update() throws IOException { // The actual indexing happens in indexParallel(). Here we merely collect the files // that need to be indexed and the files that should be removed. - IndexDownArgs args = new IndexDownArgs(); + IndexDownArgs args = indexDownArgsFactory.getIndexDownArgs(); boolean usedHistory = getIndexDownArgs(dir, sourceRoot, args); args.curCount = 0; @@ -771,14 +781,15 @@ private void processTrailingTerms(String startUid, boolean usedHistory) throws I * @throws HistoryException TODO will be moved * @throws IOException on error */ - private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { - + @VisibleForTesting + boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { Statistics elapsed = new Statistics(); boolean usedHistory = false; RuntimeEnvironment env = RuntimeEnvironment.getInstance(); - if (env.isTrulyIncrementalReindex() && isReadyForTrulyIncrementalReindex(project)) { + // TODO: rename trulyIncrementalReindex -> historyBasedReindex + if (env.isTrulyIncrementalReindex() && isReadyForTrulyIncrementalReindex()) { LOGGER.log(Level.INFO, "Starting file collection using history traversal in directory {0}", dir); indexDownUsingHistory(env.getSourceRootFile(), args); usedHistory = true; @@ -803,7 +814,8 @@ private boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args * @throws HistoryException TODO will be moved * @throws IOException on error */ - private void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { + @VisibleForTesting + void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { FileCollector fileCollector = new FileCollector(true); @@ -964,8 +976,6 @@ private void removeHistoryFile(String path) { HistoryGuru.getInstance().clearCacheFile(path); } - private final Set filesToRemove = new TreeSet<>(); - /** * Remove a stale file from the index database and potentially also from history cache, * and queue the removal of the associated xref file. @@ -980,9 +990,6 @@ private void removeFile(boolean removeHistory) throws IOException { listener.fileRemove(path); } - // TODO: debug only - filesToRemove.add(path); - removeFileDocUid(path); removeXrefFile(path); @@ -1471,7 +1478,8 @@ private void handleSymlink(String path, AcceptSymlinkRet ret) { * @param args arguments to control execution and for collecting a list of * files for indexing */ - private void indexDown(File dir, String parent, IndexDownArgs args) throws IOException { + @VisibleForTesting + void indexDown(File dir, String parent, IndexDownArgs args) throws IOException { if (isInterrupted()) { return; @@ -1569,9 +1577,12 @@ private void processFileIncremental(IndexDownArgs args, File file, String path) args.works.add(new IndexFileWork(file, path)); } } + } else { + if (file.exists()) { + args.curCount++; + args.works.add(new IndexFileWork(file, path)); + } } - // TODO: if uidIter is null the file should be added if exists ? - // add a test for this first } /** @@ -2285,22 +2296,6 @@ private boolean xrefExistsFor(String path) { return true; } - private static class IndexDownArgs { - int curCount; - final List works = new ArrayList<>(); - } - - private static class IndexFileWork { - final File file; - final String path; - Exception exception; - - IndexFileWork(File file, String path) { - this.file = file; - this.path = path; - } - } - private static class AcceptSymlinkRet { String localRelPath; } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java new file mode 100644 index 00000000000..5509cbe3f00 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java @@ -0,0 +1,46 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.index; + +import org.jetbrains.annotations.VisibleForTesting; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +@VisibleForTesting +class IndexDownArgs { + int curCount; + final List works = new ArrayList<>(); +} + +class IndexFileWork { + final File file; + final String path; + Exception exception; + + IndexFileWork(File file, String path) { + this.file = file; + this.path = path; + } +} \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java new file mode 100644 index 00000000000..a8a869d661b --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java @@ -0,0 +1,32 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.index; + +class IndexDownArgsFactory { + public IndexDownArgsFactory() { + } + + public IndexDownArgs getIndexDownArgs() { + return new IndexDownArgs(); + } +} \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java index 3ad28f77bee..458bd2f49ee 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java @@ -112,7 +112,7 @@ public final class Indexer { private static final String HELP_OPT_2 = "-?"; private static final String HELP_OPT_3 = "-h"; - private static final Indexer index = new Indexer(); + private static final Indexer indexer = new Indexer(); private static Configuration cfg = null; private static boolean checkIndex = false; private static boolean runIndex = true; @@ -149,7 +149,7 @@ public final class Indexer { private static final int WEBAPP_CONNECT_TIMEOUT = 1000; // in milliseconds public static Indexer getInstance() { - return index; + return indexer; } /** diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java index c5ef92fdf85..4c2e46d7436 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/FileHistoryCacheTest.java @@ -748,7 +748,7 @@ private void createSvnRepository() throws Exception { assertEquals(0, svnCheckoutProcess.waitFor()); } - private void changeFileAndCommit(Git git, File file, String comment) throws Exception { + static void changeFileAndCommit(Git git, File file, String comment) throws Exception { String authorName = "Foo Bar"; String authorEmail = "foo@bar.com"; diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 009cc6fce76..28bbc6e55e6 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -24,18 +24,29 @@ package org.opengrok.indexer.index; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.nio.file.Paths; import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; import java.util.TreeSet; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.lucene.document.Document; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.search.ScoreDoc; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeAll; +import org.eclipse.jgit.api.Git; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.opengrok.indexer.analysis.Definitions; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; @@ -48,20 +59,34 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.atLeast; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; /** * Unit tests for the {@code IndexDatabase} class. + * + * This is quite a heavy test class - it runs the indexer before each (parametrized) test, + * so it might contribute significantly to the overall test run time. */ -public class IndexDatabaseTest { +class IndexDatabaseTest { private static TestRepository repository; - @BeforeAll - public static void setUpClass() throws Exception { - RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + private Indexer indexer; + + private RuntimeEnvironment env; + + @BeforeEach + public void setUpClass() throws Exception { + env = RuntimeEnvironment.getInstance(); repository = new TestRepository(); repository.create(HistoryGuru.class.getResource("/repositories")); @@ -72,11 +97,7 @@ public static void setUpClass() throws Exception { env.setProjectsEnabled(true); RepositoryFactory.initializeIgnoredNames(env); - // Note that all tests in this class share the index created below. - // Ergo, if they need to modify it, this has to be done in such a way - // so that it does not affect other tests, no matter in which order - // the tests are run. - Indexer indexer = Indexer.getInstance(); + indexer = Indexer.getInstance(); indexer.prepareIndexer( env, true, true, false, null, null); @@ -85,8 +106,8 @@ public static void setUpClass() throws Exception { indexer.doIndexerExecution(true, null, null); } - @AfterAll - public static void tearDownClass() throws Exception { + @AfterEach + public void tearDownClass() throws Exception { repository.destroy(); } @@ -202,4 +223,170 @@ void testGetLastRev() throws IOException, ParseException { assertNotNull(doc); assertEquals("aa35c258", doc.get(QueryBuilder.LASTREV)); } + + static void changeFileAndCommit(Git git, File file, String comment) throws Exception { + String authorName = "Foo Bar"; + String authorEmail = "foobar@example.com"; + + try (FileOutputStream fos = new FileOutputStream(file, true)) { + fos.write(comment.getBytes(StandardCharsets.UTF_8)); + } + + git.commit().setMessage(comment).setAuthor(authorName, authorEmail).setAll(true).call(); + } + + private void addFileAndCommit(Git git, String newFileName, File repositoryRoot, String message) throws Exception { + File newFile = new File(repositoryRoot, newFileName); + if (!newFile.createNewFile()) { + throw new IOException("Could not create file " + newFile); + } + try (FileOutputStream fos = new FileOutputStream(newFile)) { + fos.write("foo bar foo bar foo bar".getBytes(StandardCharsets.UTF_8)); + } + git.add().addFilepattern(newFileName).call(); + git.commit().setMessage(message).setAuthor("foo bar", "foobar@example.com").setAll(true).call(); + } + + /** + * Add some commits to the Git repository. + * @param repositoryRoot Git repository root + */ + private void changeGitRepository(File repositoryRoot) throws Exception { + try (Git git = Git.init().setDirectory(repositoryRoot).call()) { + // This name is specifically picked to add file that would exercise the end of term traversal + // in processFileIncremental(), that is (uidIter == null). + String newFileName = "zzz.txt"; + addFileAndCommit(git, newFileName, repositoryRoot, "another new file"); + + // Add another file that is sorted behind to exercise another code path in processFileIncremental(). + // These 'z'-files are added first so their commits are not the last. This exercises the sorting + // of the files in FileCollector and the simultaneous traverse of the index and file list + // in processFileIncremental(). + newFileName = "zzzzzz.txt"; + addFileAndCommit(git, newFileName, repositoryRoot, "another new file"); + + // Change one of the pre-existing files. + File mainFile = new File(repositoryRoot, "main.c"); + assertTrue(mainFile.exists()); + changeFileAndCommit(git, mainFile, "new commit"); + + File rmFile = new File(repositoryRoot, "main.o"); + assertTrue(rmFile.exists()); + git.rm().addFilepattern("main.o").call(); + git.commit().setMessage("delete").setAuthor("foo", "foobar@example.com").setAll(true).call(); + + // Rename some file. + File fooFile = new File(repositoryRoot, "Makefile"); + assertTrue(fooFile.exists()); + File barFile = new File(repositoryRoot, "Makefile.renamed"); + assertTrue(fooFile.renameTo(barFile)); + git.add().addFilepattern("Makefile.renamed").call(); + git.rm().addFilepattern("Makefile").call(); + git.commit().setMessage("rename").setAuthor("foo", "foobar@example.com").setAll(true).call(); + } + } + + private static Stream provideParamsFortestGetIndexDownArgs() { + return Stream.of( + Arguments.of(false, false, false), + Arguments.of(false, false, true), + Arguments.of(false, true, false), + Arguments.of(false, true, true), + Arguments.of(true, false, false), + Arguments.of(true, false, true), + Arguments.of(true, true, false), + Arguments.of(true, true, true) + ); + } + + /** + * Test specifically getIndexDownArgs() with IndexDatabase instance. + * This test ensures that correct set of files is discovered. + */ + @ParameterizedTest + @MethodSource("provideParamsFortestGetIndexDownArgs") + void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean historyBased) throws Exception { + assertTrue(env.isHistoryEnabled()); + + env.setTrulyIncrementalReindex(historyBased); + env.setHandleHistoryOfRenamedFiles(renamedFiles); + env.setMergeCommitsEnabled(mergeCommits); + + IndexDownArgsFactory factory = new IndexDownArgsFactory(); + IndexDownArgsFactory spyFactory = spy(factory); + IndexDownArgs args = new IndexDownArgs(); + // In this case the getIndexDownArgs() should be called from update() just once so this will suffice. + when(spyFactory.getIndexDownArgs()).thenReturn(args); + + Project gitProject = env.getProjects().get("git"); + assertNotNull(gitProject); + IndexDatabase idbOrig = new IndexDatabase(gitProject, spyFactory); + assertNotNull(idbOrig); + IndexDatabase idb = spy(idbOrig); + + File repositoryRoot = new File(repository.getSourceRoot(), "git"); + assertTrue(repositoryRoot.isDirectory()); + changeGitRepository(repositoryRoot); + + // Re-generate the history cache so that the git repository is ready for history based re-index. + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + + // Setup and use listener for the "removed" files. + class RemovedFilesListener implements IndexChangedListener { + private final Set removedFiles = new HashSet<>(); + + @Override + public void fileAdd(String path, String analyzer) { + } + + @Override + public void fileAdded(String path, String analyzer) { + } + + @Override + public void fileRemove(String path) { + removedFiles.add(path); + } + + @Override + public void fileRemoved(String path) { + } + + @Override + public void fileUpdate(String path) { + } + + public Set getRemovedFiles() { + return removedFiles; + } + }; + RemovedFilesListener listener = new RemovedFilesListener(); + idb.addIndexChangedListener(listener); + idb.update(); + + verify(spyFactory).getIndexDownArgs(); + // Cannot use args.curCount to compare against because it gets reset in indexParallel() + // as it is reused in that stage of indexing. + assertNotEquals(0, args.works.size()); + // The expected data has to match the work done in changeGitRepository(). + assertEquals(Set.of("/git/Makefile.renamed", "/git/main.c", "/git/zzz.txt", "/git/zzzzzz.txt"), + args.works.stream().map(v -> v.path).collect(Collectors.toSet())); + + assertEquals(Set.of("/git/main.o", "/git/main.c", "/git/Makefile"), listener.getRemovedFiles()); + + // Verify the assumption made above. + verify(idb, times(1)).getIndexDownArgs(any(), any(), any()); + + // The initial index (done in setUpClass()) should use file based IndexWorkArgs discovery. + // Only the update() done here should lead to indexDownUsingHistory(), + // hence it should be called just once. + if (historyBased) { + verify(idb, times(1)).indexDownUsingHistory(any(), any()); + } else { + // indexDown() is recursive, so it will be called more than once. + verify(idb, atLeast(1)).indexDown(any(), any(), any()); + } + } } From f4571972af2bea1787c3c7fe25ebe37c89f396a4 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 14:09:08 +0200 Subject: [PATCH 27/88] truly incremental -> history based --- .../indexer/configuration/Configuration.java | 12 ++++++------ .../indexer/configuration/Project.java | 16 ++++++++-------- .../configuration/RuntimeEnvironment.java | 8 ++++---- .../opengrok/indexer/index/IndexDatabase.java | 18 ++++++++---------- .../org/opengrok/indexer/index/Indexer.java | 2 +- .../indexer/index/IndexDatabaseTest.java | 2 +- 6 files changed, 28 insertions(+), 30 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java index 1ebe7b6ba2d..dd68b5e0c23 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java @@ -300,7 +300,7 @@ public final class Configuration { private int connectTimeout = -1; // connect timeout in seconds private int apiTimeout = -1; // API timeout in seconds - private boolean trulyIncrementalReindex; + private boolean historyBasedReindex; /* * types of handling history for remote SCM repositories: @@ -578,7 +578,7 @@ public Configuration() { setTagsEnabled(false); //setUserPage("http://www.myserver.org/viewProfile.jspa?username="); // Set to empty string so we can append it to the URL unconditionally later. - setTrulyIncrementalReindex(true); + setHistoryBasedReindex(true); setUserPageSuffix(""); setWebappLAF("default"); // webappCtags is default(boolean) @@ -1415,12 +1415,12 @@ public void setApiTimeout(int apiTimeout) { this.apiTimeout = apiTimeout; } - public boolean isTrulyIncrementalReindex() { - return trulyIncrementalReindex; + public boolean isHistoryBasedReindex() { + return historyBasedReindex; } - public void setTrulyIncrementalReindex(boolean flag) { - trulyIncrementalReindex = flag; + public void setHistoryBasedReindex(boolean flag) { + historyBasedReindex = flag; } /** diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java index 9208f095143..73464eff578 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java @@ -100,9 +100,9 @@ public class Project implements Comparable, Nameable, Serializable { private boolean indexed = false; /** - * This flag sets per-project truly incremental reindex. + * This flag sets per-project reindex based on traversing SCM history. */ - private Boolean trulyIncrementalReindex = null; + private Boolean historyBasedReindex = null; /** * Set of groups which match this project. @@ -297,15 +297,15 @@ public void setMergeCommitsEnabled(boolean flag) { /** * @return true if this project handles renamed files. */ - public boolean isTrulyIncrementalReindex() { - return trulyIncrementalReindex != null && trulyIncrementalReindex; + public boolean isHistoryBasedReindex() { + return historyBasedReindex != null && historyBasedReindex; } /** * @param flag true if project should handle renamed files, false otherwise. */ - public void setTrulyIncrementalReindex(boolean flag) { - this.trulyIncrementalReindex = flag; + public void setHistoryBasedReindex(boolean flag) { + this.historyBasedReindex = flag; } /** @@ -456,8 +456,8 @@ public final void completeWithDefaults() { setReviewPattern(env.getReviewPattern()); } - if (trulyIncrementalReindex == null) { - setTrulyIncrementalReindex(env.isTrulyIncrementalReindex()); + if (historyBasedReindex == null) { + setHistoryBasedReindex(env.isHistoryBasedReindex()); } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java index e3ff098b857..b1402a59742 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java @@ -1418,12 +1418,12 @@ public void setConnectTimeout(int connectTimeout) { syncWriteConfiguration(connectTimeout, Configuration::setConnectTimeout); } - public boolean isTrulyIncrementalReindex() { - return syncReadConfiguration(Configuration::isTrulyIncrementalReindex); + public boolean isHistoryBasedReindex() { + return syncReadConfiguration(Configuration::isHistoryBasedReindex); } - public void setTrulyIncrementalReindex(boolean flag) { - syncWriteConfiguration(flag, Configuration::setTrulyIncrementalReindex); + public void setHistoryBasedReindex(boolean flag) { + syncWriteConfiguration(flag, Configuration::setHistoryBasedReindex); } /** diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index d0039433a29..088b0392267 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -88,7 +88,6 @@ import org.apache.lucene.store.NoLockFactory; import org.apache.lucene.store.SimpleFSLockFactory; import org.apache.lucene.util.BytesRef; -import org.eclipse.jgit.util.IO; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.VisibleForTesting; @@ -445,9 +444,9 @@ private static List getRepositoriesForProject(Project project) { } /** - * @return whether the repositories of given project are ready for truly incremental reindex + * @return whether the repositories of given project are ready for history based reindex */ - private boolean isReadyForTrulyIncrementalReindex() { + private boolean isReadyForHistoryBasedReindex() { if (project == null) { return false; } @@ -463,12 +462,12 @@ private boolean isReadyForTrulyIncrementalReindex() { return false; } - // So far the truly incremental reindex does not work without projects. + // So far the history based reindex does not work without projects. if (!env.hasProjects()) { return false; } - if (!project.isTrulyIncrementalReindex()) { + if (!project.isHistoryBasedReindex()) { return false; } @@ -484,7 +483,7 @@ private boolean isReadyForTrulyIncrementalReindex() { } for (Repository repository : repositories) { - if (!isReadyForTrulyIncrementalReindex(repository)) { + if (!isReadyForHistoryBasedReindex(repository)) { return false; } } @@ -498,7 +497,7 @@ private boolean isReadyForTrulyIncrementalReindex() { * @return true if the repository can be used for history based reindex */ @VisibleForTesting - boolean isReadyForTrulyIncrementalReindex(Repository repository) { + boolean isReadyForHistoryBasedReindex(Repository repository) { if (!repository.isHistoryEnabled()) { LOGGER.log(Level.FINE, "history is disabled for {0}, " + "the associated project {1} will be indexed using directory traversal", @@ -788,8 +787,7 @@ boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws RuntimeEnvironment env = RuntimeEnvironment.getInstance(); - // TODO: rename trulyIncrementalReindex -> historyBasedReindex - if (env.isTrulyIncrementalReindex() && isReadyForTrulyIncrementalReindex()) { + if (env.isHistoryBasedReindex() && isReadyForHistoryBasedReindex()) { LOGGER.log(Level.INFO, "Starting file collection using history traversal in directory {0}", dir); indexDownUsingHistory(env.getSourceRootFile(), args); usedHistory = true; @@ -821,7 +819,7 @@ void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws HistoryEx // TODO: get the list of files in the first stage to be more efficient Statistics elapsed = new Statistics(); - LOGGER.log(Level.FINE, "getting list of files for truly incremental reindex in {0}", sourceRoot); + LOGGER.log(Level.FINE, "getting list of files for history based reindex in {0}", sourceRoot); for (Repository repository : getRepositoriesForProject(project)) { // Get the list of files starting with the latest changeset in the history cache // and ending with the newest changeset of the repository. diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java index 458bd2f49ee..dcddd84ac12 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java @@ -807,7 +807,7 @@ public static String[] parseOptions(String[] argv) throws ParseException { "and projects to be enabled. This should be much faster than the classic way of traversing ", "the directory structure. The default is on. If you need to e.g. index files untracked by ", "SCM, set this to off. Currently works only for Git."). - execute(v -> cfg.setTrulyIncrementalReindex((Boolean) v)); + execute(v -> cfg.setHistoryBasedReindex((Boolean) v)); parser.on("-U", "--uri", "=SCHEME://webappURI:port/contextPath", "Send the current configuration to the specified web application.").execute(webAddr -> { diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 28bbc6e55e6..7f027d161d7 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -308,7 +308,7 @@ private static Stream provideParamsFortestGetIndexDownArgs() { void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean historyBased) throws Exception { assertTrue(env.isHistoryEnabled()); - env.setTrulyIncrementalReindex(historyBased); + env.setHistoryBasedReindex(historyBased); env.setHandleHistoryOfRenamedFiles(renamedFiles); env.setMergeCommitsEnabled(mergeCommits); From 4fdf134069a8eac55eef38a5a0fb4f2257e9c4fc Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 14:11:09 +0200 Subject: [PATCH 28/88] remove redundant public modifier --- .../java/org/opengrok/indexer/index/IndexDownArgsFactory.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java index a8a869d661b..d37c7b90343 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgsFactory.java @@ -23,7 +23,7 @@ package org.opengrok.indexer.index; class IndexDownArgsFactory { - public IndexDownArgsFactory() { + IndexDownArgsFactory() { } public IndexDownArgs getIndexDownArgs() { From 4cad0656b8940d0274d9557c5ea5f3e75d249672 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 14:11:52 +0200 Subject: [PATCH 29/88] remove the VisibleForTesting annotation --- .../main/java/org/opengrok/indexer/index/IndexDownArgs.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java index 5509cbe3f00..7a8178440b8 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDownArgs.java @@ -18,17 +18,14 @@ */ /* - * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. */ package org.opengrok.indexer.index; -import org.jetbrains.annotations.VisibleForTesting; - import java.io.File; import java.util.ArrayList; import java.util.List; -@VisibleForTesting class IndexDownArgs { int curCount; final List works = new ArrayList<>(); From 29312e6db3e89cff650a9d7390f46eaac1b14de0 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 14:13:40 +0200 Subject: [PATCH 30/88] fix nits --- .../main/java/org/opengrok/indexer/index/IndexDatabase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 088b0392267..301ebd67524 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -121,7 +121,7 @@ import static org.opengrok.indexer.web.ApiUtils.waitForAsyncApi; /** - * This class is used to create / update the index databases. Currently we use + * This class is used to create / update the index databases. Currently, we use * one index database per project. * * @author Trond Norbye @@ -137,7 +137,7 @@ public class IndexDatabase { private static final Set REVERT_COUNTS_FIELDS; - private final Object INSTANCE_LOCK = new Object(); + private static final Object INSTANCE_LOCK = new Object(); /** * Key is canonical path; Value is the first accepted, absolute path. Map From dda1cb4f2069927811f06761c17397ec6469a506 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 14:18:37 +0200 Subject: [PATCH 31/88] allow per-project override of history based reindex --- .../src/main/java/org/opengrok/indexer/index/IndexDatabase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 301ebd67524..23b4b502c5a 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -787,7 +787,7 @@ boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws RuntimeEnvironment env = RuntimeEnvironment.getInstance(); - if (env.isHistoryBasedReindex() && isReadyForHistoryBasedReindex()) { + if (isReadyForHistoryBasedReindex()) { LOGGER.log(Level.INFO, "Starting file collection using history traversal in directory {0}", dir); indexDownUsingHistory(env.getSourceRootFile(), args); usedHistory = true; From 3c5b87cf9f91a1217c0468a2ccda9c95bc58c723 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 14:19:51 +0200 Subject: [PATCH 32/88] fix wording --- .../main/java/org/opengrok/indexer/index/IndexDatabase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 23b4b502c5a..7dc200faabc 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -788,10 +788,10 @@ boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws RuntimeEnvironment env = RuntimeEnvironment.getInstance(); if (isReadyForHistoryBasedReindex()) { - LOGGER.log(Level.INFO, "Starting file collection using history traversal in directory {0}", dir); + LOGGER.log(Level.INFO, "Starting file collection using history traversal for directory {0}", dir); indexDownUsingHistory(env.getSourceRootFile(), args); usedHistory = true; - elapsed.report(LOGGER, String.format("Done file collection of directory %s", dir), + elapsed.report(LOGGER, String.format("Done file collection for directory %s", dir), "indexer.db.directory.collection"); } else { LOGGER.log(Level.INFO, "Starting file collection using file-system traversal of directory {0}", dir); From e215867bf1751e101c0eecc6ad61b0975da0e8a7 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 14:35:08 +0200 Subject: [PATCH 33/88] split the getHistory() call for better readability --- .../main/java/org/opengrok/indexer/history/Repository.java | 3 ++- .../indexer/history/RepositoryWithPerPartesHistory.java | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java index 4b35409b11b..ea3f7d24cc2 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java @@ -390,7 +390,8 @@ protected String getRevisionForAnnotate(String historyRevision) { } protected void doCreateCache(HistoryCache cache, String sinceRevision, File directory) throws HistoryException { - finishCreateCache(cache, getHistory(directory, sinceRevision), null); + History history = getHistory(directory, sinceRevision); + finishCreateCache(cache, history, null); } /** diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java index 3a9e1d29247..8a8a033fc05 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java @@ -90,9 +90,9 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire Statistics stat = new Statistics(); LOGGER.log(Level.FINEST, "storing history cache for revision range ({0}, {1})", new Object[]{sinceRevision, tillRevision}); - finishCreateCache(cache, getHistory(directory, sinceRevision, tillRevision), tillRevision); + History history = getHistory(directory, sinceRevision, tillRevision); + finishCreateCache(cache, history, tillRevision); sinceRevision = tillRevision; - stat.report(LOGGER, Level.FINE, String.format("finished chunk %d/%d of history cache for repository ''%s''", ++cnt, boundaryChangesetList.size(), this.getDirectoryName())); } From fadf908097ad8422775b68533e012a3430e7f061 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 15:31:56 +0200 Subject: [PATCH 34/88] acquire the list of files during history cache generation --- .../configuration/RuntimeEnvironment.java | 16 ++++ .../indexer/history/FileCollector.java | 61 +++++++++++++ .../indexer/history/GitRepository.java | 37 +------- .../indexer/history/HistoryCollector.java | 59 ++++++++++++ .../RepositoryWithHistoryTraversal.java | 64 +++++++++++++ .../opengrok/indexer/index/IndexDatabase.java | 90 +++---------------- .../indexer/index/IndexDatabaseTest.java | 1 + 7 files changed, 218 insertions(+), 110 deletions(-) create mode 100644 opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java create mode 100644 opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java index b1402a59742..b5a4cc70179 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java @@ -36,6 +36,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Date; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -65,6 +66,7 @@ import org.jetbrains.annotations.VisibleForTesting; import org.opengrok.indexer.authorization.AuthorizationFramework; import org.opengrok.indexer.authorization.AuthorizationStack; +import org.opengrok.indexer.history.FileCollector; import org.opengrok.indexer.history.HistoryGuru; import org.opengrok.indexer.history.RepositoryInfo; import org.opengrok.indexer.index.IndexDatabase; @@ -138,6 +140,12 @@ public List getSubFiles() { private final List subFiles = new ArrayList<>(); + /** + * Maps project name to FileCollector object. This is used to pass the list of files acquired when + * generating history cache in the first phase of indexing to the second phase of indexing. + */ + private final Map fileCollectorMap = new HashMap<>(); + /** * Creates a new instance of RuntimeEnvironment. Private to ensure a * singleton anti-pattern. @@ -1426,6 +1434,14 @@ public void setHistoryBasedReindex(boolean flag) { syncWriteConfiguration(flag, Configuration::setHistoryBasedReindex); } + public FileCollector getFileCollector(String name) { + return fileCollectorMap.get(name); + } + + public void setFileCollector(String name, FileCollector fileCollector) { + fileCollectorMap.put(name, fileCollector); + } + /** * Read an configuration file and set it as the current configuration. * diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java new file mode 100644 index 00000000000..d77e9c9bd97 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java @@ -0,0 +1,61 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.history; + +import java.util.SortedSet; +import java.util.TreeSet; + +/** + * This class is meant to collect files that were touched in some way by SCM update. + * The visitor argument contains the files separated based on the type of modification performed, + * however the consumer of this class is not interested in this classification. + * This is because when incrementally indexing a bunch of changesets, + * in one changeset a file may be deleted, only to be re-added in the next changeset etc. + */ +public class FileCollector extends ChangesetVisitor { + private SortedSet files; + + /** + * Assumes comparing in the same way as {@code org.opengrok.indexer.index.IndexDatabase#FILENAME_COMPARATOR}. + */ + public FileCollector(boolean consumeMergeChangesets) { + super(consumeMergeChangesets); + files = new TreeSet<>(); + } + + public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { + if (changesetInfo.renamedFiles != null) { + files.addAll(changesetInfo.renamedFiles); + } + if (changesetInfo.files != null) { + files.addAll(changesetInfo.files); + } + if (changesetInfo.deletedFiles != null) { + files.addAll(changesetInfo.deletedFiles); + } + } + + public SortedSet getFiles() { + return files; + } +} \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 75129f72f2f..83db7ca16e6 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -476,52 +476,21 @@ public History getHistory(File file, String sinceRevision, String tillRevision) return getHistory(file, sinceRevision, tillRevision, null); } - private static class HistoryCollector extends ChangesetVisitor { - List entries; - Set renamedFiles; - - HistoryCollector(boolean consumeMergeChangesets) { - super(consumeMergeChangesets); - entries = new ArrayList<>(); - renamedFiles = new HashSet<>(); - } - - public void accept(ChangesetInfo changesetInfo) { - RepositoryWithHistoryTraversal.CommitInfo commit = changesetInfo.commit; - HistoryEntry historyEntry = new HistoryEntry(commit.revision, - commit.date, commit.authorName + " <" + commit.authorEmail + ">", - commit.message, true); - - if (changesetInfo.renamedFiles != null) { - renamedFiles.addAll(changesetInfo.renamedFiles); - } - if (changesetInfo.files != null) { - historyEntry.setFiles(changesetInfo.files); - } - if (changesetInfo.renamedFiles != null) { - // TODO: hack - historyEntry.getFiles().addAll(changesetInfo.renamedFiles); - } - - entries.add(historyEntry); - } - } - public History getHistory(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { HistoryCollector historyCollector = new HistoryCollector(false); traverseHistory(file, sinceRevision, tillRevision, numCommits, List.of(historyCollector)); - History result = new History(historyCollector.entries, historyCollector.renamedFiles); + History history = new History(historyCollector.entries, historyCollector.renamedFiles); // Assign tags to changesets they represent // We don't need to check if this repository supports tags, // because we know it :-) if (RuntimeEnvironment.getInstance().isTagsEnabled()) { - assignTagsInHistory(result); + assignTagsInHistory(history); } - return result; + return history; } public void traverseHistory(File file, String sinceRevision, String tillRevision, diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java new file mode 100644 index 00000000000..be34dcf24d4 --- /dev/null +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java @@ -0,0 +1,59 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * See LICENSE.txt included in this distribution for the specific + * language governing permissions and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at LICENSE.txt. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. + */ +package org.opengrok.indexer.history; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +class HistoryCollector extends ChangesetVisitor { + List entries; + Set renamedFiles; + + HistoryCollector(boolean consumeMergeChangesets) { + super(consumeMergeChangesets); + entries = new ArrayList<>(); + renamedFiles = new HashSet<>(); + } + + public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { + RepositoryWithHistoryTraversal.CommitInfo commit = changesetInfo.commit; + HistoryEntry historyEntry = new HistoryEntry(commit.revision, + commit.date, commit.authorName + " <" + commit.authorEmail + ">", + commit.message, true); + + if (changesetInfo.renamedFiles != null) { + renamedFiles.addAll(changesetInfo.renamedFiles); + } + if (changesetInfo.files != null) { + historyEntry.setFiles(changesetInfo.files); + } + if (changesetInfo.renamedFiles != null) { + // TODO: hack + historyEntry.getFiles().addAll(changesetInfo.renamedFiles); + } + + entries.add(historyEntry); + } +} \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index 147d45bb18c..ea09410a2de 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -23,14 +23,23 @@ package org.opengrok.indexer.history; import org.jetbrains.annotations.Nullable; +import org.opengrok.indexer.configuration.RuntimeEnvironment; +import org.opengrok.indexer.logger.LoggerFactory; +import org.opengrok.indexer.util.Statistics; import java.io.File; +import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Set; import java.util.SortedSet; +import java.util.logging.Level; +import java.util.logging.Logger; public abstract class RepositoryWithHistoryTraversal extends RepositoryWithPerPartesHistory { + + private static final Logger LOGGER = LoggerFactory.getLogger(RepositoryWithHistoryTraversal.class); + private static final long serialVersionUID = -1L; public static class CommitInfo { @@ -78,4 +87,59 @@ public static class ChangesetInfo { */ public abstract void traverseHistory(File file, String sinceRevision, @Nullable String tillRevision, Integer numCommits, List visitors) throws HistoryException; + + @Override + protected void doCreateCache(HistoryCache cache, String sinceRevision, File directory) throws HistoryException { + // TODO: file collector should not be used if configuration says so + FileCollector fileCollector = new FileCollector(true); + + if (!RuntimeEnvironment.getInstance().isHistoryCachePerPartesEnabled()) { + LOGGER.log(Level.INFO, "repository {0} supports per partes history cache creation however " + + "it is disabled in the configuration. Generating history cache as whole.", this); + + HistoryCollector historyCollector = new HistoryCollector(false); // TODO: the flag should be based on configuration + traverseHistory(directory, sinceRevision, null, null, + List.of(historyCollector, fileCollector)); + History history = new History(historyCollector.entries, historyCollector.renamedFiles); + + finishCreateCache(cache, history, null); + + RuntimeEnvironment.getInstance().setFileCollector(directory.getName(), fileCollector); + + return; + } + + // For repositories that supports this, avoid storing complete History in memory + // (which can be sizeable, at least for the initial indexing, esp. if merge changeset support is enabled), + // by splitting the work into multiple chunks. + BoundaryChangesets boundaryChangesets = new BoundaryChangesets(this); + List boundaryChangesetList = new ArrayList<>(boundaryChangesets.getBoundaryChangesetIDs(sinceRevision)); + boundaryChangesetList.add(null); // to finish the last step in the cycle below + LOGGER.log(Level.FINE, "boundary changesets: {0}", boundaryChangesetList); + int cnt = 0; + for (String tillRevision: boundaryChangesetList) { + Statistics stat = new Statistics(); + LOGGER.log(Level.FINEST, "storing history cache for revision range ({0}, {1})", + new Object[]{sinceRevision, tillRevision}); + + HistoryCollector historyCollector = new HistoryCollector(false); // TODO: the flag should be based on configuration + traverseHistory(directory, sinceRevision, tillRevision, null, + List.of(historyCollector, fileCollector)); + History history = new History(historyCollector.entries, historyCollector.renamedFiles); + + // Assign tags to changesets they represent + // We don't need to check if this repository supports tags, + // because we know it :-) + if (RuntimeEnvironment.getInstance().isTagsEnabled()) { + assignTagsInHistory(history); + } + + finishCreateCache(cache, history, tillRevision); + sinceRevision = tillRevision; + stat.report(LOGGER, Level.FINE, String.format("finished chunk %d/%d of history cache for repository ''%s''", + ++cnt, boundaryChangesetList.size(), this.getDirectoryName())); + } + + RuntimeEnvironment.getInstance().setFileCollector(directory.getName(), fileCollector); + } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 7dc200faabc..66b4ffd502c 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -44,9 +44,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.SortedSet; import java.util.TreeMap; -import java.util.TreeSet; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; @@ -101,7 +99,7 @@ import org.opengrok.indexer.configuration.PathAccepter; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; -import org.opengrok.indexer.history.ChangesetVisitor; +import org.opengrok.indexer.history.FileCollector; import org.opengrok.indexer.history.HistoryException; import org.opengrok.indexer.history.HistoryGuru; import org.opengrok.indexer.history.Repository; @@ -462,6 +460,11 @@ private boolean isReadyForHistoryBasedReindex() { return false; } + // TODO: should be possible to do per project override + if (!env.isHistoryBasedReindex()) { + return false; + } + // So far the history based reindex does not work without projects. if (!env.hasProjects()) { return false; @@ -471,8 +474,9 @@ private boolean isReadyForHistoryBasedReindex() { return false; } - // TODO: what if the index is without LOC counts ? indexDown() forces full reindex in such case - // so perhaps it should be used in such case. + if (env.getFileCollector(project.getName()) == null) { + return false; + } List repositories = getRepositoriesForProject(project); // Projects without repositories have to be indexed using indexDown(). @@ -513,58 +517,9 @@ boolean isReadyForHistoryBasedReindex(Repository repository) { return false; } - /* - * Further, there needs to be history cache already present for the repositories. - * This check means that this method will return false in the case of initial reindex. - * In such case the traversal of all changesets would most likely be counterproductive, - * assuming traversal of directory tree is cheaper than reading files from SCM history - * in such case. - */ - try { - if (HistoryGuru.getInstance().getPreviousCachedRevision(repository) == null) { - return false; - } - } catch (HistoryException ex) { - LOGGER.log(Level.FINE, String.format("cannot load previous cached revision for history cache " + - "for repository %s, the project %s will be indexed using directory traversal.", - repository, project), ex); - return false; - } - return true; } - /** - * This class is meant to collect files that were touched in some way by SCM update. - * The visitor argument contains the files separated based on the type of modification performed, - * however the consumer of this class is not interested in this classification. - * This is because when incrementally indexing a bunch of changesets, - * in one changeset a file may be deleted, only to be re-added in the next changeset etc. - */ - private static class FileCollector extends ChangesetVisitor { - SortedSet files; - - /** - * Assumes comparing in the same way as {@link #FILENAME_COMPARATOR}. - */ - FileCollector(boolean consumeMergeChangesets) { - super(consumeMergeChangesets); - files = new TreeSet<>(); - } - - public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { - if (changesetInfo.renamedFiles != null) { - files.addAll(changesetInfo.renamedFiles); - } - if (changesetInfo.files != null) { - files.addAll(changesetInfo.files); - } - if (changesetInfo.deletedFiles != null) { - files.addAll(changesetInfo.deletedFiles); - } - } - } - /** * Update the content of this index database. * @@ -693,8 +648,6 @@ public void update() throws IOException { isWithDirectoryCounts && isCountingDeltas); markProjectIndexed(project); - } catch (HistoryException e) { - // TODO } finally { reader.close(); } @@ -777,11 +730,10 @@ private void processTrailingTerms(String startUid, boolean usedHistory) throws I * @param sourceRoot source root File object * @param args {@link IndexDownArgs} instance (output) * @return true if history was used to gather the {@code IndexDownArgs} - * @throws HistoryException TODO will be moved * @throws IOException on error */ @VisibleForTesting - boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { + boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws IOException { Statistics elapsed = new Statistics(); boolean usedHistory = false; @@ -809,28 +761,14 @@ boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws * Executes the first, serial stage of indexing, by going through set of files assembled from history. * @param sourceRoot path to the source root (same as {@link RuntimeEnvironment#getSourceRootPath()}) * @param args {@link IndexDownArgs} instance where the resulting files to be indexed will be stored - * @throws HistoryException TODO will be moved * @throws IOException on error */ - @VisibleForTesting - void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws HistoryException, IOException { + @VisibleForTesting + void indexDownUsingHistory(File sourceRoot, IndexDownArgs args) throws IOException { - FileCollector fileCollector = new FileCollector(true); + FileCollector fileCollector = RuntimeEnvironment.getInstance().getFileCollector(project.getName()); - // TODO: get the list of files in the first stage to be more efficient - Statistics elapsed = new Statistics(); - LOGGER.log(Level.FINE, "getting list of files for history based reindex in {0}", sourceRoot); - for (Repository repository : getRepositoriesForProject(project)) { - // Get the list of files starting with the latest changeset in the history cache - // and ending with the newest changeset of the repository. - String previousRevision = HistoryGuru.getInstance().getPreviousCachedRevision(repository); - ((RepositoryWithHistoryTraversal) repository).traverseHistory(new File(sourceRoot, project.getPath()), - previousRevision, null, null, List.of(fileCollector)); - } - elapsed.report(LOGGER, Level.FINE, - String.format("Done getting list of files, got %d files", fileCollector.files.size())); - - for (String path : fileCollector.files) { + for (String path : fileCollector.getFiles()) { File file = new File(sourceRoot, path); processFileIncremental(args, file, path); } diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 7f027d161d7..f81ac05fded 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -302,6 +302,7 @@ private static Stream provideParamsFortestGetIndexDownArgs() { /** * Test specifically getIndexDownArgs() with IndexDatabase instance. * This test ensures that correct set of files is discovered. + * TODO: add param setHistoryCachePerPartesEnabled() */ @ParameterizedTest @MethodSource("provideParamsFortestGetIndexDownArgs") From 238f2f2327f617ec504beb729d3f3141edaeb9c2 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 16:33:42 +0200 Subject: [PATCH 35/88] apply Path.of() for Windows builds --- .../indexer/index/IndexDatabaseTest.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index f81ac05fded..1ce6fe82b84 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -27,6 +27,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; import java.util.HashSet; @@ -372,10 +373,17 @@ public Set getRemovedFiles() { // as it is reused in that stage of indexing. assertNotEquals(0, args.works.size()); // The expected data has to match the work done in changeGitRepository(). - assertEquals(Set.of("/git/Makefile.renamed", "/git/main.c", "/git/zzz.txt", "/git/zzzzzz.txt"), - args.works.stream().map(v -> v.path).collect(Collectors.toSet())); - - assertEquals(Set.of("/git/main.o", "/git/main.c", "/git/Makefile"), listener.getRemovedFiles()); + assertEquals(Set.of(Path.of("/git/Makefile.renamed"), + Path.of("/git/main.c"), + Path.of("/git/zzz.txt"), + Path.of("/git/zzzzzz.txt")), + args.works.stream().map(v -> Path.of(v.path)).collect(Collectors.toSet())); + + assertEquals(Set.of( + Path.of("/git/main.o"), + Path.of("/git/main.c"), + Path.of("/git/Makefile") + ), listener.getRemovedFiles().stream().map(Path::of).collect(Collectors.toSet())); // Verify the assumption made above. verify(idb, times(1)).getIndexDownArgs(any(), any(), any()); From 56a6ea28d4c77ad20b9066635424f75d87340340 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 16:36:26 +0200 Subject: [PATCH 36/88] remove unused imports --- .../main/java/org/opengrok/indexer/history/GitRepository.java | 3 +-- .../main/java/org/opengrok/indexer/index/IndexDatabase.java | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 83db7ca16e6..f37aa7c59be 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2017, 2020, Chris Fraire . * Portions Copyright (c) 2019, Krystof Tulinger . */ @@ -33,7 +33,6 @@ import java.util.Date; import java.util.HashSet; import java.util.List; -import java.util.ArrayList; import java.util.HashMap; import java.util.Map; import java.util.Scanner; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 66b4ffd502c..63f6d7bf2ac 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -100,7 +100,6 @@ import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.history.FileCollector; -import org.opengrok.indexer.history.HistoryException; import org.opengrok.indexer.history.HistoryGuru; import org.opengrok.indexer.history.Repository; import org.opengrok.indexer.history.RepositoryInfo; From 8db1c39213da10291078d8906904ec810de28143 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 16:45:45 +0200 Subject: [PATCH 37/88] check global configuration before creating FileCollector instance --- .../RepositoryWithHistoryTraversal.java | 26 ++++++++++++++----- .../opengrok/indexer/index/IndexDatabase.java | 1 + 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index ea09410a2de..839adda1505 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -90,16 +90,24 @@ public abstract void traverseHistory(File file, String sinceRevision, @Nullable @Override protected void doCreateCache(HistoryCache cache, String sinceRevision, File directory) throws HistoryException { - // TODO: file collector should not be used if configuration says so - FileCollector fileCollector = new FileCollector(true); + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); - if (!RuntimeEnvironment.getInstance().isHistoryCachePerPartesEnabled()) { + FileCollector fileCollector = null; + if (env.isHistoryBasedReindex()) { // TODO: per project check + fileCollector = new FileCollector(true); + } + + if (!env.isHistoryCachePerPartesEnabled()) { LOGGER.log(Level.INFO, "repository {0} supports per partes history cache creation however " + "it is disabled in the configuration. Generating history cache as whole.", this); HistoryCollector historyCollector = new HistoryCollector(false); // TODO: the flag should be based on configuration - traverseHistory(directory, sinceRevision, null, null, - List.of(historyCollector, fileCollector)); + List visitors = new ArrayList<>(); + visitors.add(historyCollector); + if (fileCollector != null) { + visitors.add(fileCollector); + } + traverseHistory(directory, sinceRevision, null, null, visitors); History history = new History(historyCollector.entries, historyCollector.renamedFiles); finishCreateCache(cache, history, null); @@ -123,8 +131,12 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire new Object[]{sinceRevision, tillRevision}); HistoryCollector historyCollector = new HistoryCollector(false); // TODO: the flag should be based on configuration - traverseHistory(directory, sinceRevision, tillRevision, null, - List.of(historyCollector, fileCollector)); + List visitors = new ArrayList<>(); + visitors.add(historyCollector); + if (fileCollector != null) { + visitors.add(fileCollector); + } + traverseHistory(directory, sinceRevision, tillRevision, null, visitors); History history = new History(historyCollector.entries, historyCollector.renamedFiles); // Assign tags to changesets they represent diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 63f6d7bf2ac..9a298bb67d5 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -442,6 +442,7 @@ private static List getRepositoriesForProject(Project project) { /** * @return whether the repositories of given project are ready for history based reindex + * TODO: move part of this to doCreateCache() (project specific and global checks) */ private boolean isReadyForHistoryBasedReindex() { if (project == null) { From 271cd185e7b1cb06337b9e9cd039d7142dbb7b24 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 17:19:55 +0200 Subject: [PATCH 38/88] add TODOs --- .../java/org/opengrok/indexer/index/IndexDatabaseTest.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 1ce6fe82b84..f41098330c9 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -398,4 +398,8 @@ public Set getRemovedFiles() { verify(idb, atLeast(1)).indexDown(any(), any(), any()); } } + + // TODO: add test for project with multiple repositories + + // TODO: add test for forced reindex - see if renamedFile() was called for all files } From de55ca2a329dfe8a9d63cce56bc0dfad5c071a51 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 17:20:15 +0200 Subject: [PATCH 39/88] fix RepositoryWithPerPartesHistoryTest cleanup code for storing/retrieving previous revision --- .../indexer/history/FileHistoryCache.java | 24 ------------------- .../indexer/history/HistoryCache.java | 10 -------- .../opengrok/indexer/history/HistoryGuru.java | 13 ---------- .../RepositoryWithPerPartesHistoryTest.java | 9 +++---- 4 files changed, 5 insertions(+), 51 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java index 0b3a8c5b1d2..59ff50da1e6 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileHistoryCache.java @@ -741,31 +741,13 @@ private String getRepositoryCachedRevPath(Repository repository) { return histDir + File.separatorChar + LATEST_REV_FILE_NAME; } - private String getRepositoryPreviousCachedRevPath(Repository repository) { - String histDir = getRepositoryHistDataDirname(repository); - if (histDir == null) { - return null; - } - return histDir + File.separatorChar + LATEST_REV_FILE_NAME + ".prev"; - } - /** * Store latest indexed revision for the repository under data directory. * @param repository repository * @param rev latest revision which has been just indexed */ private void storeLatestCachedRevision(Repository repository, String rev) { - // Save the file so that it can be used by truly incremental reindex via getPreviousCachedRevision(). Path newPath = Path.of(getRepositoryCachedRevPath(repository)); - Path oldPath = Path.of(getRepositoryPreviousCachedRevPath(repository)); - try { - if (newPath.toFile().exists()) { - Files.move(newPath, oldPath); - } - } catch (IOException e) { - LOGGER.log(Level.WARNING, String.format("cannot move %s to %s", newPath, oldPath), e); - } - try (Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(newPath.toFile())))) { writer.write(rev); } catch (IOException ex) { @@ -780,12 +762,6 @@ public String getLatestCachedRevision(Repository repository) { return getCachedRevision(repository, getRepositoryCachedRevPath(repository)); } - @Override - @Nullable - public String getPreviousCachedRevision(Repository repository) { - return getCachedRevision(repository, getRepositoryPreviousCachedRevPath(repository)); - } - @Nullable private String getCachedRevision(Repository repository, String revPath) { String rev; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java index 2aeb65412e3..d2fab8cc7a5 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCache.java @@ -121,16 +121,6 @@ History get(File file, @Nullable Repository repository, boolean withFiles) */ String getLatestCachedRevision(Repository repository) throws HistoryException; - /** - * Get the revision identifier for the latest cached revision in a repository. - * - * @param repository Repository object - * @return a string representing the previous revision (prior to the current indexer run), - * or {@code null} if it is unknown - * @throws HistoryException on error - */ - String getPreviousCachedRevision(Repository repository) throws HistoryException; - /** * Get the last modified times for all files and subdirectories in the * specified directory. diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java index f050520de1e..430db1530b9 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java @@ -428,19 +428,6 @@ public Map getLastModifiedTimes(File directory) return Collections.emptyMap(); } - /** - * @param repository Repository object - * @return previously cached revision (before currently running reindex) - * @throws HistoryException on error - */ - public String getPreviousCachedRevision(Repository repository) throws HistoryException { - if (repository != null && useCache()) { - return historyCache.getPreviousCachedRevision(repository); - } - - throw new HistoryException(String.format("cannot get previous cached revision for %s", repository)); - } - /** * recursively search for repositories with a depth limit, add those found * to the internally used map. diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java index ce22568be39..e9ed9015dec 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistoryTest.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved. */ package org.opengrok.indexer.history; @@ -46,7 +46,7 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; -public class RepositoryWithPerPartesHistoryTest { +class RepositoryWithPerPartesHistoryTest { private TestRepository repositories; private GitRepository gitRepository; @@ -89,7 +89,8 @@ void testChangesets() throws HistoryException { Mockito.when(gitSpyRepository.getPerPartesCount()).thenReturn(3); gitSpyRepository.createCache(cache, null); Mockito.verify(gitSpyRepository, times(3)). - getHistory(any(), stringArgumentCaptor1.capture(), stringArgumentCaptor2.capture()); + traverseHistory(any(), stringArgumentCaptor1.capture(), stringArgumentCaptor2.capture(), + isNull(), any()); List sinceRevisions = new ArrayList<>(); sinceRevisions.add(null); @@ -120,7 +121,7 @@ void testPseudoIncomingChangeset() throws Exception { gitSpyRepository.createCache(cache, historyEntries.get(1).getRevision()); Mockito.verify(gitSpyRepository, times(1)). - getHistory(any(), anyString(), isNull()); + traverseHistory(any(), anyString(), isNull(), isNull(), any()); assertEquals(historyEntries.get(0).getRevision(), cache.getLatestCachedRevision(gitSpyRepository)); History cachedHistory = cache.get(Paths.get(gitRepository.getDirectoryName(), "moved2", "renamed2.c").toFile(), gitSpyRepository, false); From 97177289669cc7bad20e94c662983f10383a0bc8 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 18:09:44 +0200 Subject: [PATCH 40/88] parametrize the cleanup test --- .../opengrok/indexer/index/IndexDatabaseTest.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index f41098330c9..ae88cdd1e67 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -48,6 +48,7 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.Arguments; import org.junit.jupiter.params.provider.MethodSource; +import org.junit.jupiter.params.provider.ValueSource; import org.opengrok.indexer.analysis.Definitions; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; @@ -159,13 +160,16 @@ private void checkDataExistence(String fileName, boolean shouldExist) { * Test removal of IndexDatabase. xrefs and history index entries after * file has been removed from a repository. */ - @Test - void testCleanupAfterIndexRemoval() throws Exception { + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testCleanupAfterIndexRemoval(boolean historyBasedReindex) throws Exception { final int origNumFiles; + env.setHistoryBasedReindex(historyBasedReindex); + String projectName = "git"; - String ppath = "/" + projectName; - Project project = new Project(projectName, ppath); + Project project = env.getProjects().get(projectName); + assertNotNull(project); IndexDatabase idb = new IndexDatabase(project); assertNotNull(idb); From 52a59697b8c381819240029df7548ce159de62a3 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 11 May 2022 18:10:16 +0200 Subject: [PATCH 41/88] assert file deletion --- .../test/java/org/opengrok/indexer/index/IndexDatabaseTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index ae88cdd1e67..2ef7ee43337 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -193,7 +193,7 @@ void testCleanupAfterIndexRemoval(boolean historyBasedReindex) throws Exception // Remove the file and reindex using IndexDatabase directly. File file = new File(repository.getSourceRoot(), projectName + File.separator + fileName); - file.delete(); + assertTrue(file.delete()); assertFalse(file.exists(), "file " + fileName + " not removed"); idb.update(); From 655d978e7965d36c9ff01efb61cbfd14cbdbca86 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Thu, 12 May 2022 14:18:04 +0200 Subject: [PATCH 42/88] add per partes test param --- .../indexer/index/IndexDatabaseTest.java | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 2ef7ee43337..86a1dc19cfc 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -293,30 +293,40 @@ private void changeGitRepository(File repositoryRoot) throws Exception { private static Stream provideParamsFortestGetIndexDownArgs() { return Stream.of( - Arguments.of(false, false, false), - Arguments.of(false, false, true), - Arguments.of(false, true, false), - Arguments.of(false, true, true), - Arguments.of(true, false, false), - Arguments.of(true, false, true), - Arguments.of(true, true, false), - Arguments.of(true, true, true) + Arguments.of(false, false, false, false), + Arguments.of(false, false, false, true), + Arguments.of(false, false, true, false), + Arguments.of(false, false, true, true), + Arguments.of(false, true, false, false), + Arguments.of(false, true, false, true), + Arguments.of(false, true, true, false), + Arguments.of(false, true, true, true), + Arguments.of(true, false, false, false), + Arguments.of(true, false, false, true), + Arguments.of(true, false, true, false), + Arguments.of(true, false, true, true), + Arguments.of(true, true, false, false), + Arguments.of(true, true, false, true), + Arguments.of(true, true, true, false), + Arguments.of(true, true, true, true) ); } /** * Test specifically getIndexDownArgs() with IndexDatabase instance. * This test ensures that correct set of files is discovered. - * TODO: add param setHistoryCachePerPartesEnabled() */ @ParameterizedTest @MethodSource("provideParamsFortestGetIndexDownArgs") - void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean historyBased) throws Exception { + void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean historyBased, boolean perPartes) + throws Exception { + assertTrue(env.isHistoryEnabled()); env.setHistoryBasedReindex(historyBased); env.setHandleHistoryOfRenamedFiles(renamedFiles); env.setMergeCommitsEnabled(mergeCommits); + env.setHistoryCachePerPartesEnabled(perPartes); IndexDownArgsFactory factory = new IndexDownArgsFactory(); IndexDownArgsFactory spyFactory = spy(factory); From aa32923430f2ccef673b1bc53703185886cae04b Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Thu, 12 May 2022 16:39:03 +0200 Subject: [PATCH 43/88] add test for forced reindex, fix it for history based reindex --- .../opengrok/indexer/index/IndexDatabase.java | 17 ++- .../indexer/index/IndexDatabaseTest.java | 130 +++++++++++++----- 2 files changed, 110 insertions(+), 37 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 9a298bb67d5..8f5075457d6 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -616,6 +616,10 @@ public void update() throws IOException { IndexDownArgs args = indexDownArgsFactory.getIndexDownArgs(); boolean usedHistory = getIndexDownArgs(dir, sourceRoot, args); + // Traverse the trailing terms. This needs to be done before indexParallel() because + // in some cases it can add items to the args parameter. + processTrailingTerms(startUid, usedHistory, args); + args.curCount = 0; Statistics elapsed = new Statistics(); LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir); @@ -623,9 +627,6 @@ public void update() throws IOException { elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir), "indexer.db.directory.index"); - // Traverse the trailing terms. - processTrailingTerms(startUid, usedHistory); - /* * As a signifier that #Lines/LOC are comprehensively * stored so that later calculation is in deltas mode, we @@ -696,7 +697,7 @@ public void update() throws IOException { } } - private void processTrailingTerms(String startUid, boolean usedHistory) throws IOException { + private void processTrailingTerms(String startUid, boolean usedHistory, IndexDownArgs args) throws IOException { while (uidIter != null && uidIter.term() != null && uidIter.term().utf8ToString().startsWith(startUid)) { @@ -709,7 +710,10 @@ private void processTrailingTerms(String startUid, boolean usedHistory) throws I boolean matchOK = (isWithDirectoryCounts || isCountingDeltas) && checkSettings(termFile, termPath); if (!matchOK) { - removeFile(true); + removeFile(false); + + args.curCount++; + args.works.add(new IndexFileWork(termFile, termPath)); } } else { // Remove data for the trailing terms that getIndexDownArgs() @@ -2100,7 +2104,8 @@ private void finishWriting() throws IOException { * @param path the source file path * @return {@code false} if a mismatch is detected */ - private boolean checkSettings(File file, String path) throws IOException { + @VisibleForTesting + boolean checkSettings(File file, String path) throws IOException { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); boolean outIsXrefWriter = false; // potential xref writer diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 86a1dc19cfc..73b8f3da6f6 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -27,9 +27,11 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -56,6 +58,7 @@ import org.opengrok.indexer.history.RepositoryFactory; import org.opengrok.indexer.search.QueryBuilder; import org.opengrok.indexer.search.SearchEngine; +import org.opengrok.indexer.util.ForbiddenSymlinkException; import org.opengrok.indexer.util.TandemPath; import org.opengrok.indexer.util.TestRepository; @@ -67,6 +70,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.atLeast; +import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -105,6 +109,7 @@ public void setUpClass() throws Exception { false, null, null); env.setDefaultProjectsFromNames(new TreeSet<>(Arrays.asList("/c"))); env.generateProjectRepositoriesMap(); + // TODO: make sure the initial index is made using indexDown() indexer.doIndexerExecution(true, null, null); } @@ -312,6 +317,43 @@ private static Stream provideParamsFortestGetIndexDownArgs() { ); } + static class AddRemoveFilesListener implements IndexChangedListener { + // The file sets need to be thread safe because the methods that modify them can be called in parallel. + private final Set removedFiles = Collections.synchronizedSet(new HashSet<>()); + + private final Set addedFiles = Collections.synchronizedSet(new HashSet<>()); + + @Override + public void fileAdd(String path, String analyzer) { + addedFiles.add(path); + } + + @Override + public void fileAdded(String path, String analyzer) { + } + + @Override + public void fileRemove(String path) { + removedFiles.add(path); + } + + @Override + public void fileRemoved(String path) { + } + + @Override + public void fileUpdate(String path) { + } + + public Set getRemovedFiles() { + return removedFiles; + } + + public Set getAddedFiles() { + return addedFiles; + } + }; + /** * Test specifically getIndexDownArgs() with IndexDatabase instance. * This test ensures that correct set of files is discovered. @@ -350,35 +392,7 @@ void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean hi false, List.of("/git"), null); // Setup and use listener for the "removed" files. - class RemovedFilesListener implements IndexChangedListener { - private final Set removedFiles = new HashSet<>(); - - @Override - public void fileAdd(String path, String analyzer) { - } - - @Override - public void fileAdded(String path, String analyzer) { - } - - @Override - public void fileRemove(String path) { - removedFiles.add(path); - } - - @Override - public void fileRemoved(String path) { - } - - @Override - public void fileUpdate(String path) { - } - - public Set getRemovedFiles() { - return removedFiles; - } - }; - RemovedFilesListener listener = new RemovedFilesListener(); + AddRemoveFilesListener listener = new AddRemoveFilesListener(); idb.addIndexChangedListener(listener); idb.update(); @@ -402,8 +416,12 @@ public Set getRemovedFiles() { // Verify the assumption made above. verify(idb, times(1)).getIndexDownArgs(any(), any(), any()); + checkIndexDown(historyBased, idb); + } + + private void checkIndexDown(boolean historyBased, IndexDatabase idb) throws IOException { // The initial index (done in setUpClass()) should use file based IndexWorkArgs discovery. - // Only the update() done here should lead to indexDownUsingHistory(), + // Only the update() done in the actual test should lead to indexDownUsingHistory(), // hence it should be called just once. if (historyBased) { verify(idb, times(1)).indexDownUsingHistory(any(), any()); @@ -415,5 +433,55 @@ public Set getRemovedFiles() { // TODO: add test for project with multiple repositories - // TODO: add test for forced reindex - see if renamedFile() was called for all files + // TODO: add test for per project tunables + + /** + * Test forced reindex - see if renamedFile() was called for all files in the repository + * even though there was no change. + */ + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testForcedReindex(boolean historyBased) throws Exception { + + env.setHistoryBasedReindex(historyBased); + + Project gitProject = env.getProjects().get("git"); + assertNotNull(gitProject); + IndexDatabase idbOrig = new IndexDatabase(gitProject); + assertNotNull(idbOrig); + IndexDatabase idb = spy(idbOrig); + + // Re-generate the history cache so that the git repository is ready for history based re-index. + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + + // Emulate forcing reindex from scratch. + doReturn(false).when(idb).checkSettings(any(), any()); + + // Setup and use listener for the "removed" files. + AddRemoveFilesListener listener = new AddRemoveFilesListener(); + idb.addIndexChangedListener(listener); + idb.update(); + + checkIndexDown(historyBased, idb); + + // List the files in the /git directory tree and compare that to the IndexDatabase file sets. + Path repoRoot = Path.of(repository.getSourceRoot(), "git"); + Set result; + try (Stream walk = Files.walk(repoRoot)) { + result = walk.filter(Files::isRegularFile). + filter(p -> !p.toString().contains(".git")). + collect(Collectors.toSet()); + } + Set expectedFileSet = result.stream().map(f -> { + try { + return Path.of(RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(f.toFile())); + } catch (IOException|ForbiddenSymlinkException e) { + return null; + } + }).collect(Collectors.toSet()); + assertEquals(expectedFileSet, listener.getRemovedFiles().stream().map(Path::of).collect(Collectors.toSet())); + assertEquals(expectedFileSet, listener.getAddedFiles().stream().map(Path::of).collect(Collectors.toSet())); + } } From af9e9595faf26edb96783eccd27ca1ed932edb7a Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 16 May 2022 15:01:12 +0200 Subject: [PATCH 44/88] add negative test --- .../indexer/index/IndexDatabaseTest.java | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 73b8f3da6f6..53c238ebc88 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -52,10 +52,15 @@ import org.junit.jupiter.params.provider.MethodSource; import org.junit.jupiter.params.provider.ValueSource; import org.opengrok.indexer.analysis.Definitions; +import org.opengrok.indexer.condition.EnabledForRepository; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.history.HistoryGuru; +import org.opengrok.indexer.history.MercurialRepositoryTest; +import org.opengrok.indexer.history.Repository; import org.opengrok.indexer.history.RepositoryFactory; +import org.opengrok.indexer.history.RepositoryInfo; +import org.opengrok.indexer.history.RepositoryWithHistoryTraversal; import org.opengrok.indexer.search.QueryBuilder; import org.opengrok.indexer.search.SearchEngine; import org.opengrok.indexer.util.ForbiddenSymlinkException; @@ -75,6 +80,7 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; +import static org.opengrok.indexer.condition.RepositoryInstalled.Type.MERCURIAL; /** * Unit tests for the {@code IndexDatabase} class. @@ -386,7 +392,7 @@ void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean hi assertTrue(repositoryRoot.isDirectory()); changeGitRepository(repositoryRoot); - // Re-generate the history cache so that the git repository is ready for history based re-index. + // Re-generate the history cache so that the data is ready for history based re-index. indexer.prepareIndexer( env, true, true, false, List.of("/git"), null); @@ -431,7 +437,51 @@ private void checkIndexDown(boolean historyBased, IndexDatabase idb) throws IOEx } } - // TODO: add test for project with multiple repositories + // TODO: test project-less configuration + + /** + * Make sure that history based reindex is not performed only for projects + * where some repositories are not instances of {@code RepositoryWithHistoryTraversal}. + * + * Instead of checking the result of the functions that make the decision, check the actual indexing. + */ + @EnabledForRepository(MERCURIAL) + @Test + void testHistoryBasedReindexVsProjectWithDiverseRepos() throws Exception { + env.setHistoryBasedReindex(true); + + // Clone the Mercurial repository underneath the "git" project/repository. + Path destinationPath = Path.of(repository.getSourceRoot(), "git", "mercurial"); + MercurialRepositoryTest.runHgCommand(new File(repository.getSourceRoot()), + "clone", Path.of(repository.getSourceRoot(), "mercurial").toString(), + destinationPath.toString()); + assertTrue(destinationPath.toFile().exists()); + + // rescan the repositories + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + + // Once the Mercurial repository gets changed over to RepositoryWithHistoryTraversal, + // the test will have to start using something else. + Repository mercurialRepo = RepositoryFactory.getRepository(destinationPath.toFile()); + assertFalse(mercurialRepo instanceof RepositoryWithHistoryTraversal); + + // assert the Mercurial repository was detected. + Project gitProject = env.getProjects().get("git"); + assertNotNull(gitProject); + env.generateProjectRepositoriesMap(); + List gitProjectRepos = env.getProjectRepositoriesMap().get(gitProject); + assertNotNull(gitProjectRepos); + assertEquals(2, gitProjectRepos.size()); + + // verify that indexer did not use history based reindex. + IndexDatabase idbOrig = new IndexDatabase(gitProject); + assertNotNull(idbOrig); + IndexDatabase idb = spy(idbOrig); + idb.update(); + checkIndexDown(false, idb); + } // TODO: add test for per project tunables From 707696380944ab8f6c5e48fb0cff757ec84371c3 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 16 May 2022 15:06:24 +0200 Subject: [PATCH 45/88] fix style --- .../java/org/opengrok/indexer/index/IndexDatabaseTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 53c238ebc88..995f032fa9e 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -483,6 +483,7 @@ void testHistoryBasedReindexVsProjectWithDiverseRepos() throws Exception { checkIndexDown(false, idb); } + // TODO: add test for the global tunable // TODO: add test for per project tunables /** @@ -527,7 +528,7 @@ void testForcedReindex(boolean historyBased) throws Exception { Set expectedFileSet = result.stream().map(f -> { try { return Path.of(RuntimeEnvironment.getInstance().getPathRelativeToSourceRoot(f.toFile())); - } catch (IOException|ForbiddenSymlinkException e) { + } catch (IOException | ForbiddenSymlinkException e) { return null; } }).collect(Collectors.toSet()); From 7587f28ab6fd34245bc546ae0373722293223bee Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 16 May 2022 17:52:37 +0200 Subject: [PATCH 46/88] overhaul testHistoryBasedReindexVsProjectWithDiverseRepos() add per project tunable test --- .../indexer/index/IndexDatabaseTest.java | 59 ++++++++++++++----- 1 file changed, 45 insertions(+), 14 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 995f032fa9e..a6ed1da2d29 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -431,16 +431,16 @@ private void checkIndexDown(boolean historyBased, IndexDatabase idb) throws IOEx // hence it should be called just once. if (historyBased) { verify(idb, times(1)).indexDownUsingHistory(any(), any()); + verify(idb, times(0)).indexDown(any(), any(), any()); } else { // indexDown() is recursive, so it will be called more than once. + verify(idb, times(0)).indexDownUsingHistory(any(), any()); verify(idb, atLeast(1)).indexDown(any(), any(), any()); } } - // TODO: test project-less configuration - /** - * Make sure that history based reindex is not performed only for projects + * Make sure that history based reindex is not performed for projects * where some repositories are not instances of {@code RepositoryWithHistoryTraversal}. * * Instead of checking the result of the functions that make the decision, check the actual indexing. @@ -450,23 +450,28 @@ private void checkIndexDown(boolean historyBased, IndexDatabase idb) throws IOEx void testHistoryBasedReindexVsProjectWithDiverseRepos() throws Exception { env.setHistoryBasedReindex(true); + // Make a change in the git repository. + File repositoryRoot = new File(repository.getSourceRoot(), "git"); + assertTrue(repositoryRoot.isDirectory()); + changeGitRepository(repositoryRoot); + // Clone the Mercurial repository underneath the "git" project/repository. Path destinationPath = Path.of(repository.getSourceRoot(), "git", "mercurial"); MercurialRepositoryTest.runHgCommand(new File(repository.getSourceRoot()), - "clone", Path.of(repository.getSourceRoot(), "mercurial").toString(), - destinationPath.toString()); + "clone", Path.of(repository.getSourceRoot(), "mercurial").toString(), + destinationPath.toString()); assertTrue(destinationPath.toFile().exists()); - // rescan the repositories - indexer.prepareIndexer( - env, true, true, - false, List.of("/git"), null); - // Once the Mercurial repository gets changed over to RepositoryWithHistoryTraversal, - // the test will have to start using something else. + // the test will have to start some other repository. Repository mercurialRepo = RepositoryFactory.getRepository(destinationPath.toFile()); assertFalse(mercurialRepo instanceof RepositoryWithHistoryTraversal); + // Rescan the repositories. + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + // assert the Mercurial repository was detected. Project gitProject = env.getProjects().get("git"); assertNotNull(gitProject); @@ -475,6 +480,33 @@ void testHistoryBasedReindexVsProjectWithDiverseRepos() throws Exception { assertNotNull(gitProjectRepos); assertEquals(2, gitProjectRepos.size()); + verifyIndexDown(gitProject); + } + + /** + * The global history based tunable is tested in testGetIndexDownArgs(). + */ + @Test + void testHistoryBasedReindexProjectTunable() throws Exception { + // Make a change in the git repository. + File repositoryRoot = new File(repository.getSourceRoot(), "git"); + assertTrue(repositoryRoot.isDirectory()); + changeGitRepository(repositoryRoot); + + // Toggle the tunable. + Project gitProject = env.getProjects().get("git"); + gitProject.setHistoryBasedReindex(false); + + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + + verifyIndexDown(gitProject); + + gitProject.setHistoryBasedReindex(true); + } + + private void verifyIndexDown(Project gitProject) throws Exception { // verify that indexer did not use history based reindex. IndexDatabase idbOrig = new IndexDatabase(gitProject); assertNotNull(idbOrig); @@ -483,11 +515,10 @@ void testHistoryBasedReindexVsProjectWithDiverseRepos() throws Exception { checkIndexDown(false, idb); } - // TODO: add test for the global tunable - // TODO: add test for per project tunables + // TODO: test project-less configuration /** - * Test forced reindex - see if renamedFile() was called for all files in the repository + * Test forced reindex - see if removeFile() was called for all files in the repository * even though there was no change. */ @ParameterizedTest From bd29b31096dbd342e30d9731d66129f109662e70 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 16 May 2022 19:02:13 +0200 Subject: [PATCH 47/88] add logs --- .../java/org/opengrok/indexer/index/IndexDatabase.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 8f5075457d6..45de418277a 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -446,35 +446,43 @@ private static List getRepositoriesForProject(Project project) { */ private boolean isReadyForHistoryBasedReindex() { if (project == null) { + LOGGER.log(Level.FINEST, "no project, will be indexed by directory traversal."); return false; } // History needs to be enabled for the history cache to work (see the comment below). if (!project.isHistoryEnabled()) { + LOGGER.log(Level.FINEST, "history is disabled, will be indexed by directory traversal."); return false; } RuntimeEnvironment env = RuntimeEnvironment.getInstance(); // History cache is necessary to get the last indexed revision for given repository. if (!env.isHistoryCache()) { + LOGGER.log(Level.FINEST, "history cache is disabled, will be indexed by directory traversal."); return false; } // TODO: should be possible to do per project override if (!env.isHistoryBasedReindex()) { + LOGGER.log(Level.FINEST, "history-based reindex is disabled, will be indexed by directory traversal."); return false; } // So far the history based reindex does not work without projects. if (!env.hasProjects()) { + LOGGER.log(Level.FINEST, "projects are disabled, will be indexed by directory traversal."); return false; } if (!project.isHistoryBasedReindex()) { + LOGGER.log(Level.FINEST, "history based reindex is turned off for project {0}", project); return false; } if (env.getFileCollector(project.getName()) == null) { + LOGGER.log(Level.FINEST, "no collected files for project {0}, will be indexed by directory traversal.", + project); return false; } From cd7eae86972e26ba228bb398031c1d451cda3958 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 17 May 2022 14:16:00 +0200 Subject: [PATCH 48/88] initial reindex should use file traversal --- .../opengrok/indexer/index/IndexDatabase.java | 22 +++++++++++-- .../indexer/index/IndexDatabaseTest.java | 31 ++++++++++++++++++- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 45de418277a..da390bdfd79 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -475,6 +475,24 @@ private boolean isReadyForHistoryBasedReindex() { return false; } + /* + * Check that the index is present for this project. + * In such case the traversal of all changesets would most likely be counterproductive, + * assuming traversal of directory tree is cheaper than getting the files from SCM history + * in such case. + */ + try { + if (getNumFiles() == 0) { + LOGGER.log(Level.FINEST, "zero number of documents for project {0}, " + + "will be indexed by directory traversal.", project); + return false; + } + } catch (IOException e) { + LOGGER.log(Level.FINEST, "failed to get number of documents for project {0}," + + "will be indexed by directory traversal.", project); + return false; + } + if (!project.isHistoryBasedReindex()) { LOGGER.log(Level.FINEST, "history based reindex is turned off for project {0}", project); return false; @@ -508,8 +526,8 @@ private boolean isReadyForHistoryBasedReindex() { * @param repository Repository instance * @return true if the repository can be used for history based reindex */ - @VisibleForTesting - boolean isReadyForHistoryBasedReindex(Repository repository) { + @VisibleForTesting + boolean isReadyForHistoryBasedReindex(Repository repository) { if (!repository.isHistoryEnabled()) { LOGGER.log(Level.FINE, "history is disabled for {0}, " + "the associated project {1} will be indexed using directory traversal", diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index a6ed1da2d29..55a23ffd963 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -64,6 +64,7 @@ import org.opengrok.indexer.search.QueryBuilder; import org.opengrok.indexer.search.SearchEngine; import org.opengrok.indexer.util.ForbiddenSymlinkException; +import org.opengrok.indexer.util.IOUtils; import org.opengrok.indexer.util.TandemPath; import org.opengrok.indexer.util.TestRepository; @@ -115,7 +116,6 @@ public void setUpClass() throws Exception { false, null, null); env.setDefaultProjectsFromNames(new TreeSet<>(Arrays.asList("/c"))); env.generateProjectRepositoriesMap(); - // TODO: make sure the initial index is made using indexDown() indexer.doIndexerExecution(true, null, null); } @@ -484,7 +484,9 @@ void testHistoryBasedReindexVsProjectWithDiverseRepos() throws Exception { } /** + * Verify project specific tunable has effect on how the indexing will be performed. * The global history based tunable is tested in testGetIndexDownArgs(). + * TODO: verify per project override of the global tunable */ @Test void testHistoryBasedReindexProjectTunable() throws Exception { @@ -566,4 +568,31 @@ void testForcedReindex(boolean historyBased) throws Exception { assertEquals(expectedFileSet, listener.getRemovedFiles().stream().map(Path::of).collect(Collectors.toSet())); assertEquals(expectedFileSet, listener.getAddedFiles().stream().map(Path::of).collect(Collectors.toSet())); } + + /** + * make sure the initial indexing is made using indexDown() even though history based reindex is possible. + */ + @Test + void testInitialReindexWithHistoryBased() throws Exception { + env.setHistoryBasedReindex(true); + + // Delete the index (and all data in fact). + assertFalse(repository.getDataRoot().isEmpty()); + IOUtils.removeRecursive(Path.of(repository.getDataRoot())); + assertFalse(Path.of(repository.getDataRoot()).toFile().exists()); + + // Update the index of the project. + Project gitProject = env.getProjects().get("git"); + assertNotNull(gitProject); + IndexDatabase idbOrig = new IndexDatabase(gitProject); + assertNotNull(idbOrig); + IndexDatabase idb = spy(idbOrig); + idb.update(); + + // Check that the index for the git project was created. + Document doc = IndexDatabase.getDocument(Path.of(repository.getSourceRoot(), "git", "main.c").toFile()); + assertNotNull(doc); + + checkIndexDown(false, idb); + } } From d4f69c6038bc97b0f2dcf1f6de185e9230434c58 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 17 May 2022 16:52:59 +0200 Subject: [PATCH 49/88] restore the state needed to remove the Mercurial repository from the data structures --- .../org/opengrok/indexer/index/IndexDatabaseTest.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 55a23ffd963..72910eb2a4f 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -53,6 +53,7 @@ import org.junit.jupiter.params.provider.ValueSource; import org.opengrok.indexer.analysis.Definitions; import org.opengrok.indexer.condition.EnabledForRepository; +import org.opengrok.indexer.configuration.CommandTimeoutType; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.history.HistoryGuru; @@ -110,12 +111,18 @@ public void setUpClass() throws Exception { env.setProjectsEnabled(true); RepositoryFactory.initializeIgnoredNames(env); + // Restore the repository information. + env.setRepositories(repository.getSourceRoot()); + HistoryGuru.getInstance().invalidateRepositories(env.getRepositories(), CommandTimeoutType.INDEXER); + env.generateProjectRepositoriesMap(); + indexer = Indexer.getInstance(); indexer.prepareIndexer( env, true, true, false, null, null); + env.setDefaultProjectsFromNames(new TreeSet<>(Arrays.asList("/c"))); - env.generateProjectRepositoriesMap(); + indexer.doIndexerExecution(true, null, null); } From 23a4c82b49468e46ea6afb65bc203af9e29ddafa Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 17 May 2022 17:39:48 +0200 Subject: [PATCH 50/88] split history for better readability --- .../indexer/history/RepositoryWithPerPartesHistory.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java index 8a8a033fc05..0c656e0fbbb 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithPerPartesHistory.java @@ -74,7 +74,8 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire if (!RuntimeEnvironment.getInstance().isHistoryCachePerPartesEnabled()) { LOGGER.log(Level.INFO, "repository {0} supports per partes history cache creation however " + "it is disabled in the configuration. Generating history cache as whole.", this); - finishCreateCache(cache, getHistory(directory, sinceRevision), null); + History history = getHistory(directory, sinceRevision); + finishCreateCache(cache, history, null); return; } From b26f36f45a10ba3d94f4e4bc8f398337627bcd73 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 17 May 2022 17:40:08 +0200 Subject: [PATCH 51/88] add TODO --- .../main/java/org/opengrok/indexer/history/GitRepository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index f37aa7c59be..6c7d8a6f2b9 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -478,7 +478,7 @@ public History getHistory(File file, String sinceRevision, String tillRevision) public History getHistory(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { - HistoryCollector historyCollector = new HistoryCollector(false); + HistoryCollector historyCollector = new HistoryCollector(false); // TODO: should be based on configuration traverseHistory(file, sinceRevision, tillRevision, numCommits, List.of(historyCollector)); History history = new History(historyCollector.entries, historyCollector.renamedFiles); From a6a884ea41308489c245fea0cb7b333064bf4d53 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 17 May 2022 17:41:55 +0200 Subject: [PATCH 52/88] address per project history based tunable --- .../indexer/configuration/Project.java | 9 +++- .../RepositoryWithHistoryTraversal.java | 4 +- .../opengrok/indexer/index/IndexDatabase.java | 30 ++++++------- .../indexer/index/IndexDatabaseTest.java | 43 +++++++++++++++---- 4 files changed, 59 insertions(+), 27 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java index 73464eff578..ec5612841e9 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java @@ -34,6 +34,7 @@ import java.util.logging.Logger; import java.util.regex.PatternSyntaxException; +import org.jetbrains.annotations.VisibleForTesting; import org.opengrok.indexer.logger.LoggerFactory; import org.opengrok.indexer.util.ClassUtil; import org.opengrok.indexer.util.ForbiddenSymlinkException; @@ -308,6 +309,11 @@ public void setHistoryBasedReindex(boolean flag) { this.historyBasedReindex = flag; } + @VisibleForTesting + public void setHistoryBasedReindexToNull() { + this.historyBasedReindex = null; + } + /** * Return groups where this project belongs. * @@ -499,8 +505,7 @@ public static Project getProject(String path) { * Get the project for a specific file. * * @param file the file to lookup - * @return the project that this file belongs to (or null if the file - * doesn't belong to a project) + * @return the project that this file belongs to (or {@code null} if the file doesn't belong to a project) */ public static Project getProject(File file) { Project ret = null; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index 839adda1505..ff956928569 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -23,6 +23,7 @@ package org.opengrok.indexer.history; import org.jetbrains.annotations.Nullable; +import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.logger.LoggerFactory; import org.opengrok.indexer.util.Statistics; @@ -93,7 +94,8 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire RuntimeEnvironment env = RuntimeEnvironment.getInstance(); FileCollector fileCollector = null; - if (env.isHistoryBasedReindex()) { // TODO: per project check + Project project = Project.getProject(directory); + if (project != null && project.isHistoryBasedReindex()) { fileCollector = new FileCollector(true); } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index da390bdfd79..ea6b17028f7 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -445,6 +445,14 @@ private static List getRepositoriesForProject(Project project) { * TODO: move part of this to doCreateCache() (project specific and global checks) */ private boolean isReadyForHistoryBasedReindex() { + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + + // So far the history based reindex does not work without projects. + if (!env.hasProjects()) { + LOGGER.log(Level.FINEST, "projects are disabled, will be indexed by directory traversal."); + return false; + } + if (project == null) { LOGGER.log(Level.FINEST, "no project, will be indexed by directory traversal."); return false; @@ -456,25 +464,18 @@ private boolean isReadyForHistoryBasedReindex() { return false; } - RuntimeEnvironment env = RuntimeEnvironment.getInstance(); // History cache is necessary to get the last indexed revision for given repository. if (!env.isHistoryCache()) { LOGGER.log(Level.FINEST, "history cache is disabled, will be indexed by directory traversal."); return false; } - // TODO: should be possible to do per project override - if (!env.isHistoryBasedReindex()) { + // Per project tunable can override the global tunable, therefore env.isHistoryBasedReindex() is not checked. + if (!project.isHistoryBasedReindex()) { LOGGER.log(Level.FINEST, "history-based reindex is disabled, will be indexed by directory traversal."); return false; } - // So far the history based reindex does not work without projects. - if (!env.hasProjects()) { - LOGGER.log(Level.FINEST, "projects are disabled, will be indexed by directory traversal."); - return false; - } - /* * Check that the index is present for this project. * In such case the traversal of all changesets would most likely be counterproductive, @@ -493,13 +494,12 @@ private boolean isReadyForHistoryBasedReindex() { return false; } - if (!project.isHistoryBasedReindex()) { - LOGGER.log(Level.FINEST, "history based reindex is turned off for project {0}", project); - return false; - } - + // If there was no change to any of the repositories of the project, a FileCollector instance will be returned + // however the list of files therein will be empty which is legitimate situation (no change of the project). + // Only in a case where getFileCollector() returns null (hinting at something went wrong), + // the file based traversal should be done. if (env.getFileCollector(project.getName()) == null) { - LOGGER.log(Level.FINEST, "no collected files for project {0}, will be indexed by directory traversal.", + LOGGER.log(Level.FINEST, "no file collector for project {0}, will be indexed by directory traversal.", project); return false; } diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 72910eb2a4f..e45e425d372 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -121,6 +121,12 @@ public void setUpClass() throws Exception { env, true, true, false, null, null); + // Reset the state of the git project w.r.t. history based reindex. + // It is the responsibility of each test that relies on the per project tunable + // to call gitProject.completeWithDefaults() or gitProject.setHistoryBasedReindex(). + Project gitProject = env.getProjects().get("git"); + gitProject.setHistoryBasedReindexToNull(); + env.setDefaultProjectsFromNames(new TreeSet<>(Arrays.asList("/c"))); indexer.doIndexerExecution(true, null, null); @@ -391,6 +397,7 @@ void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean hi Project gitProject = env.getProjects().get("git"); assertNotNull(gitProject); + gitProject.completeWithDefaults(); IndexDatabase idbOrig = new IndexDatabase(gitProject, spyFactory); assertNotNull(idbOrig); IndexDatabase idb = spy(idbOrig); @@ -487,41 +494,59 @@ void testHistoryBasedReindexVsProjectWithDiverseRepos() throws Exception { assertNotNull(gitProjectRepos); assertEquals(2, gitProjectRepos.size()); - verifyIndexDown(gitProject); + verifyIndexDown(gitProject, false); } /** * Verify project specific tunable has effect on how the indexing will be performed. * The global history based tunable is tested in testGetIndexDownArgs(). - * TODO: verify per project override of the global tunable */ - @Test - void testHistoryBasedReindexProjectTunable() throws Exception { + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testHistoryBasedReindexProjectTunable(boolean historyBased) throws Exception { + env.setHistoryBasedReindex(!historyBased); + // Make a change in the git repository. File repositoryRoot = new File(repository.getSourceRoot(), "git"); assertTrue(repositoryRoot.isDirectory()); changeGitRepository(repositoryRoot); - // Toggle the tunable. + // The per project tunable should override the global tunable. Project gitProject = env.getProjects().get("git"); - gitProject.setHistoryBasedReindex(false); + gitProject.setHistoryBasedReindex(historyBased); indexer.prepareIndexer( env, true, true, false, List.of("/git"), null); - verifyIndexDown(gitProject); + verifyIndexDown(gitProject, historyBased); gitProject.setHistoryBasedReindex(true); } - private void verifyIndexDown(Project gitProject) throws Exception { + /** + * test history based reindex if there was no change to the repository + */ + @Test + void testHistoryBasedReindexWithNoChange() throws Exception { + env.setHistoryBasedReindex(true); + + Project gitProject = env.getProjects().get("git"); + + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + + verifyIndexDown(gitProject, true); + } + + private void verifyIndexDown(Project gitProject, boolean historyBased) throws Exception { // verify that indexer did not use history based reindex. IndexDatabase idbOrig = new IndexDatabase(gitProject); assertNotNull(idbOrig); IndexDatabase idb = spy(idbOrig); idb.update(); - checkIndexDown(false, idb); + checkIndexDown(historyBased, idb); } // TODO: test project-less configuration From eae92f11999127125fdcdcf09bdedb1d2a2b1251 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 18 May 2022 12:37:49 +0200 Subject: [PATCH 53/88] add merge changeset adjust the listeners to use the configuration --- .../indexer/history/GitRepository.java | 4 +- .../RepositoryWithHistoryTraversal.java | 6 +- .../indexer/index/IndexDatabaseTest.java | 58 ++++++++++++++++--- 3 files changed, 55 insertions(+), 13 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 6c7d8a6f2b9..391066ae97a 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -478,7 +478,7 @@ public History getHistory(File file, String sinceRevision, String tillRevision) public History getHistory(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { - HistoryCollector historyCollector = new HistoryCollector(false); // TODO: should be based on configuration + HistoryCollector historyCollector = new HistoryCollector(isMergeCommitsEnabled()); traverseHistory(file, sinceRevision, tillRevision, numCommits, List.of(historyCollector)); History history = new History(historyCollector.entries, historyCollector.renamedFiles); @@ -513,8 +513,6 @@ public void traverseHistory(File file, String sinceRevision, String tillRevision commit.getAuthorIdent().getEmailAddress(), commit.getFullMessage()); for (ChangesetVisitor visitor : visitors) { - // For truly incremental reindex merge commits have to be processed. - // TODO: maybe the same for renamed files - depends on what happens if renamed file detection is on if (!visitor.consumeMergeChangesets && commit.getParentCount() > 1 && !isMergeCommitsEnabled()) { continue; } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index ff956928569..9eb472b5557 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -96,6 +96,8 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire FileCollector fileCollector = null; Project project = Project.getProject(directory); if (project != null && project.isHistoryBasedReindex()) { + // The fileCollector has to go through merge changesets no matter what the configuration says + // in order to detect the files that need to be indexed. fileCollector = new FileCollector(true); } @@ -103,7 +105,7 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire LOGGER.log(Level.INFO, "repository {0} supports per partes history cache creation however " + "it is disabled in the configuration. Generating history cache as whole.", this); - HistoryCollector historyCollector = new HistoryCollector(false); // TODO: the flag should be based on configuration + HistoryCollector historyCollector = new HistoryCollector(isMergeCommitsEnabled()); List visitors = new ArrayList<>(); visitors.add(historyCollector); if (fileCollector != null) { @@ -132,7 +134,7 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire LOGGER.log(Level.FINEST, "storing history cache for revision range ({0}, {1})", new Object[]{sinceRevision, tillRevision}); - HistoryCollector historyCollector = new HistoryCollector(false); // TODO: the flag should be based on configuration + HistoryCollector historyCollector = new HistoryCollector(isMergeCommitsEnabled()); List visitors = new ArrayList<>(); visitors.add(historyCollector); if (fileCollector != null) { diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index e45e425d372..0467e9d9080 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -44,6 +44,11 @@ import org.apache.lucene.search.ScoreDoc; import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.api.MergeCommand; +import org.eclipse.jgit.api.MergeResult; +import org.eclipse.jgit.api.errors.GitAPIException; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.Ref; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -276,8 +281,38 @@ private void addFileAndCommit(Git git, String newFileName, File repositoryRoot, git.commit().setMessage(message).setAuthor("foo bar", "foobar@example.com").setAll(true).call(); } + private void addMergeCommit(Git git, File repositoryRoot) throws Exception { + // Create and checkout a branch. + final String branchName = "mybranch"; + git.branchCreate().setName(branchName).call(); + git.checkout().setName(branchName).call(); + + // Change a file on the branch. + addFileAndCommit(git, "new.txt", repositoryRoot, "new file on a branch"); + + // Checkout the master branch again. + git.checkout().setName("master").call(); + + // Retrieve the objectId of the latest commit on the branch. + ObjectId mergeBase = git.getRepository().resolve(branchName); + + // Perform the actual merge without FastForward to see the + // actual merge-commit even though the merge is trivial. + git.merge(). + include(mergeBase). + setCommit(false). + setFastForward(MergeCommand.FastForwardMode.NO_FF). + setMessage("merge commit"). + call(); + + // Commit the merge separately so that the author can be set. + // (MergeCommand - a result of git.merge() - does not have the setAuthor() method) + git.commit().setAuthor("foo bar", "foobar@example.com").call(); + } + /** - * Add some commits to the Git repository. + * Add some commits to the Git repository - change/remove/add/rename a file in separate commits, + * also add a merge commit. * @param repositoryRoot Git repository root */ private void changeGitRepository(File repositoryRoot) throws Exception { @@ -312,6 +347,8 @@ private void changeGitRepository(File repositoryRoot) throws Exception { git.add().addFilepattern("Makefile.renamed").call(); git.rm().addFilepattern("Makefile").call(); git.commit().setMessage("rename").setAuthor("foo", "foobar@example.com").setAll(true).call(); + + addMergeCommit(git, repositoryRoot); } } @@ -411,6 +448,8 @@ void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean hi env, true, true, false, List.of("/git"), null); + // TODO: check the history cache w.r.t. the merge changeset + // Setup and use listener for the "removed" files. AddRemoveFilesListener listener = new AddRemoveFilesListener(); idb.addIndexChangedListener(listener); @@ -421,11 +460,13 @@ void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean hi // as it is reused in that stage of indexing. assertNotEquals(0, args.works.size()); // The expected data has to match the work done in changeGitRepository(). - assertEquals(Set.of(Path.of("/git/Makefile.renamed"), - Path.of("/git/main.c"), - Path.of("/git/zzz.txt"), - Path.of("/git/zzzzzz.txt")), - args.works.stream().map(v -> Path.of(v.path)).collect(Collectors.toSet())); + Set expectedFileSet = new HashSet<>(); + expectedFileSet.add(Path.of("/git/Makefile.renamed")); + expectedFileSet.add(Path.of("/git/main.c")); + expectedFileSet.add(Path.of("/git/zzz.txt")); + expectedFileSet.add(Path.of("/git/zzzzzz.txt")); + expectedFileSet.add(Path.of("/git/new.txt")); + assertEquals(expectedFileSet, args.works.stream().map(v -> Path.of(v.path)).collect(Collectors.toSet())); assertEquals(Set.of( Path.of("/git/main.o"), @@ -500,6 +541,7 @@ void testHistoryBasedReindexVsProjectWithDiverseRepos() throws Exception { /** * Verify project specific tunable has effect on how the indexing will be performed. * The global history based tunable is tested in testGetIndexDownArgs(). + * TODO: standalone run of this test fails (for true) */ @ParameterizedTest @ValueSource(booleans = {true, false}) @@ -549,8 +591,6 @@ private void verifyIndexDown(Project gitProject, boolean historyBased) throws Ex checkIndexDown(historyBased, idb); } - // TODO: test project-less configuration - /** * Test forced reindex - see if removeFile() was called for all files in the repository * even though there was no change. @@ -627,4 +667,6 @@ void testInitialReindexWithHistoryBased() throws Exception { checkIndexDown(false, idb); } + + // TODO: test project-less configuration with history based reindex } From 343997e75ee3283157484f1410b5ca8411f05e6c Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 18 May 2022 12:39:00 +0200 Subject: [PATCH 54/88] refactor diff handling to a new method --- .../indexer/history/GitRepository.java | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 391066ae97a..cb541182d8e 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -631,37 +631,42 @@ private void getFilesBetweenCommits(org.eclipse.jgit.lib.Repository repository, String newPath = getNativePath(getDirectoryNameRelative()) + File.separator + getNativePath(diff.getNewPath()); - // TODO: refactor - switch (diff.getChangeType()) { - case DELETE: - if (deletedFiles != null) { - // newPath would be "/dev/null" - String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + - getNativePath(diff.getOldPath()); - deletedFiles.add(oldPath); - } - break; - case RENAME: - if (isHandleRenamedFiles()) { - renamedFiles.add(newPath); - if (deletedFiles != null) { - String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + - getNativePath(diff.getOldPath()); - deletedFiles.add(oldPath); - } - } - break; - default: - if (changedFiles != null) { - // Added files (ChangeType.ADD) are treated as changed. - changedFiles.add(newPath); - } - break; - } + handleDiff(changedFiles, renamedFiles, deletedFiles, diff, newPath); } } } + private void handleDiff(Set changedFiles, Set renamedFiles, Set deletedFiles, + DiffEntry diff, String newPath) { + + switch (diff.getChangeType()) { + case DELETE: + if (deletedFiles != null) { + // newPath would be "/dev/null" + String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getOldPath()); + deletedFiles.add(oldPath); + } + break; + case RENAME: + if (isHandleRenamedFiles()) { + renamedFiles.add(newPath); + if (deletedFiles != null) { + String oldPath = getNativePath(getDirectoryNameRelative()) + File.separator + + getNativePath(diff.getOldPath()); + deletedFiles.add(oldPath); + } + } + break; + default: + if (changedFiles != null) { + // Added files (ChangeType.ADD) are treated as changed. + changedFiles.add(newPath); + } + break; + } + } + private static AbstractTreeIterator prepareTreeParser(org.eclipse.jgit.lib.Repository repository, RevCommit commit) throws IOException { // from the commit we can build the tree which allows us to construct the TreeParser From 1a6dea6dd35f5798005a82486bf84fd68b1cbee1 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 18 May 2022 16:07:39 +0200 Subject: [PATCH 55/88] simplify the merge changeset check --- .../main/java/org/opengrok/indexer/history/GitRepository.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index cb541182d8e..6bddd9e0950 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -513,7 +513,9 @@ public void traverseHistory(File file, String sinceRevision, String tillRevision commit.getAuthorIdent().getEmailAddress(), commit.getFullMessage()); for (ChangesetVisitor visitor : visitors) { - if (!visitor.consumeMergeChangesets && commit.getParentCount() > 1 && !isMergeCommitsEnabled()) { + // Even though the repository itself is set (not) to consume the merge changesets, + // it should be up to the visitor to have the say. This is because of the history based reindex. + if (commit.getParentCount() > 1 && !visitor.consumeMergeChangesets) { continue; } From 39c1d5757a7bc95252b1c9c8d19dc034dee9031d Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Wed, 18 May 2022 16:12:39 +0200 Subject: [PATCH 56/88] introduce repository tunable --- .../indexer/history/RepositoryInfo.java | 19 +++++++++++++++++++ .../RepositoryWithHistoryTraversal.java | 4 +--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java index aad15010d83..90e500fdbc2 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java @@ -79,6 +79,8 @@ public class RepositoryInfo implements Serializable { private boolean historyEnabled; @DTOElement private boolean mergeCommitsEnabled; + @DTOElement + private boolean historyBasedReindex; /** * Empty constructor to support serialization. @@ -99,6 +101,7 @@ public RepositoryInfo(RepositoryInfo orig) { this.historyEnabled = orig.historyEnabled; this.handleRenamedFiles = orig.handleRenamedFiles; this.mergeCommitsEnabled = orig.mergeCommitsEnabled; + this.historyBasedReindex = orig.historyBasedReindex; } /** @@ -129,6 +132,20 @@ public void setMergeCommitsEnabled(boolean flag) { this.mergeCommitsEnabled = flag; } + /** + * @return true if history based reindex is enabled for the repository, false otherwise + */ + public boolean isHistoryBasedReindex() { + return this.historyBasedReindex; + } + + /** + * @param flag if history based reindex should be enabled for the repository + */ + public void setHistoryBasedReindex(boolean flag) { + this.historyBasedReindex = flag; + } + /** * @return true if the repository should have history cache. */ @@ -313,12 +330,14 @@ public void fillFromProject() { setHistoryEnabled(proj.isHistoryEnabled()); setHandleRenamedFiles(proj.isHandleRenamedFiles()); setMergeCommitsEnabled(proj.isMergeCommitsEnabled()); + setHistoryEnabled(proj.isHistoryBasedReindex()); } else { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); setHistoryEnabled(env.isHistoryEnabled()); setHandleRenamedFiles(env.isHandleHistoryOfRenamedFiles()); setMergeCommitsEnabled(env.isMergeCommitsEnabled()); + setHistoryBasedReindex(env.isHistoryBasedReindex()); } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index 9eb472b5557..7343be1ec77 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -23,7 +23,6 @@ package org.opengrok.indexer.history; import org.jetbrains.annotations.Nullable; -import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.logger.LoggerFactory; import org.opengrok.indexer.util.Statistics; @@ -94,8 +93,7 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire RuntimeEnvironment env = RuntimeEnvironment.getInstance(); FileCollector fileCollector = null; - Project project = Project.getProject(directory); - if (project != null && project.isHistoryBasedReindex()) { + if (isHistoryBasedReindex()) { // The fileCollector has to go through merge changesets no matter what the configuration says // in order to detect the files that need to be indexed. fileCollector = new FileCollector(true); From 5650cf1579547c2f9158c047af5b8af27ab2e6d3 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Thu, 19 May 2022 16:16:51 +0200 Subject: [PATCH 57/88] fix tests Also fix handling of sub-repositories w.r.t. FileCollector. --- .../configuration/RuntimeEnvironment.java | 5 ++ .../indexer/history/FileCollector.java | 7 +- .../opengrok/indexer/history/HistoryGuru.java | 14 +++- .../opengrok/indexer/history/Repository.java | 3 + .../indexer/history/RepositoryInfo.java | 2 +- .../RepositoryWithHistoryTraversal.java | 34 +++++++-- .../opengrok/indexer/index/IndexDatabase.java | 8 ++- .../indexer/index/IndexDatabaseTest.java | 69 +++++++++++++++++-- 8 files changed, 123 insertions(+), 19 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java index b5a4cc70179..03f97afc3f3 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/RuntimeEnvironment.java @@ -1442,6 +1442,11 @@ public void setFileCollector(String name, FileCollector fileCollector) { fileCollectorMap.put(name, fileCollector); } + @VisibleForTesting + public void clearFileCollector() { + fileCollectorMap.clear(); + } + /** * Read an configuration file and set it as the current configuration. * diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java index d77e9c9bd97..bcb8281680d 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/FileCollector.java @@ -22,6 +22,7 @@ */ package org.opengrok.indexer.history; +import java.util.Collection; import java.util.SortedSet; import java.util.TreeSet; @@ -33,7 +34,7 @@ * in one changeset a file may be deleted, only to be re-added in the next changeset etc. */ public class FileCollector extends ChangesetVisitor { - private SortedSet files; + private final SortedSet files; /** * Assumes comparing in the same way as {@code org.opengrok.indexer.index.IndexDatabase#FILENAME_COMPARATOR}. @@ -58,4 +59,8 @@ public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { public SortedSet getFiles() { return files; } + + void addFiles(Collection files) { + this.files.addAll(files); + } } \ No newline at end of file diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java index 430db1530b9..32fbc3d8434 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryGuru.java @@ -47,6 +47,7 @@ import java.util.stream.Collectors; import org.jetbrains.annotations.Nullable; +import org.jetbrains.annotations.VisibleForTesting; import org.opengrok.indexer.configuration.CommandTimeoutType; import org.opengrok.indexer.configuration.Configuration.RemoteSCM; import org.opengrok.indexer.configuration.PathAccepter; @@ -284,6 +285,10 @@ public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryExc return repository.getLastHistoryEntry(file, ui); } + public History getHistory(File file, boolean withFiles, boolean ui) throws HistoryException { + return getHistory(file, withFiles, ui, true); + } + /** * Get the history for the specified file. The history cache is tried first, then the repository. * @@ -291,10 +296,12 @@ public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryExc * @param withFiles whether the returned history should contain a * list of files touched by each changeset (the file list may be skipped if false, but it doesn't have to) * @param ui called from the webapp + * @param fallback fall back to fetching the history from the repository + * if it cannot be retrieved from history cache * @return history for the file * @throws HistoryException on error when accessing the history */ - public History getHistory(File file, boolean withFiles, boolean ui) throws HistoryException { + public History getHistory(File file, boolean withFiles, boolean ui, boolean fallback) throws HistoryException { final File dir = file.isDirectory() ? file : file.getParentFile(); final Repository repository = getRepository(dir); @@ -305,7 +312,7 @@ public History getHistory(File file, boolean withFiles, boolean ui) throws Histo History history; try { - history = getHistoryFromCache(file, repository, withFiles, true); + history = getHistoryFromCache(file, repository, withFiles, fallback); if (history != null) { return history; } @@ -906,7 +913,8 @@ public void invalidateRepositories(Collection repos, C "history.repositories.invalidate"); } - private void clear() { + @VisibleForTesting + public void clear() { repositoryRoots.clear(); repositories.clear(); repositoryLookup.clear(); diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java index ea3f7d24cc2..2246063e66e 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java @@ -115,6 +115,9 @@ public String toString() { stringBuilder.append(","); stringBuilder.append("merge="); stringBuilder.append(this.isMergeCommitsEnabled()); + stringBuilder.append(","); + stringBuilder.append("historyBased="); + stringBuilder.append(this.isHistoryBasedReindex()); } stringBuilder.append("}"); return stringBuilder.toString(); diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java index 90e500fdbc2..fdb8432da5d 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryInfo.java @@ -330,7 +330,7 @@ public void fillFromProject() { setHistoryEnabled(proj.isHistoryEnabled()); setHandleRenamedFiles(proj.isHandleRenamedFiles()); setMergeCommitsEnabled(proj.isMergeCommitsEnabled()); - setHistoryEnabled(proj.isHistoryBasedReindex()); + setHistoryBasedReindex(proj.isHistoryBasedReindex()); } else { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index 7343be1ec77..a9e2c5915ff 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -23,6 +23,7 @@ package org.opengrok.indexer.history; import org.jetbrains.annotations.Nullable; +import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.logger.LoggerFactory; import org.opengrok.indexer.util.Statistics; @@ -93,7 +94,8 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire RuntimeEnvironment env = RuntimeEnvironment.getInstance(); FileCollector fileCollector = null; - if (isHistoryBasedReindex()) { + Project project = Project.getProject(directory); + if (project != null && isHistoryBasedReindex()) { // The fileCollector has to go through merge changesets no matter what the configuration says // in order to detect the files that need to be indexed. fileCollector = new FileCollector(true); @@ -112,9 +114,15 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire traverseHistory(directory, sinceRevision, null, null, visitors); History history = new History(historyCollector.entries, historyCollector.renamedFiles); + // Assign tags to changesets they represent. + // We don't need to check if this repository supports tags, because we know it :-) + if (env.isTagsEnabled()) { + assignTagsInHistory(history); + } + finishCreateCache(cache, history, null); - RuntimeEnvironment.getInstance().setFileCollector(directory.getName(), fileCollector); + updateFileCollector(fileCollector, project); return; } @@ -141,10 +149,9 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire traverseHistory(directory, sinceRevision, tillRevision, null, visitors); History history = new History(historyCollector.entries, historyCollector.renamedFiles); - // Assign tags to changesets they represent - // We don't need to check if this repository supports tags, - // because we know it :-) - if (RuntimeEnvironment.getInstance().isTagsEnabled()) { + // Assign tags to changesets they represent. + // We don't need to check if this repository supports tags, because we know it :-) + if (env.isTagsEnabled()) { assignTagsInHistory(history); } @@ -154,6 +161,19 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire ++cnt, boundaryChangesetList.size(), this.getDirectoryName())); } - RuntimeEnvironment.getInstance().setFileCollector(directory.getName(), fileCollector); + updateFileCollector(fileCollector, project); + } + + private void updateFileCollector(FileCollector fileCollector, Project project) { + RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + + if (project != null && fileCollector != null) { + FileCollector fileCollectorEnv = env.getFileCollector(project.getName()); + if (fileCollectorEnv == null) { + env.setFileCollector(project.getName(), fileCollector); + } else { + fileCollectorEnv.addFiles(fileCollector.getFiles()); + } + } } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index ea6b17028f7..cf1f399527f 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -535,7 +535,13 @@ boolean isReadyForHistoryBasedReindex(Repository repository) { return false; } - // Do this only if all repositories for given project support file gathering via history traversal. + if (!repository.isHistoryBasedReindex()) { + LOGGER.log(Level.FINE, "history based reindex is disabled for {0}, " + + "the associated project {1} will be indexed using directory traversal", + new Object[]{repository, project}); + return false; + } + if (!(repository instanceof RepositoryWithHistoryTraversal)) { LOGGER.log(Level.FINE, "project {0} has a repository {1} that does not support history traversal," + "the project will be indexed using directory traversal.", diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 0467e9d9080..bd31f659dc4 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -30,8 +30,10 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -45,10 +47,7 @@ import org.eclipse.jgit.api.Git; import org.eclipse.jgit.api.MergeCommand; -import org.eclipse.jgit.api.MergeResult; -import org.eclipse.jgit.api.errors.GitAPIException; import org.eclipse.jgit.lib.ObjectId; -import org.eclipse.jgit.lib.Ref; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -61,6 +60,8 @@ import org.opengrok.indexer.configuration.CommandTimeoutType; import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; +import org.opengrok.indexer.history.FileCollector; +import org.opengrok.indexer.history.History; import org.opengrok.indexer.history.HistoryGuru; import org.opengrok.indexer.history.MercurialRepositoryTest; import org.opengrok.indexer.history.Repository; @@ -116,7 +117,9 @@ public void setUpClass() throws Exception { env.setProjectsEnabled(true); RepositoryFactory.initializeIgnoredNames(env); - // Restore the repository information. + // Restore the project and repository information. + env.setProjects(new HashMap<>()); + HistoryGuru.getInstance().removeRepositories(List.of("/git")); env.setRepositories(repository.getSourceRoot()); HistoryGuru.getInstance().invalidateRepositories(env.getRepositories(), CommandTimeoutType.INDEXER); env.generateProjectRepositoriesMap(); @@ -135,6 +138,8 @@ public void setUpClass() throws Exception { env.setDefaultProjectsFromNames(new TreeSet<>(Arrays.asList("/c"))); indexer.doIndexerExecution(true, null, null); + + env.clearFileCollector(); } @AfterEach @@ -408,7 +413,7 @@ public Set getRemovedFiles() { public Set getAddedFiles() { return addedFiles; } - }; + } /** * Test specifically getIndexDownArgs() with IndexDatabase instance. @@ -444,11 +449,13 @@ void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean hi changeGitRepository(repositoryRoot); // Re-generate the history cache so that the data is ready for history based re-index. + HistoryGuru.getInstance().clear(); indexer.prepareIndexer( env, true, true, false, List.of("/git"), null); + env.generateProjectRepositoriesMap(); - // TODO: check the history cache w.r.t. the merge changeset + // TODO: check history cache w.r.t. the merge changeset // Setup and use listener for the "removed" files. AddRemoveFilesListener listener = new AddRemoveFilesListener(); @@ -538,6 +545,48 @@ void testHistoryBasedReindexVsProjectWithDiverseRepos() throws Exception { verifyIndexDown(gitProject, false); } + /** + * Make sure the files detected for a sub-repository are correctly stored in the appropriate + * {@code FileCollector} instance. + */ + @Test + void testHistoryBasedReindexWithEligibleSubRepo() throws Exception { + env.setHistoryBasedReindex(true); + + assertNull(env.getFileCollector("git")); + + Project gitProject = env.getProjects().get("git"); + assertNotNull(gitProject); + gitProject.completeWithDefaults(); + + // Create a Git repository underneath the existing git repository and make a change there. + File repositoryRoot = new File(repository.getSourceRoot(), "git"); + assertTrue(repositoryRoot.isDirectory()); + changeGitRepository(repositoryRoot); + String subRepoName = "subrepo"; + File subRepositoryRoot = new File(repositoryRoot, subRepoName); + String changedFileName = "subfile.txt"; + try (Git git = Git.init().setDirectory(subRepositoryRoot).call()) { + addFileAndCommit(git, changedFileName, subRepositoryRoot, "new file in subrepo"); + } + assertTrue(new File(subRepositoryRoot, changedFileName).exists()); + + HistoryGuru.getInstance().clear(); + + // Rescan the repositories and refresh the history cache which should also collect the files + // for the 2nd stage of indexing. + indexer.prepareIndexer( + env, true, true, + false, List.of("/git"), null); + + // Verify the collected files. + FileCollector fileCollector = env.getFileCollector("git"); + assertNotNull(fileCollector); + assertTrue(fileCollector.getFiles().size() > 1); + assertTrue(fileCollector.getFiles(). + contains("/" + gitProject.getName() + "/" + subRepoName + "/" + changedFileName)); + } + /** * Verify project specific tunable has effect on how the indexing will be performed. * The global history based tunable is tested in testGetIndexDownArgs(). @@ -556,10 +605,13 @@ void testHistoryBasedReindexProjectTunable(boolean historyBased) throws Exceptio // The per project tunable should override the global tunable. Project gitProject = env.getProjects().get("git"); gitProject.setHistoryBasedReindex(historyBased); + gitProject.completeWithDefaults(); + HistoryGuru.getInstance().clear(); indexer.prepareIndexer( env, true, true, false, List.of("/git"), null); + env.generateProjectRepositoriesMap(); verifyIndexDown(gitProject, historyBased); @@ -574,10 +626,13 @@ void testHistoryBasedReindexWithNoChange() throws Exception { env.setHistoryBasedReindex(true); Project gitProject = env.getProjects().get("git"); + gitProject.completeWithDefaults(); + HistoryGuru.getInstance().clear(); indexer.prepareIndexer( env, true, true, false, List.of("/git"), null); + env.generateProjectRepositoriesMap(); verifyIndexDown(gitProject, true); } @@ -603,6 +658,7 @@ void testForcedReindex(boolean historyBased) throws Exception { Project gitProject = env.getProjects().get("git"); assertNotNull(gitProject); + gitProject.completeWithDefaults(); IndexDatabase idbOrig = new IndexDatabase(gitProject); assertNotNull(idbOrig); IndexDatabase idb = spy(idbOrig); @@ -611,6 +667,7 @@ void testForcedReindex(boolean historyBased) throws Exception { indexer.prepareIndexer( env, true, true, false, List.of("/git"), null); + env.generateProjectRepositoriesMap(); // Emulate forcing reindex from scratch. doReturn(false).when(idb).checkSettings(any(), any()); From c9bcef446738499e1696c16197fd0bcc99e5ecc6 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Thu, 19 May 2022 16:19:07 +0200 Subject: [PATCH 58/88] remove unused imports --- .../test/java/org/opengrok/indexer/index/IndexDatabaseTest.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index bd31f659dc4..2dd203fe11d 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -30,7 +30,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -61,7 +60,6 @@ import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.history.FileCollector; -import org.opengrok.indexer.history.History; import org.opengrok.indexer.history.HistoryGuru; import org.opengrok.indexer.history.MercurialRepositoryTest; import org.opengrok.indexer.history.Repository; From 984a886900a9f5c75621818edc0cb9b689e359ab Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Thu, 19 May 2022 16:50:50 +0200 Subject: [PATCH 59/88] test merge changesets, fix project properties --- .../indexer/configuration/Project.java | 7 +++++-- .../indexer/index/IndexDatabaseTest.java | 19 ++++++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java index ec5612841e9..e86e7d94dde 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Project.java @@ -310,8 +310,11 @@ public void setHistoryBasedReindex(boolean flag) { } @VisibleForTesting - public void setHistoryBasedReindexToNull() { - this.historyBasedReindex = null; + public void clearProperties() { + historyBasedReindex = null; + mergeCommitsEnabled = null; + historyEnabled = null; + handleRenamedFiles = null; } /** diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 2dd203fe11d..5f29940532e 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -60,6 +60,8 @@ import org.opengrok.indexer.configuration.Project; import org.opengrok.indexer.configuration.RuntimeEnvironment; import org.opengrok.indexer.history.FileCollector; +import org.opengrok.indexer.history.History; +import org.opengrok.indexer.history.HistoryEntry; import org.opengrok.indexer.history.HistoryGuru; import org.opengrok.indexer.history.MercurialRepositoryTest; import org.opengrok.indexer.history.Repository; @@ -129,9 +131,9 @@ public void setUpClass() throws Exception { // Reset the state of the git project w.r.t. history based reindex. // It is the responsibility of each test that relies on the per project tunable - // to call gitProject.completeWithDefaults() or gitProject.setHistoryBasedReindex(). + // to call gitProject.completeWithDefaults(). Project gitProject = env.getProjects().get("git"); - gitProject.setHistoryBasedReindexToNull(); + gitProject.clearProperties(); env.setDefaultProjectsFromNames(new TreeSet<>(Arrays.asList("/c"))); @@ -453,7 +455,18 @@ void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean hi false, List.of("/git"), null); env.generateProjectRepositoriesMap(); - // TODO: check history cache w.r.t. the merge changeset + // Check history cache w.r.t. the merge changeset. + File mergeFile = new File(repositoryRoot, "new.txt"); + History history = HistoryGuru.getInstance().getHistory(mergeFile, false, false,false); + assertNotNull(history); + assertNotNull(history.getHistoryEntries()); + boolean containsMergeCommitMessage = history.getHistoryEntries().stream(). + map(HistoryEntry::getMessage).collect(Collectors.toSet()).contains("merge commit"); + if (mergeCommits) { + assertTrue(containsMergeCommitMessage); + } else { + assertFalse(containsMergeCommitMessage); + } // Setup and use listener for the "removed" files. AddRemoveFilesListener listener = new AddRemoveFilesListener(); From 857731e71694accaf29ff525498b718718733eed Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Thu, 19 May 2022 16:54:27 +0200 Subject: [PATCH 60/88] fix style --- .../java/org/opengrok/indexer/index/IndexDatabaseTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 5f29940532e..9c621f6d046 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -457,7 +457,7 @@ void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean hi // Check history cache w.r.t. the merge changeset. File mergeFile = new File(repositoryRoot, "new.txt"); - History history = HistoryGuru.getInstance().getHistory(mergeFile, false, false,false); + History history = HistoryGuru.getInstance().getHistory(mergeFile, false, false, false); assertNotNull(history); assertNotNull(history.getHistoryEntries()); boolean containsMergeCommitMessage = history.getHistoryEntries().stream(). @@ -630,7 +630,7 @@ void testHistoryBasedReindexProjectTunable(boolean historyBased) throws Exceptio } /** - * test history based reindex if there was no change to the repository + * Test history based reindex if there was no change to the repository. */ @Test void testHistoryBasedReindexWithNoChange() throws Exception { From 88bcde943c29185e4cd29c2b92877e76bcfc3f19 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Thu, 19 May 2022 19:10:31 +0200 Subject: [PATCH 61/88] remove TODO, the test does not fail anymore when run standalone --- .../test/java/org/opengrok/indexer/index/IndexDatabaseTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 9c621f6d046..ff3c108fab9 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -601,7 +601,6 @@ void testHistoryBasedReindexWithEligibleSubRepo() throws Exception { /** * Verify project specific tunable has effect on how the indexing will be performed. * The global history based tunable is tested in testGetIndexDownArgs(). - * TODO: standalone run of this test fails (for true) */ @ParameterizedTest @ValueSource(booleans = {true, false}) From 5b1b346a6015d76cfb79aefdb69274bf2a084ce2 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Thu, 19 May 2022 19:16:37 +0200 Subject: [PATCH 62/88] rename the option to match the tunable also shorten the description lines --- .../java/org/opengrok/indexer/index/Indexer.java | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java index dcddd84ac12..899dd2066ab 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java @@ -801,12 +801,16 @@ public static String[] parseOptions(String[] argv) throws ParseException { } }); - parser.on("--trulyIncremental", "=on|off", ON_OFF, Boolean.class, - "If truly incremental reindex is in effect, the set of files changed/deleted since the last ", - "reindex is determined from history of the repositories. This needs history, history cache ", - "and projects to be enabled. This should be much faster than the classic way of traversing ", - "the directory structure. The default is on. If you need to e.g. index files untracked by ", - "SCM, set this to off. Currently works only for Git."). + parser.on("--historyBased", "=on|off", ON_OFF, Boolean.class, + "If history based reindex is in effect, the set of files ", + "changed/deleted since the last reindex is determined from history ", + "of the repositories. This needs history, history cache and ", + "projects to be enabled. This should be much faster than the ", + "classic way of traversing the directory structure. ", + "The default is on. If you need to e.g. index files untracked by ", + "SCM, set this to off. Currently works only for Git.", + "All repositories in a project need to support this in order ", + "to be indexed using history."). execute(v -> cfg.setHistoryBasedReindex((Boolean) v)); parser.on("-U", "--uri", "=SCHEME://webappURI:port/contextPath", From ac7d53afaf19da9f50118d1f3e2fdccceb225fbd Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 20 May 2022 15:16:36 +0200 Subject: [PATCH 63/88] cleanup, check history is enabled for repository --- .../indexer/history/RepositoryWithHistoryTraversal.java | 2 +- .../main/java/org/opengrok/indexer/index/IndexDatabase.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index a9e2c5915ff..437be863e67 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -95,7 +95,7 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire FileCollector fileCollector = null; Project project = Project.getProject(directory); - if (project != null && isHistoryBasedReindex()) { + if (project != null && isHistoryBasedReindex() && isHistoryEnabled()) { // The fileCollector has to go through merge changesets no matter what the configuration says // in order to detect the files that need to be indexed. fileCollector = new FileCollector(true); diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index cf1f399527f..d56ff0c374c 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -442,7 +442,6 @@ private static List getRepositoriesForProject(Project project) { /** * @return whether the repositories of given project are ready for history based reindex - * TODO: move part of this to doCreateCache() (project specific and global checks) */ private boolean isReadyForHistoryBasedReindex() { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); @@ -478,7 +477,7 @@ private boolean isReadyForHistoryBasedReindex() { /* * Check that the index is present for this project. - * In such case the traversal of all changesets would most likely be counterproductive, + * In case of the initial indexing, the traversal of all changesets would most likely be counterproductive, * assuming traversal of directory tree is cheaper than getting the files from SCM history * in such case. */ From 3372675c9ce82ea14d6af5ca97f1a89a102d2e1b Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 20 May 2022 15:30:22 +0200 Subject: [PATCH 64/88] add checks for history related tunables --- .../src/main/java/org/opengrok/indexer/index/Indexer.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java index 899dd2066ab..8e004fe27b2 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java @@ -908,6 +908,14 @@ private static void checkConfiguration() { if (!new File(cfg.getDataRoot()).canWrite()) { die("Data root '" + cfg.getDataRoot() + "' must be writable"); } + + if (!cfg.isHistoryEnabled() && cfg.isHistoryBasedReindex()) { + die("History has to be enabled for history based reindex"); + } + + if (!cfg.isHistoryCache() && cfg.isHistoryBasedReindex()) { + die("History cache has to be enabled for history based reindex"); + } } private static void die(String message) { From c54bd05d6a4234598f37169471f77ce24260d862 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 20 May 2022 15:30:38 +0200 Subject: [PATCH 65/88] use single Statistics instance when reporting file collection --- .../opengrok/indexer/index/IndexDatabase.java | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index d56ff0c374c..1b6bf623c72 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -769,27 +769,26 @@ private void processTrailingTerms(String startUid, boolean usedHistory, IndexDow */ @VisibleForTesting boolean getIndexDownArgs(String dir, File sourceRoot, IndexDownArgs args) throws IOException { - Statistics elapsed = new Statistics(); - boolean usedHistory = false; - RuntimeEnvironment env = RuntimeEnvironment.getInstance(); + boolean historyBased = isReadyForHistoryBasedReindex(); - if (isReadyForHistoryBasedReindex()) { - LOGGER.log(Level.INFO, "Starting file collection using history traversal for directory {0}", dir); + if (LOGGER.isLoggable(Level.INFO)) { + LOGGER.log(Level.INFO, String.format("Starting file collection using %s traversal for directory '%s'", + historyBased ? "history" : "file-system", dir)); + } + Statistics elapsed = new Statistics(); + if (historyBased) { indexDownUsingHistory(env.getSourceRootFile(), args); - usedHistory = true; - elapsed.report(LOGGER, String.format("Done file collection for directory %s", dir), - "indexer.db.directory.collection"); } else { - LOGGER.log(Level.INFO, "Starting file collection using file-system traversal of directory {0}", dir); indexDown(sourceRoot, dir, args); - elapsed.report(LOGGER, String.format("Done traversal of directory %s", dir), - "indexer.db.directory.traversal"); } + elapsed.report(LOGGER, String.format("Done file collection for directory '%s'", dir), + "indexer.db.collection"); + showFileCount(dir, args); - return usedHistory; + return historyBased; } /** From 68ee168360e852a1fe38e3a72f633479a15d09f2 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 20 May 2022 15:52:56 +0200 Subject: [PATCH 66/88] add project-less based test for history based reindex --- .../indexer/index/IndexDatabaseTest.java | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index ff3c108fab9..70af57dd7d7 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -735,5 +735,23 @@ void testInitialReindexWithHistoryBased() throws Exception { checkIndexDown(false, idb); } - // TODO: test project-less configuration with history based reindex + /** + * project-less configuration should lead to file-system based reindex. + */ + @Test + void testProjectLessReindexVsHistoryBased() throws Exception { + env.setProjectsEnabled(false); + + // Make a change in the git repository. + File repositoryRoot = new File(repository.getSourceRoot(), "git"); + assertTrue(repositoryRoot.isDirectory()); + changeGitRepository(repositoryRoot); + + IndexDatabase idbOrig = new IndexDatabase(); + assertNotNull(idbOrig); + IndexDatabase idb = spy(idbOrig); + idb.update(); + + checkIndexDown(false, idb); + } } From a96f0325d155491e630c606e7ca1cd3c9e90ae46 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 20 May 2022 15:53:09 +0200 Subject: [PATCH 67/88] unwrap the line for better readability --- .../main/java/org/opengrok/indexer/index/IndexDatabase.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index 1b6bf623c72..b779e31d0ae 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -234,8 +234,7 @@ static CountDownLatch updateAll(IndexChangedListener listener) throws IOExceptio dbs.add(new IndexDatabase()); } - IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance(). - getIndexerParallelizer(); + IndexerParallelizer parallelizer = RuntimeEnvironment.getInstance().getIndexerParallelizer(); CountDownLatch latch = new CountDownLatch(dbs.size()); for (IndexDatabase d : dbs) { final IndexDatabase db = d; From 516b9eba0add8a6d8e8a44c1c12422ecdaab5522 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Fri, 20 May 2022 17:46:44 +0200 Subject: [PATCH 68/88] add check for numCommits argument value --- .../main/java/org/opengrok/indexer/history/GitRepository.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 6bddd9e0950..8c93f698dda 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -478,6 +478,10 @@ public History getHistory(File file, String sinceRevision, String tillRevision) public History getHistory(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { + if (numCommits != null && numCommits <= 0) { + return null; + } + HistoryCollector historyCollector = new HistoryCollector(isMergeCommitsEnabled()); traverseHistory(file, sinceRevision, tillRevision, numCommits, List.of(historyCollector)); History history = new History(historyCollector.entries, historyCollector.renamedFiles); From 794d0b7601051ece0392a8b2dc98950bb17570e1 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Sat, 21 May 2022 10:23:31 +0200 Subject: [PATCH 69/88] convert Mercurial to RepositoryWithHistoryTraversal --- .../indexer/history/GitRepository.java | 21 ---- .../indexer/history/HistoryCollector.java | 11 +- .../history/MercurialHistoryParser.java | 92 +++++++++------ .../indexer/history/MercurialRepository.java | 31 +---- .../opengrok/indexer/history/Repository.java | 6 +- .../RepositoryWithHistoryTraversal.java | 33 +++++- .../indexer/index/IndexDatabaseTest.java | 110 +++++++++++++----- 7 files changed, 187 insertions(+), 117 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java index 8c93f698dda..d6ac0222dcf 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/GitRepository.java @@ -475,27 +475,6 @@ public History getHistory(File file, String sinceRevision, String tillRevision) return getHistory(file, sinceRevision, tillRevision, null); } - public History getHistory(File file, String sinceRevision, String tillRevision, - Integer numCommits) throws HistoryException { - - if (numCommits != null && numCommits <= 0) { - return null; - } - - HistoryCollector historyCollector = new HistoryCollector(isMergeCommitsEnabled()); - traverseHistory(file, sinceRevision, tillRevision, numCommits, List.of(historyCollector)); - History history = new History(historyCollector.entries, historyCollector.renamedFiles); - - // Assign tags to changesets they represent - // We don't need to check if this repository supports tags, - // because we know it :-) - if (RuntimeEnvironment.getInstance().isTagsEnabled()) { - assignTagsInHistory(history); - } - - return history; - } - public void traverseHistory(File file, String sinceRevision, String tillRevision, Integer numCommits, List visitors) throws HistoryException { diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java index be34dcf24d4..1d527cfd5be 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/HistoryCollector.java @@ -39,8 +39,17 @@ class HistoryCollector extends ChangesetVisitor { public void accept(RepositoryWithHistoryTraversal.ChangesetInfo changesetInfo) { RepositoryWithHistoryTraversal.CommitInfo commit = changesetInfo.commit; + + // TODO: add a test for this + String author; + if (commit.authorEmail != null) { + author = commit.authorName + " <" + commit.authorEmail + ">"; + } else { + author = commit.authorName; + } + HistoryEntry historyEntry = new HistoryEntry(commit.revision, - commit.date, commit.authorName + " <" + commit.authorEmail + ">", + commit.date, author, commit.message, true); if (changesetInfo.renamedFiles != null) { diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialHistoryParser.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialHistoryParser.java index 71804e10c7b..8ef31b20d31 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialHistoryParser.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialHistoryParser.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2017, Chris Fraire . */ package org.opengrok.indexer.history; @@ -33,10 +33,8 @@ import java.text.ParseException; import java.util.ArrayList; import java.util.Date; -import java.util.HashSet; import java.util.Iterator; import java.util.List; -import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import org.opengrok.indexer.configuration.RuntimeEnvironment; @@ -54,31 +52,30 @@ class MercurialHistoryParser implements Executor.StreamHandler { /** Prefix which identifies lines with the description of a commit. */ private static final String DESC_PREFIX = "description: "; - private List entries = new ArrayList<>(); + private List entries = new ArrayList<>(); private final MercurialRepository repository; private final String mydir; private boolean isDir; - private final Set renamedFiles = new HashSet<>(); + private final List visitors; - MercurialHistoryParser(MercurialRepository repository) { + MercurialHistoryParser(MercurialRepository repository, List visitors) { this.repository = repository; + this.visitors = visitors; mydir = repository.getDirectoryName() + File.separator; } /** * Parse the history for the specified file or directory. If a changeset is - * specified, only return the history from the changeset right after the - * specified one. + * specified, only return the history from the changeset right after the specified one. * * @param file the file or directory to get history for * @param sinceRevision the changeset right before the first one to fetch, or * {@code null} if all changesets should be fetched * @param tillRevision end revision or {@code null} * @param numCommits number of revisions to get - * @return history for the specified file or directory * @throws HistoryException if an error happens when parsing the history */ - History parse(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { + void parse(File file, String sinceRevision, String tillRevision, Integer numCommits) throws HistoryException { isDir = file.isDirectory(); try { Executor executor = repository.getHistoryLogExecutor(file, sinceRevision, tillRevision, false, @@ -86,21 +83,18 @@ History parse(File file, String sinceRevision, String tillRevision, Integer numC int status = executor.exec(true, this); if (status != 0) { - throw new HistoryException("Failed to get history for: \"" + - file.getAbsolutePath() + + throw new HistoryException("Failed to get history for: \"" + file.getAbsolutePath() + "\" Exit code: " + status); } } catch (IOException e) { - throw new HistoryException("Failed to get history for: \"" + - file.getAbsolutePath() + "\"", e); + throw new HistoryException("Failed to get history for: \"" + file.getAbsolutePath() + "\"", e); } - // If a changeset to start from is specified, remove that changeset - // from the list, since only the ones following it should be returned. - // Also check that the specified changeset was found, otherwise throw - // an exception. + // If a changeset to start from is specified, remove that changeset from the list, + // since only the ones following it should be returned. + // Also check that the specified changeset was found, otherwise throw an exception. if (sinceRevision != null) { - repository.removeAndVerifyOldestChangeset(entries, sinceRevision); + removeAndVerifyOldestChangeset(entries, sinceRevision); } // See getHistoryLogExecutor() for explanation. @@ -108,13 +102,44 @@ History parse(File file, String sinceRevision, String tillRevision, Integer numC removeChangesets(entries, tillRevision); } - return new History(entries, renamedFiles); + // The visitors are fed with the ChangesetInfo instances here (as opposed to in parse()), + // because of the above manipulations with the entries. + for (RepositoryWithHistoryTraversal.ChangesetInfo info : entries) { + for (ChangesetVisitor visitor : visitors) { + visitor.accept(info); + } + } + } + + /** + * Remove the oldest changeset from a list (assuming sorted with most recent + * changeset first) and verify that it is the changeset expected to find there. + * + * @param entries a list of {@code HistoryEntry} objects + * @param revision the revision we expect the oldest entry to have + * @throws HistoryException if the oldest entry was not the one we expected + */ + private void removeAndVerifyOldestChangeset(List entries, String revision) + throws HistoryException { + + RepositoryWithHistoryTraversal.ChangesetInfo entry = entries.isEmpty() ? null : entries.remove(entries.size() - 1); + + // TODO We should check more thoroughly that the changeset is the one + // we expected it to be, since some SCMs may change the revision + // numbers so that identical revision numbers does not always mean + // identical changesets. We could for example get the cached changeset + // and compare more fields, like author and date. + if (entry == null || !revision.equals(entry.commit.revision)) { + throw new HistoryException("Cached revision '" + revision + + "' not found in the repository " + + repository.getDirectoryName()); + } } - private void removeChangesets(List entries, String tillRevision) { - for (Iterator iter = entries.listIterator(); iter.hasNext(); ) { - HistoryEntry entry = iter.next(); - if (entry.getRevision().equals(tillRevision)) { + private void removeChangesets(List entries, String tillRevision) { + for (Iterator iter = entries.listIterator(); iter.hasNext(); ) { + RepositoryWithHistoryTraversal.ChangesetInfo entry = iter.next(); + if (entry.commit.revision.equals(tillRevision)) { break; } iter.remove(); @@ -123,7 +148,7 @@ private void removeChangesets(List entries, String tillRevision) { /** * Process the output from the {@code hg log} command and collect - * {@link HistoryEntry} elements. + * {@link org.opengrok.indexer.history.RepositoryWithHistoryTraversal.ChangesetInfo} elements. * * @param input The output from the process * @throws java.io.IOException If an error occurs while reading the stream @@ -134,15 +159,14 @@ public void processStream(InputStream input) throws IOException { BufferedReader in = new BufferedReader(new InputStreamReader(input)); entries = new ArrayList<>(); String s; - HistoryEntry entry = null; + RepositoryWithHistoryTraversal.ChangesetInfo entry = null; while ((s = in.readLine()) != null) { if (s.startsWith(MercurialRepository.CHANGESET)) { - entry = new HistoryEntry(); + entry = new RepositoryWithHistoryTraversal.ChangesetInfo(new RepositoryWithHistoryTraversal.CommitInfo()); entries.add(entry); - entry.setActive(true); - entry.setRevision(s.substring(MercurialRepository.CHANGESET.length()).trim()); + entry.commit.revision = s.substring(MercurialRepository.CHANGESET.length()).trim(); } else if (s.startsWith(MercurialRepository.USER) && entry != null) { - entry.setAuthor(s.substring(MercurialRepository.USER.length()).trim()); + entry.commit.authorName = s.substring(MercurialRepository.USER.length()).trim(); } else if (s.startsWith(MercurialRepository.DATE) && entry != null) { Date date; try { @@ -154,7 +178,7 @@ public void processStream(InputStream input) throws IOException { // throw new IOException("Could not parse date: " + s, pe); } - entry.setDate(date); + entry.commit.date = date; } else if (s.startsWith(MercurialRepository.FILES) && entry != null) { String[] strings = s.split(" "); for (int ii = 1; ii < strings.length; ++ii) { @@ -162,7 +186,7 @@ public void processStream(InputStream input) throws IOException { File f = new File(mydir, strings[ii]); try { String path = env.getPathRelativeToSourceRoot(f); - entry.addFile(path.intern()); + entry.files.add(path.intern()); } catch (ForbiddenSymlinkException e) { LOGGER.log(Level.FINER, e.getMessage()); // ignore @@ -189,11 +213,11 @@ public void processStream(InputStream input) throws IOException { String[] move = part.split(" \\("); File f = new File(mydir + move[0]); if (!move[0].isEmpty() && f.exists()) { - renamedFiles.add(repository.getDirectoryNameRelative() + File.separator + move[0]); + entry.renamedFiles.add(repository.getDirectoryNameRelative() + File.separator + move[0]); } } } else if (s.startsWith(DESC_PREFIX) && entry != null) { - entry.setMessage(decodeDescription(s)); + entry.commit.message = decodeDescription(s); } else if (s.equals(MercurialRepository.END_OF_ENTRY) && entry != null) { entry = null; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialRepository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialRepository.java index 3f19cf3b009..8d86a76bfd6 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialRepository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/MercurialRepository.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2017, 2019, Chris Fraire . */ package org.opengrok.indexer.history; @@ -52,7 +52,7 @@ * Access to a Mercurial repository. * */ -public class MercurialRepository extends RepositoryWithPerPartesHistory { +public class MercurialRepository extends RepositoryWithHistoryTraversal { private static final Logger LOGGER = LoggerFactory.getLogger(MercurialRepository.class); @@ -584,31 +584,12 @@ History getHistory(File file, String sinceRevision, String tillRevision) throws return getHistory(file, sinceRevision, tillRevision, null); } - History getHistory(File file, String sinceRevision, String tillRevision, - Integer numCommits) throws HistoryException { + // TODO: add a test for this + public void traverseHistory(File file, String sinceRevision, String tillRevision, + Integer numCommits, List visitors) throws HistoryException { - if (numCommits != null && numCommits <= 0) { - return null; - } - - RuntimeEnvironment env = RuntimeEnvironment.getInstance(); - // Note that the filtering of revisions based on sinceRevision is done - // in the history log executor by passing appropriate options to - // the 'hg' executable. - // This is done only for directories since if getHistory() is used - // for file, the file is renamed and its complete history is fetched - // so no sinceRevision filter is needed. - // See findOriginalName() code for more details. - History result = new MercurialHistoryParser(this). + new MercurialHistoryParser(this, visitors). parse(file, sinceRevision, tillRevision, numCommits); - - // Assign tags to changesets they represent. - // We don't need to check if this repository supports tags, - // because we know it :-) - if (env.isTagsEnabled()) { - assignTagsInHistory(result); - } - return result; } /** diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java index 2246063e66e..6a1915e36e0 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java @@ -301,11 +301,9 @@ public InputStream getHistoryGet(String parent, String basename, String rev) { /** * Returns if this repository tags only files changed in last commit, i.e. - * if we need to prepare list of repository-wide tags prior to creation of - * file history entries. + * if we need to prepare list of repository-wide tags prior to creation of file history entries. * - * @return True if we need tag list creation prior to file parsing, false by - * default. + * @return True if we need tag list creation prior to file parsing, false by default. */ boolean hasFileBasedTags() { return false; diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java index 437be863e67..f7723e867a5 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/RepositoryWithHistoryTraversal.java @@ -31,9 +31,11 @@ import java.io.File; import java.util.ArrayList; import java.util.Date; +import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.SortedSet; +import java.util.TreeSet; import java.util.logging.Level; import java.util.logging.Logger; @@ -50,6 +52,9 @@ public static class CommitInfo { String authorEmail; String message; + CommitInfo() { + } + CommitInfo(String revision, Date date, String authorName, String authorEmail, String message) { this.revision = revision; this.date = date; @@ -67,6 +72,9 @@ public static class ChangesetInfo { ChangesetInfo(CommitInfo commit) { this.commit = commit; + this.files = new TreeSet<>(); + this.renamedFiles = new HashSet<>(); + this.deletedFiles = new HashSet<>(); } ChangesetInfo(CommitInfo commit, SortedSet files, Set renamedFiles, Set deletedFiles) { @@ -89,6 +97,25 @@ public static class ChangesetInfo { public abstract void traverseHistory(File file, String sinceRevision, @Nullable String tillRevision, Integer numCommits, List visitors) throws HistoryException; + public History getHistory(File file, String sinceRevision, String tillRevision, + Integer numCommits) throws HistoryException { + + if (numCommits != null && numCommits <= 0) { + return null; + } + + HistoryCollector historyCollector = new HistoryCollector(isMergeCommitsEnabled()); + traverseHistory(file, sinceRevision, tillRevision, numCommits, List.of(historyCollector)); + History history = new History(historyCollector.entries, historyCollector.renamedFiles); + + // Assign tags to changesets they represent. + if (RuntimeEnvironment.getInstance().isTagsEnabled() && hasFileBasedTags()) { + assignTagsInHistory(history); + } + + return history; + } + @Override protected void doCreateCache(HistoryCache cache, String sinceRevision, File directory) throws HistoryException { RuntimeEnvironment env = RuntimeEnvironment.getInstance(); @@ -115,8 +142,7 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire History history = new History(historyCollector.entries, historyCollector.renamedFiles); // Assign tags to changesets they represent. - // We don't need to check if this repository supports tags, because we know it :-) - if (env.isTagsEnabled()) { + if (env.isTagsEnabled() && hasFileBasedTags()) { assignTagsInHistory(history); } @@ -150,8 +176,7 @@ protected void doCreateCache(HistoryCache cache, String sinceRevision, File dire History history = new History(historyCollector.entries, historyCollector.renamedFiles); // Assign tags to changesets they represent. - // We don't need to check if this repository supports tags, because we know it :-) - if (env.isTagsEnabled()) { + if (env.isTagsEnabled() && hasFileBasedTags()) { assignTagsInHistory(history); } diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 70af57dd7d7..2f3b94b3d1c 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -30,6 +30,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -70,11 +71,13 @@ import org.opengrok.indexer.history.RepositoryWithHistoryTraversal; import org.opengrok.indexer.search.QueryBuilder; import org.opengrok.indexer.search.SearchEngine; +import org.opengrok.indexer.util.FileUtilities; import org.opengrok.indexer.util.ForbiddenSymlinkException; import org.opengrok.indexer.util.IOUtils; import org.opengrok.indexer.util.TandemPath; import org.opengrok.indexer.util.TestRepository; +import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; @@ -88,7 +91,7 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import static org.opengrok.indexer.condition.RepositoryInstalled.Type.MERCURIAL; +import static org.opengrok.indexer.condition.RepositoryInstalled.Type.CVS; /** * Unit tests for the {@code IndexDatabase} class. @@ -512,48 +515,99 @@ private void checkIndexDown(boolean historyBased, IndexDatabase idb) throws IOEx } } + private static void copyDirectory(Path src, Path dest) throws IOException { + Files.walk(src).forEach(srcPath -> { + try { + Path destPath = dest.resolve(src.relativize(srcPath)); + if (Files.isDirectory(srcPath)) { + if(!Files.exists(destPath)) + Files.createDirectory(destPath); + return; + } + Files.copy(srcPath, destPath); + } catch(Exception e) { + e.printStackTrace(); + } + }); + } + /** * Make sure that history based reindex is not performed for projects - * where some repositories are not instances of {@code RepositoryWithHistoryTraversal}. + * where some repositories are not instances of {@code RepositoryWithHistoryTraversal} + * or have the history based reindex explicitly disabled. * * Instead of checking the result of the functions that make the decision, check the actual indexing. */ - @EnabledForRepository(MERCURIAL) - @Test - void testHistoryBasedReindexVsProjectWithDiverseRepos() throws Exception { + @EnabledForRepository(CVS) + @ParameterizedTest + @ValueSource(booleans = {true, false}) + void testHistoryBasedReindexVsProjectWithDiverseRepos(boolean useCvs) throws Exception { env.setHistoryBasedReindex(true); - // Make a change in the git repository. - File repositoryRoot = new File(repository.getSourceRoot(), "git"); - assertTrue(repositoryRoot.isDirectory()); - changeGitRepository(repositoryRoot); - - // Clone the Mercurial repository underneath the "git" project/repository. - Path destinationPath = Path.of(repository.getSourceRoot(), "git", "mercurial"); - MercurialRepositoryTest.runHgCommand(new File(repository.getSourceRoot()), - "clone", Path.of(repository.getSourceRoot(), "mercurial").toString(), - destinationPath.toString()); - assertTrue(destinationPath.toFile().exists()); + // Create a new project with two repositories. + String projectName = "new"; + Path projectPath = Path.of(repository.getSourceRoot(), projectName); + assertTrue(projectPath.toFile().mkdirs()); + assertTrue(projectPath.toFile().isDirectory()); + + String disabledGitRepoName = "git1"; + + if (useCvs) { + // Copy CVS repository underneath the project. + String subrepoName = "cvssubrepo"; + Path destinationPath = Path.of(repository.getSourceRoot(), projectName, subrepoName); + Path sourcePath = Path.of(repository.getSourceRoot(), "cvs_test", "cvsrepo"); + assertTrue(sourcePath.toFile().exists()); + copyDirectory(sourcePath, destinationPath); + assertTrue(destinationPath.toFile().exists()); + + Repository subRepo = RepositoryFactory.getRepository(destinationPath.toFile()); + assertFalse(subRepo instanceof RepositoryWithHistoryTraversal); + } else { + // Clone Git repository underneath the project. + String cloneUrl = Path.of(repository.getSourceRoot(), "git").toFile().toURI().toString(); + Path repositoryRootPath = Path.of(repository.getSourceRoot(), projectName, disabledGitRepoName); + Git.cloneRepository() + .setURI(cloneUrl) + .setDirectory(repositoryRootPath.toFile()) + .call(); + assertTrue(repositoryRootPath.toFile().isDirectory()); + } - // Once the Mercurial repository gets changed over to RepositoryWithHistoryTraversal, - // the test will have to start some other repository. - Repository mercurialRepo = RepositoryFactory.getRepository(destinationPath.toFile()); - assertFalse(mercurialRepo instanceof RepositoryWithHistoryTraversal); + // Clone Git repository underneath the project and make a change there. + String cloneUrl = Path.of(repository.getSourceRoot(), "git").toFile().toURI().toString(); + Path repositoryRootPath = Path.of(repository.getSourceRoot(), projectName, "git"); + Git.cloneRepository() + .setURI(cloneUrl) + .setDirectory(repositoryRootPath.toFile()) + .call(); + assertTrue(repositoryRootPath.toFile().isDirectory()); + changeGitRepository(repositoryRootPath.toFile()); // Rescan the repositories. + HistoryGuru.getInstance().clear(); indexer.prepareIndexer( env, true, true, false, List.of("/git"), null); - - // assert the Mercurial repository was detected. - Project gitProject = env.getProjects().get("git"); - assertNotNull(gitProject); + env.setRepositories(new ArrayList<>(HistoryGuru.getInstance().getRepositories())); env.generateProjectRepositoriesMap(); - List gitProjectRepos = env.getProjectRepositoriesMap().get(gitProject); - assertNotNull(gitProjectRepos); - assertEquals(2, gitProjectRepos.size()); - verifyIndexDown(gitProject, false); + // Assert the repositories were detected. + Project project = env.getProjects().get(projectName); + assertNotNull(project); + List projectRepos = env.getProjectRepositoriesMap().get(project); + assertNotNull(projectRepos); + assertEquals(2, projectRepos.size()); + + if (!useCvs) { + for (RepositoryInfo repo : projectRepos) { + if (repo.getDirectoryNameRelative().equals(disabledGitRepoName)) { + repo.setHistoryBasedReindex(false); + } + } + } + + verifyIndexDown(project, false); } /** From 1799a97f153b93fe2ef602472baffa16a26ba8b0 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Sat, 21 May 2022 10:24:31 +0200 Subject: [PATCH 70/88] add Override annotation --- .../src/main/java/org/opengrok/indexer/history/Repository.java | 1 + 1 file changed, 1 insertion(+) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java index 6a1915e36e0..8802d1a4296 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java @@ -97,6 +97,7 @@ public abstract class Repository extends RepositoryInfo { */ abstract boolean hasHistoryForDirectories(); + @Override public String toString() { StringBuilder stringBuilder = new StringBuilder(); stringBuilder.append("{"); From 001ca5dc6385b7bf18f40d28fbcd6c5f71cbeff9 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Sat, 21 May 2022 10:25:11 +0200 Subject: [PATCH 71/88] limit the visibility --- .../src/main/java/org/opengrok/indexer/history/Repository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java index 8802d1a4296..7bb3e3de2c5 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/history/Repository.java @@ -158,7 +158,7 @@ public HistoryEntry getLastHistoryEntry(File file, boolean ui) throws HistoryExc } } - Repository() { + protected Repository() { super(); ignoredFiles = new ArrayList<>(); ignoredDirs = new ArrayList<>(); From 555957940ad3ddb2e4340d6fd7088156d7f33c76 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Sat, 21 May 2022 10:26:46 +0200 Subject: [PATCH 72/88] remove unused imports --- .../java/org/opengrok/indexer/index/IndexDatabaseTest.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 2f3b94b3d1c..30d1924e711 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -64,20 +64,17 @@ import org.opengrok.indexer.history.History; import org.opengrok.indexer.history.HistoryEntry; import org.opengrok.indexer.history.HistoryGuru; -import org.opengrok.indexer.history.MercurialRepositoryTest; import org.opengrok.indexer.history.Repository; import org.opengrok.indexer.history.RepositoryFactory; import org.opengrok.indexer.history.RepositoryInfo; import org.opengrok.indexer.history.RepositoryWithHistoryTraversal; import org.opengrok.indexer.search.QueryBuilder; import org.opengrok.indexer.search.SearchEngine; -import org.opengrok.indexer.util.FileUtilities; import org.opengrok.indexer.util.ForbiddenSymlinkException; import org.opengrok.indexer.util.IOUtils; import org.opengrok.indexer.util.TandemPath; import org.opengrok.indexer.util.TestRepository; -import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotEquals; From 2e99b7510f1e59d8435e3201c4eea786ce97e467 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Sat, 21 May 2022 10:27:37 +0200 Subject: [PATCH 73/88] fix style --- .../java/org/opengrok/indexer/index/IndexDatabaseTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 30d1924e711..c7f99bd2b41 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -517,8 +517,9 @@ private static void copyDirectory(Path src, Path dest) throws IOException { try { Path destPath = dest.resolve(src.relativize(srcPath)); if (Files.isDirectory(srcPath)) { - if(!Files.exists(destPath)) + if (!Files.exists(destPath)) { Files.createDirectory(destPath); + } return; } Files.copy(srcPath, destPath); From ea354d5891e77cae8032e2ff3ca38fac386046fa Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Sat, 21 May 2022 10:27:49 +0200 Subject: [PATCH 74/88] fix style --- .../test/java/org/opengrok/indexer/index/IndexDatabaseTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index c7f99bd2b41..c31d8738484 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -523,7 +523,7 @@ private static void copyDirectory(Path src, Path dest) throws IOException { return; } Files.copy(srcPath, destPath); - } catch(Exception e) { + } catch (Exception e) { e.printStackTrace(); } }); From 202e6da17c47d80154524e76f0789c8c75cb801f Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 23 May 2022 13:42:25 +0200 Subject: [PATCH 75/88] do not consider history vs. history based reindex as configuration problem --- .../org/opengrok/indexer/index/Indexer.java | 65 ++++++++++++------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java index 8e004fe27b2..41c0113eb11 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java @@ -190,7 +190,11 @@ public static void main(String[] argv) { exitWithHelp(); } - checkConfiguration(); + try { + checkConfiguration(); + } catch (ConfigurationException e) { + die(e.getMessage()); + } if (awaitProfiler) { pauseToAwaitProfiler(); @@ -573,7 +577,20 @@ public static String[] parseOptions(String[] argv) throws ParseException { "Assign commit tags to all entries in history for all repositories.").execute(v -> cfg.setTagsEnabled(true)); - parser.on("-H", "--history", "Enable history.").execute(v -> cfg.setHistoryEnabled(true)); + // for backward compatibility + parser.on("-H", "Enable history.").execute(v -> cfg.setHistoryEnabled(true)); + + parser.on("--historyBased", "=on|off", ON_OFF, Boolean.class, + "If history based reindex is in effect, the set of files ", + "changed/deleted since the last reindex is determined from history ", + "of the repositories. This needs history, history cache and ", + "projects to be enabled. This should be much faster than the ", + "classic way of traversing the directory structure. ", + "The default is on. If you need to e.g. index files untracked by ", + "SCM, set this to off. Currently works only for Git.", + "All repositories in a project need to support this in order ", + "to be indexed using history."). + execute(v -> cfg.setHistoryBasedReindex((Boolean) v)); parser.on("--historyThreads", "=number", Integer.class, "The number of threads to use for history cache generation on repository level. " + @@ -801,18 +818,6 @@ public static String[] parseOptions(String[] argv) throws ParseException { } }); - parser.on("--historyBased", "=on|off", ON_OFF, Boolean.class, - "If history based reindex is in effect, the set of files ", - "changed/deleted since the last reindex is determined from history ", - "of the repositories. This needs history, history cache and ", - "projects to be enabled. This should be much faster than the ", - "classic way of traversing the directory structure. ", - "The default is on. If you need to e.g. index files untracked by ", - "SCM, set this to off. Currently works only for Git.", - "All repositories in a project need to support this in order ", - "to be indexed using history."). - execute(v -> cfg.setHistoryBasedReindex((Boolean) v)); - parser.on("-U", "--uri", "=SCHEME://webappURI:port/contextPath", "Send the current configuration to the specified web application.").execute(webAddr -> { webappURI = (String) webAddr; @@ -866,8 +871,7 @@ public static String[] parseOptions(String[] argv) throws ParseException { execute(v -> cfg.setWebappCtags((Boolean) v)); }); - // Need to read the configuration file first - // so that options may be overwritten later. + // Need to read the configuration file first, so that options may be overwritten later. configure.parse(argv); LOGGER.log(Level.INFO, "Indexer options: {0}", Arrays.toString(argv)); @@ -883,38 +887,49 @@ public static String[] parseOptions(String[] argv) throws ParseException { return argv; } - private static void checkConfiguration() { + static class ConfigurationException extends Exception { + static final long serialVersionUID = -1; + + public ConfigurationException(String message) { + super(message); + } + } + + // TODO: move this Configuration + private static void checkConfiguration() throws ConfigurationException { env = RuntimeEnvironment.getInstance(); if (bareConfig && (env.getConfigURI() == null || env.getConfigURI().isEmpty())) { - die("Missing webappURI setting"); + throw new ConfigurationException("Missing webappURI setting"); } if (!repositories.isEmpty() && !cfg.isHistoryEnabled()) { - die("Repositories were specified; history is off however"); + throw new ConfigurationException("Repositories were specified; history is off however"); } if (cfg.getSourceRoot() == null) { - die("Please specify a SRC_ROOT with option -s !"); + throw new ConfigurationException("Please specify a SRC_ROOT with option -s !"); } if (cfg.getDataRoot() == null) { - die("Please specify a DATA ROOT path"); + throw new ConfigurationException("Please specify a DATA ROOT path"); } if (!new File(cfg.getSourceRoot()).canRead()) { - die("Source root '" + cfg.getSourceRoot() + "' must be readable"); + throw new ConfigurationException("Source root '" + cfg.getSourceRoot() + "' must be readable"); } if (!new File(cfg.getDataRoot()).canWrite()) { - die("Data root '" + cfg.getDataRoot() + "' must be writable"); + throw new ConfigurationException("Data root '" + cfg.getDataRoot() + "' must be writable"); } if (!cfg.isHistoryEnabled() && cfg.isHistoryBasedReindex()) { - die("History has to be enabled for history based reindex"); + LOGGER.log(Level.INFO, "History based reindex is on, however history is off. " + + "History has to be enabled for history based reindex."); } if (!cfg.isHistoryCache() && cfg.isHistoryBasedReindex()) { - die("History cache has to be enabled for history based reindex"); + LOGGER.log(Level.INFO, "History based reindex is on, however history cache is off. " + + "History cache has to be enabled for history based reindex."); } } From 61dce4cb338e604a2107e3407eacd2a8999c2eef Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 23 May 2022 14:01:54 +0200 Subject: [PATCH 76/88] move configuration check to Configuration class --- .../indexer/configuration/Configuration.java | 41 +++++++++++ .../org/opengrok/indexer/index/Indexer.java | 68 +++++-------------- 2 files changed, 58 insertions(+), 51 deletions(-) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java index dd68b5e0c23..23b9df56109 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/configuration/Configuration.java @@ -1535,4 +1535,45 @@ private static Configuration decodeObject(InputStream in) throws IOException { return conf; } + + public static class ConfigurationException extends Exception { + static final long serialVersionUID = -1; + + public ConfigurationException(String message) { + super(message); + } + } + + /** + * Check if configuration is populated and self-consistent. + * @throws ConfigurationException on error + */ + public void checkConfiguration() throws ConfigurationException { + + if (getSourceRoot() == null) { + throw new ConfigurationException("Source root is not specified."); + } + + if (getDataRoot() == null) { + throw new ConfigurationException("Data root is not specified."); + } + + if (!new File(getSourceRoot()).canRead()) { + throw new ConfigurationException("Source root directory '" + getSourceRoot() + "' must be readable."); + } + + if (!new File(getDataRoot()).canWrite()) { + throw new ConfigurationException("Data root directory '" + getDataRoot() + "' must be writable."); + } + + if (!isHistoryEnabled() && isHistoryBasedReindex()) { + LOGGER.log(Level.INFO, "History based reindex is on, however history is off. " + + "History has to be enabled for history based reindex."); + } + + if (!isHistoryCache() && isHistoryBasedReindex()) { + LOGGER.log(Level.INFO, "History based reindex is on, however history cache is off. " + + "History cache has to be enabled for history based reindex."); + } + } } diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java index 41c0113eb11..b3a165ee2b0 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/Indexer.java @@ -190,11 +190,7 @@ public static void main(String[] argv) { exitWithHelp(); } - try { - checkConfiguration(); - } catch (ConfigurationException e) { - die(e.getMessage()); - } + checkConfiguration(); if (awaitProfiler) { pauseToAwaitProfiler(); @@ -419,6 +415,22 @@ public static void main(String[] argv) { } } + private static void checkConfiguration() { + if (bareConfig && (env.getConfigURI() == null || env.getConfigURI().isEmpty())) { + die("Missing webappURI setting"); + } + + if (!repositories.isEmpty() && !cfg.isHistoryEnabled()) { + die("Repositories were specified; history is off however"); + } + + try { + cfg.checkConfiguration(); + } catch (Configuration.ConfigurationException e) { + die(e.getMessage()); + } + } + /** * Parse OpenGrok Indexer options * This method was created so that it would be easier to write unit @@ -887,52 +899,6 @@ public static String[] parseOptions(String[] argv) throws ParseException { return argv; } - static class ConfigurationException extends Exception { - static final long serialVersionUID = -1; - - public ConfigurationException(String message) { - super(message); - } - } - - // TODO: move this Configuration - private static void checkConfiguration() throws ConfigurationException { - env = RuntimeEnvironment.getInstance(); - - if (bareConfig && (env.getConfigURI() == null || env.getConfigURI().isEmpty())) { - throw new ConfigurationException("Missing webappURI setting"); - } - - if (!repositories.isEmpty() && !cfg.isHistoryEnabled()) { - throw new ConfigurationException("Repositories were specified; history is off however"); - } - - if (cfg.getSourceRoot() == null) { - throw new ConfigurationException("Please specify a SRC_ROOT with option -s !"); - } - if (cfg.getDataRoot() == null) { - throw new ConfigurationException("Please specify a DATA ROOT path"); - } - - if (!new File(cfg.getSourceRoot()).canRead()) { - throw new ConfigurationException("Source root '" + cfg.getSourceRoot() + "' must be readable"); - } - - if (!new File(cfg.getDataRoot()).canWrite()) { - throw new ConfigurationException("Data root '" + cfg.getDataRoot() + "' must be writable"); - } - - if (!cfg.isHistoryEnabled() && cfg.isHistoryBasedReindex()) { - LOGGER.log(Level.INFO, "History based reindex is on, however history is off. " + - "History has to be enabled for history based reindex."); - } - - if (!cfg.isHistoryCache() && cfg.isHistoryBasedReindex()) { - LOGGER.log(Level.INFO, "History based reindex is on, however history cache is off. " + - "History cache has to be enabled for history based reindex."); - } - } - private static void die(String message) { System.err.println("ERROR: " + message); System.exit(1); From f542b6855eb1fe23e76403d64db6145128dbc807 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 23 May 2022 17:39:18 +0200 Subject: [PATCH 77/88] reuse already existing copyDirectory() --- .../indexer/index/IndexDatabaseTest.java | 19 +------------------ .../opengrok/indexer/util/TestRepository.java | 2 +- 2 files changed, 2 insertions(+), 19 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index c31d8738484..b77e36ad5cd 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -512,23 +512,6 @@ private void checkIndexDown(boolean historyBased, IndexDatabase idb) throws IOEx } } - private static void copyDirectory(Path src, Path dest) throws IOException { - Files.walk(src).forEach(srcPath -> { - try { - Path destPath = dest.resolve(src.relativize(srcPath)); - if (Files.isDirectory(srcPath)) { - if (!Files.exists(destPath)) { - Files.createDirectory(destPath); - } - return; - } - Files.copy(srcPath, destPath); - } catch (Exception e) { - e.printStackTrace(); - } - }); - } - /** * Make sure that history based reindex is not performed for projects * where some repositories are not instances of {@code RepositoryWithHistoryTraversal} @@ -556,7 +539,7 @@ void testHistoryBasedReindexVsProjectWithDiverseRepos(boolean useCvs) throws Exc Path destinationPath = Path.of(repository.getSourceRoot(), projectName, subrepoName); Path sourcePath = Path.of(repository.getSourceRoot(), "cvs_test", "cvsrepo"); assertTrue(sourcePath.toFile().exists()); - copyDirectory(sourcePath, destinationPath); + repository.copyDirectory(sourcePath, destinationPath); assertTrue(destinationPath.toFile().exists()); Repository subRepo = RepositoryFactory.getRepository(destinationPath.toFile()); diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java index 45c38602cbb..5a43954f09d 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java @@ -103,7 +103,7 @@ public void create(@NotNull final URL url) throws IOException, URISyntaxExceptio } } - private void copyDirectory(Path src, Path dest) throws IOException { + public void copyDirectory(Path src, Path dest) throws IOException { try (Stream stream = Files.walk(src)) { stream.forEach(sourceFile -> { if (sourceFile.equals(src)) { From f682e5bbfc75fd7c0a78c5dc5d31b8c13dddea67 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 23 May 2022 17:39:38 +0200 Subject: [PATCH 78/88] bump year --- .../src/test/java/org/opengrok/indexer/util/TestRepository.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java index 5a43954f09d..54b04702a66 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java @@ -18,7 +18,7 @@ */ /* - * Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved. * Portions Copyright (c) 2018, 2019, Chris Fraire . */ package org.opengrok.indexer.util; From 682482544bdd0f8d823d1b30ce86c39d9b5ee08a Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 23 May 2022 17:46:35 +0200 Subject: [PATCH 79/88] copy files preserving attributes on Windows this avoids Git detecting the files as modified --- .../test/java/org/opengrok/indexer/util/TestRepository.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java index 54b04702a66..2bf1c753995 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java @@ -39,6 +39,7 @@ import org.jetbrains.annotations.NotNull; import org.opengrok.indexer.configuration.RuntimeEnvironment; +import static java.nio.file.StandardCopyOption.COPY_ATTRIBUTES; import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; @@ -111,7 +112,8 @@ public void copyDirectory(Path src, Path dest) throws IOException { } try { Path destRelativePath = getDestinationRelativePath(src, sourceFile); - Files.copy(sourceFile, dest.resolve(destRelativePath.toString()), REPLACE_EXISTING); + Files.copy(sourceFile, dest.resolve(destRelativePath.toString()), + REPLACE_EXISTING, COPY_ATTRIBUTES); } catch (Exception e) { throw new RuntimeException(e); } From 38dee2a66b25fd7035cb7c301706921b1073a819 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Mon, 23 May 2022 18:12:03 +0200 Subject: [PATCH 80/88] re-clone the Git repository in setup --- .../opengrok/indexer/index/IndexDatabaseTest.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index b77e36ad5cd..57bbdcd3ef8 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -111,6 +111,18 @@ public void setUpClass() throws Exception { repository = new TestRepository(); repository.create(HistoryGuru.class.getResource("/repositories")); + // After copying the files from the archive, Git will consider the files to be changed, + // at least on Windows. This causes some tests, particularly testGetIndexDownArgs() to fail. + // To avoid this, clone the Git repository. + Path gitRepositoryRootPath = Path.of(repository.getSourceRoot(), "git"); + Path gitCheckoutPath = Path.of(repository.getSourceRoot(), "gitcheckout"); + Git.cloneRepository() + .setURI(gitRepositoryRootPath.toFile().toURI().toString()) + .setDirectory(gitCheckoutPath.toFile()) + .call(); + IOUtils.removeRecursive(gitRepositoryRootPath); + Files.move(gitCheckoutPath, gitRepositoryRootPath); + env.setSourceRoot(repository.getSourceRoot()); env.setDataRoot(repository.getDataRoot()); env.setHistoryEnabled(true); From 9cfb33d820ad216254790967f5b90ef55458a098 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 24 May 2022 00:45:20 +0200 Subject: [PATCH 81/88] make sure the move does not fail on Windows --- .../java/org/opengrok/indexer/index/IndexDatabaseTest.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 57bbdcd3ef8..71f8ae77575 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -116,10 +116,13 @@ public void setUpClass() throws Exception { // To avoid this, clone the Git repository. Path gitRepositoryRootPath = Path.of(repository.getSourceRoot(), "git"); Path gitCheckoutPath = Path.of(repository.getSourceRoot(), "gitcheckout"); - Git.cloneRepository() + Git git = Git.cloneRepository() .setURI(gitRepositoryRootPath.toFile().toURI().toString()) .setDirectory(gitCheckoutPath.toFile()) .call(); + // The Git object has to be closed, otherwise the move below would fail on Windows with + // AccessDeniedException due to the file handle still being open. + git.close(); IOUtils.removeRecursive(gitRepositoryRootPath); Files.move(gitCheckoutPath, gitRepositoryRootPath); From a4a222ebde9da99b8d28d0681c52a684f3465cdf Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 24 May 2022 00:45:38 +0200 Subject: [PATCH 82/88] add asserts for Git operations --- .../opengrok/indexer/index/IndexDatabaseTest.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 71f8ae77575..859765bcfb1 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -358,15 +358,20 @@ private void changeGitRepository(File repositoryRoot) throws Exception { assertTrue(rmFile.exists()); git.rm().addFilepattern("main.o").call(); git.commit().setMessage("delete").setAuthor("foo", "foobar@example.com").setAll(true).call(); + assertFalse(rmFile.exists()); // Rename some file. - File fooFile = new File(repositoryRoot, "Makefile"); + final String fooFileName = "Makefile"; + final String barFileName = "Makefile.renamed"; + File fooFile = new File(repositoryRoot, fooFileName); assertTrue(fooFile.exists()); - File barFile = new File(repositoryRoot, "Makefile.renamed"); + File barFile = new File(repositoryRoot, barFileName); assertTrue(fooFile.renameTo(barFile)); - git.add().addFilepattern("Makefile.renamed").call(); - git.rm().addFilepattern("Makefile").call(); + git.add().addFilepattern(barFileName).call(); + git.rm().addFilepattern(fooFileName).call(); git.commit().setMessage("rename").setAuthor("foo", "foobar@example.com").setAll(true).call(); + assertTrue(barFile.exists()); + assertFalse(fooFile.exists()); addMergeCommit(git, repositoryRoot); } From 08db34cdc59b5059d0acd62a8aa3dc26fc3416b4 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 24 May 2022 10:03:19 +0200 Subject: [PATCH 83/88] close the Git object --- .../java/org/opengrok/indexer/index/IndexDatabaseTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 859765bcfb1..923672568c3 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -568,20 +568,22 @@ void testHistoryBasedReindexVsProjectWithDiverseRepos(boolean useCvs) throws Exc // Clone Git repository underneath the project. String cloneUrl = Path.of(repository.getSourceRoot(), "git").toFile().toURI().toString(); Path repositoryRootPath = Path.of(repository.getSourceRoot(), projectName, disabledGitRepoName); - Git.cloneRepository() + Git git = Git.cloneRepository() .setURI(cloneUrl) .setDirectory(repositoryRootPath.toFile()) .call(); + git.close(); assertTrue(repositoryRootPath.toFile().isDirectory()); } // Clone Git repository underneath the project and make a change there. String cloneUrl = Path.of(repository.getSourceRoot(), "git").toFile().toURI().toString(); Path repositoryRootPath = Path.of(repository.getSourceRoot(), projectName, "git"); - Git.cloneRepository() + Git git = Git.cloneRepository() .setURI(cloneUrl) .setDirectory(repositoryRootPath.toFile()) .call(); + git.close(); assertTrue(repositoryRootPath.toFile().isDirectory()); changeGitRepository(repositoryRootPath.toFile()); From 6dc8614bae871aeaabd6efc238132ffb6d8aa5a7 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 24 May 2022 10:30:22 +0200 Subject: [PATCH 84/88] fix the test copyDirectory needs to copy sub-directories as well --- .../indexer/index/IndexDatabaseTest.java | 1 + .../opengrok/indexer/util/TestRepository.java | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 923672568c3..0d86426b0d6 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -559,6 +559,7 @@ void testHistoryBasedReindexVsProjectWithDiverseRepos(boolean useCvs) throws Exc Path destinationPath = Path.of(repository.getSourceRoot(), projectName, subrepoName); Path sourcePath = Path.of(repository.getSourceRoot(), "cvs_test", "cvsrepo"); assertTrue(sourcePath.toFile().exists()); + assertTrue(destinationPath.toFile().mkdirs()); repository.copyDirectory(sourcePath, destinationPath); assertTrue(destinationPath.toFile().exists()); diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java index 2bf1c753995..167980d139f 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/util/TestRepository.java @@ -104,6 +104,12 @@ public void create(@NotNull final URL url) throws IOException, URISyntaxExceptio } } + /** + * Assumes the destination directory exists. + * @param src source directory + * @param dest destination directory + * @throws IOException on error + */ public void copyDirectory(Path src, Path dest) throws IOException { try (Stream stream = Files.walk(src)) { stream.forEach(sourceFile -> { @@ -112,8 +118,14 @@ public void copyDirectory(Path src, Path dest) throws IOException { } try { Path destRelativePath = getDestinationRelativePath(src, sourceFile); - Files.copy(sourceFile, dest.resolve(destRelativePath.toString()), - REPLACE_EXISTING, COPY_ATTRIBUTES); + Path destPath = dest.resolve(destRelativePath); + if (Files.isDirectory(sourceFile)) { + if (!Files.exists(destPath)) { + Files.createDirectory(destPath); + } + return; + } + Files.copy(sourceFile, destPath, REPLACE_EXISTING, COPY_ATTRIBUTES); } catch (Exception e) { throw new RuntimeException(e); } From 7eea590e3cde4dde5021cf7ab084d27b735a2c87 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 24 May 2022 13:42:37 +0200 Subject: [PATCH 85/88] remove obsolete comment the index is now created for each test separately --- .../java/org/opengrok/indexer/index/IndexDatabaseTest.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 0d86426b0d6..3d40aed6e48 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -222,10 +222,6 @@ void testCleanupAfterIndexRemoval(boolean historyBasedReindex) throws Exception IndexDatabase idb = new IndexDatabase(project); assertNotNull(idb); - // Note that the file to remove has to be different from the one used - // in {@code testGetDefinitions} because it shares the same index - // and this test is going to remove the file and therefore related - // definitions. String fileName = "header.h"; File gitRoot = new File(repository.getSourceRoot(), projectName); assertTrue(new File(gitRoot, fileName).exists()); From 6f227d570f4723dc17c02f68ea06b0601f4b8490 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 24 May 2022 13:48:10 +0200 Subject: [PATCH 86/88] do not use main.o for Git tests ELF analyzer uses RandomAccessFile which has troubles with closing the file on Windows. As a result main.o cannot be deleted which leads to failure of the testGetIndexDownArgs. --- .../org/opengrok/indexer/index/IndexDatabaseTest.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index 3d40aed6e48..ad6c6fb6d9f 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -350,9 +350,11 @@ private void changeGitRepository(File repositoryRoot) throws Exception { assertTrue(mainFile.exists()); changeFileAndCommit(git, mainFile, "new commit"); - File rmFile = new File(repositoryRoot, "main.o"); + // Delete a file. + final String deletedFileName = "header.h"; + File rmFile = new File(repositoryRoot, deletedFileName); assertTrue(rmFile.exists()); - git.rm().addFilepattern("main.o").call(); + git.rm().addFilepattern(deletedFileName).call(); git.commit().setMessage("delete").setAuthor("foo", "foobar@example.com").setAll(true).call(); assertFalse(rmFile.exists()); @@ -503,7 +505,7 @@ void testGetIndexDownArgs(boolean mergeCommits, boolean renamedFiles, boolean hi assertEquals(expectedFileSet, args.works.stream().map(v -> Path.of(v.path)).collect(Collectors.toSet())); assertEquals(Set.of( - Path.of("/git/main.o"), + Path.of("/git/header.h"), Path.of("/git/main.c"), Path.of("/git/Makefile") ), listener.getRemovedFiles().stream().map(Path::of).collect(Collectors.toSet())); From b0a8246bf4accfb3a57771e80030265cb0eb1310 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 24 May 2022 17:54:47 +0200 Subject: [PATCH 87/88] fix Windows path --- .../src/main/java/org/opengrok/indexer/index/IndexDatabase.java | 1 + 1 file changed, 1 insertion(+) diff --git a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java index b779e31d0ae..6f14d6feaa3 100644 --- a/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java +++ b/opengrok-indexer/src/main/java/org/opengrok/indexer/index/IndexDatabase.java @@ -1493,6 +1493,7 @@ void indexDown(File dir, String parent, IndexDownArgs args) throws IOException { */ private void processFileIncremental(IndexDownArgs args, File file, String path) throws IOException { if (uidIter != null) { + path = Util.fixPathIfWindows(path); // Traverse terms until reaching one that matches the path of given file. while (uidIter != null && uidIter.term() != null && uidIter.term().compareTo(emptyBR) != 0 From 855e7d602b9c6c61751617c0dec23c5a6efd14e9 Mon Sep 17 00:00:00 2001 From: Vladimir Kotal Date: Tue, 24 May 2022 18:03:43 +0200 Subject: [PATCH 88/88] use native path separator fixes the test on Windows --- .../java/org/opengrok/indexer/index/IndexDatabaseTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java index ad6c6fb6d9f..1cb247660c9 100644 --- a/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java +++ b/opengrok-indexer/src/test/java/org/opengrok/indexer/index/IndexDatabaseTest.java @@ -651,7 +651,9 @@ void testHistoryBasedReindexWithEligibleSubRepo() throws Exception { assertNotNull(fileCollector); assertTrue(fileCollector.getFiles().size() > 1); assertTrue(fileCollector.getFiles(). - contains("/" + gitProject.getName() + "/" + subRepoName + "/" + changedFileName)); + contains(File.separator + gitProject.getName() + + File.separator + subRepoName + + File.separator + changedFileName)); } /**