From 8a3d260e0f02fbecda8ddc637f537d393ff5b179 Mon Sep 17 00:00:00 2001
From: Paul Masurel
Date: Wed, 31 Aug 2022 11:58:19 +0900
Subject: [PATCH] Minor refactoring.

---
 src/core/index.rs              | 30 +++++++++++++++++++++++++++++-
 src/indexer/index_writer.rs    |  4 +---
 src/indexer/segment_updater.rs | 31 +------------------------------
 src/indexer/segment_writer.rs  |  2 +-
 src/postings/mod.rs            |  2 +-
 5 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/src/core/index.rs b/src/core/index.rs
index fbb08a1e03..f3af70ef5a 100644
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -16,7 +16,7 @@ use crate::directory::MmapDirectory;
 use crate::directory::{Directory, ManagedDirectory, RamDirectory, INDEX_WRITER_LOCK};
 use crate::error::{DataCorruption, TantivyError};
 use crate::indexer::index_writer::{MAX_NUM_THREAD, MEMORY_ARENA_NUM_BYTES_MIN};
-use crate::indexer::segment_updater::save_new_metas;
+use crate::indexer::segment_updater::save_metas;
 use crate::reader::{IndexReader, IndexReaderBuilder};
 use crate::schema::{Field, FieldType, Schema};
 use crate::tokenizer::{TextAnalyzer, TokenizerManager};
@@ -47,6 +47,34 @@ fn load_metas(
         .map_err(From::from)
 }
 
+/// Save the index meta file.
+/// This operation is atomic :
+/// Either
+/// - it fails, in which case an error is returned,
+/// and the `meta.json` remains untouched,
+/// - it succeeds, and `meta.json` is written
+/// and flushed.
+///
+/// This method is not part of tantivy's public API
+fn save_new_metas(
+    schema: Schema,
+    index_settings: IndexSettings,
+    directory: &dyn Directory,
+) -> crate::Result<()> {
+    save_metas(
+        &IndexMeta {
+            index_settings,
+            segments: Vec::new(),
+            schema,
+            opstamp: 0u64,
+            payload: None,
+        },
+        directory,
+    )?;
+    directory.sync_directory()?;
+    Ok(())
+}
+
 /// IndexBuilder can be used to create an index.
 ///
 /// Use in conjunction with `SchemaBuilder`. Global index settings
diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs
index dcc9cba9ed..0bbf2f5b51 100644
--- a/src/indexer/index_writer.rs
+++ b/src/indexer/index_writer.rs
@@ -174,9 +174,7 @@ fn index_documents(
     segment_updater: &mut SegmentUpdater,
     mut delete_cursor: DeleteCursor,
 ) -> crate::Result<()> {
-    let schema = segment.schema();
-
-    let mut segment_writer = SegmentWriter::for_segment(memory_budget, segment.clone(), schema)?;
+    let mut segment_writer = SegmentWriter::for_segment(memory_budget, segment.clone())?;
     for document_group in grouped_document_iterator {
         for doc in document_group {
             segment_writer.add_document(doc)?;
diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs
index 32819b190a..66c2a3d63c 100644
--- a/src/indexer/segment_updater.rs
+++ b/src/indexer/segment_updater.rs
@@ -25,39 +25,10 @@ use crate::indexer::{
     DefaultMergePolicy, MergeCandidate, MergeOperation, MergePolicy, SegmentEntry,
     SegmentSerializer,
 };
-use crate::schema::Schema;
 use crate::{FutureResult, Opstamp};
 
 const NUM_MERGE_THREADS: usize = 4;
 
-/// Save the index meta file.
-/// This operation is atomic :
-/// Either
-/// - it fails, in which case an error is returned,
-/// and the `meta.json` remains untouched,
-/// - it succeeds, and `meta.json` is written
-/// and flushed.
-///
-/// This method is not part of tantivy's public API
-pub fn save_new_metas(
-    schema: Schema,
-    index_settings: IndexSettings,
-    directory: &dyn Directory,
-) -> crate::Result<()> {
-    save_metas(
-        &IndexMeta {
-            index_settings,
-            segments: Vec::new(),
-            schema,
-            opstamp: 0u64,
-            payload: None,
-        },
-        directory,
-    )?;
-    directory.sync_directory()?;
-    Ok(())
-}
-
 /// Save the index meta file.
 /// This operation is atomic:
 /// Either
@@ -67,7 +38,7 @@ pub fn save_new_metas(
 /// and flushed.
 ///
 /// This method is not part of tantivy's public API
-fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate::Result<()> {
+pub(crate) fn save_metas(metas: &IndexMeta, directory: &dyn Directory) -> crate::Result<()> {
     info!("save metas");
     let mut buffer = serde_json::to_vec_pretty(metas)?;
     // Just adding a new line at the end of the buffer.
diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs
index 554e04be0c..7381b03c99 100644
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -80,8 +80,8 @@ impl SegmentWriter {
     pub fn for_segment(
         memory_budget_in_bytes: usize,
         segment: Segment,
-        schema: Schema,
     ) -> crate::Result<SegmentWriter> {
+        let schema = segment.schema();
         let tokenizer_manager = segment.index().tokenizers().clone();
         let table_size = compute_initial_table_size(memory_budget_in_bytes)?;
         let segment_serializer = SegmentSerializer::for_segment(segment, false)?;
diff --git a/src/postings/mod.rs b/src/postings/mod.rs
index 7fa887bad3..d02a448bc1 100644
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -227,7 +227,7 @@ pub mod tests {
 
         {
             let mut segment_writer =
-                SegmentWriter::for_segment(3_000_000, segment.clone(), schema).unwrap();
+                SegmentWriter::for_segment(3_000_000, segment.clone()).unwrap();
             {
                 // checking that position works if the field has two values
                 let op = AddOperation {