From 5f08b1a20313cc021430ac8da9456b2c67f0e197 Mon Sep 17 00:00:00 2001
From: phiresky
Date: Tue, 18 May 2021 15:31:02 +0200
Subject: [PATCH 1/9] dynamic read implementation

---
 Cargo.toml                                   |  2 +-
 src/directory/file_slice.rs                  | 10 ++++--
 src/directory/mod.rs                         |  2 ++
 src/directory/on_demand_bytes.rs             | 34 ++++++++++++++++++++
 src/lib.rs                                   |  1 +
 src/termdict/fst_termdict/streamer.rs        |  4 +--
 src/termdict/fst_termdict/term_info_store.rs |  1 +
 src/termdict/fst_termdict/termdict.rs        | 23 +++++++++----
 8 files changed, 65 insertions(+), 12 deletions(-)
 create mode 100644 src/directory/on_demand_bytes.rs

diff --git a/Cargo.toml b/Cargo.toml
index e7130b47e8..0f1460a31f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,7 @@ byteorder = "1"
 crc32fast = "1"
 once_cell = "1"
 regex ={version = "1", default-features = false, features = ["std"]}
-tantivy-fst = "0.3"
+tantivy-fst = {version="0.3", path="../tantivy-fst"}
 memmap = {version = "0.7", optional=true}
 lz4 = {version="1", optional=true}
 brotli = {version="3.3.0", optional=true}
diff --git a/src/directory/file_slice.rs b/src/directory/file_slice.rs
index cc2b97aa6a..285432f469 100644
--- a/src/directory/file_slice.rs
+++ b/src/directory/file_slice.rs
@@ -4,6 +4,8 @@ use crate::common::HasLen;
 use crate::directory::OwnedBytes;
 use std::sync::{Arc, Weak};
 use std::{io, ops::Deref};
+use std::fmt::Debug;
+use super::OnDemandBytes;

 pub type ArcBytes = Arc<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
 pub type WeakArcBytes = Weak<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
@@ -16,7 +18,7 @@ pub type WeakArcBytes = Weak<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
 /// The underlying behavior is therefore specific to the `Directory` that created it.
 /// Despite its name, a `FileSlice` may or may not directly map to an actual file
 /// on the filesystem.
-pub trait FileHandle: 'static + Send + Sync + HasLen {
+pub trait FileHandle: 'static + Send + Sync + HasLen + Debug {
     /// Reads a slice of bytes.
     ///
     /// This method may panic if the range requested is invalid.
     fn read_bytes(&self, from: usize, to: usize) -> io::Result<OwnedBytes>;
@@ -49,7 +51,7 @@ where
 //
 /// It can be cloned and sliced cheaply.
 ///
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 pub struct FileSlice {
     data: Arc<dyn FileHandle>,
     start: usize,
@@ -104,6 +106,10 @@ impl FileSlice {
         self.data.read_bytes(self.start, self.stop)
     }

+    pub fn read_ondemand(&self) -> io::Result<OnDemandBytes> {
+        Ok(OnDemandBytes::new(self.data.clone()))
+    }
+
     /// Reads a specific slice of data.
     ///
     /// This is equivalent to running `file_slice.slice(from, to).read_bytes()`.
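
The `FileHandle` trait above is the single extension point this series hangs off: anything that can serve ranged reads can back a `FileSlice`, which patch 3 later exploits with a chunk-caching `FsDirectory`. For reference, a minimal sketch of an implementation over an in-memory buffer; `InMemoryFile` is a hypothetical illustration, not part of the patch, and assumes `HasLen`, `OwnedBytes`, and `FileHandle` exactly as used above:

    use std::io;

    // Hypothetical illustration only, not part of the patch.
    #[derive(Debug)]
    struct InMemoryFile(Vec<u8>);

    impl HasLen for InMemoryFile {
        fn len(&self) -> usize {
            self.0.len()
        }
    }

    impl FileHandle for InMemoryFile {
        fn read_bytes(&self, from: usize, to: usize) -> io::Result<OwnedBytes> {
            // Per the trait docs, an invalid range may panic; the slice
            // expression below does exactly that.
            Ok(OwnedBytes::new(self.0[from..to].to_vec()))
        }
    }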
diff --git a/src/directory/mod.rs b/src/directory/mod.rs
index 8bd2c31852..f1dda7f786 100644
--- a/src/directory/mod.rs
+++ b/src/directory/mod.rs
@@ -14,6 +14,7 @@ mod file_watcher;
 mod footer;
 mod managed_directory;
 mod owned_bytes;
+mod on_demand_bytes;
 mod ram_directory;
 mod watch_event_router;

 /// Errors specific to the directory module.
@@ -26,6 +27,7 @@ pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK};
 pub(crate) use self::file_slice::{ArcBytes, WeakArcBytes};
 pub use self::file_slice::{FileHandle, FileSlice};
 pub use self::owned_bytes::OwnedBytes;
+pub use self::on_demand_bytes::{OnDemandBytes, OnDemandBox};
 pub use self::ram_directory::RAMDirectory;
 pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle};
 use std::io::{self, BufWriter, Write};
diff --git a/src/directory/on_demand_bytes.rs b/src/directory/on_demand_bytes.rs
new file mode 100644
index 0000000000..389707c2f0
--- /dev/null
+++ b/src/directory/on_demand_bytes.rs
@@ -0,0 +1,34 @@
+use std::{ops::Deref, sync::Arc};
+
+use tantivy_fst::{FakeArr, FakeArrPart, ShRange};
+
+use super::FileHandle;
+
+pub type OnDemandBox = Box<dyn FileHandle>;
+#[derive(Debug)]
+pub struct OnDemandBytes {
+    file: Arc<dyn FileHandle>
+}
+
+impl OnDemandBytes {
+    pub fn new(fh: Arc<dyn FileHandle>) -> OnDemandBytes {
+        OnDemandBytes {
+            file: fh
+        }
+    }
+}
+impl FakeArr for OnDemandBytes {
+    fn len(&self) -> usize {
+        self.file.len()
+    }
+
+    fn read_into(&self, offset: usize, buf: &mut [u8]) -> std::io::Result<()> {
+        let bytes = self.file.read_bytes(offset, offset + buf.len())?;
+        buf.copy_from_slice(&bytes[..]);
+        Ok(())
+    }
+
+    fn as_dyn(&self) -> &dyn FakeArr {
+        self
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 39e21c27ff..57709ca08c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -737,6 +737,7 @@ mod tests {
         let searcher = index_reader.searcher();
         let reader = searcher.segment_reader(0);
         let inverted_index = reader.inverted_index(text_field)?;
+        println!("terms: {:?}", inverted_index.terms());
         let term_abcd = Term::from_field_text(text_field, "abcd");
         assert!(inverted_index
             .read_postings(&term_abcd, IndexRecordOption::WithFreqsAndPositions)?
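
`OnDemandBytes` is the glue between tantivy's `FileHandle` and the `FakeArr` trait that the patched `tantivy-fst` reads through: each `read_into` becomes one ranged `read_bytes` on the underlying handle, so only the bytes the FST traversal actually touches get fetched. A consumer-side sketch (hypothetical helper, using only the trait methods shown above):

    // Reads a little-endian u32 at `offset` through the FakeArr interface.
    fn read_u32_le(arr: &dyn FakeArr, offset: usize) -> std::io::Result<u32> {
        let mut buf = [0u8; 4];
        arr.read_into(offset, &mut buf)?; // one FileHandle::read_bytes(offset, offset + 4)
        Ok(u32::from_le_bytes(buf))
    }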
diff --git a/src/termdict/fst_termdict/streamer.rs b/src/termdict/fst_termdict/streamer.rs
index 66ce02c2ab..f187f9c49b 100644
--- a/src/termdict/fst_termdict/streamer.rs
+++ b/src/termdict/fst_termdict/streamer.rs
@@ -3,7 +3,7 @@ use std::io;
 use super::TermDictionary;
 use crate::postings::TermInfo;
 use crate::termdict::TermOrdinal;
-use tantivy_fst::automaton::AlwaysMatch;
+use tantivy_fst::{FakeArr, automaton::AlwaysMatch};
 use tantivy_fst::map::{Stream, StreamBuilder};
 use tantivy_fst::Automaton;
 use tantivy_fst::{IntoStreamer, Streamer};
@@ -95,7 +95,7 @@ where
     pub fn advance(&mut self) -> bool {
         if let Some((term, term_ord)) = self.stream.next() {
             self.current_key.clear();
-            self.current_key.extend_from_slice(term);
+            self.current_key.extend_from_slice(&term.to_vec());
             self.term_ord = term_ord;
             self.current_value = self.fst_map.term_info_from_ord(term_ord);
             true
diff --git a/src/termdict/fst_termdict/term_info_store.rs b/src/termdict/fst_termdict/term_info_store.rs
index 20b709a2f9..fa33eef4f2 100644
--- a/src/termdict/fst_termdict/term_info_store.rs
+++ b/src/termdict/fst_termdict/term_info_store.rs
@@ -85,6 +85,7 @@ impl TermInfoBlockMeta {
     }
 }

+#[derive(Debug)]
 pub struct TermInfoStore {
     num_terms: usize,
     block_meta_bytes: OwnedBytes,
diff --git a/src/termdict/fst_termdict/termdict.rs b/src/termdict/fst_termdict/termdict.rs
index ff0d4ec5f5..98263cc5f5 100644
--- a/src/termdict/fst_termdict/termdict.rs
+++ b/src/termdict/fst_termdict/termdict.rs
@@ -1,13 +1,14 @@
 use super::term_info_store::{TermInfoStore, TermInfoStoreWriter};
 use super::{TermStreamer, TermStreamerBuilder};
-use crate::common::{BinarySerializable, CountingWriter};
+use crate::{common::{BinarySerializable, CountingWriter}, directory::OnDemandBox};
 use crate::directory::{FileSlice, OwnedBytes};
 use crate::error::DataCorruption;
 use crate::postings::TermInfo;
 use crate::termdict::TermOrdinal;
+use crate::directory::OnDemandBytes;
 use once_cell::sync::Lazy;
 use std::io::{self, Write};
-use tantivy_fst::raw::Fst;
+use tantivy_fst::{FakeArr, raw::Fst};
 use tantivy_fst::Automaton;

 fn convert_fst_error(e: tantivy_fst::Error) -> io::Error {
@@ -85,13 +86,17 @@ where
     }
 }

-fn open_fst_index(fst_file: FileSlice) -> crate::Result<tantivy_fst::Map<OwnedBytes>> {
-    let bytes = fst_file.read_bytes()?;
+fn open_fst_index(fst_file: FileSlice) -> crate::Result<tantivy_fst::Map<OnDemandBytes>> {
+    println!("open_fst_index()");
+    let bytes = fst_file.read_ondemand()?;
     let fst = Fst::new(bytes)
         .map_err(|err| DataCorruption::comment_only(format!("Fst data is corrupted: {:?}", err)))?;
-    Ok(tantivy_fst::Map::from(fst))
+    let ret = Ok(tantivy_fst::Map::from(fst));
+    println!("open_fst_index RET");
+    return ret;
 }

+
 static EMPTY_TERM_DICT_FILE: Lazy<FileSlice> = Lazy::new(|| {
     let term_dictionary_data: Vec<u8> = TermDictionaryBuilder::create(Vec::<u8>::new())
         .expect("Creating a TermDictionaryBuilder in a Vec<u8> should never fail")
@@ -106,8 +111,9 @@ static EMPTY_TERM_DICT_FILE: Lazy<FileSlice> = Lazy::new(|| {
 /// The `Fst` crate is used to associate terms to their
 /// respective `TermOrdinal`. The `TermInfoStore` then makes it
 /// possible to fetch the associated `TermInfo`.
+#[derive(Debug)]
 pub struct TermDictionary {
-    fst_index: tantivy_fst::Map<OwnedBytes>,
+    fst_index: tantivy_fst::Map<OnDemandBytes>,
     term_info_store: TermInfoStore,
 }

@@ -139,7 +145,10 @@ impl TermDictionary {
     /// Returns the ordinal associated to a given term.
     pub fn term_ord<K: AsRef<[u8]>>(&self, key: K) -> io::Result<Option<TermOrdinal>> {
-        Ok(self.fst_index.get(key))
+        println!("termdict.term_ord({:?})", String::from_utf8_lossy(key.as_ref()));
+        let ret = Ok(self.fst_index.get(key));
+        println!("termdict.term_ord RET");
+        return ret;
     }

     /// Returns the term associated to a given term ordinal.

From 866a11c3e19c4999c910cf334c00f52fb3b0ab9f Mon Sep 17 00:00:00 2001
From: phiresky
Date: Tue, 18 May 2021 17:09:37 +0200
Subject: [PATCH 2/9] works!

---
 src/directory/file_slice.rs           | 26 +++++++++++++++++++++-----
 src/directory/on_demand_bytes.rs      |  2 ++
 src/directory/owned_bytes.rs          | 23 +++++++++++++++++++++--
 src/directory/ram_directory.rs        | 10 ++--------
 src/directory/watch_event_router.rs   |  9 +++++++--
 src/lib.rs                            |  6 ++++--
 src/termdict/fst_termdict/termdict.rs |  8 ++++----
 7 files changed, 61 insertions(+), 23 deletions(-)

diff --git a/src/directory/file_slice.rs b/src/directory/file_slice.rs
index 285432f469..313666f90a 100644
--- a/src/directory/file_slice.rs
+++ b/src/directory/file_slice.rs
@@ -1,11 +1,12 @@
 use stable_deref_trait::StableDeref;
+use tantivy_fst::FakeArr;
+
+use super::OnDemandBytes;
 use crate::common::HasLen;
 use crate::directory::OwnedBytes;
+use std::fmt::Debug;
 use std::sync::{Arc, Weak};
 use std::{io, ops::Deref};
-use std::fmt::Debug;
-use super::OnDemandBytes;

 pub type ArcBytes = Arc<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
 pub type WeakArcBytes = Weak<dyn Deref<Target = [u8]> + Send + Sync + 'static>;
@@ -25,6 +26,21 @@ pub trait FileHandle: 'static + Send + Sync + HasLen + Debug {
     fn read_bytes(&self, from: usize, to: usize) -> io::Result<OwnedBytes>;
 }

+impl FakeArr for FileSlice {
+    fn len(&self) -> usize {
+        self.stop - self.start
+    }
+
+    fn read_into(&self, offset: usize, buf: &mut [u8]) -> io::Result<()> {
+        buf.copy_from_slice(&self.read_bytes_slice(offset, offset + buf.len())?);
+        Ok(())
+    }
+
+    fn as_dyn(&self) -> &dyn FakeArr {
+        self
+    }
+}
+
 impl FileHandle for &'static [u8] {
     fn read_bytes(&self, from: usize, to: usize) -> io::Result<OwnedBytes> {
         let bytes = &self[from..to];
         Ok(OwnedBytes::new(bytes))
     }
 }
@@ -81,7 +97,7 @@ impl FileSlice {
     ///
     /// Panics if `to < from` or if `to` exceeds the filesize.
     pub fn slice(&self, from: usize, to: usize) -> FileSlice {
-        assert!(to <= self.len());
+        assert!(to <= <Self as HasLen>::len(&self));
         assert!(to >= from);
         FileSlice {
             data: self.data.clone(),
@@ -135,7 +151,7 @@ impl FileSlice {
     /// Splits the file slice at the given offset and return two file slices.
     /// `file_slice[..split_offset]` and `file_slice[split_offset..]`.
pub fn split_from_end(self, right_len: usize) -> (FileSlice, FileSlice) { - let left_len = self.len() - right_len; + let left_len = HasLen::len(&self) - right_len; self.split(left_len) } @@ -144,7 +160,7 @@ impl FileSlice { /// /// Equivalent to `.slice(from_offset, self.len())` pub fn slice_from(&self, from_offset: usize) -> FileSlice { - self.slice(from_offset, self.len()) + self.slice(from_offset, ::len(&self)) } /// Like `.slice(...)` but enforcing only the `to` diff --git a/src/directory/on_demand_bytes.rs b/src/directory/on_demand_bytes.rs index 389707c2f0..42426ca71f 100644 --- a/src/directory/on_demand_bytes.rs +++ b/src/directory/on_demand_bytes.rs @@ -23,7 +23,9 @@ impl FakeArr for OnDemandBytes { } fn read_into(&self, offset: usize, buf: &mut [u8]) -> std::io::Result<()> { + assert!(offset + buf.len() <= self.len(), "{} <= {}", offset + buf.len(), self.len()); let bytes = self.file.read_bytes(offset, offset + buf.len())?; + assert_eq!(buf.len(), bytes.len()); buf.copy_from_slice(&bytes[..]); Ok(()) } diff --git a/src/directory/owned_bytes.rs b/src/directory/owned_bytes.rs index 73303f50cc..93cefced10 100644 --- a/src/directory/owned_bytes.rs +++ b/src/directory/owned_bytes.rs @@ -1,5 +1,6 @@ use crate::directory::FileHandle; use stable_deref_trait::StableDeref; +//use tantivy_fst::FakeArr; use std::convert::TryInto; use std::mem; use std::ops::Deref; @@ -22,6 +23,24 @@ impl FileHandle for OwnedBytes { } } + +/*impl FakeArr for OwnedBytes { + fn len(&self) -> usize { + self.data.len() + } + + fn read_into(&self, offset: usize, buf: &mut [u8]) -> std::io::Result<()> { + let bytes = self.read_bytes(offset, offset + buf.len())?; + buf.copy_from_slice(&bytes[..]); + Ok(()) + } + + fn as_dyn(&self) -> &dyn FakeArr { + self + } +}*/ + + impl OwnedBytes { /// Creates an empty `OwnedBytes`. pub fn empty() -> OwnedBytes { @@ -120,8 +139,8 @@ impl fmt::Debug for OwnedBytes { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // We truncate the bytes in order to make sure the debug string // is not too long. - let bytes_truncated: &[u8] = if self.len() > 8 { - &self.as_slice()[..10] + let bytes_truncated: &[u8] = if self.len() > 1000 { + &self.as_slice()[..1000] } else { self.as_slice() }; diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index f5ddcd955e..589247e451 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -80,7 +80,7 @@ impl TerminatingWrite for VecWriter { } } -#[derive(Default)] +#[derive(Default, Debug)] struct InnerDirectory { fs: HashMap, watch_router: WatchCallbackList, @@ -119,18 +119,12 @@ impl InnerDirectory { } } -impl fmt::Debug for RAMDirectory { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "RAMDirectory") - } -} - /// A Directory storing everything in anonymous memory. /// /// It is mainly meant for unit testing. /// Writes are only made visible upon flushing. /// -#[derive(Clone, Default)] +#[derive(Clone, Default, Debug)] pub struct RAMDirectory { fs: Arc>, } diff --git a/src/directory/watch_event_router.rs b/src/directory/watch_event_router.rs index c42d03be30..72160dee95 100644 --- a/src/directory/watch_event_router.rs +++ b/src/directory/watch_event_router.rs @@ -3,11 +3,16 @@ use futures::{Future, TryFutureExt}; use std::sync::Arc; use std::sync::RwLock; use std::sync::Weak; +use std::fmt::Debug; /// Cloneable wrapper for callbacks registered when watching files of a `Directory`. 
#[derive(Clone)] pub struct WatchCallback(Arc); - +impl Debug for WatchCallback { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + Ok(()) + } +} impl WatchCallback { /// Wraps a `Fn()` to create a WatchCallback. pub fn new(op: F) -> Self { @@ -23,7 +28,7 @@ impl WatchCallback { /// /// It registers callbacks (See `.subscribe(...)`) and /// calls them upon calls to `.broadcast(...)`. -#[derive(Default)] +#[derive(Default, Debug)] pub struct WatchCallbackList { router: RwLock>>, } diff --git a/src/lib.rs b/src/lib.rs index 57709ca08c..4f985785fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -282,7 +282,7 @@ pub struct DocAddress(pub SegmentLocalId, pub DocId); #[cfg(test)] mod tests { - use crate::collector::tests::TEST_COLLECTOR_WITH_SCORE; + use crate::{Directory, collector::tests::TEST_COLLECTOR_WITH_SCORE}; use crate::core::SegmentReader; use crate::docset::{DocSet, TERMINATED}; use crate::query::BooleanQuery; @@ -633,10 +633,12 @@ mod tests { let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; - let negative_val = -1i64; + let negative_val = 242i64; index_writer.add_document(doc!(value_field => negative_val)); index_writer.commit()?; + println!("dir: {:#?}", index.directory()); let reader = index.reader()?; + let searcher = reader.searcher(); let term = Term::from_field_i64(value_field, negative_val); let mut postings = searcher diff --git a/src/termdict/fst_termdict/termdict.rs b/src/termdict/fst_termdict/termdict.rs index 98263cc5f5..9be4543c16 100644 --- a/src/termdict/fst_termdict/termdict.rs +++ b/src/termdict/fst_termdict/termdict.rs @@ -86,10 +86,9 @@ where } } -fn open_fst_index(fst_file: FileSlice) -> crate::Result> { +fn open_fst_index(fst_file: FileSlice) -> crate::Result> { println!("open_fst_index()"); - let bytes = fst_file.read_ondemand()?; - let fst = Fst::new(bytes) + let fst = Fst::new(fst_file) .map_err(|err| DataCorruption::comment_only(format!("Fst data is corrupted: {:?}", err)))?; let ret = Ok(tantivy_fst::Map::from(fst)); println!("open_fst_index RET"); @@ -97,6 +96,7 @@ fn open_fst_index(fst_file: FileSlice) -> crate::Result = Lazy::new(|| { let term_dictionary_data: Vec = TermDictionaryBuilder::create(Vec::::new()) .expect("Creating a TermDictionaryBuilder in a Vec should never fail") @@ -113,7 +113,7 @@ static EMPTY_TERM_DICT_FILE: Lazy = Lazy::new(|| { /// possible to fetch the associated `TermInfo`. 
#[derive(Debug)] pub struct TermDictionary { - fst_index: tantivy_fst::Map, + fst_index: tantivy_fst::Map, term_info_store: TermInfoStore, } From 0671d2426fd9829f0c9e7636dff941f430fdfe75 Mon Sep 17 00:00:00 2001 From: phiresky Date: Wed, 19 May 2021 21:54:44 +0200 Subject: [PATCH 3/9] better --- Cargo.toml | 1 + src/core/inverted_index_reader.rs | 2 +- src/directory/file_slice.rs | 9 +- src/directory/fs_directory.rs | 165 +++++++++++++++++++ src/directory/mod.rs | 6 +- src/directory/on_demand_bytes.rs | 36 ---- src/fieldnorm/reader.rs | 17 +- src/postings/block_segment_postings.rs | 59 ++++--- src/postings/compression/mod.rs | 9 +- src/postings/compression/vint.rs | 10 +- src/postings/skip.rs | 14 +- src/termdict/fst_termdict/term_info_store.rs | 35 ++-- src/termdict/fst_termdict/termdict.rs | 5 +- 13 files changed, 255 insertions(+), 113 deletions(-) create mode 100644 src/directory/fs_directory.rs delete mode 100644 src/directory/on_demand_bytes.rs diff --git a/Cargo.toml b/Cargo.toml index 0f1460a31f..3654021c3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,6 +48,7 @@ chrono = "0.4" smallvec = "1" rayon = "1" lru = "0.6" +backtrace = "0.3.59" [target.'cfg(windows)'.dependencies] winapi = "0.3" diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index 2f4edf76d4..9c4a1d049c 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -93,7 +93,7 @@ impl InvertedIndexReader { let start_offset = term_info.postings_start_offset as usize; let stop_offset = term_info.postings_stop_offset as usize; let postings_slice = self.postings_file_slice.slice(start_offset, stop_offset); - block_postings.reset(term_info.doc_freq, postings_slice.read_bytes()?); + block_postings.reset(term_info.doc_freq, postings_slice); Ok(()) } diff --git a/src/directory/file_slice.rs b/src/directory/file_slice.rs index 313666f90a..b460cd0a04 100644 --- a/src/directory/file_slice.rs +++ b/src/directory/file_slice.rs @@ -1,7 +1,6 @@ use stable_deref_trait::StableDeref; -use tantivy_fst::FakeArr; +pub use tantivy_fst::FakeArr; -use super::OnDemandBytes; use crate::common::HasLen; use crate::directory::OwnedBytes; use std::fmt::Debug; @@ -122,10 +121,6 @@ impl FileSlice { self.data.read_bytes(self.start, self.stop) } - pub fn read_ondemand(&self) -> io::Result { - Ok(OnDemandBytes::new(self.data.clone())) - } - /// Reads a specific slice of data. /// /// This is equivalent to running `file_slice.slice(from, to).read_bytes()`. 
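
With `FakeArr` implemented for `FileSlice` itself (previous patch), this patch can hand slices directly to `FakeArr` consumers instead of materializing their bytes first; `Fst::new(fst_file)` and the `block_postings.reset(term_info.doc_freq, postings_slice)` change above are the two central examples. A hypothetical consumer, to make the access pattern concrete (illustration only, not part of the patch):

    // Sums all bytes of a FileSlice through FakeArr, 256 bytes at a time.
    // Each read_into is one ranged read on the backing FileHandle, so the
    // slice is never loaded wholesale.
    fn checksum(slice: &FileSlice) -> std::io::Result<u32> {
        let total = FakeArr::len(slice); // disambiguated from HasLen::len
        let (mut pos, mut sum) = (0usize, 0u32);
        let mut buf = [0u8; 256];
        while pos < total {
            let n = buf.len().min(total - pos);
            slice.read_into(pos, &mut buf[..n])?;
            sum = buf[..n].iter().fold(sum, |s, &b| s.wrapping_add(u32::from(b)));
            pos += n;
        }
        Ok(sum)
    }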
@@ -133,7 +128,7 @@ impl FileSlice { assert!(from <= to); assert!( self.start + to <= self.stop, - "`to` exceeds the fileslice length" + "`to` exceeds the fileslice length, {}, {}, {}", self.start, to, self.stop ); self.data.read_bytes(self.start + from, self.start + to) } diff --git a/src/directory/fs_directory.rs b/src/directory/fs_directory.rs new file mode 100644 index 0000000000..e409fdd5bb --- /dev/null +++ b/src/directory/fs_directory.rs @@ -0,0 +1,165 @@ +use std::{ + collections::BTreeMap, + fs::File, + io::{BufWriter, Read, Seek, SeekFrom, Write}, + ops::DerefMut, + path::{Path, PathBuf}, + sync::{Arc, RwLock}, +}; + +use crate::{ + directory::{error::OpenWriteError, FileHandle, OwnedBytes, TerminatingWrite, WatchHandle}, + Directory, HasLen, +}; + +use super::{ + error::{DeleteError, OpenReadError}, + AntiCallToken, WatchCallback, WritePtr, +}; + +#[derive(Debug, Clone)] +pub struct FsDirectory { + root: PathBuf, +} + +impl FsDirectory { + pub fn new(path: &Path) -> FsDirectory { + FsDirectory { + root: path.to_path_buf(), + } + } +} + +struct Noop {} +impl Write for Noop { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + Ok(buf.len()) + } + + fn flush(&mut self) -> std::io::Result<()> { + Ok(()) + } +} +impl TerminatingWrite for Noop { + fn terminate_ref(&mut self, _: AntiCallToken) -> std::io::Result<()> { + Ok(()) + } +} +impl Directory for FsDirectory { + fn get_file_handle(&self, path: &Path) -> Result, OpenReadError> { + Ok(Box::new(FSFile::new(&self.root.join(path)))) + } + + fn delete(&self, path: &Path) -> Result<(), DeleteError> { + println!("delete {:?}", path); + Ok(()) + } + + fn exists(&self, path: &Path) -> Result { + todo!() + } + + fn open_write(&self, path: &Path) -> Result { + Ok(BufWriter::new(Box::new(Noop {}))) + } + + fn atomic_read(&self, path: &Path) -> Result, OpenReadError> { + let path = self.root.join(path); + println!("atomic_read {:?}", path); + Ok(std::fs::read(path).unwrap()) + } + + fn atomic_write(&self, path: &Path, data: &[u8]) -> std::io::Result<()> { + todo!() + } + + fn watch(&self, watch_callback: WatchCallback) -> crate::Result { + Ok(WatchHandle::empty()) + } +} + +#[derive(Debug)] +struct FSFile { + path: PathBuf, + file: Arc>, + len: usize, + cache: RwLock>>, +} +const CS: usize = 4096; + +impl FSFile { + pub fn new(path: &Path) -> FSFile { + let mut f = File::open(path).unwrap(); + let len = f.seek(SeekFrom::End(0)).unwrap(); + FSFile { + path: path.to_path_buf(), + file: Arc::new(RwLock::new(f)), + len: len as usize, + cache: RwLock::new(BTreeMap::new()), + } + } + fn read_bytes_real(&self, from: usize, to: usize) -> Vec { + let len = to - from; + + eprintln!( + "READ {} @ {}, len {}", + self.path.to_string_lossy(), + from, + len + ); + if len == 51616 { + println!("{:?}", backtrace::Backtrace::new()); + } + if len > 1_000_000 { + println!("{:?}", backtrace::Backtrace::new()); + } + if len > 2_000_000 { + panic!("tried to read too much"); + } + let mut f = self.file.write().unwrap(); + f.seek(SeekFrom::Start(from as u64)).unwrap(); + let mut buf = Vec::with_capacity(len); + let flonk = f.deref_mut(); + (flonk).take(len as u64).read_to_end(&mut buf).unwrap(); + return buf; + } +} +impl FileHandle for FSFile { + fn read_bytes(&self, from: usize, to: usize) -> std::io::Result { + let len = to - from; + eprintln!( + "GET {} @ {}, len {}", + self.path.to_string_lossy(), + from, + len + ); + let starti = from / CS; + let endi = to / CS; + let startofs = from % CS; + let endofs = to % CS; + let mut out_buf = vec![0u8; len]; + //let 
toget = vec![]; + let mut cache = self.cache.write().unwrap(); + let mut written = 0; + for i in starti..=endi { + let startofs = if i == starti { startofs } else { 0 }; + let endofs = if i == endi { endofs } else { CS }; + let chunk = cache.entry(i).or_insert_with(|| { + self.read_bytes_real(i * CS, std::cmp::min((i + 1) * CS, self.len())) + }); + let chunk = &chunk[startofs..endofs]; + println!("{} {} {} {}", out_buf.len(), startofs, endofs, chunk.len()); + let write_len = std::cmp::min(chunk.len(), len); + out_buf[written..written + write_len] + .copy_from_slice(&chunk); + written += write_len; + } + + Ok(OwnedBytes::new(out_buf)) + } +} +impl HasLen for FSFile { + fn len(&self) -> usize { + self.len + } +} diff --git a/src/directory/mod.rs b/src/directory/mod.rs index f1dda7f786..7361900dfb 100644 --- a/src/directory/mod.rs +++ b/src/directory/mod.rs @@ -14,8 +14,8 @@ mod file_watcher; mod footer; mod managed_directory; mod owned_bytes; -mod on_demand_bytes; mod ram_directory; +mod fs_directory; mod watch_event_router; /// Errors specific to the directory module. @@ -25,9 +25,8 @@ pub use self::directory::DirectoryLock; pub use self::directory::{Directory, DirectoryClone}; pub use self::directory_lock::{Lock, INDEX_WRITER_LOCK, META_LOCK}; pub(crate) use self::file_slice::{ArcBytes, WeakArcBytes}; -pub use self::file_slice::{FileHandle, FileSlice}; +pub use self::file_slice::{FileHandle, FileSlice, FakeArr}; pub use self::owned_bytes::OwnedBytes; -pub use self::on_demand_bytes::{OnDemandBytes, OnDemandBox}; pub use self::ram_directory::RAMDirectory; pub use self::watch_event_router::{WatchCallback, WatchCallbackList, WatchHandle}; use std::io::{self, BufWriter, Write}; @@ -49,6 +48,7 @@ pub struct GarbageCollectionResult { #[cfg(feature = "mmap")] pub use self::mmap_directory::MmapDirectory; +pub use self::fs_directory::FsDirectory; pub use self::managed_directory::ManagedDirectory; diff --git a/src/directory/on_demand_bytes.rs b/src/directory/on_demand_bytes.rs deleted file mode 100644 index 42426ca71f..0000000000 --- a/src/directory/on_demand_bytes.rs +++ /dev/null @@ -1,36 +0,0 @@ -use std::{ops::Deref, sync::Arc}; - -use tantivy_fst::{FakeArr, FakeArrPart, ShRange}; - -use super::FileHandle; - -pub type OnDemandBox = Box; -#[derive(Debug)] -pub struct OnDemandBytes { - file: Arc -} - -impl OnDemandBytes { - pub fn new(fh: Arc) -> OnDemandBytes { - OnDemandBytes { - file: fh - } - } -} -impl FakeArr for OnDemandBytes { - fn len(&self) -> usize { - self.file.len() - } - - fn read_into(&self, offset: usize, buf: &mut [u8]) -> std::io::Result<()> { - assert!(offset + buf.len() <= self.len(), "{} <= {}", offset + buf.len(), self.len()); - let bytes = self.file.read_bytes(offset, offset + buf.len())?; - assert_eq!(buf.len(), bytes.len()); - buf.copy_from_slice(&bytes[..]); - Ok(()) - } - - fn as_dyn(&self) -> &dyn FakeArr { - self - } -} diff --git a/src/fieldnorm/reader.rs b/src/fieldnorm/reader.rs index e1ce150076..f1f8ef805b 100644 --- a/src/fieldnorm/reader.rs +++ b/src/fieldnorm/reader.rs @@ -1,5 +1,7 @@ +use tantivy_fst::FakeArr; + use super::{fieldnorm_to_id, id_to_fieldnorm}; -use crate::common::CompositeFile; +use crate::{HasLen, common::CompositeFile}; use crate::directory::FileSlice; use crate::directory::OwnedBytes; use crate::schema::Field; @@ -71,7 +73,7 @@ impl From for FieldNormReader { #[derive(Clone)] enum ReaderImplEnum { - FromData(OwnedBytes), + FromData(FileSlice), Const { num_docs: u32, fieldnorm_id: u8, @@ -97,18 +99,17 @@ impl FieldNormReader { /// Opens a field 
norm reader given its file. pub fn open(fieldnorm_file: FileSlice) -> crate::Result { - let data = fieldnorm_file.read_bytes()?; - Ok(FieldNormReader::new(data)) + Ok(FieldNormReader::new(fieldnorm_file)) } - fn new(data: OwnedBytes) -> Self { + fn new(data: FileSlice) -> Self { ReaderImplEnum::FromData(data).into() } /// Returns the number of documents in this segment. pub fn num_docs(&self) -> u32 { match &self.0 { - ReaderImplEnum::FromData(data) => data.len() as u32, + ReaderImplEnum::FromData(data) => HasLen::len(data) as u32, ReaderImplEnum::Const { num_docs, .. } => *num_docs, } } @@ -125,7 +126,7 @@ impl FieldNormReader { pub fn fieldnorm(&self, doc_id: DocId) -> u32 { match &self.0 { ReaderImplEnum::FromData(data) => { - let fieldnorm_id = data.as_slice()[doc_id as usize]; + let fieldnorm_id = data.get_byte(doc_id as usize); id_to_fieldnorm(fieldnorm_id) } ReaderImplEnum::Const { fieldnorm, .. } => *fieldnorm, @@ -137,7 +138,7 @@ impl FieldNormReader { pub fn fieldnorm_id(&self, doc_id: DocId) -> u8 { match &self.0 { ReaderImplEnum::FromData(data) => { - let fieldnorm_id = data.as_slice()[doc_id as usize]; + let fieldnorm_id = data.get_byte(doc_id as usize); fieldnorm_id } ReaderImplEnum::Const { fieldnorm_id, .. } => *fieldnorm_id, diff --git a/src/postings/block_segment_postings.rs b/src/postings/block_segment_postings.rs index 9030d8a571..41b56b41b7 100644 --- a/src/postings/block_segment_postings.rs +++ b/src/postings/block_segment_postings.rs @@ -11,6 +11,7 @@ use crate::postings::{BlockInfo, FreqReadingOption, SkipReader}; use crate::query::BM25Weight; use crate::schema::IndexRecordOption; use crate::{DocId, Score, TERMINATED}; +use tantivy_fst::FakeArr; fn max_score>(mut it: I) -> Option { if let Some(first) = it.next() { @@ -37,19 +38,30 @@ pub struct BlockSegmentPostings { doc_freq: u32, - data: OwnedBytes, + data: FileSlice, pub(crate) skip_reader: SkipReader, } fn decode_bitpacked_block( doc_decoder: &mut BlockDecoder, freq_decoder_opt: Option<&mut BlockDecoder>, - data: &[u8], + data: &dyn FakeArr, doc_offset: DocId, doc_num_bits: u8, tf_num_bits: u8, ) { - let num_consumed_bytes = doc_decoder.uncompress_block_sorted(data, doc_offset, doc_num_bits); + let num_bytes_docs = 128 * (doc_num_bits as usize) / 8; // 128 integers per bitpacker4x block. 
should be same as num_consumed_bytes returned by uncompress block + let num_bytes_freqs = freq_decoder_opt.as_ref() + .map(|_| 128 * (tf_num_bits as usize) / 8) + .unwrap_or(0); + let num_toconsume_bytes = num_bytes_docs + num_bytes_freqs; + let data = data.slice((0..num_bytes_docs + num_bytes_freqs).into()).to_vec(); + let num_consumed_bytes = doc_decoder.uncompress_block_sorted(&data, doc_offset, doc_num_bits); + assert_eq!(num_bytes_docs, num_consumed_bytes); + println!( + "ofs={}, bits={}, consumed={}", + doc_offset, doc_num_bits, num_consumed_bytes + ); if let Some(freq_decoder) = freq_decoder_opt { freq_decoder.uncompress_block_unsorted(&data[num_consumed_bytes..], tf_num_bits); } @@ -58,7 +70,7 @@ fn decode_bitpacked_block( fn decode_vint_block( doc_decoder: &mut BlockDecoder, freq_decoder_opt: Option<&mut BlockDecoder>, - data: &[u8], + data: &dyn FakeArr, doc_offset: DocId, num_vint_docs: usize, ) { @@ -66,22 +78,24 @@ fn decode_vint_block( doc_decoder.uncompress_vint_sorted(data, doc_offset, num_vint_docs, TERMINATED); if let Some(freq_decoder) = freq_decoder_opt { freq_decoder.uncompress_vint_unsorted( - &data[num_consumed_bytes..], + &data.slice((num_consumed_bytes..).into()), num_vint_docs, TERMINATED, ); } } -fn split_into_skips_and_postings( - doc_freq: u32, - mut bytes: OwnedBytes, -) -> (Option, OwnedBytes) { +fn split_into_skips_and_postings(doc_freq: u32, data: FileSlice) -> (Option, FileSlice) { if doc_freq < COMPRESSION_BLOCK_SIZE as u32 { - return (None, bytes); + return (None, data); } - let skip_len = VInt::deserialize(&mut bytes).expect("Data corrupted").0 as usize; - let (skip_data, postings_data) = bytes.split(skip_len); + // hacky code + let slice = &mut data.full_slice(); + let inx_before = slice.get_offset(); + let skip_len = VInt::deserialize(slice).expect("Data corrupted").0 as usize; + let inx_after = slice.get_offset(); + let data = data.slice_from(inx_after - inx_before); + let (skip_data, postings_data) = data.split(skip_len); (Some(skip_data), postings_data) } @@ -98,11 +112,10 @@ impl BlockSegmentPostings { (_, _) => FreqReadingOption::ReadFreq, }; - let (skip_data_opt, postings_data) = - split_into_skips_and_postings(doc_freq, data.read_bytes()?); + let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, data); let skip_reader = match skip_data_opt { Some(skip_data) => SkipReader::new(skip_data, doc_freq, record_option), - None => SkipReader::new(OwnedBytes::empty(), doc_freq, record_option), + None => SkipReader::new(FileSlice::empty(), doc_freq, record_option), }; let mut block_segment_postings = BlockSegmentPostings { @@ -172,7 +185,7 @@ impl BlockSegmentPostings { // # Warning // // This does not reset the positions list. 
- pub(crate) fn reset(&mut self, doc_freq: u32, postings_data: OwnedBytes) { + pub(crate) fn reset(&mut self, doc_freq: u32, postings_data: FileSlice) { let (skip_data_opt, postings_data) = split_into_skips_and_postings(doc_freq, postings_data); self.data = postings_data; self.block_max_score_cache = None; @@ -180,7 +193,7 @@ impl BlockSegmentPostings { if let Some(skip_data) = skip_data_opt { self.skip_reader.reset(skip_data, doc_freq); } else { - self.skip_reader.reset(OwnedBytes::empty(), doc_freq); + self.skip_reader.reset(FileSlice::empty(), doc_freq); } self.doc_freq = doc_freq; self.load_block(); @@ -297,7 +310,7 @@ impl BlockSegmentPostings { } else { None }, - &self.data.as_slice()[offset..], + &self.data.slice(offset, self.data.len()), self.skip_reader.last_doc_in_previous_block, doc_num_bits, tf_num_bits, @@ -306,9 +319,9 @@ impl BlockSegmentPostings { BlockInfo::VInt { num_docs } => { let data = { if num_docs == 0 { - &[] + FileSlice::empty() } else { - &self.data.as_slice()[offset..] + self.data.slice(offset, self.data.len()) } }; decode_vint_block( @@ -318,7 +331,7 @@ impl BlockSegmentPostings { } else { None }, - data, + &data, self.skip_reader.last_doc_in_previous_block, num_docs as usize, ); @@ -344,8 +357,8 @@ impl BlockSegmentPostings { freq_reading_option: FreqReadingOption::NoFreq, block_max_score_cache: None, doc_freq: 0, - data: OwnedBytes::empty(), - skip_reader: SkipReader::new(OwnedBytes::empty(), 0, IndexRecordOption::Basic), + data: FileSlice::empty(), + skip_reader: SkipReader::new(FileSlice::empty(), 0, IndexRecordOption::Basic), } } } diff --git a/src/postings/compression/mod.rs b/src/postings/compression/mod.rs index 5fd5d6a900..29da6de074 100644 --- a/src/postings/compression/mod.rs +++ b/src/postings/compression/mod.rs @@ -1,5 +1,6 @@ use crate::common::FixedSize; use bitpacking::{BitPacker, BitPacker4x}; +use tantivy_fst::FakeArr; pub const COMPRESSION_BLOCK_SIZE: usize = BitPacker4x::BLOCK_LEN; const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * u32::SIZE_IN_BYTES; @@ -148,7 +149,7 @@ pub trait VIntDecoder { /// The value given in `padding` will be used to fill the remaining `128 - num_els` values. fn uncompress_vint_sorted( &mut self, - compressed_data: &[u8], + compressed_data: &dyn FakeArr, offset: u32, num_els: usize, padding: u32, @@ -163,7 +164,7 @@ pub trait VIntDecoder { /// The value given in `padding` will be used to fill the remaining `128 - num_els` values. 
fn uncompress_vint_unsorted( &mut self, - compressed_data: &[u8], + compressed_data: &dyn FakeArr, num_els: usize, padding: u32, ) -> usize; @@ -182,7 +183,7 @@ impl VIntEncoder for BlockEncoder { impl VIntDecoder for BlockDecoder { fn uncompress_vint_sorted( &mut self, - compressed_data: &[u8], + compressed_data: &dyn FakeArr, offset: u32, num_els: usize, padding: u32, @@ -194,7 +195,7 @@ impl VIntDecoder for BlockDecoder { fn uncompress_vint_unsorted( &mut self, - compressed_data: &[u8], + compressed_data: &dyn FakeArr, num_els: usize, padding: u32, ) -> usize { diff --git a/src/postings/compression/vint.rs b/src/postings/compression/vint.rs index 3de43749f8..f8e7eb1533 100644 --- a/src/postings/compression/vint.rs +++ b/src/postings/compression/vint.rs @@ -1,3 +1,5 @@ +use tantivy_fst::FakeArr; + #[inline(always)] pub fn compress_sorted<'a>(input: &[u32], output: &'a mut [u8], mut offset: u32) -> &'a [u8] { let mut byte_written = 0; @@ -42,13 +44,13 @@ pub(crate) fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a } #[inline(always)] -pub fn uncompress_sorted(compressed_data: &[u8], output: &mut [u32], offset: u32) -> usize { +pub fn uncompress_sorted(compressed_data: &dyn FakeArr, output: &mut [u32], offset: u32) -> usize { let mut read_byte = 0; let mut result = offset; for output_mut in output.iter_mut() { let mut shift = 0u32; loop { - let cur_byte = compressed_data[read_byte]; + let cur_byte = compressed_data.get_byte(read_byte); read_byte += 1; result += u32::from(cur_byte % 128u8) << shift; if cur_byte & 128u8 != 0u8 { @@ -62,13 +64,13 @@ pub fn uncompress_sorted(compressed_data: &[u8], output: &mut [u32], offset: u32 } #[inline(always)] -pub(crate) fn uncompress_unsorted(compressed_data: &[u8], output_arr: &mut [u32]) -> usize { +pub(crate) fn uncompress_unsorted(compressed_data: &dyn FakeArr, output_arr: &mut [u32]) -> usize { let mut read_byte = 0; for output_mut in output_arr.iter_mut() { let mut result = 0u32; let mut shift = 0u32; loop { - let cur_byte = compressed_data[read_byte]; + let cur_byte = compressed_data.get_byte(read_byte); read_byte += 1; result += u32::from(cur_byte % 128u8) << shift; if cur_byte & 128u8 != 0u8 { diff --git a/src/postings/skip.rs b/src/postings/skip.rs index 8d4310eb23..ba4c7d2bf1 100644 --- a/src/postings/skip.rs +++ b/src/postings/skip.rs @@ -1,6 +1,8 @@ use std::convert::TryInto; -use crate::directory::OwnedBytes; +use tantivy_fst::FakeArr; + +use crate::directory::{FileSlice, OwnedBytes}; use crate::postings::compression::{compressed_block_size, COMPRESSION_BLOCK_SIZE}; use crate::query::BM25Weight; use crate::schema::IndexRecordOption; @@ -71,7 +73,7 @@ impl SkipSerializer { pub(crate) struct SkipReader { last_doc_in_block: DocId, pub(crate) last_doc_in_previous_block: DocId, - owned_read: OwnedBytes, + owned_read: FileSlice, skip_info: IndexRecordOption, byte_offset: usize, remaining_docs: u32, // number of docs remaining, including the @@ -102,7 +104,7 @@ impl Default for BlockInfo { } impl SkipReader { - pub fn new(data: OwnedBytes, doc_freq: u32, skip_info: IndexRecordOption) -> SkipReader { + pub fn new(data: FileSlice, doc_freq: u32, skip_info: IndexRecordOption) -> SkipReader { let mut skip_reader = SkipReader { last_doc_in_block: if doc_freq >= COMPRESSION_BLOCK_SIZE as u32 { 0 @@ -123,7 +125,7 @@ impl SkipReader { skip_reader } - pub fn reset(&mut self, data: OwnedBytes, doc_freq: u32) { + pub fn reset(&mut self, data: FileSlice, doc_freq: u32) { self.last_doc_in_block = if doc_freq >= COMPRESSION_BLOCK_SIZE as 
u32 { 0 } else { @@ -169,7 +171,7 @@ impl SkipReader { } fn read_block_info(&mut self) { - let bytes = self.owned_read.as_slice(); + let bytes = &self.owned_read.slice_to(std::cmp::min(11, self.owned_read.len())).to_vec(); let advance_len: usize; self.last_doc_in_block = read_u32(bytes); let doc_num_bits = bytes[4]; @@ -212,7 +214,7 @@ impl SkipReader { }; } } - self.owned_read.advance(advance_len); + self.owned_read = self.owned_read.slice_from(advance_len); } pub fn block_info(&self) -> BlockInfo { diff --git a/src/termdict/fst_termdict/term_info_store.rs b/src/termdict/fst_termdict/term_info_store.rs index fa33eef4f2..97f6303267 100644 --- a/src/termdict/fst_termdict/term_info_store.rs +++ b/src/termdict/fst_termdict/term_info_store.rs @@ -1,6 +1,6 @@ -use crate::common::compute_num_bits; +use crate::{HasLen, common::compute_num_bits}; use crate::common::{bitpacker::BitPacker, BinarySerializable, FixedSize}; -use crate::directory::{FileSlice, OwnedBytes}; +use crate::directory::{FileSlice, FakeArr}; use crate::postings::TermInfo; use crate::termdict::TermOrdinal; use byteorder::{ByteOrder, LittleEndian}; @@ -58,7 +58,7 @@ impl TermInfoBlockMeta { // Here inner_offset is the offset within the block, WITHOUT the first term_info. // In other word, term_info #1,#2,#3 gets inner_offset 0,1,2... While term_info #0 // is encoded without bitpacking. - fn deserialize_term_info(&self, data: &[u8], inner_offset: usize) -> TermInfo { + fn deserialize_term_info(&self, data: &dyn FakeArr, inner_offset: usize) -> TermInfo { assert!(inner_offset < BLOCK_LEN - 1); let num_bits = self.num_bits() as usize; @@ -88,22 +88,22 @@ impl TermInfoBlockMeta { #[derive(Debug)] pub struct TermInfoStore { num_terms: usize, - block_meta_bytes: OwnedBytes, - term_info_bytes: OwnedBytes, + block_meta_bytes: FileSlice, + term_info_bytes: FileSlice, } -fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 { +fn extract_bits(data: &dyn FakeArr, addr_bits: usize, num_bits: u8) -> u64 { assert!(num_bits <= 56); let addr_byte = addr_bits / 8; let bit_shift = (addr_bits % 8) as u64; let val_unshifted_unmasked: u64 = if data.len() >= addr_byte + 8 { - LittleEndian::read_u64(&data[addr_byte..][..8]) + LittleEndian::read_u64(&data.slice((addr_byte..addr_byte + 8).into()).to_vec()) } else { // the buffer is not large enough. // Let's copy the few remaining bytes to a 8 byte buffer // padded with 0s. let mut buf = [0u8; 8]; - let data_to_copy = &data[addr_byte..]; + let data_to_copy = &data.slice((addr_byte..).into()).to_vec(); let nbytes = data_to_copy.len(); buf[..nbytes].copy_from_slice(data_to_copy); LittleEndian::read_u64(&buf) @@ -120,27 +120,26 @@ impl TermInfoStore { let len = u64::deserialize(&mut bytes)? as usize; let num_terms = u64::deserialize(&mut bytes)? 
as usize; let (block_meta_file, term_info_file) = main_slice.split(len); - let term_info_bytes = term_info_file.read_bytes()?; Ok(TermInfoStore { num_terms, - block_meta_bytes: block_meta_file.read_bytes()?, - term_info_bytes, + block_meta_bytes: block_meta_file, + term_info_bytes: term_info_file, }) } pub fn get(&self, term_ord: TermOrdinal) -> TermInfo { let block_id = (term_ord as usize) / BLOCK_LEN; - let buffer = self.block_meta_bytes.as_slice(); - let mut block_data: &[u8] = &buffer[block_id * TermInfoBlockMeta::SIZE_IN_BYTES..]; + let block_data = self.block_meta_bytes.slice(block_id * TermInfoBlockMeta::SIZE_IN_BYTES, HasLen::len(&self.block_meta_bytes)); + let mut block_data = block_data.full_slice(); let term_info_block_data = TermInfoBlockMeta::deserialize(&mut block_data) .expect("Failed to deserialize terminfoblockmeta"); let inner_offset = (term_ord as usize) % BLOCK_LEN; if inner_offset == 0 { return term_info_block_data.ref_term_info; } - let term_info_data = self.term_info_bytes.as_slice(); + let term_info_data = self.term_info_bytes.slice(term_info_block_data.offset as usize, HasLen::len(&self.term_info_bytes)); term_info_block_data.deserialize_term_info( - &term_info_data[term_info_block_data.offset as usize..], + &term_info_data, inner_offset - 1, ) } @@ -304,9 +303,9 @@ mod tests { assert_eq!(compute_num_bits(51), 6); bitpack.close(&mut buffer).unwrap(); assert_eq!(buffer.len(), 3 + 7); - assert_eq!(extract_bits(&buffer[..], 0, 9), 321u64); - assert_eq!(extract_bits(&buffer[..], 9, 2), 2u64); - assert_eq!(extract_bits(&buffer[..], 11, 6), 51u64); + assert_eq!(extract_bits(buffer, 0, 9), 321u64); + assert_eq!(extract_bits(buffer, 9, 2), 2u64); + assert_eq!(extract_bits(buffer, 11, 6), 51u64); } #[test] diff --git a/src/termdict/fst_termdict/termdict.rs b/src/termdict/fst_termdict/termdict.rs index 9be4543c16..50a4f2dd5c 100644 --- a/src/termdict/fst_termdict/termdict.rs +++ b/src/termdict/fst_termdict/termdict.rs @@ -1,14 +1,13 @@ use super::term_info_store::{TermInfoStore, TermInfoStoreWriter}; use super::{TermStreamer, TermStreamerBuilder}; -use crate::{common::{BinarySerializable, CountingWriter}, directory::OnDemandBox}; +use crate::{common::{BinarySerializable, CountingWriter}}; use crate::directory::{FileSlice, OwnedBytes}; use crate::error::DataCorruption; use crate::postings::TermInfo; use crate::termdict::TermOrdinal; -use crate::directory::OnDemandBytes; use once_cell::sync::Lazy; use std::io::{self, Write}; -use tantivy_fst::{FakeArr, raw::Fst}; +use tantivy_fst::{raw::Fst}; use tantivy_fst::Automaton; fn convert_fst_error(e: tantivy_fst::Error) -> io::Error { From 397689a88e1e52a2f158f0dfe8e3169e9c6cceb0 Mon Sep 17 00:00:00 2001 From: phiresky Date: Thu, 20 May 2021 16:00:11 +0200 Subject: [PATCH 4/9] minor fix. 
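
Worth pausing on `FSFile::read_bytes` from the previous patch: it maps an arbitrary byte range onto fixed 4096-byte cache chunks, and that index arithmetic is what this "minor fix" keeps adjusting. The same mapping as a standalone sketch (hypothetical helper; in the patch, the chunk fetch goes through the `BTreeMap` cache via `cache.entry(i).or_insert_with(...)` and `read_bytes_real`):

    const CS: usize = 4096; // chunk size, as in fs_directory.rs

    fn read_range(from: usize, to: usize, mut fetch: impl FnMut(usize) -> Vec<u8>) -> Vec<u8> {
        let (first, last) = (from / CS, to / CS);
        let mut out = Vec::with_capacity(to - from);
        for i in first..=last {
            let chunk = fetch(i); // chunk i covers bytes [i * CS, (i + 1) * CS)
            let lo = if i == first { from % CS } else { 0 };
            let hi = if i == last { to % CS } else { CS };
            out.extend_from_slice(&chunk[lo..hi.min(chunk.len())]);
        }
        out
    }

Like the patch, this still visits chunk `to / CS` when `to` is chunk-aligned (the copy is then empty); the `min` only matters for a short final chunk at end of file.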
--- src/directory/fs_directory.rs | 10 ++++------ src/postings/block_segment_postings.rs | 5 ----- src/postings/skip.rs | 2 +- 3 files changed, 5 insertions(+), 12 deletions(-) diff --git a/src/directory/fs_directory.rs b/src/directory/fs_directory.rs index e409fdd5bb..943454f6e0 100644 --- a/src/directory/fs_directory.rs +++ b/src/directory/fs_directory.rs @@ -102,10 +102,9 @@ impl FSFile { let len = to - from; eprintln!( - "READ {} @ {}, len {}", + "READ {} chunk {}", self.path.to_string_lossy(), - from, - len + from / CS ); if len == 51616 { println!("{:?}", backtrace::Backtrace::new()); @@ -127,12 +126,12 @@ impl FSFile { impl FileHandle for FSFile { fn read_bytes(&self, from: usize, to: usize) -> std::io::Result { let len = to - from; - eprintln!( + /*eprintln!( "GET {} @ {}, len {}", self.path.to_string_lossy(), from, len - ); + );*/ let starti = from / CS; let endi = to / CS; let startofs = from % CS; @@ -148,7 +147,6 @@ impl FileHandle for FSFile { self.read_bytes_real(i * CS, std::cmp::min((i + 1) * CS, self.len())) }); let chunk = &chunk[startofs..endofs]; - println!("{} {} {} {}", out_buf.len(), startofs, endofs, chunk.len()); let write_len = std::cmp::min(chunk.len(), len); out_buf[written..written + write_len] .copy_from_slice(&chunk); diff --git a/src/postings/block_segment_postings.rs b/src/postings/block_segment_postings.rs index 41b56b41b7..6633d73611 100644 --- a/src/postings/block_segment_postings.rs +++ b/src/postings/block_segment_postings.rs @@ -54,14 +54,9 @@ fn decode_bitpacked_block( let num_bytes_freqs = freq_decoder_opt.as_ref() .map(|_| 128 * (tf_num_bits as usize) / 8) .unwrap_or(0); - let num_toconsume_bytes = num_bytes_docs + num_bytes_freqs; let data = data.slice((0..num_bytes_docs + num_bytes_freqs).into()).to_vec(); let num_consumed_bytes = doc_decoder.uncompress_block_sorted(&data, doc_offset, doc_num_bits); assert_eq!(num_bytes_docs, num_consumed_bytes); - println!( - "ofs={}, bits={}, consumed={}", - doc_offset, doc_num_bits, num_consumed_bytes - ); if let Some(freq_decoder) = freq_decoder_opt { freq_decoder.uncompress_block_unsorted(&data[num_consumed_bytes..], tf_num_bits); } diff --git a/src/postings/skip.rs b/src/postings/skip.rs index ba4c7d2bf1..f2e9c8bab9 100644 --- a/src/postings/skip.rs +++ b/src/postings/skip.rs @@ -171,7 +171,7 @@ impl SkipReader { } fn read_block_info(&mut self) { - let bytes = &self.owned_read.slice_to(std::cmp::min(11, self.owned_read.len())).to_vec(); + let bytes = &self.owned_read.slice_to(std::cmp::min(12, self.owned_read.len())).to_vec(); let advance_len: usize; self.last_doc_in_block = read_u32(bytes); let doc_num_bits = bytes[4]; From 76be25bee8ec1f74363845fd87fa4624bea5b221 Mon Sep 17 00:00:00 2001 From: phiresky Date: Thu, 20 May 2021 17:18:45 +0200 Subject: [PATCH 5/9] Ulen part 1 --- src/common/mod.rs | 4 +- src/common/serialize.rs | 3 +- src/directory/file_slice.rs | 37 ++++++++-------- src/directory/footer.rs | 5 ++- src/directory/fs_directory.rs | 46 ++++++++------------ src/directory/owned_bytes.rs | 5 ++- src/directory/ram_directory.rs | 2 +- src/postings/compression/mod.rs | 2 +- src/termdict/fst_termdict/term_info_store.rs | 3 +- 9 files changed, 51 insertions(+), 56 deletions(-) diff --git a/src/common/mod.rs b/src/common/mod.rs index 45bee6a592..24224a742c 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -14,7 +14,7 @@ pub use self::vint::{ read_u32_vint, read_u32_vint_no_advance, serialize_vint_u32, write_u32_vint, VInt, }; pub use byteorder::LittleEndian as Endianness; - +use 
tantivy_fst::Ulen; /// Segment's max doc must be `< MAX_DOC_LIMIT`. /// /// We do not allow segments with more than @@ -69,7 +69,7 @@ pub(crate) fn compute_num_bits(n: u64) -> u8 { /// Has length trait pub trait HasLen { /// Return length - fn len(&self) -> usize; + fn len(&self) -> Ulen; /// Returns true iff empty. fn is_empty(&self) -> bool { diff --git a/src/common/serialize.rs b/src/common/serialize.rs index 6b89bbe703..98f1896947 100644 --- a/src/common/serialize.rs +++ b/src/common/serialize.rs @@ -1,6 +1,7 @@ use crate::common::Endianness; use crate::common::VInt; use byteorder::{ReadBytesExt, WriteBytesExt}; +use tantivy_fst::Ulen; use std::fmt; use std::io; use std::io::Read; @@ -17,7 +18,7 @@ pub trait BinarySerializable: fmt::Debug + Sized { /// `FixedSize` marks a `BinarySerializable` as /// always serializing to the same size. pub trait FixedSize: BinarySerializable { - const SIZE_IN_BYTES: usize; + const SIZE_IN_BYTES: Ulen; } impl BinarySerializable for () { diff --git a/src/directory/file_slice.rs b/src/directory/file_slice.rs index b460cd0a04..3af5c2cea1 100644 --- a/src/directory/file_slice.rs +++ b/src/directory/file_slice.rs @@ -1,5 +1,6 @@ use stable_deref_trait::StableDeref; pub use tantivy_fst::FakeArr; +use tantivy_fst::Ulen; use crate::common::HasLen; use crate::directory::OwnedBytes; @@ -22,15 +23,15 @@ pub trait FileHandle: 'static + Send + Sync + HasLen + Debug { /// Reads a slice of bytes. /// /// This method may panic if the range requested is invalid. - fn read_bytes(&self, from: usize, to: usize) -> io::Result; + fn read_bytes(&self, from: Ulen, to: Ulen) -> io::Result; } impl FakeArr for FileSlice { - fn len(&self) -> usize { + fn len(&self) -> Ulen { self.stop - self.start } - fn read_into(&self, offset: usize, buf: &mut [u8]) -> io::Result<()> { + fn read_into(&self, offset: Ulen, buf: &mut [u8]) -> io::Result<()> { buf.copy_from_slice(&self.read_bytes_slice(offset, offset + buf.len())?); Ok(()) } @@ -41,15 +42,15 @@ impl FakeArr for FileSlice { } impl FileHandle for &'static [u8] { - fn read_bytes(&self, from: usize, to: usize) -> io::Result { - let bytes = &self[from..to]; + fn read_bytes(&self, from: Ulen, to: Ulen) -> io::Result { + let bytes = &self[from as usize..to as usize]; Ok(OwnedBytes::new(bytes)) } } impl> HasLen for T { - fn len(&self) -> usize { - self.as_ref().len() + fn len(&self) -> Ulen { + self.as_ref().len() as Ulen } } @@ -69,8 +70,8 @@ where #[derive(Clone, Debug)] pub struct FileSlice { data: Arc, - start: usize, - stop: usize, + start: Ulen, + stop: Ulen, } impl FileSlice { @@ -82,7 +83,7 @@ impl FileSlice { /// Wraps a FileHandle. #[doc(hidden)] - pub fn new_with_num_bytes(file_handle: Box, num_bytes: usize) -> Self { + pub fn new_with_num_bytes(file_handle: Box, num_bytes: Ulen) -> Self { FileSlice { data: Arc::from(file_handle), start: 0, @@ -95,7 +96,7 @@ impl FileSlice { /// # Panics /// /// Panics if `to < from` or if `to` exceeds the filesize. - pub fn slice(&self, from: usize, to: usize) -> FileSlice { + pub fn slice(&self, from: Ulen, to: Ulen) -> FileSlice { assert!(to <= ::len(&self)); assert!(to >= from); FileSlice { @@ -124,7 +125,7 @@ impl FileSlice { /// Reads a specific slice of data. /// /// This is equivalent to running `file_slice.slice(from, to).read_bytes()`. 
- pub fn read_bytes_slice(&self, from: usize, to: usize) -> io::Result { + pub fn read_bytes_slice(&self, from: Ulen, to: Ulen) -> io::Result { assert!(from <= to); assert!( self.start + to <= self.stop, @@ -137,7 +138,7 @@ impl FileSlice { /// `file_slice[..split_offset]` and `file_slice[split_offset..]`. /// /// This operation is cheap and must not copy any underlying data. - pub fn split(self, left_len: usize) -> (FileSlice, FileSlice) { + pub fn split(self, left_len: Ulen) -> (FileSlice, FileSlice) { let left = self.slice_to(left_len); let right = self.slice_from(left_len); (left, right) @@ -145,7 +146,7 @@ impl FileSlice { /// Splits the file slice at the given offset and return two file slices. /// `file_slice[..split_offset]` and `file_slice[split_offset..]`. - pub fn split_from_end(self, right_len: usize) -> (FileSlice, FileSlice) { + pub fn split_from_end(self, right_len: Ulen) -> (FileSlice, FileSlice) { let left_len = HasLen::len(&self) - right_len; self.split(left_len) } @@ -154,7 +155,7 @@ impl FileSlice { /// boundary. /// /// Equivalent to `.slice(from_offset, self.len())` - pub fn slice_from(&self, from_offset: usize) -> FileSlice { + pub fn slice_from(&self, from_offset: Ulen) -> FileSlice { self.slice(from_offset, ::len(&self)) } @@ -162,19 +163,19 @@ impl FileSlice { /// boundary. /// /// Equivalent to `.slice(0, to_offset)` - pub fn slice_to(&self, to_offset: usize) -> FileSlice { + pub fn slice_to(&self, to_offset: Ulen) -> FileSlice { self.slice(0, to_offset) } } impl FileHandle for FileSlice { - fn read_bytes(&self, from: usize, to: usize) -> io::Result { + fn read_bytes(&self, from: Ulen, to: Ulen) -> io::Result { self.read_bytes_slice(from, to) } } impl HasLen for FileSlice { - fn len(&self) -> usize { + fn len(&self) -> Ulen { self.stop - self.start } } diff --git a/src/directory/footer.rs b/src/directory/footer.rs index b2f495f6cb..a0c0ed5b9d 100644 --- a/src/directory/footer.rs +++ b/src/directory/footer.rs @@ -4,6 +4,7 @@ use crate::directory::FileSlice; use crate::directory::{AntiCallToken, TerminatingWrite}; use crate::Version; use crc32fast::Hasher; +use tantivy_fst::Ulen; use std::io; use std::io::Write; @@ -77,9 +78,9 @@ impl Footer { ), )); } - let (body_footer, footer_len_file) = file.split_from_end(u32::SIZE_IN_BYTES); + let (body_footer, footer_len_file) = file.split_from_end(u32::SIZE_IN_BYTES as Ulen); let mut footer_len_bytes = footer_len_file.read_bytes()?; - let footer_len = u32::deserialize(&mut footer_len_bytes)? as usize; + let footer_len = u32::deserialize(&mut footer_len_bytes)? 
as Ulen; let (body, footer) = body_footer.split_from_end(footer_len); let mut footer_bytes = footer.read_bytes()?; let footer = Footer::deserialize(&mut footer_bytes)?; diff --git a/src/directory/fs_directory.rs b/src/directory/fs_directory.rs index 943454f6e0..5a29d194cd 100644 --- a/src/directory/fs_directory.rs +++ b/src/directory/fs_directory.rs @@ -1,11 +1,6 @@ -use std::{ - collections::BTreeMap, - fs::File, - io::{BufWriter, Read, Seek, SeekFrom, Write}, - ops::DerefMut, - path::{Path, PathBuf}, - sync::{Arc, RwLock}, -}; +use std::{collections::BTreeMap, convert::TryInto, fs::File, io::{BufWriter, Read, Seek, SeekFrom, Write}, ops::DerefMut, path::{Path, PathBuf}, sync::{Arc, RwLock}}; + +use tantivy_fst::Ulen; use crate::{ directory::{error::OpenWriteError, FileHandle, OwnedBytes, TerminatingWrite, WatchHandle}, @@ -82,10 +77,10 @@ impl Directory for FsDirectory { struct FSFile { path: PathBuf, file: Arc>, - len: usize, - cache: RwLock>>, + len: Ulen, + cache: RwLock>>, } -const CS: usize = 4096; +const CS: Ulen = 4096; impl FSFile { pub fn new(path: &Path) -> FSFile { @@ -94,18 +89,14 @@ impl FSFile { FSFile { path: path.to_path_buf(), file: Arc::new(RwLock::new(f)), - len: len as usize, + len, cache: RwLock::new(BTreeMap::new()), } } - fn read_bytes_real(&self, from: usize, to: usize) -> Vec { + fn read_bytes_real(&self, from: Ulen, to: Ulen) -> Vec { let len = to - from; - eprintln!( - "READ {} chunk {}", - self.path.to_string_lossy(), - from / CS - ); + eprintln!("READ {} chunk {}", self.path.to_string_lossy(), from / CS); if len == 51616 { println!("{:?}", backtrace::Backtrace::new()); } @@ -117,15 +108,15 @@ impl FSFile { } let mut f = self.file.write().unwrap(); f.seek(SeekFrom::Start(from as u64)).unwrap(); - let mut buf = Vec::with_capacity(len); + let mut buf = Vec::with_capacity(len.try_into().unwrap()); let flonk = f.deref_mut(); (flonk).take(len as u64).read_to_end(&mut buf).unwrap(); return buf; } } impl FileHandle for FSFile { - fn read_bytes(&self, from: usize, to: usize) -> std::io::Result { - let len = to - from; + fn read_bytes(&self, from: Ulen, to: Ulen) -> std::io::Result { + let len: usize = (to - from).try_into().unwrap(); /*eprintln!( "GET {} @ {}, len {}", self.path.to_string_lossy(), @@ -134,22 +125,21 @@ impl FileHandle for FSFile { );*/ let starti = from / CS; let endi = to / CS; - let startofs = from % CS; - let endofs = to % CS; + let startofs = (from % CS) as usize; + let endofs = (to % CS) as usize; let mut out_buf = vec![0u8; len]; //let toget = vec![]; let mut cache = self.cache.write().unwrap(); let mut written = 0; for i in starti..=endi { let startofs = if i == starti { startofs } else { 0 }; - let endofs = if i == endi { endofs } else { CS }; + let endofs = if i == endi { endofs } else { CS as usize }; let chunk = cache.entry(i).or_insert_with(|| { self.read_bytes_real(i * CS, std::cmp::min((i + 1) * CS, self.len())) }); let chunk = &chunk[startofs..endofs]; - let write_len = std::cmp::min(chunk.len(), len); - out_buf[written..written + write_len] - .copy_from_slice(&chunk); + let write_len = std::cmp::min(chunk.len(), len as usize); + out_buf[written..written + write_len].copy_from_slice(&chunk); written += write_len; } @@ -157,7 +147,7 @@ impl FileHandle for FSFile { } } impl HasLen for FSFile { - fn len(&self) -> usize { + fn len(&self) -> Ulen { self.len } } diff --git a/src/directory/owned_bytes.rs b/src/directory/owned_bytes.rs index 93cefced10..6c2cce2267 100644 --- a/src/directory/owned_bytes.rs +++ b/src/directory/owned_bytes.rs @@ 
-1,5 +1,6 @@ use crate::directory::FileHandle; use stable_deref_trait::StableDeref; +use tantivy_fst::Ulen; //use tantivy_fst::FakeArr; use std::convert::TryInto; use std::mem; @@ -18,8 +19,8 @@ pub struct OwnedBytes { } impl FileHandle for OwnedBytes { - fn read_bytes(&self, from: usize, to: usize) -> io::Result { - Ok(self.slice(from, to)) + fn read_bytes(&self, from: Ulen, to: Ulen) -> io::Result { + Ok(self.slice(from as usize, to as usize)) } } diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 589247e451..21bd578256 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -115,7 +115,7 @@ impl InnerDirectory { } fn total_mem_usage(&self) -> usize { - self.fs.values().map(|f| f.len()).sum() + self.fs.values().map(|f| f.len() as usize).sum() } } diff --git a/src/postings/compression/mod.rs b/src/postings/compression/mod.rs index 29da6de074..671a260bb3 100644 --- a/src/postings/compression/mod.rs +++ b/src/postings/compression/mod.rs @@ -3,7 +3,7 @@ use bitpacking::{BitPacker, BitPacker4x}; use tantivy_fst::FakeArr; pub const COMPRESSION_BLOCK_SIZE: usize = BitPacker4x::BLOCK_LEN; -const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * u32::SIZE_IN_BYTES; +const COMPRESSED_BLOCK_MAX_SIZE: usize = COMPRESSION_BLOCK_SIZE * u32::SIZE_IN_BYTES as usize; mod vint; diff --git a/src/termdict/fst_termdict/term_info_store.rs b/src/termdict/fst_termdict/term_info_store.rs index 97f6303267..93a5e91086 100644 --- a/src/termdict/fst_termdict/term_info_store.rs +++ b/src/termdict/fst_termdict/term_info_store.rs @@ -4,6 +4,7 @@ use crate::directory::{FileSlice, FakeArr}; use crate::postings::TermInfo; use crate::termdict::TermOrdinal; use byteorder::{ByteOrder, LittleEndian}; +use tantivy_fst::Ulen; use std::cmp; use std::io::{self, Read, Write}; @@ -92,7 +93,7 @@ pub struct TermInfoStore { term_info_bytes: FileSlice, } -fn extract_bits(data: &dyn FakeArr, addr_bits: usize, num_bits: u8) -> u64 { +fn extract_bits(data: &dyn FakeArr, addr_bits: Ulen, num_bits: u8) -> u64 { assert!(num_bits <= 56); let addr_byte = addr_bits / 8; let bit_shift = (addr_bits % 8) as u64; From 6a0cf7eede4223dd444024b5e68ef9e3c8c1be16 Mon Sep 17 00:00:00 2001 From: phiresky Date: Sat, 22 May 2021 10:46:31 +0200 Subject: [PATCH 6/9] WORKS! 
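
The `Ulen` migration in patches 5 and 6 changes offset types, not the bit-unpacking math. For reference, the `extract_bits` function from term_info_store.rs restated over a plain byte slice (a hypothetical standalone sketch, not part of the patch):

    fn extract_bits(data: &[u8], addr_bits: usize, num_bits: u8) -> u64 {
        // 56-bit cap: after an up-to-7-bit shift the value still fits in a u64.
        assert!(num_bits <= 56);
        let addr_byte = addr_bits / 8;
        let bit_shift = (addr_bits % 8) as u64;
        // Read 8 bytes little-endian, zero-padded near the end of the buffer.
        let mut buf = [0u8; 8];
        let n = (data.len() - addr_byte).min(8);
        buf[..n].copy_from_slice(&data[addr_byte..addr_byte + n]);
        let mask = (1u64 << num_bits) - 1;
        (u64::from_le_bytes(buf) >> bit_shift) & mask
    }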
--- src/collector/facet_collector.rs | 2 +- src/common/bitpacker.rs | 3 ++ src/common/bitset.rs | 2 + src/common/composite_file.rs | 20 ++++---- src/common/serialize.rs | 16 +++--- src/common/vint.rs | 1 + src/core/executor.rs | 7 ++- src/core/index.rs | 2 + src/core/inverted_index_reader.rs | 10 ++-- src/core/searcher.rs | 2 + src/directory/file_slice.rs | 7 ++- src/directory/mmap_directory.rs | 5 +- src/directory/owned_bytes.rs | 18 ++++--- src/directory/ram_directory.rs | 7 +-- src/directory/watch_event_router.rs | 5 +- src/docset.rs | 6 ++- src/fastfield/bytes/reader.rs | 12 +++-- src/fastfield/delete.rs | 10 ++-- src/fastfield/facet_reader.rs | 4 +- src/fastfield/mod.rs | 15 +++--- src/fastfield/multivalued/reader.rs | 10 ++-- src/fastfield/multivalued/writer.rs | 5 +- src/fastfield/readers.rs | 4 +- src/fastfield/serializer.rs | 6 ++- src/fastfield/writer.rs | 3 +- src/fieldnorm/code.rs | 2 + src/fieldnorm/reader.rs | 8 +-- src/fieldnorm/writer.rs | 2 + src/functional_test.rs | 3 +- src/indexer/delete_queue.rs | 6 ++- src/indexer/log_merge_policy.rs | 2 + src/indexer/merger.rs | 5 +- src/indexer/segment_updater.rs | 1 + src/lib.rs | 1 + src/positions/mod.rs | 3 +- src/positions/reader.rs | 51 ++++++++++---------- src/positions/serializer.rs | 1 + src/postings/block_search.rs | 12 +++-- src/postings/block_segment_postings.rs | 18 +++---- src/postings/compression/vint.rs | 10 ++-- src/postings/segment_postings.rs | 10 ++-- src/postings/serializer.rs | 10 ++-- src/postings/skip.rs | 20 ++++---- src/postings/term_info.rs | 4 +- src/query/vec_docset.rs | 6 ++- src/reader/mod.rs | 2 + src/reader/pool.rs | 6 ++- src/schema/document.rs | 4 +- src/schema/schema.rs | 1 + src/schema/term.rs | 2 + src/space_usage/mod.rs | 3 +- src/store/index/block.rs | 4 +- src/store/index/mod.rs | 3 ++ src/store/index/skip_index.rs | 2 + src/store/index/skip_index_builder.rs | 2 + src/store/mod.rs | 4 +- src/store/reader.rs | 19 ++++---- src/store/writer.rs | 6 ++- src/termdict/fst_termdict/term_info_store.rs | 34 ++++++------- src/termdict/fst_termdict/termdict.rs | 6 +-- src/termdict/merger.rs | 2 + src/termdict/tests.rs | 2 + 62 files changed, 283 insertions(+), 176 deletions(-) diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index cd10cbaf06..81b854d655 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -269,7 +269,7 @@ impl Collector for FacetCollector { let mut collapse_mapping = Vec::new(); let mut counts = Vec::new(); - let mut collapse_facet_ords = Vec::new(); + let mut collapse_facet_ords: Vec = Vec::new(); let mut collapse_facet_it = self.facets.iter().peekable(); collapse_facet_ords.push(0); diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs index 640d8adcff..881beb484a 100644 --- a/src/common/bitpacker.rs +++ b/src/common/bitpacker.rs @@ -1,4 +1,5 @@ use byteorder::{ByteOrder, LittleEndian, WriteBytesExt}; +use tantivy_fst::Ulen; use std::io; use crate::directory::OwnedBytes; @@ -103,6 +104,8 @@ impl BitUnpacker { #[cfg(test)] mod test { + use tantivy_fst::Ulen; + use super::{BitPacker, BitUnpacker}; use crate::directory::OwnedBytes; diff --git a/src/common/bitset.rs b/src/common/bitset.rs index 0a8d6f4de7..a09d8e2c90 100644 --- a/src/common/bitset.rs +++ b/src/common/bitset.rs @@ -1,6 +1,8 @@ use std::fmt; use std::u64; +use tantivy_fst::Ulen; + #[derive(Clone, Copy, Eq, PartialEq)] pub(crate) struct TinySet(u64); diff --git a/src/common/composite_file.rs b/src/common/composite_file.rs index 5982743153..af184b3414 
100644 --- a/src/common/composite_file.rs +++ b/src/common/composite_file.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::common::BinarySerializable; use crate::common::CountingWriter; use crate::common::VInt; @@ -14,11 +16,11 @@ use super::HasLen; #[derive(Eq, PartialEq, Hash, Copy, Ord, PartialOrd, Clone, Debug)] pub struct FileAddr { field: Field, - idx: usize, + idx: Ulen, } impl FileAddr { - fn new(field: Field, idx: usize) -> FileAddr { + fn new(field: Field, idx: Ulen) -> FileAddr { FileAddr { field, idx } } } @@ -32,7 +34,7 @@ impl BinarySerializable for FileAddr { fn deserialize(reader: &mut R) -> io::Result { let field = Field::deserialize(reader)?; - let idx = VInt::deserialize(reader)?.0 as usize; + let idx = VInt::deserialize(reader)?.0 as Ulen; Ok(FileAddr { field, idx }) } } @@ -59,7 +61,7 @@ impl CompositeWrite { } /// Start writing a new field. - pub fn for_field_with_idx(&mut self, field: Field, idx: usize) -> &mut CountingWriter { + pub fn for_field_with_idx(&mut self, field: Field, idx: Ulen) -> &mut CountingWriter { let offset = self.write.written_bytes(); let file_addr = FileAddr::new(field, idx); assert!(!self.offsets.contains_key(&file_addr)); @@ -105,7 +107,7 @@ impl CompositeWrite { #[derive(Clone)] pub struct CompositeFile { data: FileSlice, - offsets_index: HashMap, + offsets_index: HashMap, } impl CompositeFile { @@ -114,7 +116,7 @@ impl CompositeFile { pub fn open(data: &FileSlice) -> io::Result { let end = data.len(); let footer_len_data = data.slice_from(end - 4).read_bytes()?; - let footer_len = u32::deserialize(&mut footer_len_data.as_slice())? as usize; + let footer_len = u32::deserialize(&mut footer_len_data.as_slice())? as Ulen; let footer_start = end - 4 - footer_len; let footer_data = data .slice(footer_start, footer_start + footer_len) @@ -128,7 +130,7 @@ impl CompositeFile { let mut offset = 0; for _ in 0..num_fields { - offset += VInt::deserialize(&mut footer_buffer)?.0 as usize; + offset += VInt::deserialize(&mut footer_buffer)?.0 as Ulen; let file_addr = FileAddr::deserialize(&mut footer_buffer)?; offsets.push(offset); file_addrs.push(file_addr); @@ -164,7 +166,7 @@ impl CompositeFile { /// Returns the `FileSlice` associated /// to a given `Field` and stored in a `CompositeFile`. 
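
// CompositeFile::open() above locates its footer by reading a trailing u32
// length marker, then walks VInt-encoded offset deltas, one per stored field.
// The bounds arithmetic alone, as a standalone sketch (helper name is
// illustrative, not part of the patch):
fn composite_footer_bounds(file_len: u64, footer_len: u64) -> (u64, u64) {
    // the u32 length marker occupies the last 4 bytes; the footer sits
    // immediately before it, i.e. [footer_start, file_len - 4)
    let footer_start = file_len - 4 - footer_len;
    (footer_start, file_len - 4)
}
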
- pub fn open_read_with_idx(&self, field: Field, idx: usize) -> Option { + pub fn open_read_with_idx(&self, field: Field, idx: Ulen) -> Option { self.offsets_index .get(&FileAddr { field, idx }) .map(|&(from, to)| self.data.slice(from, to)) @@ -176,7 +178,7 @@ impl CompositeFile { fields .entry(field_addr.field) .or_insert_with(|| FieldUsage::empty(field_addr.field)) - .add_field_idx(field_addr.idx, end - start); + .add_field_idx(field_addr.idx as usize, end - start); } PerFieldSpaceUsage::new(fields) } diff --git a/src/common/serialize.rs b/src/common/serialize.rs index 98f1896947..86168e5403 100644 --- a/src/common/serialize.rs +++ b/src/common/serialize.rs @@ -31,7 +31,7 @@ impl BinarySerializable for () { } impl FixedSize for () { - const SIZE_IN_BYTES: usize = 0; + const SIZE_IN_BYTES: Ulen = 0; } impl BinarySerializable for Vec { @@ -74,7 +74,7 @@ impl BinarySerializable for u32 { } impl FixedSize for u32 { - const SIZE_IN_BYTES: usize = 4; + const SIZE_IN_BYTES: Ulen = 4; } impl BinarySerializable for u64 { @@ -87,7 +87,7 @@ impl BinarySerializable for u64 { } impl FixedSize for u64 { - const SIZE_IN_BYTES: usize = 8; + const SIZE_IN_BYTES: Ulen = 8; } impl BinarySerializable for f32 { @@ -100,7 +100,7 @@ impl BinarySerializable for f32 { } impl FixedSize for f32 { - const SIZE_IN_BYTES: usize = 4; + const SIZE_IN_BYTES: Ulen = 4; } impl BinarySerializable for i64 { @@ -113,7 +113,7 @@ impl BinarySerializable for i64 { } impl FixedSize for i64 { - const SIZE_IN_BYTES: usize = 8; + const SIZE_IN_BYTES: Ulen = 8; } impl BinarySerializable for f64 { @@ -126,7 +126,7 @@ impl BinarySerializable for f64 { } impl FixedSize for f64 { - const SIZE_IN_BYTES: usize = 8; + const SIZE_IN_BYTES: Ulen = 8; } impl BinarySerializable for u8 { @@ -139,7 +139,7 @@ impl BinarySerializable for u8 { } impl FixedSize for u8 { - const SIZE_IN_BYTES: usize = 1; + const SIZE_IN_BYTES: Ulen = 1; } impl BinarySerializable for String { @@ -168,7 +168,7 @@ pub mod test { pub fn fixed_size_test() { let mut buffer = Vec::new(); O::default().serialize(&mut buffer).unwrap(); - assert_eq!(buffer.len(), O::SIZE_IN_BYTES); + assert_eq!(buffer.len(), O::SIZE_IN_BYTES as usize); } fn serialize_test(v: T) -> usize { diff --git a/src/common/vint.rs b/src/common/vint.rs index 6bbce4641d..f295b6276e 100644 --- a/src/common/vint.rs +++ b/src/common/vint.rs @@ -1,5 +1,6 @@ use super::BinarySerializable; use byteorder::{ByteOrder, LittleEndian}; +use tantivy_fst::Ulen; use std::io; use std::io::Read; use std::io::Write; diff --git a/src/core/executor.rs b/src/core/executor.rs index 8ac39a7eb5..d0459026a1 100644 --- a/src/core/executor.rs +++ b/src/core/executor.rs @@ -1,5 +1,6 @@ use crossbeam::channel; use rayon::{ThreadPool, ThreadPoolBuilder}; +use tantivy_fst::Ulen; /// Search executor whether search request are single thread or multithread. 
/// @@ -87,12 +88,14 @@ impl Executor { #[cfg(test)] mod tests { + use tantivy_fst::Ulen; + use super::Executor; #[test] #[should_panic(expected = "panic should propagate")] fn test_panic_propagates_single_thread() { - let _result: Vec = Executor::single_thread() + let _result: Vec = Executor::single_thread() .map( |_| { panic!("panic should propagate"); @@ -105,7 +108,7 @@ mod tests { #[test] #[should_panic] //< unfortunately the panic message is not propagated fn test_panic_propagates_multi_thread() { - let _result: Vec = Executor::multi_thread(1, "search-test") + let _result: Vec = Executor::multi_thread(1, "search-test") .unwrap() .map( |_| { diff --git a/src/core/index.rs b/src/core/index.rs index 32f064ac1f..2ab65affc9 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use super::segment::Segment; use crate::core::Executor; use crate::core::IndexMeta; diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs index 9c4a1d049c..23349bec39 100644 --- a/src/core/inverted_index_reader.rs +++ b/src/core/inverted_index_reader.rs @@ -1,5 +1,7 @@ use std::io; +use tantivy_fst::Ulen; + use crate::common::BinarySerializable; use crate::directory::FileSlice; use crate::positions::PositionReader; @@ -90,8 +92,8 @@ impl InvertedIndexReader { term_info: &TermInfo, block_postings: &mut BlockSegmentPostings, ) -> io::Result<()> { - let start_offset = term_info.postings_start_offset as usize; - let stop_offset = term_info.postings_stop_offset as usize; + let start_offset = term_info.postings_start_offset as Ulen; + let stop_offset = term_info.postings_stop_offset as Ulen; let postings_slice = self.postings_file_slice.slice(start_offset, stop_offset); block_postings.reset(term_info.doc_freq, postings_slice); Ok(()) @@ -121,8 +123,8 @@ impl InvertedIndexReader { requested_option: IndexRecordOption, ) -> io::Result { let postings_data = self.postings_file_slice.slice( - term_info.postings_start_offset as usize, - term_info.postings_stop_offset as usize, + term_info.postings_start_offset as Ulen, + term_info.postings_stop_offset as Ulen, ); BlockSegmentPostings::open( term_info.doc_freq, diff --git a/src/core/searcher.rs b/src/core/searcher.rs index 7123cfcf4a..b181762e61 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::collector::Collector; use crate::core::Executor; diff --git a/src/directory/file_slice.rs b/src/directory/file_slice.rs index 3af5c2cea1..a676818b52 100644 --- a/src/directory/file_slice.rs +++ b/src/directory/file_slice.rs @@ -50,7 +50,7 @@ impl FileHandle for &'static [u8] { impl> HasLen for T { fn len(&self) -> Ulen { - self.as_ref().len() as Ulen + (self.as_ref() as &[u8]).len() as Ulen } } @@ -159,6 +159,11 @@ impl FileSlice { self.slice(from_offset, ::len(&self)) } + /// like slice_from but inplace + pub fn advance(&mut self, from_offset: Ulen) { + self.start += from_offset; + } + /// Like `.slice(...)` but enforcing only the `to` /// boundary. 
/// diff --git a/src/directory/mmap_directory.rs b/src/directory/mmap_directory.rs index 184795d9ea..3bdb371502 100644 --- a/src/directory/mmap_directory.rs +++ b/src/directory/mmap_directory.rs @@ -14,6 +14,7 @@ use fs2::FileExt; use memmap::Mmap; use serde::{Deserialize, Serialize}; use stable_deref_trait::StableDeref; +use tantivy_fst::Ulen; use std::convert::From; use std::fmt; use std::fs::OpenOptions; @@ -62,10 +63,10 @@ fn open_mmap(full_path: &Path) -> result::Result, OpenReadError> { #[derive(Default, Clone, Debug, Serialize, Deserialize)] pub struct CacheCounters { // Number of time the cache prevents to call `mmap` - pub hit: usize, + pub hit: Ulen, // Number of time tantivy had to call `mmap` // as no entry was in the cache. - pub miss: usize, + pub miss: Ulen, } #[derive(Clone, Debug, Serialize, Deserialize)] diff --git a/src/directory/owned_bytes.rs b/src/directory/owned_bytes.rs index 6c2cce2267..0f739d08eb 100644 --- a/src/directory/owned_bytes.rs +++ b/src/directory/owned_bytes.rs @@ -8,6 +8,8 @@ use std::ops::Deref; use std::sync::Arc; use std::{fmt, io}; +use super::FileSlice; + /// An OwnedBytes simply wraps an object that owns a slice of data and exposes /// this data as a static slice. /// @@ -26,11 +28,11 @@ impl FileHandle for OwnedBytes { /*impl FakeArr for OwnedBytes { - fn len(&self) -> usize { + fn len(&self) -> Ulen { self.data.len() } - fn read_into(&self, offset: usize, buf: &mut [u8]) -> std::io::Result<()> { + fn read_into(&self, offset: Ulen, buf: &mut [u8]) -> std::io::Result<()> { let bytes = self.read_bytes(offset, offset + buf.len())?; buf.copy_from_slice(&bytes[..]); Ok(()) @@ -76,10 +78,14 @@ impl OwnedBytes { self.data } + pub fn as_file_slice(self) -> FileSlice { + FileSlice::new(Box::new(self)) + } + /// Returns the len of the slice. #[inline(always)] - pub fn len(&self) -> usize { - self.data.len() + pub fn len(&self) -> Ulen { + self.data.len() as Ulen } /// Splits the OwnedBytes into two OwnedBytes `(left, right)`. @@ -111,10 +117,10 @@ impl OwnedBytes { /// Drops the left most `advance_len` bytes. /// - /// See also [.clip(clip_len: usize))](#method.clip). + /// See also [.clip(clip_len: Ulen))](#method.clip). #[inline(always)] pub fn advance(&mut self, advance_len: usize) { - self.data = &self.data[advance_len..] + self.data = &self.data[advance_len as usize..] } /// Reads an `u8` from the `OwnedBytes` and advance by one byte. diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 21bd578256..5514ac0539 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -5,6 +5,7 @@ use crate::directory::{Directory, FileSlice, WatchCallback, WatchHandle}; use crate::directory::{TerminatingWrite, WritePtr}; use crate::{common::HasLen, core::META_FILEPATH}; use fail::fail_point; +use tantivy_fst::Ulen; use std::collections::HashMap; use std::fmt; use std::io::{self, BufWriter, Cursor, Seek, SeekFrom, Write}; @@ -114,8 +115,8 @@ impl InnerDirectory { self.watch_router.subscribe(watch_handle) } - fn total_mem_usage(&self) -> usize { - self.fs.values().map(|f| f.len() as usize).sum() + fn total_mem_usage(&self) -> Ulen { + self.fs.values().map(|f| f.len() as Ulen).sum() } } @@ -137,7 +138,7 @@ impl RAMDirectory { /// Returns the sum of the size of the different files /// in the RAMDirectory. 
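
// The as_file_slice() helper added above lets eagerly-loaded bytes re-enter
// the lazy FileSlice world by boxing OwnedBytes behind the FileHandle trait
// (FileSlice::new(Box::new(self))). The shape of that adapter on plain types,
// as a sketch (Handle/Owned are illustrative stand-ins):
trait Handle {
    fn len(&self) -> u64;
}
struct Owned(Vec<u8>);
impl Handle for Owned {
    fn len(&self) -> u64 {
        self.0.len() as u64
    }
}
fn into_handle(bytes: Vec<u8>) -> Box<dyn Handle> {
    Box::new(Owned(bytes)) // mirrors FileSlice::new(Box::new(owned_bytes))
}
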
- pub fn total_mem_usage(&self) -> usize { + pub fn total_mem_usage(&self) -> Ulen { self.fs.read().unwrap().total_mem_usage() } diff --git a/src/directory/watch_event_router.rs b/src/directory/watch_event_router.rs index 72160dee95..a54be9161e 100644 --- a/src/directory/watch_event_router.rs +++ b/src/directory/watch_event_router.rs @@ -110,8 +110,9 @@ impl WatchCallbackList { mod tests { use crate::directory::{WatchCallback, WatchCallbackList}; use futures::executor::block_on; - use std::mem; - use std::sync::atomic::{AtomicUsize, Ordering}; + use tantivy_fst::Ulen; + use std::{mem, sync::atomic::AtomicUsize}; + use std::sync::atomic::{Ordering}; use std::sync::Arc; #[test] diff --git a/src/docset.rs b/src/docset.rs index 3c5dfdd315..6c5ca1bcf7 100644 --- a/src/docset.rs +++ b/src/docset.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::fastfield::DeleteBitSet; use crate::DocId; use std::borrow::Borrow; @@ -67,10 +69,10 @@ pub trait DocSet: Send { for (i, buffer_val) in buffer.iter_mut().enumerate() { *buffer_val = self.doc(); if self.advance() == TERMINATED { - return i + 1; + return i + 1 as usize; } } - buffer.len() + buffer.len() as usize } /// Returns the current document diff --git a/src/fastfield/bytes/reader.rs b/src/fastfield/bytes/reader.rs index 123d6a89be..f192b16cab 100644 --- a/src/fastfield/bytes/reader.rs +++ b/src/fastfield/bytes/reader.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::directory::FileSlice; use crate::directory::OwnedBytes; use crate::fastfield::FastFieldReader; @@ -28,20 +30,20 @@ impl BytesFastFieldReader { Ok(BytesFastFieldReader { idx_reader, values }) } - fn range(&self, doc: DocId) -> (usize, usize) { - let start = self.idx_reader.get(doc) as usize; - let stop = self.idx_reader.get(doc + 1) as usize; + fn range(&self, doc: DocId) -> (Ulen, Ulen) { + let start = self.idx_reader.get(doc) as Ulen; + let stop = self.idx_reader.get(doc + 1) as Ulen; (start, stop) } /// Returns the bytes associated to the given `doc` pub fn get_bytes(&self, doc: DocId) -> &[u8] { let (start, stop) = self.range(doc); - &self.values.as_slice()[start..stop] + &self.values.as_slice()[start as usize..stop as usize] } /// Returns the overall number of bytes in this bytes fast field. - pub fn total_num_bytes(&self) -> usize { + pub fn total_num_bytes(&self) -> Ulen { self.values.len() } } diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs index 58d9b77b34..80c87fe760 100644 --- a/src/fastfield/delete.rs +++ b/src/fastfield/delete.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::common::{BitSet, HasLen}; use crate::directory::FileSlice; use crate::directory::OwnedBytes; @@ -41,7 +43,7 @@ pub fn write_delete_bitset( #[derive(Clone)] pub struct DeleteBitSet { data: OwnedBytes, - len: usize, + len: Ulen, } impl DeleteBitSet { @@ -66,10 +68,10 @@ impl DeleteBitSet { /// Opens a delete bitset given its file. 
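
// The DeleteBitSet::open() that follows counts deleted documents by
// popcounting every byte of the bitset once, up front. The same computation
// standalone:
fn count_deleted(bitset_bytes: &[u8]) -> u64 {
    bitset_bytes.iter().map(|b| u64::from(b.count_ones())).sum()
}
// e.g. count_deleted(&[0b0000_0101]) == 2
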
pub fn open(file: FileSlice) -> crate::Result { let bytes = file.read_bytes()?; - let num_deleted: usize = bytes + let num_deleted: Ulen = bytes .as_slice() .iter() - .map(|b| b.count_ones() as usize) + .map(|b| b.count_ones() as Ulen) .sum(); Ok(DeleteBitSet { data: bytes, @@ -98,7 +100,7 @@ impl DeleteBitSet { } impl HasLen for DeleteBitSet { - fn len(&self) -> usize { + fn len(&self) -> Ulen { self.len } } diff --git a/src/fastfield/facet_reader.rs b/src/fastfield/facet_reader.rs index 6f802c153a..c8b95f7952 100644 --- a/src/fastfield/facet_reader.rs +++ b/src/fastfield/facet_reader.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use super::MultiValuedFastFieldReader; use crate::error::DataCorruption; use crate::schema::Facet; @@ -49,7 +51,7 @@ impl FacetReader { /// as deleted. /// /// `Facet` ordinals range from `0` to `num_facets() - 1`. - pub fn num_facets(&self) -> usize { + pub fn num_facets(&self) -> Ulen { self.term_dict.num_terms() } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index f7e9348d4b..1e12d0ee34 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -214,6 +214,7 @@ mod tests { use rand::prelude::SliceRandom; use rand::rngs::StdRng; use rand::SeedableRng; + use tantivy_fst::Ulen; use std::collections::HashMap; use std::path::Path; @@ -256,7 +257,7 @@ mod tests { serializer.close().unwrap(); } let file = directory.open_read(&path).unwrap(); - assert_eq!(file.len(), 36 as usize); + assert_eq!(file.len(), 36 as Ulen); let composite_file = CompositeFile::open(&file)?; let file = composite_file.open_read(*FIELD).unwrap(); let fast_field_reader = FastFieldReader::::open(file)?; @@ -287,7 +288,7 @@ mod tests { serializer.close()?; } let file = directory.open_read(&path)?; - assert_eq!(file.len(), 61 as usize); + assert_eq!(file.len(), 61 as Ulen); { let fast_fields_composite = CompositeFile::open(&file)?; let data = fast_fields_composite.open_read(*FIELD).unwrap(); @@ -323,7 +324,7 @@ mod tests { serializer.close().unwrap(); } let file = directory.open_read(&path).unwrap(); - assert_eq!(file.len(), 34 as usize); + assert_eq!(file.len(), 34 as Ulen); { let fast_fields_composite = CompositeFile::open(&file).unwrap(); let data = fast_fields_composite.open_read(*FIELD).unwrap(); @@ -355,7 +356,7 @@ mod tests { serializer.close().unwrap(); } let file = directory.open_read(&path).unwrap(); - assert_eq!(file.len(), 80042 as usize); + assert_eq!(file.len(), 80042 as Ulen); { let fast_fields_composite = CompositeFile::open(&file)?; let data = fast_fields_composite.open_read(*FIELD).unwrap(); @@ -394,7 +395,7 @@ mod tests { serializer.close().unwrap(); } let file = directory.open_read(&path).unwrap(); - assert_eq!(file.len(), 17709 as usize); + assert_eq!(file.len(), 17709 as Ulen); { let fast_fields_composite = CompositeFile::open(&file)?; let data = fast_fields_composite.open_read(i64_field).unwrap(); @@ -589,7 +590,7 @@ mod bench { let n = test::black_box(7000u32); let mut a = 0u64; for i in (0u32..n / 7).map(|v| v * 7) { - a ^= permutation[i as usize]; + a ^= permutation[i as Ulen]; } a }); @@ -602,7 +603,7 @@ mod bench { let n = test::black_box(1000u32); let mut a = 0u64; for _ in 0u32..n { - a = permutation[a as usize]; + a = permutation[a as Ulen]; } a }); diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index ac0d7775da..5b11309949 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::fastfield::{FastFieldReader, FastValue}; use 
crate::DocId; @@ -37,15 +39,15 @@ impl MultiValuedFastFieldReader { /// Returns the array of values associated to the given `doc`. pub fn get_vals(&self, doc: DocId, vals: &mut Vec) { let (start, stop) = self.range(doc); - let len = (stop - start) as usize; - vals.resize(len, Item::make_zero()); + let len = (stop - start) as Ulen; + vals.resize(len as usize, Item::make_zero()); self.vals_reader.get_range_u64(start, &mut vals[..]); } /// Returns the number of values associated with the document `DocId`. - pub fn num_vals(&self, doc: DocId) -> usize { + pub fn num_vals(&self, doc: DocId) -> Ulen { let (start, stop) = self.range(doc); - (stop - start) as usize + (stop - start) as Ulen } /// Returns the overall number of values in this field . diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs index 9caf116ed9..b12d284fce 100644 --- a/src/fastfield/multivalued/writer.rs +++ b/src/fastfield/multivalued/writer.rs @@ -6,6 +6,7 @@ use crate::schema::{Document, Field}; use crate::termdict::TermOrdinal; use crate::DocId; use fnv::FnvHashMap; +use tantivy_fst::Ulen; use std::io; /// Writer for multi-valued (as in, more than one value per document) @@ -136,10 +137,10 @@ impl MultiValuedFastFieldWriter { .windows(2) .map(|interval| (interval[0], interval[1])) .chain(Some(last_interval).into_iter()) - .map(|(start, stop)| (start as usize, stop as usize)) + .map(|(start, stop)| (start as Ulen, stop as Ulen)) { doc_vals.clear(); - let remapped_vals = self.vals[start..stop] + let remapped_vals = self.vals[start as usize..stop as usize] .iter() .map(|val| *mapping.get(val).expect("Missing term ordinal")); doc_vals.extend(remapped_vals); diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index 84d5a11fd2..2bec979383 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::common::CompositeFile; use crate::directory::FileSlice; use crate::fastfield::MultiValuedFastFieldReader; @@ -58,7 +60,7 @@ impl FastFieldReaders { self.fast_fields_composite.space_usage() } - fn fast_field_data(&self, field: Field, idx: usize) -> crate::Result { + fn fast_field_data(&self, field: Field, idx: Ulen) -> crate::Result { self.fast_fields_composite .open_read_with_idx(field, idx) .ok_or_else(|| { diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs index 60f3c1b975..f82b849581 100644 --- a/src/fastfield/serializer.rs +++ b/src/fastfield/serializer.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::common::bitpacker::BitPacker; use crate::common::compute_num_bits; use crate::common::BinarySerializable; @@ -55,7 +57,7 @@ impl FastFieldSerializer { field: Field, min_value: u64, max_value: u64, - idx: usize, + idx: Ulen, ) -> io::Result>> { let field_write = self.composite_write.for_field_with_idx(field, idx); FastSingleFieldSerializer::open(field_write, min_value, max_value) @@ -65,7 +67,7 @@ impl FastFieldSerializer { pub fn new_bytes_fast_field_with_idx( &mut self, field: Field, - idx: usize, + idx: Ulen, ) -> io::Result>> { let field_write = self.composite_write.for_field_with_idx(field, idx); FastBytesFieldSerializer::open(field_write) diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index 79f9965ce2..3a46814dd3 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -7,6 +7,7 @@ use crate::postings::UnorderedTermId; use crate::schema::{Cardinality, Document, Field, FieldEntry, FieldType, Schema}; use crate::termdict::TermOrdinal; use fnv::FnvHashMap; +use 
tantivy_fst::Ulen; use std::collections::HashMap; use std::io; @@ -158,7 +159,7 @@ impl FastFieldsWriter { pub struct IntFastFieldWriter { field: Field, vals: Vec, - val_count: usize, + val_count: Ulen, val_if_missing: u64, val_min: u64, val_max: u64, diff --git a/src/fieldnorm/code.rs b/src/fieldnorm/code.rs index 4c4e78e733..fb066fc69a 100644 --- a/src/fieldnorm/code.rs +++ b/src/fieldnorm/code.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + #[inline(always)] pub fn id_to_fieldnorm(id: u8) -> u32 { FIELD_NORMS_TABLE[id as usize] diff --git a/src/fieldnorm/reader.rs b/src/fieldnorm/reader.rs index f1f8ef805b..ada4270b0e 100644 --- a/src/fieldnorm/reader.rs +++ b/src/fieldnorm/reader.rs @@ -1,4 +1,4 @@ -use tantivy_fst::FakeArr; +use tantivy_fst::{FakeArr, Ulen}; use super::{fieldnorm_to_id, id_to_fieldnorm}; use crate::{HasLen, common::CompositeFile}; @@ -126,7 +126,7 @@ impl FieldNormReader { pub fn fieldnorm(&self, doc_id: DocId) -> u32 { match &self.0 { ReaderImplEnum::FromData(data) => { - let fieldnorm_id = data.get_byte(doc_id as usize); + let fieldnorm_id = data.get_byte(doc_id as Ulen); id_to_fieldnorm(fieldnorm_id) } ReaderImplEnum::Const { fieldnorm, .. } => *fieldnorm, @@ -138,7 +138,7 @@ impl FieldNormReader { pub fn fieldnorm_id(&self, doc_id: DocId) -> u8 { match &self.0 { ReaderImplEnum::FromData(data) => { - let fieldnorm_id = data.get_byte(doc_id as usize); + let fieldnorm_id = data.get_byte(doc_id as Ulen); fieldnorm_id } ReaderImplEnum::Const { fieldnorm_id, .. } => *fieldnorm_id, @@ -166,7 +166,7 @@ impl FieldNormReader { .map(FieldNormReader::fieldnorm_to_id) .collect::>(); let field_norms_data = OwnedBytes::new(field_norms_id); - FieldNormReader::new(field_norms_data) + FieldNormReader::new(field_norms_data.as_file_slice()) } } diff --git a/src/fieldnorm/writer.rs b/src/fieldnorm/writer.rs index 061522e5c8..a66f0c4b8b 100644 --- a/src/fieldnorm/writer.rs +++ b/src/fieldnorm/writer.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::DocId; use super::fieldnorm_to_id; diff --git a/src/functional_test.rs b/src/functional_test.rs index 478a996861..1fbca2426e 100644 --- a/src/functional_test.rs +++ b/src/functional_test.rs @@ -3,6 +3,7 @@ use crate::Searcher; use crate::{doc, schema::*}; use rand::thread_rng; use rand::Rng; +use tantivy_fst::Ulen; use std::collections::HashSet; fn check_index_content(searcher: &Searcher, vals: &[u64]) -> crate::Result<()> { @@ -37,7 +38,7 @@ fn test_functional_store() -> crate::Result<()> { let mut doc_id = 0u64; for iteration in 0..500 { dbg!(iteration); - let num_docs: usize = rng.gen_range(0..4); + let num_docs: Ulen = rng.gen_range(0..4); if doc_set.len() >= 1 { let doc_to_remove_id = rng.gen_range(0..doc_set.len()); let removed_doc_id = doc_set.swap_remove(doc_to_remove_id); diff --git a/src/indexer/delete_queue.rs b/src/indexer/delete_queue.rs index ba445dd3be..fe22745c78 100644 --- a/src/indexer/delete_queue.rs +++ b/src/indexer/delete_queue.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use super::operation::DeleteOperation; use crate::Opstamp; use std::mem; @@ -247,6 +249,8 @@ impl DeleteCursor { #[cfg(test)] mod tests { + use tantivy_fst::Ulen; + use super::{DeleteOperation, DeleteQueue}; use crate::schema::{Field, Term}; @@ -254,7 +258,7 @@ mod tests { fn test_deletequeue() { let delete_queue = DeleteQueue::new(); - let make_op = |i: usize| { + let make_op = |i: Ulen| { let field = Field::from_field_id(1u32); DeleteOperation { opstamp: i as u64, diff --git a/src/indexer/log_merge_policy.rs b/src/indexer/log_merge_policy.rs index 
455c7d4559..7318dd444f 100644 --- a/src/indexer/log_merge_policy.rs +++ b/src/indexer/log_merge_policy.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use super::merge_policy::{MergeCandidate, MergePolicy}; use crate::core::SegmentMeta; use std::cmp; diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index d91f2f8e3f..bee5df0685 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::common::MAX_DOC_LIMIT; use crate::core::Segment; use crate::core::SegmentReader; @@ -27,7 +29,7 @@ use std::sync::Arc; fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> crate::Result { let mut total_tokens = 0u64; - let mut count: [usize; 256] = [0; 256]; + let mut count: [Ulen; 256] = [0; 256]; for reader in readers { if reader.has_deletes() { // if there are deletes, then we use an approximation @@ -725,6 +727,7 @@ mod tests { use byteorder::{BigEndian, ReadBytesExt}; use futures::executor::block_on; use schema::FAST; + use tantivy_fst::Ulen; #[test] fn test_index_merger_no_deletes() -> crate::Result<()> { diff --git a/src/indexer/segment_updater.rs b/src/indexer/segment_updater.rs index d0cb240bc3..f312f30d76 100644 --- a/src/indexer/segment_updater.rs +++ b/src/indexer/segment_updater.rs @@ -23,6 +23,7 @@ use futures::channel::oneshot; use futures::executor::{ThreadPool, ThreadPoolBuilder}; use futures::future::Future; use futures::future::TryFutureExt; +use tantivy_fst::Ulen; use std::borrow::BorrowMut; use std::collections::HashSet; use std::io::Write; diff --git a/src/lib.rs b/src/lib.rs index 4f985785fc..6fd6038a23 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -295,6 +295,7 @@ mod tests { use rand::distributions::Uniform; use rand::rngs::StdRng; use rand::{Rng, SeedableRng}; + use tantivy_fst::Ulen; /// Checks if left and right are close one to each other. /// Panics if the two values are more than 0.5% apart. diff --git a/src/positions/mod.rs b/src/positions/mod.rs index d2bc278559..9052d5ef73 100644 --- a/src/positions/mod.rs +++ b/src/positions/mod.rs @@ -29,8 +29,9 @@ mod serializer; pub use self::reader::PositionReader; pub use self::serializer::PositionSerializer; use bitpacking::{BitPacker, BitPacker4x}; +use tantivy_fst::Ulen; -const COMPRESSION_BLOCK_SIZE: usize = BitPacker4x::BLOCK_LEN; +const COMPRESSION_BLOCK_SIZE: usize = BitPacker4x::BLOCK_LEN as usize; const LONG_SKIP_IN_BLOCKS: usize = 1_024; const LONG_SKIP_INTERVAL: u64 = (LONG_SKIP_IN_BLOCKS * COMPRESSION_BLOCK_SIZE) as u64; diff --git a/src/positions/reader.rs b/src/positions/reader.rs index eded0c613a..d58573c37d 100644 --- a/src/positions/reader.rs +++ b/src/positions/reader.rs @@ -7,6 +7,7 @@ use crate::positions::COMPRESSION_BLOCK_SIZE; use crate::positions::LONG_SKIP_INTERVAL; use crate::positions::LONG_SKIP_IN_BLOCKS; use bitpacking::{BitPacker, BitPacker4x}; +use tantivy_fst::{FakeArr, Ulen}; /// Positions works as a long sequence of compressed block. /// All terms are chained one after the other. 
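
// The long-skip lookup below stores one u64 file offset per LONG_SKIP_IN_BLOCKS
// (1,024) position blocks, with entry 0 implicit (offset 0), so entry i-1 holds
// the byte offset of block i*1024. The index arithmetic, sketched standalone:
fn long_skip_entry_range(long_skip_id: u64) -> Option<(u64, u64)> {
    // byte range of the 8-byte entry inside the long-skip table, None for id 0
    if long_skip_id == 0 {
        return None;
    }
    let from = (long_skip_id - 1) * 8;
    Some((from, from + 8))
}
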
@@ -33,7 +34,7 @@ struct Positions { bit_packer: BitPacker4x, skip_file: FileSlice, position_file: FileSlice, - long_skip_data: OwnedBytes, + long_skip_data: FileSlice, } impl Positions { @@ -42,12 +43,11 @@ impl Positions { let footer_data = footer.read_bytes()?; let num_long_skips = u32::deserialize(&mut footer_data.as_slice())?; let (skip_file, long_skip_file) = - body.split_from_end(u64::SIZE_IN_BYTES * (num_long_skips as usize)); - let long_skip_data = long_skip_file.read_bytes()?; + body.split_from_end(u64::SIZE_IN_BYTES * (num_long_skips as Ulen)); Ok(Positions { bit_packer: BitPacker4x::new(), skip_file, - long_skip_data, + long_skip_data: long_skip_file, position_file, }) } @@ -55,26 +55,24 @@ impl Positions { /// Returns the offset of the block associated to the given `long_skip_id`. /// /// One `long_skip_id` means `LONG_SKIP_IN_BLOCKS` blocks. - fn long_skip(&self, long_skip_id: usize) -> u64 { + fn long_skip(&self, long_skip_id: Ulen) -> u64 { if long_skip_id == 0 { return 0; } - let long_skip_slice = self.long_skip_data.as_slice(); - let mut long_skip_blocks: &[u8] = &long_skip_slice[(long_skip_id - 1) * 8..][..8]; + let from = (long_skip_id - 1) * 8; + let mut long_skip_blocks: &[u8] = &self.long_skip_data.slice(from, from + 8).to_vec(); u64::deserialize(&mut long_skip_blocks).expect("Index corrupted") } fn reader(&self, offset: u64) -> io::Result { - let long_skip_id = (offset / LONG_SKIP_INTERVAL) as usize; + let long_skip_id = (offset / LONG_SKIP_INTERVAL) as Ulen; let offset_num_bytes: u64 = self.long_skip(long_skip_id); let position_read = self .position_file - .slice_from(offset_num_bytes as usize) - .read_bytes()?; + .slice_from(offset_num_bytes as Ulen); let skip_read = self .skip_file - .slice_from(long_skip_id * LONG_SKIP_IN_BLOCKS) - .read_bytes()?; + .slice_from(long_skip_id * LONG_SKIP_IN_BLOCKS as Ulen); Ok(PositionReader { bit_packer: self.bit_packer, skip_read, @@ -89,10 +87,10 @@ impl Positions { #[derive(Clone)] pub struct PositionReader { - skip_read: OwnedBytes, - position_read: OwnedBytes, + skip_read: FileSlice, + position_read: FileSlice, bit_packer: BitPacker4x, - buffer: Box<[u32; COMPRESSION_BLOCK_SIZE]>, + buffer: Box<[u32; COMPRESSION_BLOCK_SIZE as usize]>, block_offset: u64, anchor_offset: u64, @@ -110,15 +108,15 @@ impl PositionReader { positions.reader(offset) } - fn advance_num_blocks(&mut self, num_blocks: usize) { - let num_bits: usize = self.skip_read.as_ref()[..num_blocks] + fn advance_num_blocks(&mut self, num_blocks: Ulen) { + let num_bits: usize = self.skip_read.slice(0, num_blocks).to_vec() .iter() .cloned() .map(|num_bits| num_bits as usize) .sum(); let num_bytes_to_skip = num_bits * COMPRESSION_BLOCK_SIZE / 8; - self.skip_read.advance(num_blocks as usize); - self.position_read.advance(num_bytes_to_skip); + self.skip_read.advance(num_blocks as Ulen); + self.position_read.advance(num_bytes_to_skip as Ulen); } /// Fills a buffer with the positions `[offset..offset+output.len())` integers. @@ -137,22 +135,23 @@ impl PositionReader { // We need to decompress the first block. 
let delta_to_anchor_offset = offset - self.anchor_offset; let num_blocks_to_skip = - (delta_to_anchor_offset / (COMPRESSION_BLOCK_SIZE as u64)) as usize; + (delta_to_anchor_offset / (COMPRESSION_BLOCK_SIZE as u64)) as Ulen; self.advance_num_blocks(num_blocks_to_skip); self.anchor_offset = offset - (offset % COMPRESSION_BLOCK_SIZE as u64); self.block_offset = self.anchor_offset; - let num_bits = self.skip_read.as_slice()[0]; + let num_bits = self.skip_read.get_byte(0); self.bit_packer - .decompress(self.position_read.as_ref(), self.buffer.as_mut(), num_bits); + .decompress(&self.position_read.to_vec(), self.buffer.as_mut(), num_bits); } else { let num_blocks_to_skip = - ((self.block_offset - self.anchor_offset) / COMPRESSION_BLOCK_SIZE as u64) as usize; + ((self.block_offset - self.anchor_offset) / COMPRESSION_BLOCK_SIZE as u64) as Ulen; self.advance_num_blocks(num_blocks_to_skip); self.anchor_offset = self.block_offset; } - let mut num_bits = self.skip_read.as_slice()[0]; - let mut position_data = self.position_read.as_ref(); + let mut num_bits = self.skip_read.get_byte(0); + let position_data = self.position_read.to_vec(); + let mut position_data = position_data.as_slice(); for i in 1.. { let offset_in_block = (offset as usize) % COMPRESSION_BLOCK_SIZE; @@ -165,7 +164,7 @@ impl PositionReader { output = &mut output[remaining_in_block..]; offset += remaining_in_block as u64; position_data = &position_data[(num_bits as usize * COMPRESSION_BLOCK_SIZE / 8)..]; - num_bits = self.skip_read.as_slice()[i]; + num_bits = self.skip_read.get_byte(i); self.bit_packer .decompress(position_data, self.buffer.as_mut(), num_bits); self.block_offset += COMPRESSION_BLOCK_SIZE as u64; diff --git a/src/positions/serializer.rs b/src/positions/serializer.rs index 72eb652b41..b86d7be5bb 100644 --- a/src/positions/serializer.rs +++ b/src/positions/serializer.rs @@ -3,6 +3,7 @@ use crate::common::CountingWriter; use crate::positions::{COMPRESSION_BLOCK_SIZE, LONG_SKIP_INTERVAL}; use bitpacking::BitPacker; use bitpacking::BitPacker4x; +use tantivy_fst::Ulen; use std::io::{self, Write}; pub struct PositionSerializer { diff --git a/src/postings/block_search.rs b/src/postings/block_search.rs index 08cd553796..1b9e16147f 100644 --- a/src/postings/block_search.rs +++ b/src/postings/block_search.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::postings::compression::AlignedBuffer; /// This modules define the logic used to search for a doc in a given @@ -8,6 +10,8 @@ use crate::postings::compression::AlignedBuffer; #[cfg(target_arch = "x86_64")] mod sse2 { + use tantivy_fst::Ulen; + use crate::postings::compression::{AlignedBuffer, COMPRESSION_BLOCK_SIZE}; use std::arch::x86_64::__m128i as DataType; use std::arch::x86_64::_mm_add_epi32 as op_add; @@ -53,7 +57,7 @@ mod sse2 { #[test] fn test_linear_search_sse2_128_u32() { - let mut block = [0u32; COMPRESSION_BLOCK_SIZE]; + let mut block = [0u32; COMPRESSION_BLOCK_SIZE as usize]; for el in 0u32..128u32 { block[el as usize] = el * 2 + 1 << 18; } @@ -72,7 +76,7 @@ fn linear_search(arr: &[u32], target: u32) -> usize { arr.iter().map(|&el| if el < target { 1 } else { 0 }).sum() } -fn exponential_search(arr: &[u32], target: u32) -> (usize, usize) { +fn exponential_search(arr: &[u32], target: u32) -> (usize,usize) { let end = arr.len(); let mut begin = 0; for &pivot in &[1, 3, 7, 15, 31, 63] { @@ -159,6 +163,8 @@ impl Default for BlockSearcher { #[cfg(test)] mod tests { + use tantivy_fst::Ulen; + use super::exponential_search; use super::linear_search; use 
super::BlockSearcher; @@ -193,7 +199,7 @@ mod tests { fn util_test_search_in_block(block_searcher: BlockSearcher, block: &[u32], target: u32) { let cursor = search_in_block_trivial_but_slow(block, target); assert!(block.len() < COMPRESSION_BLOCK_SIZE); - let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE]; + let mut output_buffer = [TERMINATED; COMPRESSION_BLOCK_SIZE as usize]; output_buffer[..block.len()].copy_from_slice(block); assert_eq!( block_searcher.search_in_block(&AlignedBuffer(output_buffer), target), diff --git a/src/postings/block_segment_postings.rs b/src/postings/block_segment_postings.rs index 6633d73611..7cbfe6edd4 100644 --- a/src/postings/block_segment_postings.rs +++ b/src/postings/block_segment_postings.rs @@ -11,7 +11,7 @@ use crate::postings::{BlockInfo, FreqReadingOption, SkipReader}; use crate::query::BM25Weight; use crate::schema::IndexRecordOption; use crate::{DocId, Score, TERMINATED}; -use tantivy_fst::FakeArr; +use tantivy_fst::{FakeArr, Ulen}; fn max_score>(mut it: I) -> Option { if let Some(first) = it.next() { @@ -31,7 +31,7 @@ fn max_score>(mut it: I) -> Option { #[derive(Clone)] pub struct BlockSegmentPostings { pub(crate) doc_decoder: BlockDecoder, - loaded_offset: usize, + loaded_offset: Ulen, freq_decoder: BlockDecoder, freq_reading_option: FreqReadingOption, block_max_score_cache: Option, @@ -50,13 +50,13 @@ fn decode_bitpacked_block( doc_num_bits: u8, tf_num_bits: u8, ) { - let num_bytes_docs = 128 * (doc_num_bits as usize) / 8; // 128 integers per bitpacker4x block. should be same as num_consumed_bytes returned by uncompress block + let num_bytes_docs = 128 * (doc_num_bits as Ulen) / 8; // 128 integers per bitpacker4x block. should be same as num_consumed_bytes returned by uncompress block let num_bytes_freqs = freq_decoder_opt.as_ref() - .map(|_| 128 * (tf_num_bits as usize) / 8) + .map(|_| 128 * (tf_num_bits as Ulen) / 8) .unwrap_or(0); let data = data.slice((0..num_bytes_docs + num_bytes_freqs).into()).to_vec(); let num_consumed_bytes = doc_decoder.uncompress_block_sorted(&data, doc_offset, doc_num_bits); - assert_eq!(num_bytes_docs, num_consumed_bytes); + assert_eq!(num_bytes_docs, num_consumed_bytes as Ulen); if let Some(freq_decoder) = freq_decoder_opt { freq_decoder.uncompress_block_unsorted(&data[num_consumed_bytes..], tf_num_bits); } @@ -73,7 +73,7 @@ fn decode_vint_block( doc_decoder.uncompress_vint_sorted(data, doc_offset, num_vint_docs, TERMINATED); if let Some(freq_decoder) = freq_decoder_opt { freq_decoder.uncompress_vint_unsorted( - &data.slice((num_consumed_bytes..).into()), + &data.slice((num_consumed_bytes as Ulen..).into()), num_vint_docs, TERMINATED, ); @@ -87,7 +87,7 @@ fn split_into_skips_and_postings(doc_freq: u32, data: FileSlice) -> (Option(input: &[u32], output: &'a mut [u8], mut offset: u32) -> &'a [u8] { @@ -45,12 +45,12 @@ pub(crate) fn compress_unsorted<'a>(input: &[u32], output: &'a mut [u8]) -> &'a #[inline(always)] pub fn uncompress_sorted(compressed_data: &dyn FakeArr, output: &mut [u32], offset: u32) -> usize { - let mut read_byte = 0; + let mut read_byte: usize = 0; let mut result = offset; for output_mut in output.iter_mut() { let mut shift = 0u32; loop { - let cur_byte = compressed_data.get_byte(read_byte); + let cur_byte = compressed_data.get_byte(read_byte as Ulen); read_byte += 1; result += u32::from(cur_byte % 128u8) << shift; if cur_byte & 128u8 != 0u8 { @@ -65,12 +65,12 @@ pub fn uncompress_sorted(compressed_data: &dyn FakeArr, output: &mut [u32], offs #[inline(always)] pub(crate) fn 
uncompress_unsorted(compressed_data: &dyn FakeArr, output_arr: &mut [u32]) -> usize { - let mut read_byte = 0; + let mut read_byte: usize = 0; for output_mut in output_arr.iter_mut() { let mut result = 0u32; let mut shift = 0u32; loop { - let cur_byte = compressed_data.get_byte(read_byte); + let cur_byte = compressed_data.get_byte(read_byte as Ulen); read_byte += 1; result += u32::from(cur_byte % 128u8) << shift; if cur_byte & 128u8 != 0u8 { diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index 1c79b8b1e2..000bc5dbca 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::common::HasLen; use crate::docset::DocSet; use crate::fastfield::DeleteBitSet; @@ -99,9 +101,9 @@ impl SegmentPostings { fieldnorms: Option<&[u32]>, ) -> SegmentPostings { use crate::directory::FileSlice; + use crate::fieldnorm::FieldNormReader; use crate::postings::serializer::PostingsSerializer; use crate::schema::IndexRecordOption; - use crate::fieldnorm::FieldNormReader; use crate::Score; let mut buffer: Vec = Vec::new(); let fieldnorm_reader = fieldnorms.map(FieldNormReader::for_test); @@ -165,7 +167,7 @@ impl DocSet for SegmentPostings { #[inline] fn advance(&mut self) -> DocId { debug_assert!(self.block_cursor.block_is_loaded()); - if self.cur == COMPRESSION_BLOCK_SIZE - 1 { + if self.cur == (COMPRESSION_BLOCK_SIZE - 1) { self.cur = 0; self.block_cursor.advance(); } else { @@ -215,8 +217,8 @@ impl DocSet for SegmentPostings { } impl HasLen for SegmentPostings { - fn len(&self) -> usize { - self.block_cursor.doc_freq() as usize + fn len(&self) -> Ulen { + self.block_cursor.doc_freq() as Ulen } } diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index 9e8d9c39a2..ecd9f5b318 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use super::TermInfo; use crate::common::{BinarySerializable, VInt}; use crate::common::{CompositeWrite, CountingWriter}; @@ -263,16 +265,16 @@ impl<'a> FieldSerializer<'a> { } struct Block { - doc_ids: [DocId; COMPRESSION_BLOCK_SIZE], - term_freqs: [u32; COMPRESSION_BLOCK_SIZE], + doc_ids: [DocId; COMPRESSION_BLOCK_SIZE as usize], + term_freqs: [u32; COMPRESSION_BLOCK_SIZE as usize], len: usize, } impl Block { fn new() -> Self { Block { - doc_ids: [0u32; COMPRESSION_BLOCK_SIZE], - term_freqs: [0u32; COMPRESSION_BLOCK_SIZE], + doc_ids: [0u32; COMPRESSION_BLOCK_SIZE as usize], + term_freqs: [0u32; COMPRESSION_BLOCK_SIZE as usize], len: 0, } } diff --git a/src/postings/skip.rs b/src/postings/skip.rs index f2e9c8bab9..81d7540c73 100644 --- a/src/postings/skip.rs +++ b/src/postings/skip.rs @@ -1,6 +1,6 @@ use std::convert::TryInto; -use tantivy_fst::FakeArr; +use tantivy_fst::{FakeArr, Ulen}; use crate::directory::{FileSlice, OwnedBytes}; use crate::postings::compression::{compressed_block_size, COMPRESSION_BLOCK_SIZE}; @@ -75,7 +75,7 @@ pub(crate) struct SkipReader { pub(crate) last_doc_in_previous_block: DocId, owned_read: FileSlice, skip_info: IndexRecordOption, - byte_offset: usize, + byte_offset: Ulen, remaining_docs: u32, // number of docs remaining, including the // documents in the current block. 
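
// The VInt decoders above read 7 payload bits per byte and treat a *set* high
// bit as the terminator (note the stop-bit convention is inverted relative to
// LEB128). One decode step, as a standalone sketch:
fn decode_vint(data: &[u8]) -> (u32, usize) {
    // returns (value, bytes consumed)
    let mut value = 0u32;
    let mut shift = 0u32;
    for (i, &byte) in data.iter().enumerate() {
        value += u32::from(byte % 128) << shift;
        if byte & 128 != 0 {
            return (value, i + 1);
        }
        shift += 7;
    }
    (value, data.len())
}
// e.g. decode_vint(&[0x85]) == (5, 1)
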
block_info: BlockInfo, @@ -166,13 +166,13 @@ impl SkipReader { } #[inline(always)] - pub fn byte_offset(&self) -> usize { + pub fn byte_offset(&self) -> Ulen { self.byte_offset } fn read_block_info(&mut self) { let bytes = &self.owned_read.slice_to(std::cmp::min(12, self.owned_read.len())).to_vec(); - let advance_len: usize; + let advance_len: Ulen; self.last_doc_in_block = read_u32(bytes); let doc_num_bits = bytes[4]; match self.skip_info { @@ -246,13 +246,13 @@ impl SkipReader { .. } => { self.remaining_docs -= COMPRESSION_BLOCK_SIZE as u32; - self.byte_offset += compressed_block_size(doc_num_bits + tf_num_bits); + self.byte_offset += compressed_block_size(doc_num_bits + tf_num_bits) as Ulen; self.position_offset += tf_sum as u64; } BlockInfo::VInt { num_docs } => { debug_assert_eq!(num_docs, self.remaining_docs); self.remaining_docs = 0; - self.byte_offset = std::usize::MAX; + self.byte_offset = Ulen::MAX; } } self.last_doc_in_previous_block = self.last_doc_in_block; @@ -273,7 +273,7 @@ mod tests { use super::BlockInfo; use super::IndexRecordOption; use super::{SkipReader, SkipSerializer}; - use crate::directory::OwnedBytes; + use crate::directory::{FileSlice, OwnedBytes}; use crate::postings::compression::COMPRESSION_BLOCK_SIZE; #[test] @@ -308,7 +308,7 @@ mod tests { }; let doc_freq = 3u32 + (COMPRESSION_BLOCK_SIZE * 2) as u32; let mut skip_reader = - SkipReader::new(OwnedBytes::new(buf), doc_freq, IndexRecordOption::WithFreqs); + SkipReader::new(OwnedBytes::new(buf).as_file_slice(), doc_freq, IndexRecordOption::WithFreqs); assert_eq!(skip_reader.last_doc_in_block(), 1u32); assert_eq!( skip_reader.block_info, @@ -350,7 +350,7 @@ mod tests { }; let doc_freq = 3u32 + (COMPRESSION_BLOCK_SIZE * 2) as u32; let mut skip_reader = - SkipReader::new(OwnedBytes::new(buf), doc_freq, IndexRecordOption::Basic); + SkipReader::new(OwnedBytes::new(buf).as_file_slice(), doc_freq, IndexRecordOption::Basic); assert_eq!(skip_reader.last_doc_in_block(), 1u32); assert_eq!( skip_reader.block_info(), @@ -391,7 +391,7 @@ mod tests { }; let doc_freq = COMPRESSION_BLOCK_SIZE as u32; let mut skip_reader = - SkipReader::new(OwnedBytes::new(buf), doc_freq, IndexRecordOption::Basic); + SkipReader::new(OwnedBytes::new(buf).as_file_slice(), doc_freq, IndexRecordOption::Basic); assert_eq!(skip_reader.last_doc_in_block(), 1u32); assert_eq!( skip_reader.block_info(), diff --git a/src/postings/term_info.rs b/src/postings/term_info.rs index 4e08f2e9f7..f9fa270270 100644 --- a/src/postings/term_info.rs +++ b/src/postings/term_info.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::common::{BinarySerializable, FixedSize}; use std::io; @@ -29,7 +31,7 @@ impl FixedSize for TermInfo { /// This is large, but in practise, `TermInfo` are encoded in blocks and /// only the first `TermInfo` of a block is serialized uncompressed. /// The subsequent `TermInfo` are delta encoded and bitpacked. 
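
// The TermInfo size constant below evaluates to 2*4 + 2*8 = 24 bytes per
// uncompressed TermInfo; as the doc comment above notes, only the first
// TermInfo of each block is stored at that full width. Standalone check:
const TERM_INFO_SIZE_IN_BYTES: u64 = 2 * 4 + 2 * 8; // two u32s + two u64s = 24
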
- const SIZE_IN_BYTES: usize = 2 * u32::SIZE_IN_BYTES + 2 * u64::SIZE_IN_BYTES; + const SIZE_IN_BYTES: Ulen = 2 * u32::SIZE_IN_BYTES + 2 * u64::SIZE_IN_BYTES; } impl BinarySerializable for TermInfo { diff --git a/src/query/vec_docset.rs b/src/query/vec_docset.rs index 89f32bd7f0..e766dddf9c 100644 --- a/src/query/vec_docset.rs +++ b/src/query/vec_docset.rs @@ -1,5 +1,7 @@ #![allow(dead_code)] +use tantivy_fst::Ulen; + use crate::common::HasLen; use crate::docset::{DocSet, TERMINATED}; use crate::DocId; @@ -43,8 +45,8 @@ impl DocSet for VecDocSet { } impl HasLen for VecDocSet { - fn len(&self) -> usize { - self.doc_ids.len() + fn len(&self) -> Ulen { + self.doc_ids.len() as Ulen } } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 679abc7c0a..7f8749163a 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -1,5 +1,7 @@ mod pool; +use tantivy_fst::Ulen; + pub use self::pool::LeasedItem; use self::pool::Pool; use crate::core::Segment; diff --git a/src/reader/pool.rs b/src/reader/pool.rs index 7134e5adb1..d11a2b89ba 100644 --- a/src/reader/pool.rs +++ b/src/reader/pool.rs @@ -1,5 +1,6 @@ use crossbeam::channel::unbounded; use crossbeam::channel::{Receiver, RecvError, Sender}; +use tantivy_fst::Ulen; use std::ops::{Deref, DerefMut}; use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; @@ -187,17 +188,18 @@ mod tests { use super::Pool; use super::Queue; use crossbeam::channel; + use tantivy_fst::Ulen; use std::{iter, mem}; #[test] fn test_pool() { - let items10: Vec = iter::repeat(10).take(10).collect(); + let items10: Vec = iter::repeat(10).take(10).collect(); let pool = Pool::new(); pool.publish_new_generation(items10); for _ in 0..20 { assert_eq!(*pool.acquire(), 10); } - let items11: Vec = iter::repeat(11).take(10).collect(); + let items11: Vec = iter::repeat(11).take(10).collect(); pool.publish_new_generation(items11); for _ in 0..20 { assert_eq!(*pool.acquire(), 11); diff --git a/src/schema/document.rs b/src/schema/document.rs index 1887821f2e..5c45d5c9d0 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use super::*; use crate::common::BinarySerializable; use crate::common::VInt; @@ -200,7 +202,7 @@ impl BinarySerializable for Document { } fn deserialize(reader: &mut R) -> io::Result { - let num_field_values = VInt::deserialize(reader)?.val() as usize; + let num_field_values = VInt::deserialize(reader)?.val() as Ulen; let field_values = (0..num_field_values) .map(|_| FieldValue::deserialize(reader)) .collect::>>()?; diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 8bdb7c1405..0204e998ef 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -9,6 +9,7 @@ use serde::de::{SeqAccess, Visitor}; use serde::ser::SerializeSeq; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_json::{self, Map as JsonObject, Value as JsonValue}; +use tantivy_fst::Ulen; use std::fmt; /// Tantivy has a very strict schema. diff --git a/src/schema/term.rs b/src/schema/term.rs index 0662e5230b..772cb31cfc 100644 --- a/src/schema/term.rs +++ b/src/schema/term.rs @@ -1,5 +1,7 @@ use std::fmt; +use tantivy_fst::Ulen; + use super::Field; use crate::common; use crate::schema::Facet; diff --git a/src/space_usage/mod.rs b/src/space_usage/mod.rs index 3bad8f8b05..72ba96fe43 100644 --- a/src/space_usage/mod.rs +++ b/src/space_usage/mod.rs @@ -12,10 +12,11 @@ under-count actual resultant space usage by up to 4095 bytes per file. 
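
// The 4095-byte caveat above reads like page-granularity rounding: a file's
// resident size rounds up to whole 4096-byte pages. Sketched standalone
// (the 4096 page size is an assumption of this sketch):
fn page_rounded(len: u64) -> u64 {
    const PAGE: u64 = 4096;
    (len + PAGE - 1) / PAGE * PAGE
}
// e.g. page_rounded(1) == 4096, i.e. up to 4095 bytes more than reported
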
use crate::schema::Field; use crate::SegmentComponent; use serde::{Deserialize, Serialize}; +use tantivy_fst::Ulen; use std::collections::HashMap; /// Indicates space usage in bytes -pub type ByteCount = usize; +pub type ByteCount = Ulen; /// Enum containing any of the possible space usage results for segment components. pub enum ComponentSpaceUsage { diff --git a/src/store/index/block.rs b/src/store/index/block.rs index 33785748c4..8c8d740a26 100644 --- a/src/store/index/block.rs +++ b/src/store/index/block.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::common::VInt; use crate::store::index::{Checkpoint, CHECKPOINT_PERIOD}; use crate::DocId; @@ -81,7 +83,7 @@ impl CheckpointBlock { return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "")); } self.checkpoints.clear(); - let len = VInt::deserialize_u64(data)? as usize; + let len = VInt::deserialize_u64(data)? as Ulen; if len == 0 { return Ok(()); } diff --git a/src/store/index/mod.rs b/src/store/index/mod.rs index 4e93128763..eeab3da4f3 100644 --- a/src/store/index/mod.rs +++ b/src/store/index/mod.rs @@ -5,6 +5,8 @@ mod block; mod skip_index; mod skip_index_builder; +use tantivy_fst::Ulen; + use crate::DocId; pub use self::skip_index::SkipIndex; @@ -49,6 +51,7 @@ mod tests { use futures::executor::block_on; use proptest::strategy::{BoxedStrategy, Strategy}; + use tantivy_fst::Ulen; use crate::directory::OwnedBytes; use crate::indexer::NoMergePolicy; diff --git a/src/store/index/skip_index.rs b/src/store/index/skip_index.rs index f64dc5efd8..2eaf416edc 100644 --- a/src/store/index/skip_index.rs +++ b/src/store/index/skip_index.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::common::{BinarySerializable, VInt}; use crate::directory::OwnedBytes; use crate::store::index::block::CheckpointBlock; diff --git a/src/store/index/skip_index_builder.rs b/src/store/index/skip_index_builder.rs index 6d46dabed8..ea9d8e630d 100644 --- a/src/store/index/skip_index_builder.rs +++ b/src/store/index/skip_index_builder.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use crate::common::{BinarySerializable, VInt}; use crate::store::index::block::CheckpointBlock; use crate::store::index::{Checkpoint, CHECKPOINT_PERIOD}; diff --git a/src/store/mod.rs b/src/store/mod.rs index 6eff6ddd70..b1e2dd8e6d 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -66,6 +66,8 @@ use self::compression_snap::{compress, decompress}; #[cfg(test)] pub mod tests { + use tantivy_fst::Ulen; + use super::*; use crate::directory::{Directory, RAMDirectory, WritePtr}; use crate::schema::Document; @@ -74,7 +76,7 @@ pub mod tests { use crate::schema::TextOptions; use std::path::Path; - pub fn write_lorem_ipsum_store(writer: WritePtr, num_docs: usize) -> Schema { + pub fn write_lorem_ipsum_store(writer: WritePtr, num_docs: Ulen) -> Schema { let mut schema_builder = Schema::builder(); let field_body = schema_builder.add_text_field("body", TextOptions::default().set_stored()); let field_title = diff --git a/src/store/reader.rs b/src/store/reader.rs index adcf114301..2fff046813 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -8,12 +8,13 @@ use crate::space_usage::StoreSpaceUsage; use crate::store::index::Checkpoint; use crate::DocId; use lru::LruCache; +use tantivy_fst::Ulen; use std::io; use std::mem::size_of; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; -const LRU_CACHE_CAPACITY: usize = 100; +const LRU_CACHE_CAPACITY: Ulen = 100; type Block = Arc>; @@ -38,7 +39,7 @@ impl StoreReader { let skip_index = SkipIndex::open(index_data); 
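
// The StoreReader being opened here keeps an LruCache of decompressed
// doc-store blocks keyed by block offset, so a read is a cache probe plus,
// on a miss, one checkpoint-bounded slice read. The probe-or-load shape,
// sketched with illustrative types (the real code uses lru::LruCache):
use std::collections::HashMap;

fn block_or_load<F>(cache: &mut HashMap<u64, Vec<u8>>, offset: u64, load: F) -> &Vec<u8>
where
    F: FnOnce() -> Vec<u8>,
{
    cache.entry(offset).or_insert_with(load)
}
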
Ok(StoreReader { data: data_file, - cache: Arc::new(Mutex::new(LruCache::new(LRU_CACHE_CAPACITY))), + cache: Arc::new(Mutex::new(LruCache::new(LRU_CACHE_CAPACITY as usize))), cache_hits: Default::default(), cache_misses: Default::default(), skip_index: Arc::new(skip_index), @@ -61,8 +62,8 @@ impl StoreReader { fn compressed_block(&self, checkpoint: &Checkpoint) -> io::Result { self.data .slice( - checkpoint.start_offset as usize, - checkpoint.end_offset as usize, + checkpoint.start_offset as Ulen, + checkpoint.end_offset as Ulen, ) .read_bytes() } @@ -117,10 +118,10 @@ impl StoreReader { } fn split_file(data: FileSlice) -> io::Result<(FileSlice, FileSlice)> { - let (data, footer_len_bytes) = data.split_from_end(size_of::()); + let (data, footer_len_bytes) = data.split_from_end(size_of::() as Ulen); let serialized_offset: OwnedBytes = footer_len_bytes.read_bytes()?; let mut serialized_offset_buf = serialized_offset.as_slice(); - let offset = u64::deserialize(&mut serialized_offset_buf)? as usize; + let offset = u64::deserialize(&mut serialized_offset_buf)? as Ulen; Ok(data.split(offset)) } @@ -162,7 +163,7 @@ mod tests { .lock() .unwrap() .peek_lru() - .map(|(&k, _)| k as usize), + .map(|(&k, _)| k as Ulen), Some(0) ); @@ -179,7 +180,7 @@ mod tests { .lock() .unwrap() .peek_lru() - .map(|(&k, _)| k as usize), + .map(|(&k, _)| k as Ulen), Some(0) ); @@ -195,7 +196,7 @@ mod tests { .lock() .unwrap() .peek_lru() - .map(|(&k, _)| k as usize), + .map(|(&k, _)| k as Ulen), Some(18806) ); diff --git a/src/store/writer.rs b/src/store/writer.rs index 3309f1a648..2f1f7ab6f9 100644 --- a/src/store/writer.rs +++ b/src/store/writer.rs @@ -1,3 +1,5 @@ +use tantivy_fst::Ulen; + use super::compress; use super::index::SkipIndexBuilder; use super::StoreReader; @@ -10,7 +12,7 @@ use crate::store::index::Checkpoint; use crate::DocId; use std::io::{self, Write}; -const BLOCK_SIZE: usize = 16_384; +const BLOCK_SIZE: Ulen = 16_384; /// Write tantivy's [`Store`](./index.html) /// @@ -58,7 +60,7 @@ impl StoreWriter { self.current_block .write_all(&self.intermediary_buffer[..])?; self.doc += 1; - if self.current_block.len() > BLOCK_SIZE { + if self.current_block.len() > BLOCK_SIZE as usize { self.write_and_compress_block()?; } Ok(()) diff --git a/src/termdict/fst_termdict/term_info_store.rs b/src/termdict/fst_termdict/term_info_store.rs index 93a5e91086..f7dea1ecf4 100644 --- a/src/termdict/fst_termdict/term_info_store.rs +++ b/src/termdict/fst_termdict/term_info_store.rs @@ -5,7 +5,7 @@ use crate::postings::TermInfo; use crate::termdict::TermOrdinal; use byteorder::{ByteOrder, LittleEndian}; use tantivy_fst::Ulen; -use std::cmp; +use std::{cmp, convert::TryInto}; use std::io::{self, Read, Write}; const BLOCK_LEN: usize = 256; @@ -47,7 +47,7 @@ impl BinarySerializable for TermInfoBlockMeta { } impl FixedSize for TermInfoBlockMeta { - const SIZE_IN_BYTES: usize = + const SIZE_IN_BYTES: Ulen = u64::SIZE_IN_BYTES + TermInfo::SIZE_IN_BYTES + 3 * u8::SIZE_IN_BYTES; } @@ -61,13 +61,13 @@ impl TermInfoBlockMeta { // is encoded without bitpacking. fn deserialize_term_info(&self, data: &dyn FakeArr, inner_offset: usize) -> TermInfo { assert!(inner_offset < BLOCK_LEN - 1); - let num_bits = self.num_bits() as usize; + let num_bits = self.num_bits() as Ulen; - let posting_start_addr = num_bits * inner_offset; + let posting_start_addr = num_bits * inner_offset as Ulen; // the stop offset is the start offset of the next term info. 
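
// extract_bits(), called above, pulls an arbitrary <=56-bit field out of a
// bitpacked stream by reading the 8 bytes at addr_bits/8 and shifting by
// addr_bits%8. An equivalent over a plain byte slice, as a sketch (assumes
// little-endian packing and an in-bounds addr_bits):
fn extract_bits_slice(data: &[u8], addr_bits: u64, num_bits: u8) -> u64 {
    assert!(num_bits <= 56); // shift (<=7) + num_bits must fit in the u64 window
    let addr_byte = (addr_bits / 8) as usize;
    let end = (addr_byte + 8).min(data.len());
    let mut bytes = [0u8; 8];
    bytes[..end - addr_byte].copy_from_slice(&data[addr_byte..end]);
    let word = u64::from_le_bytes(bytes);
    let mask = (1u64 << num_bits) - 1;
    (word >> (addr_bits % 8)) & mask
}
// e.g. with data = [65, 157, 1, 0, 0, 0, 0, 0, 0, 0]:
// (0, 9) -> 321, (9, 2) -> 2, (11, 6) -> 51, matching the test above
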
diff --git a/src/termdict/fst_termdict/term_info_store.rs b/src/termdict/fst_termdict/term_info_store.rs
index 93a5e91086..f7dea1ecf4 100644
--- a/src/termdict/fst_termdict/term_info_store.rs
+++ b/src/termdict/fst_termdict/term_info_store.rs
@@ -5,7 +5,7 @@ use crate::postings::TermInfo;
 use crate::termdict::TermOrdinal;
 use byteorder::{ByteOrder, LittleEndian};
 use tantivy_fst::Ulen;
-use std::cmp;
+use std::{cmp, convert::TryInto};
 use std::io::{self, Read, Write};
 
 const BLOCK_LEN: usize = 256;
@@ -47,7 +47,7 @@ impl BinarySerializable for TermInfoBlockMeta {
 }
 
 impl FixedSize for TermInfoBlockMeta {
-    const SIZE_IN_BYTES: usize =
+    const SIZE_IN_BYTES: Ulen =
         u64::SIZE_IN_BYTES + TermInfo::SIZE_IN_BYTES + 3 * u8::SIZE_IN_BYTES;
 }
 
@@ -61,13 +61,13 @@ impl TermInfoBlockMeta {
     // is encoded without bitpacking.
     fn deserialize_term_info(&self, data: &dyn FakeArr, inner_offset: usize) -> TermInfo {
         assert!(inner_offset < BLOCK_LEN - 1);
-        let num_bits = self.num_bits() as usize;
+        let num_bits = self.num_bits() as Ulen;
 
-        let posting_start_addr = num_bits * inner_offset;
+        let posting_start_addr = num_bits * inner_offset as Ulen;
         // the stop offset is the start offset of the next term info.
         let posting_stop_addr = posting_start_addr + num_bits;
-        let doc_freq_addr = posting_start_addr + self.postings_offset_nbits as usize;
-        let positions_idx_addr = doc_freq_addr + self.doc_freq_nbits as usize;
+        let doc_freq_addr = posting_start_addr + self.postings_offset_nbits as Ulen;
+        let positions_idx_addr = doc_freq_addr + self.doc_freq_nbits as Ulen;
 
         let postings_start_offset = self.ref_term_info.postings_start_offset
             + extract_bits(data, posting_start_addr, self.postings_offset_nbits);
@@ -88,7 +88,7 @@ impl TermInfoBlockMeta {
 
 #[derive(Debug)]
 pub struct TermInfoStore {
-    num_terms: usize,
+    num_terms: Ulen,
     block_meta_bytes: FileSlice,
     term_info_bytes: FileSlice,
 }
@@ -118,8 +118,8 @@ impl TermInfoStore {
     pub fn open(term_info_store_file: FileSlice) -> crate::Result<TermInfoStore> {
         let (len_slice, main_slice) = term_info_store_file.split(16);
         let mut bytes = len_slice.read_bytes()?;
-        let len = u64::deserialize(&mut bytes)? as usize;
-        let num_terms = u64::deserialize(&mut bytes)? as usize;
+        let len = u64::deserialize(&mut bytes)? as Ulen;
+        let num_terms = u64::deserialize(&mut bytes)? as Ulen;
         let (block_meta_file, term_info_file) = main_slice.split(len);
         Ok(TermInfoStore {
             num_terms,
@@ -129,23 +129,23 @@
     }
 
     pub fn get(&self, term_ord: TermOrdinal) -> TermInfo {
-        let block_id = (term_ord as usize) / BLOCK_LEN;
+        let block_id = (term_ord) / (BLOCK_LEN as Ulen);
         let block_data = self.block_meta_bytes.slice(block_id * TermInfoBlockMeta::SIZE_IN_BYTES, HasLen::len(&self.block_meta_bytes));
         let mut block_data = block_data.full_slice();
         let term_info_block_data = TermInfoBlockMeta::deserialize(&mut block_data)
             .expect("Failed to deserialize terminfoblockmeta");
-        let inner_offset = (term_ord as usize) % BLOCK_LEN;
+        let inner_offset = (term_ord as Ulen) % (BLOCK_LEN as Ulen);
         if inner_offset == 0 {
             return term_info_block_data.ref_term_info;
         }
-        let term_info_data = self.term_info_bytes.slice(term_info_block_data.offset as usize, HasLen::len(&self.term_info_bytes));
+        let term_info_data = self.term_info_bytes.slice(term_info_block_data.offset, HasLen::len(&self.term_info_bytes));
         term_info_block_data.deserialize_term_info(
             &term_info_data,
-            inner_offset - 1,
+            (inner_offset - 1).try_into().unwrap(),
         )
     }
 
-    pub fn num_terms(&self) -> usize {
+    pub fn num_terms(&self) -> Ulen {
         self.num_terms
     }
 }
@@ -304,9 +304,9 @@ mod tests {
         assert_eq!(compute_num_bits(51), 6);
         bitpack.close(&mut buffer).unwrap();
         assert_eq!(buffer.len(), 3 + 7);
-        assert_eq!(extract_bits(buffer, 0, 9), 321u64);
-        assert_eq!(extract_bits(buffer, 9, 2), 2u64);
-        assert_eq!(extract_bits(buffer, 11, 6), 51u64);
+        assert_eq!(extract_bits(&buffer, 0, 9), 321u64);
+        assert_eq!(extract_bits(&buffer, 9, 2), 2u64);
+        assert_eq!(extract_bits(&buffer, 11, 6), 51u64);
     }
 
     #[test]
diff --git a/src/termdict/fst_termdict/termdict.rs b/src/termdict/fst_termdict/termdict.rs
index 50a4f2dd5c..fbca4b0e03 100644
--- a/src/termdict/fst_termdict/termdict.rs
+++ b/src/termdict/fst_termdict/termdict.rs
@@ -7,7 +7,7 @@ use crate::postings::TermInfo;
 use crate::termdict::TermOrdinal;
 use once_cell::sync::Lazy;
 use std::io::{self, Write};
-use tantivy_fst::{raw::Fst};
+use tantivy_fst::{Ulen, raw::Fst};
 use tantivy_fst::Automaton;
 
 fn convert_fst_error(e: tantivy_fst::Error) -> io::Error {
@@ -122,7 +122,7 @@ impl TermDictionary {
         let (main_slice, footer_len_slice) = file.split_from_end(8);
         let mut footer_len_bytes = footer_len_slice.read_bytes()?;
         let footer_size = u64::deserialize(&mut footer_len_bytes)?;
-        let (fst_file_slice, values_file_slice) = main_slice.split_from_end(footer_size as usize);
+        let (fst_file_slice, values_file_slice) = main_slice.split_from_end(footer_size as Ulen);
         let fst_index = open_fst_index(fst_file_slice)?;
         let term_info_store = TermInfoStore::open(values_file_slice)?;
         Ok(TermDictionary {
@@ -138,7 +138,7 @@
 
     /// Returns the number of terms in the dictionary.
     /// Term ordinals range from 0 to `num_terms() - 1`.
-    pub fn num_terms(&self) -> usize {
+    pub fn num_terms(&self) -> Ulen {
         self.term_info_store.num_terms()
     }
 
diff --git a/src/termdict/merger.rs b/src/termdict/merger.rs
index 924f31c8ba..81be689377 100644
--- a/src/termdict/merger.rs
+++ b/src/termdict/merger.rs
@@ -1,3 +1,5 @@
+use tantivy_fst::Ulen;
+
 use crate::schema::Term;
 use crate::termdict::TermOrdinal;
 use crate::termdict::TermStreamer;
diff --git a/src/termdict/tests.rs b/src/termdict/tests.rs
index 9e0bde752a..b37cf34ae3 100644
--- a/src/termdict/tests.rs
+++ b/src/termdict/tests.rs
@@ -1,3 +1,5 @@
+use tantivy_fst::Ulen;
+
 use super::{TermDictionary, TermDictionaryBuilder, TermStreamer};
 use crate::directory::{Directory, FileSlice, RAMDirectory, TerminatingWrite};

From cecf63544e158fc723a9b009520e9d0165f805b1 Mon Sep 17 00:00:00 2001
From: phiresky
Date: Mon, 24 May 2021 17:32:22 +0200
Subject: [PATCH 7/9] some info logs

---
 src/collector/top_score_collector.rs  |  1 +
 src/core/inverted_index_reader.rs     | 15 +++++++++++----
 src/core/searcher.rs                  |  4 +++-
 src/lib.rs                            | 17 ++++++++++++++---
 src/schema/term.rs                    | 16 ++++++++++------
 src/termdict/fst_termdict/termdict.rs |  6 ++----
 6 files changed, 41 insertions(+), 18 deletions(-)

diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs
index f58095b212..a0841da6dd 100644
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -649,6 +649,7 @@ impl Collector for TopDocs {
                 threshold
             })?;
         } else {
+            crate::info_log(format!("Scoring results and collecting TOP {}", self.0.limit));
             weight.for_each_pruning(Score::MIN, reader, &mut |doc, score| {
                 let heap_item = ComparableDoc {
                     feature: score,
diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs
index 23349bec39..0189f5f422 100644
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -1,6 +1,6 @@
 use std::io;
 
-use tantivy_fst::Ulen;
+use tantivy_fst::{FakeArr, Ulen};
 
 use crate::common::BinarySerializable;
 use crate::directory::FileSlice;
@@ -108,8 +108,10 @@
         term: &Term,
         option: IndexRecordOption,
     ) -> io::Result<Option<BlockSegmentPostings>> {
-        self.get_term_info(term)?
-            .map(move |term_info| self.read_block_postings_from_terminfo(&term_info, option))
+        crate::info_log(format!("reading term info for term {:?}", term));
+
+        let info = self.get_term_info(term)?;
+        info.map(move |term_info| self.read_block_postings_from_terminfo(&term_info, option))
             .transpose()
     }
 
@@ -126,6 +128,8 @@
             term_info.postings_start_offset as Ulen,
             term_info.postings_stop_offset as Ulen,
         );
+
+        postings_data.to_vec(); // force-load the whole postings range in a single read
         BlockSegmentPostings::open(
             term_info.doc_freq,
             postings_data,
@@ -183,7 +187,10 @@
         option: IndexRecordOption,
     ) -> io::Result<Option<SegmentPostings>> {
         self.get_term_info(term)?
-            .map(move |term_info| self.read_postings_from_terminfo(&term_info, option))
+            .map(move |term_info| {
+                crate::info_log(format!("Fetching document ids and frequencies matching term {:?}", term));
+                self.read_postings_from_terminfo(&term_info, option)
+            })
             .transpose()
     }
 
diff --git a/src/core/searcher.rs b/src/core/searcher.rs
index b181762e61..9d5773943e 100644
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -58,7 +58,9 @@ impl Searcher {
     pub fn doc(&self, doc_address: DocAddress) -> crate::Result<Document> {
         let DocAddress(segment_local_id, doc_id) = doc_address;
         let store_reader = &self.store_readers[segment_local_id as usize];
-        store_reader.get(doc_id)
+        let doc = store_reader.get(doc_id)?;
+        crate::info_log(format!("read content of doc {:?}", doc.field_values()));
+        Ok(doc)
     }
 
     /// Access the schema associated to the index of this searcher.
diff --git a/src/lib.rs b/src/lib.rs
index 6fd6038a23..0d255167aa 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -170,7 +170,7 @@ pub use crate::reader::LeasedItem;
 pub use crate::schema::{Document, Term};
 
 use std::fmt;
-use once_cell::sync::Lazy;
+use once_cell::sync::{Lazy, OnceCell};
 use serde::{Deserialize, Serialize};
 
 /// Index format version.
@@ -280,9 +280,19 @@ impl DocAddress {
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct DocAddress(pub SegmentLocalId, pub DocId);
 
+static INFO_LOG_HOOK: OnceCell<Box<dyn Fn(&str) + Send + Sync>> = OnceCell::new();
+
+pub fn info_log(message: impl AsRef<str>) {
+    if let Some(log) = INFO_LOG_HOOK.get() {
+        log(message.as_ref());
+    }
+}
+pub fn set_info_log_hook(f: impl Fn(&str) + Send + Sync + 'static) {
+    INFO_LOG_HOOK.set(Box::new(f)).ok();
+}
+
 #[cfg(test)]
 mod tests {
-    use crate::{Directory, collector::tests::TEST_COLLECTOR_WITH_SCORE};
     use crate::core::SegmentReader;
     use crate::docset::{DocSet, TERMINATED};
     use crate::query::BooleanQuery;
@@ -291,6 +301,7 @@
     use crate::Index;
     use crate::Postings;
     use crate::ReloadPolicy;
+    use crate::{collector::tests::TEST_COLLECTOR_WITH_SCORE, Directory};
     use rand::distributions::Bernoulli;
     use rand::distributions::Uniform;
     use rand::rngs::StdRng;
@@ -639,7 +650,7 @@
         index_writer.commit()?;
         println!("dir: {:#?}", index.directory());
         let reader = index.reader()?;
-        
+
         let searcher = reader.searcher();
         let term = Term::from_field_i64(value_field, negative_val);
         let mut postings = searcher
diff --git a/src/schema/term.rs b/src/schema/term.rs
index 772cb31cfc..8c0c0432c7 100644
--- a/src/schema/term.rs
+++ b/src/schema/term.rs
@@ -233,12 +233,16 @@ where
 
 impl fmt::Debug for Term {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(
-            f,
-            "Term(field={},bytes={:?})",
-            self.field().field_id(),
-            self.value_bytes()
-        )
+        if let Ok(s) = std::str::from_utf8(&self.0) {
+            write!(f, "Term(field={},text={:?})", self.field().field_id(), s)
+        } else {
+            write!(
+                f,
+                "Term(field={},bytes={:?})",
+                self.field().field_id(),
+                self.value_bytes()
+            )
+        }
     }
 }
 
diff --git a/src/termdict/fst_termdict/termdict.rs b/src/termdict/fst_termdict/termdict.rs
index fbca4b0e03..409f4842ef 100644
--- a/src/termdict/fst_termdict/termdict.rs
+++ b/src/termdict/fst_termdict/termdict.rs
@@ -86,11 +86,9 @@ where
 }
 
 fn open_fst_index(fst_file: FileSlice) -> crate::Result<tantivy_fst::Map<FileSlice>> {
-    println!("open_fst_index()");
     let fst = Fst::new(fst_file)
         .map_err(|err| DataCorruption::comment_only(format!("Fst data is corrupted: {:?}", err)))?;
     let ret = Ok(tantivy_fst::Map::from(fst));
-    println!("open_fst_index RET");
     return ret;
 }
 
@@ -144,9 +142,9 @@
     /// Returns the ordinal associated to a given term.
     pub fn term_ord<K: AsRef<[u8]>>(&self, key: K) -> io::Result<Option<TermOrdinal>> {
-        println!("termdict.term_ord({:?})", String::from_utf8_lossy(key.as_ref()));
+        crate::info_log(format!("Getting info for term {:?}", String::from_utf8_lossy(key.as_ref())));
         let ret = Ok(self.fst_index.get(key));
-        println!("termdict.term_ord RET");
+        //crate::info_log(format!("termdict.term_ord RET {:?}", ret));
         return ret;
     }
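Taken together, patch 7 gives the embedding application a running commentary on what the engine is doing. A minimal sketch of wiring the hook (the surrounding application code is hypothetical; note that set_info_log_hook as introduced here returns (), and only becomes a Result in patch 9):

    fn install_logging() {
        // Forward tantivy's info messages, e.g. to the browser console in a wasm build.
        tantivy::set_info_log_hook(|msg| eprintln!("[tantivy] {}", msg));
    }

Once installed, a search emits lines such as `Getting info for term "abcd"` and `Fetching document ids and frequencies matching term Term(field=1,text="abcd")` from the call sites added above.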
From 57b9c59f4f33dcfe64de09cbd746c393aff89f54 Mon Sep 17 00:00:00 2001
From: phiresky
Date: Sat, 29 May 2021 14:10:05 +0200
Subject: [PATCH 8/9] comment

---
 src/directory/fs_directory.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/directory/fs_directory.rs b/src/directory/fs_directory.rs
index 5a29d194cd..e55ca6e489 100644
--- a/src/directory/fs_directory.rs
+++ b/src/directory/fs_directory.rs
@@ -12,6 +12,9 @@ use super::{
     AntiCallToken, WatchCallback, WritePtr,
 };
 
+// For demonstration purposes only: a directory that reads from the filesystem on demand, without memory mapping, with an integrated cache.
+// This is *not used* in my wasm demo, which uses its own caching and hooks into the Web APIs.
+
 #[derive(Debug, Clone)]
 pub struct FsDirectory {
     root: PathBuf,

From 6bd8a8d9ef702bda9b76119c2732542a8aa3e04e Mon Sep 17 00:00:00 2001
From: phiresky
Date: Sun, 30 May 2021 18:30:19 +0200
Subject: [PATCH 9/9] api to get multiple chunks at the same time

---
 src/core/searcher.rs        |  9 +++++++++
 src/directory/file_slice.rs | 12 ++++++++++++
 src/lib.rs                  |  6 +++---
 src/store/reader.rs         | 18 ++++++++++++++++++
 4 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/src/core/searcher.rs b/src/core/searcher.rs
index 9d5773943e..564944e32b 100644
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -63,6 +63,15 @@ impl Searcher {
         Ok(doc)
     }
 
+    pub fn doc_multiple(&self, doc_addresses: Vec<DocAddress>) -> crate::Result<Vec<Document>> {
+        if doc_addresses.is_empty() {
+            return Ok(vec![]);
+        }
+        assert!(doc_addresses.windows(2).all(|s| s[0].0 == s[1].0), "only supported within a single segment for now");
+        let store_reader = &self.store_readers[doc_addresses[0].0 as usize];
+        store_reader.get_multiple(&doc_addresses.into_iter().map(|d| d.1).collect::<Vec<_>>())
+    }
+
     /// Access the schema associated to the index of this searcher.
     pub fn schema(&self) -> &Schema {
         &self.schema
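For context, a caller that batches stored-document retrieval might look like the following sketch; `searcher`, `schema`, and `top_docs` (the output of a TopDocs collector run) are assumed, and, as the assert above states, all addresses currently have to come from the same segment:

    use tantivy::DocAddress;

    let addresses: Vec<DocAddress> = top_docs.iter().map(|(_score, addr)| *addr).collect();
    // One batched round-trip to the storage backend instead of one per document.
    let docs = searcher.doc_multiple(addresses)?;
    for doc in docs {
        println!("{}", schema.to_json(&doc));
    }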
diff --git a/src/directory/file_slice.rs b/src/directory/file_slice.rs
index a676818b52..2aac1addf2 100644
--- a/src/directory/file_slice.rs
+++ b/src/directory/file_slice.rs
@@ -5,6 +5,7 @@ use tantivy_fst::Ulen;
 use crate::common::HasLen;
 use crate::directory::OwnedBytes;
 use std::fmt::Debug;
+use std::ops::Range;
 use std::sync::{Arc, Weak};
 use std::{io, ops::Deref};
 
@@ -24,6 +25,12 @@ pub trait FileHandle: 'static + Send + Sync + HasLen + Debug {
     ///
     /// This method may panic if the range requested is invalid.
     fn read_bytes(&self, from: Ulen, to: Ulen) -> io::Result<OwnedBytes>;
+
+    /// Optimization: read multiple ranges at once if the backend supports it.
+    fn read_bytes_multiple(&self, ranges: &[Range<Ulen>]) -> io::Result<Vec<OwnedBytes>> {
+        crate::info_log("warn: unoptimized read of multiple ranges");
+        ranges.iter().map(|r| self.read_bytes(r.start, r.end)).collect()
+    }
 }
 
 impl FakeArr for FileSlice {
@@ -134,6 +141,11 @@ impl FileSlice {
         self.data.read_bytes(self.start + from, self.start + to)
     }
 
+    pub fn read_bytes_slice_multiple(&self, ranges: &[Range<Ulen>]) -> io::Result<Vec<OwnedBytes>> {
+        let real_ranges: Vec<Range<Ulen>> = ranges.iter().map(|r| (r.start + self.start)..(r.end + self.start)).collect();
+        self.data.read_bytes_multiple(&real_ranges)
+    }
+
     /// Splits the FileSlice at the given offset and return two file slices.
     /// `file_slice[..split_offset]` and `file_slice[split_offset..]`.
     ///
diff --git a/src/lib.rs b/src/lib.rs
index 0d255167aa..8f0e634e45 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -280,15 +280,15 @@ impl DocAddress {
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct DocAddress(pub SegmentLocalId, pub DocId);
 
-static INFO_LOG_HOOK: OnceCell> = OnceCell::new();
+static INFO_LOG_HOOK: OnceCell> = OnceCell::new();
 
 pub fn info_log(message: impl AsRef<str>) {
     if let Some(log) = INFO_LOG_HOOK.get() {
         log(message.as_ref());
     }
 }
-pub fn set_info_log_hook(f: impl Fn(&str) + Send + Sync + 'static) {
-    INFO_LOG_HOOK.set(Box::new(f)).ok();
+pub fn set_info_log_hook(f: impl Fn(&str) + Send + Sync + 'static) -> std::result::Result<(), ()> {
+    INFO_LOG_HOOK.set(Box::new(f)).map_err(|_| ())
 }
 
 #[cfg(test)]
diff --git a/src/store/reader.rs b/src/store/reader.rs
index 2fff046813..52505a8629 100644
--- a/src/store/reader.rs
+++ b/src/store/reader.rs
@@ -89,6 +89,15 @@ impl StoreReader {
         Ok(block)
     }
 
+    fn cache_blocks_multiple(&self, checkpoints: &[Checkpoint]) -> io::Result<()> {
+        // just to cache them so the next read is instant; TODO: don't rely on caching within FileSlice, use self.cache instead?
+        // crate::info_log("caching multiple");
+        let ranges = checkpoints.iter().map(|c| (c.start_offset as Ulen)..(c.end_offset as Ulen)).collect::<Vec<_>>();
+        self.data.read_bytes_slice_multiple(&ranges)?;
+        // crate::info_log("caching multiple done");
+        Ok(())
+    }
+
     /// Reads a given document.
     ///
     /// Calling `.get(doc)` is relatively costly as it requires
@@ -100,6 +109,7 @@
         let checkpoint = self.block_checkpoint(doc_id).ok_or_else(|| {
             crate::TantivyError::InvalidArgument(format!("Failed to lookup Doc #{}.", doc_id))
         })?;
+        crate::info_log(format!("decompressing block for doc {}", doc_id));
         let mut cursor = &self.read_block(&checkpoint)?[..];
         for _ in checkpoint.start_doc..doc_id {
             let doc_length = VInt::deserialize(&mut cursor)?.val() as usize;
@@ -111,6 +121,14 @@
         Ok(Document::deserialize(&mut cursor)?)
     }
 
+    /// Reads the given document ids.
+    /// May be faster than getting them separately if the storage backend supports it.
+    pub fn get_multiple(&self, doc_ids: &[DocId]) -> crate::Result<Vec<Document>> {
+        let checkpoints: Vec<Checkpoint> = doc_ids.iter().flat_map(|doc_id| self.block_checkpoint(*doc_id)).collect();
+        self.cache_blocks_multiple(&checkpoints)?;
+        doc_ids.iter().map(|d| self.get(*d)).collect()
+    }
+
     /// Summarize total space usage of this store reader.
     pub fn space_usage(&self) -> StoreSpaceUsage {
         self.space_usage.clone()
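The default read_bytes_multiple above just loops over read_bytes; the point of the API is that a backend with expensive round-trips can override it. A sketch of such an override for a hypothetical remote file (RemoteFile and its fetch_ranges method, which retrieves several byte ranges in one request, are assumptions; the HasLen and Debug impls required by FileHandle are elided, and OwnedBytes::new is assumed to accept a Vec<u8>, as it does for other stable-deref buffers in this codebase):

    use std::io;
    use std::ops::Range;
    use tantivy::directory::{FileHandle, OwnedBytes};
    use tantivy_fst::Ulen;

    impl FileHandle for RemoteFile {
        fn read_bytes(&self, from: Ulen, to: Ulen) -> io::Result<OwnedBytes> {
            let body = self.fetch_ranges(&[from..to])?.remove(0);
            Ok(OwnedBytes::new(body))
        }

        fn read_bytes_multiple(&self, ranges: &[Range<Ulen>]) -> io::Result<Vec<OwnedBytes>> {
            // One request for all ranges instead of ranges.len() requests.
            let bodies = self.fetch_ranges(ranges)?;
            Ok(bodies.into_iter().map(OwnedBytes::new).collect())
        }
    }

With such a backend in place, StoreReader::get_multiple issues a single batched fetch via cache_blocks_multiple and then decompresses each block from the now-warm cache.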