Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add fulltext directory listing to archive metadata #932

Merged
merged 3 commits into from
Nov 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions www/js/lib/zimArchive.js
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ define(['zimfile', 'zimDirEntry', 'util', 'utf8'],
path: 'X/listing/titleOrdered/v1',
ptrName: 'articlePtrPos',
countName: 'articleCount'
},
{
// This tests for and specifies the existence of any Xapian Full Text Index
path: 'X/fulltext/xapian',
ptrName: 'fullTextIndex',
countName: 'fullTextIndexSize'
}
]);
// Set the archive file type ('open' or 'zimit')
Expand Down
17 changes: 13 additions & 4 deletions www/js/lib/zimfile.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'zimDirEntry', 'file
* @property {Integer} urlPtrPos Position of the directory pointerlist ordered by URL
* @property {Integer} titlePtrPos Position of the legacy v0 pointerlist ordered by title
* @property {Integer} articlePtrPos Position of the v1 article-only pointerlist ordered by title (async calculated entry)
* @property {Integer|String} fullTextIndex Extended property: position of the full text index, or its path if it exists but has no metadata
* @property {Integer} fullTextIndexSize Extended property: the size of the full text index as indicated in the metadata, or null if not specified
* @property {Integer} clusterPtrPos Position of the cluster pointer list
* @property {Integer} mimeListPos Position of the MIME type list (also header size)
* @property {Integer} mainPage Main page or 0xffffffff if no main page
Expand Down Expand Up @@ -322,7 +324,7 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'zimDirEntry', 'file
// If we are in a legacy ZIM archive, we need to calculate the true article count (of entries in the A namespace)
// This effectively emulates the v1 article pointerlist
if (this.minorVersion === 0) {
console.debug('ZIM DirListing version: 0 (legacy)', this);
// console.debug('ZIM DirListing version: 0 (legacy)', this);
// Initiate a binary search for the first or last article
var getArticleIndexByOrdinal = function (ordinal) {
return util.binarySearch(0, that.entryCount, function(i) {
Expand All @@ -338,7 +340,7 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'zimDirEntry', 'file
return index;
});
};
return getArticleIndexByOrdinal('first').then(function(idxFirstArticle) {
getArticleIndexByOrdinal('first').then(function(idxFirstArticle) {
return getArticleIndexByOrdinal('last').then(function(idxLastArticle) {
// Technically idxLastArticle points to the entry after the last article in the 'A' namespace.
// We subtract the first from the last to get the number of entries in the 'A' namespace
Expand All @@ -352,8 +354,9 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'zimDirEntry', 'file
var listingAccessor = function (listing) {
if (!listing) {
// No more listings, so exit
console.debug('ZIM DirListing version: ' + highestListingVersion, that);
console.debug('ZIM DirListing version: ' + highestListingVersion + (highestListingVersion ? '' : ' (legacy)'), that);
console.debug('Article count is: ' + that.articleCount);
if (that.fullTextIndex) console.debug('ZIM has fullTextIndex with listed size: ' + that.fullTextIndexSize);
return null;
}
// Check if we already have this listing's values, so we don't do redundant binary searches
Expand All @@ -378,6 +381,10 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'zimDirEntry', 'file
return that.dirEntryByUrlIndex(index);
}).then(function(dirEntry) {
if (!dirEntry) return null;
// Detect a full text index
if (/fulltext\//.test(dirEntry.url)) {
that[listing.ptrName] = dirEntry.namespace + '/' + dirEntry.url
}
// Request the metadata for the blob represented by the dirEntry
return that.blob(dirEntry.cluster, dirEntry.blob, true);
}).then(function(metadata) {
Expand Down Expand Up @@ -474,7 +481,9 @@ define(['xzdec_wrapper', 'zstddec_wrapper', 'util', 'utf8', 'zimDirEntry', 'file
zf.clusterCount = readInt(header, 28, 4);
zf.urlPtrPos = urlPtrPos;
zf.titlePtrPos = readInt(header, 40, 8);
zf.articlePtrPos = null; // Calculated async by setListings()
zf.articlePtrPos = null; // Calculated async by setListings()
zf.fullTextIndex = null; // Calculated async by setListings()
zf.fullTextIndexSize = null; // Calculated async by setListings()
zf.clusterPtrPos = readInt(header, 48, 8);
zf.mimeListPos = mimeListPos;
zf.mainPage = readInt(header, 64, 4);
Expand Down