Skip to content

Commit

Permalink
Reduce memory footprint during archive scan
Browse files Browse the repository at this point in the history
When scanning an archive for files, a linked list is created with all
files and properties before being processed by file system functions
such as readdir. This causes some memory overhead since a lot of data
is required to be kept resident for a longer period of time. Since the
lifetime of the data collected is relatively short, there is no need
to pre-fetch all information like this. Instead, handle file by file
and use only a single temporary object to hold whatever metadata is
necessary. Performance is also expected to improve with a change
like this since fewer dynamic heap allocations are required, but it also
results in a loop unwind that will increase the number of function calls.
Measurements of some common use-cases indicated a performance increase
of approximately 15%-20%, but there are also reports of no improvement
at all or even the opposite. The latter should, however, be considered a
rare and exceptional case.

This change was triggered by issue #122, in which a very large archive
containing more than 100k files was mounted.

Signed-off-by: Hans Beckerus <hans.beckerus at gmail.com>
  • Loading branch information
Hans Beckerus committed Nov 30, 2019
1 parent 2bc97d5 commit 4bc904f
Show file tree
Hide file tree
Showing 3 changed files with 174 additions and 197 deletions.
211 changes: 98 additions & 113 deletions src/dllext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,168 +65,153 @@ struct DataSet
#endif
};

int PASCAL RARListArchiveEx(HANDLE hArcData, RARArchiveListEx* N, int *ResultCode)
int PASCAL RARListArchiveEx(HANDLE hArcData, RARArchiveDataEx **NN)
{
DataSet *Data = (DataSet *)hArcData;
Archive& Arc = Data->Arc;
struct RARHeaderDataEx h;
RARArchiveListEx* N_ = N;
struct RARArchiveDataEx *N;

#if RARVER_MAJOR > 4 || ( RARVER_MAJOR == 4 && RARVER_MINOR >= 20 )
try
#endif
{
*ResultCode = 0;
uint FileCount = 0;
while (1)
int RHCode = 0;
int PFCode = 0;
memset(&h, 0, sizeof(h));
RHCode = RARReadHeaderEx(hArcData,&h);
if (RHCode)
{
int RHCode = 0;
int PFCode = 0;
memset(&h, 0, sizeof(h));
RHCode = RARReadHeaderEx(hArcData,&h);
if (RHCode)
{
*ResultCode = RHCode;
break;
}
return RHCode;
}

if (FileCount)
{
N->next = new RARArchiveListEx;
N = N->next;
}
FileCount++;
memcpy(&N->hdr, &h, sizeof(h));
N->HeadSize = Arc.FileHead.HeadSize;
N->Offset = Arc.CurBlockPos;
N->FileDataEnd = Arc.NextBlockPos;

/* For supporting high-precision timestamp.
* If not available, this value is set to 0 (1601/01/01 00:00:00.000000000).
* For reference, see http://support.microsoft.com/kb/167296/en
*/
memset(&N->RawTime, 0, sizeof(struct RARArchiveListEx::RawTime_));
if (!*NN)
{
*NN = new RARArchiveDataEx;
}
N = *NN;
memcpy(&N->hdr, &h, sizeof(h));
N->HeadSize = Arc.FileHead.HeadSize;
N->Offset = Arc.CurBlockPos;
N->FileDataEnd = Arc.NextBlockPos;

// For supporting high-precision timestamp.
// If not available, this value is set to 0 (1601/01/01 00:00:00.000000000).
// For reference, see http://support.microsoft.com/kb/167296/en
memset(&N->RawTime, 0, sizeof(struct RARArchiveDataEx::RawTime_));
#if RARVER_MAJOR > 4
#if RARVER_MAJOR > 5 || (RARVER_MAJOR == 5 && RARVER_MINOR >= 50)
/* High-precision(1 ns) UNIX timestamp from 1970-01-01 */
if (Arc.FileHead.mtime.IsSet())
N->RawTime.mtime = Arc.FileHead.mtime.GetUnixNS();
if (Arc.FileHead.ctime.IsSet())
N->RawTime.ctime = Arc.FileHead.ctime.GetUnixNS();
if (Arc.FileHead.atime.IsSet())
N->RawTime.atime = Arc.FileHead.atime.GetUnixNS();
// High-precision(1 ns) UNIX timestamp from 1970-01-01
if (Arc.FileHead.mtime.IsSet())
N->RawTime.mtime = Arc.FileHead.mtime.GetUnixNS();
if (Arc.FileHead.ctime.IsSet())
N->RawTime.ctime = Arc.FileHead.ctime.GetUnixNS();
if (Arc.FileHead.atime.IsSet())
N->RawTime.atime = Arc.FileHead.atime.GetUnixNS();
#else
/* High-precision(100 ns) Windows timestamp from 1601-01-01 */
if (Arc.FileHead.mtime.IsSet())
N->RawTime.mtime = Arc.FileHead.mtime.GetRaw() - 116444736000000000ULL;
if (Arc.FileHead.ctime.IsSet())
N->RawTime.ctime = Arc.FileHead.ctime.GetRaw() - 116444736000000000ULL;
if (Arc.FileHead.atime.IsSet())
N->RawTime.atime = Arc.FileHead.atime.GetRaw() - 116444736000000000ULL;
// High-precision(100 ns) Windows timestamp from 1601-01-01
if (Arc.FileHead.mtime.IsSet())
N->RawTime.mtime = Arc.FileHead.mtime.GetRaw() - 116444736000000000ULL;
if (Arc.FileHead.ctime.IsSet())
N->RawTime.ctime = Arc.FileHead.ctime.GetRaw() - 116444736000000000ULL;
if (Arc.FileHead.atime.IsSet())
N->RawTime.atime = Arc.FileHead.atime.GetRaw() - 116444736000000000ULL;
#endif
#endif

N->hdr.Flags = 0;
N->hdr.Flags = 0;
#if RARVER_MAJOR < 5
if ((Arc.FileHead.Flags & LHD_WINDOWMASK) == LHD_DIRECTORY)
N->hdr.Flags |= RHDF_DIRECTORY;
if (Arc.FileHead.Flags & LHD_SPLIT_BEFORE)
N->hdr.Flags |= RHDF_SPLITBEFORE;
if (Arc.FileHead.Flags & LHD_SPLIT_AFTER)
N->hdr.Flags |= RHDF_SPLITAFTER;
if (Arc.FileHead.Flags & LHD_PASSWORD)
N->hdr.Flags |= RHDF_ENCRYPTED;
if (Arc.FileHead.Flags & LHD_SOLID)
N->hdr.Flags |= RHDF_SOLID;
if ((Arc.FileHead.Flags & LHD_WINDOWMASK) == LHD_DIRECTORY)
N->hdr.Flags |= RHDF_DIRECTORY;
if (Arc.FileHead.Flags & LHD_SPLIT_BEFORE)
N->hdr.Flags |= RHDF_SPLITBEFORE;
if (Arc.FileHead.Flags & LHD_SPLIT_AFTER)
N->hdr.Flags |= RHDF_SPLITAFTER;
if (Arc.FileHead.Flags & LHD_PASSWORD)
N->hdr.Flags |= RHDF_ENCRYPTED;
if (Arc.FileHead.Flags & LHD_SOLID)
N->hdr.Flags |= RHDF_SOLID;
#else
if (Arc.FileHead.SplitBefore)
N->hdr.Flags |= RHDF_SPLITBEFORE;
if (Arc.FileHead.SplitAfter)
N->hdr.Flags |= RHDF_SPLITAFTER;
if (Arc.FileHead.Encrypted)
N->hdr.Flags |= RHDF_ENCRYPTED;
if (Arc.FileHead.Dir)
N->hdr.Flags |= RHDF_DIRECTORY;
if (Arc.FileHead.Solid)
N->hdr.Flags |= RHDF_SOLID;
if (Arc.FileHead.SplitBefore)
N->hdr.Flags |= RHDF_SPLITBEFORE;
if (Arc.FileHead.SplitAfter)
N->hdr.Flags |= RHDF_SPLITAFTER;
if (Arc.FileHead.Encrypted)
N->hdr.Flags |= RHDF_ENCRYPTED;
if (Arc.FileHead.Dir)
N->hdr.Flags |= RHDF_DIRECTORY;
if (Arc.FileHead.Solid)
N->hdr.Flags |= RHDF_SOLID;
#endif

N->LinkTargetFlags = 0;
N->LinkTargetFlags = 0;
#if RARVER_MAJOR < 5
if (N->hdr.HostOS==HOST_UNIX && (N->hdr.FileAttr & 0xF000)==0xA000)
if (N->hdr.HostOS==HOST_UNIX && (N->hdr.FileAttr & 0xF000)==0xA000)
{
if (N->hdr.UnpVer < 50)
{
int DataSize=Min(N->hdr.PackSize,sizeof(N->LinkTarget)-1);
Arc.Read(N->LinkTarget,DataSize);
N->LinkTarget[DataSize]=0;
}
}
#else
if (Arc.FileHead.RedirType != FSREDIR_NONE)
{
// Sanity check only that 'RedirType' match 'FileAttr'
if (Arc.FileHead.RedirType == FSREDIR_UNIXSYMLINK &&
(N->hdr.FileAttr & 0xF000)==0xA000)
{
if (N->hdr.UnpVer < 50)
{
int DataSize=Min(N->hdr.PackSize,sizeof(N->LinkTarget)-1);
Arc.Read(N->LinkTarget,DataSize);
N->LinkTarget[DataSize]=0;
}
}
#else
if (Arc.FileHead.RedirType != FSREDIR_NONE)
{
// Sanity check only that 'RedirType' match 'FileAttr'
if (Arc.FileHead.RedirType == FSREDIR_UNIXSYMLINK &&
(N->hdr.FileAttr & 0xF000)==0xA000)
else
{
if (N->hdr.UnpVer < 50)
{
int DataSize=Min(N->hdr.PackSize,sizeof(N->LinkTarget)-1);
Arc.Read(N->LinkTarget,DataSize);
N->LinkTarget[DataSize]=0;
}
else
{
wcscpy(N->LinkTargetW,Arc.FileHead.RedirName);
N->LinkTargetFlags |= LINK_T_UNICODE; // Make sure UNICODE is set
}
}
else if (Arc.FileHead.RedirType == FSREDIR_FILECOPY)
{
wcscpy(N->LinkTargetW,Arc.FileHead.RedirName);
N->LinkTargetFlags |= LINK_T_FILECOPY;
wcscpy(N->LinkTargetW,Arc.FileHead.RedirName);
N->LinkTargetFlags |= LINK_T_UNICODE; // Make sure UNICODE is set
}
}
#endif

// Skip to next header
PFCode = RARProcessFile(hArcData,RAR_SKIP,NULL,NULL);
if (PFCode)
else if (Arc.FileHead.RedirType == FSREDIR_FILECOPY)
{
*ResultCode = PFCode;
break;
wcscpy(N->LinkTargetW,Arc.FileHead.RedirName);
N->LinkTargetFlags |= LINK_T_FILECOPY;
}
}
#endif
// Skip to next header
PFCode = RARProcessFile(hArcData,RAR_SKIP,NULL,NULL);
if (PFCode)
{
return PFCode;
}

N->next = NULL;
return FileCount;
return 0;
}
#if RARVER_MAJOR > 4 || ( RARVER_MAJOR == 4 && RARVER_MINOR >= 20 )
catch (std::bad_alloc&) // Catch 'new' exception.
{
if (N->next != NULL)
delete N->next;
N->next = NULL;
if (*NN) {
delete *NN;
*NN = NULL;
}
cerr << "RARListArchiveEx() caught std:bac_alloc error" << endl;
}
#endif
RARFreeListEx(N_);
return 0;
}


void PASCAL RARFreeListEx(RARArchiveListEx* L)
void PASCAL RARFreeArchiveDataEx(RARArchiveDataEx **NN)
{
RARArchiveListEx* N = L?L->next:NULL;
while (N)
{
RARArchiveListEx* tmp = N;
N = N->next;
delete tmp;
if (*NN) {
delete *NN;
*NN = NULL;
}
}

void PASCAL RARNextVolumeName(char* arch, bool oldstylevolume)
void PASCAL RARNextVolumeName(char *arch, bool oldstylevolume)
{
#if RARVER_MAJOR < 5
NextVolumeName(arch, NULL, 0, oldstylevolume);
Expand All @@ -239,7 +224,7 @@ void PASCAL RARNextVolumeName(char* arch, bool oldstylevolume)
}


void PASCAL RARVolNameToFirstName(char* arch, bool oldstylevolume)
void PASCAL RARVolNameToFirstName(char *arch, bool oldstylevolume)
{
#if RARVER_MAJOR < 5
VolNameToFirstName(arch, arch, !oldstylevolume);
Expand Down
14 changes: 6 additions & 8 deletions src/dllext.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,13 @@ enum HOST_SYSTEM {
extern "C" {
#endif

typedef struct RARArchiveList RARArchiveList;
typedef struct RARArchiveListEx RARArchiveListEx;
typedef struct RARArchiveDataEx RARArchiveDataEx;

#ifdef __cplusplus
}
#endif

struct RARArchiveListEx
struct RARArchiveDataEx
{
struct RARHeaderDataEx hdr;
__extension__
Expand All @@ -120,7 +119,6 @@ struct RARArchiveListEx
unsigned int HeadSize;
off_t Offset;
off_t FileDataEnd;
RARArchiveListEx* next;
};

struct RARWcb
Expand All @@ -133,10 +131,10 @@ struct RARWcb
extern "C" {
#endif

int PASCAL RARListArchiveEx(HANDLE hArcData, RARArchiveListEx* fList, int *ResultCode);
void PASCAL RARFreeListEx(RARArchiveListEx* fList);
void PASCAL RARNextVolumeName(char*, bool);
void PASCAL RARVolNameToFirstName(char*, bool);
int PASCAL RARListArchiveEx(HANDLE hArcData, RARArchiveDataEx **);
void PASCAL RARFreeArchiveDataEx(RARArchiveDataEx **);
void PASCAL RARNextVolumeName(char *, bool);
void PASCAL RARVolNameToFirstName(char *, bool);
void PASCAL RARGetFileInfo(HANDLE hArcData, const char *FileName, struct RARWcb *wcb);

#ifdef __cplusplus
Expand Down
Loading

0 comments on commit 4bc904f

Please sign in to comment.