Skip to content

Commit

Permalink
- worked on jimmejardine#281 / jimmejardine#280: SORAX is again one o…
Browse files Browse the repository at this point in the history
…f the culprits it seems.

  Added memory status report code which dumps into the log, as the GC output is not reliable when we want to know how much memory was actually in use at the peak, i.e. at the moment the OutOfMemory exception happened.
- lots of CultureInfo.CurrentCulture params are not necessary and only make the code harder to read. Removed them where MSVC2019 doesn't complain about these calls any more.
- added *reason* message for logging lines which report when Qiqqa is being shut down. This should help diagnose jimmejardine#280 / jimmejardine#281 as well.
- tweaked the exit process to track and report the total number of threads running, not only those in the SafeThreadPool
- documentation comments for several routines regarding LDA and AutoTag creation. I finally understand how it's done: the LDA is *not* used for creating the AutoTags but only to find the *commonly shared* tags when running in Expedition.
  The BuzzwordGenerator is the AutoTag generator (in combination with some other code) and uses some plain, basic heuristics. AutoTags are extracted from the *document title* only, which explains the lousy AutoTag performance I got with the latest lib, as that one has very few PDFs which pass through OCR unscathed.
  • Loading branch information
GerHobbelt committed Jan 1, 2021
1 parent aa7d03a commit e3da81e
Show file tree
Hide file tree
Showing 47 changed files with 440 additions and 178 deletions.
2 changes: 1 addition & 1 deletion MuPDF
Submodule MuPDF updated 196 files
2 changes: 1 addition & 1 deletion Qiqqa/Brainstorm/Nodes/EllipseNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public EllipseNodeContent(string text)

public bool MatchesKeyword(string keyword)
{
return (null != text) && text.ToLower(CultureInfo.CurrentCulture).Contains(keyword);
return (null != text) && text.ToLower().Contains(keyword);
}
}
}
2 changes: 1 addition & 1 deletion Qiqqa/Brainstorm/Nodes/FileSystemNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public FileSystemNodeContent(string path)

public bool MatchesKeyword(string keyword)
{
return (null != path) && path.ToLower(CultureInfo.CurrentCulture).Contains(keyword);
return (null != path) && path.ToLower().Contains(keyword);
}
}
}
2 changes: 1 addition & 1 deletion Qiqqa/Brainstorm/Nodes/IconNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public BitmapImage BitmapImage

public bool MatchesKeyword(string keyword)
{
return (null != icon_path) && icon_path.ToLower(CultureInfo.CurrentCulture).Contains(keyword);
return (null != icon_path) && icon_path.ToLower().Contains(keyword);
}
}
}
2 changes: 1 addition & 1 deletion Qiqqa/Brainstorm/Nodes/ImageNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public bool MatchesKeyword(string keyword)

internal static bool IsSupportedImagePath(string filename)
{
string extension = Path.GetExtension(filename.ToLower(CultureInfo.CurrentCulture));
string extension = Path.GetExtension(filename.ToLower());

if (0 == extension.CompareTo(".jpg")) return true;
if (0 == extension.CompareTo(".png")) return true;
Expand Down
4 changes: 2 additions & 2 deletions Qiqqa/Brainstorm/Nodes/LinkedImageNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,13 @@ public BitmapSource BitmapSource

public bool MatchesKeyword(string keyword)
{
return (null != image_path) && image_path.ToLower(CultureInfo.CurrentCulture).Contains(keyword);
return (null != image_path) && image_path.ToLower().Contains(keyword);
}


internal static bool IsSupportedImagePath(string filename)
{
string extension = Path.GetExtension(filename.ToLower(CultureInfo.CurrentCulture));
string extension = Path.GetExtension(filename.ToLower());

if (0 == extension.CompareTo(".jpg")) return true;
if (0 == extension.CompareTo(".png")) return true;
Expand Down
2 changes: 1 addition & 1 deletion Qiqqa/Brainstorm/Nodes/PDFAnnotationNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public AugmentedBindable<PDFDocument> PDFDocument

public bool MatchesKeyword(string keyword)
{
return (null != PDFAnnotation.Underlying.Text) && PDFAnnotation.Underlying.Text.ToLower(CultureInfo.CurrentCulture).Contains(keyword);
return (null != PDFAnnotation.Underlying.Text) && PDFAnnotation.Underlying.Text.ToLower().Contains(keyword);
}
}
}
4 changes: 2 additions & 2 deletions Qiqqa/Brainstorm/Nodes/PDFAuthorNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ public PDFAuthorNodeContent(string library_id, string surname, string initial)

public bool MatchesKeyword(string keyword)
{
if ((null != surname) && surname.ToLower(CultureInfo.CurrentCulture).Contains(keyword)) return true;
if ((null != initial) && initial.ToLower(CultureInfo.CurrentCulture).Contains(keyword)) return true;
if ((null != surname) && surname.ToLower().Contains(keyword)) return true;
if ((null != initial) && initial.ToLower().Contains(keyword)) return true;
return false;
}

Expand Down
2 changes: 1 addition & 1 deletion Qiqqa/Brainstorm/Nodes/PDFAutoTagNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public PDFAutoTagNodeContent(string library_id, string tag)

public bool MatchesKeyword(string keyword)
{
if ((null != tag) && tag.ToLower(CultureInfo.CurrentCulture).Contains(keyword)) return true;
if ((null != tag) && tag.ToLower().Contains(keyword)) return true;
return false;
}

Expand Down
2 changes: 1 addition & 1 deletion Qiqqa/Brainstorm/Nodes/PDFTagNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public PDFTagNodeContent(string library_id, string tag)

public bool MatchesKeyword(string keyword)
{
if ((null != tag) && tag.ToLower(CultureInfo.CurrentCulture).Contains(keyword)) return true;
if ((null != tag) && tag.ToLower().Contains(keyword)) return true;
return false;
}

Expand Down
2 changes: 1 addition & 1 deletion Qiqqa/Brainstorm/Nodes/StringNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public StringNodeContent(string text)

public bool MatchesKeyword(string keyword)
{
return (null != text) && text.ToLower(CultureInfo.CurrentCulture).Contains(keyword);
return (null != text) && text.ToLower().Contains(keyword);
}

[NonSerialized]
Expand Down
4 changes: 2 additions & 2 deletions Qiqqa/Brainstorm/Nodes/WebsiteNodeContent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ public class WebsiteNodeContent : ISearchable
public bool MatchesKeyword(string keyword)
{
return
null != Url && Url.ToLower(CultureInfo.CurrentCulture).Contains(keyword) ||
null != Title && Title.ToLower(CultureInfo.CurrentCulture).Contains(keyword);
null != Url && Url.ToLower().Contains(keyword) ||
null != Title && Title.ToLower().Contains(keyword);
}
}
}
1 change: 1 addition & 0 deletions Qiqqa/Common/MainWindowServiceDispatcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,7 @@ public void ShutdownQiqqa(bool suppress_exit_warning)
{
if (main_window != null)
{
Logging.Error("Forcibly shutting down Qiqqa");
main_window.suppress_exit_warning = suppress_exit_warning;
main_window.Close();
}
Expand Down
16 changes: 9 additions & 7 deletions Qiqqa/DocumentLibrary/AITagsStuff/AITagManager.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

namespace Qiqqa.DocumentLibrary.AITagsStuff
{
// Generates and manages (load/save) AutoTags for all documents in the library.
public class AITagManager
{
private TypedWeakReference<WebLibraryDetail> web_library_detail;
Expand Down Expand Up @@ -78,13 +79,13 @@ public void Regenerate(AsyncCallback callback = null)

try
{
Logging.Info("+AITagManager is starting regenerating");
Logging.Info("+AITagManager is starting regenerating AutoTags");

StatusManager.Instance.UpdateStatus("AITags", "Loading documents");
List<PDFDocument> pdf_documents = LibraryRef.Xlibrary.PDFDocuments;

int count_title_by_user = 0;
int could_title_by_suggest = 0;
int count_title_by_suggest = 0;
StatusManager.Instance.UpdateStatus("AITags", "Deciding whether to use suggested titles");
foreach (PDFDocument pdf_document in pdf_documents)
{
Expand All @@ -94,11 +95,12 @@ public void Regenerate(AsyncCallback callback = null)
}
else
{
++could_title_by_suggest;
++count_title_by_suggest;
}
}

bool use_suggested_titles = could_title_by_suggest > count_title_by_user;
bool use_suggested_titles = count_title_by_suggest > count_title_by_user;
Logging.Info($"+AITagManager: decision whether to use suggested titles: {use_suggested_titles}, based on auto-suggested title count {count_title_by_suggest} vs. user title count {count_title_by_user}");

StatusManager.Instance.UpdateStatus("AITags", "Scanning titles");
List<string> titles = new List<string>();
Expand Down Expand Up @@ -140,12 +142,12 @@ public void Regenerate(AsyncCallback callback = null)
}

// Generate them
CountingDictionary<NGram> ai_tags = BuzzwordGenerator.GenerateBuzzwords(titles, words_blacklist, words_whitelist, true);
CountingDictionary<NGram> ai_tags = BuzzwordGenerator.GenerateBuzzwords(titles, words_blacklist, words_whitelist, perform_scrabble_filtration: true);
Logging.Info("Generated {0} autotags", ai_tags.Count);
if (ai_tags.Count < 20)
{
Logging.Warn("There are too few autotags (only {0}), so not suppressing Scrabble words...", ai_tags.Count);
ai_tags = BuzzwordGenerator.GenerateBuzzwords(titles, words_blacklist, words_whitelist, false);
ai_tags = BuzzwordGenerator.GenerateBuzzwords(titles, words_blacklist, words_whitelist, perform_scrabble_filtration: false);
Logging.Info("Generated {0} autotags without Scrabble suppression", ai_tags.Count);
}

Expand Down Expand Up @@ -297,7 +299,7 @@ public void Regenerate(AsyncCallback callback = null)
}
Library.IsBusyRegeneratingTags = false;

Logging.Info("-AITagManager is finished regenerating (time spent: {0} ms)", clk.ElapsedMilliseconds);
Logging.Info("-AITagManager is finished regenerating AutoTags (time spent: {0} ms)", clk.ElapsedMilliseconds);
}

// Call any callback that might be interested
Expand Down
10 changes: 5 additions & 5 deletions Qiqqa/DocumentLibrary/DocumentLibraryIndex/LibraryIndex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -188,11 +188,11 @@ protected virtual void Dispose(bool disposing)
}
});

WPFDoEvents.SafeExec(() =>
{
//this.word_index_manager = null;
//web_library_detail = null;
});
//WPFDoEvents.SafeExec(() =>
//{
// //this.word_index_manager = null;
// //web_library_detail = null;
//});

WPFDoEvents.SafeExec(() =>
{
Expand Down
2 changes: 1 addition & 1 deletion Qiqqa/DocumentLibrary/ImportingIntoLibrary.cs
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ private static void AddNewDocumentToLibraryFromInternet_SYNCHRONOUS(WebLibraryDe
if (!String.IsNullOrEmpty(content_type))
{
ContentType ct = new ContentType(content_type);
content_type = ct.MediaType.ToLower(CultureInfo.CurrentCulture);
content_type = ct.MediaType.ToLower();
}
else
{
Expand Down
10 changes: 10 additions & 0 deletions Qiqqa/Documents/Common/DocumentQueuedStorer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,16 @@ public void Queue(PDFDocument pdf_document)
lock (documents_to_store_lock)
{
// l1_clk.LockPerfTimerStop();
#if DEBUG
if (pdf_document.LibraryRef == null)
{
throw new Exception("boom");
}
if (ShutdownableManager.Instance.IsShuttingDown)
{
throw new Exception("boom");
}
#endif
documents_to_store[pdf_document.LibraryRef.Id + "." + pdf_document.Fingerprint] = pdf_document;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ private void AddItemChild_Largest(WordList[] word_lists, string section)
double largest_height = double.MinValue;
int largest_page = 0;

string section_lower = section.ToLower(CultureInfo.CurrentCulture);
string section_lower = section.ToLower();

for (int page = 1; page <= popup.pdf_renderer_control_stats.pdf_document.PDFRenderer.PageCount; ++page)
{
Expand All @@ -81,7 +81,7 @@ private void AddItemChild_Largest(WordList[] word_lists, string section)
for (int i = 0; i < word_list.Count; ++i)
{
Word word = word_list[i];
if (word.Text.ToLower(CultureInfo.CurrentCulture).Contains(section_lower))
if (word.Text.ToLower().Contains(section_lower))
{
if (largest_height < word.Height)
{
Expand Down Expand Up @@ -111,7 +111,7 @@ private void AddItemChild_Rear(WordList[] word_lists, string section)

private void AddItemChild_Front_OLD(WordList[] word_lists, string section)
{
string section_lower = section.ToLower(CultureInfo.CurrentCulture);
string section_lower = section.ToLower();

for (int page = 1; page <= popup.pdf_renderer_control_stats.pdf_document.PDFRenderer.PageCount; ++page)
{
Expand All @@ -121,7 +121,7 @@ private void AddItemChild_Front_OLD(WordList[] word_lists, string section)
for (int i = 0; i < word_list.Count; ++i)
{
Word word = word_list[i];
if (word.Text.ToLower(CultureInfo.CurrentCulture).Contains(section_lower))
if (word.Text.ToLower().Contains(section_lower))
{
popup.Children.Add(new JumpToSectionItem(popup, popup.pdf_reading_control, popup.pdf_render_control, popup.pdf_renderer_control_stats, section, page));
return;
Expand All @@ -133,7 +133,7 @@ private void AddItemChild_Front_OLD(WordList[] word_lists, string section)

private void AddItemChild_Rear_OLD(WordList[] word_lists, string section)
{
string section_lower = section.ToLower(CultureInfo.CurrentCulture);
string section_lower = section.ToLower();

for (int page = popup.pdf_renderer_control_stats.pdf_document.PDFRenderer.PageCount; page >= 1; --page)
{
Expand All @@ -143,7 +143,7 @@ private void AddItemChild_Rear_OLD(WordList[] word_lists, string section)
for (int i = word_list.Count - 1; i >= 0; --i)
{
Word word = word_list[i];
if (word.Text.ToLower(CultureInfo.CurrentCulture).Contains(section_lower))
if (word.Text.ToLower().Contains(section_lower))
{
popup.Children.Add(new JumpToSectionItem(popup, popup.pdf_reading_control, popup.pdf_render_control, popup.pdf_renderer_control_stats, section, page));
return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ public DocumentMetadataControlsPanel()

InitializeComponent();

Unloaded += DocumentMetadataControlsPanel_Unloaded;

ObjTabs.Children.Clear();
ObjTabs.AddContent("Properties", "Properties", null, false, false, TabMetadata);
ObjTabs.AddContent("Annotations", "Annotations", null, false, false, TabUserData);
Expand All @@ -30,6 +32,19 @@ public DocumentMetadataControlsPanel()
ReevaluateDataContext();
}

// WARNING: https://docs.microsoft.com/en-us/dotnet/api/system.windows.frameworkelement.unloaded?view=net-5.0
// Which says:
//
// Note that the Unloaded event is not raised after an application begins shutting down.
// Application shutdown occurs when the condition defined by the ShutdownMode property occurs.
// If you place cleanup code within a handler for the Unloaded event, such as for a Window
// or a UserControl, it may not be called as expected.
private void DocumentMetadataControlsPanel_Unloaded(object sender, RoutedEventArgs e)
{
// TODO: ditch the pdf renders in the GridPreview children list...
//throw new System.NotImplementedException();
}

private void HyperlinkRestore_Click(object sender, RoutedEventArgs e)
{
AugmentedBindable<PDFDocument> pdf_document_bindable = DataContext as AugmentedBindable<PDFDocument>;
Expand Down
15 changes: 15 additions & 0 deletions Qiqqa/Documents/PDF/PDFControls/PDFReadingControl.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ public PDFReadingControl(PDFDocument pdf_document)
{
InitializeComponent();

Unloaded += PDFReadingControl_Unloaded;

GoogleScholarSideBar.Visibility = Qiqqa.Common.Configuration.ConfigurationManager.Instance.ConfigurationRecord.GoogleScholar_DoExtraBackgroundQueries ? Visibility.Visible : Visibility.Collapsed;

pdf_renderer_control = new PDFRendererControl(pdf_document, true);
Expand Down Expand Up @@ -268,6 +270,19 @@ public PDFReadingControl(PDFDocument pdf_document)
Loaded += PDFReadingControl_Loaded;
}

// WARNING: https://docs.microsoft.com/en-us/dotnet/api/system.windows.frameworkelement.unloaded?view=net-5.0
// Which says:
//
// Note that the Unloaded event is not raised after an application begins shutting down.
// Application shutdown occurs when the condition defined by the ShutdownMode property occurs.
// If you place cleanup code within a handler for the Unloaded event, such as for a Window
// or a UserControl, it may not be called as expected.
private void PDFReadingControl_Unloaded(object sender, RoutedEventArgs e)
{
// TODO: kill the pdf renderer
//throw new NotImplementedException();
}

private void PDFReadingControl_Loaded(object sender, RoutedEventArgs e)
{
if (ConfigurationManager.Instance.ConfigurationRecord.GUI_IsNovice)
Expand Down
Loading

0 comments on commit e3da81e

Please sign in to comment.