Skip to content

Commit

Permalink
Update ocrmypdf.bash completion
Browse files Browse the repository at this point in the history
Squashed commit of the following:

commit 974de2e8ccad7fd34694f2c3a7a17c64bb52cdab
Merge: a8d7f96 ee04aa7
Author: James R. Barlow <[email protected]>
Date:   Sat Dec 4 20:22:50 2021 -0800

    Merge branch 'update_bash-completion' of git://github.com/FPille/OCRmyPDF into FPille-update_bash-completion

commit ee04aa7
Author: FPille <[email protected]>
Date:   Thu Oct 14 11:09:23 2021 +0200

    update

commit 76f6453
Author: FPille <[email protected]>
Date:   Thu Oct 14 11:04:10 2021 +0200

    updated; descriptions for arguments and choices added
    deprecated arguments removed
    bug fix: typo "_init_completion" instead of "_init_completions"

commit de9b93e
Merge: c23374d 42713b7
Author: Frank <[email protected]>
Date:   Thu Oct 14 08:08:11 2021 +0200

    Merge branch 'jbarlow83:master' into master

commit c23374d
Merge: 40b2ebc c409fa5
Author: Frank <[email protected]>
Date:   Wed May 26 20:31:00 2021 +0200

    Merge branch 'jbarlow83:master' into master

commit 40b2ebc
Merge: 79c84ee 7e388f5
Author: Frank <[email protected]>
Date:   Sat Jun 1 11:09:07 2019 +0200

    Merge pull request #1 from jbarlow83/master

    update master
  • Loading branch information
FPille authored and jbarlow83 committed Dec 6, 2021
1 parent 4c1ff10 commit aae5591
Showing 1 changed file with 254 additions and 69 deletions.
323 changes: 254 additions & 69 deletions misc/completion/ocrmypdf.bash
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# ocrmypdf completion -*- shell-script -*-

# Copyright 2019 Frank Pille
# Copyright 2019, 2021 Frank Pille
# Copyright 2020 Alex Willner
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
Expand All @@ -23,100 +23,285 @@

# NOTE(review): errexit is enabled at file scope, outside any function. If this
# file is sourced into an interactive shell the option would presumably stay
# enabled there after loading - confirm this is intended.
set -o errexit

# NOTE(review): this span is a rendered diff in which removed and added lines
# appear together without +/- markers. Two function headers and two option
# lists are visible, and the first `if` below has no matching `fi`, so the
# text is not runnable as shown.
#
# Visible intent of the added lines: __ocrmypdf_arguments fills COMPREPLY with
# the "--option (description)" entries of $arguments that match $cur, and when
# exactly one entry remains it keeps only the text before the first space.
#
# NOTE(review): two descriptions in the list look mistyped - "if file size if
# above" (--fast-web-view) and the unbalanced "(keep temporary files (debug)"
# (--keep-temporary-files). They are runtime strings and are left untouched.
_ocrmypdf()
__ocrmypdf_arguments()
{
local cur prev cword words split
local arguments="--help (show help message)
--language (language(s) of the file to be OCRed)
--image-dpi (assume this DPI if input image DPI is unknown)
--output-type (select PDF output options)
--sidecar (write OCR to text file)
--version (print program version and exit)
--jobs (how many worker processes to use)
--quiet (suppress INFO messages)
--verbose (set verbosity level)
--title (set metadata)
--author (set metadata)
--subject (set metadata)
--keywords (set metadata)
--rotate-pages (rotate pages to correct orientation)
--remove-background (attempt to remove background from pages)
--deskew (fix small horizontal alignment skew)
--clean (clean document images before OCR)
--clean-final (clean document images and keep result)
--unpaper-args (a quoted string of arguments to pass to unpaper)
--oversample (oversample images to this DPI)
--remove-vectors (don\'t send vector objects to OCR)
--threshold (threshold images before OCR)
--force-ocr (OCR documents that already have printable text)
--skip-text (skip OCR on any pages that already contain text)
--redo-ocr (redo OCR on any pages that seem to have OCR already)
--skip-big (skip OCR on pages larger than this many MPixels)
--optimize (select optimization level)
--jpeg-quality (JPEG quality [0..100])
--png-quality (PNG quality [0..100])
--jbig2-lossy (enable lossy JBIG2 (see docs))
--pages (apply OCR to only the specified pages)
--max-image-mpixels (image decompression bomb threshold)
--pdf-renderer (select PDF renderer options)
--rotate-pages-threshold (page rotation confidence)
--pdfa-image-compression (set PDF/A image compression options)
--fast-web-view (if file size if above this amount in MB linearize PDF)
--plugin (name of plugin to import)
--keep-temporary-files (keep temporary files (debug)
--tesseract-config (set custom tesseract config file)
--tesseract-pagesegmode (set tesseract --psm)
--tesseract-oem (set tesseract --oem)
--tesseract-thresholding (set tesseract image thresholding)
--tesseract-timeout (maximum number of seconds to wait for OCR)
--user-words (specify location of user words file)
--user-patterns (specify location of user patterns file)
--no-progress-bar (disable the progress bar)
"

# Homebrew on Macs ships version 1.3 of bash-completion, which doesn't include _init_completion - see #502
# NOTE(review): the test below looks for `_init_completions` while the call is
# to `_init_completion`; the commit message on this page lists that as a typo
# being fixed.
if declare -F _init_completions >/dev/null 2>&1; then
_init_completion -s || return
else
COMPREPLY=()
_get_comp_words_by_ref cur prev words cword
# Candidates are the entries of $arguments that start with $cur.
COMPREPLY=( $( compgen -W "$arguments" -- "$cur") )

# Remove description if only one completion exists
if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
COMPREPLY=( ${COMPREPLY[0]%% *} )
fi
}

# NOTE(review): the lines from here to the end of this span have no closing
# `fi` or `}` in view; they look like removed lines of the previous
# implementation (a plain option list without descriptions) - confirm.
if [[ $cur == -* ]]; then
COMPREPLY=( $( compgen -W '--language --image-dpi --output-type
--sidecar --version --jobs --quiet --verbose --title --author
--subject --keywords --rotate-pages --deskew
--clean --clean-final --unpaper-args --oversample --remove-vectors
--force-ocr --skip-text --redo-ocr
--skip-big --jpeg-quality --png-quality --jbig2-lossy
--max-image-mpixels --tesseract-config --tesseract-pagesegmode
--help --tesseract-oem --pdf-renderer --tesseract-timeout
--tesseract-thresholding
--rotate-pages-threshold --pdfa-image-compression --user-words
--user-patterns --keep-temporary-files --output-type
--no-progress-bar --pages --fast-web-view' \
-- "$cur" ) )
return
else
_filedir
return
# Complete the value of --output-type.
#
# Globals:
#   cur       (read)    word being completed
#   COMPREPLY (written) matching candidates
#   IFS       (read)    expected to be a newline so that every line of the
#                       list below is treated as one candidate
__ocrmypdf_output-type()
{
    # One candidate per line, written as "<value> (<description>)". The
    # continuation lines must stay at column 0: leading blanks would become
    # part of the candidate.
    local menu
    menu="pdfa (output a PDF/A (default))
pdf (output a standard PDF)
pdfa-1 (output a PDF/A-1b)
pdfa-2 (output a PDF/A-2b)
pdfa-3 (output a PDF/A-3b)
none (do not produce an output PDF (for example, if you only care about --sidecar))"

    COMPREPLY=( $( compgen -W "$menu" -- "$cur" ) )

    # With exactly one candidate left, keep only the text before its first
    # space, i.e. drop the description.
    if (( ${#COMPREPLY[@]} == 1 )); then
        COMPREPLY=( "${COMPREPLY[0]%% *}" )
    fi
}

# Complete the value of --verbose.
#
# Globals:
#   cur       (read)    word being completed
#   COMPREPLY (written) matching candidates
#   IFS       (read)    expected to be a newline so that every line of the
#                       list below is treated as one candidate
__ocrmypdf_verbose()
{
    # One "<level> (<description>)" candidate per line; continuation lines
    # stay at column 0 so no blanks end up inside a candidate.
    local levels
    levels="0 (standard output messages)
1 (troubleshooting output messages)
2 (debugging output messages)"

    COMPREPLY=( $( compgen -W "$levels" -- "$cur" ) )

    # A single remaining candidate is reduced to the text before its first
    # space, which drops the description.
    if (( ${#COMPREPLY[@]} == 1 )); then
        COMPREPLY=( "${COMPREPLY[0]%% *}" )
    fi
}

# Complete the value of --optimize.
#
# Globals:
#   cur       (read)    word being completed
#   COMPREPLY (written) matching candidates
#   IFS       (read)    expected to be a newline so that every line of the
#                       list below is treated as one candidate
__ocrmypdf_optimize()
{
    # One "<level> (<description>)" candidate per line; continuation lines
    # stay at column 0 so no blanks end up inside a candidate.
    local levels
    levels="0 (do not optimize)
1 (do safe, lossless optimizations (default))
2 (do some lossy optimizations)
3 (do aggressive lossy optimizations (including lossy JBIG2))"

    COMPREPLY=( $( compgen -W "$levels" -- "$cur" ) )

    # A single remaining candidate is reduced to the text before its first
    # space, which drops the description.
    if (( ${#COMPREPLY[@]} == 1 )); then
        COMPREPLY=( "${COMPREPLY[0]%% *}" )
    fi
}

# Complete the value of --pdf-renderer.
#
# Globals:
#   cur       (read)    word being completed
#   COMPREPLY (written) matching candidates
#   IFS       (read)    expected to be a newline so that every line of the
#                       list below is treated as one candidate
__ocrmypdf_pdf-renderer()
{
    # One "<renderer> (<description>)" candidate per line; continuation lines
    # stay at column 0 so no blanks end up inside a candidate.
    local renderers
    renderers="auto (auto select PDF renderer)
hocr (use hOCR renderer)
hocrdebug (uses hOCR renderer in debug mode, showing recognized text)
sandwich (use sandwich renderer)"

    COMPREPLY=( $( compgen -W "$renderers" -- "$cur" ) )

    # A single remaining candidate is reduced to the text before its first
    # space, which drops the description.
    if (( ${#COMPREPLY[@]} == 1 )); then
        COMPREPLY=( "${COMPREPLY[0]%% *}" )
    fi
}

# Complete the value of --pdfa-image-compression.
#
# Globals:
#   cur       (read)    word being completed
#   COMPREPLY (written) matching candidates
#   IFS       (read)    expected to be a newline so that every line of the
#                       list below is treated as one candidate
__ocrmypdf_pdfa-image-compression()
{
    # One "<mode> (<description>)" candidate per line; continuation lines
    # stay at column 0 so no blanks end up inside a candidate.
    local modes
    modes="auto (let Ghostscript decide how to compress images)
jpeg (convert color and grayscale images to JPEG)
lossless (convert color and grayscale images to lossless (PNG))"

    COMPREPLY=( $( compgen -W "$modes" -- "$cur" ) )

    # A single remaining candidate is reduced to the text before its first
    # space, which drops the description.
    if (( ${#COMPREPLY[@]} == 1 )); then
        COMPREPLY=( "${COMPREPLY[0]%% *}" )
    fi
}

# Complete the value of --tesseract-pagesegmode.
#
# Globals:
#   cur       (read)    word being completed
#   COMPREPLY (written) matching candidates
#   IFS       (read)    expected to be a newline so that every line of the
#                       list below is treated as one candidate
__ocrmypdf_tesseract-pagesegmode()
{
    # One "<mode number> (<description>)" candidate per line; continuation
    # lines stay at column 0 so no blanks end up inside a candidate.
    local psm_modes
    psm_modes="0 (orientation and script detection (OSD) only)
1 (automatic page segmentation with OSD)
2 (automatic page segmentation, but no OSD, or OCR)
3 (fully automatic page segmentation, but no OSD (default))
4 (assume a single column of text of variable sizes)
5 (assume a single uniform block of vertically aligned text)
6 (assume a single uniform block of text)
7 (treat the image as a single text line)
8 (treat the image as a single word)
9 (treat the image as a single word in a circle)
10 (treat the image as a single character)
11 (sparse text - find as much text as possible in no particular order)
12 (sparse text with OSD)
13 (raw line - treat the image as a single text line)"

    COMPREPLY=( $( compgen -W "$psm_modes" -- "$cur" ) )

    # A single remaining candidate is reduced to the text before its first
    # space, which drops the description.
    if (( ${#COMPREPLY[@]} == 1 )); then
        COMPREPLY=( "${COMPREPLY[0]%% *}" )
    fi
}

# Complete the value of --tesseract-oem.
#
# Globals:
#   cur       (read)    word being completed
#   COMPREPLY (written) matching candidates
#   IFS       (read)    expected to be a newline so that every line of the
#                       list below is treated as one candidate
__ocrmypdf_tesseract-oem()
{
    # One "<engine mode> (<description>)" candidate per line; continuation
    # lines stay at column 0 so no blanks end up inside a candidate.
    local engine_modes
    engine_modes="0 (legacy engine only)
1 (neural nets LSTM engine only)
2 (legacy + LSTM engines)
3 (default, based on what is available)"

    COMPREPLY=( $( compgen -W "$engine_modes" -- "$cur" ) )

    # A single remaining candidate is reduced to the text before its first
    # space, which drops the description.
    if (( ${#COMPREPLY[@]} == 1 )); then
        COMPREPLY=( "${COMPREPLY[0]%% *}" )
    fi
}

# Complete the value of --tesseract-thresholding.
#
# Globals:
#   cur       (read)    word being completed
#   COMPREPLY (written) matching candidates
#   IFS       (read)    expected to be a newline so that every line of the
#                       list below is treated as one candidate
__ocrmypdf_tesseract-thresholding()
{
    # One "<method> (<description>)" candidate per line; continuation lines
    # stay at column 0 so no blanks end up inside a candidate.
    # The description of "otsu" used to read "use hOCR renderer", which
    # belongs to the --pdf-renderer choices, not to a thresholding method.
    local choices="auto (let OCRmyPDF pick thresholding - currently always uses otsu)
otsu (use legacy Otsu thresholding)
adaptive-otsu (use adaptive Otsu thresholding)
sauvola (use Sauvola thresholding)"

    COMPREPLY=( $( compgen -W "$choices" -- "$cur" ) )

    # Remove description if only one completion exists
    if [[ ${#COMPREPLY[*]} -eq 1 ]]; then
        COMPREPLY=( "${COMPREPLY[0]%% *}" )
    fi
}


# Complete the argument of the option held in $prev.
#
# NOTE(review): this span is a rendered diff in which removed and added lines
# appear together without +/- markers. Several options have two arms, some
# arms lack a closing `;;`, and some contain both `return` and `return 0`, so
# the text is not runnable as shown.
#
# Visible intent of the arms that end with `return 0`: choice lists are
# delegated to the matching __ocrmypdf_<option> helper, file arguments go to
# _filedir, and options with free-form arguments produce no candidates. After
# the case statement the function returns 1.
__ocrmypdf_check_previous()
{
case $prev in
--version|-h|--help)
return
;;
--user-words|--user-patterns|--tesseract-config)
_filedir
return
;;
--output-type)
COMPREPLY=( $( compgen -W 'pdfa pdf pdfa-1 pdfa-2 pdfa-3' -- \
"$cur" ) )
return
;;
--pdf-renderer)
COMPREPLY=( $( compgen -W 'auto hocr sandwich' -- "$cur" ) )
return
;;
--pdfa-image-compression)
COMPREPLY=( $( compgen -W 'auto jpeg lossless' -- "$cur" ) )
return
;;
-O|--optimize|--tesseract-oem)
COMPREPLY=( $( compgen -W '{0..3}' -- "$cur" ) )
return
;;
--jpeg-quality|--png-quality)
COMPREPLY=( $( compgen -W '{0..100}' -- "$cur" ) )
return
-h|--help|--version)
return 0
;;
-l|--language)
# Candidates come from `tesseract --list-langs`; its error output is
# discarded, and everything up to the last ':' of each entry is stripped.
COMPREPLY=$( command tesseract --list-langs 2>/dev/null )
COMPREPLY=( $( compgen -W '${COMPREPLY[@]##*:}' -- "$cur" ) )
return
return 0
;;
--image-dpi|--oversample|--skip-big|--max-image-mpixels|\
--tesseract-timeout|--rotate-pages-threshold)
COMPREPLY=( $( compgen -P "$cur" -W '{0..9}' ) )
return
--output-type)
__ocrmypdf_output-type
return 0
;;
-j|--jobs)
# Offers 1..N, where N is whatever _ncpus prints.
COMPREPLY=( $( compgen -W '{1..'$( _ncpus )'}' -- "$cur" ) )
return
return 0
;;
-v|--verbose)
COMPREPLY=( $( compgen -W '{0..2}' -- "$cur" ) ) # max level ?
return
__ocrmypdf_verbose
return 0
;;
-O|--optimize)
__ocrmypdf_optimize
return 0
;;
--pdf-renderer)
__ocrmypdf_pdf-renderer
return 0
;;
--pdfa-image-compression)
__ocrmypdf_pdfa-image-compression
return 0
;;
--tesseract-pagesegmode)
COMPREPLY=( $( compgen -W '{1..13}' -- "$cur" ) )
return
__ocrmypdf_tesseract-pagesegmode
return 0
;;
--tesseract-oem)
__ocrmypdf_tesseract-oem
return 0
;;
--tesseract-thresholding)
COMPREPLY=( $( compgen -W 'auto otsu adaptive-otsu sauvola' -- "$cur" ) )
return
__ocrmypdf_tesseract-thresholding
return 0
;;
--sidecar|--title|--author|--subject|--keywords|--unpaper-args|--pages|--fast-web-view)

--title|--author|--subject|--keywords|--unpaper-args|--pages|--plugin|\
--jpeg-quality|--png-quality|--image-dpi|--oversample|--skip-big|--max-image-mpixels|\
--tesseract-timeout|--rotate-pages-threshold|--fast-web-view)
# argument required but no completions available
return
return 0
;;
--tesseract-config|--user-words|--user-patterns|--sidecar)
_filedir
return 0
;;
esac

# NOTE(review): `$split` is not declared by the `_ocrmypdf` function shown
# further down this page, so the next line presumably belongs to the removed
# implementation - confirm.
$split && return
# No arm handled $prev.
return 1
}

# Completion entry point registered for the `ocrmypdf` command.
#
# Reads the current and previous words, first lets __ocrmypdf_check_previous
# complete the argument of the previous option, and otherwise completes option
# names (current word starts with "-") or file names.
#
# Globals:
#   COMPREPLY (written) by the helpers called below
# Returns:
#   0 once a completion path has run; the status of _init_completion if that
#   call fails.
_ocrmypdf()
{
    # Newline-only IFS for the helpers called below. Because IFS is local, the
    # caller's value comes back on every return path, so no manual
    # save/restore is needed.
    local IFS=$'\n'

    # bash-completion's _init_completion assigns cur, prev, words and cword in
    # the caller's scope; all four are declared here so none of them leaks
    # into the user's shell.
    local cur prev words cword

    # Homebrew on Macs ships version 1.3 of bash-completion, which doesn't
    # include _init_completion - see #502
    if declare -F _init_completion >/dev/null 2>&1; then
        _init_completion || return
    else
        COMPREPLY=()
        _get_comp_words_by_ref cur prev
    fi

    # Status 0 means the previous word was an option whose argument has been
    # dealt with; nothing else to offer.
    if __ocrmypdf_check_previous; then
        return 0
    fi

    if [[ $cur == -* ]]; then
        __ocrmypdf_arguments
    else
        _filedir
    fi

    return 0
} &&
complete -F _ocrmypdf ocrmypdf

Expand Down

0 comments on commit aae5591

Please sign in to comment.