forked from rocketraman/sane-scan-pdf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscan_perpage
executable file
·125 lines (110 loc) · 3.41 KB
/
scan_perpage
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/bin/bash
# Usage: scan_perpage <imagefile>
# where imagefile is the data just scanned
# (specify this script to scanadf via -S)
# Print the command-line synopsis followed by the list of environment
# variables this script reads. Output goes to stdout, one item per line.
usage() {
  printf '%s\n' \
    "Usage: $0 <imagefile>" \
    "Set the following environment variables:" \
    " UNPAPER" \
    " SEARCHABLE" \
    " LANGUAGE" \
    " RESOLUTION" \
    " PGWIDTHIN" \
    " PGHEIGHTIN" \
    " SKIP_EMPTY_PAGES" \
    " PS2PDF_OPTS (optional)" \
    " VERBOSE (optional)" \
    " LOCKFILE (required if VERBOSE=1)"
}
# Print a diagnostic line prefixed with "scan_perpage: " when VERBOSE is 1.
# Arguments: $1 - message text (may be empty)
# Outputs:   the prefixed message on stdout in verbose mode, nothing otherwise
log() {
  # VERBOSE is optional; default it so an unset value does not make the
  # test itself print an error.
  if [[ "${VERBOSE:-0}" = 1 ]]; then
    echo "scan_perpage: $1"
  fi
}
# Pipeline sink for tool output: copy stdin to stdout when VERBOSE is 1,
# otherwise read and discard it.
logstdout() {
  # VERBOSE is optional; default it so an unset value is treated as "quiet"
  # without the test printing an error.
  if [[ "${VERBOSE:-0}" = 1 ]]; then
    cat
  else
    cat > /dev/null
  fi
}
# Run the given command line, going through `sem` when an executable of that
# name can be resolved, and directly otherwise.
# Arguments: the command and its arguments
# Returns:   the exit status of whichever command was run
runconstrained() {
  local sem_bin
  sem_bin=$(command -v sem)

  if [ ! -x "$sem_bin" ]; then
    # no usable sem: run the command as-is
    "$@"
    return
  fi

  # use up to 75% of the cores available
  sem --jobs 75% --id scan_perpage --fg "$@"
}
# The scanned image path is the only positional argument and is mandatory.
if (( $# < 1 )); then
  usage
  exit 1
fi

# These settings drive every decision made while processing the page, so
# refuse to run when any of them is empty or unset.
if [[ -z "$UNPAPER" || -z "$SEARCHABLE" || -z "$RESOLUTION" || -z "$SKIP_EMPTY_PAGES" ]]; then
  usage
  exit 1
fi

# Split the image path once; quoting keeps paths containing whitespace intact.
IMAGE_PATH=$1
IMAGE_DIR=$(dirname -- "$1")
IMAGE_FILE=$(basename -- "$1")
#######################################
# Post-process one scanned page and convert it to a PDF next to the image.
# Globals read:    IMAGE_PATH, IMAGE_DIR, IMAGE_FILE, UNPAPER, SEARCHABLE,
#                  LANGUAGE, RESOLUTION, PGWIDTHIN, PGHEIGHTIN,
#                  SKIP_EMPTY_PAGES, PS2PDF_OPTS, VERBOSE
# Globals written: status - 0 on success, otherwise the exit code of the
#                  last processing step that failed
# Outputs:         $IMAGE_DIR/<image name without extension>.pdf, unless the
#                  page was skipped as blank; diagnostics via log/logstdout
# Returns:         the same value as $status
# Side effects:    removes the scanned image and any unpaper intermediate
#######################################
process_page() {
  local white_pct=0 not_blank=1 rc=0
  local source_image=$IMAGE_PATH
  local unpaper_image=""
  local pdf_base="$IMAGE_DIR/${IMAGE_FILE%.*}"
  local -a unpaper_opts=() tess_opts=() page_opts=() pnm_opts=() stage_rc=()

  status=0

  log ""
  log "-------------------------------------------------------------------------------"
  log "Post-processing scanned page ${IMAGE_FILE}, deskew=$UNPAPER, searchable=$SEARCHABLE..."

  if [[ "$SKIP_EMPTY_PAGES" -eq 1 ]]; then
    white_pct=$(convert "$IMAGE_PATH" -fuzz 0% -negate -threshold 0 -negate -format "%[fx:100*mean]" info:) || white_pct=0
    # Only an explicit "0" from bc (white >= 99.8) marks the page as blank.
    # Empty or unparsable output, e.g. because bc is missing, must not cause
    # a real page to be dropped, since the image is deleted below.
    if [[ "$(printf '%s < 99.8\n' "$white_pct" | bc -l)" == 0 ]]; then
      not_blank=0
    fi
  fi
  log "$IMAGE_PATH has $white_pct % white"

  if (( not_blank )); then
    if [[ "$UNPAPER" -eq 1 ]]; then
      log "Applying unpaper post-processing to image data..."
      unpaper_image="$IMAGE_DIR/unpaper-$IMAGE_FILE"
      if [[ "${VERBOSE:-0}" = 1 ]]; then
        unpaper_opts+=(-v)
      fi
      #runconstrained unpaper $UNPAPERVERBOSE --no-mask-scan --overwrite --dpi $RESOLUTION --no-blackfilter $IMAGE_FILE $PP_PREFIX$IMAGE_FILE | logstdout
      runconstrained unpaper "${unpaper_opts[@]}" --overwrite --dpi "$RESOLUTION" "$IMAGE_PATH" "$unpaper_image" | logstdout
      # The pipeline's own status is that of logstdout; take unpaper's.
      rc=${PIPESTATUS[0]}
      if (( rc == 0 )) && [[ -f "$unpaper_image" ]]; then
        source_image=$unpaper_image
      else
        # Record the failure but still convert the unprocessed scan so the
        # page is not lost.
        status=$(( rc != 0 ? rc : 1 ))
        log "unpaper failed with status $status, continuing with the unprocessed image"
      fi
    fi

    log ""
    log "-------------------------------------------------------------------------------"
    if [[ "$SEARCHABLE" -eq 1 ]]; then
      log "Converting image data to searchable pdf..."
      # Pass -l only when a language is configured; with an empty value the
      # following "pdf" argument would be consumed as the language.
      if [[ -n "${LANGUAGE:-}" ]]; then
        tess_opts+=(-l "$LANGUAGE")
      fi
      runconstrained tesseract "$source_image" "$pdf_base" "${tess_opts[@]}" pdf | logstdout
      rc=${PIPESTATUS[0]}
    else
      log "Converting image data to pdf..."
      if [[ -z "${PGWIDTHIN:-}" || -z "${PGHEIGHTIN:-}" ]]; then
        page_opts=(-equalpixels "-dpi=$RESOLUTION" -noturn)
      else
        page_opts=(-imagewidth "$PGWIDTHIN" -imageheight "$PGHEIGHTIN")
      fi
      # older versions of pnmtops (Ubuntu, Debian) don't have the -verbose option, test for it
      if [[ "${VERBOSE:-0}" = 1 && ! "$(pnmtops -verbose 2>&1 < /dev/null)" =~ "unrecognized option" ]]; then
        pnm_opts+=(-verbose)
      fi
      log "Using page options: ${page_opts[*]}"
      # PS2PDF_OPTS is a whitespace-separated option list, so it is
      # deliberately left unquoted to be split into separate arguments.
      # shellcheck disable=SC2086
      runconstrained pnmtops "${pnm_opts[@]}" "${page_opts[@]}" "$source_image" | ps2pdf $PS2PDF_OPTS - > "$pdf_base.pdf"
      stage_rc=("${PIPESTATUS[@]}")
      # Report the first stage that failed.
      rc=${stage_rc[0]}
      if (( rc == 0 )); then
        rc=${stage_rc[1]}
      fi
    fi
    if (( rc != 0 )); then
      status=$rc
    fi
  else
    log "Skipping empty page $IMAGE_FILE with white percentage $white_pct"
  fi

  # The scanned image and the unpaper intermediate are removed whether or
  # not the conversion succeeded.
  rm -f -- "$IMAGE_PATH"
  if [[ -n "$unpaper_image" ]]; then
    rm -f -- "$unpaper_image"
  fi

  log ""
  log "Scan processing done, status = $status"
  return "$status"
}
# Process the page. In verbose mode the work is done while holding a lock on
# LOCKFILE (file descriptor 200). Without a usable LOCKFILE the page is
# processed unlocked rather than not at all.
if [[ "${VERBOSE:-0}" = 1 && -n "${LOCKFILE:-}" ]]; then
  (
    # Best effort: processing continues even if the lock cannot be taken.
    flock 200
    process_page
    # Variables set inside this subshell do not reach the parent, so hand
    # the result back through the subshell's exit code.
    exit "${status:-0}"
  ) 200>"$LOCKFILE"
  status=$?
else
  process_page
fi
exit "${status:-0}"