-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmtypography.sh
executable file
·159 lines (154 loc) · 4.91 KB
/
mtypography.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#!/bin/bash
# Script introducing basic Polish typography into txt file
# Author: Mikolaj Machowski
# Date: 2017-11-12
# License: GPL v2
# Define NBSP sequence {{{
# WARNING: some combinations of Vim+terminal can handle zero-width chars strangely
# Print the separator sequence for output format $1 on stdout.
# Arguments: $1 - one of: html test docx odt latex space
# Outputs:   the separator, without a trailing newline
# Returns:   0 for a known format, 1 (printing nothing) otherwise
# Non-ASCII and zero-width characters are written as octal UTF-8 bytes so
# they stay visible in the source and cannot be lost by an editor or by
# copy and paste.
nbsp_for_format() {
  case "$1" in
    html)
      # HTML - u00a0 (NO-BREAK SPACE)
      # word processor formats also accept this one
      # may be preferable for DOCX export (MS-Word format)
      printf '\302\240'
      ;;
    test)
      # Test - u2218 (RING OPERATOR), a visible marker for checking results
      printf '\342\210\230'
      ;;
    docx)
      # DOCX - u200d+u0020 (ZERO-WIDTH JOINER + SPACE)
      # apparently it depends on font used (Calibri and Arial work, TNR and
      # Cambria not) and/or if program is in Compatibility mode(?),
      # overall solution not reliable, stick with 00a0?
      printf '\342\200\215 '
      ;;
    odt)
      # ODT - u2060+u0020 (WORD JOINER + SPACE)
      # works very nice
      printf '\342\201\240 '
      ;;
    latex)
      # LaTeX - ~ (TILDE)
      printf '~'
      ;;
    space)
      # plain - u0020 (SPACE)
      # just space for really weird cases
      printf ' '
      ;;
    *)
      return 1
      ;;
  esac
}

# No argument (or an empty one) selects the html separator. An unknown
# format is rejected instead of leaving NBSP empty, which would make every
# rule delete the whitespace it was supposed to protect.
if ! NBSP=$(nbsp_for_format "${1:-html}"); then
  printf '%s: unknown output format: %s\n' "${0##*/}" "$1" >&2
  printf 'supported formats: html test docx odt latex space\n' >&2
  exit 2
fi
# }}}
# WARNINGS
# - depends on GNU sed 4.2.2 and higher (-z option)
# - some of these rules can wreak havoc, e.g. inside code blocks; that does
# not apply to my cases, but it is a caveat for general usage and the reason
# why a filter operating on a JSON structure would be beneficial
# Changes in lines: {{{
# 1. Typographic quotes
# 2. One letter words
# 3. separation of initial from name
# (WARNING: can create false positives and mess with lists formatting)
# 4. numbers before one-letter abbreviations: w. r. c.
# 5. units (only popular)
# 6. scientific titles
# 7. military titles
# 8. church and aristocratic titles
# 9. itp., itd. (nbsp before)
# 10. tj., tzn. (nbsp after)
# 11. str., godz. (nbsp after)
# 12. Insert space before: ([{
# 13. Insert space after: )]}:;,?!
# 14. Remove space before: @%)]}:;,./?!
# 15. Remove space after: @([{/
# 16. Space after two letter words:
# bo # Bo # by # By # co # Co # do # Do # ja # Ja # ją # Ją # je # Je # ku # Ku # li # Li # ma # Ma
# mi # Mi # my # My # na # Na # od # Od # ów # Ów # po # Po # są # Są # ta # Ta # tą # Tą # tę # Tę
# tę # Tę # to # To # tu # Tu # ty # Ty # we # We # wy # Wy # za # Za # ze # Ze # że # Że
# 17. Space after two letter shortcuts: np. # Np. # ok. # Ok.
# }}}
# Read text from stdin and write it to stdout with the typography rules
# applied. Requires GNU sed: -z makes sed split records on NUL, so ordinary
# text is handled as one record and a pattern can span a line break.
# Globals: NBSP (read) - spliced verbatim into the sed replacement text, so
#   it must not contain a slash, an ampersand, a backslash or a newline.
# Returns: exit status of sed.
# Numbers in the comments below are the rule numbers from the list above.
# The punctuation spacing rules 12-15 run before the rules that insert NBSP;
# otherwise a freshly inserted NBSP counts as "not a space" and rules 12-13
# add a plain space right next to it.
apply_typography() {
  # Whitespace that gets replaced by NBSP: a line break with any whitespace
  # following it, or one space.
  local gap='\(\n\s*\| \)'
  # First letter of a capitalised word after a title or an initial.
  local cap='\([A-ZŁŻĆŚŹ]\)'
  # Any printable character.
  local any='\([[:print:]]\)'
  local -a rules
  rules=(
    # 1. typographic quotes
    -e 's/,,/„/g'
    -e "s/''/”/g"
    -e 's/>>/»/g'
    -e 's/<</«/g'
    # 12. insert a space before an opening bracket; a line break in front
    #     of the bracket is left alone
    -e 's/\([^ \n]\)\([[({]\)/\1 \2/g'
    # 13. insert a space after closing brackets and punctuation, but not
    #     at the end of a line
    -e 's/\([])}:;,?!]\)\([^ \n]\)/\1 \2/g'
    # 14. remove a space before closing brackets and punctuation, the dot
    #     included as documented in the rule list; the delimiter is a bar
    #     so the slash in the bracket list cannot be taken for the end of
    #     the pattern
    -e 's| \([])}%@:;,.?!/]\)|\1|g'
    # 15. remove a space after opening brackets, at-sign and slash; same
    #     delimiter choice as in rule 14
    -e 's|\([[@({/]\) |\1|g'
    # 2. one letter words
    -e "s/\<\([iaouwzIAOUWZ]\)${gap}${any}/\1${NBSP}\3/g"
    # 3. initial followed by a name; the dot is escaped so only a real
    #    initial matches, not any two character word
    -e "s/\<\([A-ZŁŻ]\.\)${gap}${cap}/\1${NBSP}\3/g"
    # 4. number before a one letter abbreviation
    -e "s/\([0-9IVXLCDM]\)${gap}\([wrcs]\.\)/\1${NBSP}\3/g"
    # 5. number before a popular unit
    -e "s/\([0-9]\)${gap}\(zł\|gr\|km\|m\|dm\|cm\|mm\|ha\|ml\|l\|hl\|t\|kg\|g\|oz\|h\|min\|sek\|s\)\>/\1${NBSP}\3/g"
    # 6. scientific titles
    -e "s/\<\([Mm]gr\|[Dd]r\|[Pp]rof\.\|hab\.\|[Ii]nż\.\)${gap}${cap}/\1${NBSP}\3/g"
    # 7. military titles
    -e "s/\<\([Mm]ar\.\|[Kk]mdr\|mł\.\|st\.\|[Ss]zer\.\|[Kk]pr\.\|[Pp]lut\.\|[Ss]ierż\.\|[Cc]hor\.\|[Pp]por\.\|[Pp]or\.\|[Kk]pt\.\|[Mm]jr\|[Pp]płk\|[Pp]łk\|[Gg]en\.\|[Aa]dm\.\)${gap}${cap}/\1${NBSP}\3/g"
    # 8. church and aristocratic titles
    -e "s/\<\([Kk]s\.\|[Aa]bp\|[Bb]p\|[Kk]ard\.\|o\.\|[Hh]r\.\)${gap}${cap}/\1${NBSP}\3/g"
    # 9. NBSP before itp. and itd. and before the words handled with them;
    #    the word end test is applied only to the alternatives that end in
    #    a letter, because a word end can never follow a dot
    -e "s/${any}${gap}\(itp\.\|itd\.\|się\>\|no\>\)/\1${NBSP}\3/g"
    # 10. NBSP after tj. and tzn.
    -e "s/\<\(tj\.\|tzn\.\)${gap}${any}/\1${NBSP}\3/g"
    # 11. NBSP between page, hour or year words and the number after them
    -e "s/\<\(s\.\|str\.\|godz\.\|rok\|roku\|rokiem\)${gap}\([0-9]\)/\1${NBSP}\3/g"
    # 16. two letter words, matched case-insensitively
    -e "s/\<\(bo\|by\|co\|do\|ja\|ją\|je\|ku\|li\|ma\|mi\|my\|na\|od\|ów\|po\|są\|ta\|tą\|tę\|to\|tu\|ty\|we\|wy\|za\|ze\|że\)\>${gap}${any}/\1${NBSP}\3/gi"
    # 17. two letter abbreviations, matched case-insensitively
    -e "s/\<\(np\.\|ok\.\)${gap}${any}/\1${NBSP}\3/gi"
  )
  sed -z "${rules[@]}"
}

apply_typography
# TODO {{{
# Other, more aggressive with space after: {{{
# oraz
# albo
# bądź
# czy
# lub
# ani
# ale
# lecz
# zaś
# czyli
# przeto
# tedy
# więc
# zatem
#
# bez
# pod
# nad
# znad
# poprzez
# sprzed
# zza
#
# oni
# one
# mój
# twój
# nasz
# wasz
# ich
# jego
# jej
# ten
# tamten
# tam
# tędy
# taki
# tamci
# owi
# razy
# tylko
# nie
# niech
# niechaj
# tak
# bodaj
# oby
# }}}
# Fix common punctuation mistakes: {{{
# Replace - (HYPHEN) enclosed in spaces with -- (ndash) (risky)
# }}}
# }}}
# All data collected or influenced by discussions from: {{{
# http://typografia.info/dzielenie-i-pozostawianie (Robert Twardoch, accessed 2017-11-09)
# https://forum.openoffice.org/pl/forum/viewtopic.php?f=9&t=63 (various, accessed 2017-11-09)
# My own work on Vim LaTeX-Suite.
# }}}
# vim:fdm=marker nowrap