From 95d2c37527c348d4640cc8ebd9d04f416191ff18 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Tue, 30 Apr 2024 13:47:45 +0200 Subject: [PATCH 1/3] Try to subset fonts using Harfbuzz Related to #2120. --- weasyprint/pdf/fonts.py | 266 +++++++++++++++++++++++++++++++++++++-- weasyprint/pdf/stream.py | 196 +---------------------------- weasyprint/text/ffi.py | 63 +++++++++- 3 files changed, 320 insertions(+), 205 deletions(-) diff --git a/weasyprint/pdf/fonts.py b/weasyprint/pdf/fonts.py index b80dd3a23..85f5177f2 100644 --- a/weasyprint/pdf/fonts.py +++ b/weasyprint/pdf/fonts.py @@ -1,10 +1,258 @@ """Fonts integration in PDF.""" +import io +from hashlib import md5 from math import ceil import pydyf +from fontTools import subset +from fontTools.ttLib import TTFont, TTLibError, ttFont +from fontTools.varLib.mutator import instantiateVariableFont from ..logger import LOGGER +from ..text.constants import PANGO_STRETCH_PERCENT +from ..text.ffi import ffi, harfbuzz, harfbuzz_subset, pango, units_to_double +from ..text.fonts import get_hb_object_data, get_pango_font_hb_face + + +class Font: + def __init__(self, pango_font): + self.hb_font = pango.pango_font_get_hb_font(pango_font) + self.hb_face = get_pango_font_hb_face(pango_font) + self.file_content = get_hb_object_data(self.hb_face) + self.index = harfbuzz.hb_face_get_index(self.hb_face) + + pango_metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL) + self.description = description = ffi.gc( + pango.pango_font_describe(pango_font), + pango.pango_font_description_free) + self.font_size = pango.pango_font_description_get_size(description) + self.style = pango.pango_font_description_get_style(description) + self.family = ffi.string( + pango.pango_font_description_get_family(description)) + + self.variations = {} + variations = pango.pango_font_description_get_variations(self.description) + if variations != ffi.NULL: + self.variations = { + part.split('=')[0]: float(part.split('=')[1]) + for part in ffi.string(variations).decode().split(',')} + if 'wght' in self.variations: + pango.pango_font_description_set_weight( + self.description, int(round(self.variations['wght']))) + if self.variations.get('ital'): + pango.pango_font_description_set_style( + self.description, pango.PANGO_STYLE_ITALIC) + elif self.variations.get('slnt'): + pango.pango_font_description_set_style( + self.description, pango.PANGO_STYLE_OBLIQUE) + if 'wdth' in self.variations: + stretch = min( + PANGO_STRETCH_PERCENT.items(), + key=lambda item: abs(item[0] - self.variations['wdth']))[1] + pango.pango_font_description_set_stretch(self.description, stretch) + description_string = ffi.string( + pango.pango_font_description_to_string(description)) + + # Never use the built-in hash function here: it’s not stable + self.hash = ''.join( + chr(65 + letter % 26) for letter + in md5(description_string, usedforsecurity=False).digest()[:6]) + + # Name + fields = description_string.split(b' ') + if fields and b'=' in fields[-1]: + fields.pop() # Remove variations + if fields: + fields.pop() # Remove font size + else: + fields = [b'Unknown'] + self.name = b'/' + self.hash.encode() + b'+' + b'-'.join(fields) + + # Ascent & descent + if self.font_size: + self.ascent = int( + pango.pango_font_metrics_get_ascent(pango_metrics) / + self.font_size * 1000) + self.descent = -int( + pango.pango_font_metrics_get_descent(pango_metrics) / + self.font_size * 1000) + else: + self.ascent = self.descent = 0 + + # Tables and metadata + table_count = ffi.new('unsigned int *', 100) + table_tags = ffi.new('hb_tag_t[100]') + table_name = ffi.new('char[4]') + harfbuzz.hb_face_get_table_tags(self.hb_face, 0, table_count, table_tags) + self.tables = [] + for i in range(table_count[0]): + harfbuzz.hb_tag_to_string(table_tags[i], table_name) + self.tables.append(ffi.string(table_name).decode()) + self.bitmap = 'EBDT' in self.tables and 'EBLC' in self.tables + self.italic_angle = 0 # TODO: this should be different + self.upem = harfbuzz.hb_face_get_upem(self.hb_face) + self.png = harfbuzz.hb_ot_color_has_png(self.hb_face) + self.svg = harfbuzz.hb_ot_color_has_svg(self.hb_face) + self.stemv = 80 + self.stemh = 80 + self.widths = {} + self.cmap = {} + self.used_in_forms = False + + # Font flags + self.flags = 2 ** (3 - 1) # Symbolic, custom character set + if self.style: + self.flags += 2 ** (7 - 1) # Italic + if b'Serif' in fields: + self.flags += 2 ** (2 - 1) # Serif + + def clean(self, cmap, hinting): + # Subset font. + self.subset(cmap, hinting) + + # Transform variable into static font + if 'fvar' in self.tables: + full_font = io.BytesIO(self.file_content) + ttfont = TTFont(full_font, fontNumber=self.index) + if 'wght' not in self.variations: + weight = pango.pango_font_description_get_weight( + self.description) + self.variations['wght'] = weight + if 'opsz' not in self.variations: + self.variations['opsz'] = units_to_double(self.font_size) + if 'slnt' not in self.variations: + slnt = 0 + if self.style == 1: + for axe in ttfont['fvar'].axes: + if axe.axisTag == 'slnt': + if axe.maxValue == 0: + slnt = axe.minValue + else: + slnt = axe.maxValue + break + self.variations['slnt'] = slnt + if 'ital' not in self.variations: + self.variations['ital'] = int(self.style == 2) + partial_font = io.BytesIO() + try: + ttfont = instantiateVariableFont(ttfont, self.variations) + for key, (advance, bearing) in ttfont['hmtx'].metrics.items(): + if advance < 0: + ttfont['hmtx'].metrics[key] = (0, bearing) + ttfont.save(partial_font) + except Exception: + LOGGER.warning('Unable to mutate variable font') + else: + self.file_content = partial_font.getvalue() + + if not (self.png or self.svg): + return + + full_font = io.BytesIO(self.file_content) + ttfont = TTFont(full_font, fontNumber=self.index) + try: + # Add empty glyphs instead of PNG or SVG emojis + if 'loca' not in self.tables or 'glyf' not in self.tables: + ttfont['loca'] = ttFont.getTableClass('loca')() + ttfont['glyf'] = ttFont.getTableClass('glyf')() + ttfont['glyf'].glyphOrder = ttfont.getGlyphOrder() + ttfont['glyf'].glyphs = { + name: ttFont.getTableModule('glyf').Glyph() + for name in ttfont['glyf'].glyphOrder} + else: + for glyph in ttfont['glyf'].glyphs: + ttfont['glyf'][glyph] = ( + ttFont.getTableModule('glyf').Glyph()) + for table_name in ('CBDT', 'CBLC', 'SVG '): + if table_name in ttfont: + del ttfont[table_name] + output_font = io.BytesIO() + ttfont.save(output_font) + self.file_content = output_font.getvalue() + except TTLibError: + LOGGER.warning('Unable to save emoji font') + + @property + def type(self): + return 'otf' if self.file_content[:4] == b'OTTO' else 'ttf' + + def subset(self, cmap, hinting): + if not cmap: + return + + if harfbuzz_subset: + hb_subset = harfbuzz_subset.hb_subset_input_create_or_fail() + + # Only keep used glyphs. + gid_set = harfbuzz_subset.hb_subset_input_glyph_set(hb_subset) + gid_array = ffi.new(f'hb_codepoint_t[{len(cmap)}]', sorted(cmap)) + harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(cmap)) + + # Set flags. + flags = ( + harfbuzz_subset.HB_SUBSET_FLAGS_RETAIN_GIDS | + harfbuzz_subset.HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED | + harfbuzz_subset.HB_SUBSET_FLAGS_DESUBROUTINIZE) + if not hinting: + flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NO_HINTING + harfbuzz_subset.hb_subset_input_set_flags(hb_subset, flags) + + # Drop useless tables. + drop_set = harfbuzz_subset.hb_subset_input_set( + hb_subset, harfbuzz_subset.HB_SUBSET_SETS_DROP_TABLE_TAG) + drop_tables = tuple(harfbuzz.hb_tag_from_string(name, -1) for name in ( + b'BASE', b'DSIG', b'EBDT', b'EBLC', b'EBSC', b'GPOS', b'GSUB', b'JSTF', + b'LTSH', b'PCLT', b'SVG ')) + drop_tables_array = ffi.new( + f'hb_codepoint_t[{len(drop_tables)}]', drop_tables) + harfbuzz.hb_set_add_sorted_array( + drop_set, drop_tables_array, len(drop_tables)) + + # Subset font. + hb_face = harfbuzz_subset.hb_subset_or_fail(self.hb_face, hb_subset) + + # Drop empty glyphs after last one used. + gid_set = harfbuzz_subset.hb_subset_input_glyph_set(hb_subset) + keep = tuple(range(max(cmap) + 1)) + gid_array = ffi.new(f'hb_codepoint_t[{len(keep)}]', keep) + harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(keep)) + + # Set flags. + flags = ( + harfbuzz_subset.HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED | + harfbuzz_subset.HB_SUBSET_FLAGS_DESUBROUTINIZE) + if not hinting: + flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NO_HINTING + harfbuzz_subset.hb_subset_input_set_flags(hb_subset, flags) + + # Subset font. + hb_face = harfbuzz_subset.hb_subset_or_fail(hb_face, hb_subset) + + # Store new font. + if hb_face: + file_content = get_hb_object_data(hb_face) + if file_content: + self.file_content = file_content + return + LOGGER.warning('Unable to subset font with Harfbuzz') + else: + full_font = io.BytesIO(self.file_content) + optimized_font = io.BytesIO() + options = subset.Options( + retain_gids=True, passthrough_tables=True, ignore_missing_glyphs=True, + hinting=hinting, desubroutinize=True) + options.drop_tables += ['GSUB', 'GPOS', 'SVG'] + subsetter = subset.Subsetter(options) + subsetter.populate(gids=cmap) + try: + ttfont = TTFont(full_font, fontNumber=self.index) + subsetter.subset(ttfont) + except TTLibError: + LOGGER.warning('Unable to subset font with fontTools') + else: + ttfont.save(optimized_font) + self.file_content = optimized_font.getvalue() def build_fonts_dictionary(pdf, fonts, compress_pdf, subset, options): @@ -37,18 +285,20 @@ def build_fonts_dictionary(pdf, fonts, compress_pdf, subset, options): font_references_by_file_hash[file_hash] = font_stream.reference for font in fonts.values(): - if not font.ttfont or (subset and not font.used_in_forms): + if subset and not font.used_in_forms: # Only store widths and map for used glyphs font_widths = font.widths cmap = font.cmap else: # Store width and Unicode map for all glyphs + full_font = io.BytesIO(font.file_content) + ttfont = TTFont(full_font, fontNumber=font.index) font_widths, cmap = {}, {} - for letter, key in font.ttfont.getBestCmap().items(): - glyph = font.ttfont.getGlyphID(key) + for letter, key in ttfont.getBestCmap().items(): + glyph = ttfont.getGlyphID(key) if glyph not in cmap: cmap[glyph] = chr(letter) - width = font.ttfont.getGlyphSet()[key].width + width = ttfont.getGlyphSet()[key].width font_widths[glyph] = width * 1000 / font.upem max_x = max(font_widths.values()) if font_widths else 0 @@ -178,17 +428,19 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, 'Differences': pydyf.Array(differences), }) char_procs = pydyf.Dictionary({}) - font_glyphs = font.ttfont['EBDT'].strikeData[0] + full_font = io.BytesIO(font.file_content) + ttfont = TTFont(full_font, fontNumber=font.index) + font_glyphs = ttfont['EBDT'].strikeData[0] widths = [0] * (last - first + 1) glyphs_info = {} for key, glyph in font_glyphs.items(): glyph_format = glyph.getFormat() - glyph_id = font.ttfont.getGlyphID(key) + glyph_id = ttfont.getGlyphID(key) # Get and store glyph metrics if glyph_format == 5: data = glyph.data - subtables = font.ttfont['EBLC'].strikes[0].indexSubTables + subtables = ttfont['EBLC'].strikes[0].indexSubTables for subtable in subtables: first_index = subtable.firstGlyphIndex last_index = subtable.lastGlyphIndex diff --git a/weasyprint/pdf/stream.py b/weasyprint/pdf/stream.py index a8f47c710..a1d7abb06 100644 --- a/weasyprint/pdf/stream.py +++ b/weasyprint/pdf/stream.py @@ -1,201 +1,11 @@ """PDF stream.""" -import io -from hashlib import md5 - import pydyf -from fontTools import subset -from fontTools.ttLib import TTFont, TTLibError, ttFont -from fontTools.varLib.mutator import instantiateVariableFont -from ..logger import LOGGER from ..matrix import Matrix -from ..text.constants import PANGO_STRETCH_PERCENT -from ..text.ffi import ffi, harfbuzz, pango, units_to_double -from ..text.fonts import get_hb_object_data, get_pango_font_hb_face, get_pango_font_key - - -class Font: - def __init__(self, pango_font): - self.hb_font = pango.pango_font_get_hb_font(pango_font) - self.hb_face = get_pango_font_hb_face(pango_font) - self.file_content = get_hb_object_data(self.hb_face) - self.index = harfbuzz.hb_face_get_index(self.hb_face) - - pango_metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL) - self.description = description = ffi.gc( - pango.pango_font_describe(pango_font), - pango.pango_font_description_free) - self.font_size = pango.pango_font_description_get_size(description) - self.style = pango.pango_font_description_get_style(description) - self.family = ffi.string( - pango.pango_font_description_get_family(description)) - - self.variations = {} - variations = pango.pango_font_description_get_variations( - self.description) - if variations != ffi.NULL: - self.variations = { - part.split('=')[0]: float(part.split('=')[1]) - for part in ffi.string(variations).decode().split(',')} - if 'wght' in self.variations: - pango.pango_font_description_set_weight( - self.description, int(round(self.variations['wght']))) - if self.variations.get('ital'): - pango.pango_font_description_set_style( - self.description, pango.PANGO_STYLE_ITALIC) - elif self.variations.get('slnt'): - pango.pango_font_description_set_style( - self.description, pango.PANGO_STYLE_OBLIQUE) - if 'wdth' in self.variations: - stretch = min( - PANGO_STRETCH_PERCENT.items(), - key=lambda item: abs(item[0] - self.variations['wdth']))[1] - pango.pango_font_description_set_stretch(self.description, stretch) - description_string = ffi.string( - pango.pango_font_description_to_string(description)) - - # Never use the built-in hash function here: it’s not stable - self.hash = ''.join( - chr(65 + letter % 26) for letter - in md5(description_string, usedforsecurity=False).digest()[:6]) - - # Name - fields = description_string.split(b' ') - if fields and b'=' in fields[-1]: - fields.pop() # Remove variations - if fields: - fields.pop() # Remove font size - else: - fields = [b'Unknown'] - self.name = b'/' + self.hash.encode() + b'+' + b'-'.join(fields) - - # Ascent & descent - if self.font_size: - self.ascent = int( - pango.pango_font_metrics_get_ascent(pango_metrics) / - self.font_size * 1000) - self.descent = -int( - pango.pango_font_metrics_get_descent(pango_metrics) / - self.font_size * 1000) - else: - self.ascent = self.descent = 0 - - # Fonttools - full_font = io.BytesIO(self.file_content) - try: - self.ttfont = TTFont(full_font, fontNumber=self.index) - except Exception: - LOGGER.warning('Unable to read font') - self.ttfont = None - self.bitmap = False - else: - self.bitmap = ( - 'EBDT' in self.ttfont and 'EBLC' in self.ttfont and ( - 'glyf' not in self.ttfont or not self.ttfont['glyf'].glyphs)) - - # Various properties - self.italic_angle = 0 # TODO: this should be different - self.upem = harfbuzz.hb_face_get_upem(self.hb_face) - self.png = harfbuzz.hb_ot_color_has_png(self.hb_face) - self.svg = harfbuzz.hb_ot_color_has_svg(self.hb_face) - self.stemv = 80 - self.stemh = 80 - self.widths = {} - self.cmap = {} - self.used_in_forms = False - - # Font flags - self.flags = 2 ** (3 - 1) # Symbolic, custom character set - if self.style: - self.flags += 2 ** (7 - 1) # Italic - if b'Serif' in fields: - self.flags += 2 ** (2 - 1) # Serif - - def clean(self, cmap, hinting): - if self.ttfont is None: - return - - # Subset font - if cmap: - optimized_font = io.BytesIO() - options = subset.Options( - retain_gids=True, passthrough_tables=True, - ignore_missing_glyphs=True, hinting=hinting, - desubroutinize=True) - options.drop_tables += ['GSUB', 'GPOS', 'SVG'] - subsetter = subset.Subsetter(options) - subsetter.populate(gids=cmap) - try: - subsetter.subset(self.ttfont) - except TTLibError: - LOGGER.warning('Unable to optimize font') - else: - self.ttfont.save(optimized_font) - self.file_content = optimized_font.getvalue() - - # Transform variable into static font - if 'fvar' in self.ttfont: - if 'wght' not in self.variations: - weight = pango.pango_font_description_get_weight( - self.description) - self.variations['wght'] = weight - if 'opsz' not in self.variations: - self.variations['opsz'] = units_to_double(self.font_size) - if 'slnt' not in self.variations: - slnt = 0 - if self.style == 1: - for axe in self.ttfont['fvar'].axes: - if axe.axisTag == 'slnt': - if axe.maxValue == 0: - slnt = axe.minValue - else: - slnt = axe.maxValue - break - self.variations['slnt'] = slnt - if 'ital' not in self.variations: - self.variations['ital'] = int(self.style == 2) - partial_font = io.BytesIO() - try: - ttfont = instantiateVariableFont(self.ttfont, self.variations) - for key, (advance, bearing) in ttfont['hmtx'].metrics.items(): - if advance < 0: - ttfont['hmtx'].metrics[key] = (0, bearing) - ttfont.save(partial_font) - except Exception: - LOGGER.warning('Unable to mutate variable font') - else: - self.ttfont = ttfont - self.file_content = partial_font.getvalue() - - if not (self.png or self.svg): - return - - try: - # Add empty glyphs instead of PNG or SVG emojis - if 'loca' not in self.ttfont or 'glyf' not in self.ttfont: - self.ttfont['loca'] = ttFont.getTableClass('loca')() - self.ttfont['glyf'] = ttFont.getTableClass('glyf')() - self.ttfont['glyf'].glyphOrder = self.ttfont.getGlyphOrder() - self.ttfont['glyf'].glyphs = { - name: ttFont.getTableModule('glyf').Glyph() - for name in self.ttfont['glyf'].glyphOrder} - else: - for glyph in self.ttfont['glyf'].glyphs: - self.ttfont['glyf'][glyph] = ( - ttFont.getTableModule('glyf').Glyph()) - for table_name in ('CBDT', 'CBLC', 'SVG '): - if table_name in self.ttfont: - del self.ttfont[table_name] - output_font = io.BytesIO() - self.ttfont.save(output_font) - self.file_content = output_font.getvalue() - except TTLibError: - LOGGER.warning('Unable to save emoji font') - - @property - def type(self): - return 'otf' if self.file_content[:4] == b'OTTO' else 'ttf' +from ..text.ffi import ffi +from ..text.fonts import get_pango_font_key +from .fonts import Font class Stream(pydyf.Stream): diff --git a/weasyprint/text/ffi.py b/weasyprint/text/ffi.py index 198bb8ad5..3542828c7 100644 --- a/weasyprint/text/ffi.py +++ b/weasyprint/text/ffi.py @@ -12,20 +12,67 @@ typedef ... hb_font_t; typedef ... hb_face_t; typedef ... hb_blob_t; + typedef uint32_t hb_tag_t; typedef uint32_t hb_codepoint_t; + hb_tag_t hb_tag_from_string (const char *str, int len); + void hb_tag_to_string (hb_tag_t tag, char *buf); hb_blob_t * hb_face_reference_blob (hb_face_t *face); unsigned int hb_face_get_index (const hb_face_t *face); unsigned int hb_face_get_upem (const hb_face_t *face); const char * hb_blob_get_data (hb_blob_t *blob, unsigned int *length); bool hb_ot_color_has_png (hb_face_t *face); - hb_blob_t * hb_ot_color_glyph_reference_png ( - hb_font_t *font, hb_codepoint_t glyph); + hb_blob_t * hb_ot_color_glyph_reference_png (hb_font_t *font, hb_codepoint_t glyph); bool hb_ot_color_has_svg (hb_face_t *face); - hb_blob_t * hb_ot_color_glyph_reference_svg ( - hb_face_t *face, hb_codepoint_t glyph); + hb_blob_t * hb_ot_color_glyph_reference_svg (hb_face_t *face, hb_codepoint_t glyph); void hb_blob_destroy (hb_blob_t *blob); + unsigned int hb_face_get_table_tags ( + const hb_face_t *face, unsigned int start_offset, unsigned int *table_count, + hb_tag_t *table_tags); + unsigned int hb_face_get_glyph_count (const hb_face_t *face); + hb_blob_t * hb_face_reference_table (const hb_face_t *face, hb_tag_t tag); + // HarfBuzz Subset + + typedef ... hb_subset_input_t; + typedef ... hb_set_t; + + typedef enum { + HB_SUBSET_FLAGS_DEFAULT = 0x00000000u, + HB_SUBSET_FLAGS_NO_HINTING = 0x00000001u, + HB_SUBSET_FLAGS_RETAIN_GIDS = 0x00000002u, + HB_SUBSET_FLAGS_DESUBROUTINIZE = 0x00000004u, + HB_SUBSET_FLAGS_NAME_LEGACY = 0x00000008u, + HB_SUBSET_FLAGS_SET_OVERLAPS_FLAG = 0x00000010u, + HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED = 0x00000020u, + HB_SUBSET_FLAGS_NOTDEF_OUTLINE = 0x00000040u, + HB_SUBSET_FLAGS_GLYPH_NAMES = 0x00000080u, + HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES = 0x00000100u, + HB_SUBSET_FLAGS_NO_LAYOUT_CLOSURE = 0x00000200u, + } hb_subset_flags_t; + + typedef enum { + HB_SUBSET_SETS_GLYPH_INDEX = 0, + HB_SUBSET_SETS_UNICODE, + HB_SUBSET_SETS_NO_SUBSET_TABLE_TAG, + HB_SUBSET_SETS_DROP_TABLE_TAG, + HB_SUBSET_SETS_NAME_ID, + HB_SUBSET_SETS_NAME_LANG_ID, + HB_SUBSET_SETS_LAYOUT_FEATURE_TAG, + HB_SUBSET_SETS_LAYOUT_SCRIPT_TAG, + } hb_subset_sets_t; + + hb_subset_input_t * hb_subset_input_create_or_fail (void); + hb_set_t * hb_subset_input_glyph_set (hb_subset_input_t *input); + void hb_set_add (hb_set_t *set, hb_codepoint_t codepoint); + void hb_set_add_sorted_array ( + hb_set_t *set, const hb_codepoint_t *sorted_codepoints, + unsigned int num_codepoints); + hb_face_t * hb_subset_or_fail (hb_face_t *source, const hb_subset_input_t *input); + void hb_subset_input_set_flags (hb_subset_input_t *input, unsigned value); + hb_set_t * hb_subset_input_set ( + hb_subset_input_t *input, hb_subset_sets_t set_type); + // Pango typedef unsigned int guint; @@ -402,11 +449,13 @@ ''') -def _dlopen(ffi, *names): +def _dlopen(ffi, *names, allow_fail=False): """Try various names for the same library, for different platforms.""" for name in names: with suppress(OSError): return ffi.dlopen(name) + if allow_fail: + return # Re-raise the exception. print( '\n-----\n\n' @@ -439,6 +488,10 @@ def _dlopen(ffi, *names): ffi, 'harfbuzz', 'harfbuzz-0.0', 'libharfbuzz-0', 'libharfbuzz.so.0', 'libharfbuzz.so.0', 'libharfbuzz.0.dylib', 'libharfbuzz-0.dll') +harfbuzz_subset = _dlopen( + ffi, 'harfbuzz-subset', 'harfbuzz-subset-0.0', 'libharfbuzz-subset-0', + 'libharfbuzz-subset.so.0', 'libharfbuzz-subset.so.0', 'libharfbuzz-subset.0.dylib', + 'libharfbuzz-subset-0.dll', allow_fail=True) fontconfig = _dlopen( ffi, 'fontconfig-1', 'fontconfig', 'libfontconfig', 'libfontconfig.so.1', 'libfontconfig.1.dylib', 'libfontconfig-1.dll') From 95c47ae9ce8203abd547b74aaedd51ed2421aa47 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sat, 8 Jun 2024 16:41:57 +0200 Subject: [PATCH 2/3] Clean PDF font management --- weasyprint/pdf/fonts.py | 408 +++++++++++++++++++++------------------- 1 file changed, 210 insertions(+), 198 deletions(-) diff --git a/weasyprint/pdf/fonts.py b/weasyprint/pdf/fonts.py index 85f5177f2..6271f8c0f 100644 --- a/weasyprint/pdf/fonts.py +++ b/weasyprint/pdf/fonts.py @@ -24,12 +24,10 @@ def __init__(self, pango_font): pango_metrics = pango.pango_font_get_metrics(pango_font, ffi.NULL) self.description = description = ffi.gc( - pango.pango_font_describe(pango_font), - pango.pango_font_description_free) + pango.pango_font_describe(pango_font), pango.pango_font_description_free) self.font_size = pango.pango_font_description_get_size(description) self.style = pango.pango_font_description_get_style(description) - self.family = ffi.string( - pango.pango_font_description_get_family(description)) + self.family = ffi.string(pango.pango_font_description_get_family(description)) self.variations = {} variations = pango.pango_font_description_get_variations(self.description) @@ -37,29 +35,29 @@ def __init__(self, pango_font): self.variations = { part.split('=')[0]: float(part.split('=')[1]) for part in ffi.string(variations).decode().split(',')} - if 'wght' in self.variations: + if weight := self.variations.get('weight'): pango.pango_font_description_set_weight( - self.description, int(round(self.variations['wght']))) + self.description, int(round(weight))) if self.variations.get('ital'): pango.pango_font_description_set_style( self.description, pango.PANGO_STYLE_ITALIC) elif self.variations.get('slnt'): pango.pango_font_description_set_style( self.description, pango.PANGO_STYLE_OBLIQUE) - if 'wdth' in self.variations: + if (width := self.variations.get('wdth')) is not None: stretch = min( PANGO_STRETCH_PERCENT.items(), - key=lambda item: abs(item[0] - self.variations['wdth']))[1] + key=lambda item: abs(item[0] - width))[1] pango.pango_font_description_set_stretch(self.description, stretch) description_string = ffi.string( pango.pango_font_description_to_string(description)) - # Never use the built-in hash function here: it’s not stable + # Never use the built-in hash function here: it’s not stable. self.hash = ''.join( chr(65 + letter % 26) for letter in md5(description_string, usedforsecurity=False).digest()[:6]) - # Name + # Set font name. fields = description_string.split(b' ') if fields and b'=' in fields[-1]: fields.pop() # Remove variations @@ -69,7 +67,7 @@ def __init__(self, pango_font): fields = [b'Unknown'] self.name = b'/' + self.hash.encode() + b'+' + b'-'.join(fields) - # Ascent & descent + # Set ascent and descent. if self.font_size: self.ascent = int( pango.pango_font_metrics_get_ascent(pango_metrics) / @@ -80,7 +78,7 @@ def __init__(self, pango_font): else: self.ascent = self.descent = 0 - # Tables and metadata + # Get font tables and set metadata. table_count = ffi.new('unsigned int *', 100) table_tags = ffi.new('hb_tag_t[100]') table_name = ffi.new('char[4]') @@ -100,7 +98,7 @@ def __init__(self, pango_font): self.cmap = {} self.used_in_forms = False - # Font flags + # Set font flags. self.flags = 2 ** (3 - 1) # Symbolic, custom character set if self.style: self.flags += 2 ** (7 - 1) # Italic @@ -108,10 +106,12 @@ def __init__(self, pango_font): self.flags += 2 ** (2 - 1) # Serif def clean(self, cmap, hinting): + """Remove useless data from font.""" + # Subset font. self.subset(cmap, hinting) - # Transform variable into static font + # Transform variable into static font. if 'fvar' in self.tables: full_font = io.BytesIO(self.file_content) ttfont = TTFont(full_font, fontNumber=self.index) @@ -146,141 +146,151 @@ def clean(self, cmap, hinting): else: self.file_content = partial_font.getvalue() - if not (self.png or self.svg): - return - - full_font = io.BytesIO(self.file_content) - ttfont = TTFont(full_font, fontNumber=self.index) - try: - # Add empty glyphs instead of PNG or SVG emojis - if 'loca' not in self.tables or 'glyf' not in self.tables: - ttfont['loca'] = ttFont.getTableClass('loca')() - ttfont['glyf'] = ttFont.getTableClass('glyf')() - ttfont['glyf'].glyphOrder = ttfont.getGlyphOrder() - ttfont['glyf'].glyphs = { - name: ttFont.getTableModule('glyf').Glyph() - for name in ttfont['glyf'].glyphOrder} - else: - for glyph in ttfont['glyf'].glyphs: - ttfont['glyf'][glyph] = ( - ttFont.getTableModule('glyf').Glyph()) - for table_name in ('CBDT', 'CBLC', 'SVG '): - if table_name in ttfont: - del ttfont[table_name] - output_font = io.BytesIO() - ttfont.save(output_font) - self.file_content = output_font.getvalue() - except TTLibError: - LOGGER.warning('Unable to save emoji font') + # Remove images. + if self.png or self.svg: + full_font = io.BytesIO(self.file_content) + ttfont = TTFont(full_font, fontNumber=self.index) + try: + # Add empty glyphs instead of PNG or SVG emojis. + if 'loca' not in self.tables or 'glyf' not in self.tables: + ttfont['loca'] = ttFont.getTableClass('loca')() + ttfont['glyf'] = ttFont.getTableClass('glyf')() + ttfont['glyf'].glyphOrder = ttfont.getGlyphOrder() + ttfont['glyf'].glyphs = { + name: ttFont.getTableModule('glyf').Glyph() + for name in ttfont['glyf'].glyphOrder} + else: + for glyph in ttfont['glyf'].glyphs: + ttfont['glyf'][glyph] = ttFont.getTableModule('glyf').Glyph() + for table_name in ('CBDT', 'CBLC', 'SVG '): + if table_name in ttfont: + del ttfont[table_name] + output_font = io.BytesIO() + ttfont.save(output_font) + self.file_content = output_font.getvalue() + except TTLibError: + LOGGER.warning('Unable to save emoji font') @property def type(self): return 'otf' if self.file_content[:4] == b'OTTO' else 'ttf' def subset(self, cmap, hinting): + """Remove unused glyphs and tables from font.""" if not cmap: return if harfbuzz_subset: - hb_subset = harfbuzz_subset.hb_subset_input_create_or_fail() - - # Only keep used glyphs. - gid_set = harfbuzz_subset.hb_subset_input_glyph_set(hb_subset) - gid_array = ffi.new(f'hb_codepoint_t[{len(cmap)}]', sorted(cmap)) - harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(cmap)) - - # Set flags. - flags = ( - harfbuzz_subset.HB_SUBSET_FLAGS_RETAIN_GIDS | - harfbuzz_subset.HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED | - harfbuzz_subset.HB_SUBSET_FLAGS_DESUBROUTINIZE) - if not hinting: - flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NO_HINTING - harfbuzz_subset.hb_subset_input_set_flags(hb_subset, flags) - - # Drop useless tables. - drop_set = harfbuzz_subset.hb_subset_input_set( - hb_subset, harfbuzz_subset.HB_SUBSET_SETS_DROP_TABLE_TAG) - drop_tables = tuple(harfbuzz.hb_tag_from_string(name, -1) for name in ( - b'BASE', b'DSIG', b'EBDT', b'EBLC', b'EBSC', b'GPOS', b'GSUB', b'JSTF', - b'LTSH', b'PCLT', b'SVG ')) - drop_tables_array = ffi.new( - f'hb_codepoint_t[{len(drop_tables)}]', drop_tables) - harfbuzz.hb_set_add_sorted_array( - drop_set, drop_tables_array, len(drop_tables)) - - # Subset font. - hb_face = harfbuzz_subset.hb_subset_or_fail(self.hb_face, hb_subset) - - # Drop empty glyphs after last one used. - gid_set = harfbuzz_subset.hb_subset_input_glyph_set(hb_subset) - keep = tuple(range(max(cmap) + 1)) - gid_array = ffi.new(f'hb_codepoint_t[{len(keep)}]', keep) - harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(keep)) - - # Set flags. - flags = ( - harfbuzz_subset.HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED | - harfbuzz_subset.HB_SUBSET_FLAGS_DESUBROUTINIZE) - if not hinting: - flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NO_HINTING - harfbuzz_subset.hb_subset_input_set_flags(hb_subset, flags) - - # Subset font. - hb_face = harfbuzz_subset.hb_subset_or_fail(hb_face, hb_subset) - - # Store new font. - if hb_face: - file_content = get_hb_object_data(hb_face) - if file_content: - self.file_content = file_content - return - LOGGER.warning('Unable to subset font with Harfbuzz') + self._harfbuzz_subset(cmap, hinting) else: - full_font = io.BytesIO(self.file_content) - optimized_font = io.BytesIO() - options = subset.Options( - retain_gids=True, passthrough_tables=True, ignore_missing_glyphs=True, - hinting=hinting, desubroutinize=True) - options.drop_tables += ['GSUB', 'GPOS', 'SVG'] - subsetter = subset.Subsetter(options) - subsetter.populate(gids=cmap) - try: - ttfont = TTFont(full_font, fontNumber=self.index) - subsetter.subset(ttfont) - except TTLibError: - LOGGER.warning('Unable to subset font with fontTools') - else: - ttfont.save(optimized_font) - self.file_content = optimized_font.getvalue() - - -def build_fonts_dictionary(pdf, fonts, compress_pdf, subset, options): + self._fonttools_subset(cmap, hinting) + + def _harfbuzz_subset(self, cmap, hinting): + """Subset font using Harfbuzz.""" + hb_subset = harfbuzz_subset.hb_subset_input_create_or_fail() + + # Only keep used glyphs. + gid_set = harfbuzz_subset.hb_subset_input_glyph_set(hb_subset) + gid_array = ffi.new(f'hb_codepoint_t[{len(cmap)}]', sorted(cmap)) + harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(cmap)) + + # Set flags. + flags = ( + harfbuzz_subset.HB_SUBSET_FLAGS_RETAIN_GIDS | + harfbuzz_subset.HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED | + harfbuzz_subset.HB_SUBSET_FLAGS_DESUBROUTINIZE) + if not hinting: + flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NO_HINTING + harfbuzz_subset.hb_subset_input_set_flags(hb_subset, flags) + + # Drop useless tables. + drop_set = harfbuzz_subset.hb_subset_input_set( + hb_subset, harfbuzz_subset.HB_SUBSET_SETS_DROP_TABLE_TAG) + drop_tables = tuple(harfbuzz.hb_tag_from_string(name, -1) for name in ( + b'BASE', b'DSIG', b'EBDT', b'EBLC', b'EBSC', b'GPOS', b'GSUB', b'JSTF', + b'LTSH', b'PCLT', b'SVG ')) + drop_tables_array = ffi.new(f'hb_codepoint_t[{len(drop_tables)}]', drop_tables) + harfbuzz.hb_set_add_sorted_array(drop_set, drop_tables_array, len(drop_tables)) + + # Subset font. + hb_face = harfbuzz_subset.hb_subset_or_fail(self.hb_face, hb_subset) + + # Drop empty glyphs after last one used. + gid_set = harfbuzz_subset.hb_subset_input_glyph_set(hb_subset) + keep = tuple(range(max(cmap) + 1)) + gid_array = ffi.new(f'hb_codepoint_t[{len(keep)}]', keep) + harfbuzz.hb_set_add_sorted_array(gid_set, gid_array, len(keep)) + + # Set flags. + flags = ( + harfbuzz_subset.HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED | + harfbuzz_subset.HB_SUBSET_FLAGS_DESUBROUTINIZE) + if not hinting: + flags |= harfbuzz_subset.HB_SUBSET_FLAGS_NO_HINTING + harfbuzz_subset.hb_subset_input_set_flags(hb_subset, flags) + + # Subset font. + hb_face = harfbuzz_subset.hb_subset_or_fail(hb_face, hb_subset) + + # Store new font. + if hb_face: + file_content = get_hb_object_data(hb_face) + if file_content: + self.file_content = file_content + return + + LOGGER.warning('Unable to subset font with Harfbuzz') + + def _fonttools_subset(self, cmap, hinting): + """Subset font using Fonttools.""" + full_font = io.BytesIO(self.file_content) + + # Set subset options. + options = subset.Options( + retain_gids=True, passthrough_tables=True, ignore_missing_glyphs=True, + hinting=hinting, desubroutinize=True) + options.drop_tables += ['GSUB', 'GPOS', 'SVG'] + subsetter = subset.Subsetter(options) + subsetter.populate(gids=cmap) + + # Subset font. + try: + ttfont = TTFont(full_font, fontNumber=self.index) + subsetter.subset(ttfont) + except TTLibError: + LOGGER.warning('Unable to subset font with fontTools') + else: + optimized_font = io.BytesIO() + ttfont.save(optimized_font) + self.file_content = optimized_font.getvalue() + + +def build_fonts_dictionary(pdf, fonts, compress, subset, options): + """Build PDF dictionary for fonts.""" pdf_fonts = pydyf.Dictionary() fonts_by_file_hash = {} for font in fonts.values(): fonts_by_file_hash.setdefault(font.hash, []).append(font) font_references_by_file_hash = {} for file_hash, file_fonts in fonts_by_file_hash.items(): - # TODO: find why we can have multiple fonts for one font file + # TODO: Find why we can have multiple fonts for one font file. font = file_fonts[0] if font.bitmap: continue - # Clean font, optimize and handle emojis + # Clean font, optimize and handle emojis. cmap = {} if subset and not font.used_in_forms: for file_font in file_fonts: cmap = {**cmap, **file_font.cmap} font.clean(cmap, options['hinting']) - # Include font + # Include font. if font.type == 'otf': font_extra = pydyf.Dictionary({'Subtype': '/OpenType'}) else: font_extra = pydyf.Dictionary({'Length1': len(font.file_content)}) - font_stream = pydyf.Stream( - [font.file_content], font_extra, compress=compress_pdf) + font_stream = pydyf.Stream([font.file_content], font_extra, compress=compress) pdf.add_object(font_stream) font_references_by_file_hash[file_hash] = font_stream.reference @@ -301,18 +311,6 @@ def build_fonts_dictionary(pdf, fonts, compress_pdf, subset, options): width = ttfont.getGlyphSet()[key].width font_widths[glyph] = width * 1000 / font.upem - max_x = max(font_widths.values()) if font_widths else 0 - bbox = (0, font.descent, max_x, font.ascent) - - widths = pydyf.Array() - for i in sorted(font_widths): - if i - 1 not in font_widths: - widths.append(i) - current_widths = pydyf.Array() - widths.append(current_widths) - current_widths.append(font_widths[i]) - - font_file = f'FontFile{3 if font.type == "otf" else 2}' to_unicode = pydyf.Stream([ b'/CIDInit /ProcSet findresource begin', b'12 dict begin', @@ -327,7 +325,7 @@ def build_fonts_dictionary(pdf, fonts, compress_pdf, subset, options): b'1 begincodespacerange', b'<0000> ', b'endcodespacerange', - f'{len(cmap)} beginbfchar'.encode()], compress=compress_pdf) + f'{len(cmap)} beginbfchar'.encode()], compress=compress) for glyph, text in cmap.items(): unicode_codepoints = ''.join( f'{letter.encode("utf-16-be").hex()}' for letter in text) @@ -349,64 +347,18 @@ def build_fonts_dictionary(pdf, fonts, compress_pdf, subset, options): if font.bitmap: _build_bitmap_font_dictionary( - font_dictionary, pdf, font, widths, compress_pdf, subset) + font_dictionary, pdf, font, font_widths, compress, subset) else: - flags = font.flags - if len(widths) > 1 and len(set(font.widths.values())) == 1: - flags += 2 ** (1 - 1) # FixedPitch - font_descriptor = pydyf.Dictionary({ - 'Type': '/FontDescriptor', - 'FontName': font.name, - 'FontFamily': pydyf.String(font.family), - 'Flags': flags, - 'FontBBox': pydyf.Array(bbox), - 'ItalicAngle': font.italic_angle, - 'Ascent': font.ascent, - 'Descent': font.descent, - 'CapHeight': bbox[3], - 'StemV': font.stemv, - 'StemH': font.stemh, - font_file: font_references_by_file_hash[font.hash], - }) - if str(options['pdf_version']) <= '1.4': # Cast for bytes and None - cids = sorted(font.widths) - padded_width = int(ceil((cids[-1] + 1) / 8)) - bits = ['0'] * padded_width * 8 - for cid in cids: - bits[cid] = '1' - stream = pydyf.Stream( - (int(''.join(bits), 2).to_bytes(padded_width, 'big'),), - compress=compress_pdf) - pdf.add_object(stream) - font_descriptor['CIDSet'] = stream.reference - if font.type == 'otf': - font_descriptor['Subtype'] = '/OpenType' - pdf.add_object(font_descriptor) - subfont_dictionary = pydyf.Dictionary({ - 'Type': '/Font', - 'Subtype': f'/CIDFontType{0 if font.type == "otf" else 2}', - 'BaseFont': font.name, - 'CIDSystemInfo': pydyf.Dictionary({ - 'Registry': pydyf.String('Adobe'), - 'Ordering': pydyf.String('Identity'), - 'Supplement': 0, - }), - 'CIDToGIDMap': '/Identity', - 'W': widths, - 'FontDescriptor': font_descriptor.reference, - }) - pdf.add_object(subfont_dictionary) - font_dictionary['Encoding'] = '/Identity-H' - font_dictionary['DescendantFonts'] = pydyf.Array( - [subfont_dictionary.reference]) + _build_vector_font_dictionary( + font_dictionary, pdf, font, font_widths, compress, + font_references_by_file_hash[font.hash], options['pdf_version']) pdf.add_object(font_dictionary) pdf_fonts[font.hash] = font_dictionary.reference return pdf_fonts -def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, - compress_pdf, subset): +def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, compress, subset): # https://docs.microsoft.com/typography/opentype/spec/ebdt font_dictionary['FontBBox'] = pydyf.Array([0, 0, 1, 1]) font_dictionary['FontMatrix'] = pydyf.Array([1, 0, 0, 1, 0, 0]) @@ -415,14 +367,13 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, else: chars = tuple(range(256)) first, last = chars[0], chars[-1] + differences = [] + for glyph in sorted(widths): + if glyph - 1 not in widths: + differences.append(glyph) + differences.append(f'/{glyph}') font_dictionary['FirstChar'] = first font_dictionary['LastChar'] = last - differences = [] - for index, index_widths in zip(widths[::2], widths[1::2]): - differences.append(index) - for i in range(len(index_widths)): - if i + index in chars: - differences.append(f'/{i + index}') font_dictionary['Encoding'] = pydyf.Dictionary({ 'Type': '/Encoding', 'Differences': pydyf.Array(differences), @@ -437,7 +388,7 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, glyph_format = glyph.getFormat() glyph_id = ttfont.getGlyphID(key) - # Get and store glyph metrics + # Get and store glyph metrics. if glyph_format == 5: data = glyph.data subtables = ttfont['EBLC'].strikes[0].indexSubTables @@ -474,7 +425,7 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, 'subglyphs': None, } - # Decode bitmaps + # Decode bitmaps. if 0 in (width, height) or not data: glyph_info['bitmap'] = b'' elif glyph_format in (1, 6): @@ -486,8 +437,7 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, bitmap_bits = ''.join( bits[i * width:(i + 1) * width] + padding * '0' for i in range(height)) - glyph_info['bitmap'] = int(bitmap_bits, 2).to_bytes( - height * stride, 'big') + glyph_info['bitmap'] = int(bitmap_bits, 2).to_bytes(height * stride, 'big') elif glyph_format in (8, 9): subglyphs = glyph_info['subglyphs'] = [] i = 0 if glyph_format == 9 else 1 @@ -503,11 +453,11 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, glyph_info['bitmap'] = bytes(height * stride) for glyph_id, glyph_info in glyphs_info.items(): - # Don’t store glyph not in cmap + # Don’t store glyph not in cmap. if glyph_id not in chars: continue - # Draw glyph + # Draw glyph. stride = glyph_info['stride'] width = glyph_info['width'] height = glyph_info['height'] @@ -525,9 +475,8 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, continue subglyph = glyphs_info[sub_id] if subglyph['bitmap'] is None: - # TODO: support subglyph in subglyph - LOGGER.warning( - f'Unsupported subglyph in subglyph: {sub_id}') + # TODO: Support subglyph in subglyph. + LOGGER.warning(f'Unsupported subglyph in subglyph: {sub_id}') continue for row_y in range(subglyph['height']): row_slice = slice( @@ -561,10 +510,73 @@ def _build_bitmap_font_dictionary(font_dictionary, pdf, font, widths, b'/BPC 1', b'/D [1 0]', b'ID', bitmap, b'EI' - ], compress=compress_pdf) + ], compress=compress) pdf.add_object(bitmap_stream) char_procs[glyph_id] = bitmap_stream.reference pdf.add_object(char_procs) font_dictionary['Widths'] = pydyf.Array(widths) font_dictionary['CharProcs'] = char_procs.reference + + +def _build_vector_font_dictionary(font_dictionary, pdf, font, widths, compress, + reference, pdf_version): + font_file = f'FontFile{3 if font.type == "otf" else 2}' + max_x = max(widths.values()) if widths else 0 + bbox = (0, font.descent, max_x, font.ascent) + flags = font.flags + if len(widths) > 1 and len(set(font.widths.values())) == 1: + flags += 2 ** (1 - 1) # FixedPitch + font_descriptor = pydyf.Dictionary({ + 'Type': '/FontDescriptor', + 'FontName': font.name, + 'FontFamily': pydyf.String(font.family), + 'Flags': flags, + 'FontBBox': pydyf.Array(bbox), + 'ItalicAngle': font.italic_angle, + 'Ascent': font.ascent, + 'Descent': font.descent, + 'CapHeight': bbox[3], + 'StemV': font.stemv, + 'StemH': font.stemh, + font_file: reference, + }) + if str(pdf_version) <= '1.4': # Cast for bytes and None + cids = sorted(font.widths) + padded_width = int(ceil((cids[-1] + 1) / 8)) + bits = ['0'] * padded_width * 8 + for cid in cids: + bits[cid] = '1' + stream = pydyf.Stream( + (int(''.join(bits), 2).to_bytes(padded_width, 'big'),), + compress=compress) + pdf.add_object(stream) + font_descriptor['CIDSet'] = stream.reference + if font.type == 'otf': + font_descriptor['Subtype'] = '/OpenType' + pdf.add_object(font_descriptor) + + pdf_widths = pydyf.Array() + for i in sorted(widths): + if i - 1 not in widths: + pdf_widths.append(i) + current_widths = pydyf.Array() + pdf_widths.append(current_widths) + current_widths.append(widths[i]) + + subfont_dictionary = pydyf.Dictionary({ + 'Type': '/Font', + 'Subtype': f'/CIDFontType{0 if font.type == "otf" else 2}', + 'BaseFont': font.name, + 'CIDSystemInfo': pydyf.Dictionary({ + 'Registry': pydyf.String('Adobe'), + 'Ordering': pydyf.String('Identity'), + 'Supplement': 0, + }), + 'CIDToGIDMap': '/Identity', + 'W': pdf_widths, + 'FontDescriptor': font_descriptor.reference, + }) + pdf.add_object(subfont_dictionary) + font_dictionary['Encoding'] = '/Identity-H' + font_dictionary['DescendantFonts'] = pydyf.Array([subfont_dictionary.reference]) From 5485d2edf67f391227a780a7523476aee0a36482 Mon Sep 17 00:00:00 2001 From: Guillaume Ayoub Date: Sat, 8 Jun 2024 16:58:58 +0200 Subject: [PATCH 3/3] Update documentation to advertise about harfbuzz-subset --- docs/first_steps.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/first_steps.rst b/docs/first_steps.rst index 3abee5ee8..ce5daf547 100644 --- a/docs/first_steps.rst +++ b/docs/first_steps.rst @@ -70,7 +70,7 @@ in a `virtual environment`_ using `pip`_:: .. _pip: https://pip.pypa.io/ -Alpine ≥ 3.14 +Alpine ≥ 3.17 +++++++++++++ To install WeasyPrint using your distribution’s package:: @@ -80,12 +80,12 @@ To install WeasyPrint using your distribution’s package:: To install WeasyPrint inside a virtualenv using wheels (if possible), you need the following packages:: - apk add py3-pip gcc musl-dev python3-dev pango zlib-dev jpeg-dev openjpeg-dev g++ libffi-dev + apk add py3-pip gcc musl-dev python3-dev pango zlib-dev jpeg-dev openjpeg-dev g++ libffi-dev harfbuzz-subset To install WeasyPrint inside a virtualenv without using wheels, you need the following packages:: - apk add py3-pip gcc musl-dev python3-dev pango zlib-dev jpeg-dev openjpeg-dev g++ libffi-dev + apk add py3-pip gcc musl-dev python3-dev pango zlib-dev jpeg-dev openjpeg-dev g++ libffi-dev harfbuzz-subset Archlinux @@ -116,15 +116,15 @@ To install WeasyPrint using your distribution’s package:: To install WeasyPrint inside a virtualenv using wheels (if possible), you need the following packages:: - apt install python3-pip libpango-1.0-0 libpangoft2-1.0-0 + apt install python3-pip libpango-1.0-0 libpangoft2-1.0-0 libharfbuzz-subset0 To install WeasyPrint inside a virtualenv without using wheels, you need the following packages:: - apt install python3-pip libpango-1.0-0 libpangoft2-1.0-0 libjpeg-dev libopenjp2-7-dev libffi-dev + apt install python3-pip libpango-1.0-0 libpangoft2-1.0-0 libharfbuzz-subset0 libjpeg-dev libopenjp2-7-dev libffi-dev -Fedora ≥ 34 +Fedora ≥ 39 +++++++++++ To install WeasyPrint using your distribution’s package:: @@ -152,12 +152,12 @@ To install WeasyPrint using your distribution’s package:: To install WeasyPrint inside a virtualenv using wheels (if possible), you need the following packages:: - apt install python3-pip libpango-1.0-0 libharfbuzz0b libpangoft2-1.0-0 + apt install python3-pip libpango-1.0-0 libharfbuzz0b libpangoft2-1.0-0 libharfbuzz-subset0 To install WeasyPrint inside a virtualenv without using wheels, you need the following packages:: - apt install python3-pip libpango-1.0-0 libharfbuzz0b libpangoft2-1.0-0 libffi-dev libjpeg-dev libopenjp2-7-dev + apt install python3-pip libpango-1.0-0 libharfbuzz0b libpangoft2-1.0-0 libharfbuzz-subset0 libffi-dev libjpeg-dev libopenjp2-7-dev macOS