From 3a708482bfaed25e21656829eb6529824f8aea8f Mon Sep 17 00:00:00 2001 From: Krzysztof Kowalczyk Date: Sat, 21 Sep 2024 14:42:54 +0200 Subject: [PATCH] update mupdf --- mupdf/include/mupdf/fitz/font.h | 1 + mupdf/include/mupdf/fitz/text.h | 3 ++- mupdf/include/mupdf/pdf/interpret.h | 2 +- mupdf/source/fitz/font.c | 40 +++++++++++++++++++++-------- mupdf/source/fitz/ocr-device.c | 2 +- mupdf/source/fitz/output-docx.c | 2 +- mupdf/source/fitz/stext-device.c | 4 +-- mupdf/source/fitz/svg-device.c | 2 +- mupdf/source/fitz/text.c | 7 ++--- mupdf/source/fitz/trace-device.c | 6 +---- mupdf/source/fitz/xmltext-device.c | 8 ++---- mupdf/source/pdf/pdf-interpret.c | 6 ++--- mupdf/source/pdf/pdf-op-filter.c | 3 ++- mupdf/source/pdf/pdf-op-run.c | 7 ++--- mupdf/source/pdf/pdf-page.c | 13 +++++++--- mupdf/source/pdf/pdf-type3.c | 1 + 16 files changed, 66 insertions(+), 41 deletions(-) diff --git a/mupdf/include/mupdf/fitz/font.h b/mupdf/include/mupdf/fitz/font.h index 4b153ec7ed6f..a6a824b7d53d 100644 --- a/mupdf/include/mupdf/fitz/font.h +++ b/mupdf/include/mupdf/fitz/font.h @@ -748,6 +748,7 @@ struct fz_font void *t3resources; fz_buffer **t3procs; /* has 256 entries if used */ struct fz_display_list **t3lists; /* has 256 entries if used */ + float *t3widths; /* has 256 entries if used */ unsigned short *t3flags; /* has 256 entries if used */ void *t3doc; /* a pdf_document for the callback */ void (*t3run)(fz_context *ctx, void *doc, void *resources, fz_buffer *contents, struct fz_device *dev, fz_matrix ctm, void *gstate, fz_default_colorspaces *default_cs); diff --git a/mupdf/include/mupdf/fitz/text.h b/mupdf/include/mupdf/fitz/text.h index 01efbfd160e0..a1341f6f9942 100644 --- a/mupdf/include/mupdf/fitz/text.h +++ b/mupdf/include/mupdf/fitz/text.h @@ -44,6 +44,7 @@ typedef struct { float x, y; + float adv; /* advance width given by input format */ int gid; /* -1 for one gid to many ucs mappings */ int ucs; /* -1 for one ucs to many gid mappings */ int cid; /* CID for CJK fonts, raw character code for other fonts; or unicode for non-PDF formats. */ @@ -134,7 +135,7 @@ void fz_drop_text(fz_context *ctx, const fz_text *text); Throws exception on failure to allocate. */ void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int glyph, int unicode, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language); -void fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int glyph, int unicode, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang); +void fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, float adv, int glyph, int unicode, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang); /** Add a UTF8 string to a text object. diff --git a/mupdf/include/mupdf/pdf/interpret.h b/mupdf/include/mupdf/pdf/interpret.h index edfbee9bb9b8..e3bae9db0b0b 100644 --- a/mupdf/include/mupdf/pdf/interpret.h +++ b/mupdf/include/mupdf/pdf/interpret.h @@ -483,7 +483,7 @@ void pdf_tos_save(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2] void pdf_tos_restore(fz_context *ctx, pdf_text_object_state *tos, fz_matrix save[2]); fz_text *pdf_tos_get_text(fz_context *ctx, pdf_text_object_state *tos); void pdf_tos_reset(fz_context *ctx, pdf_text_object_state *tos, int render); -int pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm); +int pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm, float *adv); void pdf_tos_move_after_char(fz_context *ctx, pdf_text_object_state *tos); void pdf_tos_translate(pdf_text_object_state *tos, float tx, float ty); void pdf_tos_set_matrix(pdf_text_object_state *tos, float a, float b, float c, float d, float e, float f); diff --git a/mupdf/source/fitz/font.c b/mupdf/source/fitz/font.c index f929338ed0f1..1e1fa2df3a0a 100755 --- a/mupdf/source/fitz/font.c +++ b/mupdf/source/fitz/font.c @@ -114,6 +114,7 @@ fz_new_font(fz_context *ctx, const char *name, int use_glyph_bbox, int glyph_cou font->t3resources = NULL; font->t3procs = NULL; font->t3lists = NULL; + font->t3widths = NULL; font->t3flags = NULL; font->t3doc = NULL; font->t3run = NULL; @@ -205,6 +206,7 @@ fz_drop_font(fz_context *ctx, fz_font *font) fz_drop_display_list(ctx, font->t3lists[i]); fz_free(ctx, font->t3procs); fz_free(ctx, font->t3lists); + fz_free(ctx, font->t3widths); fz_free(ctx, font->t3flags); if (font->ft_face) @@ -1530,6 +1532,7 @@ fz_new_type3_font(fz_context *ctx, const char *name, fz_matrix matrix) { font->t3procs = fz_calloc(ctx, 256, sizeof(fz_buffer*)); font->t3lists = fz_calloc(ctx, 256, sizeof(fz_display_list*)); + font->t3widths = fz_calloc(ctx, 256, sizeof(float)); font->t3flags = fz_calloc(ctx, 256, sizeof(unsigned short)); } fz_catch(ctx) @@ -1826,6 +1829,17 @@ fz_advance_ft_glyph_aux(fz_context *ctx, fz_font *font, int gid, int wmode, int FT_Fixed adv = 0; int mask; + /* PDF and substitute font widths. */ + if (font->flags.ft_stretch) + { + if (font->width_table) + { + if (gid < font->width_count) + return font->width_table[gid] / 1000.0f; + return font->width_default / 1000.0f; + } + } + mask = FT_LOAD_NO_SCALE | FT_LOAD_NO_HINTING | FT_LOAD_IGNORE_TRANSFORM; if (wmode) mask |= FT_LOAD_VERTICAL_LAYOUT; @@ -1837,7 +1851,12 @@ fz_advance_ft_glyph_aux(fz_context *ctx, fz_font *font, int gid, int wmode, int if (fterr && fterr != FT_Err_Invalid_Argument) { fz_warn(ctx, "FT_Get_Advance(%s,%d): %s", font->name, gid, ft_error_string(fterr)); - return 0; + if (font->width_table) + { + if (gid < font->width_count) + return font->width_table[gid] / 1000.0f; + return font->width_default / 1000.0f; + } } return (float) adv / ((FT_Face)font->ft_face)->units_per_EM; } @@ -1848,6 +1867,14 @@ fz_advance_ft_glyph(fz_context *ctx, fz_font *font, int gid, int wmode) return fz_advance_ft_glyph_aux(ctx, font, gid, wmode, 0); } +static float +fz_advance_t3_glyph(fz_context *ctx, fz_font *font, int gid) +{ + if (gid < 0 || gid > 255) + return 0; + return font->t3widths[gid]; +} + void fz_get_glyph_name(fz_context *ctx, fz_font *font, int glyph, char *buf, int size) { @@ -1875,14 +1902,6 @@ fz_get_glyph_name(fz_context *ctx, fz_font *font, int glyph, char *buf, int size float fz_advance_glyph(fz_context *ctx, fz_font *font, int gid, int wmode) { - /* Use PDF font widths table if available */ - if (font->width_table) - { - if (gid < font->width_count) - return font->width_table[gid] / 1000.0f; - return font->width_default / 1000.0f; - } - if (font->ft_face) { if (wmode) @@ -1928,7 +1947,8 @@ fz_advance_glyph(fz_context *ctx, fz_font *font, int gid, int wmode) return fz_advance_ft_glyph(ctx, font, gid, 0); } - + if (font->t3procs) + return fz_advance_t3_glyph(ctx, font, gid); return 0; } diff --git a/mupdf/source/fitz/ocr-device.c b/mupdf/source/fitz/ocr-device.c index 7bd8a0531ceb..7e9a041d68fb 100644 --- a/mupdf/source/fitz/ocr-device.c +++ b/mupdf/source/fitz/ocr-device.c @@ -669,7 +669,7 @@ rewrite_span(fz_context *ctx, fz_rewrite_device *dev, fz_matrix ctm, const fz_te /* And do the actual rewriting */ for (i = 0; i < rspan->len; i++) { - float advance = fz_advance_glyph(ctx, span->font, rspan->items[i].gid, wmode); + float advance = rspan->items[i].adv; fz_point vadv = { dir.x * advance, dir.y * advance }; rewrite_char(ctx, dev, ctm, &rspan->items[i], vadv); } diff --git a/mupdf/source/fitz/output-docx.c b/mupdf/source/fitz/output-docx.c index 581a034744a7..573fe07548be 100644 --- a/mupdf/source/fitz/output-docx.c +++ b/mupdf/source/fitz/output-docx.c @@ -116,7 +116,7 @@ static void dev_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_m continue; if (span->items[i].gid >= 0) - adv = fz_advance_glyph(ctx, span->font, span->items[i].gid, span->wmode); + adv = span->items[i].adv; bounds = fz_bound_glyph(ctx, span->font, span->items[i].gid, combined); if (extract_add_char(dev->writer->extract, combined.e, combined.f, item->ucs, adv, diff --git a/mupdf/source/fitz/stext-device.c b/mupdf/source/fitz/stext-device.c index c66902960bb7..561f97be19f8 100644 --- a/mupdf/source/fitz/stext-device.c +++ b/mupdf/source/fitz/stext-device.c @@ -833,7 +833,7 @@ do_extract(fz_context *ctx, fz_stext_device *dev, fz_text_span *span, fz_matrix /* Calculate bounding box and new pen position based on font metrics */ if (span->items[i].gid >= 0) - adv = fz_advance_glyph(ctx, font, span->items[i].gid, span->wmode); + adv = span->items[i].adv; else adv = 0; @@ -993,7 +993,7 @@ do_extract_within_actualtext(fz_context *ctx, fz_stext_device *dev, fz_text_span /* Calculate bounding box and new pen position based on font metrics */ if (item->gid >= 0) - adv = fz_advance_glyph(ctx, font, item->gid, span->wmode); + adv = item->adv; else adv = 0; diff --git a/mupdf/source/fitz/svg-device.c b/mupdf/source/fitz/svg-device.c index 2571d3d1f46e..03533ec8d4d0 100644 --- a/mupdf/source/fitz/svg-device.c +++ b/mupdf/source/fitz/svg-device.c @@ -368,7 +368,7 @@ svg_cluster_advance(fz_context *ctx, const fz_text_span *span, int i, int end) while (i + n < end && span->items[i + n].gid == -1) ++n; if (n > 1) - return fz_advance_glyph(ctx, span->font, span->items[i].gid, span->wmode) / n; + return span->items[i].adv / n; return 0; /* this value is never used (since n==1) */ } diff --git a/mupdf/source/fitz/text.c b/mupdf/source/fitz/text.c index c0d9ca525430..64706f9663cb 100644 --- a/mupdf/source/fitz/text.c +++ b/mupdf/source/fitz/text.c @@ -110,7 +110,7 @@ fz_grow_text_span(fz_context *ctx, fz_text_span *span, int n) } void -fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int gid, int ucs, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) +fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, float adv, int gid, int ucs, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) { fz_text_span *span; @@ -126,13 +126,14 @@ fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, span->items[span->len].cid = cid; span->items[span->len].x = trm.e; span->items[span->len].y = trm.f; + span->items[span->len].adv = adv; span->len++; } void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int gid, int ucs, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang) { - fz_show_glyph_aux(ctx, text, font, trm, gid, ucs, ucs, wmode, bidi_level, markup_dir, lang); + fz_show_glyph_aux(ctx, text, font, trm, fz_advance_glyph(ctx, font, gid, wmode), gid, ucs, ucs, wmode, bidi_level, markup_dir, lang); } fz_matrix @@ -147,8 +148,8 @@ fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix trm { s += fz_chartorune(&ucs, s); gid = fz_encode_character_with_fallback(ctx, user_font, ucs, 0, language, &font); - fz_show_glyph(ctx, text, font, trm, gid, ucs, wmode, bidi_level, markup_dir, language); adv = fz_advance_glyph(ctx, font, gid, wmode); + fz_show_glyph_aux(ctx, text, font, trm, adv, gid, ucs, ucs, wmode, bidi_level, markup_dir, language); if (wmode == 0) trm = fz_pre_translate(trm, adv, 0); else diff --git a/mupdf/source/fitz/trace-device.c b/mupdf/source/fitz/trace-device.c index 365d853cdeae..b75a645f6a8f 100644 --- a/mupdf/source/fitz/trace-device.c +++ b/mupdf/source/fitz/trace-device.c @@ -80,10 +80,6 @@ fz_trace_text_span(fz_context *ctx, fz_output *out, fz_text_span *span, int dept for (i = 0; i < span->len; i++) { int ucs = span->items[i].ucs; - float adv = 0; - if (span->items[i].gid >= 0) { - adv = fz_advance_glyph(ctx, span->font, span->items[i].gid, span->wmode); - } fz_trace_indent(ctx, out, depth+1); fz_write_string(ctx, out, "\n", span->items[i].x, span->items[i].y, adv); + fz_write_printf(ctx, out, " x=\"%g\" y=\"%g\" adv=\"%g\"/>\n", span->items[i].x, span->items[i].y, span->items[i].adv); } fz_trace_indent(ctx, out, depth); fz_write_string(ctx, out, "\n"); diff --git a/mupdf/source/fitz/xmltext-device.c b/mupdf/source/fitz/xmltext-device.c index e93c7af687b5..b437d735eb4c 100644 --- a/mupdf/source/fitz/xmltext-device.c +++ b/mupdf/source/fitz/xmltext-device.c @@ -137,11 +137,7 @@ fz_xmltext_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix for (i=0; ilen; ++i) { fz_text_item *item = &span->items[i]; - float adv = 0; - if (span->items[i].gid >= 0) - { - adv = fz_advance_glyph(ctx, span->font, span->items[i].gid, span->wmode); - } + s_xml_starttag_begin(ctx, dev->out, "char"); s_write_attribute_float(ctx, dev->out, "x", item->x); s_write_attribute_float(ctx, dev->out, "y", item->y); @@ -156,7 +152,7 @@ fz_xmltext_text(fz_context *ctx, fz_device *dev_, const fz_text *text, fz_matrix (item->ucs >= 32 && item->ucs < 128 && item->ucs != '"') ? item->ucs : ' ' ); - s_write_attribute_float(ctx, dev->out, "adv", adv); + s_write_attribute_float(ctx, dev->out, "adv", span->items[i].adv); s_xml_starttag_empty_end(ctx, dev->out); } diff --git a/mupdf/source/pdf/pdf-interpret.c b/mupdf/source/pdf/pdf-interpret.c index ab16ab991a39..2d29383da310 100644 --- a/mupdf/source/pdf/pdf-interpret.c +++ b/mupdf/source/pdf/pdf-interpret.c @@ -1305,7 +1305,7 @@ pdf_tos_reset(fz_context *ctx, pdf_text_object_state *tos, int render) } int -pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm) +pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *text, pdf_font_desc *fontdesc, int cid, fz_matrix *trm, float *adv) { fz_matrix tsm; @@ -1319,7 +1319,7 @@ pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *te if (fontdesc->wmode == 0) { pdf_hmtx h = pdf_lookup_hmtx(ctx, fontdesc, cid); - float w0 = h.w * 0.001f; + float w0 = *adv = h.w * 0.001f; tos->char_tx = (w0 * text->size + text->char_space) * text->scale; tos->char_ty = 0; } @@ -1327,7 +1327,7 @@ pdf_tos_make_trm(fz_context *ctx, pdf_text_object_state *tos, pdf_text_state *te if (fontdesc->wmode == 1) { pdf_vmtx v = pdf_lookup_vmtx(ctx, fontdesc, cid); - float w1 = v.w * 0.001f; + float w1 = *adv = v.w * 0.001f; tsm.e -= v.x * fabsf(text->size) * 0.001f; tsm.f -= v.y * text->size * 0.001f; tos->char_tx = 0; diff --git a/mupdf/source/pdf/pdf-op-filter.c b/mupdf/source/pdf/pdf-op-filter.c index cef9bf7da8ef..46736c825574 100644 --- a/mupdf/source/pdf/pdf-op-filter.c +++ b/mupdf/source/pdf/pdf-op-filter.c @@ -570,8 +570,9 @@ filter_show_char(fz_context *ctx, pdf_sanitize_processor *p, int cid, int *unico int ucsbuf[PDF_MRANGE_CAP]; int ucslen; int remove = 0; + float adv; - (void)pdf_tos_make_trm(ctx, &p->tos, &gstate->pending.text, fontdesc, cid, &trm); + (void)pdf_tos_make_trm(ctx, &p->tos, &gstate->pending.text, fontdesc, cid, &trm, &adv); ucslen = 0; if (fontdesc->to_unicode) diff --git a/mupdf/source/pdf/pdf-op-run.c b/mupdf/source/pdf/pdf-op-run.c index 772981334782..23a0fa52f183 100644 --- a/mupdf/source/pdf/pdf-op-run.c +++ b/mupdf/source/pdf/pdf-op-run.c @@ -1142,13 +1142,14 @@ pdf_show_char(fz_context *ctx, pdf_run_processor *pr, int cid, fz_text_language pdf_gstate *gstate = pr->gstate + pr->gtop; pdf_font_desc *fontdesc = gstate->text.font; fz_matrix trm; + float adv; int gid; int ucsbuf[PDF_MRANGE_CAP]; int ucslen; int i; int render_direct; - gid = pdf_tos_make_trm(ctx, &pr->tos, &gstate->text, fontdesc, cid, &trm); + gid = pdf_tos_make_trm(ctx, &pr->tos, &gstate->text, fontdesc, cid, &trm, &adv); /* If we are uncachable, then render direct. */ render_direct = !fz_glyph_cacheable(ctx, fontdesc->font, gid); @@ -1203,11 +1204,11 @@ pdf_show_char(fz_context *ctx, pdf_run_processor *pr, int cid, fz_text_language pr->bidi = guess_bidi_level(ucdn_get_bidi_class(ucsbuf[0]), pr->bidi); /* add glyph to textobject */ - fz_show_glyph_aux(ctx, pr->tos.text, fontdesc->font, trm, gid, ucsbuf[0], cid, fontdesc->wmode, pr->bidi, FZ_BIDI_NEUTRAL, lang); + fz_show_glyph_aux(ctx, pr->tos.text, fontdesc->font, trm, adv, gid, ucsbuf[0], cid, fontdesc->wmode, pr->bidi, FZ_BIDI_NEUTRAL, lang); /* add filler glyphs for one-to-many unicode mapping */ for (i = 1; i < ucslen; i++) - fz_show_glyph_aux(ctx, pr->tos.text, fontdesc->font, trm, -1, ucsbuf[i], -1, fontdesc->wmode, pr->bidi, FZ_BIDI_NEUTRAL, lang); + fz_show_glyph_aux(ctx, pr->tos.text, fontdesc->font, trm, 0, -1, ucsbuf[i], -1, fontdesc->wmode, pr->bidi, FZ_BIDI_NEUTRAL, lang); pdf_tos_move_after_char(ctx, &pr->tos); } diff --git a/mupdf/source/pdf/pdf-page.c b/mupdf/source/pdf/pdf-page.c index e36de276c991..98bd6d2b6479 100755 --- a/mupdf/source/pdf/pdf-page.c +++ b/mupdf/source/pdf/pdf-page.c @@ -700,7 +700,7 @@ void pdf_page_obj_transform_box(fz_context *ctx, pdf_obj *pageobj, fz_rect *outbox, fz_matrix *page_ctm, fz_box_type box) { pdf_obj *obj; - fz_rect usedbox, tempbox, cropbox; + fz_rect usedbox, tempbox, cropbox, mediabox; float userunit = 1; int rotate; @@ -709,6 +709,9 @@ pdf_page_obj_transform_box(fz_context *ctx, pdf_obj *pageobj, fz_rect *outbox, f userunit = pdf_dict_get_real_default(ctx, pageobj, PDF_NAME(UserUnit), 1); + obj = pdf_dict_get_inheritable(ctx, pageobj, PDF_NAME(MediaBox)); + mediabox = pdf_to_rect(ctx, obj); + obj = NULL; if (box == FZ_ART_BOX) obj = pdf_dict_get_inheritable(ctx, pageobj, PDF_NAME(ArtBox)); @@ -719,8 +722,12 @@ pdf_page_obj_transform_box(fz_context *ctx, pdf_obj *pageobj, fz_rect *outbox, f if (box == FZ_CROP_BOX || !obj) obj = pdf_dict_get_inheritable(ctx, pageobj, PDF_NAME(CropBox)); if (box == FZ_MEDIA_BOX || !obj) - obj = pdf_dict_get_inheritable(ctx, pageobj, PDF_NAME(MediaBox)); - usedbox = pdf_to_rect(ctx, obj); + usedbox = mediabox; + else + { + // never use a box larger than fits the paper (mediabox) + usedbox = fz_intersect_rect(mediabox, pdf_to_rect(ctx, obj)); + } if (fz_is_empty_rect(usedbox)) usedbox = fz_make_rect(0, 0, 612, 792); diff --git a/mupdf/source/pdf/pdf-type3.c b/mupdf/source/pdf/pdf-type3.c index d992f882eb6a..cf7af5d108c0 100644 --- a/mupdf/source/pdf/pdf-type3.c +++ b/mupdf/source/pdf/pdf-type3.c @@ -159,6 +159,7 @@ pdf_load_type3_font(fz_context *ctx, pdf_document *doc, pdf_obj *rdb, pdf_obj *d { float w = pdf_array_get_real(ctx, widths, i - first); w = font->t3matrix.a * w * 1000; + font->t3widths[i] = w * 0.001f; pdf_add_hmtx(ctx, fontdesc, i, i, w); }