diff --git a/crengine/include/lvrend.h b/crengine/include/lvrend.h index 5926dc9db..2b00cb615 100644 --- a/crengine/include/lvrend.h +++ b/crengine/include/lvrend.h @@ -37,6 +37,7 @@ #define RENDER_RECT_FLAG_FINAL_FOOTPRINT_AS_SAVED_FLOAT_IDS 0x0040 #define RENDER_RECT_FLAG_FLOATBOX_IS_RIGHT 0x0080 #define RENDER_RECT_FLAG_NO_INTERLINE_SCALE_UP 0x0100 // for ruby elements to not scale up +#define RENDER_RECT_FLAG_IS_IN_MAIN_FLOW 0x0200 // for hanging punctuation #define RENDER_RECT_FLAG_TEMP_USED_AS_CSS_CHECK_CACHE 0x8000 // has been cleared and is used as a CSS checks cache #define RENDER_RECT_SET_FLAG(r, f) ( r.setFlags( r.getFlags() | RENDER_RECT_FLAG_##f ) ) @@ -173,6 +174,8 @@ extern int gRenderDPI; extern bool gRenderScaleFontWithDPI; extern int gRootFontSize; +extern bool gHangingPunctuationEnabled; + #define INTERLINE_SCALE_FACTOR_NO_SCALE 1024 #define INTERLINE_SCALE_FACTOR_SHIFT 10 extern int gInterlineScaleFactor; diff --git a/crengine/include/lvtextfm.h b/crengine/include/lvtextfm.h index 5a3845462..39faa993c 100644 --- a/crengine/include/lvtextfm.h +++ b/crengine/include/lvtextfm.h @@ -19,9 +19,6 @@ #include "lvbmpbuf.h" #include "textlang.h" -// comment out following line to use old formatter -#define USE_NEW_FORMATTER 1 - #ifdef __cplusplus extern "C" { #endif @@ -432,6 +429,7 @@ class LFormattedText lUInt32 Format(lUInt16 width, lUInt16 page_height, int para_direction=0, // = REND_DIRECTION_UNSET in lvrend.h + bool hanging_punctuation=false, BlockFloatFootprint * float_footprint = NULL ); int GetSrcCount() @@ -481,6 +479,4 @@ class LFormattedText #endif -extern bool gFlgFloatingPunctuationEnabled; - #endif diff --git a/crengine/include/textlang.h b/crengine/include/textlang.h index d22f69af9..69686ed00 100644 --- a/crengine/include/textlang.h +++ b/crengine/include/textlang.h @@ -142,6 +142,9 @@ class TextLangCfg lString16 & getOpeningQuote( bool update_level=true ); lString16 & getClosingQuote( bool update_level=true ); + int 
getHyphenHangingPercent(); + int getHangingPercent( bool right_hanging, bool & check_font, const lChar16 * text, int pos, int next_usable ); + #if USE_HARFBUZZ==1 hb_language_t getHBLanguage() const { return _hb_language; } #endif diff --git a/crengine/src/lvdocview.cpp b/crengine/src/lvdocview.cpp index 3f3701bf0..4d9030eaf 100644 --- a/crengine/src/lvdocview.cpp +++ b/crengine/src/lvdocview.cpp @@ -6463,8 +6463,9 @@ CRPropRef LVDocView::propsApply(CRPropRef props) { REQUEST_RENDER("propsApply footnotes") } else if (name == PROP_FLOATING_PUNCTUATION) { bool value = props->getBoolDef(PROP_FLOATING_PUNCTUATION, true); - if ( gFlgFloatingPunctuationEnabled != value ) { - gFlgFloatingPunctuationEnabled = value; + if ( gHangingPunctuationEnabled != value ) { + gHangingPunctuationEnabled = value; + requestRender(); // does m_doc->clearRendBlockCache(), which is needed on hanging punctuation change REQUEST_RENDER("propsApply floating punct") } } else if (name == PROP_RENDER_BLOCK_RENDERING_FLAGS) { diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index 026ae59bf..34d91bdda 100644 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -50,6 +50,8 @@ // crengine default used to be "width: 100%", but now that we // can shrink to fit, it is "width: auto". 
+bool gHangingPunctuationEnabled = false; + int gInterlineScaleFactor = INTERLINE_SCALE_FACTOR_NO_SCALE; int gRenderDPI = DEF_RENDER_DPI; // if 0: old crengine behaviour: 1px/pt=1px, 1in/cm/pc...=0px @@ -4183,6 +4185,8 @@ int renderBlockElementLegacy( LVRendPageContext & context, ldomNode * enode, int fmt.setX( fmt.getX() ); fmt.setY( fmt.getY() ); fmt.setLangNodeIndex( 0 ); // No support for lang in legacy rendering + // No support for floating punctuation in legacy mode (we don't know + // if this final node is in the main flow or not) fmt.push(); //if ( CRLog::isTraceEnabled() ) // CRLog::trace("rendering final node: %s %d %s", LCSTR(enode->getNodeName()), enode->getDataIndex(), LCSTR(ldomXPointer(enode,0).toString()) ); @@ -7157,6 +7161,17 @@ void renderBlockElementEnhanced( FlowState * flow, ldomNode * enode, int x, int fmt.setInnerY( padding_top ); fmt.setInnerWidth( inner_width ); RENDER_RECT_SET_FLAG(fmt, INNER_FIELDS_SET); + if ( flow->isMainFlow() ) { + RENDER_RECT_SET_FLAG(fmt, IS_IN_MAIN_FLOW); + // Hanging punctuation will only be ensured on final blocks + // in the main flow where we assume we can render glyphs + // in the margins - but not in floats, inline-boxes and + // table cells, which might have borders and surrounding + // content. + // (We could check for the available padding/margin until + // some border is met (but we would have to store the + // available width on each side in new fields...) + } fmt.push(); // (These setInner* needs to be set before creating float_footprint if // we want to debug/valide floatIds coordinates) @@ -9267,6 +9282,8 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct // do, but only with widths and horizontal margin/border/padding and indent // (with no width constraint, so no line splitting and hyphenation - and we // don't care about vertical spacing and alignment). 
+ // Note that, as this is called only on blocks that are not in the main flow, + // hanging punctuation, even if enabled, won't be ensured on them. // Limitations: no support for css_d_run_in (hardly ever used, we don't care) // todo : probably more tweaking to do when direction=RTL, and we should // also handle direction change when walking inner elements... (For now, @@ -9887,8 +9904,7 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct static lUInt8 flags[MAX_TEXT_CHUNK_SIZE+1]; // We adjust below each word width with calls to getLeftSideBearing() // and getRightSideBearing(). These should be called with the exact same - // parameters as used in lvtextfm.cpp getAdditionalCharWidth() and - // getAdditionalCharWidthOnLeft(). + // parameters as used in lvtextfm.cpp addLine(). // todo: use fribidi and split measurement at fribidi level change, // and beware left/right side bearing adjustments... #if (USE_LIBUNIBREAK==1) diff --git a/crengine/src/lvtextfm.cpp b/crengine/src/lvtextfm.cpp index ef3a0ad22..ce0c5b3d8 100644 --- a/crengine/src/lvtextfm.cpp +++ b/crengine/src/lvtextfm.cpp @@ -308,8 +308,6 @@ void lvtextAddSourceObject( #define DUMMY_IMAGE_SIZE 16 -bool gFlgFloatingPunctuationEnabled = true; - void LFormattedText::AddSourceObject( lUInt32 flags, /* flags */ lInt16 interval, /* line height in screen pixels */ @@ -417,6 +415,7 @@ class LVFormatter { bool m_no_clear_own_floats; bool m_allow_strut_confinning; bool m_has_multiple_scripts; + bool m_hanging_punctuation; bool m_indent_first_line_done; int m_indent_after_first_line; int m_indent_current; @@ -465,6 +464,7 @@ class LVFormatter { m_has_ongoing_float = false; m_no_clear_own_floats = false; m_has_multiple_scripts = false; + m_hanging_punctuation = false; m_specified_para_dir = REND_DIRECTION_UNSET; #if (USE_FRIBIDI==1) m_bidi_ctypes = NULL; @@ -1789,7 +1789,7 @@ class LVFormatter { if ( chars_measured w=%d\n", m_widths[start + k]); } + // XXX dubious + // This checks 
whether we're the last char of a text node, and if + // this node is italic, it adds the glyph italic overflow to the + // last char width. + // This might not be needed if the next text node is also italic, + // or if there is a space at start of next text node, and it might + // be needed at start of node too as the italic can overflow there too. + // It might also confuse our adjustment at start or end of line. int dw = getAdditionalCharWidth(i-1, m_length); if ( lastDirection < 0 ) // ignore it for RTL (as right side bearing is measured) dw = 0; @@ -1892,10 +1900,6 @@ class LVFormatter { } lastWidth += widths[len-1]; //lenlight_formatting && !hasInlineBoxes; + + // todo: we can avoid some more work below when light_formatting (and + // possibly the BiDi re-ordering we need for ordering footnotes, as + // if we don't re-order, we'll always have them in the logical order, + // and we can just append them in lvrend.cpp instead of checking + // where to insert them if RTL.) + int maxWidth = getCurrentLineWidth(); - // provided x is left indent + // Provided x is the line indent: as we're making words in the visual + // order here, it will be line start x for LTR paragraphs; but for RTL + // ones, we'll handle it as some reserved space on the right. int rightIndent = 0; if ( m_para_dir_is_rtl ) { rightIndent = x; @@ -2294,6 +2315,10 @@ class LVFormatter { x += getCurrentLineX(); // add shift induced by left floats } + // XXX should we prevent hanging punctuation when there are some + // floats on the left or right, and hanging might be drawn over + // them ? 
+ // Find out text alignment to ensure for this line int align = para->flags & LTEXT_FLAG_NEWLINE; @@ -2332,10 +2357,6 @@ class LVFormatter { TR("addLine(%d, %d) y=%d align=%d", start, end, m_y, align); // printf("addLine(%d, %d) y=%d align=%d maxWidth=%d\n", start, end, m_y, align, maxWidth); - // Note: in the code and comments, all these mean the same thing: - // visual alignment enabled, floating punctuation, hanging punctuation - bool visualAlignmentEnabled = (gFlgFloatingPunctuationEnabled != 0) && (align != LTEXT_ALIGN_CENTER); - // Note: parameter needReduceSpace and variable splitBySpaces (which // was always true) have been removed, as we always split by space: // even if we end up not changing spaces' widths, we need to make @@ -2595,13 +2616,12 @@ class LVFormatter { // Some words vertical-align positionning might need to be fixed // only once the whole line has been laid out bool delayed_valign_computation = false; - // alignLine() will have more work to do if we have inlineBox elements - bool has_inline_boxes = false; // Make out words, making a new one when some properties change int wstart = start; - bool lastIsSpace = false; + bool firstWord = true; bool lastWord = false; + bool lastIsSpace = false; bool isSpace = false; bool space = false; // Bidi @@ -2690,7 +2710,7 @@ class LVFormatter { // Note: a "word" in our current context is just a unit of text that // should be rendered together, and can be moved on the x-axis for // alignment purpose (the 2 french words "qu'autrefois" make a - // single "word" here, the single word "quelconque", if hyphentaded + // single "word" here, the single word "quelconque", if hyphenated // as "quel-conque" will make one "word" on this line and another // "word" on the next line. // @@ -2760,7 +2780,7 @@ class LVFormatter { // block cut by
): most browsers don't display the line break // implied by the BR when we have: "
some text
more text" // or "
some text
more text". - if (lastWord && frmline->word_count == 0) { + if (lastWord && firstWord) { if (!isLastPara) { wstart--; // make a single word with a single collapsed space if (m_flags[wstart] & LCHAR_IS_TO_IGNORE) { @@ -2817,7 +2837,6 @@ class LVFormatter { word->min_width = word->width; word->o.height = srcline->o.height; if ( srcline->flags & LTEXT_SRC_IS_INLINE_BOX ) { // inline-block - has_inline_boxes = true; word->flags = LTEXT_WORD_IS_INLINE_BOX; // For inline-block boxes, the baseline may not be the bottom; it has // been computed in measureText(). @@ -2971,14 +2990,6 @@ class LVFormatter { // printf("baseline_to_bottom=%d top_to_baseline=%d word->y=%d txt=|%s|\n", baseline_to_bottom, // top_to_baseline, word->y, UnicodeToLocal(lString16(srcline->t.text, srcline->t.len)).c_str()); - // For Harfbuzz, which may shape differently words at start or end of paragraph - if (first && frmline->word_count == 1) // first line of paragraph + first word of line - word->flags |= LTEXT_WORD_BEGINS_PARAGRAPH; - if (last && lastWord) // last line of paragraph + last word of line - word->flags |= LTEXT_WORD_ENDS_PARAGRAPH; - if ( trustDirection) - word->flags |= LTEXT_WORD_DIRECTION_KNOWN; - // Set word start and end (start+len-1) indices in the source text node if ( !m_has_bidi ) { // No bidi, everything is linear @@ -3016,6 +3027,29 @@ class LVFormatter { word->t.len = m_charindex[i-1] + 1 - m_charindex[wstart]; } + // Flag word that are the start of a link (for in-page footnotes) + if ( word->t.start==0 && srcline->flags & LTEXT_IS_LINK ) { + word->flags |= LTEXT_WORD_IS_LINK_START; + // todo: we might miss some links if the source text starts with a space + } + + // Below this are stuff that could be skipped if light_formatting + // (We need bidi and the above adjustment only to get correctly ordered + // in-page footnotes links.) + + // For Harfbuzz, which may shape differently words at start or end of paragraph. 
+ // todo: this is probably wrong if some multi bidi levels re-ordering has been done + if ( first ) { // first line of paragraph + if ( m_para_dir_is_rtl ? lastWord : firstWord ) + word->flags |= LTEXT_WORD_BEGINS_PARAGRAPH; + } + if ( last ) { // last line of paragraph + if ( m_para_dir_is_rtl ? firstWord : lastWord ) + word->flags |= LTEXT_WORD_ENDS_PARAGRAPH; + } + if ( trustDirection) + word->flags |= LTEXT_WORD_DIRECTION_KNOWN; + // We need to compute how many glyphs can have letter_spacing added, that // might be done in alignLine() (or not). We have to do it now even if // not used, as we won't have that information anymore in alignLine(). @@ -3047,6 +3081,62 @@ class LVFormatter { word->distinct_glyphs += tailing_spaces; } + if ( firstWord && (align == LTEXT_ALIGN_LEFT || align == LTEXT_ALIGN_WIDTH) ) { + // Adjust line start x if needed + // No need to do it when line is centered or right aligned (doing so + // might increase the line width and change space widths for no reason). + // We currently have no chance to get an added hyphen for hyphenation + // at start of line, as we handle only hyphenation with LTR text. + // It feels we have to do it even for the first line with text-indent, + // as some pages might have multiple consecutive single lines that can + // benefit from hanging so the margin looks clean too. + if ( m_hanging_punctuation && !(m_flags[wstart] & LCHAR_LOCKED_SPACING) ) { + // (We prevent it on the common opening quotation marks or dashes + // we flagged with LCHAR_LOCKED_SPACING, as they may be candidate + // for hanging.) + bool check_font; + int percent = srcline->lang_cfg->getHangingPercent(false, check_font, m_text, wstart, end-wstart-1); + if ( percent ) { + bool use_percent = true; + if ( check_font ) { + // Some fonts might already have enough negative + // left side bearing for some chars, that would + // make them naturally hang on the left. 
+ if ( font->getLeftSideBearing(m_text[wstart]) < 0 ) + use_percent = false; + } + if ( use_percent ) { + int first_char_width = m_widths[wstart] - (wstart>0 ? m_widths[wstart-1] : 0); + int shift_x = first_char_width * percent / 100; + if ( shift_x == 0 ) // Force at least 1px if division rounded it to 0 + shift_x = 1; + frmline->x -= shift_x; + } + } + else { // If not some usual hanging char, remove any positive LSB + int lsb = font->getLeftSideBearing(m_text[wstart]); + if ( lsb > 0 ) + frmline->x -= lsb; + // Assume that when we want hanging punctuation, we can let italic + // glyphs overflow on the left (like glyphs "J" or "f"). + } + } + else { + // When we don't want anything hanging, correct for negative overflow. + // We used to correct it only for italic fonts, where "J" or "f" + // can have huge negative overflow for their part below baseline + // and so leak on the left. + // But some regular fonts can have large ones too. + // So, we do it in any case, whether negative or positive. + // Correcting that makes for nicer multilines highlighting boxes. + frmline->x -= font->getLeftSideBearing(m_text[wstart]); + // Note: be sure lvrend.cpp getRenderedWidths() and lvtinydom.cpp + // ldomXPointer::getRect() use the same checks we do here. + } + // Should we account for any positive added shift, that we could remove + // if it makes us decrease the width of spaces too much ? + } + // Word x position on line: for now, we just stack words after each other. 
// They will be adjusted if needed in alignLine() word->x = frmline->width; @@ -3107,7 +3197,7 @@ class LVFormatter { word->min_width = word->width; } } - else if ( frmline->word_count>1 && m_flags[wstart] & LCHAR_IS_SPACE ) { + else if ( !firstWord && m_flags[wstart] & LCHAR_IS_SPACE ) { // Current word starts with a space (looks like this should not happen): // we can increase the space between previous word and this one if needed //if ( word->t.len<2 || m_text[i-1]!=UNICODE_NO_BREAK_SPACE || m_text[i-2]!=UNICODE_NO_BREAK_SPACE) @@ -3116,17 +3206,89 @@ class LVFormatter { //else frmline->words[frmline->word_count-2].flags |= LTEXT_WORD_CAN_ADD_SPACE_AFTER; } - else if (frmline->word_count>1 && isCJKIdeograph(m_text[i])) { + else if ( !firstWord && isCJKIdeograph(m_text[i]) ) { // Current word is a CJK char: we can increase the space // between previous word and this one if needed frmline->words[frmline->word_count-2].flags |= LTEXT_WORD_CAN_ADD_SPACE_AFTER; } - // if ( m_flags[i-1] & LCHAR_ALLOW_WRAP_AFTER ) // word->flags |= LTEXT_WORD_CAN_BREAK_LINE_AFTER; // not used anywhere - if ( word->t.start==0 && srcline->flags & LTEXT_IS_LINK ) - word->flags |= LTEXT_WORD_IS_LINK_START; // for in-page footnotes + if ( lastWord && (align == LTEXT_ALIGN_RIGHT || align == LTEXT_ALIGN_WIDTH) ) { + // Adjust line end if needed. + // If we need to adjust last word last char correction, + // we need to put the delta in this word->width, which + // will make it into frmline->width. + + // We need to possibly extend the last char width to account for italic + // right side bearing overflow (but not if we ended the line with some + // hyphenation, as the last glyph will then be the hyphen). + + // Note: XXX it feels we should do that for the char before ANY image on the line (so the italic + // glyph does not overlap with the image). It's unclear whether the former code did that + // (or not) for the char before an image at end of line only... 
+ + // Find the real last drawn glyph + int lastnonspace = i-1; + for ( int k=i-1; k>=wstart; k-- ) { + if ( !(m_flags[k] & LCHAR_IS_SPACE) ) { + lastnonspace = k; + break; + } + } + if ( m_flags[lastnonspace] & LCHAR_ALLOW_HYPH_WRAP_AFTER ) { + if ( m_hanging_punctuation ) { + // If the line ends with a hyphen, just hang the hyphen. + int percent = srcline->lang_cfg->getHyphenHangingPercent(); + if ( percent ) { + int shift_w = font->getHyphenWidth() * percent / 100; + if ( shift_w == 0 ) // Force at least 1px if division rounded it to 0 + shift_w = 1; + word->width -= shift_w; + } + // XXX Shouldn't we always take the hyphen width for the main + // paragraph font ? So that hyphens for parts of text in a + // bold font don't stand out more than others? + } + } + else if ( m_hanging_punctuation ) { + bool check_font; + int percent = srcline->lang_cfg->getHangingPercent(true, check_font, m_text, lastnonspace, end-lastnonspace-1); + if ( percent ) { + bool use_percent = true; + if ( check_font ) { + // Some fonts might already have enough negative + // right side bearing for some chars, that would + // make them naturally hang on the right. + if ( font->getRightSideBearing(m_text[lastnonspace]) < 0 ) + use_percent = false; + } + if ( use_percent ) { + int last_char_width = m_widths[lastnonspace] - (lastnonspace>0 ? m_widths[lastnonspace-1] : 0); + int shift_w = last_char_width * percent / 100; + if ( shift_w == 0 ) // Force at least 1px if division rounded it to 0 + shift_w = 1; + word->width -= shift_w; + } + } + } + else { + // When we don't want anything hanging, correct for negative overflow. + // We used to correct it only for italic fonts, where "J" or "f" + // can have huge negative overflow for their part above baseline + // and so leak on the right. + // But some regular fonts can have large ones too. + // So, we do it in any case, whether negative or positive. + // Correcting that makes for nicer multilines highlighting boxes. 
+ word->width -= font->getRightSideBearing(m_text[lastnonspace]); + // Note: be sure lvrend.cpp getRenderedWidths() and lvtinydom.cpp + // ldomXPointer::getRect() use the same checks we do here. + } + } + + /* Hanging punctuation (with CJK specifics) old code: + * + bool visualAlignmentEnabled = m_hanging_punctuation && (align != LTEXT_ALIGN_CENTER); if ( visualAlignmentEnabled && lastWord ) { // if floating punctuation enabled int endp = i-1; int lastc = m_text[endp]; @@ -3221,6 +3383,8 @@ class LVFormatter { } word->min_width = word->width; } // done if floating punctuation enabled + * End of old code for handling hanging punctuation + */ // printf("addLine - word(%d, %d) x=%d (%d..%d)[%d>%d %x] |%s|\n", wstart, i, // frmline->width, wstart>0 ? m_widths[wstart-1] : 0, m_widths[i-1], word->width, @@ -3281,6 +3445,7 @@ class LVFormatter { } frmline->width += word->width; + firstWord = false; lastSrc = newSrc; wstart = i; @@ -3343,16 +3508,9 @@ class LVFormatter { } } - // Fix up words position and width to ensure requested alignment and indent - // No need to do that if light formatting, as this won't affect the - // block height and floats positionning - is_reusable will be unset, - // and any attempt at reusing this formatting for drawing will cause - // a non-light re-formatting. Except when there are inlineBoxes in the - // text: we need to correctly position them to have their x/y saved - // in their RenderRectAccessor (so getRect() can work accurately before - // the page is drawn). 
- if ( !m_pbuffer->light_formatting || has_inline_boxes ) { - alignLine( frmline, align, rightIndent, has_inline_boxes ); + if ( !light_formatting ) { + // Fix up words position and width to ensure requested alignment and indent + alignLine( frmline, align, rightIndent, hasInlineBoxes ); } // Get ready for next line @@ -3393,6 +3551,7 @@ class LVFormatter { c >= UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END ); } + #if (USE_LIBUNIBREAK!=1) bool isCJKPunctuation(lChar16 c) { return ( c >= UNICODE_CJK_PUNCTUATION_BEGIN && c <= UNICODE_CJK_PUNCTUATION_END ) || ( c >= UNICODE_GENERAL_PUNCTUATION_BEGIN && c <= UNICODE_GENERAL_PUNCTUATION_END && @@ -3410,6 +3569,7 @@ class LVFormatter { c==0x3008 || c==0x300a || c==0x300c || c==0x300e || c==0x3010 || // 〈 《 「 『 【 CJK left brackets c==0xff08; // ( fullwidth left parenthesis } + #endif bool isLeftPunctuation(lChar16 c) { // Opening quotation marks and dashes that we don't want a followup space to @@ -3457,28 +3617,6 @@ class LVFormatter { preFormattedOnly = preFormattedOnly && lfFound; } - bool visualAlignmentEnabled = gFlgFloatingPunctuationEnabled!=0; - int visualAlignmentWidth = 0; - if ( visualAlignmentEnabled ) { - // We remove from the available width the max of the max width - // of -/./,/!/? (and other CJK ones) in all fonts used in that - // paragraph, to reserve room for it in case we get one hanging. - // (This will lead to messy variable paragraph widths if some - // paragraph use some bigger font for some inline parts, and - // others don't.) - LVFont * font = NULL; - for ( int i=start; isrctext[i].flags & LTEXT_SRC_IS_OBJECT) ) { - font = (LVFont*)m_pbuffer->srctext[i].t.font; - if (font) { - int dx = font->getVisualAligmentWidth(); - if ( dx>visualAlignmentWidth ) - visualAlignmentWidth = dx; - } - } - } - } - // Not per-specs, but when floats reduce the available width, skip y until // we have the width to draw at least a few chars on a line. 
// We use N x strut_height because it's one easily acccessible font metric here. @@ -3490,6 +3628,14 @@ class LVFormatter { int upSkipPos = -1; #endif + // Note: we no longer adjust here x and width to account for first or + // last italic glyphs side bearings or hanging punctuation, as here, + // we're still just walking the text in logical order, which might + // be re-ordered when BiDi. + // We'll handle that in AddLine() where we'll make words in visual + // order; the small shifts we might have on the final width vs the + // width measured here will hopefully be compensated on the space chars. + while ( posflags & LTEXT_SRC_IS_FLOAT_DONE) ) { - int currentWidth = x + firstCharMargin + m_widths[i]-w0 - spaceReduceWidth; - addFloat( src, currentWidth ); - src->flags |= LTEXT_SRC_IS_FLOAT_DONE; - maxWidth = getCurrentLineWidth(); + if ( flags & LCHAR_IS_OBJECT ) { + if ( m_charindex[i] == FLOAT_CHAR_INDEX ) { // float + src_text_fragment_t * src = m_srcs[i]; + // Not sure if we can be called again on the same LVFormatter + // object, but the whole code allows for re-formatting and + // they should give the same result. + // So, use a flag to not re-add already processed floats. + if ( !(src->flags & LTEXT_SRC_IS_FLOAT_DONE) ) { + int currentWidth = x + m_widths[i]-w0 - spaceReduceWidth; + addFloat( src, currentWidth ); + src->flags |= LTEXT_SRC_IS_FLOAT_DONE; + maxWidth = getCurrentLineWidth(); + } + // We don't set lastNormalWrap when collapsed spaces, + // so let's not for floats either. + // But we need to when the float is the last source (as + // done below, otherwise we would not update wrapPos and + // we'd get another ghost line, and this real last line + // might be wrongly justified). + if ( i==m_length-1 ) { + lastNormalWrap = i; + } + continue; } - // We don't set lastNormalWrap when collapsed spaces, - // so let's not for floats either. 
- // But we need to when the float is the last source (as - // done below, otherwise we would not update wrapPos and - // we'd get another ghost line, and this real last line - // might be wrongly justified). - if ( i==m_length-1 ) { - lastNormalWrap = i; + if ( m_charindex[i] == INLINEBOX_CHAR_INDEX && firstInlineBoxPos < 0 ) { + firstInlineBoxPos = i; } - continue; } // We would not need to bother with LCHAR_IS_COLLAPSED_SPACE, as they have zero // width and so can be grabbed here. They carry LCHAR_ALLOW_WRAP_AFTER just like @@ -3584,39 +3733,10 @@ class LVFormatter { fillAndMoveToY( new_y ); maxWidth = getCurrentLineWidth(); } - // Shift first italic glyph whose part below baseline might leak on the left. - // Note: we might not need to bother with negative left side bearing, as we - // now can have them in the margin as we don't clip anymore. So, we could - // just have italic "J" or "f" drawn a bit in the margin. - // But shifting them that way makes for nicer multilines higlighting boxes. - firstCharMargin = getAdditionalCharWidthOnLeft(pos); - if ( visualAlignmentEnabled ) { // Floating punctuation - maxWidth -= visualAlignmentWidth; - spaceReduceWidth -= visualAlignmentWidth/2; - firstCharMargin += visualAlignmentWidth/2; - if (isCJKLeftPunctuation(m_text[pos])) { - // Make that left punctuation left-hanging by reducing firstCharMargin - LVFont * fnt = (LVFont *)m_srcs[pos]->t.font; - if (fnt) - firstCharMargin -= fnt->getCharWidth(m_text[pos]); - firstCharMargin = (x + firstCharMargin) > 0 ? firstCharMargin : 0; - } - } - if ( m_has_bidi ) { - // If bidi, our first char may be no more the first char - // inside AddLine, so reset firtCharMargin to 0. - // (a bit sad to do that if there's a single RTL word - // in the middle of line...) - firstCharMargin = 0; - // todo: probably some other things to avoid if bidi or - // if m_para_dir_is_rtl, like hyphenation. 
- // Also possible: scan chars as they fit on this line for - // bidi level > 1: if none, this line is pure LTR - } } bool grabbedExceedingSpace = false; - if ( x + firstCharMargin + m_widths[i]-w0 > maxWidth + spaceReduceWidth ) { + if ( x + m_widths[i]-w0 > maxWidth + spaceReduceWidth ) { // It's possible the char at i is a space whose width exceeds maxWidth, // but it should be a candidate for lastNormalWrap (otherwise, the // previous word will be hyphenated and we will get spaces widen for @@ -3700,11 +3820,12 @@ class LVFormatter { m_pbuffer->min_space_condensing_percent != 100 && i < m_length-1 && ( m_flags[i] & LCHAR_IS_SPACE ) && - ( i==m_length-1 || !(m_flags[i + 1] & LCHAR_IS_SPACE) ) ) { + !(m_flags[i+1] & LCHAR_IS_SPACE) ) { // Each space not followed by a space is candidate for space condensing int dw = getMaxCondensedSpaceTruncation(i); if ( dw>0 ) spaceReduceWidth += dw; + // TODO do that too for CJK punctuation whose glyph might be half blank } if (grabbedExceedingSpace) break; // delayed break @@ -3732,9 +3853,6 @@ class LVFormatter { #endif int normalWrapWidth = lastNormalWrap > 0 ? x + m_widths[lastNormalWrap]-w0 : 0; int unusedSpace = maxWidth - normalWrapWidth; - if ( visualAlignmentEnabled ) { - unusedSpace -= 2*visualAlignmentWidth; - } int unusedPercent = maxWidth > 0 ? unusedSpace * 100 / maxWidth : 0; #if (USE_LIBUNIBREAK!=1) // (Different usage of deprecatedWrap than above) @@ -3822,7 +3940,7 @@ class LVFormatter { for ( int i=0; i 0 && !(m_flags[endp-1] & LCHAR_ALLOW_HYPH_WRAP_AFTER) ) { - // Find the real last displayed glyph, skipping spaces and floats - int lastnonspace = endp-1; - for ( int k=endp-1; k>=start; k-- ) { - if ( !(m_flags[k] & LCHAR_IS_SPACE) && - !( (m_flags[k] & LCHAR_IS_OBJECT) && (m_charindex[k] == FLOAT_CHAR_INDEX) ) ) { - lastnonspace = k; - break; - } - } - // If the last non-space/non-float is an image or an inline-block box, we don't do it. 
- // Note: it feels we should do that for the char before ANY image on the line (so the italic - // glyph does not overlap with the image). It's unclear whether the former code did that - // (or not) for the char before an image at end of line only... - if ( !(m_flags[lastnonspace] & LCHAR_IS_OBJECT) ) { - // todo: probably need be avoided if bidi/rtl: - int dw = lastnonspace>=start ? getAdditionalCharWidth(lastnonspace, lastnonspace+1) : 0; - if (dw) { - TR("additional width = %d, after char %s", dw, LCSTR(lString16(m_text + lastnonspace, 1))); - m_widths[lastnonspace] += dw; - } - } - } - - addLine(pos, endp, x + firstCharMargin, para, pos==0, wrapPos>=m_length-1, preFormattedOnly, isLastPara); + bool hasInlineBoxes = firstInlineBoxPos >= 0 && firstInlineBoxPos < endp; + addLine(pos, endp, x, para, pos==0, wrapPos>=m_length-1, preFormattedOnly, isLastPara, hasInlineBoxes); pos = wrapPos + 1; // start of next line #if (USE_LIBUNIBREAK==1) @@ -4220,7 +4315,7 @@ static void freeFrmLines( formatted_text_fragment_t * m_pbuffer ) } // experimental formatter -lUInt32 LFormattedText::Format(lUInt16 width, lUInt16 page_height, int para_direction, BlockFloatFootprint * float_footprint) +lUInt32 LFormattedText::Format(lUInt16 width, lUInt16 page_height, int para_direction, bool hanging_punctuation, BlockFloatFootprint * float_footprint) { // clear existing formatted data, if any freeFrmLines( m_pbuffer ); @@ -4251,6 +4346,7 @@ lUInt32 LFormattedText::Format(lUInt16 width, lUInt16 page_height, int para_dire // Set specified para direction (can be REND_DIRECTION_UNSET, in which case // it will be detected by fribidi) formatter.m_specified_para_dir = para_direction; + formatter.m_hanging_punctuation = hanging_punctuation; if (float_footprint) { formatter.m_no_clear_own_floats = float_footprint->no_clear_own_floats; diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index bce575064..fa4524a61 100644 --- a/crengine/src/lvtinydom.cpp +++ 
b/crengine/src/lvtinydom.cpp @@ -390,8 +390,11 @@ lUInt32 calcGlobalSettingsHash(int documentId, bool already_rendered) // hash = hash * 31 + (int)fontMan->GetHintingMode(); if ( LVRendGetFontEmbolden() ) hash = hash * 75 + 2384761; - if ( gFlgFloatingPunctuationEnabled ) - hash = hash * 75 + 1761; + // Hanging punctuation does not need to trigger a re-render, as + // it's now ensured by alignLine() and won't change paragraphs height. + // We just need to _renderedBlockCache.clear() when it changes. + // if ( gHangingPunctuationEnabled ) + // hash = hash * 75 + 1761; hash = hash * 31 + gRenderDPI; hash = hash * 31 + gRenderBlockRenderingFlags; hash = hash * 31 + gRootFontSize; @@ -8764,13 +8767,9 @@ bool ldomXPointer::getRect(lvRect & rect, bool extended, bool adjusted) const int chw = w[ offset - word->t.start ] - chx; bool hyphen_added = false; if ( offset == word->t.start + word->t.len - 1 - && (word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER) - && !gFlgFloatingPunctuationEnabled ) { + && (word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER) ) { // if offset is the end of word, and this word has // been hyphenated, includes the hyphen width - // (but not when floating punctuation is enabled, - // to keep nice looking rectangles on multi lines - // text selection) chw += font->getHyphenWidth(); // We then should not account for the right side // bearing below @@ -8935,13 +8934,9 @@ bool ldomXPointer::getRect(lvRect & rect, bool extended, bool adjusted) const int chw = w[ offset - word->t.start ] - chx; bool hyphen_added = false; if ( offset == word->t.start + word->t.len - 1 - && (word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER) - && !gFlgFloatingPunctuationEnabled ) { + && (word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER) ) { // if offset is the end of word, and this word has // been hyphenated, includes the hyphen width - // (but not when floating punctuation is enabled, - // to keep nice looking rectangles on multi lines - // text selection) chw += 
font->getHyphenWidth(); // We then should not account for the right side // bearing below @@ -17125,6 +17120,7 @@ int ldomNode::renderFinalBlock( LFormattedTextRef & frmtext, RenderRectAccessor //RenderRectAccessor fmt( this ); /// render whole node content as single formatted object int direction = RENDER_RECT_PTR_GET_DIRECTION(fmt); + int hanging_punctuation = gHangingPunctuationEnabled && RENDER_RECT_PTR_HAS_FLAG(fmt, IS_IN_MAIN_FLOW); lUInt32 flags = styleToTextFmtFlags( true, getStyle(), 0, direction ); int lang_node_idx = fmt->getLangNodeIndex(); TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg(lang_node_idx>0 ? getDocument()->getTinyNode(lang_node_idx) : NULL); @@ -17132,12 +17128,6 @@ int ldomNode::renderFinalBlock( LFormattedTextRef & frmtext, RenderRectAccessor // We need to store this LFormattedTextRef in the cache for it to // survive when leaving this function (some callers do use it). cache.set( this, f ); - bool flg=gFlgFloatingPunctuationEnabled; - if (this->getNodeName()=="th"||this->getNodeName()=="td"|| - (!this->getParentNode()->isNull()&&this->getParentNode()->getNodeName()=="td")|| - (!this->getParentNode()->isNull()&&this->getParentNode()->getNodeName()=="th")) { - gFlgFloatingPunctuationEnabled=false; - } // This page_h we provide to f->Format() is only used to enforce a max height to images int page_h = getDocument()->getPageHeight(); // Save or restore outer floats footprint (it is only provided @@ -17159,8 +17149,7 @@ int ldomNode::renderFinalBlock( LFormattedTextRef & frmtext, RenderRectAccessor // one that is on a page to be drawn will be reformatted . 
f->requestLightFormatting(); } - int h = f->Format((lUInt16)width, (lUInt16)page_h, direction, float_footprint); - gFlgFloatingPunctuationEnabled=flg; + int h = f->Format((lUInt16)width, (lUInt16)page_h, direction, hanging_punctuation, float_footprint); frmtext = f; //CRLog::trace("Created new formatted object for node #%08X", (lUInt32)this); return h; diff --git a/crengine/src/textlang.cpp b/crengine/src/textlang.cpp index c5bc9edc6..190178741 100644 --- a/crengine/src/textlang.cpp +++ b/crengine/src/textlang.cpp @@ -705,3 +705,140 @@ lString16 & TextLangCfg::getClosingQuote( bool update_level ) { _quote_nesting_level--; return ((_quote_nesting_level+1) % 2) ? _close_quote1 : _close_quote2; } + +int TextLangCfg::getHyphenHangingPercent() { + return 70; // 70% +} + +int TextLangCfg::getHangingPercent( bool right_hanging, bool & check_font, const lChar16 * text, int pos, int next_usable ) { + // We get provided with the BiDi re-ordered m_text (so, visually + // ordered) and the index of char: if needed, we can look at + // previous or next chars for context to decide how much to hang. + // For now, we don't - but in some cases or with some languages, + // we might want to check the context (i.e. consecutive punctuations, + // opening quotes followed by a space in French...) + lChar16 ch = text[pos]; + int ratio = 0; + + // For the common punctuations, parens and quotes, we check and + // return the same value whether asked for left or right hanging. + // Normally, libunibreak has prevented them from happening on + // one of the sides - but with RTL text, they may happen on + // the other side. Also, some BiDi mirrorable chars "([])" might + // be mirrored in the provided *text when not-using HarfBuzz, but + // won't be mirrored when using HarfBuzz - so let's handle + // all of them no matter the hanging side asked for. + // Also, because in some languages, quotation marks and guillemets + // are used reverted, we include left and right ones in both sets. 
+ + // Most values taken from the "protrusion" section in: + // https://source.contextgarden.net/tex/context/base/mkiv/font-imp-quality.lua + // https://www.w3.org/Mail/flatten/index?subject=Amending+hanging-punctuation+for+Western+typography&list=www-style + // and the microtypography thesis: http://www.pragma-ade.nl/pdftex/thesis.pdf + // (screenshot at https://github.com/koreader/koreader/issues/6235#issuecomment-639307634) + + switch (ch) { + case 0x0027: // ' single quote + case 0x002C: // , comma + case 0x002D: // - minus + case 0x002E: // . period + case 0x0060: // ` back quote + // case 0x00AD: // soft hyphen (we don't draw them, so don't handle them) + case 0x060C: // ، arabic comma + case 0x06D4: // ۔ arabic full stop + case 0x2010: // ‐ hyphen + case 0x2018: // ‘ left single quotation mark + case 0x2019: // ’ right single quotation mark + case 0x201A: // ‚ single low-9 quotation mark + case 0x201B: // ‛ single high-reversed-9 quotation mark + case 0x2039: // ‹ left single guillemet + case 0x203A: // › right single guillemet + ratio = 70; + break; + case 0x0022: // " double quote + case 0x003A: // : colon + case 0x003B: // ; semicolon + case 0x00AB: // « left guillemet + case 0x00BB: // » right guillemet + case 0x061B: // ؛ arabic semicolon + case 0x201C: // “ left double quotation mark + case 0x201D: // ” right double quotation mark + case 0x201E: // „ double low-9 quotation mark + case 0x201F: // ‟ double high-reversed-9 quotation mark + ratio = 50; + break; + case 0x2013: // – endash + ratio = 30; + break; + case 0x0021: // ! + case 0x003F: // ? 
+ case 0x00A1: // ¡ + case 0x00BF: // ¿ + case 0x061F: // ؟ + case 0x2014: // — emdash + ratio = 20; + break; + case 0x0028: // ( + case 0x0029: // ) + case 0x005B: // [ + case 0x005D: // ] + case 0x007B: // { + case 0x007D: // } + ratio = 5; + break; + default: + break; + } + if ( ratio ) { + check_font = false; + return ratio; + } + // Others are not punctuation, but slight adjustments for some letters, + // that might be ignored if the font already includes some negative + // left side bearing. + check_font = true; + if ( right_hanging ) { + switch (ch) { + case 'A': + case 'F': + case 'K': + case 'L': + case 'T': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'k': + case 'r': + case 't': + case 'v': + case 'w': + case 'x': + case 'y': + ratio = 5; + break; + default: + break; + } + } + else { // left hanging + switch (ch) { + case 'A': + case 'J': + case 'T': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'v': + case 'w': + case 'x': + case 'y': + ratio = 5; + break; + default: + break; + } + } + return ratio; +}