Skip to content

Commit

Permalink
Adds TextLangMan for text typography by language
Browse files Browse the repository at this point in the history
Parse and store values from lang= attributes, so we can
propagate a TextLangCfg object to all calls dealing with
text, which will allow us to:
- Use specific libunibreak rules for line breaking per lang
  (e.g. reversed quotation marks in German vs French).
- Use the right hyphenation dictionary for each language
- Add more specific line breaking tweaks for some languages
  (some single letter prepositions should not be at end of
  line in Polish and Czech, real hyphens should be duplicated
  at start of next line in Portuguese and Polish...)
- Give the language tag to Harfbuzz so it can pick the
  right glyphs for the language (e.g. different glyphs
  for the same codepoint in zh-CN, zh-TW and ja, and for
  Bulgarian Cyrillic with some fonts).

Update existing global HyphMan to use services from
TextLangMan to ensure legacy single global hyphenation.
TextLangMan still uses the hyphenation methods defined
in hyphman.cpp.
  • Loading branch information
poire-z committed Apr 18, 2020
1 parent c72bf1e commit d89ae37
Show file tree
Hide file tree
Showing 15 changed files with 1,193 additions and 171 deletions.
67 changes: 47 additions & 20 deletions crengine/include/hyphman.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,45 @@

#include "lvtypes.h"
#include "lvstream.h"

// Abstract interface for a hyphenation method: it only declares the
// pure virtual hyphenate() entry point and a virtual destructor, so
// concrete methods can be used and destroyed through a HyphMethod pointer.
class HyphMethod
{
public:
// Pure virtual: every concrete method must implement it. flagSize defaults to 1.
// NOTE(review): presumably str/len is the word to hyphenate, widths and flags
// are per-character arrays, and flagSize is the element stride of flags -
// confirm against the implementations in hyphman.cpp.
virtual bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize=1 ) = 0;
// Virtual so that deleting a derived object through a HyphMethod* is well-defined.
virtual ~HyphMethod() { }
};

#include "lvhashtable.h"

#define WORD_LENGTH 64
#define MAX_REAL_WORD 24

// min value supported by algorithms is 1 (max is arbitrary 10)
// value enforced by algorithm previously was 2, so it's the default
#define HYPH_DEFAULT_HYPHEN_MIN 2
#define HYPH_MIN_HYPHEN_MIN 1
// 0 means to use the defaults per HyphMethod
// if set to >= 1, the values apply to all HyphMethods
#define HYPH_MIN_HYPHEN_MIN 0
#define HYPH_MAX_HYPHEN_MIN 10
// Default for global HyphMan values is 0: use per-HyphMethod defaults
#define HYPH_DEFAULT_HYPHEN_MIN 0
// Default for per-HyphMethod values (value enforced by algorithms
// previously was 2, so let's keep that as the default)
#define HYPHMETHOD_DEFAULT_HYPHEN_MIN 2

// Don't trust soft-hyphens when using dict or algo methods
#define HYPH_DEFAULT_TRUST_SOFT_HYPHENS 0

// Abstract base class for a hyphenation method.
// Each instance carries an id and its own left/right "hyphen min" values;
// subclasses must implement hyphenate().
class HyphMethod
{
protected:
// Identifier given at construction, returned by getId()
lString16 _id;
// Per-method left/right hyphen min values, set once by the constructor
// NOTE(review): presumably the minimum number of characters kept before/after
// a hyphenation point - confirm against the implementations.
int _left_hyphen_min;
int _right_hyphen_min;
public:
// Stores the id and the left/right hyphen min values.
// Both min values default to HYPHMETHOD_DEFAULT_HYPHEN_MIN when not provided.
HyphMethod(lString16 id, int leftHyphenMin=HYPHMETHOD_DEFAULT_HYPHEN_MIN, int rightHyphenMin=HYPHMETHOD_DEFAULT_HYPHEN_MIN)
: _id(id)
, _left_hyphen_min(leftHyphenMin)
, _right_hyphen_min(rightHyphenMin)
{ }
// Returns the id this method was constructed with (returned by value)
lString16 getId() { return _id; }
// Pure virtual: every concrete method must implement it. flagSize defaults to 1.
// NOTE(review): presumably str/len is the word to hyphenate, widths and flags
// are per-character arrays, and flagSize is the element stride of flags -
// confirm against the implementations in hyphman.cpp.
virtual bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize=1 ) = 0;
// Virtual so that deleting a derived object through a HyphMethod* is well-defined.
virtual ~HyphMethod() { }
// Both return 0 unless overridden by a subclass.
// NOTE(review): presumably the number of loaded patterns and their memory
// size for dictionary-based methods - confirm against the overrides.
virtual lUInt32 getCount() { return 0; }
virtual lUInt32 getSize() { return 0; }
};


enum HyphDictType
{
HDT_NONE, // disable hyphenation
Expand Down Expand Up @@ -70,7 +88,6 @@ class HyphDictionary
#define HYPH_DICT_ID_SOFTHYPHENS L"@softhyphens"
#define HYPH_DICT_ID_DICTIONARY L"@dictionary"


class HyphDictionaryList
{
LVPtrVector<HyphDictionary> _list;
Expand All @@ -86,6 +103,11 @@ class HyphDictionaryList
};

#define DEF_HYPHENATION_DICT "English_US.pattern"
// We always load English_US.pattern, even though non-English users
// may never use it, because avoiding it would be a bit tedious.
// It might use around 1M of memory, but it avoids re-rendering
// the document when the book does not contain any language tag
// and we end up going with it anyway.

class HyphDictionary;
class HyphDictionaryList;
Expand All @@ -100,36 +122,41 @@ class HyphMan
friend class TexHyph;
friend class AlgoHyph;
friend class SoftHyphensHyph;
static HyphMethod * _method;
static HyphDictionary * _selectedDictionary;
static HyphDictionaryList * _dictList;
// Obsolete: now fetched from TextLangMan main lang TextLangCfg
// static HyphMethod * _method;
// static HyphDictionary * _selectedDictionary;
static HyphDictionaryList * _dictList; // available hyph dict files (+ none/algo/softhyphens)
static LVHashTable<lString16, HyphMethod*> _loaded_hyph_methods; // methods with loaded dictionaries
static int _LeftHyphenMin;
static int _RightHyphenMin;
static int _TrustSoftHyphens;
public:
static void uninit();
static bool activateDictionaryFromStream( LVStreamRef stream );
static bool initDictionaries(lString16 dir, bool clear = true);
static HyphDictionaryList * getDictList() { return _dictList; }
static bool activateDictionary( lString16 id ) { return _dictList->activate(id); }
static bool initDictionaries(lString16 dir, bool clear = true);
static HyphDictionary * getSelectedDictionary() { return _selectedDictionary; }
static bool activateDictionaryFromStream( LVStreamRef stream ); // used by CoolReader on Android
static HyphDictionary * getSelectedDictionary(); // was: { return _selectedDictionary; }
static int getLeftHyphenMin() { return _LeftHyphenMin; }
static int getRightHyphenMin() { return _RightHyphenMin; }
static bool setLeftHyphenMin( int left_hyphen_min );
static bool setRightHyphenMin( int right_hyphen_min );
static int getTrustSoftHyphens() { return _TrustSoftHyphens; }
static bool setTrustSoftHyphens( int trust_soft_hyphen );
static bool isEnabled();
static HyphMethod * getHyphMethodForDictionary( lString16 id, int leftHyphenMin=HYPHMETHOD_DEFAULT_HYPHEN_MIN,
int rightHyphenMin=HYPHMETHOD_DEFAULT_HYPHEN_MIN );

HyphMan();
~HyphMan();

static bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize=1 );
/* Obsolete:
inline static bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize=1 )
{
return _method->hyphenate( str, len, widths, flags, hyphCharWidth, maxWidth, flagSize );
}
*/
};



#endif
17 changes: 13 additions & 4 deletions crengine/include/lvdocviewprops.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,22 @@
#define PROP_SHOW_BATTERY_PERCENT "window.status.battery.percent"
//#define PROP_FONT_KERNING_ENABLED "font.kerning.enabled"
#define PROP_LANDSCAPE_PAGES "window.landscape.pages"
#define PROP_HYPHENATION_LEFT_HYPHEN_MIN "crengine.hyphenation.left.hyphen.min"
#define PROP_HYPHENATION_RIGHT_HYPHEN_MIN "crengine.hyphenation.right.hyphen.min"
#define PROP_HYPHENATION_TRUST_SOFT_HYPHENS "crengine.hyphenation.trust.soft.hyphens"
#define PROP_AUTOSAVE_BOOKMARKS "crengine.autosave.bookmarks"

// Obsolete hyph settings:
#define PROP_HYPHENATION_DICT "crengine.hyphenation.directory"
#define PROP_HYPHENATION_DICT_VALUE_NONE "@none"
#define PROP_HYPHENATION_DICT_VALUE_ALGORITHM "@algorithm"
#define PROP_AUTOSAVE_BOOKMARKS "crengine.autosave.bookmarks"
// Still used hyph settings:
#define PROP_HYPHENATION_LEFT_HYPHEN_MIN "crengine.hyphenation.left.hyphen.min"
#define PROP_HYPHENATION_RIGHT_HYPHEN_MIN "crengine.hyphenation.right.hyphen.min"
#define PROP_HYPHENATION_TRUST_SOFT_HYPHENS "crengine.hyphenation.trust.soft.hyphens"
// New textlang typography settings:
#define PROP_TEXTLANG_MAIN_LANG "crengine.textlang.main.lang"
#define PROP_TEXTLANG_EMBEDDED_LANGS_ENABLED "crengine.textlang.embedded.langs.enabled"
#define PROP_TEXTLANG_HYPHENATION_ENABLED "crengine.textlang.hyphenation.enabled"
#define PROP_TEXTLANG_HYPH_SOFT_HYPHENS_ONLY "crengine.textlang.hyphenation.soft.hyphens.only"
#define PROP_TEXTLANG_HYPH_FORCE_ALGORITHMIC "crengine.textlang.hyphenation.force.algorithmic"

#define PROP_FLOATING_PUNCTUATION "crengine.style.floating.punctuation.enabled"

Expand Down
16 changes: 12 additions & 4 deletions crengine/include/lvfntman.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "lvptrvec.h"
#include "hyphman.h"
#include "lvdrawbuf.h"
#include "textlang.h"

#if !defined(__SYMBIAN32__) && defined(_WIN32)
extern "C" {
Expand Down Expand Up @@ -340,6 +341,7 @@ class LVFont : public LVRefCounter
lUInt8 * flags,
int max_width,
lChar16 def_char,
TextLangCfg * lang_cfg=NULL,
int letter_spacing=0,
bool allow_hyphenation=true,
lUInt32 hints=0
Expand All @@ -350,7 +352,7 @@ class LVFont : public LVRefCounter
\param len is number of characters to measure
\return width of specified string
*/
virtual lUInt32 getTextWidth( const lChar16 * text, int len ) = 0;
virtual lUInt32 getTextWidth( const lChar16 * text, int len, TextLangCfg * lang_cfg=NULL ) = 0;

// /** \brief get glyph image in 1 byte per pixel format
// \param code is unicode character
Expand Down Expand Up @@ -391,6 +393,7 @@ class LVFont : public LVRefCounter
virtual int DrawTextString( LVDrawBuf * buf, int x, int y,
const lChar16 * text, int len,
lChar16 def_char, lUInt32 * palette = NULL, bool addHyphen = false,
TextLangCfg * lang_cfg=NULL,
lUInt32 flags=0, int letter_spacing=0, int width=-1,
int text_decoration_back_gap=0 ) = 0;
/// constructor
Expand Down Expand Up @@ -576,6 +579,7 @@ class LVBaseFont : public LVFont
virtual int DrawTextString( LVDrawBuf * buf, int x, int y,
const lChar16 * text, int len,
lChar16 def_char, lUInt32 * palette, bool addHyphen,
TextLangCfg * lang_cfg=NULL,
lUInt32 flags=0, int letter_spacing=0, int width=-1,
int text_decoration_back_gap=0 );
};
Expand All @@ -595,6 +599,7 @@ class LBitmapFont : public LVBaseFont
lUInt8 * flags,
int max_width,
lChar16 def_char,
TextLangCfg * lang_cfg=NULL,
int letter_spacing=0,
bool allow_hyphenation=true,
lUInt32 hints=0
Expand All @@ -605,7 +610,7 @@ class LBitmapFont : public LVBaseFont
\return width of specified string
*/
virtual lUInt32 getTextWidth(
const lChar16 * text, int len
const lChar16 * text, int len, TextLangCfg * lang_cfg=NULL
);
virtual LVFontGlyphCacheItem * getGlyph(lUInt32 ch, lChar16 def_char=0);
/// returns font baseline offset
Expand Down Expand Up @@ -757,6 +762,7 @@ class LVWin32DrawFont : public LVBaseWin32Font
lUInt8 * flags,
int max_width,
lChar16 def_char,
TextLangCfg * lang_cfg=NULL,
int letter_spacing=0,
bool allow_hyphenation=true,
lUInt32 hints=0
Expand All @@ -767,7 +773,7 @@ class LVWin32DrawFont : public LVBaseWin32Font
\return width of specified string
*/
virtual lUInt32 getTextWidth(
const lChar16 * text, int len
const lChar16 * text, int len, TextLangCfg * lang_cfg=NULL
);

/// returns char width
Expand All @@ -777,6 +783,7 @@ class LVWin32DrawFont : public LVBaseWin32Font
virtual int DrawTextString( LVDrawBuf * buf, int x, int y,
const lChar16 * text, int len,
lChar16 def_char, lUInt32 * palette, bool addHyphen,
TextLangCfg * lang_cfg=NULL,
lUInt32 flags=0, int letter_spacing=0, int width=-1,
int text_decoration_back_gap=0 );

Expand Down Expand Up @@ -935,6 +942,7 @@ class LVWin32Font : public LVBaseWin32Font
lUInt8 * flags,
int max_width,
lChar16 def_char,
TextLangCfg * lang_cfg=NULL,
int letter_spacing=0,
bool allow_hyphenation=true,
lUInt32 hints=0
Expand All @@ -945,7 +953,7 @@ class LVWin32Font : public LVBaseWin32Font
\return width of specified string
*/
virtual lUInt32 getTextWidth(
const lChar16 * text, int len
const lChar16 * text, int len, TextLangCfg * lang_cfg=NULL
);

/** \brief get glyph image in 1 byte per pixel format
Expand Down
6 changes: 4 additions & 2 deletions crengine/include/lvrend.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#define __LV_REND_H_INCLUDED__

#include "lvtinydom.h"
#include "textlang.h"

// Current direction, from dir="ltr" or dir="rtl" element attribute
// Should map directly to the RENDER_RECT_FLAG_DIRECTION_* below
Expand Down Expand Up @@ -120,7 +121,7 @@ int initRendMethod( ldomNode * node, bool recurseChildren, bool allowAutoboxing
int styleToTextFmtFlags( const css_style_ref_t & style, int oldflags, int direction=REND_DIRECTION_UNSET );
/// renders block as single text formatter object
void renderFinalBlock( ldomNode * node, LFormattedText * txform, RenderRectAccessor * fmt, int & flags,
int indent, int line_h, int valign_dy=0, bool * is_link_start=NULL );
int indent, int line_h, TextLangCfg * lang_cfg=NULL, int valign_dy=0, bool * is_link_start=NULL );
/// renders block which contains subblocks (with gRenderBlockRenderingFlags as flags)
int renderBlockElement( LVRendPageContext & context, ldomNode * enode, int x, int y, int width, int direction=REND_DIRECTION_UNSET, int * baseline=NULL );
/// renders block which contains subblocks
Expand All @@ -144,7 +145,8 @@ void DrawDocument( LVDrawBuf & drawbuf, ldomNode * node, int x0, int y0, int dx,
// minWidth: width with a wrap on all spaces (no hyphenation), so width taken by the longest word
// full function for recursive use:
void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direction, bool ignorePadding, int rendFlags,
int &curMaxWidth, int &curWordWidth, bool &collapseNextSpace, int &lastSpaceWidth, int indent, bool isStartNode=false);
int &curMaxWidth, int &curWordWidth, bool &collapseNextSpace, int &lastSpaceWidth,
int indent, TextLangCfg * lang_cfg, bool isStartNode=false);
// simpler function for first call:
void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direction=REND_DIRECTION_UNSET, bool ignorePadding=false, int rendFlags=0);

Expand Down
27 changes: 15 additions & 12 deletions crengine/include/lvstyles.h
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,8 @@ class lvdomElementFormatRec {
int _top_overflow; // Overflow (positive value) below _y
int _bottom_overflow; // Overflow (positive value) after _y+_height

int _listprop_node_idx; // dataIndex of the UL/OL node this erm_final block
// should get its marker from
int _lang_node_idx; // dataIndex of the upper node this erm_final block
// should get its lang= language from

// Flags & extras, to have additional info related to this rect cached.
// - For erm_final nodes, these contain the footprint of outer floats
Expand All @@ -332,18 +332,21 @@ class lvdomElementFormatRec {
int _extra4;
int _extra5;

// Added for padding from 14 to 16 32-bits ints
int _listprop_node_idx; // dataIndex of the UL/OL node this erm_final block
// should get its marker from

// Added for padding from 15 to 16 32-bits ints
int _available1;
int _available2;

public:
lvdomElementFormatRec()
: _x(0), _width(0), _y(0), _height(0)
, _inner_width(0), _inner_x(0), _inner_y(0), _baseline(0)
, _top_overflow(0), _bottom_overflow(0), _listprop_node_idx(0)
, _top_overflow(0), _bottom_overflow(0)
, _lang_node_idx(0) , _listprop_node_idx(0)
, _flags(0), _extra0(0)
, _extra1(0), _extra2(0), _extra3(0), _extra4(0), _extra5(0)
, _available1(0), _available2(0)
, _available1(0)
{
}
~lvdomElementFormatRec()
Expand All @@ -354,22 +357,22 @@ class lvdomElementFormatRec {
_x = _width = _y = _height = 0;
_inner_width = _inner_x = _inner_y = _baseline = 0;
_top_overflow = _bottom_overflow = 0;
_listprop_node_idx = 0;
_lang_node_idx = _listprop_node_idx = 0;
_flags = _extra0 = 0;
_extra1 = _extra2 = _extra3 = _extra4 = _extra5 = 0;
_available1 = 0; _available2 = 0;
_available1 = 0;
}
bool operator == ( lvdomElementFormatRec & v )
{
return (_height==v._height && _y==v._y && _width==v._width && _x==v._x &&
_inner_width==v._inner_width && _inner_x==v._inner_x &&
_inner_y==v._inner_y && _baseline==v._baseline &&
_top_overflow==v._top_overflow && _bottom_overflow==v._bottom_overflow &&
_listprop_node_idx==v._listprop_node_idx &&
_lang_node_idx==v._lang_node_idx && _listprop_node_idx==v._listprop_node_idx &&
_flags==v._flags && _extra0==v._extra0 &&
_extra1==v._extra1 && _extra2==v._extra2 && _extra3==v._extra3 &&
_extra4==v._extra4 && _extra5==v._extra5 &&
_available1==v._available1 && _available2==v._available2
_available1==v._available1
);
}
bool operator != ( lvdomElementFormatRec & v )
Expand All @@ -378,11 +381,11 @@ class lvdomElementFormatRec {
_inner_width!=v._inner_width || _inner_x!=v._inner_x ||
_inner_y!=v._inner_y || _baseline!=v._baseline ||
_top_overflow!=v._top_overflow || _bottom_overflow!=v._bottom_overflow ||
_listprop_node_idx!=v._listprop_node_idx ||
_lang_node_idx!=v._lang_node_idx || _listprop_node_idx!=v._listprop_node_idx ||
_flags!=v._flags || _extra0!=v._extra0 ||
_extra1!=v._extra1 || _extra2!=v._extra2 || _extra3!=v._extra3 ||
_extra4!=v._extra4 || _extra5!=v._extra5 ||
_available1!=v._available1 || _available2!=v._available2
_available1!=v._available1
);
}
// Get/Set
Expand Down
Loading

0 comments on commit d89ae37

Please sign in to comment.