shaper.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. // SPDX-License-Identifier: Unlicense OR MIT
  2. package text
  3. import (
  4. "bufio"
  5. "io"
  6. "strings"
  7. "unicode/utf8"
  8. giofont "gioui.org/font"
  9. "gioui.org/io/system"
  10. "gioui.org/op"
  11. "gioui.org/op/clip"
  12. "github.com/go-text/typesetting/font"
  13. "golang.org/x/image/math/fixed"
  14. )
  15. // WrapPolicy configures strategies for choosing where to break lines of text for line
  16. // wrapping.
  17. type WrapPolicy uint8
  18. const (
  19. // WrapHeuristically tries to minimize breaking within words (UAX#14 text segments)
  20. // while also ensuring that text fits within the given MaxWidth. It will only break
  21. // a line within a word (on a UAX#29 grapheme cluster boundary) when that word cannot
  22. // fit on a line by itself. Additionally, when the final word of a line is being
  23. // truncated, this policy will preserve as many symbols of that word as
  24. // possible before the truncator.
  25. WrapHeuristically WrapPolicy = iota
  26. // WrapWords does not permit words (UAX#14 text segments) to be broken across lines.
  27. // This means that sometimes long words will exceed the MaxWidth they are wrapped with.
  28. WrapWords
  29. // WrapGraphemes will maximize the amount of text on each line at the expense of readability,
  30. // breaking any word across lines on UAX#29 grapheme cluster boundaries to maximize the number of
  31. // grapheme clusters on each line.
  32. WrapGraphemes
  33. )
  34. // Parameters are static text shaping attributes applied to the entire shaped text.
  35. type Parameters struct {
  36. // Font describes the preferred typeface.
  37. Font giofont.Font
  38. // Alignment characterizes the positioning of text within the line. It does not directly
  39. // impact shaping, but is provided in order to allow efficient offset computation.
  40. Alignment Alignment
  41. // PxPerEm is the pixels-per-em to shape the text with.
  42. PxPerEm fixed.Int26_6
  43. // MaxLines limits the quantity of shaped lines. Zero means no limit.
  44. MaxLines int
  45. // Truncator is a string of text to insert where the shaped text was truncated, which
  46. // can currently ohly happen if MaxLines is nonzero and the text on the final line is
  47. // truncated.
  48. Truncator string
  49. // WrapPolicy configures how line breaks will be chosen when wrapping text across lines.
  50. WrapPolicy WrapPolicy
  51. // MinWidth and MaxWidth provide the minimum and maximum horizontal space constraints
  52. // for the shaped text.
  53. MinWidth, MaxWidth int
  54. // Locale provides primary direction and language information for the shaped text.
  55. Locale system.Locale
  56. // LineHeightScale is a scaling factor applied to the LineHeight of a paragraph. If zero, a default
  57. // value of 1.2 will be used.
  58. LineHeightScale float32
  59. // LineHeight is the distance between the baselines of two lines of text. If zero, the PxPerEm
  60. // of the any given paragraph will set the LineHeight of that paragraph. This value will be
  61. // scaled by LineHeightScale, so applications desiring a specific fixed value
  62. // should set LineHeightScale to 1.
  63. LineHeight fixed.Int26_6
  64. // forceTruncate controls whether the truncator string is inserted on the final line of
  65. // text with a MaxLines. It is unexported because this behavior only makes sense for the
  66. // shaper to control when it iterates paragraphs of text.
  67. forceTruncate bool
  68. // DisableSpaceTrim prevents the width of the final whitespace glyph on a line from being zeroed.
  69. // This is desirable for text editors (so that the whitespace can be selected), but is undesirable
  70. // for ordinary display text.
  71. DisableSpaceTrim bool
  72. }
  73. type FontFace = giofont.FontFace
  74. // Glyph describes a shaped font glyph. Many fields are distances relative
  75. // to the "dot", which is a point on the baseline (the line upon which glyphs
  76. // visually rest) for the line of text containing the glyph.
  77. //
  78. // Glyphs are organized into "glyph clusters," which are sequences that
  79. // may represent an arbitrary number of runes.
  80. //
  81. // Sequences of glyph clusters that share style parameters are grouped into "runs."
  82. //
  83. // "Document coordinates" are pixel values relative to the text's origin at (0,0)
  84. // in the upper-left corner" Displaying each shaped glyph at the document
  85. // coordinates of its dot will correctly visualize the text.
  86. type Glyph struct {
  87. // ID is a unique, per-shaper identifier for the shape of the glyph.
  88. // Glyphs from the same shaper will share an ID when they are from
  89. // the same face and represent the same glyph at the same size.
  90. ID GlyphID
  91. // X is the x coordinate of the dot for this glyph in document coordinates.
  92. X fixed.Int26_6
  93. // Y is the y coordinate of the dot for this glyph in document coordinates.
  94. Y int32
  95. // Advance is the logical width of the glyph. The glyph may be visually
  96. // wider than this.
  97. Advance fixed.Int26_6
  98. // Ascent is the distance from the dot to the logical top of glyphs in
  99. // this glyph's face. The specific glyph may be shorter than this.
  100. Ascent fixed.Int26_6
  101. // Descent is the distance from the dot to the logical bottom of glyphs
  102. // in this glyph's face. The specific glyph may descend less than this.
  103. Descent fixed.Int26_6
  104. // Offset encodes the origin of the drawing coordinate space for this glyph
  105. // relative to the dot. This value is used when converting glyphs to paths.
  106. Offset fixed.Point26_6
  107. // Bounds encodes the visual dimensions of the glyph relative to the dot.
  108. Bounds fixed.Rectangle26_6
  109. // Runes is the number of runes represented by the glyph cluster this glyph
  110. // belongs to. If Flags does not contain FlagClusterBreak, this value will
  111. // always be zero. The final glyph in the cluster contains the runes count
  112. // for the entire cluster.
  113. Runes uint16
  114. // Flags encode special properties of this glyph.
  115. Flags Flags
  116. }
  117. type Flags uint16
  118. const (
  119. // FlagTowardOrigin is set for glyphs in runs that flow
  120. // towards the origin (RTL).
  121. FlagTowardOrigin Flags = 1 << iota
  122. // FlagLineBreak is set for the last glyph in a line.
  123. FlagLineBreak
  124. // FlagRunBreak is set for the last glyph in a run. A run is a sequence of
  125. // glyphs sharing constant style properties (same size, same face, same
  126. // direction, etc...).
  127. FlagRunBreak
  128. // FlagClusterBreak is set for the last glyph in a glyph cluster. A glyph cluster is a
  129. // sequence of glyphs which are logically a single unit, but require multiple
  130. // symbols from a font to display.
  131. FlagClusterBreak
  132. // FlagParagraphBreak indicates that the glyph cluster does not represent actual
  133. // font glyphs, but was inserted by the shaper to represent line-breaking
  134. // whitespace characters. After a glyph with FlagParagraphBreak set, the shaper
  135. // will always return a glyph with FlagParagraphStart providing the X and Y
  136. // coordinates of the start of the next line, even if that line has no contents.
  137. FlagParagraphBreak
  138. // FlagParagraphStart indicates that the glyph starts a new paragraph.
  139. FlagParagraphStart
  140. // FlagTruncator indicates that the glyph is part of a special truncator run that
  141. // represents the portion of text removed due to truncation. A glyph with both
  142. // FlagTruncator and FlagClusterBreak will have a Runes field accounting for all
  143. // runes truncated.
  144. FlagTruncator
  145. )
  146. func (f Flags) String() string {
  147. var b strings.Builder
  148. if f&FlagParagraphStart != 0 {
  149. b.WriteString("S")
  150. } else {
  151. b.WriteString("_")
  152. }
  153. if f&FlagParagraphBreak != 0 {
  154. b.WriteString("P")
  155. } else {
  156. b.WriteString("_")
  157. }
  158. if f&FlagTowardOrigin != 0 {
  159. b.WriteString("T")
  160. } else {
  161. b.WriteString("_")
  162. }
  163. if f&FlagLineBreak != 0 {
  164. b.WriteString("L")
  165. } else {
  166. b.WriteString("_")
  167. }
  168. if f&FlagRunBreak != 0 {
  169. b.WriteString("R")
  170. } else {
  171. b.WriteString("_")
  172. }
  173. if f&FlagClusterBreak != 0 {
  174. b.WriteString("C")
  175. } else {
  176. b.WriteString("_")
  177. }
  178. if f&FlagTruncator != 0 {
  179. b.WriteString("…")
  180. } else {
  181. b.WriteString("_")
  182. }
  183. return b.String()
  184. }
  185. type GlyphID uint64
  186. // Shaper converts strings of text into glyphs that can be displayed. The same
  187. // Shaper should not be used in different goroutines.
  188. //
  189. // The Shaper controls text layout and has a cache, implemented as a map, and
  190. // so laying out text in two different goroutines can easily result in
  191. // concurrent access to said map, resulting in a panic.
  192. //
  193. // Practically speaking, this means you should use different Shapers for
  194. // different top-level windows.
  195. type Shaper struct {
  196. config struct {
  197. disableSystemFonts bool
  198. collection []FontFace
  199. }
  200. initialized bool
  201. shaper shaperImpl
  202. pathCache pathCache
  203. bitmapShapeCache bitmapShapeCache
  204. layoutCache layoutCache
  205. reader *bufio.Reader
  206. paragraph []byte
  207. // Iterator state.
  208. brokeParagraph bool
  209. pararagraphStart Glyph
  210. txt document
  211. line int
  212. run int
  213. glyph int
  214. // advance is the width of glyphs from the current run that have already been displayed.
  215. advance fixed.Int26_6
  216. // done tracks whether iteration is over.
  217. done bool
  218. err error
  219. }
  220. // ShaperOptions configure text shapers.
  221. type ShaperOption func(*Shaper)
  222. // NoSystemFonts can be used to disable system font loading.
  223. func NoSystemFonts() ShaperOption {
  224. return func(s *Shaper) {
  225. s.config.disableSystemFonts = true
  226. }
  227. }
  228. // WithCollection can be used to provide a collection of pre-loaded fonts to the shaper.
  229. func WithCollection(collection []FontFace) ShaperOption {
  230. return func(s *Shaper) {
  231. s.config.collection = collection
  232. }
  233. }
  234. // NewShaper constructs a shaper with the provided options.
  235. //
  236. // NewShaper must be called after [app.NewWindow], unless the [NoSystemFonts]
  237. // option is specified. This is an unfortunate restriction caused by some platforms
  238. // such as Android.
  239. func NewShaper(options ...ShaperOption) *Shaper {
  240. l := &Shaper{}
  241. for _, opt := range options {
  242. opt(l)
  243. }
  244. l.init()
  245. return l
  246. }
  247. func (l *Shaper) init() {
  248. if l.initialized {
  249. return
  250. }
  251. l.initialized = true
  252. l.reader = bufio.NewReader(nil)
  253. l.shaper = *newShaperImpl(!l.config.disableSystemFonts, l.config.collection)
  254. }
  255. // Layout text from an io.Reader according to a set of options. Results can be retrieved by
  256. // iteratively calling NextGlyph.
  257. func (l *Shaper) Layout(params Parameters, txt io.Reader) {
  258. l.init()
  259. l.layoutText(params, txt, "")
  260. }
  261. // LayoutString is Layout for strings.
  262. func (l *Shaper) LayoutString(params Parameters, str string) {
  263. l.init()
  264. l.layoutText(params, nil, str)
  265. }
  266. func (l *Shaper) reset(align Alignment) {
  267. l.line, l.run, l.glyph, l.advance = 0, 0, 0, 0
  268. l.done = false
  269. l.txt.reset()
  270. l.txt.alignment = align
  271. }
  272. // layoutText lays out a large text document by breaking it into paragraphs and laying
  273. // out each of them separately. This allows the shaping results to be cached independently
  274. // by paragraph. Only one of txt and str should be provided.
  275. func (l *Shaper) layoutText(params Parameters, txt io.Reader, str string) {
  276. l.reset(params.Alignment)
  277. if txt == nil && len(str) == 0 {
  278. l.txt.append(l.layoutParagraph(params, "", nil))
  279. return
  280. }
  281. l.reader.Reset(txt)
  282. truncating := params.MaxLines > 0
  283. var done bool
  284. var endByte int
  285. for !done {
  286. l.paragraph = l.paragraph[:0]
  287. if txt != nil {
  288. for {
  289. b, err := l.reader.ReadByte()
  290. if err != nil {
  291. // EOF or any other error ends processing here.
  292. done = true
  293. break
  294. }
  295. l.paragraph = append(l.paragraph, b)
  296. if b == '\n' {
  297. break
  298. }
  299. }
  300. if !done {
  301. _, re := l.reader.ReadByte()
  302. done = re != nil
  303. if !done {
  304. _ = l.reader.UnreadByte()
  305. }
  306. }
  307. } else {
  308. idx := strings.IndexByte(str, '\n')
  309. if idx == -1 {
  310. done = true
  311. endByte = len(str)
  312. } else {
  313. endByte = idx + 1
  314. done = endByte == len(str)
  315. }
  316. }
  317. if len(str[:endByte]) > 0 || (len(l.paragraph) > 0 || len(l.txt.lines) == 0) {
  318. params.forceTruncate = truncating && !done
  319. lines := l.layoutParagraph(params, str[:endByte], l.paragraph)
  320. if truncating {
  321. params.MaxLines -= len(lines.lines)
  322. if params.MaxLines == 0 {
  323. done = true
  324. // We've truncated the text, but we need to account for all of the runes we never
  325. // decoded in the truncator.
  326. var unreadRunes int
  327. if txt == nil {
  328. unreadRunes = utf8.RuneCountInString(str[endByte:])
  329. } else {
  330. for {
  331. _, _, e := l.reader.ReadRune()
  332. if e != nil {
  333. break
  334. }
  335. unreadRunes++
  336. }
  337. }
  338. l.txt.unreadRuneCount = unreadRunes
  339. }
  340. }
  341. l.txt.append(lines)
  342. }
  343. if done {
  344. return
  345. }
  346. str = str[endByte:]
  347. }
  348. }
  349. // layoutParagraph shapes and wraps a paragraph using the provided parameters.
  350. // It accepts the paragraph data in either string or rune format, preferring the
  351. // string in order to hit the shaper cache more quickly.
  352. func (l *Shaper) layoutParagraph(params Parameters, asStr string, asBytes []byte) document {
  353. if l == nil {
  354. return document{}
  355. }
  356. if len(asStr) == 0 && len(asBytes) > 0 {
  357. asStr = string(asBytes)
  358. }
  359. // Alignment is not part of the cache key because changing it does not impact shaping.
  360. lk := layoutKey{
  361. ppem: params.PxPerEm,
  362. maxWidth: params.MaxWidth,
  363. minWidth: params.MinWidth,
  364. maxLines: params.MaxLines,
  365. truncator: params.Truncator,
  366. locale: params.Locale,
  367. font: params.Font,
  368. forceTruncate: params.forceTruncate,
  369. wrapPolicy: params.WrapPolicy,
  370. str: asStr,
  371. lineHeight: params.LineHeight,
  372. lineHeightScale: params.LineHeightScale,
  373. }
  374. if l, ok := l.layoutCache.Get(lk); ok {
  375. return l
  376. }
  377. lines := l.shaper.LayoutRunes(params, []rune(asStr))
  378. l.layoutCache.Put(lk, lines)
  379. return lines
  380. }
  381. // NextGlyph returns the next glyph from the most recent shaping operation, if
  382. // any. If there are no more glyphs, ok will be false.
  383. func (l *Shaper) NextGlyph() (_ Glyph, ok bool) {
  384. l.init()
  385. if l.done {
  386. return Glyph{}, false
  387. }
  388. for {
  389. if l.line == len(l.txt.lines) {
  390. if l.brokeParagraph {
  391. l.brokeParagraph = false
  392. return l.pararagraphStart, true
  393. }
  394. if l.err == nil {
  395. l.err = io.EOF
  396. }
  397. return Glyph{}, false
  398. }
  399. line := l.txt.lines[l.line]
  400. if l.run == len(line.runs) {
  401. l.line++
  402. l.run = 0
  403. continue
  404. }
  405. run := line.runs[l.run]
  406. align := l.txt.alignment.Align(line.direction, line.width, l.txt.alignWidth)
  407. if l.line == 0 && l.run == 0 && len(run.Glyphs) == 0 {
  408. // The very first run is empty, which will only happen when the
  409. // entire text is a shaped empty string. Return a single synthetic
  410. // glyph to provide ascent/descent information to the caller.
  411. l.done = true
  412. return Glyph{
  413. X: align,
  414. Y: int32(line.yOffset),
  415. Runes: 0,
  416. Flags: FlagLineBreak | FlagClusterBreak | FlagRunBreak,
  417. Ascent: line.ascent,
  418. Descent: line.descent,
  419. }, true
  420. }
  421. if l.glyph == len(run.Glyphs) {
  422. l.run++
  423. l.glyph = 0
  424. l.advance = 0
  425. continue
  426. }
  427. glyphIdx := l.glyph
  428. rtl := run.Direction.Progression() == system.TowardOrigin
  429. if rtl {
  430. // If RTL, traverse glyphs backwards to ensure rune order.
  431. glyphIdx = len(run.Glyphs) - 1 - glyphIdx
  432. }
  433. g := run.Glyphs[glyphIdx]
  434. if rtl {
  435. // Modify the advance prior to computing runOffset to ensure that the
  436. // current glyph's width is subtracted in RTL.
  437. l.advance += g.xAdvance
  438. }
  439. // runOffset computes how far into the run the dot should be positioned.
  440. runOffset := l.advance
  441. if rtl {
  442. runOffset = run.Advance - l.advance
  443. }
  444. glyph := Glyph{
  445. ID: g.id,
  446. X: align + run.X + runOffset,
  447. Y: int32(line.yOffset),
  448. Ascent: line.ascent,
  449. Descent: line.descent,
  450. Advance: g.xAdvance,
  451. Runes: uint16(g.runeCount),
  452. Offset: fixed.Point26_6{
  453. X: g.xOffset,
  454. Y: g.yOffset,
  455. },
  456. Bounds: g.bounds,
  457. }
  458. if run.truncator {
  459. glyph.Flags |= FlagTruncator
  460. }
  461. l.glyph++
  462. if !rtl {
  463. l.advance += g.xAdvance
  464. }
  465. endOfRun := l.glyph == len(run.Glyphs)
  466. if endOfRun {
  467. glyph.Flags |= FlagRunBreak
  468. }
  469. endOfLine := endOfRun && l.run == len(line.runs)-1
  470. if endOfLine {
  471. glyph.Flags |= FlagLineBreak
  472. }
  473. endOfText := endOfLine && l.line == len(l.txt.lines)-1
  474. nextGlyph := l.glyph
  475. if rtl {
  476. nextGlyph = len(run.Glyphs) - 1 - nextGlyph
  477. }
  478. endOfCluster := endOfRun || run.Glyphs[nextGlyph].clusterIndex != g.clusterIndex
  479. if run.truncator {
  480. // Only emit a single cluster for the entire truncator sequence.
  481. endOfCluster = endOfRun
  482. }
  483. if endOfCluster {
  484. glyph.Flags |= FlagClusterBreak
  485. if run.truncator {
  486. glyph.Runes += uint16(l.txt.unreadRuneCount)
  487. }
  488. } else {
  489. glyph.Runes = 0
  490. }
  491. if run.Direction.Progression() == system.TowardOrigin {
  492. glyph.Flags |= FlagTowardOrigin
  493. }
  494. if l.brokeParagraph {
  495. glyph.Flags |= FlagParagraphStart
  496. l.brokeParagraph = false
  497. }
  498. if g.glyphCount == 0 {
  499. glyph.Flags |= FlagParagraphBreak
  500. l.brokeParagraph = true
  501. if endOfText {
  502. l.pararagraphStart = Glyph{
  503. Ascent: glyph.Ascent,
  504. Descent: glyph.Descent,
  505. Flags: FlagParagraphStart | FlagLineBreak | FlagRunBreak | FlagClusterBreak,
  506. }
  507. // If a glyph is both a paragraph break and the final glyph, it's a newline
  508. // at the end of the text. We must inform widgets like the text editor
  509. // of a valid cursor position they can use for "after" such a newline,
  510. // taking text alignment into account.
  511. l.pararagraphStart.X = l.txt.alignment.Align(line.direction, 0, l.txt.alignWidth)
  512. l.pararagraphStart.Y = glyph.Y + int32((glyph.Ascent + glyph.Descent).Ceil())
  513. }
  514. }
  515. return glyph, true
  516. }
  517. }
  518. const (
  519. facebits = 16
  520. sizebits = 16
  521. gidbits = 64 - facebits - sizebits
  522. )
  523. // newGlyphID encodes a face and a glyph id into a GlyphID.
  524. func newGlyphID(ppem fixed.Int26_6, faceIdx int, gid font.GID) GlyphID {
  525. if gid&^((1<<gidbits)-1) != 0 {
  526. panic("glyph id out of bounds")
  527. }
  528. if faceIdx&^((1<<facebits)-1) != 0 {
  529. panic("face index out of bounds")
  530. }
  531. if ppem&^((1<<sizebits)-1) != 0 {
  532. panic("ppem out of bounds")
  533. }
  534. // Mask off the upper 16 bits of ppem. This still allows values up to
  535. // 1023.
  536. ppem &= ((1 << sizebits) - 1)
  537. return GlyphID(faceIdx)<<(gidbits+sizebits) | GlyphID(ppem)<<(gidbits) | GlyphID(gid)
  538. }
  539. // splitGlyphID is the opposite of newGlyphID.
  540. func splitGlyphID(g GlyphID) (fixed.Int26_6, int, font.GID) {
  541. faceIdx := int(uint64(g) >> (gidbits + sizebits))
  542. ppem := fixed.Int26_6((g & ((1<<sizebits - 1) << gidbits)) >> gidbits)
  543. gid := font.GID(g) & (1<<gidbits - 1)
  544. return ppem, faceIdx, gid
  545. }
  546. // Shape converts the provided glyphs into a path. The path will enclose the forms
  547. // of all vector glyphs.
  548. // All glyphs are expected to be from a single line of text (their Y offsets are ignored).
  549. func (l *Shaper) Shape(gs []Glyph) clip.PathSpec {
  550. l.init()
  551. key := l.pathCache.hashGlyphs(gs)
  552. shape, ok := l.pathCache.Get(key, gs)
  553. if ok {
  554. return shape
  555. }
  556. pathOps := new(op.Ops)
  557. shape = l.shaper.Shape(pathOps, gs)
  558. l.pathCache.Put(key, gs, shape)
  559. return shape
  560. }
  561. // Bitmaps extracts bitmap glyphs from the provided slice and creates an op.CallOp to present
  562. // them. The returned op.CallOp will align correctly with the return value of Shape() for the
  563. // same gs slice.
  564. // All glyphs are expected to be from a single line of text (their Y offsets are ignored).
  565. func (l *Shaper) Bitmaps(gs []Glyph) op.CallOp {
  566. l.init()
  567. key := l.bitmapShapeCache.hashGlyphs(gs)
  568. call, ok := l.bitmapShapeCache.Get(key, gs)
  569. if ok {
  570. return call
  571. }
  572. callOps := new(op.Ops)
  573. call = l.shaper.Bitmaps(callOps, gs)
  574. l.bitmapShapeCache.Put(key, gs, call)
  575. return call
  576. }