// family_parser.go
  1. package text
  2. import (
  3. "fmt"
  4. "strings"
  5. "unicode"
  6. "unicode/utf8"
  7. )
  8. type tokenKind uint8
  9. const (
  10. tokenStr tokenKind = iota
  11. tokenComma
  12. tokenEOF
  13. )
  14. type token struct {
  15. kind tokenKind
  16. value string
  17. }
  18. func (t token) String() string {
  19. switch t.kind {
  20. case tokenStr:
  21. return t.value
  22. case tokenComma:
  23. return ","
  24. case tokenEOF:
  25. return "EOF"
  26. default:
  27. return "unknown"
  28. }
  29. }
  30. type lexState func(*lexer) lexState
  31. func lexText(l *lexer) lexState {
  32. for {
  33. switch r := l.next(); {
  34. case r == -1:
  35. l.ignore()
  36. l.emit(tokenEOF)
  37. return nil
  38. case unicode.IsSpace(r):
  39. continue
  40. case r == ',':
  41. l.ignore()
  42. l.emit(tokenComma)
  43. case r == '"':
  44. l.ignore()
  45. return lexDquote
  46. case r == '\'':
  47. l.ignore()
  48. return lexSquote
  49. default:
  50. return lexBareStr
  51. }
  52. }
  53. }
  54. func lexBareStr(l *lexer) lexState {
  55. defer l.emitProcessed(tokenStr, func(s string) (string, error) {
  56. return strings.TrimSpace(s), nil
  57. })
  58. for {
  59. if strings.HasPrefix(l.input[l.pos:], `,`) {
  60. return lexText
  61. }
  62. switch r := l.next(); {
  63. case r == -1:
  64. return lexText
  65. }
  66. }
  67. }
// lexDquote lexes the body of a double-quoted family name.
func lexDquote(l *lexer) lexState {
	return lexQuote(l, `"`)
}

// lexSquote lexes the body of a single-quoted family name.
func lexSquote(l *lexer) lexState {
	return lexQuote(l, `'`)
}
  74. func unescape(s string, quote rune) (string, error) {
  75. var b strings.Builder
  76. hitNonSpace := false
  77. var wb strings.Builder
  78. for i := 0; i < len(s); {
  79. r, sz := utf8.DecodeRuneInString(s[i:])
  80. i += sz
  81. if unicode.IsSpace(r) {
  82. if !hitNonSpace {
  83. continue
  84. }
  85. wb.WriteRune(r)
  86. continue
  87. }
  88. hitNonSpace = true
  89. // If we get here, we're not looking at whitespace.
  90. // Insert any buffered up whitespace characters from
  91. // the gap between words.
  92. b.WriteString(wb.String())
  93. wb.Reset()
  94. if r == '\\' {
  95. r, sz := utf8.DecodeRuneInString(s[i:])
  96. i += sz
  97. switch r {
  98. case '\\', quote:
  99. b.WriteRune(r)
  100. default:
  101. return "", fmt.Errorf("illegal escape sequence \\%c", r)
  102. }
  103. } else {
  104. b.WriteRune(r)
  105. }
  106. }
  107. return b.String(), nil
  108. }
// lexQuote lexes the body of a quoted family name until an unescaped
// closing quote. mark is the one-rune closing quote string (`"` or `'`).
// The emitted token value is unescaped via unescape; on failure the error
// is recorded on the lexer and lexing stops.
func lexQuote(l *lexer, mark string) lexState {
	escaping := false
	for {
		// An unescaped quote mark terminates the string.
		if isQuote := strings.HasPrefix(l.input[l.pos:], mark); isQuote && !escaping {
			err := l.emitProcessed(tokenStr, func(s string) (string, error) {
				return unescape(s, []rune(mark)[0])
			})
			if err != nil {
				l.err = err
				return nil
			}
			// Consume and discard the closing quote itself.
			l.next()
			l.ignore()
			return lexText
		}
		// escaped records whether the rune about to be read is preceded by
		// an active (unconsumed) backslash escape.
		escaped := escaping
		switch r := l.next(); {
		case r == -1:
			l.err = fmt.Errorf("unexpected EOF while parsing %s-quoted family", mark)
			return lexText
		case r == '\\':
			// A backslash starts a new escape only when it is not itself
			// escaped, so `\\` does not escape the rune that follows it.
			if !escaped {
				escaping = true
			}
		}
		// An escape applies to exactly one rune.
		if escaped {
			escaping = false
		}
	}
}
// lexer is a reusable lexer for font family fallback expressions.
type lexer struct {
	input  string  // unconsumed input; shrinks as consumed text is ignored
	pos    int     // byte offset of the lexing cursor within input
	tokens []token // tokens emitted so far during the current run
	err    error   // first error recorded by a state function
}
// ignore discards the input consumed so far (everything before l.pos) and
// resets the cursor.
func (l *lexer) ignore() {
	l.input = l.input[l.pos:]
	l.pos = 0
}
  149. // next decodes the next rune in the input and returns it.
  150. func (l *lexer) next() int32 {
  151. if l.pos >= len(l.input) {
  152. return -1
  153. }
  154. r, w := utf8.DecodeRuneInString(l.input[l.pos:])
  155. l.pos += w
  156. return r
  157. }
// emit adds a token of the given kind whose value is the raw consumed
// input. The identity transform never fails, so the error returned by
// emitProcessed is deliberately discarded here.
func (l *lexer) emit(t tokenKind) {
	l.emitProcessed(t, func(s string) (string, error) { return s, nil })
}
  162. // emitProcessed adds a token of the given kind, but transforms its value
  163. // with the provided closure first.
  164. func (l *lexer) emitProcessed(t tokenKind, f func(string) (string, error)) error {
  165. val, err := f(l.input[:l.pos])
  166. l.tokens = append(l.tokens, token{
  167. kind: t,
  168. value: val,
  169. })
  170. l.ignore()
  171. return err
  172. }
  173. // run executes the lexer on the given input.
  174. func (l *lexer) run(input string) ([]token, error) {
  175. l.input = input
  176. l.tokens = l.tokens[:0]
  177. l.pos = 0
  178. for state := lexText; state != nil; {
  179. state = state(l)
  180. }
  181. return l.tokens, l.err
  182. }
// parser implements a simple recursive descent parser for font family fallback
// expressions.
type parser struct {
	faces  []string // extracted family names; reused across calls to parse
	lexer  lexer    // reusable lexer instance
	tokens []token  // unconsumed tokens for the parse in progress
}
  190. // parse the provided rule and return the extracted font families. The returned families
  191. // are valid only until the next call to parse. If parsing fails, an error describing the
  192. // failure is returned instead.
  193. func (p *parser) parse(rule string) ([]string, error) {
  194. var err error
  195. p.tokens, err = p.lexer.run(rule)
  196. if err != nil {
  197. return nil, err
  198. }
  199. p.faces = p.faces[:0]
  200. return p.faces, p.parseList()
  201. }
  202. // parse implements the production:
  203. //
  204. // LIST ::= <FACE> <COMMA> <LIST> | <FACE>
  205. func (p *parser) parseList() error {
  206. if len(p.tokens) < 0 {
  207. return fmt.Errorf("expected family name, got EOF")
  208. }
  209. if head := p.tokens[0]; head.kind != tokenStr {
  210. return fmt.Errorf("expected family name, got %s", head)
  211. } else {
  212. p.faces = append(p.faces, head.value)
  213. p.tokens = p.tokens[1:]
  214. }
  215. switch head := p.tokens[0]; head.kind {
  216. case tokenEOF:
  217. return nil
  218. case tokenComma:
  219. p.tokens = p.tokens[1:]
  220. return p.parseList()
  221. default:
  222. return fmt.Errorf("unexpected token %s", head)
  223. }
  224. }