raster_fixed.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package vector
  5. // This file contains a fixed point math implementation of the vector
  6. // graphics rasterizer.
  7. const (
  8. // ϕ is the number of binary digits after the fixed point.
  9. //
  10. // For example, if ϕ == 10 (and int1ϕ is based on the int32 type) then we
  11. // are using 22.10 fixed point math.
  12. //
  13. // When changing this number, also change the assembly code (search for ϕ
  14. // in the .s files).
  15. ϕ = 9
  16. fxOne int1ϕ = 1 << ϕ
  17. fxOneAndAHalf int1ϕ = 1<<ϕ + 1<<(ϕ-1)
  18. fxOneMinusIota int1ϕ = 1<<ϕ - 1 // Used for rounding up.
  19. )
// int1ϕ is a signed fixed-point number with 1*ϕ binary digits after the fixed
// point.
//
// It is backed by an int32, so the remaining 32-ϕ bits (sign included) hold
// the integer part. A real number v is represented as v * (1 << ϕ); see fxOne
// for the representation of 1.0.
type int1ϕ int32
// int2ϕ is a signed fixed-point number with 2*ϕ binary digits after the fixed
// point. Like int1ϕ it is backed by an int32; the product of two int1ϕ values
// naturally has this format.
//
// The Rasterizer's bufU32 field, nominally of type []uint32 (since that slice
// is also used by other code), can be thought of as a []int2ϕ during the
// fixedLineTo method. Lines of code that are actually like:
//
//	buf[i] += uint32(etc) // buf has type []uint32.
//
// can be thought of as
//
//	buf[i] += int2ϕ(etc) // buf has type []int2ϕ.
//
// The fixedAccumulateXxx functions perform the reverse reinterpretation,
// converting each uint32 element back to an int2ϕ before summing.
type int2ϕ int32
  36. func fixedMax(x, y int1ϕ) int1ϕ {
  37. if x > y {
  38. return x
  39. }
  40. return y
  41. }
  42. func fixedMin(x, y int1ϕ) int1ϕ {
  43. if x < y {
  44. return x
  45. }
  46. return y
  47. }
  48. func fixedFloor(x int1ϕ) int32 { return int32(x >> ϕ) }
  49. func fixedCeil(x int1ϕ) int32 { return int32((x + fxOneMinusIota) >> ϕ) }
// fixedLineTo is the fixed point implementation of adding a straight line
// segment from the current pen position (z.penX, z.penY) to (bx, by). It moves
// the pen to (bx, by) and, for every row that the segment crosses, adds the
// segment's signed contribution to the cells of z.bufU32 in that row.
//
// The values added to z.bufU32 are int2ϕ numbers stored as uint32 (see the
// int2ϕ type). Rows are z.size.X cells wide and only rows in the range
// [0, z.size.Y) are written.
//
// NOTE(review): clamp is defined elsewhere in the package. Whatever index it
// returns is only used when it is below len(buf), so every write is bounds
// checked by the "i < uint(len(buf))" guards.
func (z *Rasterizer) fixedLineTo(bx, by float32) {
	ax, ay := z.penX, z.penY
	z.penX, z.penY = bx, by

	// Orient the segment so that ay <= by. dir remembers the direction in
	// which the caller drew it: +1 for increasing y, -1 for decreasing y.
	dir := int1ϕ(1)
	if ay > by {
		dir, ax, ay, bx, by = -1, bx, by, ax, ay
	}
	// Horizontal line segments yield no change in coverage. Almost horizontal
	// segments would yield some change, in ideal math, but the computation
	// further below, involving 1 / (by - ay), is unstable in fixed point math,
	// so we treat the segment as if it was perfectly horizontal.
	if by-ay <= 0.000001 {
		return
	}
	// dxdy is the change in x per unit change in y.
	dxdy := (bx - ax) / (by - ay)

	// Convert the end points' y coordinates to int1ϕ fixed point.
	ayϕ := int1ϕ(ay * float32(fxOne))
	byϕ := int1ϕ(by * float32(fxOne))

	// x is the segment's x coordinate (int1ϕ) where it enters the current
	// row; y is the current row index. Rows at or beyond z.size.Y are not
	// visited.
	x := int1ϕ(ax * float32(fxOne))
	y := fixedFloor(ayϕ)
	yMax := fixedCeil(byϕ)
	if yMax > int32(z.size.Y) {
		yMax = int32(z.size.Y)
	}
	width := int32(z.size.X)

	for ; y < yMax; y++ {
		// dy is the vertical extent (int1ϕ) of the part of the segment that
		// lies inside row y; xNext is the x coordinate where that part ends.
		dy := fixedMin(int1ϕ(y+1)<<ϕ, byϕ) - fixedMax(int1ϕ(y)<<ϕ, ayϕ)
		xNext := x + int1ϕ(float32(dy)*dxdy)
		// Rows with a negative index are not written, but x must still
		// follow the segment down to the first row that is.
		if y < 0 {
			x = xNext
			continue
		}
		// buf starts at the first cell of row y.
		buf := z.bufU32[y*width:]
		d := dy * dir // d ranges up to ±1<<(1*ϕ).
		// Order the row's two x coordinates so that x0 <= x1.
		x0, x1 := x, xNext
		if x > xNext {
			x0, x1 = x1, x0
		}
		// x0i is x0 rounded down and x1i is x1 rounded up, both as whole cell
		// indices; x0Floor and x1Ceil are those same values back in int1ϕ.
		x0i := fixedFloor(x0)
		x0Floor := int1ϕ(x0i) << ϕ
		x1i := fixedCeil(x1)
		x1Ceil := int1ϕ(x1i) << ϕ

		if x1i <= x0i+1 {
			// Within this row the segment does not leave cell x0i. xmf is the
			// midpoint of the row's two x coordinates, measured from the left
			// edge of that cell; d is split between cells x0i and x0i+1
			// according to it.
			xmf := (x+xNext)>>1 - x0Floor
			if i := clamp(x0i+0, width); i < uint(len(buf)) {
				buf[i] += uint32(d * (fxOne - xmf))
			}
			if i := clamp(x0i+1, width); i < uint(len(buf)) {
				buf[i] += uint32(d * xmf)
			}
		} else {
			// Within this row the segment spans two or more cells.
			//
			// oneOverS is the horizontal extent x1 - x0 (int1ϕ). x0f is the
			// offset of x0 from the left edge of cell x0i, and x1f is the
			// offset of x1 from the left edge of cell x1i-1.
			oneOverS := x1 - x0
			twoOverS := 2 * oneOverS
			x0f := x0 - x0Floor
			oneMinusX0f := fxOne - x0f
			oneMinusX0fSquared := oneMinusX0f * oneMinusX0f
			x1f := x1 - x1Ceil + fxOne
			x1fSquared := x1f * x1f

			// These next two variables are unused, as rounding errors are
			// minimized when we delay the division by oneOverS for as long as
			// possible. These lines of code (and the "In ideal math" comments
			// below) are commented out instead of deleted in order to aid the
			// comparison with the floating point version of the rasterizer.
			//
			// a0 := ((oneMinusX0f * oneMinusX0f) >> 1) / oneOverS
			// am := ((x1f * x1f) >> 1) / oneOverS

			// First cell of the span.
			if i := clamp(x0i, width); i < uint(len(buf)) {
				// In ideal math: buf[i] += uint32(d * a0)
				D := oneMinusX0fSquared // D ranges up to ±1<<(2*ϕ).
				D *= d // D ranges up to ±1<<(3*ϕ).
				D /= twoOverS
				buf[i] += uint32(D)
			}

			if x1i == x0i+2 {
				// The span covers exactly two cells, so the second cell
				// receives everything that a0 and am do not account for.
				if i := clamp(x0i+1, width); i < uint(len(buf)) {
					// In ideal math: buf[i] += uint32(d * (fxOne - a0 - am))
					//
					// (x1i == x0i+2) and (twoOverS == 2 * (x1 - x0)) implies
					// that twoOverS ranges up to +1<<(1*ϕ+2).
					D := twoOverS<<ϕ - oneMinusX0fSquared - x1fSquared // D ranges up to ±1<<(2*ϕ+2).
					D *= d // D ranges up to ±1<<(3*ϕ+2).
					D /= twoOverS
					buf[i] += uint32(D)
				}
			} else {
				// The span covers three or more cells: the second cell, a run
				// of interior cells that all receive the same amount, and the
				// second to last cell are handled in turn.

				// This is commented out for the same reason as a0 and am.
				//
				// a1 := ((fxOneAndAHalf - x0f) << ϕ) / oneOverS

				if i := clamp(x0i+1, width); i < uint(len(buf)) {
					// In ideal math:
					//	buf[i] += uint32(d * (a1 - a0))
					// or equivalently (but better in non-ideal, integer math,
					// with respect to rounding errors),
					//	buf[i] += uint32(A * d / twoOverS)
					// where
					//	A = (a1 - a0) * twoOverS
					//	  = a1*twoOverS - a0*twoOverS
					// Noting that twoOverS/oneOverS equals 2, substituting for
					// a0 and then a1, given above, yields:
					//	A = a1*twoOverS - oneMinusX0fSquared
					//	  = (fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared
					//	  = fxOneAndAHalf<<(ϕ+1) - x0f<<(ϕ+1) - oneMinusX0fSquared
					//
					// This is a positive number minus two non-negative
					// numbers. For an upper bound on A, the positive number is
					//	P = fxOneAndAHalf<<(ϕ+1)
					//	  < (2*fxOne)<<(ϕ+1)
					//	  = fxOne<<(ϕ+2)
					//	  = 1<<(2*ϕ+2)
					//
					// For a lower bound on A, the two non-negative numbers are
					//	N = x0f<<(ϕ+1) + oneMinusX0fSquared
					//	  ≤ x0f<<(ϕ+1) + fxOne*fxOne
					//	  = x0f<<(ϕ+1) + 1<<(2*ϕ)
					//	  < x0f<<(ϕ+1) + 1<<(2*ϕ+1)
					//	  ≤ fxOne<<(ϕ+1) + 1<<(2*ϕ+1)
					//	  = 1<<(2*ϕ+1) + 1<<(2*ϕ+1)
					//	  = 1<<(2*ϕ+2)
					//
					// Thus, A ranges up to ±1<<(2*ϕ+2). It is possible to
					// derive a tighter bound, but this bound is sufficient to
					// reason about overflow.
					D := (fxOneAndAHalf-x0f)<<(ϕ+1) - oneMinusX0fSquared // D ranges up to ±1<<(2*ϕ+2).
					D *= d // D ranges up to ±1<<(3*ϕ+2).
					D /= twoOverS
					buf[i] += uint32(D)
				}

				// Every interior cell receives the same amount, so it is
				// computed once, outside the loop.
				dTimesS := uint32((d << (2 * ϕ)) / oneOverS)
				for xi := x0i + 2; xi < x1i-1; xi++ {
					if i := clamp(xi, width); i < uint(len(buf)) {
						buf[i] += dTimesS
					}
				}

				// This is commented out for the same reason as a0 and am.
				//
				// a2 := a1 + (int1ϕ(x1i-x0i-3)<<(2*ϕ))/oneOverS

				if i := clamp(x1i-1, width); i < uint(len(buf)) {
					// In ideal math:
					//	buf[i] += uint32(d * (fxOne - a2 - am))
					// or equivalently (but better in non-ideal, integer math,
					// with respect to rounding errors),
					//	buf[i] += uint32(A * d / twoOverS)
					// where
					//	A = (fxOne - a2 - am) * twoOverS
					//	  = twoOverS<<ϕ - a2*twoOverS - am*twoOverS
					// Noting that twoOverS/oneOverS equals 2, substituting for
					// am and then a2, given above, yields:
					//	A = twoOverS<<ϕ - a2*twoOverS - x1f*x1f
					//	  = twoOverS<<ϕ - a1*twoOverS - (int1ϕ(x1i-x0i-3)<<(2*ϕ))*2 - x1f*x1f
					//	  = twoOverS<<ϕ - a1*twoOverS - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f
					// Substituting for a1, given above, yields:
					//	A = twoOverS<<ϕ - ((fxOneAndAHalf-x0f)<<ϕ)*2 - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f
					//	  = twoOverS<<ϕ - (fxOneAndAHalf-x0f)<<(ϕ+1) - int1ϕ(x1i-x0i-3)<<(2*ϕ+1) - x1f*x1f
					//	  = B<<ϕ - x1f*x1f
					// where
					//	B = twoOverS - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1)
					//	  = (x1-x0)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1)
					//
					// Re-arranging the definitions given above:
					//	x0Floor := int1ϕ(x0i) << ϕ
					//	x0f := x0 - x0Floor
					//	x1Ceil := int1ϕ(x1i) << ϕ
					//	x1f := x1 - x1Ceil + fxOne
					// combined with fxOne = 1<<ϕ yields:
					//	x0 = x0f + int1ϕ(x0i)<<ϕ
					//	x1 = x1f + int1ϕ(x1i-1)<<ϕ
					// so that expanding (x1-x0) yields:
					//	B = (x1f-x0f + int1ϕ(x1i-x0i-1)<<ϕ)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1)
					//	  = (x1f-x0f)<<1 + int1ϕ(x1i-x0i-1)<<(ϕ+1) - (fxOneAndAHalf-x0f)<<1 - int1ϕ(x1i-x0i-3)<<(ϕ+1)
					// A large part of the second and fourth terms cancel:
					//	B = (x1f-x0f)<<1 - (fxOneAndAHalf-x0f)<<1 - int1ϕ(-2)<<(ϕ+1)
					//	  = (x1f-x0f)<<1 - (fxOneAndAHalf-x0f)<<1 + 1<<(ϕ+2)
					//	  = (x1f - fxOneAndAHalf)<<1 + 1<<(ϕ+2)
					// The first term, (x1f - fxOneAndAHalf)<<1, is a negative
					// number, bounded below by -fxOneAndAHalf<<1, which is
					// greater than -fxOne<<2, or -1<<(ϕ+2). Thus, B ranges up
					// to ±1<<(ϕ+2). One final simplification:
					//	B = x1f<<1 + (1<<(ϕ+2) - fxOneAndAHalf<<1)
					const C = 1<<(ϕ+2) - fxOneAndAHalf<<1
					D := x1f<<1 + C // D ranges up to ±1<<(1*ϕ+2).
					D <<= ϕ // D ranges up to ±1<<(2*ϕ+2).
					D -= x1fSquared // D ranges up to ±1<<(2*ϕ+3).
					D *= d // D ranges up to ±1<<(3*ϕ+3).
					D /= twoOverS
					buf[i] += uint32(D)
				}
			}

			// Cell x1i, the one just past the span.
			if i := clamp(x1i, width); i < uint(len(buf)) {
				// In ideal math: buf[i] += uint32(d * am)
				D := x1fSquared // D ranges up to ±1<<(2*ϕ).
				D *= d // D ranges up to ±1<<(3*ϕ).
				D /= twoOverS
				buf[i] += uint32(D)
			}
		}

		// The next row starts where this one ended.
		x = xNext
	}
}
  247. func fixedAccumulateOpOver(dst []uint8, src []uint32) {
  248. // Sanity check that len(dst) >= len(src).
  249. if len(dst) < len(src) {
  250. return
  251. }
  252. acc := int2ϕ(0)
  253. for i, v := range src {
  254. acc += int2ϕ(v)
  255. a := acc
  256. if a < 0 {
  257. a = -a
  258. }
  259. a >>= 2*ϕ - 16
  260. if a > 0xffff {
  261. a = 0xffff
  262. }
  263. // This algorithm comes from the standard library's image/draw package.
  264. dstA := uint32(dst[i]) * 0x101
  265. maskA := uint32(a)
  266. outA := dstA*(0xffff-maskA)/0xffff + maskA
  267. dst[i] = uint8(outA >> 8)
  268. }
  269. }
  270. func fixedAccumulateOpSrc(dst []uint8, src []uint32) {
  271. // Sanity check that len(dst) >= len(src).
  272. if len(dst) < len(src) {
  273. return
  274. }
  275. acc := int2ϕ(0)
  276. for i, v := range src {
  277. acc += int2ϕ(v)
  278. a := acc
  279. if a < 0 {
  280. a = -a
  281. }
  282. a >>= 2*ϕ - 8
  283. if a > 0xff {
  284. a = 0xff
  285. }
  286. dst[i] = uint8(a)
  287. }
  288. }
  289. func fixedAccumulateMask(buf []uint32) {
  290. acc := int2ϕ(0)
  291. for i, v := range buf {
  292. acc += int2ϕ(v)
  293. a := acc
  294. if a < 0 {
  295. a = -a
  296. }
  297. a >>= 2*ϕ - 16
  298. if a > 0xffff {
  299. a = 0xffff
  300. }
  301. buf[i] = uint32(a)
  302. }
  303. }