tcvn3.go 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. package mahonia
  2. // Converters for TCVN3 encoding.
  3. import (
  4. "sync"
  5. )
  6. var (
  7. onceTCVN3 sync.Once
  8. dataTCVN3 = struct {
  9. UnicodeToWord map[rune][2]byte
  10. WordToUnicode [256]struct {
  11. r rune
  12. m *[256]rune
  13. }
  14. }{}
  15. )
  16. func init() {
  17. p := new(Charset)
  18. p.Name = "TCVN3"
  19. p.NewDecoder = func() Decoder {
  20. onceTCVN3.Do(buildTCVN3Tables)
  21. return decodeTCVN3
  22. }
  23. p.NewEncoder = func() Encoder {
  24. onceTCVN3.Do(buildTCVN3Tables)
  25. return encodeTCVN3
  26. }
  27. RegisterCharset(p)
  28. }
  29. func decodeTCVN3(p []byte) (rune, int, Status) {
  30. if len(p) == 0 {
  31. return 0, 0, NO_ROOM
  32. }
  33. item := &dataTCVN3.WordToUnicode[p[0]]
  34. if item.m != nil && len(p) > 1 {
  35. if r := item.m[p[1]]; r != 0 {
  36. return r, 2, SUCCESS
  37. }
  38. }
  39. if item.r != 0 {
  40. return item.r, 1, SUCCESS
  41. }
  42. if p[0] < 0x80 {
  43. return rune(p[0]), 1, SUCCESS
  44. }
  45. return '?', 1, INVALID_CHAR
  46. }
  47. func encodeTCVN3(p []byte, c rune) (int, Status) {
  48. if len(p) == 0 {
  49. return 0, NO_ROOM
  50. }
  51. if c < rune(0x80) {
  52. p[0] = byte(c)
  53. return 1, SUCCESS
  54. }
  55. if v, ok := dataTCVN3.UnicodeToWord[c]; ok {
  56. if v[1] != 0 {
  57. if len(p) < 2 {
  58. return 0, NO_ROOM
  59. }
  60. p[0] = v[0]
  61. p[1] = v[1]
  62. return 2, SUCCESS
  63. } else {
  64. p[0] = v[0]
  65. return 1, SUCCESS
  66. }
  67. }
  68. p[0] = '?'
  69. return 1, INVALID_CHAR
  70. }
  71. func buildTCVN3Tables() {
  72. dataTCVN3.UnicodeToWord = map[rune][2]byte{
  73. // one byte
  74. 0x00C2: {0xA2, 0x00},
  75. 0x00CA: {0xA3, 0x00},
  76. 0x00D4: {0xA4, 0x00},
  77. 0x00E0: {0xB5, 0x00},
  78. 0x00E1: {0xB8, 0x00},
  79. 0x00E2: {0xA9, 0x00},
  80. 0x00E3: {0xB7, 0x00},
  81. 0x00E8: {0xCC, 0x00},
  82. 0x00E9: {0xD0, 0x00},
  83. 0x00EA: {0xAA, 0x00},
  84. 0x00EC: {0xD7, 0x00},
  85. 0x00ED: {0xDD, 0x00},
  86. 0x00F2: {0xDF, 0x00},
  87. 0x00F3: {0xE3, 0x00},
  88. 0x00F4: {0xAB, 0x00},
  89. 0x00F5: {0xE2, 0x00},
  90. 0x00F9: {0xEF, 0x00},
  91. 0x00FA: {0xF3, 0x00},
  92. 0x00FD: {0xFD, 0x00},
  93. 0x0102: {0xA1, 0x00},
  94. 0x0103: {0xA8, 0x00},
  95. 0x0110: {0xA7, 0x00},
  96. 0x0111: {0xAE, 0x00},
  97. 0x0129: {0xDC, 0x00},
  98. 0x0169: {0xF2, 0x00},
  99. 0x01A0: {0xA5, 0x00},
  100. 0x01A1: {0xAC, 0x00},
  101. 0x01AF: {0xA6, 0x00},
  102. 0x01B0: {0xAD, 0x00},
  103. 0x1EA1: {0xB9, 0x00},
  104. 0x1EA3: {0xB6, 0x00},
  105. 0x1EA5: {0xCA, 0x00},
  106. 0x1EA7: {0xC7, 0x00},
  107. 0x1EA9: {0xC8, 0x00},
  108. 0x1EAB: {0xC9, 0x00},
  109. 0x1EAD: {0xCB, 0x00},
  110. 0x1EAF: {0xBE, 0x00},
  111. 0x1EB1: {0xBB, 0x00},
  112. 0x1EB3: {0xBC, 0x00},
  113. 0x1EB5: {0xBD, 0x00},
  114. 0x1EB7: {0xC6, 0x00},
  115. 0x1EB9: {0xD1, 0x00},
  116. 0x1EBB: {0xCE, 0x00},
  117. 0x1EBD: {0xCF, 0x00},
  118. 0x1EBF: {0xD5, 0x00},
  119. 0x1EC1: {0xD2, 0x00},
  120. 0x1EC3: {0xD3, 0x00},
  121. 0x1EC5: {0xD4, 0x00},
  122. 0x1EC7: {0xD6, 0x00},
  123. 0x1EC9: {0xD8, 0x00},
  124. 0x1ECB: {0xDE, 0x00},
  125. 0x1ECD: {0xE4, 0x00},
  126. 0x1ECF: {0xE1, 0x00},
  127. 0x1ED1: {0xE8, 0x00},
  128. 0x1ED3: {0xE5, 0x00},
  129. 0x1ED5: {0xE6, 0x00},
  130. 0x1ED7: {0xE7, 0x00},
  131. 0x1ED9: {0xE9, 0x00},
  132. 0x1EDB: {0xED, 0x00},
  133. 0x1EDD: {0xEA, 0x00},
  134. 0x1EDF: {0xEB, 0x00},
  135. 0x1EE1: {0xEC, 0x00},
  136. 0x1EE3: {0xEE, 0x00},
  137. 0x1EE5: {0xF4, 0x00},
  138. 0x1EE7: {0xF1, 0x00},
  139. 0x1EE9: {0xF8, 0x00},
  140. 0x1EEB: {0xF5, 0x00},
  141. 0x1EED: {0xF6, 0x00},
  142. 0x1EEF: {0xF7, 0x00},
  143. 0x1EF1: {0xF9, 0x00},
  144. 0x1EF3: {0xFA, 0x00},
  145. 0x1EF5: {0xFE, 0x00},
  146. 0x1EF7: {0xFB, 0x00},
  147. 0x1EF9: {0xFC, 0x00},
  148. // two bytes
  149. 0x00C0: {0x41, 0xB5},
  150. 0x00C1: {0x41, 0xB8},
  151. 0x00C3: {0x41, 0xB7},
  152. 0x00C8: {0x45, 0xCC},
  153. 0x00C9: {0x45, 0xD0},
  154. 0x00CC: {0x49, 0xD7},
  155. 0x00CD: {0x49, 0xDD},
  156. 0x00D2: {0x4F, 0xDF},
  157. 0x00D3: {0x4F, 0xE3},
  158. 0x00D5: {0x4F, 0xE2},
  159. 0x00D9: {0x55, 0xEF},
  160. 0x00DA: {0x55, 0xF3},
  161. 0x00DD: {0x59, 0xFD},
  162. 0x0128: {0x49, 0xDC},
  163. 0x0168: {0x55, 0xF2},
  164. 0x1EA0: {0x41, 0xB9},
  165. 0x1EA2: {0x41, 0xB6},
  166. 0x1EA4: {0xA2, 0xCA},
  167. 0x1EA6: {0xA2, 0xC7},
  168. 0x1EA8: {0xA2, 0xC8},
  169. 0x1EAA: {0xA2, 0xC9},
  170. 0x1EAC: {0xA2, 0xCB},
  171. 0x1EAE: {0xA1, 0xBE},
  172. 0x1EB0: {0xA1, 0xBB},
  173. 0x1EB2: {0xA1, 0xBC},
  174. 0x1EB4: {0xA1, 0xBD},
  175. 0x1EB6: {0xA1, 0xC6},
  176. 0x1EB8: {0x45, 0xD1},
  177. 0x1EBA: {0x45, 0xCE},
  178. 0x1EBC: {0x45, 0xCF},
  179. 0x1EBE: {0xA3, 0xD5},
  180. 0x1EC0: {0xA3, 0xD2},
  181. 0x1EC2: {0xA3, 0xD3},
  182. 0x1EC4: {0xA3, 0xD4},
  183. 0x1EC6: {0xA3, 0xD6},
  184. 0x1EC8: {0x49, 0xD8},
  185. 0x1ECA: {0x49, 0xDE},
  186. 0x1ECC: {0x4F, 0xE4},
  187. 0x1ECE: {0x4F, 0xE1},
  188. 0x1ED0: {0xA4, 0xE8},
  189. 0x1ED2: {0xA4, 0xE5},
  190. 0x1ED4: {0xA4, 0xE6},
  191. 0x1ED6: {0xA4, 0xE7},
  192. 0x1ED8: {0xA4, 0xE9},
  193. 0x1EDA: {0xA5, 0xED},
  194. 0x1EDC: {0xA5, 0xEA},
  195. 0x1EDE: {0xA5, 0xEB},
  196. 0x1EE0: {0xA5, 0xEC},
  197. 0x1EE2: {0xA5, 0xEE},
  198. 0x1EE4: {0x55, 0xF4},
  199. 0x1EE6: {0x55, 0xF1},
  200. 0x1EE8: {0xA6, 0xF8},
  201. 0x1EEA: {0xA6, 0xF5},
  202. 0x1EEC: {0xA6, 0xF6},
  203. 0x1EEE: {0xA6, 0xF7},
  204. 0x1EF0: {0xA6, 0xF9},
  205. 0x1EF2: {0x59, 0xFA},
  206. 0x1EF4: {0x59, 0xFE},
  207. 0x1EF6: {0x59, 0xFB},
  208. 0x1EF8: {0x59, 0xFC},
  209. }
  210. for r, b := range dataTCVN3.UnicodeToWord {
  211. item := &dataTCVN3.WordToUnicode[b[0]]
  212. if b[1] == 0 {
  213. item.r = r
  214. } else {
  215. if item.m == nil {
  216. item.m = new([256]rune)
  217. }
  218. item.m[b[1]] = r
  219. }
  220. }
  221. }