PseudoLocalizationTranslator.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\Translation;
  11. use Symfony\Contracts\Translation\TranslatorInterface;
  /**
   * Decorates another translator and alters its output (accents, expansion,
   * brackets) so that untranslated or layout-sensitive strings are easy to spot.
   *
   * This translator should only be used in a development environment.
   */
  final class PseudoLocalizationTranslator implements TranslatorInterface
  {
      /** Character used to pad the translated string when expanding it. */
      private const EXPANSION_CHARACTER = '~';

      /**
       * @var TranslatorInterface the decorated translator
       */
      private $translator;

      /**
       * @var bool whether ASCII characters are replaced with look-alike characters
       */
      private $accents;

      /**
       * @var float|int factor (>= 1) by which the visible text is expanded
       */
      private $expansionFactor;

      /**
       * @var bool whether the result is wrapped in "[" and "]"
       */
      private $brackets;

      /**
       * @var bool whether the translated string is parsed as HTML
       */
      private $parseHTML;

      /**
       * @var string[]
       */
      private $localizableHTMLAttributes;

      /**
       * Available options:
       *  * accents:
       *      type: boolean
       *      default: true
       *      description: replace ASCII characters of the translated string with accented versions or similar characters
       *      example: if true, "foo" => "ƒöö".
       *
       *  * expansion_factor:
       *      type: float
       *      default: 1
       *      validation: it must be greater than or equal to 1
       *      description: expand the translated string by the given factor with spaces and tildes (appended at the end)
       *      example: if 2 (and with the "brackets" option set to false), "foo" => "foo ~~"
       *
       *  * brackets:
       *      type: boolean
       *      default: true
       *      description: wrap the translated string with brackets
       *      example: if true, "foo" => "[foo]"
       *
       *  * parse_html:
       *      type: boolean
       *      default: false
       *      description: parse the translated string as HTML - looking for HTML tags has a performance impact but allows to preserve them from alterations - it also allows to compute the visible translated string length which is useful to correctly expand it when it contains HTML
       *      warning: unclosed tags are unsupported, they will be fixed (closed) by the parser - eg, "foo <div>bar" => "foo <div>bar</div>"
       *
       *  * localizable_html_attributes:
       *      type: string[]
       *      default: []
       *      description: the list of HTML attributes whose values can be altered - it is only useful when the "parse_html" option is set to true
       *      example: if ["title"], and with the "accents" option set to true, "<a href="#" title="Go to your profile">Profile</a>" => "<a href="#" title="Ĝö ţö ýöûŕ þŕöƒîļé">Þŕöƒîļé</a>" - if "title" was not in the "localizable_html_attributes" list, the title attribute data would be left unchanged.
       *
       * @throws \InvalidArgumentException when the expansion factor is lower than 1
       */
      public function __construct(TranslatorInterface $translator, array $options = [])
      {
          $this->translator = $translator;
          $this->accents = $options['accents'] ?? true;

          if (1.0 > ($this->expansionFactor = $options['expansion_factor'] ?? 1.0)) {
              throw new \InvalidArgumentException('The expansion factor must be greater than or equal to 1.');
          }

          $this->brackets = $options['brackets'] ?? true;
          $this->parseHTML = $options['parse_html'] ?? false;

          // Parsing HTML is pointless when neither accents nor expansion apply.
          // A loose ">=" is used (the factor is already known to be >= 1) so that
          // an integer 1 is recognised as well as the float 1.0.
          if ($this->parseHTML && !$this->accents && 1.0 >= $this->expansionFactor) {
              $this->parseHTML = false;
          }

          $this->localizableHTMLAttributes = $options['localizable_html_attributes'] ?? [];
      }

      /**
       * {@inheritdoc}
       */
      public function trans(string $id, array $parameters = [], ?string $domain = null, ?string $locale = null): string
      {
          $trans = '';
          $visibleText = '';

          // Each part is a [visible, localizable, text] triple.
          foreach ($this->getParts($this->translator->trans($id, $parameters, $domain, $locale)) as [$visible, $localizable, $text]) {
              if ($visible) {
                  // Only visible text counts towards the length used for expansion.
                  $visibleText .= $text;
              }

              if (!$localizable) {
                  $trans .= $text;

                  continue;
              }

              $this->addAccents($trans, $text);
          }

          $this->expand($trans, $visibleText);
          $this->addBrackets($trans);

          return $trans;
      }

      /**
       * Returns the locale of the decorated translator.
       */
      public function getLocale(): string
      {
          return $this->translator->getLocale();
      }

      /**
       * Splits a translated string into [visible, localizable, text] parts.
       *
       * Without HTML parsing the whole string is a single visible, localizable part.
       *
       * @return array<int, array{0: bool, 1: bool, 2: string}>
       */
      private function getParts(string $originalTrans): array
      {
          if (!$this->parseHTML) {
              return [[true, true, $originalTrans]];
          }

          // Encode every non-ASCII code point (including those above U+FFFF, e.g.
          // emoji) as a numeric entity so that the HTML parser does not have to
          // guess the encoding of the input.
          $html = mb_encode_numericentity($originalTrans, [0x80, 0x10FFFF, 0, 0x1FFFFF], mb_detect_encoding($originalTrans, null, true) ?: 'UTF-8');

          // Silence libxml warnings about the markup, then restore the previous setting.
          $useInternalErrors = libxml_use_internal_errors(true);

          $dom = new \DOMDocument();
          $dom->loadHTML('<trans>'.$html.'</trans>');

          libxml_clear_errors();
          libxml_use_internal_errors($useInternalErrors);

          // NOTE(review): relies on the parsed document being laid out as
          // doctype, <html> > <body> > <trans> - confirm against the libxml version in use.
          return $this->parseNode($dom->childNodes->item(1)->childNodes->item(0)->childNodes->item(0));
      }

      /**
       * Recursively converts the children of a DOM node into [visible, localizable, text] parts.
       *
       * @return array<int, array{0: bool, 1: bool, 2: string}>
       */
      private function parseNode(\DOMNode $node): array
      {
          $parts = [];

          foreach ($node->childNodes as $childNode) {
              if (!$childNode instanceof \DOMElement) {
                  // Every non-element node is treated as visible, localizable text.
                  $parts[] = [true, true, $childNode->nodeValue];

                  continue;
              }

              $parts[] = [false, false, '<'.$childNode->tagName];

              /** @var \DOMAttr $attribute */
              foreach ($childNode->attributes as $attribute) {
                  $parts[] = [false, false, ' '.$attribute->nodeName.'="'];

                  $localizableAttribute = \in_array($attribute->nodeName, $this->localizableHTMLAttributes, true);

                  // Split around escaped entities: with PREG_SPLIT_DELIM_CAPTURE the
                  // even indexes hold plain text and the odd ones hold the entities,
                  // which must never be altered.
                  foreach (preg_split('/(&(?:amp|quot|#039|lt|gt);+)/', htmlspecialchars($attribute->nodeValue, \ENT_QUOTES, 'UTF-8'), -1, \PREG_SPLIT_DELIM_CAPTURE) as $i => $match) {
                      if ('' === $match) {
                          continue;
                      }

                      $parts[] = [false, $localizableAttribute && 0 === $i % 2, $match];
                  }

                  $parts[] = [false, false, '"'];
              }

              $parts[] = [false, false, '>'];

              $parts = array_merge($parts, $this->parseNode($childNode));

              // A closing tag is always emitted, whatever the element.
              $parts[] = [false, false, '</'.$childNode->tagName.'>'];
          }

          return $parts;
      }

      /**
       * Appends $text to $trans, replacing printable ASCII characters with
       * accented or similar-looking characters when the "accents" option is enabled.
       */
      private function addAccents(string &$trans, string $text): void
      {
          $trans .= $this->accents ? strtr($text, [
              ' ' => ' ',
              '!' => '¡',
              '"' => '″',
              '#' => '♯',
              '$' => '€',
              '%' => '‰',
              '&' => '⅋',
              '\'' => '´',
              '(' => '{',
              ')' => '}',
              '*' => '⁎',
              '+' => '⁺',
              ',' => '،',
              '-' => '‐',
              '.' => '·',
              '/' => '⁄',
              '0' => '⓪',
              '1' => '①',
              '2' => '②',
              '3' => '③',
              '4' => '④',
              '5' => '⑤',
              '6' => '⑥',
              '7' => '⑦',
              '8' => '⑧',
              '9' => '⑨',
              ':' => '∶',
              ';' => '⁏',
              '<' => '≤',
              '=' => '≂',
              '>' => '≥',
              '?' => '¿',
              '@' => '՞',
              'A' => 'Å',
              'B' => 'Ɓ',
              'C' => 'Ç',
              'D' => 'Ð',
              'E' => 'É',
              'F' => 'Ƒ',
              'G' => 'Ĝ',
              'H' => 'Ĥ',
              'I' => 'Î',
              'J' => 'Ĵ',
              'K' => 'Ķ',
              'L' => 'Ļ',
              'M' => 'Ṁ',
              'N' => 'Ñ',
              'O' => 'Ö',
              'P' => 'Þ',
              'Q' => 'Ǫ',
              'R' => 'Ŕ',
              'S' => 'Š',
              'T' => 'Ţ',
              'U' => 'Û',
              'V' => 'Ṽ',
              'W' => 'Ŵ',
              'X' => 'Ẋ',
              'Y' => 'Ý',
              'Z' => 'Ž',
              '[' => '⁅',
              '\\' => '∖',
              ']' => '⁆',
              '^' => '˄',
              '_' => '‿',
              '`' => '‵',
              'a' => 'å',
              'b' => 'ƀ',
              'c' => 'ç',
              'd' => 'ð',
              'e' => 'é',
              'f' => 'ƒ',
              'g' => 'ĝ',
              'h' => 'ĥ',
              'i' => 'î',
              'j' => 'ĵ',
              'k' => 'ķ',
              'l' => 'ļ',
              'm' => 'ɱ',
              'n' => 'ñ',
              'o' => 'ö',
              'p' => 'þ',
              'q' => 'ǫ',
              'r' => 'ŕ',
              's' => 'š',
              't' => 'ţ',
              'u' => 'û',
              'v' => 'ṽ',
              'w' => 'ŵ',
              'x' => 'ẋ',
              'y' => 'ý',
              'z' => 'ž',
              '{' => '(',
              '|' => '¦',
              '}' => ')',
              '~' => '˞',
          ]) : $text;
      }

      /**
       * Appends space-separated runs of the expansion character to $trans until the
       * visible length, multiplied by the expansion factor, is reached.
       *
       * The runs mimic the lengths of the words found in $visibleText; the lengths
       * are drawn at random, weighted by how often each one occurs.
       */
      private function expand(string &$trans, string $visibleText): void
      {
          if (1.0 >= $this->expansionFactor) {
              return;
          }

          $visibleLength = $this->strlen($visibleText);
          $missingLength = (int) ceil($visibleLength * $this->expansionFactor) - $visibleLength;
          if ($this->brackets) {
              // The two brackets added afterwards count towards the expansion.
              $missingLength -= 2;
          }

          if (0 >= $missingLength) {
              return;
          }

          // Histogram of word lengths (length => occurrences), restricted to the
          // words short enough to fit, with their separator, in the missing length.
          $words = [];
          $wordsCount = 0;
          foreach (preg_split('/ +/', $visibleText, -1, \PREG_SPLIT_NO_EMPTY) as $word) {
              $wordLength = $this->strlen($word);

              if ($wordLength >= $missingLength) {
                  continue;
              }

              if (!isset($words[$wordLength])) {
                  $words[$wordLength] = 0;
              }

              ++$words[$wordLength];
              ++$wordsCount;
          }

          if (!$words) {
              // No usable word: fill the gap with a single run.
              $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);

              return;
          }

          // Most frequent lengths first.
          arsort($words, \SORT_NUMERIC);

          $longestWordLength = max(array_keys($words));

          while (true) {
              // Pick a length at random, weighted by its number of occurrences.
              $r = mt_rand(1, $wordsCount);

              foreach ($words as $length => $count) {
                  $r -= $count;
                  if ($r <= 0) {
                      break;
                  }
              }

              $trans .= ' '.str_repeat(self::EXPANSION_CHARACTER, $length);

              // The run plus its leading space.
              $missingLength -= $length + 1;

              if (0 === $missingLength) {
                  return;
              }

              // Drop the lengths that no longer fit in what remains to be added.
              while ($longestWordLength >= $missingLength) {
                  $wordsCount -= $words[$longestWordLength];
                  unset($words[$longestWordLength]);

                  if (!$words) {
                      $trans .= 1 === $missingLength ? self::EXPANSION_CHARACTER : ' '.str_repeat(self::EXPANSION_CHARACTER, $missingLength - 1);

                      return;
                  }

                  $longestWordLength = max(array_keys($words));
              }
          }
      }

      /**
       * Wraps $trans in square brackets when the "brackets" option is enabled.
       */
      private function addBrackets(string &$trans): void
      {
          if (!$this->brackets) {
              return;
          }

          $trans = '['.$trans.']';
      }

      /**
       * Returns the length of $s in characters of its detected encoding, or in
       * bytes when no encoding can be detected.
       */
      private function strlen(string $s): int
      {
          return false === ($encoding = mb_detect_encoding($s, null, true)) ? \strlen($s) : mb_strlen($s, $encoding);
      }
  }