/**
 * Parse .mo files and find translated messages.
 * Authors:
 *  $(LINK2 https://github.com/FreeSlave, Roman Chistokhodov)
 * Copyright:
 *  Roman Chistokhodov, 2018
 * License:
 *  $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * See_Also:
 *  $(LINK2 https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html, The Format of GNU MO Files)
 */

module mofile;

/// Thrown when a plural form expression cannot be parsed or evaluated.
class PluralFormException : Exception
{
    pure nothrow @nogc @safe this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable nextInChain = null) {
        super(msg, file, line, nextInChain);
    }
}

/// Thrown when .mo data is malformed or uses an unsupported format.
class MoFileException : Exception
{
    pure nothrow @nogc @safe this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable nextInChain = null) {
        super(msg, file, line, nextInChain);
    }
}

private @safe
{
    import std.conv : parse, ConvException;
    import std.ascii;

    // Token codes for multi-character operators and numbers.
    // They start above ubyte.max so they never collide with single-character tokens,
    // which are represented by their own character code.
    enum : ushort {
        SHL = ubyte.max + 1,
        SHR,
        AND,
        OR,
        LTE,
        GTE,
        EQ,
        NEQ,
        NUM,
    }

    // Node of a compiled plural form expression tree.
    class Plural
    {
    pure:
        // Evaluate the expression for the given n.
        abstract int opCall(int n = 0) const;
        // Produce a copy of this node.
        abstract Plural clone();
    }

    // Base for nodes with one operand.
    class Unary : Plural
    {
    pure:
        this(Plural op) {
            op1 = op;
        }
    protected:
        Plural op1;
    }

    // Base for nodes with two operands.
    class Binary : Plural
    {
    pure:
        this(Plural first, Plural second) {
            op1 = first;
            op2 = second;
        }
    protected:
        Plural op1, op2;
    }

    // Integer literal.
    final class Number : Plural
    {
    pure:
        this(int number) {
            num = number;
        }
        override Plural clone() {
            return new Number(num);
        }
        override int opCall(int) const {
            return num;
        }
    private:
        int num;
    }

    // Unary operator node; `op` is the D operator text mixed into the evaluation.
    final class UnaryOp(string op) : Unary
    {
    pure:
        this(Plural op1) {
            super(op1);
        }
        override int opCall(int n) const {
            return mixin(op ~ " op1(n)");
        }
        override Plural clone() {
            return new UnaryOp!(op)(op1.clone());
        }
    }

    // Binary operator node; `op` is the D operator text mixed into the evaluation.
    final class BinaryOp(string op) : Binary
    {
    pure:
        this(Plural first, Plural second) {
            super(first, second);
        }
        override int opCall(int n) const {
            return mixin("op1(n)" ~ op ~ "op2(n)");
        }
        override Plural clone() {
            return new BinaryOp!(op)(op1.clone(), op2.clone());
        }
    }

    // Division-like binary operator node ("/" or "%") that guards the divisor.
    final class BinaryOpD(string op) : Binary
    {
        static assert(op == "/" || op == "%", "BinaryOpD supports only division and remainder");
    pure:
        this(Plural first, Plural second) {
            super(first, second);
        }
        // Throws PluralFormException when the right operand evaluates to zero.
        override int opCall(int n) const {
            // The divisor is evaluated first, as in the plain mixin form it replaces.
            const int v2 = op2(n);
            if (v2 == 0) {
                throw new PluralFormException("Division by zero during plural form computation");
            }
            const int v1 = op1(n);
            if (v2 == -1) {
                // int.min / -1 and int.min % -1 trap on common hardware,
                // so the result is computed without dividing.
                static if (op == "/")
                    return -v1;
                else
                    return 0;
            }
            return mixin("v1" ~ op ~ "v2");
        }
        override Plural clone() {
            // Must stay a BinaryOpD so the copy keeps the divisor checks.
            return new BinaryOpD!(op)(op1.clone(), op2.clone());
        }
    }

    alias Not = UnaryOp!"!";
    alias Minus = UnaryOp!"-";
    alias Invert = UnaryOp!"~";

    alias Mul = BinaryOp!"*";
    alias Div = BinaryOpD!"/";
    alias Mod = BinaryOpD!"%";

    alias Add = BinaryOp!"+";
    alias Sub = BinaryOp!"-";

    alias Shl = BinaryOp!"<<";
    alias Shr = BinaryOp!">>";

    alias Gt = BinaryOp!">";
    alias Lt = BinaryOp!"<";
    alias Gte = BinaryOp!">=";
    alias Lte = BinaryOp!"<=";

    alias Eq = BinaryOp!"==";
    alias Neq = BinaryOp!"!=";

    alias BinAnd = BinaryOp!"&";
    alias BinXor = BinaryOp!"^";
    alias BinOr = BinaryOp!"|";

    alias And = BinaryOp!"&&";
    alias Or = BinaryOp!"||";

    unittest
    {
        Plural op = new Mul(new Number(5), new Minus(new Number(10)));
        assert(op() == -50);
        op = new Eq(new Number(42), new Add(new Number(20), new Number(22)));
        assert(op() == 1);
        op = new Div(new Number(12), new Number(3));
        assert(op() == 4);

        import std.exception : assertThrown;
        // A cloned division node must still reject a zero divisor.
        assertThrown!PluralFormException(new Div(new Number(1), new Number(0)).clone()());
        assertThrown!PluralFormException(new Mod(new Number(1), new Number(0)).clone()());
        // Division by -1 must not trap, even for int.min.
        assert(new Mod(new Number(int.min), new Number(-1))() == 0);
        assert(new Div(new Number(6), new Number(-1))() == -6);
    }

    // Splits a plural form expression into tokens.
    // `front` is either a character code, one of the operator codes above, NUM, or 0 at the end.
    struct Tokenizer
    {
    pure:
        this(string contents) {
            content = contents;
            get();
        }

        @property ushort front() const pure nothrow @nogc {
            return current;
        }
        @property bool empty() const pure nothrow @nogc {
            return current == 0;
        }
        void popFront() {
            get();
        }
        // Value of the current token. Throws PluralFormException if it is not a number.
        int getNumber() {
            if (current == NUM)
                return number;
            else
                throw new PluralFormException("Not a number");
        }
    private:
        // Read the next token into `current` (and `number` for NUM tokens).
        @trusted void get() {
            while(content.length > pos && isWhite(content[pos])) {
                pos++;
            }
            if (pos >= content.length) {
                current = 0;
                return;
            }
            // Two-character operators take priority over single-character ones.
            if (content.length >= pos+2) {
                switch(content[pos..pos+2]) {
                    case "<<": current = SHL; pos += 2; return;
                    case ">>": current = SHR; pos += 2; return;
                    case "&&": current = AND; pos += 2; return;
                    case "||": current = OR; pos += 2; return;
                    case "<=": current = LTE; pos += 2; return;
                    case ">=": current = GTE; pos += 2; return;
                    case "==": current = EQ; pos += 2; return;
                    case "!=": current = NEQ; pos += 2; return;
                    default: break;
                }
            }
            if (isDigit(content[pos])) {
                auto tmp = content[pos..$];
                try {
                    number = parse!int(tmp);
                } catch (ConvException e) {
                    // Report an oversized literal as an expression error
                    // instead of leaking a conversion exception.
                    throw new PluralFormException("Number is out of range");
                }
                current = NUM;
                // parse advanced tmp past the digits; tmp is still a suffix of content.
                pos = content.length - tmp.length;
            } else {
                current = cast(ushort)content[pos];
                pos++;
            }
        }

        int number;
        ushort current;
        size_t pos;
        string content;
    }

    unittest
    {
        string contents = "n %10 ==1\n";
        auto tokenizer = Tokenizer(contents);
        assert(!tokenizer.empty);
        assert(tokenizer.front == 'n');
        tokenizer.popFront();
        assert(tokenizer.front == '%');
        tokenizer.popFront();
        assert(tokenizer.front == NUM);
        assert(tokenizer.getNumber == 10);
        tokenizer.popFront();
        assert(tokenizer.front == EQ);
        tokenizer.popFront();
        assert(tokenizer.front == NUM);
        assert(tokenizer.getNumber == 1);
        tokenizer.popFront();
        assert(tokenizer.empty);

        tokenizer = Tokenizer("");
        assert(tokenizer.empty);
    }

    // The variable `n` of a plural form expression.
    final class Variable : Plural
    {
    pure:
        this() {
        }
        override int opCall(int n) const {
            return n;
        }
        override Plural clone() {
            return new Variable();
        }
    }

    // Ternary `cond ? res : alt` node.
    final class Conditional : Plural
    {
    pure:
        this(Plural cond, Plural res, Plural alt) {
            this.cond = cond;
            this.res = res;
            this.alt = alt;
        }
        override int opCall(int n) const {
            return cond(n) ? res(n) : alt(n);
        }
        override Plural clone() {
            // Deep copy, consistent with the other node types.
            return new Conditional(cond.clone(), res.clone(), alt.clone());
        }
    private:
        Plural cond, res, alt;
    }

    unittest
    {
        auto original = new Conditional(new Variable(), new Number(1), new Number(2));
        auto copy = cast(Conditional)original.clone();
        assert(copy !is null);
        assert(copy.cond !is original.cond);
        assert(copy.res !is original.res);
        assert(copy.alt !is original.alt);
        assert(copy(0) == 2);
        assert(copy(5) == 1);
    }

    // Recursive descent parser building a Plural tree from a C-like expression.
    struct Parser
    {
    pure:
        this(Tokenizer tokenizer) {
            t = tokenizer;
        }

        this(string content) {
            this(Tokenizer(content));
        }

        // Parse the whole input. Throws PluralFormException on a syntax error
        // or when tokens remain after a complete expression.
        Plural compile() {
            Plural expr = condExpr();
            if (expr && !t.empty) {
                throw new PluralFormException("Not in the end");
            }
            return expr;
        }

    private:
        // Parenthesized expression, number literal or the variable `n`.
        Plural valueExpr() {
            if (t.front == '(') {
                t.popFront();
                Plural op = condExpr();
                if (op is null)
                    return null;
                if (t.front != ')')
                    throw new PluralFormException("Missing ')' in expression");
                t.popFront();
                return op;
            } else if (t.front == NUM) {
                int number = t.getNumber();
                t.popFront();
                return new Number(number);
            } else if (t.front == 'n') {
                t.popFront();
                return new Variable();
            } else {
                throw new PluralFormException("Unknown operand");
            }
        }

        // Optional chain of prefix operators followed by a value.
        Plural unaryExpr() {
            Plural op1;
            ushort op = t.front;
            if (op == '-' || op == '~' || op == '!') {
                t.popFront();
                op1 = unaryExpr();
                if (op1) {
                    switch(op) {
                        case '-': return new Minus(op1);
                        case '~': return new Invert(op1);
                        case '!': return new Not(op1);
                        default: assert(false);
                    }
                } else {
                    return null;
                }
            } else {
                return valueExpr();
            }
        }

        // Precedence of a binary operator token (higher binds tighter); 0 if not a binary operator.
        static int getPrec(const ushort op) {
            switch(op) {
                case '/':
                case '*':
                case '%':
                    return 10;
                case '+':
                case '-':
                    return 9;
                case SHL:
                case SHR:
                    return 8;
                case '>':
                case '<':
                case GTE:
                case LTE:
                    return 7;
                case EQ:
                case NEQ:
                    return 6;
                case '&':
                    return 5;
                case '^':
                    return 4;
                case '|':
                    return 3;
                case AND:
                    return 2;
                case OR:
                    return 1;
                default:
                    return 0;
            }
        }

        // Create the node for a binary operator token; null for an unknown token.
        static Plural binaryFactory(const ushort op, Plural left, Plural right) {
            switch(op) {
                case '/': return new Div(left,right);
                case '*': return new Mul(left,right);
                case '%': return new Mod(left,right);
                case '+': return new Add(left,right);
                case '-': return new Sub(left,right);
                case SHL: return new Shl(left,right);
                case SHR: return new Shr(left,right);
                case '>': return new Gt(left,right);
                case '<': return new Lt(left,right);
                case GTE: return new Gte(left,right);
                case LTE: return new Lte(left,right);
                case EQ: return new Eq(left,right);
                case NEQ: return new Neq(left,right);
                case '&': return new BinAnd(left,right);
                case '^': return new BinXor(left,right);
                case '|': return new BinOr(left,right);
                case AND: return new And(left,right);
                case OR: return new Or(left,right);
                default: return null;
            }
        }

        // Left-associative binary expression at precedence level `prec`.
        // Level 11 is one above the tightest binary operator and parses a unary expression.
        Plural binaryExpr(const int prec = 1) {
            assert(prec >= 1 && prec <= 11);
            Plural op1,op2;
            if (prec == 11)
                op1 = unaryExpr();
            else
                op1 = binaryExpr(prec+1);
            if (op1 is null)
                return null;
            if (prec != 11) {
                while(getPrec(t.front) == prec) {
                    ushort o = t.front;
                    t.popFront();
                    op2 = binaryExpr(prec+1);
                    if (op2 is null)
                        return null;
                    op1 = binaryFactory(o, op1, op2);
                }
            }

            return op1;
        }

        // Binary expression optionally followed by `? expr : expr`.
        Plural condExpr() {
            Plural cond, case1, case2;
            cond = binaryExpr();
            if(cond is null)
                return null;
            if(t.front == '?') {
                t.popFront();
                case1 = condExpr();
                if(case1 is null)
                    return null;
                if(t.front != ':')
                    throw new PluralFormException("Missing ':' in conditional operator");
                t.popFront();
                case2 = condExpr();
                if(case2 is null)
                    return null;
            } else {
                return cond;
            }
            return new Conditional(cond,case1,case2);
        }

        Tokenizer t;
    }

    unittest
    {
        auto parser = new Parser("(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)");
        auto expr = parser.compile();
        assert(expr !is null);
        assert(expr(1) == 0);
        assert(expr(101) == 0);
        assert(expr(2) == 1);
        assert(expr(24) == 1);
        assert(expr(104) == 1);
        assert(expr(222) == 1);
        assert(expr(11) == 2);
        assert(expr(14) == 2);
        assert(expr(111) == 2);
        assert(expr(210) == 2);

        import std.exception : assertThrown;
        assertThrown!PluralFormException(new Parser("").compile());
        assertThrown!PluralFormException(new Parser("n?1").compile());
        assertThrown!PluralFormException(new Parser("(2-1").compile());
        assertThrown!PluralFormException(new Parser("p").compile());
        assertThrown!PluralFormException(new Parser("1+2;").compile());
        // An integer literal that does not fit into int is an expression error.
        assertThrown!PluralFormException(new Parser("99999999999").compile());
    }
}

import std.exception : assumeUnique, enforce;
import std.range : iota, assumeSorted, drop, dropOne;
import std.algorithm.iteration : map, splitter;
import std.algorithm.searching : all, find, findSkip, skipOver;
import std.algorithm.sorting : isSorted;
import std.string : lineSplitter, stripRight;
import std.typecons : tuple;

private enum int moMagic = 0x950412de;

/**
 * Struct representing .mo file.
 *
 * Default constructed object returns untranslated messages.
 */
@safe struct MoFile
{
    /**
     * Read from file.
     * Throws:
     *  $(D mofile.MoFileException) if data is in invalid or unsupported format.
     *  $(D mofile.PluralFormException) if plural form expression could not be parsed.
     *  $(B FileException) on file reading error.
     */
    @trusted this(string fileName) {
        import std.file : read;
        this(read(fileName).assumeUnique);
    }

    /**
     * Constructor from data.
     * Data must be immutable and live as long as translated messages are used, because it's used to return strings.
     * Throws:
     *  $(D mofile.MoFileException) if data is in invalid or unsupported format.
     *  $(D mofile.PluralFormException) if plural form expression could not be parsed.
     */
    @safe this(immutable(void)[] data) pure {
        this.data = data;
        const magic = readValue!int(0);
        if (magic != moMagic) {
            throw new MoFileException("Wrong magic");
        }
        const revision = readValue!int(int.sizeof);
        if (revision != 0) {
            throw new MoFileException("Unknown revision");
        }

        baseOffsetOrig = readValue!int(int.sizeof*3);
        baseOffsetTr = readValue!int(int.sizeof*4);
        count = readValue!int(int.sizeof*2);

        if (count <= 0) {
            throw new MoFileException("Invalid count of msgids, must be at least 1");
        }

        // Lookup is a binary search, so the original strings must be sorted.
        auto mapped = iota(0,count).map!(i => getMessage(baseOffsetOrig, i));
        enforce!MoFileException(mapped.isSorted, "Invalid .mo file: message ids are not sorted");
        if (!mapped.empty && mapped.front.length == 0) {
            enforce!MoFileException(mapped.dropOne.all!"!a.empty", "Some msgid besides the reserved one is empty");
        }

        // The translation at index 0 is treated as the header;
        // its Plural-Forms line carries the plural expression.
        string header = getMessage(baseOffsetTr, 0);
        foreach(line; header.lineSplitter) {
            if (line.skipOver("Plural-Forms:")) {
                if (line.findSkip("plural=")) {
                    string expr = line.stripRight("\n\r;");
                    auto parser = new Parser(expr);
                    compiled = parser.compile();
                }
            }
        }
    }

    /**
     * .mo file header that includes some info like creation date, language and translator's name.
     */
    string header() pure const {
        return gettext("");
    }

    /**
     * Get translated message.
     * Params:
     *  msgid = Message id (usually untranslated string)
     * Returns: Translated message for the msgid.
     *  If translation for this msgid does not exist or MoFile is default constructed the msgid is returned.
     */
    string gettext(string msgid) pure const {
        int index = getIndex(msgid);
        if (index >= 0) {
            string translated = getMessage(baseOffsetTr, index);
            // A translation with plural forms stores them NUL-separated; the first one is the singular.
            auto splitted = translated.splitter('\0');
            if (!splitted.empty && splitted.front.length)
                return splitted.front;
        }
        return msgid;
    }

    /**
     * Get translated message considering plural forms.
     * Params:
     *  msgid = Untranslated message in singular form
     *  msgid_plural = Untranslated message in plural form.
     *  n = Number to calculate a plural form.
     * Returns: Translated string in plural form dependent on number n.
     *  If translation for this msgid does not exist or MoFile is default constructed then the msgid is returned if n == 1 and msgid_plural otherwise.
     * Throws:
     *  $(D mofile.PluralFormException) if the plural form expression divides by zero for this n.
     */
    string ngettext(string msgid, string msgid_plural, int n) pure const {
        int index = getIndex(msgid);
        if (compiled !is null && index >= 0) {
            string translated = getMessage(baseOffsetTr, index);
            auto splitted = translated.splitter('\0');
            if (!splitted.empty && splitted.front.length) {
                int pluralForm = compiled(n);
                // A negative index can never name an existing form; fall back to the untranslated text.
                if (pluralForm >= 0) {
                    auto forms = splitted.drop(pluralForm);
                    if (!forms.empty)
                        return forms.front;
                }
            }
        }
        return n == 1 ? msgid : msgid_plural;
    }

private:
    // Index of the message whose singular form equals `message`, or -1 if there is none.
    @trusted int getIndex(string message) pure const {
        if (data.length == 0)
            return -1;
        auto sorted = iota(0, count).map!(i => tuple(i, getMessageSingular(baseOffsetOrig, i))).assumeSorted!"a[1] < b[1]";
        auto found = sorted.equalRange(tuple(0, message));
        if (found.empty) {
            return -1;
        } else {
            return found.front[0];
        }
    }

    // Read a value of type T located at `offset` bytes from the start of data.
    // Throws MoFileException if the value does not fit into data.
    @trusted T readValue(T)(size_t offset) pure const
    {
        // Written as a subtraction so that a huge offset cannot wrap around.
        if (offset <= data.length && data.length - offset >= T.sizeof) {
            T value = *(cast(const(T)*)data[offset..(offset+T.sizeof)].ptr);
            return value;
        } else {
            throw new MoFileException("Value is out of bounds");
        }
    }

    // Slice `len` bytes at `offset` out of data as a string.
    // Throws MoFileException if the range is negative or does not fit into data.
    @trusted string readString(int len, int offset) pure const
    {
        // Both values come straight from the file and may be negative.
        if (len < 0 || offset < 0) {
            throw new MoFileException("String is out of bounds");
        }
        const size_t start = offset;
        const size_t size = len;
        if (start <= data.length && data.length - start >= size) {
            string s = cast(string)data[start..start+size];
            return s;
        } else {
            throw new MoFileException("String is out of bounds");
        }
    }

    // Read the i-th (length, offset) pair of the table at `offset` and return the string it describes.
    // Throws MoFileException if the table entry or the string is out of bounds.
    @trusted string getMessage(int offset, int i) pure const {
        if (offset < 0 || i < 0) {
            throw new MoFileException("Value is out of bounds");
        }
        // Computed in 64 bits so the multiplication cannot overflow.
        const ulong entry = cast(ulong)offset + cast(ulong)i * (int.sizeof*2);
        if (entry > data.length) {
            throw new MoFileException("Value is out of bounds");
        }
        const size_t pos = cast(size_t)entry;
        return readString(readValue!int(pos), readValue!int(pos + int.sizeof));
    }

    // Like getMessage, but only the part before the first NUL (the singular form).
    @trusted string getMessageSingular(int offset, int i) pure const {
        auto splitted = getMessage(offset, i).splitter('\0');
        if (splitted.empty) {
            return "";
        } else {
            return splitted.front;
        }
    }

    int count;
    int baseOffsetOrig;
    int baseOffsetTr;
    immutable(void)[] data;
    Plural compiled;
}

unittest
{
    MoFile moFile;
    assert(moFile.header.length == 0);
    assert(moFile.gettext("") == "");
    assert(moFile.gettext("Hello") == "Hello");
    assert(moFile.ngettext("File", "Files", 1) == "File");
    assert(moFile.ngettext("File", "Files", 2) == "Files");
    assert(moFile.ngettext("File", "Files", 0) == "Files");
}

unittest
{
    import std.bitmanip : nativeToLittleEndian;
    import std.string : representation;
    immutable(ubyte)[] moHeader = (nativeToLittleEndian(moMagic)[] ~ nativeToLittleEndian(0)[] ~ nativeToLittleEndian(1)[] ~ nativeToLittleEndian(20) ~ nativeToLittleEndian(28)).assumeUnique;
    immutable(ubyte)[] offsets = (nativeToLittleEndian(4)[] ~ nativeToLittleEndian(36)[] ~ nativeToLittleEndian(4)[] ~ nativeToLittleEndian(40)[]).assumeUnique;
    immutable(ubyte)[] data = moHeader ~ offsets ~ "abcd".representation ~ "efgh".representation;
    assert(data.length == 44);
    MoFile moFile;
    import std.exception : assertThrown, assertNotThrown;
    assertNotThrown(moFile = MoFile(data));
    assert(moFile.gettext("abcd") == "efgh");
}

unittest
{
    import std.bitmanip : nativeToLittleEndian;
    import std.exception : assertThrown;

    // A negative string offset in the translation table must be reported as a format error.
    ubyte[] raw;
    foreach (int word; [moMagic, 0, 1, 20, 28, 4, 36, 4, -4]) {
        ubyte[4] le = nativeToLittleEndian(word);
        raw ~= le[];
    }
    raw ~= cast(const(ubyte)[])"abcdefgh";
    immutable(ubyte)[] broken = raw.idup;
    assertThrown!MoFileException(MoFile(broken));
}