/**
 * Parse .mo files and find translated messages.
 * Authors:
 *  $(LINK2 https://github.com/FreeSlave, Roman Chistokhodov)
 * Copyright:
 *  Roman Chistokhodov, 2018
 * License:
 *  $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 * See_Also:
 *  $(LINK2 https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html, The Format of GNU MO Files)
 */

module mofile;

/// Thrown when a plural form expression cannot be tokenized, parsed or evaluated.
class PluralFormException : Exception
{
    pure nothrow @nogc @safe this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable nextInChain = null) {
        super(msg, file, line, nextInChain);
    }
}

/// Thrown when .mo data is malformed or in an unsupported format.
class MoFileException : Exception
{
    pure nothrow @nogc @safe this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable nextInChain = null) {
        super(msg, file, line, nextInChain);
    }
}

private @safe
{
    import std.conv : parse, ConvException;
    import std.ascii;

    // Token codes for multi-character operators and numbers.
    // They start above ubyte.max so they never collide with single-character
    // tokens, which are represented by their own character code.
    enum : ushort {
        SHL = ubyte.max + 1,
        SHR,
        AND,
        OR,
        LTE,
        GTE,
        EQ,
        NEQ,
        NUM,
    }

    // Node of a compiled plural form expression.
    class Plural
    {
    pure:
        // Evaluate the expression for the number n.
        abstract int opCall(int n = 0) const;
        // Create an independent copy of this node and its operands.
        abstract Plural clone();
    }

    // Base class for nodes with one operand.
    class Unary : Plural
    {
    pure:
        this(Plural op) {
            op1 = op;
        }
    protected:
        Plural op1;
    }

    // Base class for nodes with two operands.
    class Binary : Plural
    {
    pure:
        this(Plural first, Plural second) {
            op1 = first;
            op2 = second;
        }
    protected:
        Plural op1, op2;
    }

    // Integer literal.
    final class Number : Plural
    {
    pure:
        this(int number) {
            num = number;
        }
        override Plural clone() {
            return new Number(num);
        }
        override int opCall(int) const {
            return num;
        }
    private:
        int num;
    }

    // Unary operator node; op is the D operator applied to the operand value.
    final class UnaryOp(string op) : Unary
    {
    pure:
        this(Plural op1) {
            super(op1);
        }
        override int opCall(int n) const {
            return mixin(op ~ " op1(n)");
        }
        override Plural clone() {
            return new UnaryOp!(op)(op1.clone());
        }
    }

    // Binary operator node; op is the D operator applied to both operand values.
    final class BinaryOp(string op) : Binary
    {
    pure:
        this(Plural first, Plural second) {
            super(first, second);
        }
        override int opCall(int n) const {
            return mixin("op1(n)" ~ op ~ "op2(n)");
        }
        override Plural clone() {
            return new BinaryOp!(op)(op1.clone(), op2.clone());
        }
    }

    // Binary operator node for division-like operators: throws
    // PluralFormException instead of dividing by zero.
    final class BinaryOpD(string op) : Binary
    {
    pure:
        this(Plural first, Plural second) {
            super(first, second);
        }
        override int opCall(int n) const {
            const int divisor = op2(n);
            if (divisor == 0) {
                throw new PluralFormException("Division by zero during plural form computation");
            }
            return mixin("op1(n)" ~ op ~ "divisor");
        }
        override Plural clone() {
            // Must stay a BinaryOpD: a plain BinaryOp copy would drop the zero check.
            return new BinaryOpD!(op)(op1.clone(), op2.clone());
        }
    }

    alias Not = UnaryOp!"!";
    alias Minus = UnaryOp!"-";
    alias Invert = UnaryOp!"~";

    alias Mul = BinaryOp!"*";
    alias Div = BinaryOpD!"/";
    alias Mod = BinaryOpD!"%";

    alias Add = BinaryOp!"+";
    alias Sub = BinaryOp!"-";

    alias Shl = BinaryOp!"<<";
    alias Shr = BinaryOp!">>";

    alias Gt = BinaryOp!">";
    alias Lt = BinaryOp!"<";
    alias Gte = BinaryOp!">=";
    alias Lte = BinaryOp!"<=";

    alias Eq = BinaryOp!"==";
    alias Neq = BinaryOp!"!=";

    alias BinAnd = BinaryOp!"&";
    alias BinXor = BinaryOp!"^";
    alias BinOr = BinaryOp!"|";

    alias And = BinaryOp!"&&";
    alias Or = BinaryOp!"||";

    unittest
    {
        import std.exception : assertThrown;

        Plural op = new Mul(new Number(5), new Minus(new Number(10)));
        assert(op() == -50);
        op = new Eq(new Number(42), new Add(new Number(20), new Number(22)));
        assert(op() == 1);
        op = new Div(new Number(12), new Number(3));
        assert(op() == 4);

        // A cloned division keeps the division-by-zero check.
        Plural div = new Div(new Number(1), new Variable());
        assertThrown!PluralFormException(div(0));
        assertThrown!PluralFormException(div.clone()(0));
        assert(div.clone()(1) == 1);

        // A cloned conditional owns copies of its operands.
        auto conditional = new Conditional(new Variable(), new Number(1), new Number(2));
        auto copy = cast(Conditional) conditional.clone();
        assert(copy !is null);
        assert(copy.cond !is conditional.cond);
        assert(copy(0) == 2 && copy(5) == 1);
    }

    // Splits a plural form expression into tokens.
    // A token is 0 at the end of input, NUM for a number, one of the
    // multi-character operator codes, or the code of a single character.
    struct Tokenizer
    {
    pure:
        this(string contents) {
            content = contents;
            get();
        }

        @property ushort front() const pure nothrow @nogc {
            return current;
        }
        @property bool empty() const pure nothrow @nogc {
            return current == 0;
        }
        void popFront() {
            get();
        }
        // Value of the current token. Throws PluralFormException if it is not a number.
        int getNumber() {
            if (current != NUM)
                throw new PluralFormException("Not a number");
            return number;
        }
    private:
        // Read the next token into current (and number for NUM tokens).
        void get() {
            while (pos < content.length && isWhite(content[pos])) {
                pos++;
            }
            if (pos >= content.length) {
                current = 0;
                return;
            }
            if (content.length - pos >= 2) {
                ushort twoChar = 0;
                switch (content[pos .. pos + 2]) {
                    case "<<": twoChar = SHL; break;
                    case ">>": twoChar = SHR; break;
                    case "&&": twoChar = AND; break;
                    case "||": twoChar = OR; break;
                    case "<=": twoChar = LTE; break;
                    case ">=": twoChar = GTE; break;
                    case "==": twoChar = EQ; break;
                    case "!=": twoChar = NEQ; break;
                    default: break;
                }
                if (twoChar != 0) {
                    current = twoChar;
                    pos += 2;
                    return;
                }
            }
            if (isDigit(content[pos])) {
                auto rest = content[pos .. $];
                try {
                    number = parse!int(rest);
                } catch (ConvException e) {
                    // Report conversion failures (e.g. overflow) as a plural form error.
                    throw new PluralFormException("Number is out of range in plural form expression");
                }
                current = NUM;
                // parse advanced rest past the digits it consumed.
                pos = content.length - rest.length;
            } else {
                current = cast(ushort) content[pos];
                pos++;
            }
        }

        int number;
        ushort current;
        size_t pos;
        string content;
    }

    unittest
    {
        string contents = "n %10 ==1\n";
        auto tokenizer = Tokenizer(contents);
        assert(!tokenizer.empty);
        assert(tokenizer.front == 'n');
        tokenizer.popFront();
        assert(tokenizer.front == '%');
        tokenizer.popFront();
        assert(tokenizer.front == NUM);
        assert(tokenizer.getNumber == 10);
        tokenizer.popFront();
        assert(tokenizer.front == EQ);
        tokenizer.popFront();
        assert(tokenizer.front == NUM);
        assert(tokenizer.getNumber == 1);
        tokenizer.popFront();
        assert(tokenizer.empty);

        tokenizer = Tokenizer("");
        assert(tokenizer.empty);
    }

    // The variable n of a plural form expression.
    final class Variable : Plural
    {
    pure:
        this() {
        }
        override int opCall(int n) const {
            return n;
        }
        override Plural clone() {
            return new Variable();
        }
    }

    // Ternary conditional: cond ? res : alt.
    final class Conditional : Plural
    {
    pure:
        this(Plural cond, Plural res, Plural alt) {
            this.cond = cond;
            this.res = res;
            this.alt = alt;
        }
        override int opCall(int n) const {
            return cond(n) ? res(n) : alt(n);
        }
        override Plural clone() {
            // Copy the operands too, like every other node does.
            return new Conditional(cond.clone(), res.clone(), alt.clone());
        }
    private:
        Plural cond, res, alt;
    }

    // Recursive descent parser that turns a plural form expression into a Plural tree.
    struct Parser
    {
    pure:
        this(Tokenizer tokenizer) {
            t = tokenizer;
        }

        this(string content) {
            this(Tokenizer(content));
        }

        // Parse the whole input.
        // Throws PluralFormException on a syntax error or trailing tokens.
        Plural compile() {
            Plural expr = condExpr();
            if (expr && !t.empty) {
                throw new PluralFormException("Not in the end");
            }
            return expr;
        }

    private:
        // Parenthesized expression, number or the variable n.
        Plural valueExpr() {
            if (t.front == '(') {
                t.popFront();
                Plural inner = condExpr();
                if (inner is null)
                    return null;
                if (t.front != ')')
                    throw new PluralFormException("Missing ')' in expression");
                t.popFront();
                return inner;
            }
            if (t.front == NUM) {
                const int number = t.getNumber();
                t.popFront();
                return new Number(number);
            }
            if (t.front == 'n') {
                t.popFront();
                return new Variable();
            }
            throw new PluralFormException("Unknown operand");
        }

        // Any number of prefix operators (-, ~, !) followed by a value.
        Plural unaryExpr() {
            const ushort op = t.front;
            if (op != '-' && op != '~' && op != '!')
                return valueExpr();

            t.popFront();
            Plural operand = unaryExpr();
            if (operand is null)
                return null;
            switch (op) {
                case '-': return new Minus(operand);
                case '~': return new Invert(operand);
                case '!': return new Not(operand);
                default: assert(false);
            }
        }

        // Precedence of a binary operator token (higher binds tighter), 0 if it is not one.
        static int getPrec(const ushort op) {
            switch (op) {
                case '/':
                case '*':
                case '%':
                    return 10;
                case '+':
                case '-':
                    return 9;
                case SHL:
                case SHR:
                    return 8;
                case '>':
                case '<':
                case GTE:
                case LTE:
                    return 7;
                case EQ:
                case NEQ:
                    return 6;
                case '&':
                    return 5;
                case '^':
                    return 4;
                case '|':
                    return 3;
                case AND:
                    return 2;
                case OR:
                    return 1;
                default:
                    return 0;
            }
        }

        // Create the node for a binary operator token, null if the token is not one.
        static Plural binaryFactory(const ushort op, Plural left, Plural right) {
            switch (op) {
                case '/': return new Div(left, right);
                case '*': return new Mul(left, right);
                case '%': return new Mod(left, right);
                case '+': return new Add(left, right);
                case '-': return new Sub(left, right);
                case SHL: return new Shl(left, right);
                case SHR: return new Shr(left, right);
                case '>': return new Gt(left, right);
                case '<': return new Lt(left, right);
                case GTE: return new Gte(left, right);
                case LTE: return new Lte(left, right);
                case EQ: return new Eq(left, right);
                case NEQ: return new Neq(left, right);
                case '&': return new BinAnd(left, right);
                case '^': return new BinXor(left, right);
                case '|': return new BinOr(left, right);
                case AND: return new And(left, right);
                case OR: return new Or(left, right);
                default: return null;
            }
        }

        // Left-associative chain of binary operators of precedence prec.
        // Level 11 is one above the tightest binary operator and parses a unary expression.
        Plural binaryExpr(const int prec = 1) {
            assert(prec >= 1 && prec <= 11);
            if (prec == 11)
                return unaryExpr();

            Plural left = binaryExpr(prec + 1);
            if (left is null)
                return null;
            while (getPrec(t.front) == prec) {
                const ushort op = t.front;
                t.popFront();
                Plural right = binaryExpr(prec + 1);
                if (right is null)
                    return null;
                left = binaryFactory(op, left, right);
            }
            return left;
        }

        // Binary expression optionally followed by "? expr : expr".
        Plural condExpr() {
            Plural cond = binaryExpr();
            if (cond is null)
                return null;
            if (t.front != '?')
                return cond;

            t.popFront();
            Plural case1 = condExpr();
            if (case1 is null)
                return null;
            if (t.front != ':')
                throw new PluralFormException("Missing ':' in conditional operator");
            t.popFront();
            Plural case2 = condExpr();
            if (case2 is null)
                return null;
            return new Conditional(cond, case1, case2);
        }

        Tokenizer t;
    }

    unittest
    {
        auto parser = Parser("(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)");
        auto expr = parser.compile();
        assert(expr !is null);
        assert(expr(1) == 0);
        assert(expr(101) == 0);
        assert(expr(2) == 1);
        assert(expr(24) == 1);
        assert(expr(104) == 1);
        assert(expr(222) == 1);
        assert(expr(11) == 2);
        assert(expr(14) == 2);
        assert(expr(111) == 2);
        assert(expr(210) == 2);

        import std.exception : assertThrown;
        assertThrown(Parser("").compile());
        assertThrown(Parser("n?1").compile());
        assertThrown(Parser("(2-1").compile());
        assertThrown(Parser("p").compile());
        assertThrown(Parser("1+2;").compile());
        // Numbers that do not fit into int are reported as plural form errors.
        assertThrown!PluralFormException(Parser("99999999999").compile());
    }
}

import std.exception : assumeUnique, enforce;
import std.range : iota, assumeSorted, drop;
import std.algorithm.iteration : map, splitter;
import std.algorithm.searching : all, find, findSkip, skipOver;
import std.algorithm.sorting : isSorted;
import std.string : lineSplitter, stripRight;
import std.typecons : tuple;

/**
 * Struct representing .mo file.
 *
 * Default constructed object returns untranslated messages.
 */
@safe struct MoFile
{
    /**
     * Read from file.
     */
    @trusted this(string fileName) {
        import std.file : read;
        this(read(fileName).assumeUnique);
    }

    /**
     * Constructor from data.
     * Data must be immutable and live as long as translated messages are used, because it's used to return strings.
     * Throws:
     *  $(D mofile.MoFileException) if data is in invalid or unsupported format.
     *  $(D mofile.PluralFormException) if plural form expression could not be parsed.
     */
    @safe this(immutable(void)[] data) pure {
        this.data = data;
        const magic = readValue!int(0);
        if (magic != 0x950412de) {
            throw new MoFileException("Wrong magic");
        }
        const revision = readValue!int(int.sizeof);
        if (revision != 0) {
            throw new MoFileException("Unknown revision");
        }

        baseOffsetOrig = readValue!int(int.sizeof*3);
        baseOffsetTr = readValue!int(int.sizeof*4);
        count = readValue!int(int.sizeof*2);

        if (count <= 0) {
            throw new MoFileException("Invalid count of msgids, must be at least 1");
        }

        // Entry 0 is the reserved empty msgid; lookup relies on the rest being sorted.
        auto mapped = iota(1,count).map!(i => getMessage(baseOffsetOrig, i));
        enforce!MoFileException(mapped.isSorted, "Invalid .mo file: message ids are not sorted");
        enforce!MoFileException(mapped.all!"!a.empty", "Some msgid besides the reserved one is empty");

        string headerText = getMessage(baseOffsetTr, 0);
        foreach(line; headerText.lineSplitter) {
            if (line.skipOver("Plural-Forms:")) {
                if (line.findSkip("plural=")) {
                    string expr = line.stripRight("\n\r;");
                    auto parser = Parser(expr);
                    compiled = parser.compile();
                }
            }
        }
    }

    /**
     * .mo file header that includes some info like creation date, language and translator's name.
     */
    string header() pure const {
        if (count)
            return getMessage(baseOffsetTr, 0);
        return string.init;
    }

    /**
     * Get translated message.
     * Params:
     *  msgid = Message id (usually untranslated string)
     * Returns: Translated message for the msgid.
     *  If translation for this msgid does not exist or MoFile is default constructed the msgid is returned.
     */
    string gettext(string msgid) pure const {
        const int index = getIndex(msgid);
        if (index >= 0) {
            string translated = getMessage(baseOffsetTr, index);
            auto splitted = translated.splitter('\0');
            if (!splitted.empty && splitted.front.length)
                return splitted.front;
        }
        return msgid;
    }

    /**
     * Get translated message considering plural forms.
     * Params:
     *  msgid = Untranslated message in singular form
     *  msgid_plural = Untranslated message in plural form.
     *  n = Number to calculate a plural form.
     * Returns: Translated string in plural form dependent on number n.
     *  If translation for this msgid does not exist or MoFile is default constructed then the msgid is returned if n == 1 and msgid_plural otherwise.
     */
    string ngettext(string msgid, string msgid_plural, int n) pure const {
        const int index = getIndex(msgid);
        if (compiled !is null && index >= 0) {
            string translated = getMessage(baseOffsetTr, index);
            auto splitted = translated.splitter('\0');
            if (!splitted.empty && splitted.front.length) {
                const int pluralForm = compiled(n);
                // A negative form index can never select a translation.
                if (pluralForm >= 0) {
                    auto forms = splitted.drop(pluralForm);
                    if (!forms.empty)
                        return forms.front;
                }
            }
        }
        return n == 1 ? msgid : msgid_plural;
    }

private:
    // Index of the message in the tables: 0 for the empty message, -1 if not found.
    @trusted int getIndex(string message) pure const {
        if (data.length == 0)
            return -1;
        if (message.length == 0)
            return 0;
        // Only the part before the first '\0' (the singular form) takes part in the lookup.
        auto sorted = iota(1, count).map!(i => tuple(i, getMessage(baseOffsetOrig, i).splitter('\0').front)).assumeSorted!"a[1] < b[1]";
        auto found = sorted.equalRange(tuple(0, message));
        if (found.empty) {
            return -1;
        } else {
            return found.front[0];
        }
    }

    // Read a value of type T stored at the byte offset.
    // Throws MoFileException if the value does not lie entirely inside data.
    @trusted T readValue(T)(size_t offset) pure const
    {
        // Written so that no addition can wrap around.
        if (offset > data.length || data.length - offset < T.sizeof) {
            throw new MoFileException("Value is out of bounds");
        }
        // Byte-wise copy: the offset comes from the file and may be unaligned for T.
        T value;
        (cast(ubyte*) &value)[0 .. T.sizeof] = (cast(const(ubyte)[]) data)[offset .. offset + T.sizeof];
        return value;
    }

    // Slice of data with the given length and offset, viewed as a string.
    // Throws MoFileException if the slice does not lie entirely inside data.
    @trusted string readString(int len, int offset) pure const
    {
        // Both values come from the file; negative ones must not wrap into valid indices.
        if (len < 0 || offset < 0) {
            throw new MoFileException("String is out of bounds");
        }
        const size_t start = offset;
        const size_t length = len;
        if (start > data.length || data.length - start < length) {
            throw new MoFileException("String is out of bounds");
        }
        return cast(string) data[start .. start + length];
    }

    // Message number i described by the (length, offset) table located at offset.
    @trusted string getMessage(int offset, int i) pure const {
        if (offset < 0 || i < 0) {
            throw new MoFileException("Value is out of bounds");
        }
        // 64-bit arithmetic so the entry position cannot wrap where size_t is 32-bit.
        const ulong entry = cast(ulong) offset + cast(ulong) i * (int.sizeof * 2);
        if (entry > data.length) {
            throw new MoFileException("Value is out of bounds");
        }
        const size_t position = cast(size_t) entry;
        return readString(readValue!int(position), readValue!int(position + int.sizeof));
    }

    int count;
    int baseOffsetOrig;
    int baseOffsetTr;
    immutable(void[]) data;
    Plural compiled;
}

unittest
{
    import std.exception : assertThrown;

    MoFile moFile;
    assert(moFile.header.length == 0);
    assert(moFile.gettext("Hello") == "Hello");
    assert(moFile.ngettext("File", "Files", 1) == "File");
    assert(moFile.ngettext("File", "Files", 2) == "Files");
    assert(moFile.ngettext("File", "Files", 0) == "Files");

    // Build a minimal .mo image in native byte order: 5 header ints,
    // the msgid table, the translation table and then the strings.
    static immutable(void)[] buildMo(const string[] ids, const string[] trs) {
        const n = cast(int) ids.length;
        int[] table = [cast(int) 0x950412de, 0, n, 20, 20 + n * 8];
        const int base = 20 + n * 16;
        ubyte[] strings;
        foreach (group; [ids, trs]) {
            foreach (s; group) {
                table ~= [cast(int) s.length, base + cast(int) strings.length];
                strings ~= cast(const(ubyte)[]) s;
                strings ~= 0;
            }
        }
        return (cast(ubyte[]) table ~ strings).idup;
    }

    auto german = MoFile(buildMo(
        ["", "File\0Files"],
        ["Plural-Forms: nplurals=2; plural=n != 1;\n", "Datei\0Dateien"]));
    assert(german.header == "Plural-Forms: nplurals=2; plural=n != 1;\n");
    assert(german.gettext("File") == "Datei");
    assert(german.gettext("Missing") == "Missing");
    assert(german.ngettext("File", "Files", 1) == "Datei");
    assert(german.ngettext("File", "Files", 5) == "Dateien");
    assert(german.ngettext("Dog", "Dogs", 5) == "Dogs");

    // A negative string offset is reported as invalid data.
    int[] broken = [cast(int) 0x950412de, 0, 1, 20, 28, 0, 36, 8, -4, 0, 0];
    assertThrown!MoFileException(MoFile((cast(ubyte[]) broken).idup));
    // Truncated data is reported as invalid data as well.
    assertThrown!MoFileException(MoFile((cast(ubyte[]) broken[0 .. 3]).idup));
}