1 /**
2  * Parse .mo files and find translated messages.
3  * Authors:
4  *  $(LINK2 https://github.com/FreeSlave, Roman Chistokhodov)
5  * Copyright:
6  *  Roman Chistokhodov, 2018
7  * License:
8  *  $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
9  * See_Also:
10  *  $(LINK2 https://www.gnu.org/software/gettext/manual/html_node/MO-Files.html, The Format of GNU MO Files)
11  */
12 
13 module mofile;
/**
 * Exception thrown when a plural form expression cannot be parsed or evaluated.
 */
class PluralFormException : Exception
{
    /// Passes the message, source location and chained throwable on to $(D Exception).
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable nextInChain = null)
        @safe pure nothrow @nogc
    {
        super(msg, file, line, nextInChain);
    }
}
21 
/**
 * Exception thrown when .mo data is malformed or in an unsupported format.
 */
class MoFileException : Exception
{
    /// Passes the message, source location and chained throwable on to $(D Exception).
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable nextInChain = null)
        @safe pure nothrow @nogc
    {
        super(msg, file, line, nextInChain);
    }
}
29 
30 private @safe
31 {
32     import std.conv : parse;
33     import std.ascii;
34     enum : ushort {
35         SHL = ubyte.max + 1,
36         SHR,
37         AND,
38         OR,
39         LTE,
40         GTE,
41         EQ,
42         NEQ,
43         NUM,
44     }
45 
46     class Plural
47     {
48     pure:
49         abstract int opCall(int n = 0) const;
50         abstract Plural clone();
51     }
52 
53     class Unary : Plural
54     {
55     pure:
56         this(Plural op) {
57             op1 = op;
58         }
59     protected:
60         Plural op1;
61     }
62 
63     class Binary : Plural
64     {
65     pure:
66         this(Plural first, Plural second) {
67             op1 = first;
68             op2 = second;
69         }
70     protected:
71         Plural op1, op2;
72     }
73 
74     final class Number : Plural
75     {
76     pure:
77         this(int number) {
78             num = number;
79         }
80         override Plural clone() {
81             return new Number(num);
82         }
83         override int opCall(int) const {
84             return num;
85         }
86     private:
87         int num;
88     }
89 
90 
91     final class UnaryOp(string op) : Unary
92     {
93     pure:
94         this(Plural op1) {
95             super(op1);
96         }
97         override int opCall(int n) const {
98             return mixin(op ~ " op1(n)");
99         }
100         override Plural clone() {
101             return new UnaryOp!(op)(op1.clone());
102         }
103     }
104 
105     final class BinaryOp(string op) : Binary
106     {
107     pure:
108         this(Plural first, Plural second) {
109             super(first, second);
110         }
111         override int opCall(int n) const {
112             return mixin("op1(n)" ~ op ~ "op2(n)");
113         }
114         override Plural clone() {
115             return new BinaryOp!(op)(op1.clone(), op2.clone());
116         }
117     }
118 
119     final class BinaryOpD(string op) : Binary
120     {
121     pure:
122         this(Plural first, Plural second) {
123             super(first, second);
124         }
125         override int opCall(int n) const {
126             int v2 = op2(n);
127             if (v2 == 0) {
128                 throw new PluralFormException("Division by zero during plural form computation");
129             }
130             return mixin("op1(n)" ~ op ~ "v2");
131         }
132         override Plural clone() {
133             return new BinaryOp!(op)(op1.clone(), op2.clone());
134         }
135     }
136 
137     alias UnaryOp!"!" Not;
138     alias UnaryOp!"-" Minus;
139     alias UnaryOp!"~" Invert;
140 
141     alias BinaryOp!"*" Mul;
142     alias BinaryOpD!"/" Div;
143     alias BinaryOpD!"%" Mod;
144 
145     alias BinaryOp!"+" Add;
146     alias BinaryOp!"-" Sub;
147 
148     alias BinaryOp!"<<" Shl;
149     alias BinaryOp!">>" Shr;
150 
151     alias BinaryOp!">" Gt;
152     alias BinaryOp!"<" Lt;
153     alias BinaryOp!">=" Gte;
154     alias BinaryOp!"<=" Lte;
155 
156     alias BinaryOp!"==" Eq;
157     alias BinaryOp!"!=" Neq;
158 
159     alias BinaryOp!"&" BinAnd;
160     alias BinaryOp!"^" BinXor;
161     alias BinaryOp!"|" BinOr;
162 
163     alias BinaryOp!"&&" And;
164     alias BinaryOp!"||" Or;
165 
166     unittest
167     {
168         Plural op = new Mul(new Number(5), new Minus(new Number(10)));
169         assert(op() == -50);
170         op = new Eq(new Number(42), new Add(new Number(20), new Number(22)));
171         assert(op() == 1);
172         op = new Div(new Number(12), new Number(3));
173         assert(op() == 4);
174     }
175 
176     struct Tokenizer
177     {
178     pure:
179         this(string contents) {
180             content = contents;
181             get();
182         }
183 
184         @property ushort front() const pure nothrow @nogc {
185             return current;
186         }
187         @property bool empty() const pure nothrow @nogc {
188             return current == 0;
189         }
190         void popFront() {
191             get();
192         }
193         int getNumber() {
194             if (current == NUM)
195                 return number;
196             else
197                 throw new PluralFormException("Not a number");
198         }
199     private:
200         @trusted void get() {
201             while(content.length > pos && isWhite(content[pos])) {
202                 pos++;
203             }
204             if (pos >= content.length) {
205                 current = 0;
206                 return;
207             }
208             if (content.length >= pos+2) {
209                 pos += 2;
210                 switch(content[pos-2..pos]) {
211                     case "<<": current = SHL; return;
212                     case ">>": current = SHR; return;
213                     case "&&": current = AND; return;
214                     case "||": current = OR; return;
215                     case "<=": current = LTE; return;
216                     case ">=": current = GTE; return;
217                     case "==": current = EQ; return;
218                     case "!=": current = NEQ; return;
219                     default: pos -= 2; break;
220                 }
221             }
222             if (isDigit(content[pos])) {
223                 auto tmp = content[pos..$];
224                 number = parse!int(tmp);
225                 current = NUM;
226                 pos += tmp.ptr - (content.ptr + pos);
227             } else {
228                 current = cast(ushort)content[pos];
229                 pos++;
230             }
231         }
232 
233         int number;
234         ushort current;
235         size_t pos;
236         string content;
237     }
238 
239     unittest
240     {
241         string contents = "n %10 ==1\n";
242         auto tokenizer = Tokenizer(contents);
243         assert(!tokenizer.empty);
244         assert(tokenizer.front == 'n');
245         tokenizer.popFront();
246         assert(tokenizer.front == '%');
247         tokenizer.popFront();
248         assert(tokenizer.front == NUM);
249         assert(tokenizer.getNumber == 10);
250         tokenizer.popFront();
251         assert(tokenizer.front == EQ);
252         tokenizer.popFront();
253         assert(tokenizer.front == NUM);
254         assert(tokenizer.getNumber == 1);
255         tokenizer.popFront();
256         assert(tokenizer.empty);
257 
258         tokenizer = Tokenizer("");
259         assert(tokenizer.empty);
260     }
261 
262     final class Variable : Plural
263     {
264     pure:
265         this() {
266         }
267         override int opCall(int n) const {
268             return n;
269         }
270         override Plural clone() {
271             return new Variable();
272         }
273     }
274 
275     final class Conditional : Plural
276     {
277     pure:
278         this(Plural cond, Plural res, Plural alt) {
279             this.cond = cond;
280             this.res = res;
281             this.alt = alt;
282         }
283         override int opCall(int n) const {
284             return cond(n) ? res(n) : alt(n);
285         }
286         override Plural clone() {
287             return new Conditional(cond, res, alt);
288         }
289     private:
290         Plural cond, res, alt;
291     }
292 
293     struct Parser
294     {
295     pure:
296         this(Tokenizer tokenizer) {
297             t = tokenizer;
298         }
299 
300         this(string content) {
301             this(Tokenizer(content));
302         }
303 
304         Plural compile() {
305             Plural expr = condExpr();
306             if (expr && !t.empty) {
307                 throw new PluralFormException("Not in the end");
308             }
309             return expr;
310         }
311 
312     private:
313         Plural valueExpr() {
314             if (t.front == '(') {
315                 t.popFront();
316                 Plural op = condExpr();
317                 if (op is null)
318                     return null;
319                 if (t.front != ')')
320                     throw new PluralFormException("Missing ')' in expression");
321                 t.popFront();
322                 return op;
323             } else if (t.front == NUM) {
324                 int number = t.getNumber();
325                 t.popFront();
326                 return new Number(number);
327             } else if (t.front == 'n') {
328                 t.popFront();
329                 return new Variable();
330             } else {
331                 throw new PluralFormException("Unknown operand");
332             }
333             assert(false);
334         }
335 
336         Plural unaryExpr() {
337             Plural op1;
338             ushort op = t.front;
339             if (op == '-' || op == '~' || op == '!') {
340                 t.popFront();
341                 op1 = unaryExpr();
342                 if (op1) {
343                     switch(op) {
344                         case '-': return new Minus(op1);
345                         case '~': return new Invert(op1);
346                         case '!': return new Not(op1);
347                         default: assert(false);
348                     }
349                 } else {
350                     return null;
351                 }
352             } else {
353                 return valueExpr();
354             }
355         }
356 
357         static int getPrec(const ushort op) {
358             switch(op) {
359                 case '/':
360                 case '*':
361                 case '%':
362                     return 10;
363                 case '+':
364                 case '-':
365                     return 9;
366                 case SHL:
367                 case SHR:
368                     return 8;
369                 case '>':
370                 case '<':
371                 case GTE:
372                 case LTE:
373                     return 7;
374                 case  EQ:
375                 case NEQ:
376                     return 6;
377                 case '&':
378                     return 5;
379                 case '^':
380                     return 4;
381                 case '|':
382                     return 3;
383                 case AND:
384                     return 2;
385                 case  OR:
386                     return 1;
387                 default:
388                     return 0;
389             }
390         }
391 
392         static Plural binaryFactory(const ushort op, Plural left, Plural right) {
393             switch(op) {
394                 case '/':  return new Div(left,right);
395                 case '*':  return new Mul(left,right);
396                 case '%':  return new Mod(left,right);
397                 case '+':  return new Add(left,right);
398                 case '-':  return new Sub(left,right);
399                 case SHL:  return new Shl(left,right);
400                 case SHR:  return new Shr(left,right);
401                 case '>':  return new  Gt(left,right);
402                 case '<':  return new  Lt(left,right);
403                 case GTE:  return new Gte(left,right);
404                 case LTE:  return new Lte(left,right);
405                 case  EQ:  return new  Eq(left,right);
406                 case NEQ:  return new Neq(left,right);
407                 case '&':  return new BinAnd(left,right);
408                 case '^':  return new BinXor(left,right);
409                 case '|':  return new BinOr(left,right);
410                 case AND:  return new And(left,right);
411                 case  OR:  return new Or(left,right);
412                 default:   return null;
413             }
414         }
415 
416         Plural binaryExpr(const int prec = 1) {
417             assert(prec >= 1 && prec <= 11);
418             Plural op1,op2;
419             if (prec == 11)
420                 op1 = unaryExpr();
421             else
422                 op1 = binaryExpr(prec+1);
423             if (op1 is null)
424                 return null;
425             if (prec != 11) {
426                 while(getPrec(t.front) == prec) {
427                     ushort o = t.front;
428                     t.popFront();
429                     op2 = binaryExpr(prec+1);
430                     if (op2 is null)
431                         return null;
432                     op1 = binaryFactory(o, op1, op2);
433                 }
434             }
435 
436             return op1;
437         }
438 
439         Plural condExpr() {
440             Plural cond, case1, case2;
441             cond = binaryExpr();
442             if(cond is null)
443                 return null;
444             if(t.front == '?') {
445                 t.popFront();
446                 case1 = condExpr();
447                 if(case1 is null)
448                     return null;
449                 if(t.front != ':')
450                     throw new PluralFormException("Missing ':' in conditional operator");
451                 t.popFront();
452                 case2 = condExpr();
453                 if(case2 is null)
454                     return null;
455             } else {
456                 return cond;
457             }
458             return new Conditional(cond,case1,case2);
459         }
460 
461         Tokenizer t;
462     }
463 
464     unittest
465     {
466         auto parser = new Parser("(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)");
467         auto expr = parser.compile();
468         assert(expr !is null);
469         assert(expr(1) == 0);
470         assert(expr(101) == 0);
471         assert(expr(2) == 1);
472         assert(expr(24) == 1);
473         assert(expr(104) == 1);
474         assert(expr(222) == 1);
475         assert(expr(11) == 2);
476         assert(expr(14) == 2);
477         assert(expr(111) == 2);
478         assert(expr(210) == 2);
479 
480         import std.exception : assertThrown;
481         assertThrown(new Parser("").compile());
482         assertThrown(new Parser("n?1").compile());
483         assertThrown(new Parser("(2-1").compile());
484         assertThrown(new Parser("p").compile());
485         assertThrown(new Parser("1+2;").compile());
486     }
487 }
488 
489 import std.exception : assumeUnique, enforce;
490 import std.range : iota, assumeSorted, drop;
491 import std.algorithm.iteration : map, splitter;
492 import std.algorithm.searching : all, find, findSkip, skipOver;
493 import std.algorithm.sorting : isSorted;
494 import std.string : lineSplitter, stripRight;
495 import std.typecons : tuple;
496 
/**
 * Struct representing .mo file.
 *
 * Default constructed object returns untranslated messages.
 */
@safe struct MoFile
{
    /**
     * Read from file.
     * Throws:
     *  Whatever $(D std.file.read) throws if the file can't be read, and the same exceptions as the constructor from data.
     */
    @trusted this(string fileName) {
        import std.file : read;
        this(read(fileName).assumeUnique);
    }

    /**
     * Constructor from data.
     * Data must be immutable and live as long as translated messages are used, because it's used to return strings.
     * Throws:
     * $(D mofile.MoFileException) if data is in invalid or unsupported format.
     * $(D mofile.PluralFormException) if plural form expression could not be parsed.
     */
    @safe this(immutable(void)[] data) pure {
        this.data = data;
        // Values are read in host byte order, so data whose magic number does not
        // match in that order is rejected here.
        const magic = readValue!int(0);
        if (magic != 0x950412de) {
            throw new MoFileException("Wrong magic");
        }
        const revision = readValue!int(int.sizeof);
        if (revision != 0) {
            throw new MoFileException("Unknown revision");
        }

        // Header fields: [2] number of strings, [3] offset of the table of
        // original strings, [4] offset of the table of translations.
        baseOffsetOrig = readValue!int(int.sizeof*3);
        baseOffsetTr = readValue!int(int.sizeof*4);
        count = readValue!int(int.sizeof*2);

        if (count <= 0) {
            throw new MoFileException("Invalid count of msgids, must be at least 1");
        }

        // Entry 0 is the reserved one whose translation is the header; the
        // remaining msgids must be sorted because lookups use binary search.
        auto mapped = iota(1,count).map!(i => getMessage(baseOffsetOrig, i));
        enforce!MoFileException(mapped.isSorted, "Invalid .mo file: message ids are not sorted");
        enforce!MoFileException(mapped.all!"!a.empty", "Some msgid besides the reserved one is empty");

        string header = getMessage(baseOffsetTr, 0);
        foreach(line; header.lineSplitter) {
            if (line.skipOver("Plural-Forms:")) {
                if (line.findSkip("plural=")) {
                    // Drop the terminating ';' together with any blanks around it,
                    // otherwise "expr; " would leave a stray ';' token behind.
                    string expr = line.stripRight("\n\r; \t");
                    auto parser = new Parser(expr);
                    compiled = parser.compile();
                }
            }
        }
    }

    /**
     * .mo file header that includes some info like creation date, language and translator's name.
     * Returns: The header, or an empty string if MoFile is default constructed.
     */
    string header() pure const {
        if (count)
            return getMessage(baseOffsetTr, 0);
        return string.init;
    }

    /**
     * Get translated message.
     * Params:
     *  msgid = Message id (usually untranslated string)
     * Returns: Translated message for the msgid.
     *  If translation for this msgid does not exist or MoFile is default constructed the msgid is returned.
     */
    string gettext(string msgid) pure const {
        int index = getIndex(msgid);
        if (index >= 0) {
            string translated = getMessage(baseOffsetTr, index);
            // Translations with plural forms are '\0'-separated; the first part is the singular form.
            auto splitted = translated.splitter('\0');
            if (!splitted.empty && splitted.front.length)
                return splitted.front;
        }
        return msgid;
    }

    /**
     * Get translated message considering plural forms.
     * Params:
     *  msgid = Untranslated message in singular form
     *  msgid_plural = Untranslated message in plural form.
     *  n = Number to calculate a plural form.
     * Returns: Translated string in plural form dependent on number n.
     *  If translation for this msgid does not exist or MoFile is default constructed then the msgid is returned if n == 1 and msgid_plural otherwise.
     */
    string ngettext(string msgid, string msgid_plural, int n) pure const {
        int index = getIndex(msgid);
        if (compiled !is null && index >= 0) {
            string translated = getMessage(baseOffsetTr, index);
            auto splitted = translated.splitter('\0');
            if (!splitted.empty && splitted.front.length) {
                // The plural expression yields the index of the form to use.
                int pluralForm = compiled(n);
                auto forms = splitted.drop(pluralForm);
                if (!forms.empty)
                    return forms.front;
            }
        }
        return n == 1 ? msgid : msgid_plural;
    }

private:
    /// Index of the entry whose singular msgid equals message: 0 for the empty message, -1 if absent or if there is no data.
    @trusted int getIndex(string message) pure const {
        if (data.length == 0)
            return -1;
        if (message.length == 0)
            return 0;
        // Binary search over the msgids; only the part before the first '\0'
        // (the singular form) takes part in the comparison.
        auto sorted = iota(1, count).map!(i => tuple(i, getMessage(baseOffsetOrig, i).splitter('\0').front)).assumeSorted!"a[1] < b[1]";
        auto found = sorted.equalRange(tuple(0, message));
        if (found.empty) {
            return -1;
        } else {
            return found.front[0];
        }
    }

    /**
     * Reads a value of type T stored at offset, in host byte order.
     * Throws: MoFileException if the value does not lie entirely inside data.
     */
    @trusted T readValue(T)(size_t offset) pure const
    {
        // Phrased as a subtraction so that a huge offset (for example a negative
        // int converted to size_t) cannot wrap the sum and slip past the check.
        if (offset > data.length || data.length - offset < T.sizeof) {
            throw new MoFileException("Value is out of bounds");
        }
        T value = *(cast(const(T)*)data[offset..(offset+T.sizeof)].ptr);
        return value;
    }

    /**
     * Returns len bytes of data starting at offset as a string, without copying.
     * Throws: MoFileException if len or offset is negative or the range does not lie entirely inside data.
     */
    @trusted string readString(int len, int offset) pure const
    {
        // Both values come straight from the file; reject negative ones before
        // they are turned into unsigned indices.
        if (len < 0 || offset < 0) {
            throw new MoFileException("String is out of bounds");
        }
        const size_t start = offset;
        const size_t size = len;
        if (start > data.length || data.length - start < size) {
            throw new MoFileException("String is out of bounds");
        }
        string s = cast(string)data[start..start+size];
        return s;
    }

    /// Reads string number i from the table at offset; each table entry is a (length, offset) pair of ints.
    @trusted string getMessage(int offset, int i) pure const {
        return readString(readValue!int(offset + i*int.sizeof*2), readValue!int(offset + i*int.sizeof*2 + int.sizeof));
    }

    int count;                  // number of strings, 0 when default constructed
    int baseOffsetOrig;         // offset of the table of original strings
    int baseOffsetTr;           // offset of the table of translated strings
    immutable(void[]) data;     // raw .mo contents that returned strings point into
    Plural compiled;            // compiled plural expression, null if the header has none
}
650 
unittest
{
    // A default constructed MoFile has no data: the header is empty and every
    // lookup falls back to the untranslated arguments.
    MoFile untranslated;
    assert(untranslated.header.length == 0);
    assert(untranslated.gettext("Hello") == "Hello");

    // Only n == 1 selects the singular form.
    const string[3] expected = ["Files", "File", "Files"];
    foreach (n, form; expected) {
        assert(untranslated.ngettext("File", "Files", cast(int)n) == form);
    }
}