svghmi/i18n.py
changeset 3915 b5017dd5c049
parent 3750 f62625418bff
child 3918 9f0ef23569cb
equal deleted inserted replaced
3914:b2aa31f3a0ce 3915:b5017dd5c049
    13 import subprocess
    13 import subprocess
    14 import time
    14 import time
    15 import ast
    15 import ast
    16 import wx
    16 import wx
    17 import re
    17 import re
       
    18 from email.parser import HeaderParser
    18 
    19 
    19 # to have it for python 2, had to install 
    20 # to have it for python 2, had to install 
    20 # https://pypi.org/project/pycountry/18.12.8/
    21 # https://pypi.org/project/pycountry/18.12.8/
    21 # python2 -m pip install pycountry==18.12.8 --user
    22 # python2 -m pip install pycountry==18.12.8 --user
    22 import pycountry
    23 import pycountry
    23 from dialogs import MessageBoxOnce
    24 from dialogs import MessageBoxOnce
       
    25 from POULibrary import UserAddressedException
    24 
    26 
    25 cmd_parser = re.compile(r'(?:"([^"]+)"\s*|([^\s]+)\s*)?')
    27 cmd_parser = re.compile(r'(?:"([^"]+)"\s*|([^\s]+)\s*)?')
    26 
    28 
    27 def open_pofile(pofile):
    29 def open_pofile(pofile):
    28     """ Opens PO file with POEdit """
    30     """ Opens PO file with POEdit """
    65     """ Converts XML tree from 'extract_i18n' templates into a list of tuples """
    67     """ Converts XML tree from 'extract_i18n' templates into a list of tuples """
    66     messages = []
    68     messages = []
    67 
    69 
    68     for msg in msgs:
    70     for msg in msgs:
    69         messages.append((
    71         messages.append((
    70             "\n".join([line.text for line in msg]),
    72             b"\n".join([line.text.encode() for line in msg]),
    71             msg.get("label"), msg.get("id")))
    73             msg.get("label").encode(), msg.get("id").encode()))
    72 
    74 
    73     return messages
    75     return messages
    74 
    76 
    75 def SaveCatalog(fname, messages):
    77 def SaveCatalog(fname, messages):
    76     """ Save messages given as list of tupple (msg,label,id) in POT file """
    78     """ Save messages given as list of tupple (msg,label,id) in POT file """
    77     w = POTWriter()
    79     w = POTWriter()
    78     w.ImportMessages(messages)
    80     w.ImportMessages(messages)
    79 
    81 
    80     with open(fname, 'w') as POT_file:
    82     with open(fname, 'wb') as POT_file:
    81         w.write(POT_file)
    83         w.write(POT_file)
    82 
    84 
    83 def GetPoFiles(dirpath):
    85 def GetPoFiles(dirpath):
    84     po_files = [fname for fname in os.listdir(dirpath) if fname.endswith(".po")]
    86     po_files = [fname for fname in os.listdir(dirpath) if fname.endswith(".po")]
    85     po_files.sort()
    87     po_files.sort()
    89     """ Read all PO files from a directory and return a list of (langcode, translation_dict) tuples """
    91     """ Read all PO files from a directory and return a list of (langcode, translation_dict) tuples """
    90 
    92 
    91     translations = []
    93     translations = []
    92     for translation_name, po_path in GetPoFiles(dirpath):
    94     for translation_name, po_path in GetPoFiles(dirpath):
    93         r = POReader()
    95         r = POReader()
    94         with open(po_path, 'r') as PO_file:
    96         r.read(po_path)
    95             r.read(PO_file)
    97         translations.append((translation_name, r.get_messages()))
    96             translations.append((translation_name, r.get_messages()))
       
    97     return translations
    98     return translations
    98 
    99 
    99 def MatchTranslations(translations, messages, errcallback):
   100 def MatchTranslations(translations, messages, errcallback):
   100     """
   101     """
   101     Matches translations against original message catalog,
   102     Matches translations against original message catalog,
   153     msgsroot = etree.SubElement(result, "messages")
   154     msgsroot = etree.SubElement(result, "messages")
   154     for msgid, msgs in translated_messages:
   155     for msgid, msgs in translated_messages:
   155         msgidel = etree.SubElement(msgsroot, "msgid")
   156         msgidel = etree.SubElement(msgsroot, "msgid")
   156         for msg in msgs:
   157         for msg in msgs:
   157             msgel = etree.SubElement(msgidel, "msg")
   158             msgel = etree.SubElement(msgidel, "msg")
   158             for line in msg.split("\n"):
   159             for line in msg.split(b"\n"):
   159                 lineel = etree.SubElement(msgel, "line")
   160                 lineel = etree.SubElement(msgel, "line")
   160                 lineel.text = escape(line.encode("utf-8")).decode("utf-8")
   161                 lineel.text = escape(line).decode()
   161 
   162 
   162     return result
   163     return result
   163 
   164 
   164 
   165 # Code below is based on :
   165 
   166 #  cpython/Tools/i18n/pygettext.py
   166 locpfx = '#:svghmi.svg:'
   167 #  cpython/Tools/i18n/msgfmt.py
       
   168 
       
   169 locpfx = b'#:svghmi.svg:'
   167 
   170 
   168 pot_header = '''\
   171 pot_header = '''\
   169 # SOME DESCRIPTIVE TITLE.
   172 # SOME DESCRIPTIVE TITLE.
   170 # Copyright (C) YEAR ORGANIZATION
   173 # Copyright (C) YEAR ORGANIZATION
   171 # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
   174 # FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
   183 "Generated-By: SVGHMI 1.0\\n"
   186 "Generated-By: SVGHMI 1.0\\n"
   184 
   187 
   185 '''
   188 '''
   186 escapes = []
   189 escapes = []
   187 
   190 
   188 def make_escapes(pass_iso8859):
   191 def make_escapes():
   189     global escapes
   192     global escapes
   190     escapes = [chr(i) for i in range(256)]
   193     escapes = [b"\%03o" % i for i in range(128)]
   191     if pass_iso8859:
   194     for i in range(32, 127):
   192         # Allow iso-8859 characters to pass through so that e.g. 'msgid
   195         escapes[i] = bytes([i])
   193         # "Höhe"' would result not result in 'msgid "H\366he"'.  Otherwise we
   196     escapes[ord('\\')] = b'\\\\'
   194         # escape any character outside the 32..126 range.
   197     escapes[ord('\t')] = b'\\t'
   195         mod = 128
   198     escapes[ord('\r')] = b'\\r'
   196     else:
   199     escapes[ord('\n')] = b'\\n'
   197         mod = 256
   200     escapes[ord('\"')] = b'\\"'
   198     for i in range(mod):
   201 
   199         if not(32 <= i <= 126):
   202 make_escapes()
   200             escapes[i] = "\\%03o" % i
       
   201     escapes[ord('\\')] = '\\\\'
       
   202     escapes[ord('\t')] = '\\t'
       
   203     escapes[ord('\r')] = '\\r'
       
   204     escapes[ord('\n')] = '\\n'
       
   205     escapes[ord('\"')] = '\\"'
       
   206 
       
   207 make_escapes(pass_iso8859 = True)
       
   208 
       
   209 EMPTYSTRING = ''
       
   210 
   203 
   211 def escape(s):
   204 def escape(s):
   212     global escapes
   205     l = [escapes[c] if c < 128 else bytes([c]) for c in s]
   213     s = list(s)
   206     return b''.join(l)
   214     for i in range(len(s)):
   207     #return bytes([escapes[c] if c < 128 else c for c in s])
   215         s[i] = escapes[ord(s[i])]
       
   216     return EMPTYSTRING.join(s)
       
   217 
   208 
   218 def normalize(s):
   209 def normalize(s):
   219     # This converts the various Python string types into a format that is
   210     # This converts the various Python string types into a format that is
   220     # appropriate for .po files, namely much closer to C style.
   211     # appropriate for .po files, namely much closer to C style.
   221     lines = s.split('\n')
   212     lines = s.split(b'\n')
   222     if len(lines) == 1:
   213     if len(lines) == 1:
   223         s = '"' + escape(s) + '"'
   214         s = b'"' + escape(s) + b'"'
   224     else:
   215     else:
   225         if not lines[-1]:
   216         if not lines[-1]:
   226             del lines[-1]
   217             del lines[-1]
   227             lines[-1] = lines[-1] + '\n'
   218             lines[-1] = lines[-1] + b'\n'
   228         for i in range(len(lines)):
   219         for i in range(len(lines)):
   229             lines[i] = escape(lines[i])
   220             lines[i] = escape(lines[i])
   230         lineterm = '\\n"\n"'
   221         lineterm = b'\\n"\n"'
   231         s = '""\n"' + lineterm.join(lines) + '"'
   222         s = b'""\n"' + lineterm.join(lines) + b'"'
   232     return s
   223     return s
   233 
       
   234 
   224 
   235 class POTWriter:
   225 class POTWriter:
   236     def __init__(self):
   226     def __init__(self):
   237         self.__messages = {}
   227         self.__messages = {}
   238 
   228 
   239     def ImportMessages(self, msgs):
   229     def ImportMessages(self, msgs):
   240         for  msg, label, svgid in msgs:
   230         for  msg, label, svgid in msgs:
   241             self.addentry(msg.encode("utf-8"), label, svgid)
   231             self.addentry(msg, label, svgid)
   242 
   232 
   243     def addentry(self, msg, label, svgid):
   233     def addentry(self, msg, label, svgid):
   244         entry = (label, svgid)
   234         entry = (label, svgid)
   245         self.__messages.setdefault(msg, set()).add(entry)
   235         self.__messages.setdefault(msg, set()).add(entry)
   246 
   236 
   247     def write(self, fp):
   237     def write(self, fp):
   248         timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
   238         timestamp = time.strftime('%Y-%m-%d %H:%M%z')
   249         print(pot_header % {'time': timestamp}, file=fp)
   239         header = pot_header % {'time': timestamp}
       
   240         fp.write(header.encode())
   250         reverse = {}
   241         reverse = {}
   251         for k, v in list(self.__messages.items()):
   242         for k, v in self.__messages.items():
   252             keys = list(v)
   243             keys = list(v)
   253             keys.sort()
   244             keys.sort()
   254             reverse.setdefault(tuple(keys), []).append((k, v))
   245             reverse.setdefault(tuple(keys), []).append((k, v))
   255         rkeys = list(reverse.keys())
   246         rkeys = sorted(reverse.keys())
   256         rkeys.sort()
       
   257         for rkey in rkeys:
   247         for rkey in rkeys:
   258             rentries = reverse[rkey]
   248             rentries = reverse[rkey]
   259             rentries.sort()
   249             rentries.sort()
   260             for k, v in rentries:
   250             for k, v in rentries:
   261                 v = list(v)
   251                 v = list(v)
   262                 v.sort()
   252                 v.sort()
   263                 locline = locpfx
   253                 locline = locpfx
   264                 for label, svgid in v:
   254                 for label, svgid in v:
   265                     d = {'label': label, 'svgid': svgid}
   255                     d = {b'label': label, b'svgid': svgid}
   266                     s = _(' %(label)s:%(svgid)s') % d
   256                     s = b' %(label)s:%(svgid)s' % d
   267                     if len(locline) + len(s) <= 78:
   257                     if len(locline) + len(s) <= 78:
   268                         locline = locline + s
   258                         locline = locline + s
   269                     else:
   259                     else:
   270                         print(locline, file=fp)
   260                         fp.write(locline)
   271                         locline = locpfx + s
   261                         locline = locpfx + s
   272                 if len(locline) > len(locpfx):
   262                 if len(locline) > len(locpfx):
   273                     print(locline, file=fp)
   263                     fp.write(locline)
   274                 print('msgid', normalize(k), file=fp)
   264                 fp.write(b'msgid '+normalize(k))
   275                 print('msgstr ""\n', file=fp)
   265                 fp.write(b'msgstr ""\n')
   276 
   266 
   277 
   267 
   278 class POReader:
   268 class POReader:
   279     def __init__(self):
   269     def __init__(self):
   280         self.__messages = {}
   270         self.__messages = {}
   281 
   271 
   282     def get_messages(self):
   272     def get_messages(self):
   283         return self.__messages
   273         return self.__messages
   284 
   274 
   285     def add(self, msgid, msgstr, fuzzy):
   275     def add(self, ctxt, msgid, msgstr, fuzzy):
   286         "Add a non-fuzzy translation to the dictionary."
   276         "Add a non-fuzzy translation to the dictionary."
   287         if not fuzzy and msgstr and msgid:
   277         if not fuzzy and msgstr and msgid:
   288             self.__messages[msgid.decode('utf-8')] = msgstr.decode('utf-8')
   278             if ctxt is None:
   289 
   279                 self.__messages[msgid] = msgstr
   290     def read(self, fp):
   280             else:
       
   281                 self.__messages[b"%b\x04%b" % (ctxt, id)] = str
       
   282 
       
   283     def read(self, infile):
   291         ID = 1
   284         ID = 1
   292         STR = 2
   285         STR = 2
   293 
   286         CTXT = 3
   294         lines = fp.readlines()
   287 
   295         section = None
   288 
       
   289         with open(infile, 'rb') as f:
       
   290             lines = f.readlines()
       
   291             
       
   292         section = msgctxt = None
   296         fuzzy = 0
   293         fuzzy = 0
       
   294 
       
   295         # Start off assuming Latin-1, so everything decodes without failure,
       
   296         # until we know the exact encoding
       
   297         encoding = 'latin-1'
   297 
   298 
   298         # Parse the catalog
   299         # Parse the catalog
   299         lno = 0
   300         lno = 0
   300         for l in lines:
   301         for l in lines:
       
   302             l = l.decode(encoding)
   301             lno += 1
   303             lno += 1
   302             # If we get a comment line after a msgstr, this is a new entry
   304             # If we get a comment line after a msgstr, this is a new entry
   303             if l[0] == '#' and section == STR:
   305             if l[0] == '#' and section == STR:
   304                 self.add(msgid, msgstr, fuzzy)
   306                 self.add(msgctxt, msgid, msgstr, fuzzy)
   305                 section = None
   307                 section = msgctxt = None
   306                 fuzzy = 0
   308                 fuzzy = 0
   307             # Record a fuzzy mark
   309             # Record a fuzzy mark
   308             if l[:2] == '#,' and 'fuzzy' in l:
   310             if l[:2] == '#,' and 'fuzzy' in l:
   309                 fuzzy = 1
   311                 fuzzy = 1
   310             # Skip comments
   312             # Skip comments
   311             if l[0] == '#':
   313             if l[0] == '#':
   312                 continue
   314                 continue
   313             # Now we are in a msgid section, output previous section
   315             # Now we are in a msgid or msgctxt section, output previous section
   314             if l.startswith('msgid') and not l.startswith('msgid_plural'):
   316             if l.startswith('msgctxt'):
   315                 if section == STR:
   317                 if section == STR:
   316                     self.add(msgid, msgstr, fuzzy)
   318                     self.add(msgctxt, msgid, msgstr, fuzzy)
       
   319                 section = CTXT
       
   320                 l = l[7:]
       
   321                 msgctxt = b''
       
   322             elif l.startswith('msgid') and not l.startswith('msgid_plural'):
       
   323                 if section == STR:
       
   324                     self.add(msgctxt, msgid, msgstr, fuzzy)
       
   325                     if not msgid:
       
   326                         # See whether there is an encoding declaration
       
   327                         p = HeaderParser()
       
   328                         charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
       
   329                         if charset:
       
   330                             encoding = charset
   317                 section = ID
   331                 section = ID
   318                 l = l[5:]
   332                 l = l[5:]
   319                 msgid = msgstr = ''
   333                 msgid = msgstr = b''
   320                 is_plural = False
   334                 is_plural = False
   321             # This is a message with plural forms
   335             # This is a message with plural forms
   322             elif l.startswith('msgid_plural'):
   336             elif l.startswith('msgid_plural'):
   323                 if section != ID:
   337                 if section != ID:
   324                     print('msgid_plural not preceded by msgid on %s:%d' %\
   338                     raise UserAddressedException(
   325                         (infile, lno), file=sys.stderr)
   339                         'msgid_plural not preceded by msgid on %s:%d' % (infile, lno))
   326                     sys.exit(1)
       
   327                 l = l[12:]
   340                 l = l[12:]
   328                 msgid += '\0' # separator of singular and plural
   341                 msgid += b'\0' # separator of singular and plural
   329                 is_plural = True
   342                 is_plural = True
   330             # Now we are in a msgstr section
   343             # Now we are in a msgstr section
   331             elif l.startswith('msgstr'):
   344             elif l.startswith('msgstr'):
   332                 section = STR
   345                 section = STR
   333                 if l.startswith('msgstr['):
   346                 if l.startswith('msgstr['):
   334                     if not is_plural:
   347                     if not is_plural:
   335                         print('plural without msgid_plural on %s:%d' %\
   348                         raise UserAddressedException(
   336                             (infile, lno), file=sys.stderr)
   349                             'plural without msgid_plural on %s:%d' % (infile, lno))
   337                         sys.exit(1)
       
   338                     l = l.split(']', 1)[1]
   350                     l = l.split(']', 1)[1]
   339                     if msgstr:
   351                     if msgstr:
   340                         msgstr += '\0' # Separator of the various plural forms
   352                         msgstr += b'\0' # Separator of the various plural forms
   341                 else:
   353                 else:
   342                     if is_plural:
   354                     if is_plural:
   343                         print('indexed msgstr required for plural on  %s:%d' %\
   355                         raise UserAddressedException(
   344                             (infile, lno), file=sys.stderr)
   356                             'indexed msgstr required for plural on  %s:%d' % (infile, lno))
   345                         sys.exit(1)
       
   346                     l = l[6:]
   357                     l = l[6:]
   347             # Skip empty lines
   358             # Skip empty lines
   348             l = l.strip()
   359             l = l.strip()
   349             if not l:
   360             if not l:
   350                 continue
   361                 continue
   351             l = ast.literal_eval(l)
   362             l = ast.literal_eval(l)
   352             if section == ID:
   363             if section == CTXT:
   353                 msgid += l
   364                 msgctxt += l.encode(encoding)
       
   365             elif section == ID:
       
   366                 msgid += l.encode(encoding)
   354             elif section == STR:
   367             elif section == STR:
   355                 msgstr += l
   368                 msgstr += l.encode(encoding)
   356             else:
   369             else:
   357                 print('Syntax error on %s:%d' % (infile, lno), \
   370                 raise UserAddressedException(
   358                       'before:', file=sys.stderr)
   371                     'Syntax error on %s:%d' % (infile, lno) + 'before:\n %s'%l)
   359                 print(l, file=sys.stderr)
       
   360                 sys.exit(1)
       
   361         # Add last entry
   372         # Add last entry
   362         if section == STR:
   373         if section == STR:
   363             self.add(msgid, msgstr, fuzzy)
   374             self.add(msgctxt, msgid, msgstr, fuzzy)
   364 
   375 
   365 
   376 
       
   377