| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2006-2009 Zuza Software Foundation
5 #
6 # This file is part of the Translate Toolkit.
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, see <http://www.gnu.org/licenses/>.
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 from translate.storage import base
26 from translate.lang import data
27 try:
28 from lxml import etree
29 from translate.misc.xml_helpers import *
30 except ImportError, e:
31 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
32
33
def _findAllMatches(text, re_obj):
    """Generate match objects for all L{re_obj} matches in L{text}.

    Matches are yielded from left to right; each search resumes at the
    offset where the previous match ended.

    @param text: the string to search
    @param re_obj: a compiled regular expression object
    @return: a generator yielding one match object per match
    """
    pos = 0
    end = len(text)
    while pos < end:
        match = re_obj.search(text, pos)
        if match is None:
            break
        yield match
        # A zero-width match has end() == start(); resuming the search at
        # that same offset would find it again forever, so step past it.
        if match.end() > match.start():
            pos = match.end()
        else:
            pos = match.end() + 1
43
# TODO: we can now do better with our proper placeables support
# Patterns for inline markup that is wrapped in <ph> elements. Each pattern
# has exactly one capturing group spanning the whole match. They are raw
# strings so that "\$" and "\\" reach the regex engine unchanged.
# NOTE(review): the "$" in the third pattern is not escaped, so it is an
# end-of-string anchor and that pattern can never match text like "%1$lx".
# Escaping it would make it overlap with the fourth pattern (which already
# matches the "%1$l" prefix) -- confirm the intent before changing it.
placeholders = [
    r'(%[diouxXeEfFgGcrs])',
    r'(\\+.?)',
    r'(%[0-9]$lx)',
    r'(%[0-9]\$[a-z])',
    r'(<.+?>)',
]
re_placeholders = [re.compile(ph) for ph in placeholders]
def _getPhMatches(text):
    """Return a list of regexp match objects for all placeholders in L{text}.

    Every pattern in C{re_placeholders} is searched independently, so
    matches coming from different patterns may overlap each other.

    @param text: the string to search for placeholders
    @return: list of match objects ordered by start offset; matches with the
        same start keep the order of the patterns that produced them
    """
    matches = []
    for re_ph in re_placeholders:
        matches.extend(_findAllMatches(text, re_ph))

    # Sort them so they come sequentially. A key function replaces the
    # Python 2-only cmp callback; list.sort() is stable, so ties keep
    # their pattern order.
    matches.sort(key=lambda match: match.start())
    return matches
56
57
59 """
60 A single unit in the file. Provisional work is done to make several
61 languages possible.
62 """
63
64 #The name of the root element of this unit type:(termEntry, tu, trans-unit)
65 rootNode = ""
66 #The name of the per language element of this unit type:(termEntry, tu, trans-unit)
67 languageNode = ""
68 #The name of the innermost element of this unit type:(term, seg)
69 textNode = ""
70
71 namespace = None
72 _default_xml_space = "preserve"
73 """The default handling of spacing in the absence of an xml:space attribute.
74
75 This is mostly for correcting XLIFF behaviour."""
76
78 """Constructs a unit containing the given source string"""
79 if empty:
80 self._rich_source = None
81 self._rich_target = None
82 return
83 self.xmlelement = etree.Element(self.rootNode)
84 #add descrip, note, etc.
85 super(LISAunit, self).__init__(source)
86
def __eq__(self, other):
    """Compare two units by the text of their language nodes.

    Objects that are not LISAunit instances are handed to the parent
    class's comparison. Two LISAunits are equal when they have the same
    number of language nodes and the text of each pair of nodes matches.

    @param other: the object to compare against
    @return: True when the units are considered equal, False otherwise
    """
    if not isinstance(other, LISAunit):
        return super(LISAunit, self).__eq__(other)
    languageNodes = self.getlanguageNodes()
    otherlanguageNodes = other.getlanguageNodes()
    if len(languageNodes) != len(otherlanguageNodes):
        return False
    for mynode, othernode in zip(languageNodes, otherlanguageNodes):
        # NOTE(review): the xml:space setting is read from self.xmlelement
        # for both sides, so other's text is normalised with this unit's
        # rule -- confirm that this is intended.
        xml_space = getXMLspace(self.xmlelement, self._default_xml_space)
        mytext = self.getNodeText(mynode, xml_space)
        othertext = other.getNodeText(othernode, xml_space)
        if mytext != othertext:
            #TODO:^ maybe we want to take children and notes into account
            return False
    return True
102
def namespaced(self, name):
    """Returns name in Clark notation.

    For example namespaced("source") in an XLIFF document might return::
        {urn:oasis:names:tc:xliff:document:1.1}source
    This is needed throughout lxml.

    @param name: the unqualified tag or attribute name
    @return: the name qualified with self.namespace
    """
    # Inside a class body this call resolves to the module-level helper of
    # the same name (presumably star-imported from
    # translate.misc.xml_helpers), not to this method.
    return namespaced(self.namespace, name)
111
def set_source_dom(self, dom_node):
    """Install dom_node as the source language node of this unit.

    When the unit already has language nodes, the first child of the unit
    element is replaced; otherwise dom_node is appended.

    @param dom_node: the XML element to use as the source language node
    """
    if self.getlanguageNodes():
        self.xmlelement[0] = dom_node
    else:
        self.xmlelement.append(dom_node)
118
121 source_dom = property(get_source_dom, set_source_dom)
122
124 text = data.forceunicode(text)
125 self.source_dom = self.createlanguageNode(sourcelang, text, "source")
126
129 source = property(getsource, setsource)
130
132 languageNodes = self.getlanguageNodes()
133 assert len(languageNodes) > 0
134 if dom_node is not None:
135 if append or len(languageNodes) == 1:
136 self.xmlelement.append(dom_node)
137 else:
138 self.xmlelement.insert(1, dom_node)
139 if not append and len(languageNodes) > 1:
140 self.xmlelement.remove(languageNodes[1])
141
def get_target_dom(self, lang=None):
    """Return the target language node of this unit.

    @param lang: language to look up; when it is empty or None the second
        language node (index 1) is used instead
    @return: whatever getlanguageNode() returns for that criterion
    """
    if lang:
        return self.getlanguageNode(lang=lang)
    return self.getlanguageNode(lang=None, index=1)
147 target_dom = property(get_target_dom)
148
150 #XXX: we really need the language - can't really be optional, and we
151 # need to propagate it
152 """Sets the "target" string (second language), or alternatively appends to the list"""
153 text = data.forceunicode(text)
154 #Firstly deal with reinitialising to None or setting to identical string
155 if self.gettarget() == text:
156 return
157 languageNode = self.get_target_dom(None)
158 if not text is None:
159 if languageNode is None:
160 languageNode = self.createlanguageNode(lang, text, "target")
161 self.set_target_dom(languageNode, append)
162 else:
163 if self.textNode:
164 terms = languageNode.iter(self.namespaced(self.textNode))
165 try:
166 languageNode = terms.next()
167 except StopIteration, e:
168 pass
169 languageNode.text = text
170 else:
171 self.set_target_dom(None, False)
172
def gettarget(self, lang=None):
    """Retrieves the "target" text (second entry), or the entry in the
    specified language, if it exists.

    @param lang: optional language passed on to get_target_dom()
    @return: the result of getNodeText() for the selected language node
    """
    target_node = self.get_target_dom(lang)
    xml_space = getXMLspace(self.xmlelement, self._default_xml_space)
    return self.getNodeText(target_node, xml_space)
177 target = property(gettarget, settarget)
178
180 """Returns a xml Element setup with given parameters to represent a
181 single language entry. Has to be overridden."""
182 return None
183
def createPHnodes(self, parent, text):
    """Create the text node in parent containing all the ph tags.

    Text before the first placeholder becomes parent.text, every
    placeholder becomes a <ph> child whose "id" attribute counts up from 1,
    and text following a placeholder becomes that <ph> element's tail.

    @param parent: the XML element that receives the text and <ph> children
    @param text: the string to split into plain text and placeholders
    """
    matches = _getPhMatches(text)
    if not matches:
        parent.text = text
        return

    # Now we know there will definitely be some ph tags
    start = matches[0].start()
    if start:
        parent.text = text[:start]
    lasttag = parent
    ph_id = 0
    for m in matches:
        # Matches come from independent patterns and may overlap. Emitting
        # a match that begins inside text already consumed would move
        # `start` backwards and duplicate text, so such matches are skipped.
        if m.start() < start:
            continue
        # Plain text between the previous placeholder and this one; always
        # empty for the first ph tag.
        pretext = text[start:m.start()]
        if pretext:
            lasttag.tail = pretext
        # ph node
        ph_id += 1
        phnode = etree.SubElement(parent, "ph")
        phnode.set("id", str(ph_id))
        phnode.text = m.group()
        lasttag = phnode
        start = m.end()
    # post text
    if text[start:]:
        lasttag.tail = text[start:]
212
def getlanguageNodes(self):
    """Returns a list of all nodes that contain per language information.

    @return: the direct children of self.xmlelement whose tag is the
        namespaced self.languageNode, as a list
    """
    language_tag = self.namespaced(self.languageNode)
    return list(self.xmlelement.iterchildren(language_tag))
216
def getlanguageNode(self, lang=None, index=None):
    """Retrieves a languageNode either by language or by index.

    @param lang: language to look for; takes precedence over index when
        it is non-empty
    @param index: position in the list returned by getlanguageNodes()
    @return: the matching node, or None when no node has that language or
        the index is past the end of the list
    @raise KeyError: when both lang and index are None
    """
    if lang is None and index is None:
        raise KeyError("No criterea for languageNode given")
    languageNodes = self.getlanguageNodes()
    if lang:
        for node in languageNodes:
            if getXMLlang(node) == lang:
                return node
        return None
    # have to use index
    if index >= len(languageNodes):
        return None
    return languageNodes[index]
232
def getNodeText(self, languageNode, xml_space="preserve"):
    """Retrieves the term from the given languageNode.

    @param languageNode: the per-language element, or None
    @param xml_space: whitespace handling mode handed on to getText()
    @return: the text of the first self.textNode descendant (or of
        languageNode itself when this unit type has no textNode); None when
        languageNode is None or contains no such descendant
    """
    # NOTE(review): the original signature line is not visible here; the
    # "preserve" default is an assumption -- confirm against callers.
    if languageNode is None:
        return None
    if not self.textNode:
        return getText(languageNode, xml_space)
    terms = languageNode.iterdescendants(self.namespaced(self.textNode))
    # iterdescendants() hands back an iterator, never None, so an absent
    # text node shows up as an exhausted iterator. Looping avoids letting
    # StopIteration escape to the caller in that case.
    for term in terms:
        return getText(term, xml_space)
    return None
245
248
251
252 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
253 lambda self, value: self._set_property(self.namespaced('xid'), value))
254
255 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
256 lambda self, value: self._set_property(self.namespaced('rid'), value))
257
262 createfromxmlElement = classmethod(createfromxmlElement)
263
265 """A class representing a file store for one of the LISA file formats."""
266 UnitClass = LISAunit
267 #The root node of the XML document:
268 rootNode = ""
269 #The root node of the content section:
270 bodyNode = ""
271 #The XML skeleton to use for empty construction:
272 XMLskeleton = ""
273
274 namespace = None
275
277 super(LISAfile, self).__init__(unitclass=unitclass)
278 if inputfile is not None:
279 self.parse(inputfile)
280 assert self.document.getroot().tag == self.namespaced(self.rootNode)
281 else:
282 # We strip out newlines to ensure that spaces in the skeleton don't
283 # interfere with the pretty printing of lxml
284 self.parse(self.XMLskeleton.replace("\n", ""))
285 self.setsourcelanguage(sourcelanguage)
286 self.settargetlanguage(targetlanguage)
287 self.addheader()
288 self._encoding = "UTF-8"
289
293
def namespaced(self, name):
    """Returns name in Clark notation.

    For example namespaced("source") in an XLIFF document might return::
        {urn:oasis:names:tc:xliff:document:1.1}source
    This is needed throughout lxml.

    @param name: the unqualified tag or attribute name
    @return: the name qualified with self.namespace
    """
    # Inside a class body this call resolves to the module-level helper of
    # the same name (presumably star-imported from
    # translate.misc.xml_helpers), not to this method.
    return namespaced(self.namespace, name)
302
def initbody(self):
    """Initialises self.body so it never needs to be retrieved from the XML again.

    Also sets self.namespace to the default (unprefixed) namespace of the
    document's root element, or None when the root declares none.
    """
    root = self.document.getroot()
    # The namespace has to be stored first: namespaced() reads it to build
    # the qualified body tag.
    self.namespace = root.nsmap.get(None, None)
    body_path = '//%s' % self.namespaced(self.bodyNode)
    self.body = self.document.find(body_path)
307
def addsourceunit(self, source):
    """Adds and returns a new unit with the given string as first entry.

    @param source: the source string passed to self.UnitClass
    @return: the newly created unit, after it was handed to addunit()
    """
    #TODO: maybe this should rather be called addsourcestring or something?
    newunit = self.UnitClass(source)
    self.addunit(newunit)
    return newunit
314
def addunit(self, unit, new=True):
    """Add unit to this store, giving it the store's namespace.

    @param unit: the unit to add; its namespace attribute is overwritten
        with self.namespace
    @param new: when true the unit's XML element is also appended to
        self.body; parse() passes False for elements it found in the body
    """
    # NOTE(review): the original signature line is not visible here; the
    # new=True default is inferred from the call sites -- confirm.
    unit.namespace = self.namespace
    super(LISAfile, self).addunit(unit)
    if new:
        self.body.append(unit.xmlelement)
320
def __str__(self):
    """Converts to a string containing the file's XML.

    @return: self.document serialised by lxml as pretty-printed UTF-8 with
        an XML declaration
    """
    return etree.tostring(
        self.document,
        pretty_print=True,
        xml_declaration=True,
        encoding='utf-8',
    )
324
def parse(self, xml):
    """Populates this object from the given xml string.

    Sets self.document, self._encoding and (through initbody())
    self.namespace and self.body, then adds one unit for every
    UnitClass.rootNode element found below the body.

    @param xml: the XML content, or a file-like object; an object with a
        read() method is rewound with seek(0) and read completely
    """
    if not hasattr(self, 'filename'):
        self.filename = getattr(xml, 'name', '')
    if hasattr(xml, "read"):
        xml.seek(0)
        xml = xml.read()
    # NOTE(review): the parser is built with lxml's defaults apart from
    # strip_cdata; if input may come from untrusted sources, review the
    # entity and network settings of etree.XMLParser.
    if etree.LXML_VERSION > (2, 1, 0):
        #Since version 2.1.0 we can pass the strip_cdata parameter to
        #indicate that we don't want cdata to be converted to raw XML
        parser = etree.XMLParser(strip_cdata=False)
    else:
        parser = etree.XMLParser()
    self.document = etree.fromstring(xml, parser).getroottree()
    self._encoding = self.document.docinfo.encoding
    self.initbody()
    assert self.document.getroot().tag == self.namespaced(self.rootNode)
    unit_tag = self.namespaced(self.UnitClass.rootNode)
    for entry in self.body.iterdescendants(unit_tag):
        term = self.UnitClass.createfromxmlElement(entry)
        # The element is already part of the body, so it must not be
        # appended to it again.
        self.addunit(term, new=False)
346
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Tue Aug 4 17:33:06 2009 | http://epydoc.sourceforge.net |