1 from xml.parsers import expat
2 import textInfos
3 from logHandler import log
4
class XMLTextParser(object):
    """Turn XML markup into a flat list of field commands and text strings.

    The XML is fed through an expat parser whose callbacks append to an
    internal command list:

    * ``<control ...>``  -> ``FieldCommand("controlStart", ControlField(attrs))``
    * ``</control>``     -> ``FieldCommand("controlEnd", None)``
    * ``<text ...>``     -> ``FieldCommand("formatChange", FormatField(attrs))``
    * ``</text>``        -> nothing
    * character data     -> a plain string (adjacent strings are merged)

    Any other tag name raises ``ValueError`` from the corresponding handler.

    The expat parser and the command list are created once in ``__init__``
    and are not reset by ``parse``, so repeated ``parse`` calls on one
    instance feed the same parser and extend the same list.
    """

    # Type(s) used to recognise a string sitting at the end of the command
    # list. ``basestring`` only exists on Python 2 (covering both ``str`` and
    # ``unicode``); fall back to ``str`` where it is not defined.
    try:
        _stringTypes = basestring
    except NameError:
        _stringTypes = str

    def __init__(self):
        """Create the expat parser, attach the handlers and start an empty command list."""
        self.parser = expat.ParserCreate('utf-8')
        self.parser.StartElementHandler = self._startElementHandler
        self.parser.EndElementHandler = self._EndElementHandler
        self.parser.CharacterDataHandler = self._CharacterDataHandler
        self._commandList = []

    def _startElementHandler(self, tagName, attrs):
        """Handle an opening tag reported by expat.

        Appends a ``controlStart`` command for ``control`` tags or a
        ``formatChange`` command for ``text`` tags, wrapping ``attrs`` in the
        matching field type. If the field has ``_startOfNode`` and/or
        ``_endOfNode`` entries they are replaced by booleans (``True`` only
        when the value is the string ``"1"``).

        Raises ValueError for any other tag name.
        """
        if tagName == 'control':
            newAttrs = textInfos.ControlField(attrs)
            commandName = "controlStart"
        elif tagName == 'text':
            newAttrs = textInfos.FormatField(attrs)
            commandName = "formatChange"
        else:
            raise ValueError("Unknown tag name: %s" % tagName)
        self._commandList.append(textInfos.FieldCommand(commandName, newAttrs))

        # Normalise the flags shared by both field types. The field is updated
        # after it has been handed to the command, exactly as before.
        for flagName in ("_startOfNode", "_endOfNode"):
            try:
                newAttrs[flagName] = newAttrs[flagName] == "1"
            except KeyError:
                # Flag not supplied for this element; leave the field as is.
                pass

    def _EndElementHandler(self, tagName):
        """Handle a closing tag reported by expat.

        ``control`` appends a ``controlEnd`` command with no field; ``text``
        adds nothing. Raises ValueError for any other tag name.
        """
        if tagName == "control":
            self._commandList.append(textInfos.FieldCommand("controlEnd", None))
        elif tagName == "text":
            pass
        else:
            raise ValueError("unknown tag name: %s" % tagName)

    def _CharacterDataHandler(self, data):
        """Handle character data reported by expat.

        expat may deliver one run of text through several callbacks, so the
        data is concatenated onto the last list entry when that entry is
        already a string; otherwise it is appended as a new entry.
        """
        cmdList = self._commandList
        if cmdList and isinstance(cmdList[-1], self._stringTypes):
            cmdList[-1] += data
        else:
            cmdList.append(data)

    def parse(self, XMLText):
        """Parse ``XMLText`` and return the accumulated command list.

        ``XMLText`` is encoded as UTF-8 and passed to the expat parser. Any
        exception raised while parsing (malformed XML, or a ValueError from a
        handler) is logged together with the offending XML and otherwise
        ignored, so the list returned may be incomplete.

        The return value is the instance's own list, not a copy.
        """
        try:
            self.parser.Parse(XMLText.encode('utf-8'))
        except Exception:
            # Best effort: report the problem and return whatever was
            # collected. Exception (rather than a bare except) lets
            # KeyboardInterrupt and SystemExit propagate.
            log.error("XML: %s" % XMLText, exc_info=True)
        return self._commandList
55