Package textInfos :: Module offsets
[hide private]
[frames | no frames]

Source Code for Module textInfos.offsets

  1  #textInfos/offsets.py 
  2  #A part of NonVisual Desktop Access (NVDA) 
  3  #This file is covered by the GNU General Public License. 
  4  #See the file COPYING for more details. 
  5  #Copyright (C) 2006 Michael Curran <mick@kulgan.net>, James Teh <jamie@jantrid.net> 
  6   
  7  import re 
  8  import ctypes 
  9  import NVDAHelper 
 10  import config 
 11  import textInfos 
 12   
class Offsets(object):
    """A pair of offsets: a start offset and an end offset."""

    def __init__(self, startOffset, endOffset):
        """
        @param startOffset: the first offset.
        @type startOffset: integer
        @param endOffset: the second offset.
        @type endOffset: integer
        """
        self.startOffset, self.endOffset = startOffset, endOffset

    def __eq__(self, other):
        # Equal only to an instance of this class (or a subclass of it)
        # whose start and end offsets both match.
        sameKind = isinstance(other, self.__class__)
        return bool(
            sameKind
            and self.startOffset == other.startOffset
            and self.endOffset == other.endOffset
        )

    def __ne__(self, other):
        # Inequality is always the exact negation of equality.
        return not (self == other)
34
def findStartOfLine(text, offset, lineLength=None):
    """Searches backwards through the given text from the given offset, until it finds the offset that is the start of the line.
    Without a set line length, it searches for new line / carriage return characters; with a set line length it simply moves back to sit on a multiple of the line length.
    @param text: the text to search
    @type text: string
    @param offset: the offset of the text to start at
    @type offset: int
    @param lineLength: The number of characters that makes up a line, None if new line characters should be looked at instead
    @type lineLength: int or None
    @return: the found offset
    @rtype: int
    """
    if not text:
        return 0
    # Offsets at or past the end are clamped onto the last character.
    if offset >= len(text):
        offset = len(text) - 1
    if isinstance(lineLength, int):
        return offset - (offset % lineLength)
    # When sitting on the "\n" of a "\r\n" pair, step back onto the "\r" so the
    # pair is not mistaken for the end of the previous line.
    # The offset>0 guard matters: at offset 0, text[offset-1] would wrap around
    # to the last character of the text and could move the offset to -1.
    if offset > 0 and text[offset] == '\n' and text[offset - 1] == '\r':
        offset -= 1
    # Prefer "\n" as the line terminator; only fall back to a bare "\r" when
    # there is no "\n" before the offset.
    start = text.rfind('\n', 0, offset)
    if start < 0:
        start = text.rfind('\r', 0, offset)
    # rfind returns -1 when nothing was found, so this yields 0 in that case.
    return start + 1
60
def findEndOfLine(text, offset, lineLength=None):
    """Searches forwards through the given text from the given offset, until it finds the offset that is the start of the next line.
    Without a set line length, it searches for new line / carriage return characters; with a set line length it simply moves forward to sit on a multiple of the line length.
    @param text: the text to search
    @type text: string
    @param offset: the offset of the text to start at
    @type offset: int
    @param lineLength: The number of characters that makes up a line, None if new line characters should be looked at instead
    @type lineLength: int or None
    @return: the found offset
    @rtype: int
    """
    if not text:
        return 0
    # Offsets at or past the end are clamped onto the last character.
    if offset >= len(text):
        offset = len(text) - 1
    if isinstance(lineLength, int):
        return offset - (offset % lineLength) + lineLength
    # Prefer "\n" as the terminator; this also covers "\r\n" and the case where
    # the offset is already sitting on the "\n".
    end = text.find('\n', offset)
    if end < 0:
        # No "\n" ahead: fall back to a bare "\r". The search starts at the
        # offset itself so that an offset sitting on the "\r" ends its own line
        # instead of running on to the end of the text.
        end = text.find('\r', offset)
    if end < 0:
        # No terminator at all: the line runs to the end of the text.
        end = len(text) - 1
    return end + 1
87
def findStartOfWord(text, offset, lineLength=None):
    """Walks backwards through the given text from the given offset to locate the offset at which the word starts.
    Characters are classified as alphanumeric, white space, or some other symbol.
    @param text: the text to search
    @type text: string
    @param offset: the offset of the text to start at
    @type offset: int
    @param lineLength: The number of characters that makes up a line, None if new line characters should be looked at instead (not used by this function)
    @type lineLength: int or None
    @return: the found offset
    @rtype: int
    """
    # An offset beyond the text is handed back untouched.
    if offset >= len(text):
        return offset
    pos = offset
    # White space belongs to the word before it, so back up over it first.
    while pos > 0 and text[pos].isspace():
        pos -= 1
    if text[pos].isalnum():
        # Inside an alphanumeric run: rewind to the first character of the run.
        while pos > 0 and text[pos - 1].isalnum():
            pos -= 1
    # Otherwise the character is a symbol (or leading white space) and stands
    # as a word on its own.
    return pos
109
def findEndOfWord(text, offset, lineLength=None):
    """Walks forwards through the given text from the given offset to locate the offset at which the next word starts.
    Characters are classified as alphanumeric, white space, or some other symbol.
    @param text: the text to search
    @type text: string
    @param offset: the offset of the text to start at
    @type offset: int
    @param lineLength: The number of characters that makes up a line, None if new line characters should be looked at instead (not used by this function)
    @type lineLength: int or None
    @return: the found offset
    @rtype: int
    """
    textLen = len(text)
    # An offset beyond the text yields the offset just after it.
    if offset >= textLen:
        return offset + 1
    pos = offset
    current = text[pos]
    if current.isalnum():
        # Consume the remainder of the alphanumeric run.
        while pos < textLen and text[pos].isalnum():
            pos += 1
    elif not current.isspace():
        # A lone symbol is a single-character word.
        pos += 1
    # Trailing white space is treated as part of the word it follows.
    while pos < textLen and text[pos].isspace():
        pos += 1
    return pos
131
class OffsetsTextInfo(textInfos.TextInfo):
    """An abstract TextInfo for text implementations which represent ranges using numeric offsets relative to the start of the text.
    In such implementations, the start of the text is represented by 0 and the end is the length of the entire text.

    All subclasses must implement L{_getStoryLength}.
    Aside from this, there are two possible implementations:
        * If the underlying text implementation does not support retrieval of line offsets, L{_getStoryText} should be implemented.
        In this case, the base implementation of L{_getLineOffsets} will retrieve the entire text of the object and use text searching algorithms to find line offsets.
        This is very inefficient and should be avoided if possible.
        * Otherwise, subclasses must implement at least L{_getTextRange} and L{_getLineOffsets}.
        Retrieval of other offsets (e.g. L{_getWordOffsets}) should also be implemented if possible for greatest accuracy and efficiency.

    If a caret and/or selection should be supported, L{_getCaretOffset} and/or L{_getSelectionOffsets} should be implemented, respectively.
    To support conversion from/to screen points (e.g. for mouse tracking), L{_getOffsetFromPoint}/L{_getPointFromOffset} should be implemented.
    """

    detectFormattingAfterCursorMaybeSlow = True  #: honours documentFormatting config option if true - set to false if this is not at all slow.
    useUniscribe = True  #: Use uniscribe to calculate word offsets etc

    def __eq__(self, other):
        """Two instances are equal when they are the same object, or when both are OffsetsTextInfo instances with matching start and end offsets."""
        if self is other:
            return True
        return bool(
            isinstance(other, OffsetsTextInfo)
            and self._startOffset == other._startOffset
            and self._endOffset == other._endOffset
        )

    def __ne__(self, other):
        """Negation of L{__eq__}, so that == and != always agree (Python 2 does not derive this automatically)."""
        return not (self == other)

    def _getCaretOffset(self):
        """Retrieve the offset of the caret. Must be implemented by subclasses which support a caret."""
        raise NotImplementedError

    def _setCaretOffset(self, offset):
        """Move the caret to the given offset. Used by L{updateCaret}."""
        raise NotImplementedError

    def _getSelectionOffsets(self):
        """Retrieve the (start, end) offsets of the selection. Must be implemented by subclasses which support a selection."""
        raise NotImplementedError

    def _setSelectionOffsets(self, start, end):
        """Set the selection to the given offsets. Used by L{updateSelection}."""
        raise NotImplementedError

    def _getStoryLength(self):
        """Retrieve the length of the entire text. All subclasses must implement this."""
        raise NotImplementedError

    def _getStoryText(self):
        """Retrieve the entire text of the object.
        @return: The entire text of the object.
        @rtype: unicode
        """
        raise NotImplementedError

    def _getTextRange(self, start, end):
        """Retrieve the text in a given offset range.
        @param start: The start offset.
        @type start: int
        @param end: The end offset (exclusive).
        @type end: int
        @return: The text contained in the requested range.
        @rtype: unicode
        """
        raise NotImplementedError

    def _getFormatFieldAndOffsets(self, offset, formatConfig, calculateOffsets=True):
        """Retrieve the formatting information for a given offset and the offsets spanned by that field.
        Subclasses must override this if support for text formatting is desired.
        The base implementation associates text with line numbers if possible.
        @param offset: The offset to examine.
        @param formatConfig: Mapping of formatting options; only "reportLineNumber" is consulted here.
        @param calculateOffsets: If False, the returned offsets are simply this instance's own start and end offsets.
        @return: A tuple of the format field and a (startOffset, endOffset) tuple.
        """
        formatField = textInfos.FormatField()
        startOffset, endOffset = self._startOffset, self._endOffset
        if formatConfig["reportLineNumber"]:
            if calculateOffsets:
                startOffset, endOffset = self._getLineOffsets(offset)
            lineNum = self._getLineNumFromOffset(offset)
            if lineNum is not None:
                # Line numbers are reported 1-based.
                formatField["line-number"] = lineNum + 1
        return formatField, (startOffset, endOffset)

    def _getCharacterOffsets(self, offset):
        """Retrieve the offsets of the character at the given offset: the offset itself and the one after it."""
        return [offset, offset + 1]

    def _getWordOffsets(self, offset):
        """Retrieve the start and end offsets of the word at the given offset.
        The word is located within the line containing the offset, using uniscribe when L{useUniscribe} is true and the call succeeds, and otherwise by breaking on non alphanumeric characters.
        """
        lineStart, lineEnd = self._getLineOffsets(offset)
        lineText = self._getTextRange(lineStart, lineEnd)
        # Convert NULL and non-breaking space to space to make sure that words will break on them
        lineText = lineText.translate({0: u' ', 0xa0: u' '})
        relativeOffset = offset - lineStart
        if self.useUniscribe:
            start = ctypes.c_int()
            end = ctypes.c_int()
            # uniscribe does some strange things when you give it a string with not more than two alphanumeric chars in a row.
            # Inject two alphanumeric characters at the end to fix this.
            # The padding goes into a separate string so that the fallback
            # below still sees the real line text.
            paddedText = lineText + "xx"
            if NVDAHelper.localLib.calculateWordOffsets(paddedText, len(paddedText), relativeOffset, ctypes.byref(start), ctypes.byref(end)):
                # Clamp the end so the padding never leaks into the result.
                return start.value + lineStart, min(end.value + lineStart, lineEnd)
        # Fall back to the older word offsets detection that only breaks on non alphanumeric
        start = findStartOfWord(lineText, relativeOffset) + lineStart
        end = findEndOfWord(lineText, relativeOffset) + lineStart
        return [start, end]

    def _getLineNumFromOffset(self, offset):
        """Retrieve the line number for the given offset. The base implementation returns None, meaning no line number is available."""
        return None

    def _getLineOffsets(self, offset):
        """Retrieve the start and end offsets of the line at the given offset.
        The base implementation fetches the entire text via L{_getStoryText} and searches it for line boundaries.
        """
        text = self._getStoryText()
        start = findStartOfLine(text, offset)
        end = findEndOfLine(text, offset)
        return [start, end]

    def _getParagraphOffsets(self, offset):
        """Retrieve the offsets of the paragraph at the given offset. The base implementation treats a line as a paragraph."""
        return self._getLineOffsets(offset)

    def _getReadingChunkOffsets(self, offset):
        """Retrieve the offsets of the reading chunk at the given offset. The base implementation treats a line as a reading chunk."""
        return self._getLineOffsets(offset)

    def _getPointFromOffset(self, offset):
        """Convert an offset to a screen point. Must be implemented by subclasses to support conversion to screen points."""
        raise NotImplementedError

    def _getOffsetFromPoint(self, x, y):
        """Convert screen coordinates to an offset. Must be implemented by subclasses to support conversion from screen points."""
        raise NotImplementedError

    def _getNVDAObjectFromOffset(self, offset):
        """Retrieve the NVDAObject at the given offset. Used by L{_get_NVDAObjectAtStart}."""
        raise NotImplementedError

    def _getOffsetsFromNVDAObject(self, obj):
        """Retrieve the (start, end) offsets for the given NVDAObject. Used when constructing from an NVDAObject position."""
        raise NotImplementedError

    def __init__(self, obj, position):
        """Constructor.
        Subclasses may extend this to perform implementation specific initialisation, calling their superclass method afterwards.
        @raise NotImplementedError: if the given position is not supported.
        """
        super(OffsetsTextInfo, self).__init__(obj, position)
        from NVDAObjects import NVDAObject
        # Points and NVDAObjects are first converted into Offsets, which are
        # then handled by the generic branch below.
        if isinstance(position, textInfos.Point):
            offset = self._getOffsetFromPoint(position.x, position.y)
            position = Offsets(offset, offset)
        elif isinstance(position, NVDAObject):
            start, end = self._getOffsetsFromNVDAObject(position)
            position = Offsets(start, end)
        if position == textInfos.POSITION_FIRST:
            self._startOffset = self._endOffset = 0
        elif position == textInfos.POSITION_LAST:
            self._startOffset = self._endOffset = max(self._getStoryLength() - 1, 0)
        elif position == textInfos.POSITION_CARET:
            self._startOffset = self._endOffset = self._getCaretOffset()
        elif position == textInfos.POSITION_SELECTION:
            (self._startOffset, self._endOffset) = self._getSelectionOffsets()
        elif position == textInfos.POSITION_ALL:
            self._startOffset = 0
            self._endOffset = self._getStoryLength()
        elif isinstance(position, Offsets):
            # Clamp the requested offsets to the bounds of the text.
            storyLength = self._getStoryLength()
            self._startOffset = max(min(position.startOffset, storyLength - 1), 0)
            self._endOffset = max(min(position.endOffset, storyLength), 0)
        else:
            raise NotImplementedError("position: %s not supported" % position)

    def _get_NVDAObjectAtStart(self):
        """Retrieve the NVDAObject at the start offset."""
        return self._getNVDAObjectFromOffset(self._startOffset)

    def _getUnitOffsets(self, unit, offset):
        """Retrieve the start and end offsets of the given unit at the given offset.
        @raise ValueError: if the unit is unknown.
        """
        if unit == textInfos.UNIT_CHARACTER:
            offsetsFunc = self._getCharacterOffsets
        elif unit == textInfos.UNIT_WORD:
            offsetsFunc = self._getWordOffsets
        elif unit == textInfos.UNIT_LINE:
            offsetsFunc = self._getLineOffsets
        elif unit == textInfos.UNIT_PARAGRAPH:
            offsetsFunc = self._getParagraphOffsets
        elif unit == textInfos.UNIT_READINGCHUNK:
            offsetsFunc = self._getReadingChunkOffsets
        elif unit == textInfos.UNIT_STORY:
            return 0, self._getStoryLength()
        else:
            raise ValueError("unknown unit: %s" % unit)
        return offsetsFunc(offset)

    def _get_pointAtStart(self):
        """Retrieve the screen point for the start offset."""
        return self._getPointFromOffset(self._startOffset)

    def _get_isCollapsed(self):
        """Whether the start and end offsets are the same."""
        return bool(self._startOffset == self._endOffset)

    def collapse(self, end=False):
        """Collapse this range onto its start, or onto its end if C{end} is true."""
        if not end:
            self._endOffset = self._startOffset
        else:
            self._startOffset = self._endOffset

    def expand(self, unit):
        """Expand this range to cover the given unit at the start offset."""
        self._startOffset, self._endOffset = self._getUnitOffsets(unit, self._startOffset)

    def copy(self):
        """Create a new instance of the same class from this instance's bookmark, carrying over all underscore-prefixed instance attributes."""
        o = self.__class__(self.obj, self.bookmark)
        for name, value in self.__dict__.items():
            if name.startswith('_'):
                o.__dict__[name] = value
        return o

    def compareEndPoints(self, other, which):
        """Compare one end point of this range with one end point of another.
        @param which: one of "startToStart", "startToEnd", "endToStart" or "endToEnd".
        @return: -1, 0 or 1 depending on whether this end point is before, at or after the other one.
        @raise ValueError: if C{which} is not one of the supported values.
        """
        if which == "startToStart":
            diff = self._startOffset - other._startOffset
        elif which == "startToEnd":
            diff = self._startOffset - other._endOffset
        elif which == "endToStart":
            diff = self._endOffset - other._startOffset
        elif which == "endToEnd":
            diff = self._endOffset - other._endOffset
        else:
            raise ValueError("bad argument - which: %s" % which)
        # Normalise the difference to its sign.
        if diff < 0:
            diff = -1
        elif diff > 0:
            diff = 1
        return diff

    def setEndPoint(self, other, which):
        """Set one end point of this range to one end point of another.
        @param which: one of "startToStart", "startToEnd", "endToStart" or "endToEnd".
        @raise ValueError: if C{which} is not one of the supported values.
        """
        if which == "startToStart":
            self._startOffset = other._startOffset
        elif which == "startToEnd":
            self._startOffset = other._endOffset
        elif which == "endToStart":
            self._endOffset = other._startOffset
        elif which == "endToEnd":
            self._endOffset = other._endOffset
        else:
            raise ValueError("bad argument - which: %s" % which)
        if self._startOffset > self._endOffset:
            # start should never be after end.
            # The end point that was not set is pulled along to the one that was.
            if which in ("startToStart", "startToEnd"):
                self._endOffset = self._startOffset
            else:
                self._startOffset = self._endOffset

    def getTextWithFields(self, formatConfig=None):
        """Retrieve the text of this range interleaved with "formatChange" field commands.
        @param formatConfig: formatting options; defaults to the "documentFormatting" configuration section.
        @return: a list of field commands and text strings.
        """
        if not formatConfig:
            formatConfig = config.conf["documentFormatting"]
        if self.detectFormattingAfterCursorMaybeSlow and not formatConfig['detectFormatAfterCursor']:
            # Only the formatting at the start is reported; the offsets are not needed.
            field, (boundStart, boundEnd) = self._getFormatFieldAndOffsets(self._startOffset, formatConfig, calculateOffsets=False)
            text = self.text
            return [textInfos.FieldCommand('formatChange', field), text]
        commandList = []
        offset = self._startOffset
        while offset < self._endOffset:
            field, (boundStart, boundEnd) = self._getFormatFieldAndOffsets(offset, formatConfig)
            # Guarantee forward progress even if the reported bounds are empty
            # or lie at/before the current offset.
            if boundEnd <= boundStart:
                boundEnd = boundStart + 1
            if boundEnd <= offset:
                boundEnd = offset + 1
            command = textInfos.FieldCommand("formatChange", field)
            commandList.append(command)
            text = self._getTextRange(offset, min(boundEnd, self._endOffset))
            commandList.append(text)
            offset = boundEnd
        return commandList

    def _get_text(self):
        """Retrieve the text between the start and end offsets."""
        return self._getTextRange(self._startOffset, self._endOffset)

    def unitIndex(self, unit):
        """Retrieve the index of the given unit at the start offset. Only lines are supported.
        @raise NotImplementedError: for any unit other than a line.
        """
        if unit == textInfos.UNIT_LINE:
            return self._getLineNumFromOffset(self._startOffset)
        else:
            raise NotImplementedError

    def unitCount(self, unit):
        """Retrieve the number of the given units. Only lines are supported.
        @raise NotImplementedError: for any unit other than a line.
        """
        if unit == textInfos.UNIT_LINE:
            # NOTE(review): _getLineCount is not defined in this class; presumably supplied by subclasses - confirm.
            return self._getLineCount()
        else:
            raise NotImplementedError

    def move(self, unit, direction, endPoint=None):
        """Move this range, or one of its end points, by the given number of units.
        @param unit: the unit to move by.
        @param direction: the number of units to move; negative moves backwards, 0 does nothing.
        @param endPoint: "start" or "end" to move only that end point; otherwise the range is collapsed to its start and moved as a whole.
        @return: the number of units counted while moving (negative when moving backwards), or 0 if the range was not moved.
        """
        if direction == 0:
            return 0
        if endPoint == "end":
            offset = self._endOffset
        elif endPoint == "start":
            offset = self._startOffset
        else:
            self.collapse()
            offset = self._startOffset
        lastOffset = None
        count = 0
        lowLimit = 0
        highLimit = self._getStoryLength()
        # Keep stepping until the requested count is reached, the offset stops
        # advancing in the requested direction, or a limit of the text is hit.
        while (
            count != direction
            and (lastOffset is None or (direction > 0 and offset > lastOffset) or (direction < 0 and offset < lastOffset))
            and (offset < highLimit or direction < 0)
            and (offset > lowLimit or direction > 0)
        ):
            lastOffset = offset
            if direction < 0 and offset > lowLimit:
                # Step into the previous unit before asking for its offsets.
                offset -= 1
            newStart, newEnd = self._getUnitOffsets(unit, offset)
            if direction < 0:
                offset = newStart
            elif direction > 0:
                offset = newEnd
            count = count + 1 if direction > 0 else count - 1
        # Reject the move if the offset did not actually advance in the
        # requested direction or ended up outside the allowed limits.
        if endPoint == "start":
            if (direction > 0 and offset <= self._startOffset) or (direction < 0 and offset >= self._startOffset) or offset < lowLimit or offset >= highLimit:
                return 0
            self._startOffset = offset
        elif endPoint == "end":
            if (direction > 0 and offset <= self._endOffset) or (direction < 0 and offset >= self._endOffset) or offset < lowLimit or offset > highLimit:
                return 0
            self._endOffset = offset
        else:
            if (direction > 0 and offset <= self._startOffset) or (direction < 0 and offset >= self._startOffset) or offset < lowLimit or offset >= highLimit:
                return 0
            self._startOffset = self._endOffset = offset
        if self._startOffset > self._endOffset:
            # Moving one end point past the other: swap them so start stays before end.
            self._startOffset, self._endOffset = self._endOffset, self._startOffset
        return count

    def find(self, text, caseSensitive=False, reverse=False):
        """Search for the given text from the start offset and, if found, collapse this range onto the match.
        @param text: the text to search for (matched literally).
        @param caseSensitive: whether the search is case sensitive.
        @param reverse: whether to search backwards.
        @return: True if a match was found, False otherwise.
        """
        if reverse:
            # When searching in reverse, we reverse both strings and do a forwards search.
            text = text[::-1]
            # Start searching one before the start to avoid finding the current match.
            inText = self._getTextRange(0, self._startOffset)[::-1]
        else:
            # Start searching one past the start to avoid finding the current match.
            inText = self._getTextRange(self._startOffset + 1, self._getStoryLength())
        m = re.search(re.escape(text), inText, (0 if caseSensitive else re.IGNORECASE) | re.UNICODE)
        if not m:
            return False
        if reverse:
            # The end of the match in the reversed text is its start in the real text.
            offset = self._startOffset - m.end()
        else:
            offset = self._startOffset + 1 + m.start()
        self._startOffset = self._endOffset = offset
        return True

    def updateCaret(self):
        """Move the caret to the start offset."""
        return self._setCaretOffset(self._startOffset)

    def updateSelection(self):
        """Set the selection to this range."""
        return self._setSelectionOffsets(self._startOffset, self._endOffset)

    def _get_bookmark(self):
        """Retrieve an L{Offsets} object holding the start and end offsets."""
        return Offsets(self._startOffset, self._endOffset)
471