# -*- test-case-name: twisted.test.test_text -*- # # Copyright (c) 2001-2004 Twisted Matrix Laboratories. # See LICENSE for details. """Miscellany of text-munging functions. """ import string, types def stringyString(object, indentation=''): """Expansive string formatting for sequence types. list.__str__ and dict.__str__ use repr() to display their elements. This function also turns these sequence types into strings, but uses str() on their elements instead. Sequence elements are also displayed on seperate lines, and nested sequences have nested indentation. """ braces = '' sl = [] if type(object) is types.DictType: braces = '{}' for key, value in object.items(): value = stringyString(value, indentation + ' ') if isMultiline(value): if endsInNewline(value): value = value[:-len('\n')] sl.append("%s %s:\n%s" % (indentation, key, value)) else: # Oops. Will have to move that indentation. sl.append("%s %s: %s" % (indentation, key, value[len(indentation) + 3:])) elif type(object) in (types.TupleType, types.ListType): if type(object) is types.TupleType: braces = '()' else: braces = '[]' for element in object: element = stringyString(element, indentation + ' ') sl.append(string.rstrip(element) + ',') else: sl[:] = map(lambda s, i=indentation: i+s, string.split(str(object),'\n')) if not sl: sl.append(indentation) if braces: sl[0] = indentation + braces[0] + sl[0][len(indentation) + 1:] sl[-1] = sl[-1] + braces[-1] s = string.join(sl, "\n") if isMultiline(s) and not endsInNewline(s): s = s + '\n' return s def isMultiline(s): """Returns True if this string has a newline in it.""" return (string.find(s, '\n') != -1) def endsInNewline(s): """Returns True if this string ends in a newline.""" return (s[-len('\n'):] == '\n') def docstringLStrip(docstring): """Gets rid of unsightly lefthand docstring whitespace residue. You'd think someone would have done this already, but apparently not in 1.5.2. BUT since we're all using Python 2.1 now, use L{inspect.getdoc} instead. I{This function should go away soon.} """ if not docstring: return docstring docstring = string.replace(docstring, '\t', ' ' * 8) lines = string.split(docstring,'\n') leading = 0 for l in xrange(1,len(lines)): line = lines[l] if string.strip(line): while 1: if line[leading] == ' ': leading = leading + 1 else: break if leading: break outlines = lines[0:1] for l in xrange(1,len(lines)): outlines.append(lines[l][leading:]) return string.join(outlines, '\n') def greedyWrap(inString, width=80): """Given a string and a column width, return a list of lines. Caveat: I'm use a stupid greedy word-wrapping algorythm. I won't put two spaces at the end of a sentence. I don't do full justification. And no, I've never even *heard* of hypenation. """ outLines = [] #eww, evil hacks to allow paragraphs delimited by two \ns :( if inString.find('\n\n') >= 0: paragraphs = string.split(inString, '\n\n') for para in paragraphs: outLines.extend(greedyWrap(para) + ['']) return outLines inWords = string.split(inString) column = 0 ptr_line = 0 while inWords: column = column + len(inWords[ptr_line]) ptr_line = ptr_line + 1 if (column > width): if ptr_line == 1: # This single word is too long, it will be the whole line. pass else: # We've gone too far, stop the line one word back. ptr_line = ptr_line - 1 (l, inWords) = (inWords[0:ptr_line], inWords[ptr_line:]) outLines.append(string.join(l,' ')) ptr_line = 0 column = 0 elif not (len(inWords) > ptr_line): # Clean up the last bit. outLines.append(string.join(inWords, ' ')) del inWords[:] else: # Space column = column + 1 # next word return outLines wordWrap = greedyWrap def removeLeadingBlanks(lines): ret = [] for line in lines: if ret or line.strip(): ret.append(line) return ret def removeLeadingTrailingBlanks(s): lines = removeLeadingBlanks(s.split('\n')) lines.reverse() lines = removeLeadingBlanks(lines) lines.reverse() return '\n'.join(lines)+'\n' def splitQuoted(s): """Like string.split, but don't break substrings inside quotes. >>> splitQuoted('the \"hairy monkey\" likes pie') ['the', 'hairy monkey', 'likes', 'pie'] Another one of those \"someone must have a better solution for this\" things. This implementation is a VERY DUMB hack done too quickly. """ out = [] quot = None phrase = None for word in s.split(): if phrase is None: if word and (word[0] in ("\"", "'")): quot = word[0] word = word[1:] phrase = [] if phrase is None: out.append(word) else: if word and (word[-1] == quot): word = word[:-1] phrase.append(word) out.append(" ".join(phrase)) phrase = None else: phrase.append(word) return out def strFile(p, f, caseSensitive=True): """Find whether string p occurs in a read()able object f @rtype: C{bool} """ buf = "" buf_len = max(len(p), 2**2**2**2) if not caseSensitive: p = p.lower() while 1: r = f.read(buf_len-len(p)) if not caseSensitive: r = r.lower() bytes_read = len(r) if bytes_read == 0: return False l = len(buf)+bytes_read-buf_len if l <= 0: buf = buf + r else: buf = buf[l:] + r if buf.find(p) != -1: return True