#!/usr/bin/env python #**************************************************************************** # treedoc.py, provides non-GUI base classes for document data # # TreeLine, an information storage program # Copyright (C) 2005, Douglas W. Bell # # This is free software; you can redistribute it and/or modify it under the # terms of the GNU General Public License, Version 2. This program is # distributed in the hope that it will be useful, but WITTHOUT ANY WARRANTY. #**************************************************************************** try: from __main__ import __version__ except ImportError: __version__ = '??' from nodeformat import NodeFormat, FileInfoFormat from treeformats import TreeFormats from treeitem import TreeItem from treeselection import TreeSelection from treexmlparse import TreeSaxHandler, XbelSaxHandler, GenericXmlHandler, \ HtmlBookmarkHandler, HtmlParseError from undo import UndoRedoStore from p3 import p3_encrypt, p3_decrypt, CryptError from optiondefaults import OptionDefaults import globalref from gzip import GzipFile from StringIO import StringIO import sys, os.path, re, xml.sax, codecs, locale escDict = {'"': '"', chr(12): ''} # added quotes for c in range(9) + range(11, 13) + range(14, 32): escDict[chr(c)] = '' # ignore low ascii chars unEscDict = {'"': '"'} encryptPrefix = '>>TL+enc' class TreeDoc: """Tree document class - stores root and has tree utilities""" localEncoding = 'utf-8' passwordDict = {} childFieldSepDflt = ', ' rootTitleDefault = _('Main', 'default root title') folderName = _('FOLDER', 'bookmark format folder name') bookmarkName = _('BOOKMARK', 'bookmark format name') separatorName = _('SEPARATOR', 'bookmark format separator name') def __init__(self, filePath=None): globalref.docRef = self self.root = None self.treeFormats = TreeFormats() # self.resetParam() self.fileName = '' if filePath: self.readFile(filePath) else: self.newFile() def resetParam(self): """Set doc setting values to defaults""" self.selection = TreeSelection([self.root]) self.undoStore = UndoRedoStore() self.redoStore = UndoRedoStore() self.modified = False self.spaceBetween = True self.lineBreaks = True self.formHtml = True self.childFieldSep = TreeDoc.childFieldSepDflt self.spellChkLang = '' self.sortFields = [''] self.xslCssLink = '' self.xlstLink = '' self.tlVersion = __version__ self.fileInfoItem = TreeItem(None, FileInfoFormat()) self.fileInfoItem.nodeFormat.translateFields() self.fileInfoItem.nodeFormat.updateFileInfo() def hasPassword(self, filePath): """Return True if a password is available for filePath""" key = filePath.encode(TreeDoc.localEncoding) return TreeDoc.passwordDict.has_key(key) def setPassword(self, filePath, password): """Set encrytion password for the filePath""" key = filePath.encode(TreeDoc.localEncoding) TreeDoc.passwordDict[key] = password.encode('utf-8') def clearPassword(self, filePath): """Remove password for filePath if present""" key = filePath.encode(TreeDoc.localEncoding) try: del TreeDoc.passwordDict[key] except KeyError: pass def getReadFileObj(self, fileRef): """Return file object and set self.compressFile to False/True, fileRef is either file path or file object""" if not hasattr(fileRef, 'read'): fileRef = file(fileRef.encode(TreeDoc.localEncoding), 'rb') # binary mode req'd for encryption if hasattr(fileRef, 'seek'): fileRef.seek(0) prefix = fileRef.read(2) self.compressFile = prefix == '\037\213' if hasattr(fileRef, 'seek'): fileRef.seek(0) else: oldFileRef = fileRef fileRef = StringIO(prefix + oldFileRef.read()) fileRef.name = oldFileRef.name oldFileRef.close() if self.compressFile: name = fileRef.name fileRef = GzipFile(fileobj=fileRef) fileRef.name = name return fileRef def decryptFile(self, fileObj): """Decrypt file if was encrypted""" name = fileObj.name prefix = fileObj.read(len(encryptPrefix)) self.encryptFile = prefix == encryptPrefix if self.encryptFile: password = TreeDoc.passwordDict.get(fileObj.name, '') if not password: self.storedFileRef = fileObj self.storedFileRef.seek(0) raise PasswordError, 'Missing password' try: text = p3_decrypt(fileObj.read(), password) except CryptError: self.storedFileRef = fileObj self.storedFileRef.seek(0) raise PasswordError, 'Incorrect password' fileObj.close() fileObj = StringIO(text) fileObj.name = name else: fileObj.seek(0) return fileObj def getEncodedFileObj(self, fileRef, encoding, errors): """Return open file object with specified encoding""" return codecs.getreader(encoding)(self.getReadFileObj(fileRef), errors) def getWriteFileObj(self, fileRef, forceCompress): """Return write file object, compressed or not based on forceCompress, but always compress if has .gz extension, fileRef is either file path or file object""" if not hasattr(fileRef, 'read'): fileRef = file(fileRef.encode(TreeDoc.localEncoding), 'wb') if fileRef.name.endswith('.gz') or forceCompress: name = fileRef.name fileRef = GzipFile(fileobj=fileRef) fileRef.name = name return fileRef def newFile(self): """Start empty file""" self.compressFile = globalref.options.boolData('CompressNewFiles') self.encryptFile = globalref.options.boolData('EncryptNewFiles') self.treeFormats = TreeFormats([], True) self.root = TreeItem(None, self.treeFormats[0]) self.root.setTitle(TreeDoc.rootTitleDefault) self.resetParam() self.fileName = '' def readFile(self, fileRef): """Open and read file - raise exception on failure, fileRef is either file path or file object""" origFileInfoItem = self.fileInfoItem self.fileInfoItem = TreeItem(None, FileInfoFormat()) filePath = hasattr(fileRef, 'read') and \ unicode(fileRef.name, TreeDoc.localEncoding) or fileRef try: f = self.getReadFileObj(fileRef) f = self.decryptFile(f) handler = TreeSaxHandler() input = xml.sax.InputSource() input.setByteStream(f) input.setEncoding('utf-8') reader = xml.sax.make_parser() reader.setContentHandler(handler) reader.setFeature(xml.sax.handler.feature_external_ges, 0) reader.parse(input) except IOError: print 'Error - could not read file', \ filePath.encode(TreeDoc.localEncoding) self.fileInfoItem = origFileInfoItem raise except UnicodeError: print 'Error - bad Unicode in file', \ filePath.encode(TreeDoc.localEncoding) f.close() self.fileInfoItem = origFileInfoItem raise except xml.sax.SAXException: self.storedFileRef = f self.storedFileRef.seek(0) self.fileInfoItem = origFileInfoItem raise ReadFileError(_('Could not open as treeline file')) f.close() self.root = handler.rootItem self.fileName = filePath self.treeFormats = TreeFormats(handler.formats.values()) self.xlstLink = handler.xlstLink self.xslCssLink = handler.xslCssLink self.spaceBetween = handler.spaceBetween self.lineBreaks = handler.lineBreaks self.formHtml = handler.formHtml self.childFieldSep = handler.childFieldSep self.spellChkLang = handler.spellChkLang self.tlVersion = handler.tlVersion self.selection = TreeSelection([self.root]) self.undoStore = UndoRedoStore() self.redoStore = UndoRedoStore() self.modified = False self.sortFields = [''] self.fileInfoItem.nodeFormat.replaceListFormat() self.fileInfoItem.nodeFormat.translateFields() self.fileInfoItem.nodeFormat.updateFileInfo() self.treeFormats.updateAutoChoices() self.treeFormats.updateDerivedTypes() if not self.tlVersion: # file from before 0.12.80, fix number format for format in self.treeFormats: for field in format.fieldList: if field.typeName == 'Number': field.format = field.format.replace(',', '\,') def readTabbed(self, fileRef, errors='strict'): """Import tabbed data into a flat tree - raise exception on failure""" try: f = self.getEncodedFileObj(fileRef, TreeDoc.localEncoding, errors) filePath = unicode(f.name, TreeDoc.localEncoding) textList = f.readlines() except UnicodeError: print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readTabbed(fileRef, 'replace') else: f.close() return f.close() bufList = [(text.count('\t', 0, len(text) - len(text.lstrip())), \ text.strip()) for text in textList if text.strip()] if bufList: buf = bufList.pop(0) if buf[0] == 0: treeFormats = TreeFormats([], True) # set defaults ROOT, DEFAULT newRoot = TreeItem(None, treeFormats[1]) newRoot.setTitle(buf[1]) if newRoot.loadTabbedChildren(bufList): newRoot.nodeFormat = treeFormats[0] self.root = newRoot self.fileName = filePath self.treeFormats = treeFormats self.resetParam() return raise ReadFileError(_('Error in tabbed list')) def readTable(self, fileRef, errors='strict'): """Import table data into a flat tree - raise exception on failure""" try: f = self.getEncodedFileObj(fileRef, TreeDoc.localEncoding, errors) filePath = unicode(f.name, TreeDoc.localEncoding) textList = f.readlines() except UnicodeError: print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readTable(fileRef, 'replace') else: f.close() return f.close() treeFormats = TreeFormats([], True) # set defaults ROOT & DEFAULT newRoot = TreeItem(None, treeFormats[0]) treeFormats[1].fieldList = [] treeFormats[1].lineList = [] treeFormats[1].addTableFields(textList.pop(0).strip().split('\t')) newRoot.setTitle(TreeDoc.rootTitleDefault) for line in textList: newItem = TreeItem(newRoot, treeFormats[1]) newRoot.childList.append(newItem) lineList = line.strip().split('\t') try: for num in range(len(lineList)): newItem.data[treeFormats[1].fieldList[num].name] \ = lineList[num].strip() except IndexError: self.modified = False print 'Too few headings to read data as a table' raise ReadFileError(_('Too few headings to read data as table')) self.root = newRoot self.fileName = filePath self.treeFormats = treeFormats self.resetParam() def readLines(self, fileRef, errors='strict'): """Import plain text, node per line""" try: f = self.getEncodedFileObj(fileRef, TreeDoc.localEncoding, errors) filePath = unicode(f.name, TreeDoc.localEncoding) textList = f.readlines() except UnicodeError: print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readLines(fileRef, 'replace') else: f.close() return f.close() treeFormats = TreeFormats([], True) # set defaults ROOT & DEFAULT newRoot = TreeItem(None, treeFormats[0]) treeFormats[1].fieldList = [] treeFormats[1].lineList = [] treeFormats[1].addTableFields([TreeFormats.textFieldName]) newRoot.setTitle(TreeDoc.rootTitleDefault) for line in textList: line = line.strip() if line: newItem = TreeItem(newRoot, treeFormats[1]) newRoot.childList.append(newItem) newItem.data[TreeFormats.textFieldName] = line self.root = newRoot self.fileName = filePath self.treeFormats = treeFormats self.resetParam() def readPara(self, fileRef, errors='strict'): """Import plain text, blank line delimitted""" try: f = self.getEncodedFileObj(fileRef, TreeDoc.localEncoding, errors) filePath = unicode(f.name, TreeDoc.localEncoding) fullText = f.read().replace('\r', '') except UnicodeError: print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readPara(fileRef, 'replace') else: f.close() return textList = fullText.split('\n\n') f.close() treeFormats = TreeFormats([], True) # set defaults ROOT & DEFAULT newRoot = TreeItem(None, treeFormats[0]) treeFormats[1].fieldList = [] treeFormats[1].lineList = [] treeFormats[1].iconName = 'doc' treeFormats[1].addTableFields([TreeFormats.textFieldName]) treeFormats[1].fieldList[0].numLines = globalref.options.\ intData('MaxEditLines', 1, \ OptionDefaults.maxNumLines) newRoot.setTitle(TreeDoc.rootTitleDefault) for line in textList: line = line.strip() if line: newItem = TreeItem(newRoot, treeFormats[1]) newRoot.childList.append(newItem) newItem.data[TreeFormats.textFieldName] = line self.root = newRoot self.fileName = filePath self.treeFormats = treeFormats self.resetParam() def readTreepad(self, fileRef, errors='strict'): """Read Treepad text-node file""" try: f = self.getEncodedFileObj(fileRef, TreeDoc.localEncoding, errors) filePath = unicode(f.name, TreeDoc.localEncoding) textList = f.read().split(' 5P9i0s8y19Z') f.close() except UnicodeError: # error common - broken unicode on windows print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readTreepad(fileRef, 'replace') else: f.close() return treeFormats = TreeFormats() format = NodeFormat(TreeFormats.formatDefault) titleFieldName = _('Title', 'title field name') format.addNewField(titleFieldName) format.addLine(u'{*%s*}' % titleFieldName) numLines = globalref.options.intData('MaxEditLines', 1, \ OptionDefaults.maxNumLines) format.addNewField(TreeFormats.textFieldName, {'lines': repr(numLines)}) format.addLine(u'{*%s*}' % TreeFormats.textFieldName) treeFormats.append(format) itemList = [] for text in textList: text = text.strip() if text: try: text = text.split('', 1)[1].lstrip() lines = text.split('\n') title = lines[0] level = int(lines[1]) lines = lines[2:] except (ValueError, IndexError): print 'Error - bad file format in %s' % \ filePath.encode(TreeDoc.localEncoding) raise ReadFileError(_('Bad file format in %s') % filePath) item = TreeItem(None, format) item.data[titleFieldName] = title item.data[TreeFormats.textFieldName] = '\n'.join(lines) item.level = level itemList.append(item) self.root = itemList[0] parentList = [] for item in itemList: if item.level != 0: parentList = parentList[:item.level] item.parent = parentList[-1] parentList[-1].childList.append(item) parentList.append(item) self.root = itemList[0] self.fileName = filePath self.treeFormats = treeFormats self.resetParam() def createBookmarkFormat(self): """Return a set of formats for bookmark imports""" treeFormats = TreeFormats() format = NodeFormat(TreeDoc.folderName) format.addNewField(TreeFormats.fieldDefault) format.addLine(u'{*%s*}' % TreeFormats.fieldDefault) format.addLine(u'{*%s*}' % TreeFormats.fieldDefault) format.iconName = 'folder_3' treeFormats.append(format) format = NodeFormat(TreeDoc.bookmarkName) format.addNewField(TreeFormats.fieldDefault) format.addLine(u'{*%s*}' % TreeFormats.fieldDefault) format.addLine(u'{*%s*}' % TreeFormats.fieldDefault) format.addNewField(TreeFormats.linkFieldName, {'type': 'URL'}) format.addLine(u'{*%s*}' % TreeFormats.linkFieldName) format.iconName = 'bookmark' treeFormats.append(format) format = NodeFormat(TreeDoc.separatorName) format.addNewField(TreeFormats.fieldDefault) format.addLine(u'------------------') format.addLine(u'
') treeFormats.append(format) return treeFormats def readXbel(self, fileRef): """Read XBEL format bookmarks""" formats = self.createBookmarkFormat() try: f = self.getReadFileObj(fileRef) filePath = unicode(f.name, TreeDoc.localEncoding) handler = XbelSaxHandler(formats.findFormat(TreeDoc.folderName), \ formats.findFormat(TreeDoc.bookmarkName), \ formats.findFormat(TreeDoc.separatorName)) input = xml.sax.InputSource() input.setByteStream(f) input.setEncoding('utf-8') reader = xml.sax.make_parser() reader.setContentHandler(handler) reader.setFeature(xml.sax.handler.feature_external_ges, 0) reader.parse(input) except UnicodeError: print 'Error - bad Unicode in file', \ filePath.encode(TreeDoc.localEncoding) f.close() raise ReadFileError(_('Problem with Unicode characters in file')) except xml.sax.SAXException: f.close() raise ReadFileError(_('Could not open as XBEL file')) f.close() if not handler.rootItem: raise ReadFileError(_('Could not open as XBEL file')) self.root = handler.rootItem if not self.root.data.get(TreeFormats.fieldDefault, ''): self.root.data[TreeFormats.fieldDefault] = _('Bookmarks') self.fileName = filePath self.treeFormats = formats self.resetParam() def readMozilla(self, fileRef, errors='strict'): """Read Mozilla HTML format bookmarks""" formats = self.createBookmarkFormat() try: f = self.getEncodedFileObj(fileRef, 'utf-8', errors) filePath = unicode(f.name, TreeDoc.localEncoding) fullText = f.read() except UnicodeError: print 'Warning - bad unicode characters were replaced' if errors == 'strict': self.readMozilla(fileRef, 'replace') else: f.close() return try: handler = HtmlBookmarkHandler(formats.findFormat(TreeDoc.\ folderName), \ formats.findFormat(TreeDoc.\ bookmarkName), \ formats.findFormat(TreeDoc.\ separatorName)) handler.feed(fullText) handler.close() except HtmlParseError: raise ReadFileError(_('Could not open as HTML bookmark file')) if not handler.rootItem: raise ReadFileError(_('Could not open as HTML bookmark file')) self.root = handler.rootItem if not self.root.data.get(TreeFormats.fieldDefault, ''): self.root.data[TreeFormats.fieldDefault] = _('Bookmarks') self.fileName = filePath self.treeFormats = formats self.resetParam() def readXml(self, fileRef): """Read a generic (non-TreeLine) XML file""" try: f = self.getReadFileObj(fileRef) filePath = unicode(f.name, TreeDoc.localEncoding) handler = GenericXmlHandler() input = xml.sax.InputSource() input.setByteStream(f) input.setEncoding('utf-8') reader = xml.sax.make_parser() reader.setContentHandler(handler) reader.setFeature(xml.sax.handler.feature_external_ges, 0) reader.parse(input) except UnicodeError: print 'Error - bad Unicode in file', \ filePath.encode(TreeDoc.localEncoding) f.close() raise ReadFileError(_('Problem with Unicode characters in file')) except xml.sax.SAXException: f.close() raise ReadFileError(_('Could not open XML file')) f.close() if not handler.rootItem: raise ReadFileError(_('Could not open XML file')) self.root = handler.rootItem self.fileName = filePath self.treeFormats = TreeFormats(handler.formats.values()) for format in self.treeFormats: format.fixImportedFormat(GenericXmlHandler.textFieldName) self.resetParam() def readXmlString(self, string, mergeFormat=False): """Read xml string and return top item or None""" try: handler = TreeSaxHandler() xml.sax.parseString(string.encode('utf-8'), handler) except xml.sax.SAXException: return None if mergeFormat: for val in handler.formats.values(): self.treeFormats.addIfMissing(val) for item in handler.rootItem.descendantGen(): item.nodeFormat = self.treeFormats.\ findFormat(item.nodeFormat.name) return handler.rootItem def writeFile(self, fileRef, updateInfo=True): """Write file - raises IOError on failure""" lines = [u''] if self.xlstLink: lines.append(u'' % self.xlstLink) lines.extend(self.root.branchXml([], True)) text = '\n'.join(lines).encode('utf-8') try: f = self.getWriteFileObj(fileRef, self.compressFile) except IOError: print 'Error - could not write file' raise filePath = unicode(f.name, TreeDoc.localEncoding) if self.encryptFile: key = filePath.encode(TreeDoc.localEncoding) password = TreeDoc.passwordDict.get(key, '') if not password: if key.endswith('~'): # for auto-save filename password = TreeDoc.passwordDict.get(key[:-1], '') if not password: raise PasswordError, 'Missing password' text = encryptPrefix + p3_encrypt(text, password) try: f.write(text) except IOError: print 'Error - could not write file', \ filePath.encode(TreeDoc.localEncoding) raise f.close() if filePath.endswith('.gz'): self.compressFile = True if updateInfo: self.modified = False self.tlVersion = __version__ self.fileName = filePath self.fileInfoItem.nodeFormat.updateFileInfo() def exportHtml(self, fileRef, item, includeRoot, openOnly=False, \ indent=20, addHeader=False): """Save branch as html to file w/o columns""" outGroup = item.outputItemList(includeRoot, openOnly, True) self.exportHtmlColumns(fileRef, outGroup, 1, indent, addHeader) def exportHtmlColumns(self, fileRef, outGroup, numCol=1, indent=20, \ addHeader=False): """Save contents of outGroup as html to file in columns""" try: f = self.getWriteFileObj(fileRef, False) except IOError: print 'Error - could not write file' raise filePath = unicode(f.name, TreeDoc.localEncoding) if self.lineBreaks: outGroup.addBreaks() outGroups = outGroup.splitColumns(numCol) for group in outGroups: group.addPrefix() group.addIndents() htmlTitle = os.path.splitext(os.path.basename(filePath))[0] lines = [u'', u'', u'', \ u'', u'%s' % htmlTitle, \ u'', \ u'', u''] if addHeader: header = self.fileInfoItem.nodeFormat.getHeaderFooter(True) if header: lines.append(header) lines.extend([u'', u'', u'
']) for item in outGroups[0]: lines.extend(item.textLines) for group in outGroups[1:]: lines.append(u'') for item in group: lines.extend(item.textLines) lines.extend([u'
']) if addHeader: footer = self.fileInfoItem.nodeFormat.getHeaderFooter(False) if footer: lines.append(footer) lines.extend([u'', u'']) try: f.writelines([(line + '\n').encode('utf-8') for line in lines]) except IOError: print 'Error - could not write file', \ filePath.encode(TreeDoc.localEncoding) raise f.close() def exportDir(self, dirName, item, addHeader=False): """Write tree to nested directory struct with html tables""" oldDir = os.getcwd() os.chdir(dirName.encode(TreeDoc.localEncoding)) if addHeader: header = self.fileInfoItem.nodeFormat.getHeaderFooter(True) footer = self.fileInfoItem.nodeFormat.getHeaderFooter(False) item.exportDir(None, header, footer) else: item.exportDir() os.chdir(oldDir) def exportXslt(self, fileRef, includeRoot, indent=20): """Write XSLT file and add link in treeline file""" try: f = self.getWriteFileObj(fileRef, False) except IOError: print 'Error - could not write file' raise filePath = unicode(f.name, TreeDoc.localEncoding) title = os.path.splitext(os.path.basename(filePath))[0] lines = [u'', \ u"", u'', \ u'', u''] if self.xslCssLink: lines.append('' \ % self.xslCssLink) lines.extend([u'%s' % title, u'', u'', \ u'', u'', u'', \ u'']) if not includeRoot: lines.extend([u'', u'' \ % self.root.nodeFormat.name, \ u'', u'']) self.treeFormats.sort() for format in self.treeFormats: lines.extend(format.xsltTemplate(indent, True)) lines.extend([u'', u'', u'', \ u'']) try: f.writelines([(line + '\n').encode('utf-8') for line in lines]) except IOError: print 'Error - could not write file', \ filePath.encode(TreeDoc.localEncoding) raise f.close() # find relative link path trlPath = os.path.abspath(self.fileName).split(os.sep) xslPath = os.path.abspath(filePath).split(os.sep) while trlPath[0] == xslPath[0]: del trlPath[0] del xslPath[0] xslPath = '/'.join(['..'] * (len(trlPath) - len(xslPath)) + xslPath) link = u'xml-stylesheet type="text/xsl" href="%s"' % xslPath if self.xlstLink != link: self.xlstLink = link self.modified = True def exportTrlSubtree(self, fileRef, item): """Write subtree TRL file starting form item""" lines = [u''] if self.xlstLink: lines.append(u'' % self.xlstLink) lines.extend(item.branchXml([], True)) try: f = self.getWriteFileObj(fileRef, self.compressFile) f.writelines([(line + '\n').encode('utf-8') for line in lines]) except IOError: print 'Error - could not write file' raise f.close() def exportTable(self, fileRef, item): """Write data to table for children of item""" typeList = [] headings = [] tableList = [] for child in item.childList: if child.nodeFormat not in typeList: for field in child.nodeFormat.fieldNames(): if field not in headings: headings.append(field) typeList.append(child.nodeFormat) tableList.append(u'\t'.join([child.data.get(head, '') for \ head in headings])) tableList.insert(0, u'\t'.join([head for head in headings])) try: text = os.linesep.join(tableList).encode(TreeDoc.localEncoding, \ 'strict') except (ValueError, UnicodeError): print 'Warning - bad unicode characters were replaced' text = os.linesep.join(tableList).encode(TreeDoc.localEncoding, \ 'replace') try: f = self.getWriteFileObj(fileRef, False) f.write(text) except IOError: print 'Error - could not write file' raise f.close() def exportTabbedTitles(self, fileRef, item, includeRoot, openOnly=False): """Write tabbed titles for descendants of item""" if includeRoot: titleList = item.exportToText(False, openOnly) else: titleList = item.exportToText(-1, openOnly) del titleList[0] try: text = os.linesep.join(titleList).encode(TreeDoc.localEncoding, \ 'strict') except (ValueError, UnicodeError): print 'Warning - bad unicode characters were replaced' text = os.linesep.join(titleList).encode(TreeDoc.localEncoding, \ 'replace') try: f = self.getWriteFileObj(fileRef, False) f.write(text) except IOError: print 'Error - could not write file' raise f.close() def exportXbel(self, fileRef, item): """Export XBEL bookmarks""" lines = [u''] lines.extend(item.exportXbelBookmarks()) try: f = self.getWriteFileObj(fileRef, False) f.writelines([(line + '\n').encode('utf-8') for line in lines]) except IOError: print 'Error - could not write file' raise f.close() def exportHtmlBookmarks(self, fileRef, item): """Export HTML bookmarks""" lines = [u'', \ u''] lines.extend(item.exportHtmlBookmarks()) try: f = self.getWriteFileObj(fileRef, False) f.writelines([(line + '\n').encode('utf-8') for line in lines]) except IOError: print 'Error - could not write file' raise f.close() def exportGenericXml(self, fileRef, item): """Export generic XML""" lines = [u''] lines.extend(item.exportGenericXml(GenericXmlHandler.textFieldName)) try: f = self.getWriteFileObj(fileRef, False) f.writelines([(line + '\n').encode('utf-8') for line in lines]) except IOError: print 'Error - could not write file' raise f.close() class ReadFileError(Exception): """Exception class for errors on reading file content""" pass class PasswordError(Exception): """Exception class for missing or invalid encryption passwords""" pass def testXmlParser(): """Return True if parser works correctly""" try: handler = xml.sax.ContentHandler() xml.sax.parseString('test', handler) except xml.sax.SAXException: return False return True def setLocalEncoding(): """Store locale's default encoding in TreeDoc.localEncoding""" try: TreeDoc.localEncoding = locale.getpreferredencoding() # not reliable? 'test'.encode(TreeDoc.localEncoding) except (AttributeError, LookupError, locale.Error): try: # not available on windows TreeDoc.localEncoding = locale.nl_langinfo(locale.CODESET) 'test'.encode(TreeDoc.localEncoding) except (AttributeError, LookupError, locale.Error): try: TreeDoc.localEncoding = locale.getdefaultlocale()[1] 'test'.encode(TreeDoc.localEncoding) except (AttributeError, LookupError, locale.Error): TreeDoc.localEncoding = 'utf-8' if __name__ == '__main__': doc = TreeDoc() if len(sys.argv) > 1: doc.readFile(sys.argv[1]) print '\n'.join(doc.root.exportToText()) print print '\n'.join(doc.root.childList[0].formatChildText())