1 # Written by Njaal Borch
2 # see LICENSE.txt for license information
def to_unicode(string):
    """
    Function to change a string (unicode or not) into a unicode string

    Byte strings are decoded as utf-8 first; if that fails a warning is
    printed and latin-1 is used instead (latin-1 maps every byte value, so
    the fallback cannot fail).  Anything that is not a byte string -
    already-decoded text, None, numbers, ... - is returned unchanged.

    TODO: Is there a better way than guessing the encoding?
    """
    # ``bytes`` is the same type as ``str`` on Python 2, so this covers the
    # original "is it a plain str?" test there, also accepts subclasses, and
    # keeps working on Python 3 where the ``unicode`` builtin is gone.
    if not isinstance(string, bytes):
        return string

    try:
        return string.decode("utf-8")
    except UnicodeDecodeError:
        # Single-argument call form prints the same text on Python 2 and 3.
        print("Warning: Fallback to latin-1 for unicode conversion")
        return string.decode("latin-1")
24 An XML printer that will print XML *with namespaces*
26 Why minidom.toxml() does not do so really makes absolutely no sense
def __init__(self, doc):
    """
    Set up a printer for one document.

    doc should be a xml.dom.minidom document
    """
    # Starting node for serialization; the to_xml methods read it back
    # as self.root.
    self.root = doc
    # Running number used to build generated namespace prefixes.
    self.namespace_counter = 0
def to_xml(self, encoding="UTF8"):
    """
    Like minidom toxml, just using namespaces too

    The document is serialized without indentation and without newlines
    between nodes.

    encoding -- codec used to encode the result; the same name is passed
                on so it can be written into the XML declaration.

    Returns the encoded byte string.  Characters the codec cannot
    represent are substituted (codec error handler "replace").
    """
    # Pass the encoding on: otherwise the XML declaration would always
    # carry _toxml's default even when the bytes use another codec.
    text = self._toxml(self.root, indent='', newl='', encoding=encoding)
    return text.encode(encoding, "replace")
def to_pretty_xml(self, indent=' ', newl='\n', encoding="UTF8"):
    """
    Like minidom toprettyxml, just using namespaces too

    indent   -- indentation string handed to the serializer
    newl     -- string appended after each serialized node
    encoding -- codec used to encode the result; the same name is passed
                on so it can be written into the XML declaration.

    Returns the encoded byte string.  Characters the codec cannot
    represent are substituted (codec error handler "replace").
    """
    # Pass the encoding on: otherwise the XML declaration would always
    # carry _toxml's default even when the bytes use another codec.
    text = self._toxml(self.root, indent, newl, encoding=encoding)
    return text.encode(encoding, "replace")
def _make_header(self, encoding):
    """
    Return the XML declaration line (unicode, newline-terminated) naming
    the given encoding.

    The encoding name is inserted verbatim; it is not validated here.
    """
    return u'<?xml version="1.0" encoding="%s" ?>\n' % encoding
def _new_namespace(self, namespace):
    """
    Allocate and return a new short namespace prefix ("ns0", "ns1", ...).

    namespace -- the namespace URI the prefix is meant for.  It does not
                 influence the generated name; recording which prefix
                 belongs to which URI is left to the caller.
    """
    ns_short = "ns%d" % self.namespace_counter
    # Advance the counter so the next call produces a different prefix.
    self.namespace_counter += 1
    return ns_short
# Recursive serializer: turns one DOM node (and, for elements, its children)
# into a unicode XML fragment.  Text nodes become indent + value + newl;
# element nodes become tags, optionally carrying an "nsN" prefix and xmlns
# declarations for namespaces not yet present in the `namespaces` mapping.
# NOTE(review): in this view of the function several lines are not visible
# (the branch conditions choosing between the tag formats below, the
# else/return lines, loop bodies, and the setup of `buffer`,
# `define_ns_list`, `ns_string` and `new_indent`).  Confirm the exact
# control flow against the complete file before changing anything here.
63 def _toxml(self, element, indent=' ', newl='\n', encoding='UTF8', namespaces=None):
65 Recursive, internal function - do not use directly
# The XML declaration is emitted only for the node stored as self.root.
# `encoding` is used here for the declaration text and is otherwise just
# forwarded to the recursive calls.
76 if element == self.root:
78 buffer = self._make_header(encoding)
# Text node: written as indent + text + newline string.
80 if element.nodeType == element.TEXT_NODE:
81 buffer += indent + to_unicode(element.nodeValue) + newl
# Element node: work out the tag name and namespace prefix.
83 if element.nodeType == element.ELEMENT_NODE:
84 ns = element.namespaceURI
85 name = to_unicode(element.localName)
86 if name.find(" ") > -1:
87 raise Exception("Refusing spaces in tag names")
# Reuse the prefix already registered for this namespace URI, if any.
# NOTE(review): dict.has_key() exists only on Python 2.
89 if namespaces.has_key(ns):
90 ns_short = namespaces[ns]
# A non-empty namespace gets a freshly generated prefix, which is queued in
# define_ns_list so it can be declared on this element's start tag.
93 if ns not in ["", None]:
94 ns_short = self._new_namespace(ns)
95 define_ns_list.append((ns, ns_short))
100 namespaces[ns] = ns_short
102 # Should we define more namespaces? Will peek into the
103 # children and see if there are any
104 for child in element.childNodes:
105 if child.nodeType != child.ELEMENT_NODE:
108 if not namespaces.has_key(child.namespaceURI) and \
109 child.namespaceURI not in [None, ""]:
110 # Should define this one too!
111 new_ns = self._new_namespace(child.namespaceURI)
112 define_ns_list.append((child.namespaceURI, new_ns))
113 namespaces[child.namespaceURI] = new_ns
116 # If we have no children, we will write <tag/>
117 if not element.hasChildNodes():
# Self-closing tag variants: with prefix and xmlns declaration, with a
# default xmlns declaration, with prefix only, and bare.
# NOTE(review): the conditions selecting each variant are not visible here.
121 buffer += '<%s:%s xmlns:%s="%s"/>%s'%\
122 (ns_short, name,ns_short,ns,newl)
124 buffer += '<%s xmlns="%s"/>%s'%(name,ns,newl)
127 buffer += '<%s:%s/>%s'%(ns_short, name, newl)
129 buffer += '<%s/>%s'%(name, newl)
132 buffer += '<%s/>%s'%(name, newl)
134 # Clean up - namespaces is passed as a reference, and is
135 # as such not cleaned up. Let it be so to save some speed
# NOTE(review): the body of this loop is not visible here.
136 for (n,short) in define_ns_list:
# Element with children: build the xmlns:prefix="uri" declarations for every
# namespace queued in define_ns_list; each piece starts with a space.
142 if len(define_ns_list) > 0:
143 for (url, short) in define_ns_list:
144 ns_string += ' xmlns:%s="%s"'%(short, url)
149 # Define all namespaces of next level children too
150 buffer += '<%s:%s xmlns:%s="%s"%s>%s'%\
151 (ns_short, name, ns_short, ns, ns_string, newl)
# Remaining start-tag variants (default xmlns, prefix only, declarations
# only, bare).  NOTE(review): selecting conditions are not visible here.
153 buffer += '<%s xmlns="%s"%s>%s'%(name,ns,ns_string,newl)
156 buffer += '<%s:%s%s>%s'%(ns_short, name, ns_string, newl)
158 buffer += '<%s%s>%s'%(name, ns_string, newl)
160 buffer += '<%s %s>%s'%(name, ns_string, newl)
162 buffer += '<%s>%s'%(name, newl)
164 # Recursively process
# Children share the same `namespaces` mapping, so prefixes registered above
# are visible to them.
165 for child in element.childNodes:
169 buffer += self._toxml(child, new_indent, newl, encoding, namespaces)
# Closing tag, with or without the namespace prefix.
171 buffer += "%s</%s:%s>%s"%(indent, ns_short, name, newl)
173 buffer += "%s</%s>%s"%(indent, name, newl)
# NOTE(review): the body of this loop is not visible here.
175 for (n, short) in define_ns_list:
# Diagnostic dump of the partial output written to stdout.
# NOTE(review): these are Python 2 print statements; what triggers this
# path is not visible here.
180 print "-----------------"
182 print "Buffer:",buffer
183 print "-----------------"
# Final raise; presumably reached only when no earlier branch returned a
# result - confirm against the complete file.
186 raise Exception("Could not serialize DOM")