[Twisted-web] Guard and flatsax patches

James Y Knight twisted-web@twistedmatrix.com
Tue, 3 Feb 2004 18:58:42 -0500


--Apple-Mail-8-908758300
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
	charset=US-ASCII;
	format=flowed

Radix has brought to my attention the seriously screwed up nature of 
the python xml package. Thus, here is an updated version of this patch 
with better version detection.

Flatsax patch:
- Require PyXML >= 0.8.2 *OR* Python >= 2.3
- Handle entities correctly.

James


--Apple-Mail-8-908758300
Content-Transfer-Encoding: 7bit
Content-Type: application/octet-stream;
	x-unix-mode=0644;
	name="flatsax.patch"
Content-Disposition: attachment;
	filename=flatsax.patch

Index: serial/flatsax.py
===================================================================
RCS file: /cvs/Quotient/nevow/serial/flatsax.py,v
retrieving revision 1.2
diff -u -r1.2 flatsax.py
--- serial/flatsax.py	21 Jan 2004 05:12:12 -0000	1.2
+++ serial/flatsax.py	3 Feb 2004 23:54:10 -0000
@@ -1,14 +1,21 @@
-
-
-from xml.sax import ContentHandler
-from xml.sax import make_parser
-#from xml.sax.handler import feature_namespaces
-
+from xml.sax import make_parser, handler
+import xml as pyxml
 
 from nevow.stan import xml, Tag, directive
 
+## Require PyXML 0.8.2 or later, or, if PyXML isn't installed
+## python2.3 or later, because that includes approximately the
+## same code (but doesn't share a version number *!@#$@!@#)
+
+try:
+    ## pyxml package has a version_info attribute
+    bad_version = pyxml.version_info < (0,8,2)
+except:
+    ## we're using core python xml library
+    import sys
+    bad_version = sys.version_info < (2,3)
 
-class ToStan(ContentHandler):
+class ToStan(handler.ContentHandler, handler.EntityResolver):
     directiveMapping = {
         'render': 'renderer',
         'data': 'data',
@@ -17,9 +24,17 @@
     attributeList = [
         'pattern', 'slot', 'macro', 'fill-slot', 'key',
     ]
-
+    
+    def resolveEntity(self, publicId, systemId):
+        ## This doesn't seem to get called, which is good.
+        raise Exception("resolveEntity should not be called. We don't use external DTDs.")
+
+    def skippedEntity(self, name):
+        self.current.append(xml("&%s;"%name))
+        
     def startDocument(self):
-        self.document = [xml('<?xml version="1.0"?>\n')]
+        self.document = []
+#        self.document = [xml('<?xml version="1.0"?>\n')]
         self.current = self.document
         self.stack = []
 
@@ -51,7 +66,7 @@
         self.current = el.children
 
     def characters(self, ch):
-        self.current.append(xml(ch))
+        self.current.append(ch)
 
     def endElement(self, name):
         me = self.stack.pop()
@@ -62,12 +77,23 @@
 
 
 def parse(fl):
+    ## Earlier PyXMLs don't handle non-standard entities (e.g. &copy;) 
+    ## correctly. They will either give an error or simply ignore the
+    ## entity producing bad output.
+    
+    if bad_version:
+        raise Exception("Please use PyXML later than 0.8.2 or python later than 2.3. Earlier ones are too buggy.")
+    
     parser = make_parser()
-#    parser.setFeature(feature_namespaces, 0)
+    parser.setFeature(handler.feature_validation, 0)
+    parser.setFeature(handler.feature_namespaces, 0)
+    parser.setFeature(handler.feature_external_ges, 0)
+    parser.setFeature(handler.feature_external_pes, 0)
+    
     s = ToStan()
     parser.setContentHandler(s)
-
-#    setEntityResolver()
+    parser.setEntityResolver(s)
+    
     parser.parse(fl)
 
     return s.document

--Apple-Mail-8-908758300
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
	charset=US-ASCII;
	format=flowed



--Apple-Mail-8-908758300--