root / trunk / twisted / lore / tree.py

Revision 26929, 39.1 kB (checked in by exarkun, 2 months ago)

Merge lore-microdom-imports-3619

Author: exarkun
Reviewer: glyph
Fixes: #3619

Change the minidom as microdom imports in Twisted Lore to be
minidom as dom instead, and adjust all affected code.

  • Property svn:executable set to *
Line 
1 # Copyright (c) 2001-2009 Twisted Matrix Laboratories.
2 # See LICENSE for details.
3
4
5 from itertools import count
6 import re, os, cStringIO, time, cgi, string, urlparse
7 from xml.dom import minidom as dom
8 from xml.sax.handler import ErrorHandler, feature_validation
9 from xml.dom.pulldom import SAX2DOM
10 from xml.sax import make_parser
11 from xml.sax.xmlreader import InputSource
12
13 from twisted.python import htmlizer, text
14 from twisted.python.filepath import FilePath
15 from twisted.python.deprecate import deprecated
16 from twisted.python.versions import Version
17 from twisted.web import domhelpers
18 import process, latex, indexer, numberer, htmlbook
19
20 # relative links to html files
def fixLinks(document, ext):
    """
    Rewrite links to XHTML lore input documents so they point to lore XHTML
    output documents.

    A node with an C{href} attribute is left untouched when the URL scheme of
    that value is C{http}, C{https}, C{ftp} or C{mailto}, when its C{class}
    attribute is exactly C{absolute}, or when its C{class} attribute contains
    C{listing}.  For every other node, if the part of the C{href} before the
    first C{#} ends with C{html}, the filename extension of that part is
    replaced with C{ext}.  The fragment (the first C{#} and everything after
    it), if any, is carried over unchanged.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
    presented.

    @type ext: C{str}
    @param ext: The extension to use when selecting an output file name.  This
    replaces the extension of the input file name.

    @return: C{None}
    """
    supported_schemes = ['http', 'https', 'ftp', 'mailto']
    for node in domhelpers.findElementsWithAttribute(document, 'href'):
        href = node.getAttribute("href")
        if urlparse.urlparse(href)[0] in supported_schemes:
            continue
        cssClass = node.getAttribute("class")
        if cssClass == "absolute" or 'listing' in cssClass:
            continue

        # This is a relative link, so it should be munged.  Separate the
        # fragment from the path first so that a dot inside the fragment (or
        # a missing fragment) cannot confuse the extension handling.
        if '#' in href:
            path, fragment = href.split('#', 1)
            fragment = '#' + fragment
        else:
            path, fragment = href, ''

        if path.endswith('html'):
            fname = os.path.splitext(path)[0]
            node.setAttribute("href", fname + ext + fragment)
60
61
62
def addMtime(document, fullpath):
    """
    Append the last modified time of a file to the marked nodes of a document.

    Every node in C{document} whose C{class} attribute is C{mtime} receives a
    new text child holding the C{time.ctime} rendering of the modification
    time of C{fullpath}.

    @type document: A DOM Node or Document
    @param document: The output template which defines the presentation of the
    last modified time.

    @type fullpath: C{str}
    @param fullpath: The file name from which to take the last modified time.

    @return: C{None}
    """
    targets = domhelpers.findElementsWithAttribute(document, "class", "mtime")
    for target in targets:
        # The file is only consulted when there is a node to fill in.
        modified = os.path.getmtime(fullpath)
        stamp = dom.Text()
        stamp.data = time.ctime(modified)
        target.appendChild(stamp)
80
81
82
83 def _getAPI(node):
84     """
85     Retrieve the fully qualified Python name represented by the given node.
86
87     The name is represented by one or two aspects of the node: the value of the
88     node's first child forms the end of the name.  If the node has a C{base}
89     attribute, that attribute's value is prepended to the node's value, with
90     C{.} separating the two parts.
91
92     @rtype: C{str}
93     @return: The fully qualified Python name.
94     """
95     base = ""
96     if node.hasAttribute("base"):
97         base = node.getAttribute("base") + "."
98     return base+node.childNodes[0].nodeValue
99
100
101
def fixAPI(document, url):
    """
    Turn API references into links to API documentation.

    Each node with a C{class} attribute of C{API} has all of its children
    moved into a new C{a} node, which becomes the node's only child.  The
    C{href} of that C{a} node is C{url} interpolated with the fully qualified
    name, and its C{title} is the fully qualified name.  Any C{base} attribute
    on the original node is removed.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
    presented.

    @type url: C{str}
    @param url: A string which will be interpolated with the fully qualified
    Python name of any API reference encountered in the input document, the
    result of which will be used as a link to API documentation for that name
    in the output document.

    @return: C{None}
    """
    references = domhelpers.findElementsWithAttribute(document, "class", "API")
    for reference in references:
        qualified = _getAPI(reference)

        link = dom.Element('a')
        link.setAttribute('href', url % (qualified,))
        link.setAttribute('title', qualified)

        # Re-parent every existing child under the new link.
        for child in list(reference.childNodes):
            reference.removeChild(child)
            link.appendChild(child)
        reference.appendChild(link)

        if reference.hasAttribute('base'):
            reference.removeAttribute('base')
131
132
133
def fontifyPython(document):
    """
    Syntax color every Python source listing in the given document.

    A Python source listing is a C{pre} node whose C{class} attribute is
    C{python}; each such node is handed to L{fontifyPythonNode}.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
    presented.

    @return: C{None}
    """
    def isPythonListing(candidate):
        # The node name is checked first so that attribute methods are only
        # called on nodes named C{pre}.
        if candidate.nodeName != 'pre':
            return False
        if not candidate.hasAttribute('class'):
            return False
        return candidate.getAttribute('class') == 'python'

    for listing in domhelpers.findElements(document, isPythonListing):
        fontifyPythonNode(listing)
150
151
152
def fontifyPythonNode(node):
    """
    Replace a node containing Python source code with a syntax colored
    rendering of that source.

    The text of C{node} is extracted, run through L{htmlizer.filter}, and the
    parsed result (given a C{class} attribute of C{python}) takes the place of
    C{node} in its parent.  A line number element is inserted as the first
    child of the replacement.

    The node must have a parent.

    @return: C{None}
    """
    collected = cStringIO.StringIO()
    latex.getLatexText(node, collected.write,
                       entities={'lt': '<', 'gt': '>', 'amp': '&'})

    # Normalise surrounding whitespace so the source ends in exactly one
    # newline, then count the lines that will be rendered.
    source = collected.getvalue().strip() + '\n'
    lineCount = len(source.splitlines())

    rendered = cStringIO.StringIO()
    htmlizer.filter(cStringIO.StringIO(source), rendered,
                    writer=htmlizer.SmallerHTMLWriter)

    gutter = _makeLineNumbers(lineCount)
    replacement = dom.parseString(rendered.getvalue()).documentElement
    replacement.setAttribute("class", "python")
    node.parentNode.replaceChild(replacement, node)
    replacement.insertBefore(gutter, replacement.firstChild)
173
174
175
def addPyListings(document, dir):
    """
    Insert Python source listings into the given document from files in the
    given directory based on C{py-listing} nodes.

    Any node in C{document} with a C{class} attribute set to C{py-listing} will
    have source lines taken from the file named in that node's C{href}
    attribute (searched for in C{dir}) inserted in place of that node.

    If a node has a C{skipLines} attribute, its value will be parsed as an
    integer and that many lines will be skipped at the beginning of the source
    file.

    @type document: A DOM Node or Document
    @param document: The document within which to make listing replacements.

    @type dir: C{str}
    @param dir: The directory in which to find source files containing the
    referenced Python listings.

    @return: C{None}
    """
    for node in domhelpers.findElementsWithAttribute(document, "class",
                                                     "py-listing"):
        filename = node.getAttribute("href")
        outfile = cStringIO.StringIO()

        # Read the listing and make sure the file is closed again even if
        # reading fails.
        sourceFile = open(os.path.join(dir, filename))
        try:
            lines = [line.rstrip() for line in sourceFile.readlines()]
        finally:
            sourceFile.close()

        skip = node.getAttribute('skipLines') or 0
        lines = lines[int(skip):]
        # NOTE(review): the line count is taken before
        # text.removeLeadingTrailingBlanks is applied below; if that helper
        # drops lines, the number of labels may exceed the number of rendered
        # lines -- confirm whether this is intended.
        howManyLines = len(lines)
        data = '\n'.join(lines)

        data = cStringIO.StringIO(text.removeLeadingTrailingBlanks(data))
        htmlizer.filter(data, outfile, writer=htmlizer.SmallerHTMLWriter)
        sourceNode = dom.parseString(outfile.getvalue()).documentElement
        sourceNode.insertBefore(_makeLineNumbers(howManyLines),
                                sourceNode.firstChild)
        _replaceWithListing(node, sourceNode.toxml(), filename, "py-listing")
214
215
216
217 def _makeLineNumbers(howMany):
218     """
219     Return an element which will render line numbers for a source listing.
220
221     @param howMany: The number of lines in the source listing.
222     @type howMany: C{int}
223
224     @return: An L{dom.Element} which can be added to the document before
225         the source listing to add line numbers to it.
226     """
227     # Figure out how many digits wide the widest line number label will be.
228     width = len(str(howMany))
229
230     # Render all the line labels with appropriate padding
231     labels = ['%*d' % (width, i) for i in range(1, howMany + 1)]
232
233     # Create a p element with the right style containing the labels
234     p = dom.Element('p')
235     p.setAttribute('class', 'py-linenumber')
236     t = dom.Text()
237     t.data = '\n'.join(labels) + '\n'
238     p.appendChild(t)
239     return p
240
241
def _replaceWithListing(node, val, filename, class_):
    """
    Replace C{node} with a C{div} containing a listing and its caption.

    The caption is the text of C{node}, or C{'Source listing'} when that text
    equals the base name of C{filename}.  It is followed by a link to
    C{filename}.

    @param node: The node to replace.  It must have a parent.

    @type val: C{str}
    @param val: Serialized markup for the listing itself.  It is inserted
        verbatim, so it must already be well-formed.

    @type filename: C{str}
    @param filename: The name of the listed file, used for the link target and
        the link text.

    @type class_: C{str}
    @param class_: The value of the C{class} attribute of the outer C{div}.

    @return: C{None}
    """
    captionTitle = domhelpers.getNodeText(node)
    if captionTitle == os.path.basename(filename):
        captionTitle = 'Source listing'
    # The caption and the file name are plain text, not markup: escape them so
    # that characters such as '&', '<' or '"' cannot break the parse below.
    markup = ('<div class="%s">%s<div class="caption">%s - '
              '<a href="%s"><span class="filename">%s</span></a></div></div>' %
              (class_, val, cgi.escape(captionTitle),
               cgi.escape(filename, True), cgi.escape(filename)))
    newnode = dom.parseString(markup).documentElement
    node.parentNode.replaceChild(newnode, node)
251
252
253
def addHTMLListings(document, dir):
    """
    Insert HTML source listings into the given document from files in the given
    directory based on C{html-listing} nodes.

    Any node in C{document} with a C{class} attribute set to C{html-listing}
    will have the escaped contents of the file named in that node's C{href}
    attribute (searched for in C{dir}) inserted in place of that node, wrapped
    in a C{pre} node with a C{class} attribute of C{htmlsource}.

    @type document: A DOM Node or Document
    @param document: The document within which to make listing replacements.

    @type dir: C{str}
    @param dir: The directory in which to find source files containing the
    referenced HTML listings.

    @return: C{None}
    """
    for node in domhelpers.findElementsWithAttribute(document, "class",
                                                     "html-listing"):
        filename = node.getAttribute("href")
        # Close the file explicitly rather than relying on garbage collection.
        sourceFile = open(os.path.join(dir, filename))
        try:
            contents = sourceFile.read()
        finally:
            sourceFile.close()
        val = ('<pre class="htmlsource">\n%s</pre>' %
               cgi.escape(contents))
        _replaceWithListing(node, val, filename, "html-listing")
278
279
280
def addPlainListings(document, dir):
    """
    Insert text listings into the given document from files in the given
    directory based on C{listing} nodes.

    Any node in C{document} with a C{class} attribute set to C{listing} will
    have the escaped contents of the file named in that node's C{href}
    attribute (searched for in C{dir}) inserted in place of that node, wrapped
    in a C{pre} node.

    @type document: A DOM Node or Document
    @param document: The document within which to make listing replacements.

    @type dir: C{str}
    @param dir: The directory in which to find source files containing the
    referenced text listings.

    @return: C{None}
    """
    for node in domhelpers.findElementsWithAttribute(document, "class",
                                                     "listing"):
        filename = node.getAttribute("href")
        # Close the file explicitly rather than relying on garbage collection.
        sourceFile = open(os.path.join(dir, filename))
        try:
            contents = sourceFile.read()
        finally:
            sourceFile.close()
        val = ('<pre>\n%s</pre>' %
               cgi.escape(contents))
        _replaceWithListing(node, val, filename, "listing")
305
306
307
def getHeaders(document):
    """
    Collect the nodes of the given document whose names match C{h2} or C{h3}.

    @type document: A DOM Node or Document

    @rtype: C{list}
    """
    matchHeaderName = re.compile('h[23]$').match

    def isHeader(node):
        return matchHeaderName(node.nodeName)

    return domhelpers.findElements(document, isHeader)
319
320
321
def generateToC(document):
    """
    Build a table of contents from the headers of the given document.

    Each header found by L{getHeaders} gets an entry linking to an
    automatically named anchor, and that anchor is appended to the header
    itself.  C{h2} headers become items of an C{ol} node; the C{h3} headers
    following an C{h2} are collected in a C{ul} node appended after that
    C{h2}'s item.

    @type document: A DOM Node or Document

    @raise ValueError: If an C{h3} header is encountered before any C{h2}
    header.

    @rtype: A DOM Node
    @return: a Node containing a table of contents based on the headers of the
    given document.
    """
    sections = []
    currentChildren = None
    for element in getHeaders(document):
        if element.tagName == 'h2':
            currentChildren = []
            sections.append((element, currentChildren))
        elif currentChildren is not None:
            currentChildren.append(element)
        else:
            raise ValueError(
                "No H3 element is allowed until after an H2 element")

    numbering = count()

    def link(header, container):
        # One generated name ties the ToC entry to the anchor in the header.
        name = 'auto%d' % (numbering.next(),)

        label = dom.Text()
        label.data = domhelpers.getNodeText(header)
        reference = dom.Element('a')
        reference.setAttribute('href', '#' + name)
        reference.appendChild(label)

        item = dom.Element('li')
        item.appendChild(reference)
        container.appendChild(item)

        target = dom.Element('a')
        target.setAttribute('name', name)
        header.appendChild(target)

    toc = dom.Element('ol')
    for header, children in sections:
        link(header, toc)
        if not children:
            continue
        nested = dom.Element('ul')
        toc.appendChild(nested)
        for child in children:
            link(child, nested)

    return toc
370
371
372
def putInToC(document, toc):
    """
    Place the given table of contents into the given document.

    The child list of the first node with a C{class} attribute of C{toc} is
    replaced by a list containing only C{toc}.  If there is no such node,
    nothing happens.

    @type document: A DOM Node or Document
    @type toc: A DOM Node
    """
    holders = domhelpers.findElementsWithAttribute(document, 'class', 'toc')
    if not holders:
        return
    holder = holders[0]
    holder.childNodes = [toc]
387
388
389
def removeH1(document):
    """
    Replace all C{h1} nodes in the given document with empty C{span} nodes.

    C{h1} nodes mark up document sections and the output template is given an
    opportunity to present this information in a different way.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
    presented.

    @return: C{None}
    """
    for node in domhelpers.findNodesNamed(document, 'h1'):
        # A fresh span is needed for every h1: minidom's replaceChild detaches
        # the new child from any parent it already has, so a single shared
        # span would only remain in place of the last h1.
        node.parentNode.replaceChild(dom.Element('span'), node)
407
408
409
def footnotes(document):
    """
    Gather the footnotes of the given document at the end of its body and
    leave numbered links behind in their place.

    A footnote is any node with a C{class} attribute set to C{footnote}.  Each
    one is replaced by a link whose C{title} is the whitespace-normalized text
    of the footnote.  The footnotes themselves are collected, each inside a
    named anchor, as items of an C{ol} node which is added after a
    C{Footnotes} heading at the end of the first C{body} node.

    If the document contains no footnotes, it is left unchanged.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
    presented.

    @return: C{None}
    """
    found = domhelpers.findElementsWithAttribute(document, "class",
                                                 "footnote")
    if not found:
        return

    collected = dom.Element('ol')
    number = 1
    for footnote in found:
        link = dom.parseString('<a href="#footnote-%(id)d">'
                               '<super>%(id)d</super></a>'
                               % {'id': number}).documentElement
        summary = ' '.join(domhelpers.getNodeText(footnote).split())
        link.setAttribute('title', summary)

        target = dom.Element('a')
        target.setAttribute('name', 'footnote-%d' % (number,))
        # The child lists are assigned directly, so the footnote still refers
        # to its original parent when it is replaced below.
        target.childNodes = [footnote]
        item = dom.Element('li')
        item.childNodes = [target]
        collected.childNodes.append(item)

        footnote.parentNode.replaceChild(link, footnote)
        number += 1

    body = domhelpers.findNodesNamed(document, "body")[0]
    heading = dom.parseString('<h2>Footnotes</h2>').documentElement
    body.childNodes.append(heading)
    body.childNodes.append(collected)
449
450
451
def notes(document):
    """
    Mark up the notes found in the given document.

    A note is any node with a C{class} attribute set to C{note}.  A
    C{strong} node reading C{Note: } is placed at the front of the child list
    of each note.

    (I think this is a very stupid feature.  When I found it I actually
    exclaimed out loud. -exarkun)

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
    presented.

    @return: C{None}
    """
    found = domhelpers.findElementsWithAttribute(document, "class", "note")
    prefix = dom.parseString('<strong>Note: </strong>').documentElement
    for element in found:
        # The same prefix node is put at the head of every note's child list.
        element.childNodes[0:0] = [prefix]
471
472
473
def compareMarkPos(a, b):
    """
    Compare two indexable values by their first item and then, if those are
    equal, by their second item.

    Deprecated since Twisted 9.0.0.

    @return: The result of L{cmp} on the first items if it is non-zero,
    otherwise the result of L{cmp} on the second items.
    """
    return cmp(a[0], b[0]) or cmp(a[1], b[1])
compareMarkPos = deprecated(Version('Twisted', 9, 0, 0))(compareMarkPos)
483
484
485
def comparePosition(firstElement, secondElement):
    """
    Order two elements by the value of their C{_markpos} attributes.

    Deprecated since Twisted 9.0.0.

    @type firstElement: C{dom.Element}
    @type secondElement: C{dom.Element}

    @return: C{-1}, C{0}, or C{1}, with the same meanings as the return value
    of L{cmp}.
    """
    firstMark = firstElement._markpos
    secondMark = secondElement._markpos
    return cmp(firstMark, secondMark)
comparePosition = deprecated(Version('Twisted', 9, 0, 0))(comparePosition)
499
500
501
def findNodeJustBefore(target, nodes):
    """
    Find the nearest node preceding C{target} which is in C{nodes}.

    The previous siblings of C{target} are examined from nearest to farthest.
    If none of them is in C{nodes}, the search continues in the same way with
    the parent of C{target}, then its parent, and so on.

    @param target: The node at which to begin the search.
    @param nodes: A list of nodes which might be the right node.

    @raise RuntimeError: If the ancestors are exhausted without finding a
    node from C{nodes}.

    @return: The first node encountered which is in C{nodes}.
    """
    current = target
    while current is not None:
        sibling = current.previousSibling
        while sibling is not None and sibling not in nodes:
            sibling = sibling.previousSibling
        if sibling is not None:
            return sibling
        # Nothing at this level; try again one level up.
        current = current.parentNode
    raise RuntimeError("Oops")
519
520
521
def getFirstAncestorWithSectionHeader(entry):
    """
    Walk up from C{entry} until a node is reached under which
    L{domhelpers.findNodesNamed} finds at least one C{h2} node, and return the
    C{h2} nodes found there.

    @type entry: A DOM Node
    @param entry: The node from which to begin traversal.  The first item
    returned by L{domhelpers.getParents} for this node is excluded from
    consideration.

    @rtype: C{list} of DOM Nodes
    @return: The C{h2} nodes found for the selected ancestor, or an empty list
    if no ancestor has any.
    """
    ancestors = domhelpers.getParents(entry)[1:]
    for ancestor in ancestors:
        found = domhelpers.findNodesNamed(ancestor, "h2")
        if found:
            return found
    return []
539
540
541
def getSectionNumber(header):
    """
    Extract the section number from the given header node.

    This is probably intended to interact in a rather specific way with
    L{numberDocument}.

    @type header: A DOM Node or L{None}
    @param header: The section from which to extract a number.  The section
        number is the text gathered from this node's first child.

    @return: C{None} if C{header} is false, otherwise a C{str} giving the
        section number.
    """
    if header:
        return domhelpers.gatherTextNodes(header.childNodes[0])
    return None
558
559
560
def getSectionReference(entry):
    """
    Determine the number of the section which contains the given node.

    The candidate section headers are those returned by
    L{getFirstAncestorWithSectionHeader}; the one selected is the candidate
    which L{findNodeJustBefore} locates ahead of C{entry}.

    @type entry: A DOM Node
    @param entry: The node for which to determine the section.

    @raise RuntimeError: Propagated from L{findNodeJustBefore} when none of
    the candidate headers precedes C{entry}.

    @rtype: C{str}
    @return: The section number of the selected header, as returned by
    L{getSectionNumber}.
    """
    candidates = getFirstAncestorWithSectionHeader(entry)
    return getSectionNumber(findNodeJustBefore(entry, candidates))
579
580
581
def index(document, filename, chapterReference):
    """
    Record the index entries of the given document with the indexer and turn
    each of them into a named anchor which the index can link back to.

    Any node with a C{class} attribute set to C{index} is considered an index
    entry.  Entries are numbered from 1 in the order they are found and the
    anchor names are C{index01}, C{index02}, and so on.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
    presented.

    @type filename: C{str}
    @param filename: A link to the output for the given document which will be
    included in the index to link to any index entry found here.

    @param chapterReference: If false, every entry is recorded with the
    reference C{'link'}.  Otherwise the section reference of the entry is
    used, falling back to this value when that is false.

    @return: C{None}
    """
    entries = domhelpers.findElementsWithAttribute(document, "class", "index")
    for position, entry in enumerate(entries):
        anchor = 'index%02d' % (position + 1)
        if not chapterReference:
            ref = 'link'
        else:
            ref = getSectionReference(entry) or chapterReference

        # The entry must be recorded before its attributes are stripped,
        # because the C{value} attribute is read here.
        indexer.addEntry(filename, anchor, entry.getAttribute('value'), ref)

        # does nodeName even affect anything?
        entry.nodeName = 'a'
        entry.tagName = 'a'
        entry.endTagName = 'a'
        for attrName in list(entry.attributes.keys()):
            entry.removeAttribute(attrName)
        entry.setAttribute('name', anchor)
620
621
622
def setIndexLink(template, indexFilename):
    """
    Point the index-link nodes of a template at an index document.

    Any node with a C{class} attribute set to C{index-link} will have its tag
    name changed to C{a}, all of its attributes removed, and its C{href}
    attribute set to C{indexFilename}.

    @type template: A DOM Node or Document
    @param template: The output template in which to look for index-link
    nodes.

    @type indexFilename: C{str}
    @param indexFilename: The address of the index document to which to link.
    If C{None}, all index-link nodes are removed from their parents instead.

    @return: C{None}
    """
    links = domhelpers.findElementsWithAttribute(template,
                                                 "class",
                                                 "index-link")
    if indexFilename is None:
        # No index: drop the placeholders entirely.
        for link in links:
            link.parentNode.removeChild(link)
        return

    for link in links:
        link.nodeName = 'a'
        link.tagName = 'a'
        link.endTagName = 'a'
        for attrName in list(link.attributes.keys()):
            link.removeAttribute(attrName)
        link.setAttribute('href', indexFilename)
651
652
653
def numberDocument(document, chapterNumber):
    """
    Prefix each section of the given document with a number.

    A text node of the form C{chapter.section} followed by a space is inserted
    at the beginning of each C{h2} node, with sections counted from 1 in the
    order they are found.

    This is probably intended to interact in a rather specific way with
    L{getSectionNumber}.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
    presented.

    @type chapterNumber: C{int}
    @param chapterNumber: The chapter number of this content in an overall
    document.

    @return: C{None}
    """
    sections = domhelpers.findNodesNamed(document, "h2")
    for offset, heading in enumerate(sections):
        prefix = dom.Text()
        prefix.data = "%s.%d " % (chapterNumber, offset + 1)
        heading.insertBefore(prefix, heading.firstChild)
680
681
682
def fixRelativeLinks(document, linkrel):
    """
    Prefix the relative links in C{src} and C{href} attributes with
    C{linkrel}.

    An attribute value is treated as relative unless it starts with C{http}
    or with C{/}.

    @type document: A DOM Node or Document
    @param document: The output template.

    @type linkrel: C{str}
    @param linkrel: A prefix to apply to all relative links in C{src} or
    C{href} attributes in the given document.
    """
    for attr in ('src', 'href'):
        for node in domhelpers.findElementsWithAttribute(document, attr):
            value = node.getAttribute(attr)
            if value.startswith('http') or value.startswith('/'):
                continue
            node.setAttribute(attr, linkrel + value)
701
702
703
def setTitle(template, title, chapterNumber):
    """
    Copy title and chapter number information into the template document.

    Clones of the title nodes are appended to the first C{title} tag and to
    the first tag with a C{class} attribute set to C{title}.  If section
    numbering is enabled and a chapter number is given, a text node holding
    the chapter number is first inserted at the front of C{title} (the list
    passed in is modified).

    @type template: A DOM Node or Document
    @param template: The output template which receives the title.

    @type title: C{list} of DOM Nodes
    @param title: Nodes from the input document defining its title.

    @type chapterNumber: C{int}
    @param chapterNumber: The chapter number of this content in an overall
    document.  If not applicable, any C{False} value will result in this
    information being omitted.

    @return: C{None}
    """
    if numberer.getNumberSections() and chapterNumber:
        chapterLabel = dom.Text()
        # This is necessary in order for cloning below to work.  See Python
        # issue 4851.
        chapterLabel.ownerDocument = template.ownerDocument
        chapterLabel.data = '%s. ' % (chapterNumber,)
        title.insert(0, chapterLabel)

    # Both searches are performed before anything is appended.
    titleTags = domhelpers.findNodesNamed(template, "title")
    titleClassed = domhelpers.findElementsWithAttribute(template, "class",
                                                        'title')
    for matches in (titleTags, titleClassed):
        if not matches:
            continue
        destination = matches[0]
        for piece in title:
            destination.appendChild(piece.cloneNode(True))
740
741
742
def setAuthors(template, authors):
    """
    Add author information to the template document.

    Names and contact information for authors are added to each node with a
    C{class} attribute set to C{authors} and to the template head as C{link}
    nodes.

    Within an C{authors} node, each author is rendered as a link.  Every
    author but the last is followed by C{', '}, and when there is more than
    one author the last is preceded by C{'and '}.

    @type template: A DOM Node or Document
    @param template: The output template which defines the presentation of the
    author information.

    @type authors: C{list} of two-tuples of C{str}
    @param authors: List of names and contact information for the authors of
    the input document.

    @return: C{None}
    """
    # The last author is identified by position rather than by value, so that
    # duplicate entries and non-tuple pairs are handled correctly.
    lastIndex = len(authors) - 1

    for node in domhelpers.findElementsWithAttribute(template,
                                                     "class", 'authors'):

        # First, similarly to setTitle, insert text into an <div
        # class="authors">
        container = dom.Element('span')
        for position, (name, href) in enumerate(authors):
            anchor = dom.Element('a')
            anchor.setAttribute('href', href)
            anchorText = dom.Text()
            anchorText.data = name
            anchor.appendChild(anchorText)
            if position == lastIndex:
                if lastIndex > 0:
                    andText = dom.Text()
                    andText.data = 'and '
                    container.appendChild(andText)
                container.appendChild(anchor)
            else:
                container.appendChild(anchor)
                commaText = dom.Text()
                commaText.data = ', '
                container.appendChild(commaText)

        node.appendChild(container)

    # Second, add appropriate <link rel="author" ...> tags to the <head>.
    # The values are escaped (including quotes) because they are interpolated
    # into attribute values of markup which is then parsed.
    head = domhelpers.findNodesNamed(template, 'head')[0]
    links = [dom.parseString('<link rel="author" href="%s" title="%s"/>'
                             % (cgi.escape(href, True),
                                cgi.escape(name, True))).childNodes[0]
             for name, href in authors]
    head.childNodes.extend(links)
796
797
798
def setVersion(template, version):
    """
    Append a version string to the marked nodes of the given template.

    Every node with a C{class} attribute set to C{version} receives a new
    text child containing C{version}.

    @type template: A DOM Node or Document
    @param template: The output template which defines the presentation of the
    version information.

    @type version: C{str}
    @param version: The version string to add to the template.

    @return: C{None}
    """
    targets = domhelpers.findElementsWithAttribute(template, "class",
                                                   "version")
    for target in targets:
        label = dom.Text()
        label.data = version
        target.appendChild(label)
817
818
819
def getOutputFileName(originalFileName, outputExtension, index=None):
    """
    Compute an output file name by swapping the extension of
    C{originalFileName} for C{outputExtension}.

    The new extension is appended exactly as given, so it should include its
    leading dot.  For example, if C{originalFileName} is C{'/foo/bar.baz'} and
    C{outputExtension} is C{'.quux'}, the return value will be
    C{'/foo/bar.quux'}.

    @type originalFileName: C{str}
    @type outputExtension: C{str}
    @param index: ignored, never passed.
    @rtype: C{str}
    """
    stem, _oldExtension = os.path.splitext(originalFileName)
    return stem + outputExtension
835
836
837
def munge(document, template, linkrel, dir, fullpath, ext, url, config,
          outfileGenerator=getOutputFileName):
    """
    Transform C{template} in place so that it presents the content of
    C{document}.

    Both DOMs are mutated: a series of helpers rewrites C{document} and
    C{template}, and finally the children of the document's C{body} element
    are handed over to the template element whose C{class} is C{"body"}.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to
    be presented.

    @type template: A DOM Node or Document
    @param template: The template document which defines the desired
    presentation format of the content.

    @type linkrel: C{str}
    @param linkrel: A prefix to apply to all relative links in C{src} or
    C{href} attributes in the input document when generating the output
    document.

    @type dir: C{str}
    @param dir: The directory in which to search for source listing files.

    @type fullpath: C{str}
    @param fullpath: The file name which contained the input document.

    @type ext: C{str}
    @param ext: The extension to use when selecting an output file name.
    This replaces the extension of the input file name.

    @type url: C{str}
    @param url: A string which will be interpolated with the fully qualified
    Python name of any API reference encountered in the input document, the
    result of which will be used as a link to API documentation for that
    name in the output document.

    @type config: C{dict}
    @param config: Further specification of the desired form of the output.
    Valid keys in this dictionary::

        noapi: If present and set to a True value, links to API
               documentation will not be generated.

        version: A string which will be included in the output to indicate
                 the version of this documentation.

    @type outfileGenerator: Callable of C{str}, C{str} returning C{str}
    @param outfileGenerator: Output filename factory.  Here it is called
    with the base name of C{fullpath} and C{ext}; the name it returns is
    passed on to the indexing step.

    @return: C{None}
    """
    # The helpers below mutate their arguments, so they are kept in their
    # established order.
    fixRelativeLinks(template, linkrel)
    addMtime(template, fullpath)

    removeH1(document)
    apiLinksDisabled = config.get('noapi', False)
    if not apiLinksDisabled:
        fixAPI(document, url)
    fontifyPython(document)
    fixLinks(document, ext)
    for addListings in (addPyListings, addHTMLListings, addPlainListings):
        addListings(document, dir)

    tableOfContents = generateToC(document)
    putInToC(template, tableOfContents)
    footnotes(document)
    notes(document)

    setIndexLink(template, indexer.getIndexFilename())
    setVersion(template, config.get('version', ''))

    # Title, section numbering and index entries.
    chapterNumber = htmlbook.getNumber(fullpath)
    titleElement = domhelpers.findNodesNamed(document, 'title')[0]
    setTitle(template, titleElement.childNodes, chapterNumber)
    if numberer.getNumberSections() and chapterNumber:
        numberDocument(document, chapterNumber)
    baseName = os.path.split(fullpath)[1]
    index(document, outfileGenerator(baseName, ext),
          htmlbook.getReference(fullpath))

    # Authors are the <link rel="author"> elements of the input; a missing
    # title or href is recorded as an empty string.
    authors = []
    for linkNode in domhelpers.findNodesNamed(document, 'link'):
        if linkNode.getAttribute('rel') == 'author':
            authors.append((linkNode.getAttribute('title') or '',
                            linkNode.getAttribute('href') or ''))
    setAuthors(template, authors)

    # Insert the document into the template: the template's placeholder
    # element takes over the very same child list object as the input body
    # and is relabelled from "body" to "content".
    sourceBody = domhelpers.findNodesNamed(document, "body")[0]
    contentSlot = domhelpers.findElementsWithAttribute(
        template, "class", "body")[0]
    contentSlot.childNodes = sourceBody.childNodes
    contentSlot.setAttribute("class", "content")
925     tmplbody.setAttribute("class", "content")
926
927
class _LocationReportingErrorHandler(ErrorHandler):
    """
    A SAX error handler which converts fatal parse errors into
    L{process.ProcessingFailure} exceptions whose message names the line and
    column of the error.

    For mismatched tag errors the message additionally names the tag which
    was expected to be closed and the location at which that tag was opened,
    so both ends of the mismatch can be found.

    @ivar contentHandler: The content handler whose C{_locationStack} is
        consulted to find the most recently opened tag.
    """
    def __init__(self, contentHandler):
        self.contentHandler = contentHandler


    def fatalError(self, err):
        """
        Raise a L{process.ProcessingFailure} describing C{err}.

        @param err: The SAX parse exception being reported; its message,
            line number and column number are read.

        @raise process.ProcessingFailure: Always.
        """
        message = err.getMessage()
        line = err.getLineNumber()
        column = err.getColumnNumber()

        # Unfortunately, the underlying expat error code is only exposed as
        # a string, so the comparison below breaks if expat is ever
        # localized.
        if message != 'mismatched tag':
            raise process.ProcessingFailure(
                '%s at line %d, column %d' % (message, line, column))

        # The innermost open tag is the one the parser expected to see
        # closed.
        expected, openLine, openColumn = self.contentHandler._locationStack[-1]
        raise process.ProcessingFailure(
            "mismatched close tag at line %d, column %d; expected </%s> "
            "(from line %d, column %d)" % (
                line, column, expected, openLine, openColumn))
954
955
956 class _TagTrackingContentHandler(SAX2DOM):
957     """
958     Define a SAX content handler which keeps track of the start location of
959     all open tags.  This information is used by the above defined error
960     handler to report useful locations when a fatal error is encountered.
961     """
962     def __init__(self):
963         SAX2DOM.__init__(self)
964         self._locationStack = []
965
966     def setDocumentLocator(self, locator):
967         self._docLocator = locator
968         SAX2DOM.setDocumentLocator(self, locator)
969
970     def startElement(self, name, attrs):
971         self._locationStack.append((name, self._docLocator.getLineNumber(), self._docLocator.getColumnNumber()))
972         SAX2DOM.startElement(self, name, attrs)
973
974     def endElement(self, name):
975         self._locationStack.pop()
976         SAX2DOM.endElement(self, name)
977
978
class _LocalEntityResolver(object):
    """
    A SAX entity resolver which serves the small set of DTDs and entity
    files permitted in Lore input documents from files stored next to this
    module, and rejects every other system identifier.

    @ivar filename: The name of the file containing the lore input
        document.  It is only used in error messages.

    @ivar knownDTDs: A mapping from DTD system identifiers to L{FilePath}
        instances pointing to the corresponding local file.  The C{None}
        key covers requests made without a system identifier.
    """
    # Temporary shorthand for "file beside this module"; it and the loop
    # variables are removed from the class namespace once the table is
    # built.
    s = FilePath(__file__).sibling

    knownDTDs = {}
    for _systemId, _localName in [
        (None, "xhtml1-strict.dtd"),
        ("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd",
         "xhtml1-strict.dtd"),
        ("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd",
         "xhtml1-transitional.dtd"),
        ("xhtml-lat1.ent", "xhtml-lat1.ent"),
        ("xhtml-symbol.ent", "xhtml-symbol.ent"),
        ("xhtml-special.ent", "xhtml-special.ent"),
        ]:
        knownDTDs[_systemId] = s(_localName)
    del s, _systemId, _localName


    def __init__(self, filename):
        self.filename = filename


    def resolveEntity(self, publicId, systemId):
        """
        Return an input source reading the local copy of the entity
        identified by C{systemId}.

        @param publicId: Unused.

        @param systemId: The system identifier to look up in
            L{knownDTDs}.

        @raise process.ProcessingFailure: If C{systemId} is not a key of
            L{knownDTDs}.

        @rtype: L{InputSource}
        @return: A source whose system id is C{systemId} and whose byte
            stream is the opened local file.
        """
        source = InputSource()
        source.setSystemId(systemId)
        if systemId not in self.knownDTDs:
            raise process.ProcessingFailure(
                "Invalid DTD system identifier (%r) in %s.  Only "
                "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd "
                "is allowed." % (systemId, self.filename))
        source.setByteStream(self.knownDTDs[systemId].open())
        return source
1018
1019
1020
1021 def parseFileAndReport(filename, _open=file):
1022     """
1023     Parse and return the contents of the given lore XHTML document.
1024
1025     @type filename: C{str}
1026     @param filename: The name of a file containing a lore XHTML document to
1027     load.
1028
1029     @raise process.ProcessingFailure: When the contents of the specified file
1030     cannot be parsed.
1031
1032     @rtype: A DOM Document
1033     @return: The document contained in C{filename}.
1034     """
1035     content = _TagTrackingContentHandler()
1036     error = _LocationReportingErrorHandler(content)
1037     parser = make_parser()
1038     parser.setContentHandler(content)
1039     parser.setErrorHandler(error)
1040
1041     # In order to call a method on the expat parser which will be used by this
1042     # parser, we need the expat parser to be created.  This doesn't happen
1043     # until reset is called, normally by the parser's parse method.  That's too
1044     # late for us, since it will then go on to parse the document without
1045     # letting us do any extra set up.  So, force the expat parser to be created
1046     # here, and then disable reset so that the parser created is the one
1047     # actually used to parse our document.  Resetting is only needed if more
1048     # than one document is going to be parsed, and that isn't the case here.
1049     parser.reset()
1050     parser.reset = lambda: None
1051
1052     # This is necessary to make the xhtml1 transitional declaration optional.
1053     # It causes LocalEntityResolver.resolveEntity(None, None) to be called.
1054     # LocalEntityResolver handles that case by giving out the xhtml1
1055     # transitional dtd.  Unfortunately, there is no public API for manipulating
1056     # the expat parser when using xml.sax.  Using the private _parser attribute
1057     # may break.  It's also possible that make_parser will return a parser
1058     # which doesn't use expat, but uses some other parser.  Oh well. :(
1059     # -exarkun
1060     parser._parser.UseForeignDTD(True)
1061     parser.setEntityResolver(_LocalEntityResolver(filename))
1062
1063     # This is probably no-op because expat is not a validating parser.  Who
1064     # knows though, maybe you figured out a way to not use expat.
1065     parser.setFeature(feature_validation, False)
1066
1067     fObj = _open(filename)
1068     try:
1069         try:
1070             parser.parse(fObj)
1071         except IOError, e:
1072             raise process.ProcessingFailure(
1073                 e.strerror + ", filename was '" + filename + "'")
1074     finally:
1075         fObj.close()
1076     return content.document
1077
1078
def makeSureDirectoryExists(filename):
    """
    Create the directory which would contain C{filename}, along with any
    missing parent directories, unless it is already there.

    @type filename: C{str}
    @param filename: The name of a file, absolute or relative to the current
    directory.  The file itself is neither created nor required to exist.

    @raise OSError: If the directory cannot be created and nothing exists at
    its path afterwards.

    @return: C{None}
    """
    dirname = os.path.dirname(os.path.abspath(filename))
    try:
        os.makedirs(dirname)
    except OSError:
        # Attempt first and inspect afterwards: testing for existence
        # before creating leaves a window in which another process can
        # create the directory and make makedirs fail.  A failure is only
        # reported when the path is really missing.
        if not os.path.exists(dirname):
            raise
1084
def doFile(filename, linkrel, ext, url, templ, options=None, outfileGenerator=getOutputFileName):
    """
    Process the input document at C{filename} and write an output document.

    The input is parsed, a deep copy of C{templ} is filled in from it by
    L{munge}, and the copy is serialized to the file named by
    C{outfileGenerator(filename, ext)}.  C{templ} itself is left untouched.

    @type filename: C{str}
    @param filename: The path to the input file which will be processed.

    @type linkrel: C{str}
    @param linkrel: A prefix to apply to all relative links in C{src} or
    C{href} attributes in the input document when generating the output
    document.

    @type ext: C{str}
    @param ext: The extension to use when selecting an output file name.  This
    replaces the extension of the input file name.

    @type url: C{str}
    @param url: A string which will be interpolated with the fully qualified
    Python name of any API reference encountered in the input document, the
    result of which will be used as a link to API documentation for that name
    in the output document.

    @type templ: A DOM Node or Document
    @param templ: The template on which the output document will be based.
    A deep copy of it is mutated and then serialized to the output file.

    @type options: C{dict} or C{None}
    @param options: Further specification of the desired form of the output.
    C{None} (the default) is treated as an empty dictionary.  Valid keys in
    this dictionary::

        noapi: If present and set to a True value, links to API documentation
               will not be generated.

        version: A string which will be included in the output to indicate the
                 version of this documentation.

    @type outfileGenerator: Callable of C{str}, C{str} returning C{str}
    @param outfileGenerator: Output filename factory.  This is invoked with the
    input filename and C{ext} and the output document is serialized to the
    file with the name returned.

    @return: C{None}
    """
    # A fresh dictionary per call: a dictionary literal used as the default
    # would be one object shared by every call which omits the argument.
    if options is None:
        options = {}
    doc = parseFileAndReport(filename)
    clonedNode = templ.cloneNode(1)
    munge(doc, clonedNode, linkrel, os.path.dirname(filename), filename, ext,
          url, options, outfileGenerator)
    newFilename = outfileGenerator(filename, ext)
    _writeDocument(newFilename, clonedNode)
1134
1135
1136
def _writeDocument(newFilename, clonedNode):
    """
    Serialize the given node to XML into the named file.

    @param newFilename: The name of the file to which the XML will be
        written.  If this is in a directory which does not exist, the
        directory will be created.

    @param clonedNode: The root DOM node which will be serialized, encoded
        as UTF-8.

    @return: C{None}
    """
    makeSureDirectoryExists(newFilename)
    # Serialize before opening the file, so that a failure to serialize
    # does not truncate whatever output already exists at newFilename.
    serialized = clonedNode.toxml('utf-8')
    f = open(newFilename, 'w')
    try:
        f.write(serialized)
    finally:
        # Release the file handle even when the write fails.
        f.close()
Note: See TracBrowser for help on using the browser.