root / trunk / twisted / lore / latex.py

Revision 26929, 14.6 kB (checked in by exarkun, 2 months ago)

Merge lore-microdom-imports-3619

Author: exarkun
Reviewer: glyph
Fixes: #3619

Change the minidom as microdom imports in Twisted Lore to be
minidom as dom instead, and adjust all affected code.

Line 
1 # Copyright (c) 2001-2009 Twisted Matrix Laboratories.
2 # See LICENSE for details.
3
4 """
5 LaTeX output support for Lore.
6 """
7
8 from xml.dom import minidom as dom
9 import os.path, re, string
10 from cStringIO import StringIO
11 import urlparse
12
13 from twisted.web import domhelpers
14 from twisted.python import text, procutils
15
16 import tree
17
# Every character that has a special meaning in LaTeX running text.
escapingRE = re.compile(r'([\[\]#$%&_{}^~\\])')
# A lower-case letter directly followed by an upper-case one, i.e. the seam
# inside a camelCase identifier.
lowerUpperRE = re.compile(r'([a-z])([A-Z])')

# Special characters whose LaTeX spelling is more than a backslash prefix.
_spelledOutEscapes = {
    '\\': '$\\backslash$',
    '~': '\\~{}',
    '^': '\\^{}',
}

def _escapeMatch(match):
    """
    Return the LaTeX spelling of the special character found by C{match}.

    @param match: a regular expression match object whose whole match is the
        character to escape.
    @return: the replacement string: a spelled-out form for backslash, tilde
        and caret, a brace-wrapped form for square brackets, and a
        backslash-prefixed form for everything else.
    """
    found = match.group()
    if found in _spelledOutEscapes:
        return _spelledOutEscapes[found]
    if found in '[]':
        # Square brackets are wrapped in a group rather than backslashed.
        return '{%s}' % (found,)
    return '\\' + found

def latexEscape(txt):
    """
    Escape C{txt} so that it can be placed in LaTeX running text.

    Every character matched by C{escapingRE} is replaced by its escaped form
    and every newline is turned into a single space.

    @param txt: the string to escape.
    @return: the escaped string.
    """
    escaped = escapingRE.sub(_escapeMatch, txt)
    return ' '.join(escaped.split('\n'))
37
# LaTeX replacement text for the named character entities that getLatexText
# understands; an entity that is not listed here is rendered as nothing.
#
# 'ldquo' is the opening (left) double quote, which LaTeX spells with two
# backticks, and 'rdquo' is the closing (right) one, spelled with two
# apostrophes -- the same pairing the start_q / end_q markup in this module
# uses.
entities = {'amp': '\\&', 'gt': '>', 'lt': '<', 'quot': '"',
            'copy': '\\copyright', 'mdash': '---', 'ldquo': '``',
            'rdquo': "''"}
41
42
def realpath(path):
    """
    Return C{path} as a normalised absolute path written with forward slashes.

    A relative C{path} is resolved against the current working directory.

    @param path: the path to normalise.
    @return: the normalised path, with every backslash turned into a slash.
    """
    anchored = os.path.join(os.getcwd(), path)
    normalised = os.path.normpath(anchored)
    # Windows-style separators make LaTeX blow up, so none may survive.
    return '/'.join(normalised.split('\\'))
48
49
def getLatexText(node, writer, filter=lambda x:x, entities=entities):
    """
    Feed the text found in C{node} and all of its descendants to C{writer}.

    @param node: a DOM node.  A node with an C{eref} attribute is treated as
        an entity reference, a node with a C{data} attribute as character
        data, and anything else as a container whose C{childNodes} are
        visited in order.
    @param writer: a one-argument callable that receives each piece of text.
    @param filter: a one-argument callable applied to character data (but
        not to entity replacements) before it is written.
    @param entities: a mapping from entity name to replacement text; unknown
        entities are written as the empty string.
    @return: whatever C{writer} returns for an entity or character data
        node, C{None} for a container node.
    """
    if hasattr(node, 'eref'):
        return writer(entities.get(node.eref, ''))
    if not hasattr(node, 'data'):
        for child in node.childNodes:
            getLatexText(child, writer, filter, entities)
        return
    chunk = node.data
    if isinstance(chunk, unicode):
        # Unicode text is handed on as UTF-8 encoded bytes.
        chunk = chunk.encode('utf-8')
    return writer(filter(chunk))
61
class BaseLatexSpitter:
    """
    Visitor that walks a DOM tree and emits LaTeX through a writer callable.

    Dispatch is by tag name: an element C{<foo>} is handled by a
    C{visitNode_foo} method when one exists and by L{visitNodeDefault}
    otherwise.  Nodes without a tag name are passed to C{writeNodeData},
    which this class itself does not define.

    @ivar writer: one-argument callable that receives every piece of output.
    @ivar currDir: directory given at construction time (C{'.'} by default).
    @ivar filename: file name given at construction time (C{''} by default).
    """

    def __init__(self, writer, currDir='.', filename=''):
        self.writer, self.currDir, self.filename = writer, currDir, filename

    def visitNode(self, node):
        """
        Dispatch C{node} to the handler matching its kind; comments are
        skipped entirely.
        """
        if isinstance(node, dom.Comment):
            return
        if hasattr(node, 'tagName'):
            handler = getattr(self, 'visitNode_' + node.tagName,
                              self.visitNodeDefault)
            handler(node)
        else:
            self.writeNodeData(node)

    def visitNodeDefault(self, node):
        """
        Write the C{start_<tag>} attribute (if any), visit every child of
        C{node}, then write the C{end_<tag>} attribute (if any).
        """
        opening = getattr(self, 'start_' + node.tagName, '')
        self.writer(opening)
        for child in node.childNodes:
            self.visitNode(child)
        closing = getattr(self, 'end_' + node.tagName, '')
        self.writer(closing)

    def visitNode_a(self, node):
        """
        Route an anchor to the listing, href or name handler, in that order
        of precedence, falling back to the default handling.
        """
        isListing = (node.hasAttribute('class')
                     and node.getAttribute('class').endswith('listing'))
        if isListing:
            return self.visitNode_a_listing(node)
        for attribute in ('href', 'name'):
            if node.hasAttribute(attribute):
                return getattr(self, 'visitNode_a_' + attribute)(node)
        self.visitNodeDefault(node)

    def visitNode_span(self, node):
        """
        Handle a C{span} or C{div}.  When the element carries a C{class}
        attribute, that class is appended to the node's tag name (the node is
        modified in place) and the node is dispatched again under its new
        name.
        """
        if node.hasAttribute('class'):
            node.tagName = node.tagName + '_' + node.getAttribute('class')
            self.visitNode(node)
        else:
            return self.visitNodeDefault(node)

    visitNode_div = visitNode_span

    def visitNode_h1(self, node):
        """
        Produce no output for a top-level heading.
        """
        pass

    def visitNode_style(self, node):
        """
        Produce no output for a style element.
        """
        pass
107
class LatexSpitter(BaseLatexSpitter):
    """
    Render a Lore document as a complete LaTeX C{article}.

    Elements without a dedicated C{visitNode_*} method are rendered by
    wrapping their children in the C{start_<tag>} / C{end_<tag>} strings
    defined at the bottom of this class.

    @cvar baseLevel: number of extra C{sub} prefixes added to every heading
        written by L{visitNodeHeader}.
    @cvar diaHack: true when a C{dia} executable was found (via
        C{procutils.which}) at the time this class was defined; enables the
        use of C{.dia} sources for images.
    """

    baseLevel = 0
    diaHack = bool(procutils.which("dia"))

    def writeNodeData(self, node):
        """
        Write the LaTeX-escaped text of C{node}, with angle brackets put
        into math mode.
        """
        buf = StringIO()
        getLatexText(node, buf.write, latexEscape)
        self.writer(buf.getvalue().replace('<', '$<$').replace('>', '$>$'))

    def visitNode_head(self, node):
        """
        Write an author declaration built from every C{<link rel="author">}
        found below C{node}, then process the head's children as usual.
        """
        authorNodes = domhelpers.findElementsWithAttribute(node, 'rel', 'author')
        authorNodes = [n for n in authorNodes if n.tagName == 'link']

        if authorNodes:
            self.writer('\\author{')
            authors = []
            for aNode in authorNodes:
                name = aNode.getAttribute('title')
                href = aNode.getAttribute('href')
                if href.startswith('mailto:'):
                    href = href[7:]
                if href:
                    # The address follows the name, in angle brackets.
                    if name:
                        name += ' '
                    name += '$<$' + href + '$>$'
                if name:
                    authors.append(name)

            self.writer(' \\and '.join(authors))
            self.writer('}')

        self.visitNodeDefault(node)

    def visitNode_pre(self, node):
        """
        Write the unescaped text of C{node} inside a verbatim environment.
        """
        self.writer('\\begin{verbatim}\n')
        buf = StringIO()
        getLatexText(node, buf.write)
        self.writer(text.removeLeadingTrailingBlanks(buf.getvalue()))
        self.writer('\\end{verbatim}\n')

    def visitNode_code(self, node):
        """
        Write the text of C{node} in typewriter type, offering LaTeX a line
        break at every lower-to-upper case transition and after every dot
        except one in the very first position.
        """
        fout = StringIO()
        getLatexText(node, fout.write, latexEscape)
        data = lowerUpperRE.sub(r'\1\\linebreak[1]\2', fout.getvalue())
        data = data[:1] + data[1:].replace('.', '.\\linebreak[1]')
        self.writer('\\texttt{'+data+'}')

    def visitNode_img(self, node):
        """
        Convert the image named by the C{src} attribute to EPS using the
        C{convert_<extension>} method and include the result.  Images whose
        extension has no converter are silently dropped.
        """
        fileName = os.path.join(self.currDir, node.getAttribute('src'))
        target, ext = os.path.splitext(fileName)
        if self.diaHack and os.access(target + '.dia', os.R_OK):
            # Prefer a readable dia source sitting next to the image.
            ext = '.dia'
            fileName = target + ext
        f = getattr(self, 'convert_'+ext[1:], None)
        if not f:
            return
        target = os.path.join(self.currDir, os.path.basename(target)+'.eps')
        f(fileName, target)
        target = os.path.basename(target)
        self._write_img(target)

    def _write_img(self, target):
        """Write LaTeX for image."""
        self.writer('\\begin{center}\\includegraphics[%%\n'
                    'width=1.0\n'
                    '\\textwidth,height=1.0\\textheight,\nkeepaspectratio]'
                    '{%s}\\end{center}\n' % target)

    def convert_png(self, src, target):
        """
        Convert the PNG file C{src} to the PostScript file C{target} with
        the C{pngtopnm} and C{pnmtops} commands.

        @raise OSError: if the shell pipeline reports a non-zero status.
        """
        # XXX there's a *reason* Python comes with the pipes module -
        # someone fix this to use it.
        r = os.system('pngtopnm "%s" | pnmtops -noturn > "%s"' % (src, target))
        if r != 0:
            raise OSError(r)

    def convert_dia(self, src, target):
        """
        Export the dia diagram C{src} to C{target}, going through a copy
        named C{<src>_hacked.dia} whose scaling attribute is changed from 1
        to 0.5.
        """
        # EVIL DISGUSTING HACK
        pipe = os.popen("gunzip -dc %s" % (src))
        try:
            data = pipe.read()
        finally:
            pipe.close()
        pre = '<dia:attribute name="scaling">\n          <dia:real val="1"/>'
        post = '<dia:attribute name="scaling">\n          <dia:real val="0.5"/>'
        f = open('%s_hacked.dia' % (src), 'wb')
        try:
            f.write(data.replace(pre, post))
        finally:
            # Make sure the data is flushed before gzip reads the file.
            f.close()
        os.system('gzip %s_hacked.dia' % (src,))
        os.system('mv %s_hacked.dia.gz %s_hacked.dia' % (src,src))
        # Let's pretend we never saw that.

        # Silly dia needs an X server, even though it doesn't display anything.
        # If this is a problem for you, try using Xvfb.
        os.system("dia %s_hacked.dia -n -e %s" % (src, target))

    def visitNodeHeader(self, node):
        """
        Write an C{h2}-C{h4} heading as a (sub)section: C{h2} maps to a
        section, each further level adds one C{sub}, and C{baseLevel} shifts
        the whole scale.
        """
        level = (int(node.tagName[1])-2)+self.baseLevel
        self.writer('\n\n\\'+level*'sub'+'section{')
        spitter = HeadingLatexSpitter(self.writer, self.currDir, self.filename)
        spitter.visitNodeDefault(node)
        self.writer('}\n')

    def visitNode_a_listing(self, node):
        """
        Include the file named by the C{href} attribute verbatim, skipping
        its first C{skipLines} lines, followed by a caption.
        """
        fileName = os.path.join(self.currDir, node.getAttribute('href'))
        self.writer('\\begin{verbatim}\n')
        listing = open(fileName)
        try:
            lines = [line.rstrip() for line in listing.readlines()]
        finally:
            listing.close()
        skipLines = int(node.getAttribute('skipLines') or 0)
        lines = lines[skipLines:]
        self.writer(text.removeLeadingTrailingBlanks('\n'.join(lines)))
        self.writer('\\end{verbatim}')

        # Write a caption for this source listing
        fileName = os.path.basename(fileName)
        caption = domhelpers.getNodeText(node)
        if caption == fileName:
            caption = 'Source listing'
        self.writer('\\parbox[b]{\\linewidth}{\\begin{center}%s --- '
                    '\\begin{em}%s\\end{em}\\end{center}}'
                    % (latexEscape(caption), latexEscape(fileName)))

    def visitNode_a_href(self, node):
        """
        Write a hyperlink.  Links with a supported URL scheme become their
        text plus, when the text differs from the URL, a footnote holding
        the URL; all other links are treated as references to a label in
        this or another file and are followed by a C{loreref}.
        """
        supported_schemes=['http', 'https', 'ftp', 'mailto']
        href = node.getAttribute('href')
        if urlparse.urlparse(href)[0] in supported_schemes:
            linkText = domhelpers.getNodeText(node)
            self.visitNodeDefault(node)
            if linkText != href:
                self.writer('\\footnote{%s}' % latexEscape(href))
        else:
            path, fragid = (href.split('#', 1) + [None])[:2]
            if path == '':
                # A bare fragment refers to the file being processed.
                path = self.filename
            else:
                path = os.path.join(os.path.dirname(self.filename), path)

            path = realpath(path)

            if fragid:
                ref = path + 'HASH' + fragid
            else:
                ref = path
            self.writer('\\textit{')
            self.visitNodeDefault(node)
            self.writer('}')
            self.writer('\\loreref{%s}' % ref)

    def visitNode_a_name(self, node):
        """
        Write a label for a named anchor, built from the normalised path of
        the current file and the anchor name, then process its children.
        """
        self.writer('\\label{%sHASH%s}' % (
                realpath(self.filename), node.getAttribute('name')))
        self.visitNodeDefault(node)

    def visitNode_table(self, node):
        """
        Write a table as a floating C{tabular}.  Only C{tr} elements that
        are direct children of C{node}, and the C{th} / C{td} elements that
        are direct children of those rows, are considered.  A table without
        any rows yields an empty C{tabular}.
        """
        rows = [[col for col in row.childNodes
                     if getattr(col, 'tagName', None) in ('th', 'td')]
            for row in node.childNodes if getattr(row, 'tagName', None)=='tr']
        # Every cell is followed by '&', so each row has one more column
        # than it has cells.  The [0] fallback keeps max() from failing on a
        # table that has no rows at all.
        numCols = 1+max([len(row) for row in rows] or [0])
        self.writer('\\begin{table}[ht]\\begin{center}')
        self.writer('\\begin{tabular}{@{}'+'l'*numCols+'@{}}')
        for row in rows:
            th = 0
            for col in row:
                self.visitNode(col)
                self.writer('&')
                if col.tagName == 'th':
                    th = 1
            self.writer('\\\\\n') #\\ ends lines
            if th:
                # Rule off a row that contains a header cell.
                self.writer('\\hline\n')
        self.writer('\\end{tabular}\n')
        if node.hasAttribute('title'):
            self.writer('\\caption{%s}'
                        % latexEscape(node.getAttribute('title')))
        self.writer('\\end{center}\\end{table}\n')

    def visitNode_span_footnote(self, node):
        """
        Write the children of C{node} as a footnote, rendered by a
        L{FootnoteLatexSpitter}.
        """
        self.writer('\\footnote{')
        spitter = FootnoteLatexSpitter(self.writer, self.currDir, self.filename)
        spitter.visitNodeDefault(node)
        self.writer('}')

    def visitNode_span_index(self, node):
        """
        Write an index entry for the C{value} attribute, then process the
        children of C{node}.
        """
        self.writer('\\index{%s}\n' % node.getAttribute('value'))
        self.visitNodeDefault(node)

    visitNode_h2 = visitNode_h3 = visitNode_h4 = visitNodeHeader

    start_title = '\\title{'
    end_title = '}\n'

    start_sub = '$_{'
    end_sub = '}$'

    start_sup = '$^{'
    end_sup = '}$'

    start_html = '''\\documentclass{article}
    \\newcommand{\\loreref}[1]{%
    \\ifthenelse{\\value{page}=\\pageref{#1}}%
               { (this page)}%
               { (page \\pageref{#1})}%
    }'''

    start_body = '\\begin{document}\n\\maketitle\n'
    end_body = '\\end{document}'

    start_dl = '\\begin{description}\n'
    end_dl = '\\end{description}\n'
    start_ul = '\\begin{itemize}\n'
    end_ul = '\\end{itemize}\n'

    start_ol = '\\begin{enumerate}\n'
    end_ol = '\\end{enumerate}\n'

    start_li = '\\item '
    end_li = '\n'

    start_dt = '\\item['
    end_dt = ']'
    end_dd = '\n'

    start_p = '\n\n'

    start_strong = start_em = '\\begin{em}'
    end_strong = end_em = '\\end{em}'

    start_q = "``"
    end_q = "''"

    start_div_note = '\\begin{quotation}\\textbf{Note:}'
    end_div_note = '\\end{quotation}'

    start_th = '\\textbf{'
    end_th = '}'
345
class SectionLatexSpitter(LatexSpitter):
    """
    L{LatexSpitter} variant that renders a document as a single section.

    The title becomes a section heading carrying a label, headings are
    pushed one level down, and neither a document preamble nor a document
    body wrapper is written.
    """

    baseLevel = 1

    start_title = '\\section{'
    # The closing brace of the title is written by visitNode_title, after
    # the label; nothing is emitted around the html and body elements.
    start_html = start_body = end_body = end_title = ''

    def visitNode_title(self, node):
        """
        Write the title, then a label named after the normalised path of
        the current file, and close the heading.
        """
        self.visitNodeDefault(node)
        label = realpath(self.filename)
        self.writer('\\label{%s}}\n' % (label,))
358
359
class ChapterLatexSpitter(SectionLatexSpitter):
    """
    L{SectionLatexSpitter} variant whose title opens a chapter instead of a
    section and whose headings are not pushed down a level.
    """

    start_title = '\\chapter{'
    baseLevel = 0
363
364
class HeadingLatexSpitter(BaseLatexSpitter):
    """
    Minimal spitter used for the contents of headings: it knows how to write
    character data and quotation marks, and nothing else beyond what
    L{BaseLatexSpitter} provides.
    """

    start_q, end_q = "``", "''"

    # Reuse LatexSpitter's implementation as a method of this class.  The
    # plain function is taken from the class dictionary so that it is not
    # tied to LatexSpitter instances.
    writeNodeData = LatexSpitter.__dict__['writeNodeData']
370
371
class FootnoteLatexSpitter(LatexSpitter):
    """
    Spitter for the contents of a footnote.

    The first paragraph is written without the usual paragraph break, which
    keeps a multi-paragraph footnote from starting with an empty paragraph.
    """

    # No paragraph break until the first paragraph has been written.
    start_p = ''

    def visitNode_p(self, node):
        """
        Write the paragraph, then restore the regular paragraph break on
        this instance for every paragraph that follows.
        """
        self.visitNodeDefault(node)
        self.start_p = LatexSpitter.start_p

    def visitNode_span_footnote(self, node):
        """
        Write a footnote nested inside this one inline, without opening
        another footnote.
        """
        self.visitNodeDefault(node)
384
class BookLatexSpitter(LatexSpitter):
    """
    Render a book index document as the top-level LaTeX file of a C{book}.

    Only the table of contents found in the body is rendered; its anchors
    of class C{chapter} become chapter headings and its anchors of class
    C{sect} pull in other LaTeX files.
    """

    def visitNode_body(self, node):
        """
        Discard everything in the body except the first node of class
        C{toc} (if any) and render what is left.  C{node} is modified in
        place.
        """
        tocs=domhelpers.locateNodes([node], 'class', 'toc')
        domhelpers.clearNode(node)
        if tocs:
            node.appendChild(tocs[0])
        self.visitNodeDefault(node)

    def visitNode_link(self, node):
        """
        Handle a C{link} element.  When it has a C{rel} attribute, that
        value is appended to the node's tag name (the node is modified in
        place) and the node is dispatched again under its new name.
        """
        if not node.hasAttribute('rel'):
            return self.visitNodeDefault(node)
        node.tagName += '_'+node.getAttribute('rel')
        self.visitNode(node)

    def visitNode_link_author(self, node):
        """
        Write an author declaration from the C{text} attribute.
        """
        self.writer('\\author{%s}\n' % node.getAttribute('text'))

    def visitNode_link_stylesheet(self, node):
        """
        Load a LaTeX package for a stylesheet link of type
        C{application/x-latex}; the package name is the C{href} attribute
        without its extension.  Other stylesheets are ignored.
        """
        if node.hasAttribute('type') and node.hasAttribute('href'):
            if node.getAttribute('type')=='application/x-latex':
                packagename=node.getAttribute('href')
                packagebase,ext=os.path.splitext(packagename)
                self.writer('\\usepackage{%s}\n' % packagebase)

    start_html = r'''\documentclass[oneside]{book}
\usepackage{graphicx}
\usepackage{times,mathptmx}
'''

    start_body = r'''\begin{document}
\maketitle
\tableofcontents
'''

    start_li=''
    end_li=''
    start_ul=''
    end_ul=''


    def visitNode_a(self, node):
        """
        Route an anchor: listings first, then a C{visitNode_a_<class>}
        handler when the anchor's class has one, then the href and name
        handlers, and finally the default handling.

        An anchor whose class has no dedicated handler is treated like an
        anchor without a class instead of raising C{AttributeError}.
        """
        if node.hasAttribute('class'):
            a_class=node.getAttribute('class')
            if a_class.endswith('listing'):
                return self.visitNode_a_listing(node)
            handler = getattr(self, 'visitNode_a_%s' % a_class, None)
            if handler is not None:
                return handler(node)
        if node.hasAttribute('href'):
            return self.visitNode_a_href(node)
        if node.hasAttribute('name'):
            return self.visitNode_a_name(node)
        self.visitNodeDefault(node)

    def visitNode_a_chapter(self, node):
        """
        Write the contents of the anchor as a chapter heading.
        """
        self.writer('\\chapter{')
        self.visitNodeDefault(node)
        self.writer('}\n')

    def visitNode_a_sect(self, node):
        """
        Pull in the LaTeX file named by the C{href} attribute, with its
        extension removed.
        """
        base,ext=os.path.splitext(node.getAttribute('href'))
        self.writer('\\input{%s}\n' % base)
447
448
449
def processFile(spitter, fin):
    """
    Parse the document read from C{fin} and render it with C{spitter}.

    @param spitter: an object whose C{visitNode} method is called with the
        document element of the parsed input.
    @param fin: an open file object; its C{name} attribute is passed to the
        parser, which is given a callable that returns C{fin} itself.
    """
    # XXX Use Inversion Of Control Pattern to orthogonalize the parsing API
    # from the Visitor Pattern application. (EnterPrise)
    document = tree.parseFileAndReport(fin.name, lambda x: fin)
    spitter.visitNode(document.documentElement)
455
456
def convertFile(filename, spitterClass):
    """
    Convert the document C{filename} to LaTeX, writing the result next to it
    under the same name with a C{.tex} extension.

    Both files are closed even when the conversion fails, and the output
    file is only created once the input has been opened successfully.

    @param filename: path of the document to convert.
    @param spitterClass: a callable taking C{(writer, currDir, filename)}
        and returning the spitter that renders the document.
    """
    fin = open(filename)
    try:
        fout = open(os.path.splitext(filename)[0]+".tex", 'w')
        try:
            spitter = spitterClass(fout.write, os.path.dirname(filename),
                                   filename)
            processFile(spitter, fin)
        finally:
            fout.close()
    finally:
        fin.close()
Note: See TracBrowser for help on using the browser.