| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
""" |
|---|
| 6 |
man2lore: Converts man page source (i.e. groff) into lore-compatible html. |
|---|
| 7 |
|
|---|
| 8 |
This is nasty and hackish (and doesn't support lots of real groff), but is good |
|---|
| 9 |
enough for converting fairly simple man pages. |
|---|
| 10 |
""" |
|---|
| 11 |
|
|---|
| 12 |
import re, os |
|---|
| 13 |
|
|---|
| 14 |
quoteRE = re.compile('"(.*?)"') |
|---|
| 15 |
|
|---|
| 16 |
|
|---|
| 17 |
|
|---|
| 18 |
def escape(text): |
|---|
| 19 |
text = text.replace('<', '<').replace('>', '>') |
|---|
| 20 |
text = quoteRE.sub('<q>\\1</q>', text) |
|---|
| 21 |
return text |
|---|
| 22 |
|
|---|
| 23 |
|
|---|
| 24 |
|
|---|
| 25 |
def stripQuotes(s): |
|---|
| 26 |
if s[0] == s[-1] == '"': |
|---|
| 27 |
s = s[1:-1] |
|---|
| 28 |
return s |
|---|
| 29 |
|
|---|
| 30 |
|
|---|
| 31 |
|
|---|
| 32 |
class ManConverter(object): |
|---|
| 33 |
""" |
|---|
| 34 |
Convert a man page to the Lore format. |
|---|
| 35 |
|
|---|
| 36 |
@ivar tp: State variable for handling text inside a C{TP} token. It can |
|---|
| 37 |
take values from 0 to 3: |
|---|
| 38 |
- 0: when outside of a C{TP} token. |
|---|
| 39 |
- 1: once a C{TP} token has been encountered. If the previous value |
|---|
| 40 |
was 0, a definition list is started. Then, at the first line of |
|---|
| 41 |
text, a definition term is started. |
|---|
| 42 |
- 2: when the first line after the C{TP} token has been handled. |
|---|
| 43 |
The definition term is closed, and a definition is started with |
|---|
| 44 |
the next line of text. |
|---|
| 45 |
- 3: when the first line as definition data has been handled. |
|---|
| 46 |
@type tp: C{int} |
|---|
| 47 |
""" |
|---|
| 48 |
state = 'regular' |
|---|
| 49 |
name = None |
|---|
| 50 |
tp = 0 |
|---|
| 51 |
dl = 0 |
|---|
| 52 |
para = 0 |
|---|
| 53 |
|
|---|
| 54 |
def convert(self, inf, outf): |
|---|
| 55 |
self.write = outf.write |
|---|
| 56 |
longline = '' |
|---|
| 57 |
for line in inf.readlines(): |
|---|
| 58 |
if line.rstrip() and line.rstrip()[-1] == '\\': |
|---|
| 59 |
longline += line.rstrip()[:-1] + ' ' |
|---|
| 60 |
continue |
|---|
| 61 |
if longline: |
|---|
| 62 |
line = longline + line |
|---|
| 63 |
longline = '' |
|---|
| 64 |
self.lineReceived(line) |
|---|
| 65 |
self.closeTags() |
|---|
| 66 |
self.write('</body>\n</html>\n') |
|---|
| 67 |
outf.flush() |
|---|
| 68 |
|
|---|
| 69 |
|
|---|
| 70 |
def lineReceived(self, line): |
|---|
| 71 |
if line[0] == '.': |
|---|
| 72 |
f = getattr(self, 'macro_' + line[1:3].rstrip().upper(), None) |
|---|
| 73 |
if f: |
|---|
| 74 |
f(line[3:].strip()) |
|---|
| 75 |
else: |
|---|
| 76 |
self.text(line) |
|---|
| 77 |
|
|---|
| 78 |
|
|---|
| 79 |
def continueReceived(self, cont): |
|---|
| 80 |
if not cont: |
|---|
| 81 |
return |
|---|
| 82 |
if cont[0].isupper(): |
|---|
| 83 |
f = getattr(self, 'macro_' + cont[:2].rstrip().upper(), None) |
|---|
| 84 |
if f: |
|---|
| 85 |
f(cont[2:].strip()) |
|---|
| 86 |
else: |
|---|
| 87 |
self.text(cont) |
|---|
| 88 |
|
|---|
| 89 |
|
|---|
| 90 |
def closeTags(self): |
|---|
| 91 |
if self.state != 'regular': |
|---|
| 92 |
self.write('</%s>' % self.state) |
|---|
| 93 |
if self.tp == 3: |
|---|
| 94 |
self.write('</dd>\n\n') |
|---|
| 95 |
self.tp = 0 |
|---|
| 96 |
if self.dl: |
|---|
| 97 |
self.write('</dl>\n\n') |
|---|
| 98 |
self.dl = 0 |
|---|
| 99 |
if self.para: |
|---|
| 100 |
self.write('</p>\n\n') |
|---|
| 101 |
self.para = 0 |
|---|
| 102 |
|
|---|
| 103 |
|
|---|
| 104 |
def paraCheck(self): |
|---|
| 105 |
if not self.tp and not self.para: |
|---|
| 106 |
self.write('<p>') |
|---|
| 107 |
self.para = 1 |
|---|
| 108 |
|
|---|
| 109 |
|
|---|
| 110 |
def macro_TH(self, line): |
|---|
| 111 |
self.write( |
|---|
| 112 |
'<?xml version="1.0"?>\n' |
|---|
| 113 |
'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n' |
|---|
| 114 |
' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n') |
|---|
| 115 |
self.write('<html><head>\n') |
|---|
| 116 |
parts = [stripQuotes(x) for x in line.split(' ', 2)] + ['', ''] |
|---|
| 117 |
title, manSection = parts[:2] |
|---|
| 118 |
self.write('<title>%s.%s</title>' % (title, manSection)) |
|---|
| 119 |
self.write('</head>\n<body>\n\n') |
|---|
| 120 |
self.write('<h1>%s.%s</h1>\n\n' % (title, manSection)) |
|---|
| 121 |
|
|---|
| 122 |
macro_DT = macro_TH |
|---|
| 123 |
|
|---|
| 124 |
|
|---|
| 125 |
def macro_SH(self, line): |
|---|
| 126 |
self.closeTags() |
|---|
| 127 |
self.write('<h2>') |
|---|
| 128 |
self.para = 1 |
|---|
| 129 |
self.text(stripQuotes(line)) |
|---|
| 130 |
self.para = 0 |
|---|
| 131 |
self.closeTags() |
|---|
| 132 |
self.write('</h2>\n\n') |
|---|
| 133 |
|
|---|
| 134 |
|
|---|
| 135 |
def macro_B(self, line): |
|---|
| 136 |
words = line.split() |
|---|
| 137 |
words[0] = '\\fB' + words[0] + '\\fR ' |
|---|
| 138 |
self.text(' '.join(words)) |
|---|
| 139 |
|
|---|
| 140 |
|
|---|
| 141 |
def macro_NM(self, line): |
|---|
| 142 |
if not self.name: |
|---|
| 143 |
self.name = line |
|---|
| 144 |
self.text(self.name + ' ') |
|---|
| 145 |
|
|---|
| 146 |
|
|---|
| 147 |
def macro_NS(self, line): |
|---|
| 148 |
parts = line.split(' Ns ') |
|---|
| 149 |
i = 0 |
|---|
| 150 |
for l in parts: |
|---|
| 151 |
i = not i |
|---|
| 152 |
if i: |
|---|
| 153 |
self.text(l) |
|---|
| 154 |
else: |
|---|
| 155 |
self.continueReceived(l) |
|---|
| 156 |
|
|---|
| 157 |
|
|---|
| 158 |
def macro_OO(self, line): |
|---|
| 159 |
self.text('[') |
|---|
| 160 |
self.continueReceived(line) |
|---|
| 161 |
|
|---|
| 162 |
|
|---|
| 163 |
def macro_OC(self, line): |
|---|
| 164 |
self.text(']') |
|---|
| 165 |
self.continueReceived(line) |
|---|
| 166 |
|
|---|
| 167 |
|
|---|
| 168 |
def macro_OP(self, line): |
|---|
| 169 |
self.text('[') |
|---|
| 170 |
self.continueReceived(line) |
|---|
| 171 |
self.text(']') |
|---|
| 172 |
|
|---|
| 173 |
|
|---|
| 174 |
def macro_FL(self, line): |
|---|
| 175 |
parts = line.split() |
|---|
| 176 |
self.text('\\fB-%s\\fR' % parts[0]) |
|---|
| 177 |
self.continueReceived(' '.join(parts[1:])) |
|---|
| 178 |
|
|---|
| 179 |
|
|---|
| 180 |
def macro_AR(self, line): |
|---|
| 181 |
parts = line.split() |
|---|
| 182 |
self.text('\\fI %s\\fR' % parts[0]) |
|---|
| 183 |
self.continueReceived(' '.join(parts[1:])) |
|---|
| 184 |
|
|---|
| 185 |
|
|---|
| 186 |
def macro_PP(self, line): |
|---|
| 187 |
self.closeTags() |
|---|
| 188 |
|
|---|
| 189 |
|
|---|
| 190 |
def macro_IC(self, line): |
|---|
| 191 |
cmd = line.split(' ', 1)[0] |
|---|
| 192 |
args = line[line.index(cmd) + len(cmd):] |
|---|
| 193 |
args = args.split(' ') |
|---|
| 194 |
text = cmd |
|---|
| 195 |
while args: |
|---|
| 196 |
arg = args.pop(0) |
|---|
| 197 |
if arg.lower() == "ar": |
|---|
| 198 |
text += " \\fU%s\\fR" % (args.pop(0),) |
|---|
| 199 |
elif arg.lower() == "op": |
|---|
| 200 |
ign = args.pop(0) |
|---|
| 201 |
text += " [\\fU%s\\fR]" % (args.pop(0),) |
|---|
| 202 |
|
|---|
| 203 |
self.text(text) |
|---|
| 204 |
|
|---|
| 205 |
|
|---|
| 206 |
def macro_TP(self, line): |
|---|
| 207 |
""" |
|---|
| 208 |
Handle C{TP} token: start a definition list if it's first token, or |
|---|
| 209 |
close previous definition data. |
|---|
| 210 |
""" |
|---|
| 211 |
if self.tp == 3: |
|---|
| 212 |
self.write('</dd>\n\n') |
|---|
| 213 |
self.tp = 1 |
|---|
| 214 |
else: |
|---|
| 215 |
self.tp = 1 |
|---|
| 216 |
self.write('<dl>') |
|---|
| 217 |
self.dl = 1 |
|---|
| 218 |
|
|---|
| 219 |
|
|---|
| 220 |
def macro_BL(self, line): |
|---|
| 221 |
self.write('<dl>') |
|---|
| 222 |
self.tp = 1 |
|---|
| 223 |
|
|---|
| 224 |
|
|---|
| 225 |
def macro_EL(self, line): |
|---|
| 226 |
if self.tp == 3: |
|---|
| 227 |
self.write('</dd>') |
|---|
| 228 |
self.tp = 1 |
|---|
| 229 |
self.write('</dl>\n\n') |
|---|
| 230 |
self.tp = 0 |
|---|
| 231 |
|
|---|
| 232 |
|
|---|
| 233 |
def macro_IT(self, line): |
|---|
| 234 |
if self.tp == 3: |
|---|
| 235 |
self.write('</dd>') |
|---|
| 236 |
self.tp = 1 |
|---|
| 237 |
self.continueReceived(line) |
|---|
| 238 |
|
|---|
| 239 |
|
|---|
| 240 |
def text(self, line): |
|---|
| 241 |
""" |
|---|
| 242 |
Handle a line of text without detected token. |
|---|
| 243 |
""" |
|---|
| 244 |
if self.tp == 1: |
|---|
| 245 |
self.write('<dt>') |
|---|
| 246 |
if self.tp == 2: |
|---|
| 247 |
self.write('<dd>') |
|---|
| 248 |
self.paraCheck() |
|---|
| 249 |
|
|---|
| 250 |
bits = line.split('\\') |
|---|
| 251 |
self.write(escape(bits[0])) |
|---|
| 252 |
for bit in bits[1:]: |
|---|
| 253 |
if bit[:2] == 'fI': |
|---|
| 254 |
self.write('<em>' + escape(bit[2:])) |
|---|
| 255 |
self.state = 'em' |
|---|
| 256 |
elif bit[:2] == 'fB': |
|---|
| 257 |
self.write('<strong>' + escape(bit[2:])) |
|---|
| 258 |
self.state = 'strong' |
|---|
| 259 |
elif bit[:2] == 'fR': |
|---|
| 260 |
self.write('</%s>' % self.state) |
|---|
| 261 |
self.write(escape(bit[2:])) |
|---|
| 262 |
self.state = 'regular' |
|---|
| 263 |
elif bit[:2] == 'fU': |
|---|
| 264 |
|
|---|
| 265 |
|
|---|
| 266 |
self.write('<u>' + escape(bit[2:])) |
|---|
| 267 |
self.state = 'u' |
|---|
| 268 |
elif bit[:3] == '(co': |
|---|
| 269 |
self.write('©' + escape(bit[3:])) |
|---|
| 270 |
else: |
|---|
| 271 |
self.write(escape(bit)) |
|---|
| 272 |
|
|---|
| 273 |
if self.tp == 1: |
|---|
| 274 |
self.write('</dt>') |
|---|
| 275 |
self.tp = 2 |
|---|
| 276 |
elif self.tp == 2: |
|---|
| 277 |
self.tp = 3 |
|---|
| 278 |
|
|---|
| 279 |
|
|---|
| 280 |
|
|---|
| 281 |
class ProcessingFunctionFactory: |
|---|
| 282 |
|
|---|
| 283 |
def generate_lore(self, d, filenameGenerator=None): |
|---|
| 284 |
ext = d.get('ext', '.html') |
|---|
| 285 |
return lambda file,_: ManConverter().convert(open(file), |
|---|
| 286 |
open(os.path.splitext(file)[0]+ext, 'w')) |
|---|
| 287 |
|
|---|
| 288 |
|
|---|
| 289 |
|
|---|
| 290 |
factory = ProcessingFunctionFactory() |
|---|
| 291 |
|
|---|
| 292 |
|
|---|
| 293 |
if __name__ == '__main__': |
|---|
| 294 |
import sys |
|---|
| 295 |
mc = ManConverter().convert(open(sys.argv[1]), sys.stdout) |
|---|