| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
""" |
|---|
| 5 |
Checker for common errors in Lore documents. |
|---|
| 6 |
""" |
|---|
| 7 |
|
|---|
| 8 |
from xml.dom import minidom as dom |
|---|
| 9 |
import parser, urlparse, os.path |
|---|
| 10 |
|
|---|
| 11 |
from twisted.lore import tree, process |
|---|
| 12 |
from twisted.web import domhelpers |
|---|
| 13 |
from twisted.python import reflect |
|---|
| 14 |
|
|---|
| 15 |
|
|---|
| 16 |
|
|---|
| 17 |
# Exception types treated as "this code sample is not valid Python" when a
# snippet is handed to parser.suite().
parserErrors = (
    SyntaxError,
    parser.ParserError,
)
|---|
| 18 |
|
|---|
| 19 |
class TagChecker:
    """
    Base class for Lore document checkers.

    A subclass adds a rule by defining a method whose name starts with
    C{check_}; every such method is called with the parsed document and the
    name of the file it came from.
    """

    def check(self, dom, filename):
        """
        Run every C{check_*} method of this object against a document.

        @param dom: the parsed document, passed unchanged to each check.
        @param filename: name of the file being checked, used in reports.
        @raise process.ProcessingFailure: if at least one check reported an
            error through L{_reportError}.
        """
        # Reset the flag so that a checker instance can be reused per file.
        self.hadErrors = 0
        checks = reflect.prefixedMethods(self, 'check_')
        for runCheck in checks:
            runCheck(dom, filename)
        if not self.hadErrors:
            return
        raise process.ProcessingFailure("invalid format")

    def _reportError(self, filename, element, error):
        """
        Print one lint error and remember that the document is invalid.

        Nothing is printed or recorded when the element carries the
        attribute C{hlint="off"}.

        @param filename: name of the file being checked.
        @param element: the offending element; its C{_markpos} attribute, if
            set, supplies the two position fields of the message
            (presumably line and column -- confirm against the parser).
        @param error: human readable description of the problem.
        """
        if element.hasAttribute('hlint'):
            suppressed = element.getAttribute('hlint') == 'off'
        else:
            suppressed = False
        if suppressed:
            return
        self.hadErrors = 1
        position = getattr(element, '_markpos', None)
        if not position:
            # No position recorded for this element: report it at 0:0.
            position = (0, 0)
        print("%s:%s:%s: %s" % ((filename,) + position + (error,)))
|---|
| 34 |
|
|---|
| 35 |
|
|---|
| 36 |
class DefaultTagChecker(TagChecker):
    """
    Checker implementing the standard set of Lore lint rules.

    Every C{check_*} method is one independent rule.  Each rule receives the
    parsed document and the file name, and reports problems through
    C{self._reportError}.
    """

    def __init__(self, allowedTags, allowedClasses):
        """
        @param allowedTags: callable taking a tag name and returning a true
            value when that tag may be used.
        @param allowedClasses: mapping from tag name to a callable that takes
            the value of the C{class} attribute and returns a true value when
            it is acceptable for that tag.  Tags missing from the mapping
            accept no class at all.
        """
        self.allowedTags = allowedTags
        self.allowedClasses = allowedClasses

    def _readLines(self, path):
        """
        Return the lines of the file at C{path}, always closing the file.
        """
        f = open(path, 'r')
        try:
            return f.readlines()
        finally:
            f.close()

    def check_disallowedElements(self, dom, filename):
        """
        Report every element whose tag name is rejected by C{allowedTags}.
        """
        def isDisallowed(node):
            return not self.allowedTags(node.tagName)
        for element in domhelpers.findElements(dom, isDisallowed):
            self._reportError(filename, element,
                              'unrecommended tag %s' % element.tagName)

    def check_disallowedClasses(self, dom, filename):
        """
        Report every element whose C{class} attribute is not accepted for
        its tag by C{allowedClasses}.
        """
        def rejectAll(className):
            # Used for tags that have no entry in allowedClasses.
            return 0

        def hasUnknownClass(element):
            if not element.hasAttribute('class'):
                return 0
            checker = self.allowedClasses.get(element.tagName, rejectAll)
            return not checker(element.getAttribute('class'))

        for element in domhelpers.findElements(dom, hasUnknownClass):
            self._reportError(filename, element,
                              'unknown class %s' %
                              element.getAttribute('class'))

    def check_quote(self, doc, filename):
        """
        Report text containing a literal double quote character, unless the
        text is a comment or sits inside a C{pre} or C{code} element.

        The parameter is called C{doc} rather than C{dom} because this method
        needs the module-level C{dom} name for C{dom.Comment}.
        """
        def inLiteralMarkup(node):
            # The first and last entries of getParents() are skipped, as in
            # the original code (presumably the node itself and the document,
            # which have no tagName -- confirm against domhelpers).
            for parent in domhelpers.getParents(node)[1:-1]:
                if parent.tagName in ('pre', 'code'):
                    return True
            return False

        def matcher(node):
            return ('"' in getattr(node, 'data', '') and
                    not isinstance(node, dom.Comment) and
                    not inLiteralMarkup(node))

        for node in domhelpers.findNodes(doc, matcher):
            self._reportError(filename, node.parentNode, 'contains quote')

    def check_styleattr(self, dom, filename):
        """
        Report every element that carries a C{style} attribute.
        """
        for node in domhelpers.findElementsWithAttribute(dom, 'style'):
            self._reportError(filename, node, 'explicit style')

    def check_align(self, dom, filename):
        """
        Report every element that carries an C{align} attribute.
        """
        for node in domhelpers.findElementsWithAttribute(dom, 'align'):
            self._reportError(filename, node, 'explicit alignment')

    def check_style(self, dom, filename):
        """
        Report every C{style} element that has non-empty text content.
        """
        for node in domhelpers.findNodesNamed(dom, 'style'):
            if domhelpers.getNodeText(node) != '':
                self._reportError(filename, node, 'hand hacked style')

    def check_title(self, dom, filename):
        """
        Require exactly one C{title}, exactly one C{h1}, and identical text
        in both.  Only the first violation found is reported.
        """
        doc = dom.documentElement
        title = domhelpers.findNodesNamed(dom, 'title')
        if len(title) != 1:
            return self._reportError(filename, doc, 'not exactly one title')
        h1 = domhelpers.findNodesNamed(dom, 'h1')
        if len(h1) != 1:
            return self._reportError(filename, doc, 'not exactly one h1')
        if domhelpers.getNodeText(h1[0]) != domhelpers.getNodeText(title[0]):
            self._reportError(filename, h1[0], 'title and h1 text differ')

    def check_80_columns(self, dom, filename):
        """
        Report lines wider than 80 columns, both in C{pre} elements and in
        files referenced by C{a} elements whose class ends with C{listing}.

        A listing whose file cannot be read is reported as a bad href
        instead of aborting the run.
        """
        for node in domhelpers.findNodesNamed(dom, 'pre'):
            for line in domhelpers.gatherTextNodes(node, 1).split('\n'):
                if len(line.rstrip()) > 80:
                    self._reportError(filename, node,
                                      'text wider than 80 columns in pre')
        for node in domhelpers.findNodesNamed(dom, 'a'):
            # minidom's getAttribute() takes no default argument; "or ''"
            # also copes with DOM implementations returning None when the
            # attribute is missing.
            if not (node.getAttribute('class') or '').endswith('listing'):
                continue
            href = node.getAttribute('href')
            try:
                fn = os.path.join(os.path.dirname(filename), href)
                lines = self._readLines(fn)
            except Exception:
                # Deliberately broad (a missing href or an unreadable file
                # are both "bad listing"), but no longer a bare except, so
                # KeyboardInterrupt and SystemExit still propagate.
                self._reportError(filename, node,
                                  'bad listing href: %r' % (href,))
                continue

            for line in lines:
                if len(line.rstrip()) > 80:
                    self._reportError(filename, node,
                                      'listing wider than 80 columns')

    def check_pre_py_listing(self, dom, filename):
        """
        Report C{pre} elements of class C{python} whose text is not valid
        Python source according to C{parser.suite}.
        """
        for node in domhelpers.findNodesNamed(dom, 'pre'):
            if node.getAttribute('class') != 'python':
                continue
            text = domhelpers.getNodeText(node)
            # Turn escaped angle brackets back into the real characters.
            text = text.replace('&gt;', '>').replace('&lt;', '<')
            # Drop blank lines and trailing whitespace.
            stripped = [raw.rstrip() for raw in text.split('\n')]
            lines = [line for line in stripped if line]
            # Remove the common leading indentation one column at a time.
            # The "lines and" guard stops an empty sample looping forever.
            while lines and not [1 for line in lines
                                 if line[:1] not in ('', ' ')]:
                lines = [line[1:] for line in lines]
            text = '\n'.join(lines) + '\n'
            try:
                try:
                    parser.suite(text)
                except parserErrors:
                    # Retry pretending the "..." placeholder idiom is a
                    # string literal, so elided samples still validate.
                    parser.suite(text.replace("...", "'...'"))
            except parserErrors as e:
                self._reportError(filename, node,
                                  'invalid python code:' + str(e))

    def check_anchor_in_heading(self, dom, filename):
        """
        Report every heading (C{h1} to C{h6}) that contains an C{a} element.
        """
        headingNames = ['h%d' % n for n in range(1, 7)]
        for hname in headingNames:
            for node in domhelpers.findNodesNamed(dom, hname):
                if domhelpers.findNodesNamed(node, 'a'):
                    self._reportError(filename, node, 'anchor in heading')

    def check_texturl_matches_href(self, dom, filename):
        """
        Report links whose visible text looks like a URL (it has a scheme
        and contains no space) but differs from the C{href} target.
        """
        for node in domhelpers.findNodesNamed(dom, 'a'):
            if not node.hasAttribute('href'):
                continue
            text = domhelpers.getNodeText(node)
            proto = urlparse.urlparse(text)[0]
            if proto and ' ' not in text:
                # href is known to exist here, so no default is needed.
                if text != node.getAttribute('href'):
                    self._reportError(filename, node,
                                      'link text does not match href')

    def check_a_py_listing(self, dom, filename):
        """
        Report C{py-listing} links whose target file still shows licence
        boilerplate after the first C{skipLines} lines, and suggest the
        C{skipLines} value that would hide the offending line.
        """
        for node in domhelpers.findNodesNamed(dom, 'a'):
            if node.getAttribute('class') != 'py-listing':
                continue
            fn = os.path.join(os.path.dirname(filename),
                              node.getAttribute('href'))
            # A missing skipLines attribute means "skip nothing".
            skipped = int(node.getAttribute('skipLines') or 0)
            lines = self._readLines(fn)[skipped:]
            for num, line in enumerate(lines):
                if '59 Temple Place, Suite 330, Boston' in line:
                    self._reportError(
                        filename, node,
                        'included source file %s has licence boilerplate.'
                        ' Use skipLines="%d".'
                        % (fn, skipped + num + 1))

    def check_lists(self, dom, filename):
        """
        Report C{ul}/C{ol} elements that are empty or that have a child node
        whose name is not C{li}.
        """
        for node in (domhelpers.findNodesNamed(dom, 'ul') +
                     domhelpers.findNodesNamed(dom, 'ol')):
            if not node.childNodes:
                self._reportError(filename, node, 'empty list')
            for child in node.childNodes:
                if child.nodeName != 'li':
                    self._reportError(filename, node,
                                      'only list items allowed in lists')
|---|
| 183 |
|
|---|
| 184 |
|
|---|
| 185 |
def list2dict(l):
    """
    Return a dictionary whose keys are the items of C{l}, each mapped to
    C{None}.

    The result is used as a set-like lookup table; duplicate items collapse
    into a single key.

    @param l: an iterable of hashable items.
    @return: a new C{dict}.
    """
    return dict.fromkeys(l)
|---|
| 190 |
|
|---|
| 191 |
# Values accepted for the "class" attribute of <code> elements.
classes = list2dict([
    'shell', 'API', 'python', 'py-prototype', 'py-filename',
    'py-src-string', 'py-signature', 'py-src-parameter',
    'py-src-identifier', 'py-src-keyword',
])

# Tag names that may appear in a document.
tags = list2dict([
    "html", "title", "head", "body",
    "h1", "h2", "h3",
    "ol", "ul", "dl", "li", "dt", "dd",
    "p", "code", "img", "blockquote", "a", "cite",
    "div", "span", "strong", "em", "pre", "q",
    "table", "tr", "td", "th",
    "style", "sub", "sup", "link",
])

# Values accepted for the "class" attribute of <span>, <div>, <a> and <pre>.
span = list2dict(['footnote', 'manhole-output', 'index'])

div = list2dict(['note', 'boxed', 'doit'])

a = list2dict(['listing', 'py-listing', 'html-listing', 'absolute'])

pre = list2dict(['python', 'shell', 'python-interpreter', 'elisp'])

# Maps a tag name to a predicate that is given the value of the "class"
# attribute and returns a true value when that class is acceptable.
allowed = {
    'code': classes.has_key,
    'span': span.has_key,
    'div': div.has_key,
    'a': a.has_key,
    'pre': pre.has_key,
    'ul': lambda x: x=='toc',
    'ol': lambda x: x=='toc',
    'li': lambda x: x=='ignoretoc',
}
|---|
| 211 |
|
|---|
| 212 |
def getDefaultChecker():
    """
    Build a L{DefaultTagChecker} that accepts the tags listed in the
    module-level C{tags} table and validates C{class} attributes with the
    module-level C{allowed} predicates.
    """
    isAllowedTag = tags.has_key
    return DefaultTagChecker(isAllowedTag, allowed)
|---|
| 214 |
|
|---|
| 215 |
def doFile(file, checker):
    """
    Parse C{file} with C{tree.parseFileAndReport} and, when that yields a
    document, run C{checker} over it.

    @param file: name of the file to lint.
    @param checker: object whose C{check(document, filename)} method is
        called with the parsed document.
    """
    document = tree.parseFileAndReport(file)
    if not document:
        # Nothing usable was parsed, so there is nothing to check.
        return
    checker.check(document, file)
|---|