| 1 | # $Id: peps.py 4564 2006-05-21 20:44:42Z wiemann $ |
|---|
| 2 | # Author: David Goodger <goodger@python.org> |
|---|
| 3 | # Author: David Harrison modified for BEPs (dave@bittorrent.com) |
|---|
| 4 | # Copyright: This module has been placed in the public domain. |
|---|
| 5 | |
|---|
| 6 | """ |
|---|
| 7 | Transforms for BEP processing. |
|---|
| 8 | |
|---|
| 9 | - `Headers`: Used to transform a BEP's initial RFC-2822 header. It remains a |
|---|
| 10 | field list, but some entries get processed. |
|---|
| 11 | - `Contents`: Auto-inserts a table of contents. |
|---|
| 12 | - `BEPZero`: Special processing for BEP 0. |
|---|
| 13 | """ |
|---|
| 14 | |
|---|
| 15 | __docformat__ = 'reStructuredText' |
|---|
| 16 | |
|---|
| 17 | import sys |
|---|
| 18 | import os |
|---|
| 19 | import re |
|---|
| 20 | import time |
|---|
| 21 | from docutils import nodes, utils, languages |
|---|
| 22 | from docutils import ApplicationError, DataError |
|---|
| 23 | from docutils.transforms import Transform, TransformError |
|---|
| 24 | from docutils.transforms import parts, references, misc |
|---|
| 25 | |
|---|
| 26 | |
|---|
class Headers(Transform):

    """
    Process fields in a BEP's initial RFC-2822 header.

    The header stays a field list; selected fields are rewritten in place:

    - ``BEP``: must be an integer; otherwise a warning is reported and the
      value is replaced by a "problematic" node.
    - ``Author`` / ``Discussions-To``: email references are masked.
    - ``Replaces`` / ``Replaced-By`` / ``Requires``: each number becomes a
      link to the corresponding BEP.
    - ``Last-Modified``: RCS keywords are cleaned and the date is linked to
      the BEP's source in the repository browser; an empty field is filled
      from the source file's modification time.
    - ``Content-Type``: linked to BEP 12.
    - ``Version``: RCS keywords are cleaned.
    """

    default_priority = 360

    # NOTE: There is no bep_base_url.  The base of the BEP directory is
    # specified with pep_base_url because changing this requires modifying
    # the rst repository.  This value can be changed with the pep-base-url
    # argument.  For now, I don't want to modify restructured text parsing
    # code.  It means we cannot currently disambiguate between peps and beps
    # for specifying the base url.
    bep_url = 'bep-%04d'
    bep_cvs_url = ('http://bittorrent.org/trac/browser/dotorg/trunk/html/beps/bep_%04d.rst' )
    #bep_cvs_url = ('http://svn.bittorrent.com/view/*checkout*'
    #               '/peps/trunk/bep-%04d.txt')
    rcs_keyword_substitutions = (
          (re.compile(r'\$' r'RCSfile: (.+),v \$$', re.IGNORECASE), r'\1'),
          (re.compile(r'\$[a-zA-Z]+: (.+) \$$'), r'\1'),)

    def apply(self):
        """
        Validate the RFC-2822 header and rewrite its fields in place.

        Raises `DataError` if the document is empty, does not start with an
        ``rfc2822`` field list, has no "BEP" field, has no "Title" as its
        second field, or has a field body that is not a single paragraph.
        A non-integer entry in "Replaces", "Replaced-By" or "Requires"
        raises `ValueError` (from ``int()``).
        """
        if not len(self.document):
            # @@@ replace these DataErrors with proper system messages
            raise DataError('Document tree is empty.')
        header = self.document[0]
        if not isinstance(header, nodes.field_list) or \
              'rfc2822' not in header['classes']:
            raise DataError('Document does not begin with an RFC-2822 '
                            'header; it is not a BEP.')
        bep = None
        # Stays None when the BEP number is not an integer (no source link).
        cvs_url = None
        for field in header:
            if field[0].astext().lower() == 'bep': # should be the first field
                value = field[1].astext()
                try:
                    bep = int(value)
                    cvs_url = self.bep_cvs_url % bep
                except ValueError:
                    bep = value
                    cvs_url = None
                    msg = self.document.reporter.warning(
                        '"BEP" header must contain an integer; "%s" is an '
                        'invalid value.' % bep, base_node=field)
                    msgid = self.document.set_id(msg)
                    prb = nodes.problematic(value, value or '(none)',
                                            refid=msgid)
                    prbid = self.document.set_id(prb)
                    msg.add_backref(prbid)
                    if len(field[1]):
                        field[1][0][:] = [prb]
                    else:
                        field[1] += nodes.paragraph('', '', prb)
                break
        if bep is None:
            raise DataError('Document does not contain an RFC-2822 "BEP" '
                            'header.')
        if bep == 0:
            # Special processing for BEP 0.
            pending = nodes.pending(BEPZero)
            self.document.insert(1, pending)
            self.document.note_pending(pending)
        if len(header) < 2 or header[1][0].astext().lower() != 'title':
            raise DataError('No title!')
        for field in header:
            name = field[0].astext().lower()
            body = field[1]
            if len(body) > 1:
                raise DataError('BEP header field body contains multiple '
                                'elements:\n%s' % field.pformat(level=1))
            elif len(body) == 1:
                if not isinstance(body[0], nodes.paragraph):
                    raise DataError('BEP header field body may only contain '
                                    'a single paragraph:\n%s'
                                    % field.pformat(level=1))
            elif name == 'last-modified':
                # Empty "Last-Modified": fall back to the source file's
                # modification time (index 8 of the stat tuple is st_mtime).
                try:
                    date = time.strftime(
                        '%d-%b-%Y',
                        time.localtime(os.stat(self.document['source'])[8]))
                except OSError:
                    # The source is not a readable file (e.g. "<stdin>").
                    date = 'unknown'
                if cvs_url:
                    body += nodes.paragraph(
                        '', '', nodes.reference('', date, refuri=cvs_url))
                else:
                    # No valid BEP number, hence no link target: emit the
                    # plain date so that body[0] below always exists.
                    body += nodes.paragraph('', date)
            else:
                # empty
                continue
            para = body[0]
            if name == 'author':
                for node in para:
                    if isinstance(node, nodes.reference):
                        node.replace_self(mask_email(node))
            elif name == 'discussions-to':
                for node in para:
                    if isinstance(node, nodes.reference):
                        node.replace_self(mask_email(node, bep))
            elif name in ('replaces', 'replaced-by', 'requires'):
                newbody = []
                space = nodes.Text(' ')
                # Entries are separated by whitespace, optionally preceded
                # by a comma.
                for refbep in re.split(r',?\s+', body.astext()):
                    bepno = int(refbep)
                    newbody.append(nodes.reference(
                        refbep, refbep,
                        refuri=(self.document.settings.pep_base_url
                                + self.bep_url % bepno)))
                    newbody.append(space)
                para[:] = newbody[:-1] # drop trailing space
            elif name == 'last-modified':
                utils.clean_rcs_keywords(para, self.rcs_keyword_substitutions)
                if cvs_url:
                    date = para.astext()
                    para[:] = [nodes.reference('', date, refuri=cvs_url)]
            elif name == 'content-type':
                bep_type = para.astext()
                uri = self.document.settings.pep_base_url + self.bep_url % 12
                para[:] = [nodes.reference('', bep_type, refuri=uri)]
            elif name == 'version' and len(body):
                utils.clean_rcs_keywords(para, self.rcs_keyword_substitutions)
|---|
| 141 | |
|---|
| 142 | |
|---|
class Contents(Transform):

    """
    Insert an empty table of contents topic and a transform placeholder into
    the document after the RFC 2822 header.
    """

    default_priority = 380

    def apply(self):
        """
        Build a "contents" topic titled with the localized label, give it
        that name unless the document already uses it, and insert it at
        index 1 with a pending `parts.Contents` transform inside.
        """
        document = self.document
        language = languages.get_language(document.settings.language_code)
        label = language.labels['contents']
        topic = nodes.topic('', nodes.title('', label), classes=['contents'])
        normalized = nodes.fully_normalize_name(label)
        # Only claim the name if no other element in the document has it.
        if not document.has_name(normalized):
            topic['names'].append(normalized)
        document.note_implicit_target(topic)
        placeholder = nodes.pending(parts.Contents)
        topic += placeholder
        document.insert(1, topic)
        document.note_pending(placeholder)
|---|
| 165 | |
|---|
| 166 | |
|---|
class TargetNotes(Transform):

    """
    Locate the "References" section, insert a placeholder for an external
    target footnote insertion transform at the end, and schedule the
    transform to run immediately.
    """

    default_priority = 520

    def apply(self):
        """
        Find (or create) the "References" section and append two pending
        nodes to it: one for `references.TargetNotes` and one callback that
        removes the section again if it ends up empty.

        Only the run of sections at the end of the document is searched.
        A newly created section goes before a trailing "Copyright" section
        if there is one, otherwise at the end of the document.
        """
        doc = self.document
        refsect = None
        copyright_index = None
        i = len(doc) - 1
        # Walk backwards over the trailing sections only.
        while i >= 0 and isinstance(doc[i], nodes.section):
            title_words = doc[i][0].astext().lower().split()
            if 'references' in title_words:
                refsect = doc[i]
                break
            elif 'copyright' in title_words:
                copyright_index = i
            i -= 1
        if refsect is None:
            refsect = nodes.section()
            refsect += nodes.title('', 'References')
            doc.set_id(refsect)
            # Compare against None: index 0 is a valid position.
            if copyright_index is not None:
                # Put the new "References" section before "Copyright":
                doc.insert(copyright_index, refsect)
            else:
                # Put the new "References" section at end of doc:
                doc.append(refsect)
        pending = nodes.pending(references.TargetNotes)
        refsect.append(pending)
        self.document.note_pending(pending, 0)
        pending = nodes.pending(misc.CallBack,
                                details={'callback': self.cleanup_callback})
        refsect.append(pending)
        self.document.note_pending(pending, 1)

    def cleanup_callback(self, pending):
        """
        Remove an empty "References" section.

        Called after the `references.TargetNotes` transform is complete.
        """
        if len(pending.parent) == 2:    # <title> and <pending>
            pending.parent.parent.remove(pending.parent)
|---|
| 215 | |
|---|
| 216 | |
|---|
class BEPZero(Transform):

    """
    Special processing for BEP 0.
    """

    default_priority = 760

    def apply(self):
        """
        Walk the whole document with a `BEPZeroSpecial` visitor, then remove
        the pending node that triggered this transform.
        """
        self.document.walk(BEPZeroSpecial(self.document))
        placeholder = self.startnode
        placeholder.parent.remove(placeholder)
|---|
| 229 | |
|---|
| 230 | |
|---|
class BEPZeroSpecial(nodes.SparseNodeVisitor):

    """
    Perform the special processing needed by BEP 0:

    - Mask email addresses.

    - Link BEP numbers in the second column of 4-column tables to the BEPs
      themselves.
    """

    bep_url = Headers.bep_url

    def unknown_visit(self, node):
        """Ignore node types without a dedicated ``visit_`` method."""
        pass

    def visit_reference(self, node):
        """Swap the reference for its masked-email replacement."""
        masked = mask_email(node)
        node.replace_self(masked)

    def visit_field_list(self, node):
        """Skip the RFC-2822 header field list."""
        if 'rfc2822' in node['classes']:
            raise nodes.SkipNode

    def visit_tgroup(self, node):
        """Record whether this table has 4 columns; reset the counter."""
        self.bep_table = node['cols'] == 4
        self.entry = 0

    def visit_colspec(self, node):
        """Count column specs; tag the second one of a 4-column table."""
        self.entry += 1
        if self.bep_table and self.entry == 2:
            node['classes'].append('num')

    def visit_row(self, node):
        """Restart the per-row entry counter."""
        self.entry = 0

    def visit_entry(self, node):
        """
        Count entries in the row; in a 4-column table, tag a single-child
        second entry as 'num' and, if it is a one-child paragraph whose text
        is an integer, replace that child with a link to the BEP.
        """
        self.entry += 1
        if not (self.bep_table and self.entry == 2 and len(node) == 1):
            return
        node['classes'].append('num')
        cell = node[0]
        if not isinstance(cell, nodes.paragraph) or len(cell) != 1:
            return
        text = cell.astext()
        try:
            number = int(text)
            target = (self.document.settings.pep_base_url
                      + self.bep_url % number)
            cell[0] = nodes.reference(text, text, refuri=target)
        except ValueError:
            # Not a BEP number; leave the cell untouched.
            pass
|---|
| 280 | |
|---|
| 281 | |
|---|
# Addresses that are left readable instead of being rewritten as
# "user at host" by `mask_email`.
non_masked_addresses = ('beps@bittorrent.org',
                        'bittorrent-list@bittorrent.org',
                        'bittorrent-dev@bittorrent.org')

def mask_email(ref, bepno=None):
    """
    Mask the email address in `ref` and return a replacement node.

    `ref` is returned unchanged if it contains no email address.

    For email addresses such as "user@host", mask the address as "user at
    host" (text) to thwart simple email address harvesters (except for those
    listed in `non_masked_addresses`).  If a BEP number (`bepno`) is given,
    return a reference including a default email subject.

    Parameters:

    - `ref`: a reference node; only those whose "refuri" starts with
      "mailto:" are processed.
    - `bepno`: optional BEP number.  When given, `ref` itself is modified
      (subject appended to its URI, children replaced) and returned; when
      omitted, only the replacement child node is returned.
    """
    scheme = 'mailto:'
    if not (ref.hasattr('refuri') and ref['refuri'].startswith(scheme)):
        return ref
    # Strip exactly the scheme prefix so the full address is compared.
    address = ref['refuri'][len(scheme):]
    if address in non_masked_addresses:
        replacement = ref[0]
    else:
        replacement_text = ref.astext().replace('@', ' at ')
        # NOTE(review): the text goes into a raw HTML node as-is, without
        # HTML escaping -- confirm this is intended.
        replacement = nodes.raw('', replacement_text, format='html')
    if bepno is None:
        return replacement
    ref['refuri'] += '?subject=BEP%%20%s' % bepno
    ref[:] = [replacement]
    return ref
|---|