root / tags / release-2.0.1 / twisted / web2 / http_headers.py

Revision 13275, 42.0 kB (checked in by mesozoic, 4 years ago)

Make web2 code prettier.

Line 
1 from __future__ import generators
2
3 import types
4 from calendar import timegm
5 from time import gmtime, time
6 import base64
7 import re
8
# Counterpart to evilness in test_http_headers: when the name
# _http_headers_isBeingTested can be resolved (presumably set up by that test
# module -- confirm there), use an ordered dict and freeze time() so that
# generated output is reproducible.
try:
    _http_headers_isBeingTested
    print("isbeingtested")
    from twisted.python.util import OrderedDict
    ODict = OrderedDict
    time = lambda : 999999990 # Sun, 09 Sep 2001 01:46:30 GMT
except (NameError, ImportError):
    # NameError: the test flag is not defined (normal operation).
    # ImportError: OrderedDict could not be imported.
    # Anything else is a real error and is no longer silently swallowed.
    ODict = dict
18
19
def dashCapitalize(s):
    """Capitalize every '-'-separated word of s.

    For example 'content-type' becomes 'Content-Type'.
    """
    capitalized = []
    for word in s.split('-'):
        capitalized.append(word.capitalize())
    return '-'.join(capitalized)
23
# Name tables for HTTP date parsing and formatting.
# weekdayname is indexed by the weekday number returned by gmtime()
# (0 is Monday); monthname is indexed by the 1-based month number, so slot 0
# is a None placeholder. The *_lower variants hold the same names lowercased.
weekdayname = 'Mon Tue Wed Thu Fri Sat Sun'.split()
weekdayname_lower = [name.lower() for name in weekdayname]
monthname = [None] + 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
monthname_lower = [None] + [name.lower() for name in monthname[1:]]
31
## HTTP DateTime parser
def parseDateTime(dateString):
    """Convert an HTTP date string to integer seconds since the epoch.

    Three layouts are recognised:
      1. "Sun, 06 Nov 1994 08:49:37 GMT"   (the normal format)
      2. "Sunday, 06-Nov-94 08:49:37 GMT"  (two-digit year)
      3. "Sun Nov  6 08:49:37 1994"        (ANSI C asctime())
    For layouts 1 and 2 the trailing "GMT" may be missing; whatever stands in
    that position is never examined. A missing leading weekday is tolerated.

    Raises ValueError if the string matches none of the layouts or one of its
    fields cannot be converted (including an empty or all-whitespace string).
    """
    parts = dateString.split()
    if not parts:
        # Without this guard the parts[0] lookup below raises IndexError,
        # which callers that only handle ValueError would not catch.
        raise ValueError("Unknown datetime format %r" % dateString)

    if not parts[0][0:3].lower() in weekdayname_lower:
        # Weekday is stupid. Might have been omitted: retry with a dummy one
        # (the weekday itself is never used in the computation).
        try:
            return parseDateTime("Sun, "+dateString)
        except ValueError:
            # Guess not; fall through and try the string as given.
            pass

    partlen = len(parts)
    if (partlen == 5 or partlen == 6) and parts[1].isdigit():
        # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
        day = parts[1]
        month = parts[2]
        year = parts[3]
        timeOfDay = parts[4]
    elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1:
        # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
        day, month, year = parts[1].split('-')
        timeOfDay = parts[2]
        # Two digit year: below 69 means 20xx, 69..99 means 19xx.
        year = int(year)
        if year < 69:
            year = year + 2000
        elif year < 100:
            year = year + 1900
    elif partlen == 5:
        # 3rd date format: Sun Nov  6 08:49:37 1994
        day = parts[2]
        month = parts[1]
        year = parts[4]
        timeOfDay = parts[3]
    else:
        raise ValueError("Unknown datetime format %r" % dateString)

    day = int(day)
    # list.index raises ValueError for an unknown month name.
    month = int(monthname_lower.index(month.lower()))
    year = int(year)
    hour, minute, sec = map(int, timeOfDay.split(':'))
    return int(timegm((year, month, day, hour, minute, sec)))
82
##### HTTP tokenizer
class Token(str):
    """A str subclass used to mark HTTP separator characters.

    Instances are cached per character in Token.tokens, so constructing a
    Token for a character that was seen before returns the cached object.
    """
    __slots__ = []
    tokens = {}

    def __new__(cls, char):
        try:
            return Token.tokens[char]
        except KeyError:
            created = str.__new__(cls, char)
            Token.tokens[char] = created
            return created

    def __repr__(self):
        return "Token(%s)" % str.__repr__(self)
95
96
# Characters treated as separators by tokenize(): space, tab and the
# punctuation listed here.
http_tokens = ' \t"()<>@,;:\\/[]?={}'
# Control characters: every character code below 0x20, plus DEL (0x7f).
http_ctls = "".join(map(chr, range(0x20))) + chr(0x7f)
99
def tokenize(header, foldCase=True):
    """Tokenize a string according to normal HTTP header parsing rules.

    In particular:
    - Whitespace is irrelevant and eaten next to special separator tokens.
      Its existence (but not amount) is important between character strings.
    - Quoted string support including embedded backslashes.
    - Case is insignificant (and thus lowercased), except in quoted strings.
       (unless foldCase=False)
    - Multiple headers are concatenated with ','

    NOTE: not all headers can be parsed with this function.

    Takes a raw header value (list of strings), and
    returns a generator of strings and Token class instances.

    Raises ValueError (while the generator is being consumed) for a control
    character outside a quoted string, for a backslash that ends the input
    inside a quoted string, and for an unterminated quoted string.
    """
    tokens=http_tokens
    ctls=http_ctls

    string = ",".join(header)
    start = 0        # index where the pending, not yet yielded text begins
    cur = 0          # index of the character currently examined
    quoted = False   # inside a quoted string
    qpair = False    # previous character was a backslash inside quotes
    # Whitespace state:
    #   -1    at the start or right after a separator token
    #   False right after ordinary text or an opening quote
    #   True  whitespace was seen while the state was False
    inSpaces = -1
    qstring = None   # unescaped content collected for the current quoted string

    for x in string:
        if quoted:
            if qpair:
                # Escaped character: keep it, drop the backslash before it.
                qpair = False
                qstring = qstring+string[start:cur-1]+x
                start = cur+1
            elif x == '\\':
                qpair = True
            elif x == '"':
                quoted = False
                yield qstring+string[start:cur]
                qstring=None
                start = cur+1
        elif x in tokens:
            # A separator ends any pending run of ordinary characters.
            if start != cur:
                if foldCase:
                    yield string[start:cur].lower()
                else:
                    yield string[start:cur]

            start = cur+1
            if x == '"':
                quoted = True
                qstring = ""
                inSpaces = False
            elif x in " \t":
                if inSpaces is False:
                    inSpaces = True
            else:
                inSpaces = -1
                yield Token(x)
        elif x in ctls:
            # Tab never gets here: it is also in http_tokens, tested first.
            raise ValueError("Invalid control character: %d in header" % ord(x))
        else:
            if inSpaces is True:
                # Whitespace separated this text from the preceding text.
                yield Token(' ')
            inSpaces = False
        cur = cur+1

    if qpair:
        raise ValueError("Missing character after '\\'")
    if quoted:
        raise ValueError("Missing end quote")

    if start != cur:
        if foldCase:
            yield string[start:cur].lower()
        else:
            yield string[start:cur]
179
def split(seq, delim):
    """Generator counterpart of str.split for arbitrary iterables.

    Yields one list per run of items between occurrences of delim; the
    delimiters themselves are dropped. At least one list is always yielded,
    and it may be empty.
    """
    chunk = []
    for element in seq:
        if element == delim:
            yield chunk
            chunk = []
            continue
        chunk.append(element)
    yield chunk
192
193 # def find(seq, *args):
194 #     """The same as seq.index but returns -1 if not found, instead
195 #     Too bad it's not builtin to python!"""
196 #     try:
197 #         return seq.index(value, *args)
198 #     except ValueError:
199 #         return -1
200     
201
def filterTokens(seq):
    """Return a list of the items of seq that are not Token instances.

    This is a deliberately lenient alternative to a more specific parse
    (such as splitting on commas) for headers where only strings are
    expected. Apache takes the same approach, with comments about broken
    clients that forget commas.
    """
    return [item for item in seq if not isinstance(item, Token)]
218
219 ##### parser utilities:
def checkSingleToken(tokens):
    """Return the only element of tokens.

    Raises ValueError unless tokens holds exactly one element.
    """
    if len(tokens) == 1:
        return tokens[0]
    raise ValueError("Expected single token, not %s." % (tokens,))
224    
def parseKeyValue(val):
    """Interpret a token list as either "key" or "key=value".

    Returns (key, None) for a single item and (key, value) for three items
    whose middle one is the '=' token. Raises ValueError for anything else.
    """
    count = len(val)
    if count == 1:
        return val[0], None
    if count == 3 and val[1] == Token('='):
        return val[0], val[2]
    raise ValueError("Expected key or key=value, but got %s." % (val,))
231
def parseArgs(field):
    """Split a token sequence on ';' into a value and its parameters.

    Returns (val, args): val is the list of tokens before the first ';' and
    args is a list with the parseKeyValue() result of each later segment.
    """
    segments = list(split(field, Token(';')))
    val = segments[0]
    args = []
    for segment in segments[1:]:
        args.append(parseKeyValue(segment))
    return val, args
237
def listParser(fun):
    """Build a parser for comma-separated lists.

    The returned function is a generator: it splits its token sequence on
    the ',' token and yields fun(field) for every non-empty field.
    """
    def listParserHelper(tokens):
        for field in split(tokens, Token(',')):
            if len(field) == 0:
                # Empty list elements (e.g. from ",,") are skipped.
                continue
            yield fun(field)

    return listParserHelper
248
def last(seq):
    """Return the final element of seq, i.e. seq[-1]."""
    return seq[-1]
253
254 ##### Generation utilities
def quoteString(s):
    """Return s wrapped in double quotes, with every backslash and double
    quote inside it escaped by a backslash."""
    escaped = s.replace('\\', '\\\\')
    escaped = escaped.replace('"', '\\"')
    return '"' + escaped + '"'
257
def listGenerator(fun):
    """Build a generator function for list-valued headers.

    The returned function applies fun to each element of its argument and
    joins the results with generateList().
    """
    def listGeneratorHelper(l):
        rendered = []
        for element in l:
            rendered.append(fun(element))
        return generateList(rendered)

    return listGeneratorHelper
265
def generateList(seq):
    """Join the strings in seq with ', ' (comma followed by a space)."""
    separator = ", "
    return separator.join(seq)
268
def singleHeader(item):
    """Wrap item in a one-element list."""
    wrapped = [item]
    return wrapped
271
def generateKeyValues(kvs):
    """Render (key, value) pairs as "key=value" items joined with ';'.

    A pair whose value is None is rendered as the bare key.
    """
    rendered = []
    for key, value in kvs:
        if value is not None:
            rendered.append('%s=%s' % (key, value))
        else:
            rendered.append('%s' % key)
    return ';'.join(rendered)
281
class MimeType:
    """A MIME media type: main type, subtype and parameters.

    params defaults to an empty tuple. __hash__ hashes params, so it has to
    be hashable for instances to be usable as dict keys or set members.
    """

    def __init__(self, mediaType, mediaSubtype, params=()):
        self.mediaType = mediaType
        self.mediaSubtype = mediaSubtype
        self.params = params

    def __eq__(self, other):
        if not isinstance(other, MimeType):
            return False
        return (self.mediaType == other.mediaType and
                self.mediaSubtype == other.mediaSubtype and
                self.params == other.params)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        fields = (self.mediaType, self.mediaSubtype, self.params)
        return "MimeType(%r, %r, %r)" % fields

    def __hash__(self):
        # XOR of the three component hashes; consistent with __eq__.
        return hash(self.mediaType)^hash(self.mediaSubtype)^hash(self.params)
302    
303 ##### Specific header parsers.
def parseAccept(field):
    """Parse one element of an Accept header.

    field is the token list of a single media range, including its
    parameters. Returns (MimeType, qvalue); qvalue defaults to 1.0.

    Raises ValueError if the media type is not of the form type/subtype, or
    if the 'q' parameter has no value or is not a number.
    """
    mimeTokens,args = parseArgs(field)

    if len(mimeTokens) != 3 or mimeTokens[1] != Token('/'):
        raise ValueError("MIME Type "+str(mimeTokens)+" invalid.")

    # okay, this spec is screwy. A 'q' parameter is used as the separator
    # between MIME parameters and (as yet undefined) additional HTTP
    # parameters.

    num = 0
    for arg in args:
        if arg[0] == 'q':
            mimeparams=tuple(args[0:num])
            params=args[num:]
            break
        num = num + 1
    else:
        # No 'q' at all: every parameter belongs to the MIME type.
        mimeparams=tuple(args)
        params=[]

    # Default values for parameters:
    qval = 1.0

    # Parse accept parameters:
    for param in params:
        if param[0] =='q':
            if param[1] is None:
                # A bare "q" would otherwise reach float(None), which raises
                # TypeError instead of the ValueError used for bad input.
                raise ValueError("Accept parameter 'q' has no value.")
            qval = float(param[1])
        else:
            # Warn? ignored parameter.
            pass

    return MimeType(mimeTokens[0],mimeTokens[2],mimeparams),qval
338
def parseAcceptQvalue(field):
    """Parse one element of a header of the form "token[;q=qvalue]".

    Returns (token, qvalue); qvalue defaults to 1.0.

    Raises ValueError if the part before the parameters is not a single
    token, or if the 'q' parameter has no value or is not a number.
    """
    value,args=parseArgs(field)

    value = checkSingleToken(value)

    qvalue = 1.0 # Default qvalue is 1
    for arg in args:
        if arg[0] == 'q':
            if arg[1] is None:
                # A bare "q" would otherwise reach float(None), which raises
                # TypeError instead of the ValueError used for bad input.
                raise ValueError("Parameter 'q' has no value.")
            qvalue = float(arg[1])
    return value,qvalue
349
350
def addDefaultCharset(charsets):
    """Add 'iso-8859-1' with quality 1.0 to the charsets mapping unless it
    already has a non-None entry for '*' or for 'iso-8859-1'.

    The mapping is modified in place and also returned.
    """
    if charsets.get('*') is not None or charsets.get('iso-8859-1') is not None:
        return charsets
    charsets['iso-8859-1'] = 1.0
    return charsets
355
def addDefaultEncoding(encodings):
    """Add 'identity' to the encodings mapping unless it already has a
    non-None entry for '*' or for 'identity'.

    The mapping is modified in place and also returned.
    """
    if encodings.get('*') is not None or encodings.get('identity') is not None:
        return encodings
    # RFC doesn't specify a default value for identity, only that it
    # "is acceptable" if not mentioned. Thus, give it a very low qvalue.
    encodings['identity'] = .0001
    return encodings
362
363
def parseContentType(header):
    """Parse a tokenized Content-Type header into a MimeType.

    Case folding is disabled for this header, because of use of
    Content-Type: multipart/form-data; boundary=CaSeFuLsTuFf
    so the type, the subtype and the parameter keys are lowercased here,
    while parameter values are kept as they are.

    Raises ValueError if the media type is not of the form type/subtype.
    """
    mimeTokens, rawArgs = parseArgs(header)

    if not (len(mimeTokens) == 3 and mimeTokens[1] == Token('/')):
        raise ValueError("MIME Type "+str(mimeTokens)+" invalid.")

    params = []
    for key, value in rawArgs:
        params.append((key.lower(), value))

    return MimeType(mimeTokens[0].lower(), mimeTokens[2].lower(), tuple(params))
377
378 def parseContentMD5(header):
379     try:
380         return base64.decodestring(header)
381     except Exception,e:
382         raise ValueError(e)
383    
def parseContentRange(header):
    """Parse a content-range header into (kind, start, end, realLength).

    realLength might be None if real length is not known ('*').
    start and end might be None if start,end unspecified (for response code 416)

    kind is returned exactly as written in the header; only its comparison
    against "bytes" is case-insensitive.

    Raises ValueError for a range unit other than bytes, or if the header
    does not have the "unit start-end/length" layout.
    """
    kind, other = header.strip().split()
    if kind.lower() != "bytes":
        # The unit is now interpolated; the message used to contain a
        # literal "%r".
        raise ValueError("a range of type %r is not supported" % (kind,))
    startend, realLength = other.split("/")
    if startend.strip() == '*':
        start,end=None,None
    else:
        start, end = map(int, startend.split("-"))
    if realLength == "*":
        realLength = None
    else:
        realLength = int(realLength)
    return (kind, start, end, realLength)
403
def parseExpect(field):
    """Parse one element of an Expect header.

    Returns (name, values): values is a tuple whose first item is the value
    given to the expectation itself (None if it has none), followed by the
    (key, value) pairs of its ';'-separated parameters.
    """
    head, params = parseArgs(field)
    name, value = parseKeyValue(head)
    return (name, (value,) + tuple(params))
409
def parseExpires(header):
    """Parse an Expires header into seconds since the epoch.

    "HTTP/1.1 clients and caches MUST treat other invalid date formats,
    especially including the value 0, as in the past (i.e., "already
    expired")." -- hence every unparsable value yields 0.
    """
    try:
        return parseDateTime(header)
    except (ValueError, IndexError):
        # IndexError is included because a blank header value can surface
        # from the date parser as an indexing failure rather than as
        # ValueError; both mean "not a valid date".
        return 0
418    
def parseIfRange(headers):
    """Parse an If-Range header, which holds either an entity tag or a date.

    The raw header list is first tokenized and parsed as an ETag; if that
    raises ValueError, the last raw header value is parsed as an HTTP date.
    """
    try:
        etag = ETag.parse(tokenize(headers))
    except ValueError:
        return parseDateTime(last(headers))
    return etag
424    
def parseRange(range):
    """Parse a tokenized Range header.

    Returns (unit, ranges): unit is always 'bytes' and ranges is a list of
    (start, end) tuples. start is None for a suffix range ("-500") and end
    is None for an open-ended range ("9500-").

    Raises ValueError for a malformed header, a unit other than 'bytes', a
    range with neither start nor end, or a range whose start exceeds its end.
    """
    tokens = list(range)
    if len(tokens) < 3 or tokens[1] != Token('='):
        raise ValueError("Invalid range header format: %s" %(tokens,))

    unit=tokens[0]
    if unit != 'bytes':
        raise ValueError("Unknown range unit: %s." % (unit,))
    rangeset=split(tokens[2:], Token(','))
    ranges = []

    for byterangespec in rangeset:
        if len(byterangespec) != 1:
            raise ValueError("Invalid range header format: %s" % (tokens,))
        # Unpacking raises ValueError unless there is exactly one '-'.
        start,end=byterangespec[0].split('-')

        if not start and not end:
            raise ValueError("Invalid range header format: %s" % (tokens,))

        if start:
            start = int(start)
        else:
            start = None

        if end:
            end = int(end)
        else:
            end = None

        # Compare against None explicitly: a truthiness test would skip the
        # check whenever end is 0 and so accept e.g. "5-0".
        if start is not None and end is not None and start > end:
            raise ValueError("Invalid range header, start > end: %s" % (tokens,))
        ranges.append((start,end))
    return unit,ranges
458
def parseRetryAfter(header):
    """Parse a Retry-After header into an absolute time in seconds.

    An integer value is taken as a number of seconds from now; any other
    value is parsed as an HTTP date.
    """
    try:
        delta = int(header)
    except ValueError:
        # Not delta seconds, so it has to be a datetime.
        return parseDateTime(header)
    return time() + delta
466
467 #### Header generators
def generateAccept(accept):
    """Render a (MimeType, qvalue) pair as one Accept header element.

    Parameters are appended after ';'. The qvalue is omitted when it equals
    1.0; otherwise it is written with at most three decimals and trailing
    zeros removed.
    """
    mimeType, q = accept

    pieces = ["%s/%s" % (mimeType.mediaType, mimeType.mediaSubtype)]
    if mimeType.params:
        pieces.append(';' + generateKeyValues(mimeType.params))

    if q != 1.0:
        qtext = ';q=%.3f' % (q,)
        pieces.append(qtext.rstrip('0').rstrip('.'))

    return ''.join(pieces)
479
def removeDefaultEncoding(seq):
    """Yield the items of seq, leaving out ('identity', .0001) entries, i.e.
    the default that addDefaultEncoding() inserts."""
    for item in seq:
        if item[0] == 'identity' and item[1] == .0001:
            continue
        yield item
484
def generateAcceptQvalue(keyvalue):
    """Render a (token, qvalue) tuple as "token" or "token;q=qvalue".

    The qvalue is omitted when it equals 1.0; otherwise it is written with
    at most three decimals and trailing zeros removed.
    """
    if keyvalue[1] != 1.0:
        rendered = "%s;q=%.3f" % keyvalue
        return rendered.rstrip('0').rstrip('.')
    return "%s" % keyvalue[0:1]
490
def generateContentRange(tup):
    """Render a Content-Range header value.

    tup is (unit, start, end, length). length may be None, which is written
    as '*'. When start and end are both None the range itself is written as
    '*'.
    """
    # Local names no longer shadow the builtins type() and len().
    unit, start, end, length = tup
    if length is None:
        length = '*'
    else:
        length = int(length)
    if start is None and end is None:
        startend = '*'
    else:
        startend = '%d-%d' % (start, end)

    return '%s %s/%s' % (unit, startend, length)
506
def generateDateTime(secSinceEpoch):
    """Format seconds since the epoch as an HTTP date string, for example
    "Sun, 06 Nov 1994 08:49:37 GMT"."""
    # Only the first seven gmtime() fields are needed.
    year, month, day, hh, mm, ss, wd = gmtime(secSinceEpoch)[:7]
    fields = (weekdayname[wd], day, monthname[month], year, hh, mm, ss)
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % fields
515
def generateExpect(item):
    """Render one Expect header element.

    item is (name, values): values[0] is the value of the expectation itself
    (None for a bare name) and any further entries are (key, value)
    parameters appended after ';'.
    """
    name = item[0]
    values = item[1]
    if values[0] is not None:
        out = '%s=%s' % (name, values[0])
    else:
        out = '%s' % (name,)
    if len(values) > 1:
        out += ';' + generateKeyValues(values[1:])
    return out
524
def generateRange(range):
    """Render a (unit, ranges) pair as a Range header value.

    ranges is a sequence of (start, end) pairs; a None start or end is
    written as an empty string, giving e.g. "-500" or "9500-".

    Raises ValueError if unit is not 'bytes'.
    """
    unit, ranges = range

    if unit != 'bytes':
        raise ValueError("Unknown range unit: "+unit+".")

    specs = []
    for startend in ranges:
        first = startend[0]
        if first is None:
            first = ''
        second = startend[1]
        if second is None:
            second = ''
        specs.append('%s-%s' % (first, second))

    return unit + '=' + ','.join(specs)
539    
def generateRetryAfter(when):
    """Render the absolute time when as a Retry-After value.

    The delta-seconds form is always produced: the difference between when
    and the current time, truncated to an integer.
    """
    delta = when - time()
    return str(int(delta))
543
def generateContentType(mimeType):
    """Render a MimeType as "type/subtype", followed by ';' and its
    parameters when it has any."""
    pieces = ["%s/%s" % (mimeType.mediaType, mimeType.mediaSubtype)]
    if mimeType.params:
        pieces.append(';' + generateKeyValues(mimeType.params))
    return ''.join(pieces)
549
def generateIfRange(dateOrETag):
    """Render an If-Range value: an ETag instance through its generate()
    method, anything else as an HTTP date."""
    if not isinstance(dateOrETag, ETag):
        return generateDateTime(dateOrETag)
    return dateOrETag.generate()
555
556 ####
class ETag:
    """An HTTP entity tag: an opaque tag string plus a "weak" flag."""

    def __init__(self, tag, weak=False):
        self.tag = tag
        self.weak = weak

    def match(self, other, strongCompare):
        """Compare two entity tags using the strong or weak comparison.

        Returns True if other is an ETag with the same tag and, when
        strongCompare is true, neither of the two is weak.
        """
        # Sec 13.3.
        # The strong comparison function: in order to be considered equal, both
        #   validators MUST be identical in every way, and both MUST NOT be weak.
        #
        # The weak comparison function: in order to be considered equal, both
        #   validators MUST be identical in every way, but either or both of
        #   them MAY be tagged as "weak" without affecting the result.

        if not isinstance(other, ETag) or other.tag != self.tag:
            return False

        if strongCompare and (other.weak or self.weak):
            return False
        return True

    def __eq__(self, other):
        return isinstance(other, ETag) and other.tag == self.tag and other.weak == self.weak

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        # Uses the real class name (previously spelled "Etag").
        return "ETag(%r, weak=%r)" % (self.tag, self.weak)

    def parse(tokens):
        """Build an ETag from a token sequence.

        Accepts a single string (a strong tag) or the three tokens
        weak-marker, '/' and a string (a weak tag). The weak marker is
        accepted both as 'W' and as 'w', the latter being what a
        case-folding tokenizer produces.

        Raises ValueError for any other input.
        """
        tokens=tuple(tokens)
        if len(tokens) == 1 and not isinstance(tokens[0], Token):
            return ETag(tokens[0])

        if(len(tokens) == 3 and tokens[0] in ("w", "W")
           and tokens[1] == Token('/')):
            return ETag(tokens[2], weak=True)

        raise ValueError("Invalid ETag.")

    parse=staticmethod(parse)

    def generate(self):
        """Return the header form of the tag: the quoted tag, prefixed with
        'W/' when it is weak."""
        if self.weak:
            return 'W/'+quoteString(self.tag)
        else:
            return quoteString(self.tag)
605
def parseStarOrETag(tokens):
    """Return '*' if the tokens consist of a single '*'; otherwise parse
    them as an ETag."""
    tokens = tuple(tokens)
    if tokens != ('*',):
        return ETag.parse(tokens)
    return '*'
612
def generateStarOrETag(etag):
    """Return '*' unchanged; for anything else return the result of its
    generate() method."""
    if etag != '*':
        return etag.generate()
    return etag
618
619 #### Cookies. Blech!
class Cookie(object):
    """An HTTP cookie together with its attributes.

    name and value are required. Every other attribute defaults to "not
    set": None for path, domain, ports, expires, comment and commenturl,
    False for discard and secure, and 0 for version.
    """
    # __slots__ = ['name', 'value', 'path', 'domain', 'ports', 'expires', 'discard', 'secure', 'comment', 'commenturl', 'version']

    def __init__(self, name, value, path=None, domain=None, ports=None, expires=None, discard=False, secure=False, comment=None, commenturl=None, version=0):
        self.name=name
        self.value=value
        self.path=path
        self.domain=domain
        self.ports=ports
        self.expires=expires
        self.discard=discard
        self.secure=secure
        self.comment=comment
        self.commenturl=commenturl
        self.version=version

    def __repr__(self):
        # Only attributes that differ from their defaults are shown.
        s="Cookie(%r=%r" % (self.name, self.value)
        if self.path is not None: s+=", path=%r" % (self.path,)
        if self.domain is not None: s+=", domain=%r" % (self.domain,)
        if self.ports is not None: s+=", ports=%r" % (self.ports,)
        if self.expires is not None: s+=", expires=%r" % (self.expires,)
        if self.secure is not False: s+=", secure=%r" % (self.secure,)
        if self.comment is not None: s+=", comment=%r" % (self.comment,)
        if self.commenturl is not None: s+=", commenturl=%r" % (self.commenturl,)
        if self.version != 0: s+=", version=%r" % (self.version,)
        s+=")"
        return s

    def __eq__(self, other):
        # name and value take part in the comparison; without them two
        # unrelated cookies with equal attributes compared equal.
        # NOTE(review): discard is still not compared (and not shown by
        # __repr__) -- confirm whether that omission is intentional.
        return (isinstance(other, Cookie) and
                other.name == self.name and
                other.value == self.value and
                other.path == self.path and
                other.domain == self.domain and
                other.ports == self.ports and
                other.expires == self.expires and
                other.secure == self.secure and
                other.comment == self.comment and
                other.commenturl == self.commenturl and
                other.version == self.version)

    def __ne__(self, other):
        return not self.__eq__(other)
662
663
def parseCookie(headers):
    """Parse raw Cookie header values into a list of Cookie objects.

    Two specs are supported, and this surely needs interoperability testing:
    Version 0) http://wp.netscape.com/newsref/std/cookie_spec.html
    Version 1) http://www.faqs.org/rfcs/rfc2965.html

    A header that starts with "$Version" (in any case) is handled as
    RFC 2965: it is tokenized, cookie names are lowercased, and $Path,
    $Domain and $Port are applied to the cookie that precedes them.
    Anything else is handled as an old-style Netscape cookie header.
    """
    cookies = []
    # There can't really be multiple cookie headers according to RFC, because
    # if multiple headers are allowed, they must be joinable with ",".
    # Neither new RFC2965 cookies nor old netscape cookies are.
    header = ';'.join(headers)

    if header[0:8].lower() != "$version":
        # Oldstyle cookies don't do quoted strings or anything sensible.
        # All characters are valid for names except ';' and '=', and all
        # characters are valid for values except ';'. Spaces are stripped,
        # however.
        for rawCookie in header.split(';'):
            # Unpacking raises ValueError for a segment without '='.
            name, value = rawCookie.split('=', 1)
            cookies.append(Cookie(name.strip(' \t'), value.strip(' \t')))
        return cookies

    # RFC2965 cookie
    tokenStream = tokenize([header], foldCase=False)
    for group in split(tokenStream, Token(',')):
        current = None
        for attribute in split(group, Token(';')):
            nameval = tuple(split(attribute, Token('=')))
            if len(nameval) == 2:
                (name,), (value,) = nameval
            else:
                (name,), = nameval
                value = None

            name = name.lower()
            if name == '$version':
                continue

            if name[0] != '$':
                # An ordinary name starts a new cookie.
                current = Cookie(name, value, version=1)
                cookies.append(current)
                continue

            if current is None:
                # A '$' attribute with no cookie before it is ignored.
                continue

            if name == '$path':
                current.path = value
            elif name == '$domain':
                current.domain = value
            elif name == '$port':
                if value is None:
                    current.ports = ()
                else:
                    current.ports = tuple([int(s) for s in value.split(',')])

    return cookies
724
# A valid cookie name consists only of characters that are neither HTTP
# separators nor control characters.
cookie_validname = "[^%s]*$" % re.escape(http_tokens+http_ctls)
cookie_validname_re = re.compile(cookie_validname)
# A valid cookie value is either such a name or a double-quoted string.
cookie_validvalue = cookie_validname + r'|"([^"]|\\")*"$'
cookie_validvalue_re = re.compile(cookie_validvalue)
729
def generateCookie(cookies):
    """Render a list of Cookie objects as a single Cookie header value.

    If every cookie has version 0 the old Netscape form is produced with no
    quoting at all. Otherwise the whole header is written as RFC 2965
    version 1, including the version 0 cookies.
    """
    # There's a fundamental problem with the two cookie specifications.
    # They both use the "Cookie" header, and the RFC Cookie header only allows
    # one version to be specified. Thus, a mixed collection of V0 and V1
    # cookies has to be sent either all as V0 or all as V1. I choose V1.
    #
    # Converting a V0 cookie to V1 is not lossless: wherever quotes must be
    # added to conform to V1 syntax, a V0 parser will see a modified cookie
    # (a real example: "Cookie: cartcontents=oid:94680,qty:1,auto:0,esp:y").
    # We do it anyway. It has a high probability of breaking applications
    # that only handle oldstyle cookies, where some other application set a
    # newstyle cookie that applies to the same site (or host), AND where the
    # oldstyle cookie uses a value which is invalid syntax in a newstyle
    # cookie.
    #
    # Also, the cookie name *cannot* be quoted in V1, so some cookies just
    # cannot be converted at all (e.g. "Cookie: phpAds_capAd[32]=2"). These
    # are just discarded during conversion.
    #
    # As this is an unsolvable problem, the answer is: don't do that, or else
    # upgrade your old applications to have newstyle cookie parsers.
    #
    # Offhand note: *many* sites send V0 cookies that are not valid V1
    # syntax (about 20% of the author's cookies file). They do not generally
    # mix them with V1 cookies, so this isn't an issue right now. How many
    # of those webapps support RFC2965 V1 cookies is untested; probably few.

    versions = [cookie.version for cookie in cookies]

    if max(versions) == 0:
        # no quoting or anything.
        pairs = []
        for cookie in cookies:
            pairs.append("%s=%s" % (cookie.name, cookie.value))
        return ';'.join(pairs)

    str_cookies = ['$Version="1"']
    for cookie in cookies:
        if cookie.version != 0:
            # V1 cookie, nice and easy
            str_cookies.append("%s=%s" % (cookie.name, quoteString(cookie.value)))
        else:
            # Version 0 cookie: make sure name and value are valid V1 syntax.
            # If they are, they are used as is, so in *most* cases the cookie
            # looks literally the same on output as it did on input.
            # An invalid name means the cookie (and its attributes) is
            # skipped; an invalid value is quoted, hoping for the best on
            # the other side.
            if cookie_validname_re.match(cookie.name) is None:
                continue

            value = cookie.value
            if cookie_validvalue_re.match(cookie.value) is None:
                value = quoteString(value)

            str_cookies.append("%s=%s" % (cookie.name, value))

        if cookie.path:
            str_cookies.append("$Path=%s" % quoteString(cookie.path))
        if cookie.domain:
            str_cookies.append("$Domain=%s" % quoteString(cookie.domain))
        if cookie.ports is not None:
            if len(cookie.ports) == 0:
                str_cookies.append("$Port")
            else:
                portList = ",".join([str(x) for x in cookie.ports])
                str_cookies.append("$Port=%s" % quoteString(portList))
    return ';'.join(str_cookies)
806
def parseSetCookie(headers):
    """Parse raw Set-Cookie header values (Netscape format) into Cookies.

    Each header is split on ';' into name[=value] parts, with spaces and
    tabs stripped, and handed to makeCookieFromList(). A header that raises
    ValueError is skipped without affecting the others.
    """
    setCookies = []
    for header in headers:
        try:
            pairs = []
            for part in header.split(';'):
                pieces = part.split('=', 1)
                name = pieces[0].strip(' \t')
                if len(pieces) == 1:
                    value = None
                else:
                    value = pieces[1].strip(' \t')
                pairs.append((name, value))

            setCookies.append(makeCookieFromList(pairs, True))
        except ValueError:
            # If we can't parse one Set-Cookie, ignore it,
            # but not the rest of Set-Cookies.
            pass
    return setCookies
833
def parseSetCookie2(toks):
    """Parse a tokenized Set-Cookie2 header into a list of Cookie objects.

    The tokens are split on ',' into cookies and each cookie on ';' into
    key[=value] attributes. A cookie that cannot be parsed is skipped
    without affecting the others.
    """
    outCookies = []
    for rawCookie in split(toks, Token(',')):
        try:
            # The attributes are parsed inside the try block so that a
            # malformed one only drops this cookie. They used to be parsed
            # eagerly for all cookies before the loop, where a ValueError
            # escaped the handler and aborted the whole header.
            attributes = [parseKeyValue(attribute)
                          for attribute in split(rawCookie, Token(';'))]
            outCookies.append(makeCookieFromList(attributes, False))
        except ValueError:
            # Again, if we can't handle one cookie -- ignore it.
            pass
    return outCookies
844
def makeCookieFromList(tup, netscapeFormat):
    """Build a Cookie from a list of (name, value) pairs.

    tup[0] holds the cookie's own name and value; the remaining pairs are
    attributes, matched case-insensitively. A value of None means the
    attribute was given without '='. netscapeFormat says whether the values
    are raw Netscape-style strings, in which case surrounding double quotes
    are removed from the expires and port values.

    "max-age" takes precedence over any later "expires". Unknown attributes
    are ignored.

    Raises ValueError if the name or value is missing, if the name starts
    with '$', or if an attribute value cannot be converted.
    """
    name, value = tup[0]
    if name is None or value is None:
        raise ValueError("Cookie has missing name or value")
    if name.startswith("$"):
        raise ValueError("Invalid cookie name: %r, starts with '$'." % name)
    cookie = Cookie(name, value)
    hadMaxAge = False

    for name,value in tup[1:]:
        name = name.lower()

        if value is None:
            if name in ("discard", "secure"):
                # Boolean attrs
                value = True
            elif name != "port":
                # Can be either boolean or explicit
                continue

        if name in ("comment", "commenturl", "discard", "domain", "path", "secure"):
            # simple cases
            setattr(cookie, name, value)
        elif name == "expires" and not hadMaxAge:
            # The length test keeps an empty value from raising IndexError
            # on value[0]; callers only handle ValueError.
            if netscapeFormat and len(value) >= 2 and value[0] == '"' and value[-1] == '"':
                value = value[1:-1]
            if not value.strip():
                # Reject a blank date here with the exception type callers
                # expect, instead of relying on how the date parser fails.
                raise ValueError("Cookie has an empty expires attribute")
            cookie.expires = parseDateTime(value)
        elif name == "max-age":
            hadMaxAge = True
            cookie.expires = int(value) + time()
        elif name == "port":
            if value is None:
                cookie.ports = ()
            else:
                if netscapeFormat and len(value) >= 2 and value[0] == '"' and value[-1] == '"':
                    value = value[1:-1]
                # int('') raises ValueError for an empty port list.
                cookie.ports = tuple([int(s) for s in value.split(',')])
        elif name == "version":
            cookie.version = int(value)

    return cookie
886        
887
def generateSetCookie(cookies):
    """Render each cookie as one Set-Cookie header value.

    Every cookie becomes "name=value" followed by "expires=", "path=",
    "domain=" and "secure" for those attributes that are truthy, joined
    with "; ".  Returns the list of rendered strings, one per cookie.
    """
    def render(cookie):
        pieces = ["%s=%s" % (cookie.name, cookie.value)]
        if cookie.expires:
            pieces.append("expires=%s" % generateDateTime(cookie.expires))
        if cookie.path:
            pieces.append("path=%s" % cookie.path)
        if cookie.domain:
            pieces.append("domain=%s" % cookie.domain)
        if cookie.secure:
            pieces.append("secure")
        return '; '.join(pieces)

    return [render(cookie) for cookie in cookies]
903
def generateSetCookie2(cookies):
    """Render each cookie as one Set-Cookie2 header value.

    Every cookie becomes "name=<quoted value>" followed by the attributes
    Comment, CommentURL, Discard, Domain, Max-Age, Path, Port and Secure
    for those that are set, and always ends with Version="1".  The pieces
    are joined with "; ".  A cookie whose ports attribute is an empty
    sequence is rendered with a bare "Port"; ports of None omits it.

    Max-Age is derived from the absolute cookie.expires timestamp and the
    current time.  It is emitted as a whole number of seconds and never
    below zero, since the attribute is defined as a non-negative integer
    (an already expired cookie is sent with Max-Age=0).

    Returns the list of rendered strings, one per cookie.
    """
    setCookies = []
    for cookie in cookies:
        out = ["%s=%s" % (cookie.name, quoteString(cookie.value))]
        if cookie.comment:
            out.append("Comment=%s" % quoteString(cookie.comment))
        if cookie.commenturl:
            out.append("CommentURL=%s" % quoteString(cookie.commenturl))
        if cookie.discard:
            out.append("Discard")
        if cookie.domain:
            out.append("Domain=%s" % quoteString(cookie.domain))
        if cookie.expires:
            # time() normally returns a float; truncate and clamp so the
            # header never carries a fractional or negative lifetime.
            remaining = max(0, int(cookie.expires - time()))
            out.append("Max-Age=%s" % remaining)
        if cookie.path:
            out.append("Path=%s" % quoteString(cookie.path))
        if cookie.ports is not None:
            if len(cookie.ports) == 0:
                out.append("Port")
            else:
                out.append("Port=%s" % quoteString(",".join([str(x) for x in cookie.ports])))
        if cookie.secure:
            out.append("Secure")
        out.append('Version="1"')
        setCookies.append('; '.join(out))
    return setCookies
930
931 ##### Random stuff that looks useful.
932 # def sortMimeQuality(s):
933 #     def sorter(item1, item2):
934 #         if item1[0] == '*':
935 #             if item2[0] == '*':
936 #                 return 0
937
938
939 # def sortQuality(s):
940 #     def sorter(item1, item2):
941 #         if item1[1] < item2[1]:
942 #             return -1
943 #         if item1[1] < item2[1]:
944 #             return 1
945 #         if item1[0] == item2[0]:
946 #             return 0
947             
948            
949 # def getMimeQuality(mimeType, accepts):
950 #     type,args = parseArgs(mimeType)
951 #     type=type.split(Token('/'))
952 #     if len(type) != 2:
953 #         raise ValueError, "MIME Type "+s+" invalid."
954
955 #     for accept in accepts:
956 #         accept,acceptQual=accept
957 #         acceptType=accept[0:1]
958 #         acceptArgs=accept[2]
959         
960 #         if ((acceptType == type or acceptType == (type[0],'*') or acceptType==('*','*')) and
961 #             (args == acceptArgs or len(acceptArgs) == 0)):
962 #             return acceptQual
963
964 # def getQuality(type, accepts):
965 #     qual = accepts.get(type)
966 #     if qual is not None:
967 #         return qual
968     
969 #     return accepts.get('*')
970
971 # Headers object
class __RecalcNeeded(object):
    """Type of the sentinel that marks one representation of a header as stale."""

    def __repr__(self):
        return "<RecalcNeeded>"

# Stored in place of a raw or parsed header value that has to be regenerated
# from the other representation before it can be handed out.
_RecalcNeeded = __RecalcNeeded()

# Default registries used by Headers: header name -> sequence of callables.
# They start out empty and are filled in (and their keys lower-cased) at the
# bottom of this module.
DefaultHTTPParsers = {}
DefaultHTTPGenerators = {}

class Headers:
    """This class stores the HTTP headers as both a parsed representation and
    the raw string representation. It converts between the two on demand.

    Header names are lower-cased on every lookup, so access is
    case-insensitive.  Whenever one representation is set, the other one is
    replaced by the _RecalcNeeded sentinel and recomputed lazily, using the
    chain of callables registered for that header in self.parsers or
    self.generators.
    """

    def __init__(self, parsers=DefaultHTTPParsers, generators=DefaultHTTPGenerators):
        """Create an empty header collection.

        parsers and generators map lower-cased header names to sequences of
        callables; the module-wide default registries are used (and shared,
        not copied) unless others are given.
        """
        self._raw_headers = ODict()
        self._headers = ODict()
        self.parsers = parsers
        self.generators = generators

    def _setRawHeaders(self, headers):
        """Replace all headers with the given mapping of name to list of raw
        strings.  Keys are used as they are, so they should already be
        lower-case (every lookup lower-cases the requested name).
        """
        self._raw_headers = headers
        # Every parsed value is now stale.  Leaving the parsed mapping empty
        # would make getHeader() report the headers as absent and would make
        # removeHeader() fail, so each name is marked for re-parsing instead.
        parsed = {}
        for name in headers.keys():
            parsed[name] = _RecalcNeeded
        self._headers = parsed

    def _addHeader(self, name, strvalue):
        """Add a header & value to the collection of headers. Appends not replaces
        a previous header of the same name."""
        name = name.lower()
        old = self._raw_headers.get(name, None)
        if old is None:
            old = []
            self._raw_headers[name] = old
        old.append(strvalue)
        self._headers[name] = _RecalcNeeded

    def _toParsed(self, name):
        """Parse the raw value of header `name`, cache the result and return it.

        The registered parser callables are applied in order, each one
        receiving the previous one's result, starting from the list of raw
        strings.  If any of them raises ValueError the parsed value is None.

        Raises ValueError if no parser is registered for the header.
        """
        parser = self.parsers.get(name, None)
        if parser is None:
            raise ValueError("No header parser for header '%s', either add one or use getHeaderRaw." % (name,))

        h = self._raw_headers[name]
        try:
            for p in parser:
                h = p(h)
        except ValueError:
            # An unparseable header is treated like a header without a value.
            h = None

        self._headers[name] = h
        return h

    def _toRaw(self, name):
        """Generate the raw value of header `name` from its parsed value,
        cache the result and return it.

        The registered generator callables are applied in order, each one
        receiving the previous one's result.

        Raises ValueError if no generator is registered for the header.
        """
        generator = self.generators.get(name, None)
        if generator is None:
            raise ValueError("No header generator for header '%s', either add one or use setHeaderRaw." % (name,))

        h = self._headers[name]
        for g in generator:
            h = g(h)

        self._raw_headers[name] = h
        return h

    def hasHeader(self, name):
        """Does a header with the given name exist?"""
        name = name.lower()
        return name in self._raw_headers

    def getRawHeaders(self, name, default=None):
        """Returns a list of headers matching the given name as the raw string given.

        If the header doesn't exist, return default (or None if not specified).
        If only the parsed form is current, the raw form is generated first,
        which raises ValueError when no generator exists for the header.
        """
        name = name.lower()
        raw_header = self._raw_headers.get(name, default)
        if raw_header is not _RecalcNeeded:
            return raw_header

        return self._toRaw(name)

    def getHeader(self, name, default=None):
        """Returns the parsed representation of the given header.
        The exact form of the return value depends on the header in question.

        If no parser for the header exists, raise ValueError.

        If the header doesn't exist, return default (or None if not specified)
        """
        name = name.lower()
        parsed = self._headers.get(name, default)
        if parsed is not _RecalcNeeded:
            return parsed
        return self._toParsed(name)

    def setRawHeaders(self, name, value):
        """Sets the raw representation of the given header.
        Value should be a list of strings, each being one header of the
        given name.
        """
        name = name.lower()
        self._raw_headers[name] = value
        self._headers[name] = _RecalcNeeded

    def setHeader(self, name, value):
        """Sets the parsed representation of the given header.
        The form value has to take depends on the header in question; it
        must be something the header's registered generators accept.
        """
        name = name.lower()
        self._raw_headers[name] = _RecalcNeeded
        self._headers[name] = value

    def addRawHeader(self, name, value):
        """
        Add a raw value to a header that may or may not already exist.
        If it exists, add it as a separate header to output; do not
        replace anything.
        """
        self.setRawHeaders(name, self.getRawHeaders(name, []) + [value])

    def removeHeader(self, name):
        """Removes the header named.  Removing an absent header is a no-op."""
        name = name.lower()
        # The two mappings are cleaned up independently so that a header
        # known to only one of them cannot make the removal fail.
        if name in self._raw_headers:
            del self._raw_headers[name]
        if name in self._headers:
            del self._headers[name]

    def __repr__(self):
        return '<Headers: Raw: %s Parsed: %s>' % (self._raw_headers, self._headers)

    def canonicalNameCaps(self, name):
        """Return the name with the canonical capitalization, if known,
        otherwise, Caps-After-Dashes"""
        return header_case_mapping.get(name) or dashCapitalize(name)

    def getAllRawHeaders(self):
        """Return an iterator of key,value pairs of all headers
        contained in this object, as strings. The keys are capitalized
        in canonical capitalization."""
        for k, v in self._raw_headers.iteritems():
            if v is _RecalcNeeded:
                # Only the parsed form is current; regenerate the raw one.
                v = self._toRaw(k)
            yield self.canonicalNameCaps(k), v

    def makeImmutable(self):
        """Make this header set immutable. All mutating operations will
        raise an exception."""
        # Instance attributes shadow the methods; addRawHeader is covered as
        # well because it goes through self.setRawHeaders.
        self.setHeader = self.setRawHeaders = self.removeHeader = self._mutateRaise

    def _mutateRaise(self, *args, **kwargs):
        """Stand-in for the mutating methods of an immutable header set.
        Accepts any call signature so that keyword calls raise the same
        AttributeError as positional ones."""
        raise AttributeError("This header object is immutable as the headers have already been sent.")
1126
1127        
1128 """The following dicts are all mappings of header to list of operations
1129    to perform. The first operation should generally be 'tokenize' if the
1130    header can be parsed according to the normal tokenization rules. If
1131    it cannot, generally the first thing you want to do is take only the
1132    last instance of the header (in case it was sent multiple times, which
1133    is strictly an error, but we're nice.).
1134    """
1135
def iteritems(x):
    """Return x.iteritems(); usable as the first step of a generator chain."""
    return x.iteritems()
1137
1138
# Parsers for general headers.  Each value is the chain of callables that
# Headers._toParsed applies, in order, to the list of raw header strings.
# Commented-out entries are headers without a parser so far.
parser_general_headers = {
    # 'Cache-Control': (tokenize, ...)
    'Connection': (tokenize, filterTokens),
    'Date': (last, parseDateTime),
    # 'Pragma': tokenize
    # 'Trailer': tokenize
    'Transfer-Encoding': (tokenize, filterTokens),
    # 'Upgrade': tokenize
    # 'Via': tokenize, stripComment
    # 'Warning': tokenize
}
1150
# Generators for general headers.  Each value is the chain of callables that
# Headers._toRaw applies, in order, to the parsed value.
# Commented-out entries are headers without a generator so far.
generator_general_headers = {
    # 'Cache-Control':
    'Connection': (generateList, singleHeader),
    'Date': (generateDateTime, singleHeader),
    # 'Pragma':
    # 'Trailer':
    'Transfer-Encoding': (generateList, singleHeader),
    # 'Upgrade':
    # 'Via':
    # 'Warning':
}
1162
# Parsers for request headers; same layout as the other parser tables:
# header name -> chain of callables applied in order to the raw strings.
parser_request_headers = {
    'Accept': (tokenize, listParser(parseAccept), ODict),
    'Accept-Charset': (tokenize, listParser(parseAcceptQvalue), ODict, addDefaultCharset),
    'Accept-Encoding': (tokenize, listParser(parseAcceptQvalue), ODict, addDefaultEncoding),
    'Accept-Language': (tokenize, listParser(parseAcceptQvalue), ODict),
    # 'Authorization': str  # what is "credentials"
    'Cookie': (parseCookie,),
    'Expect': (tokenize, listParser(parseExpect), ODict),
    'From': (last,),
    'Host': (last,),
    'If-Match': (tokenize, listParser(parseStarOrETag), list),
    'If-Modified-Since': (last, parseDateTime),
    'If-None-Match': (tokenize, listParser(parseStarOrETag), list),
    'If-Range': (parseIfRange,),
    'If-Unmodified-Since': (last, parseDateTime),
    'Max-Forwards': (last, int),
    # 'Proxy-Authorization': str,  # what is "credentials"
    'Range': (tokenize, parseRange),
    'Referer': (last, str),  # TODO: URI object?
    'TE': (tokenize, listParser(parseAcceptQvalue), ODict),
    'User-Agent': (last, str),
}
1185
1186
# Generators for request headers; same layout as the other generator tables:
# header name -> chain of callables applied in order to the parsed value.
generator_request_headers = {
    'Accept': (iteritems, listGenerator(generateAccept), singleHeader),
    'Accept-Charset': (iteritems, listGenerator(generateAcceptQvalue), singleHeader),
    'Accept-Encoding': (iteritems, removeDefaultEncoding, listGenerator(generateAcceptQvalue), singleHeader),
    'Accept-Language': (iteritems, listGenerator(generateAcceptQvalue), singleHeader),
    # 'Authorization': str  # what is "credentials"
    'Cookie': (generateCookie, singleHeader),
    'Expect': (iteritems, listGenerator(generateExpect), singleHeader),
    'From': (str, singleHeader),
    'Host': (str, singleHeader),
    'If-Match': (listGenerator(generateStarOrETag), singleHeader),
    'If-Modified-Since': (generateDateTime, singleHeader),
    'If-None-Match': (listGenerator(generateStarOrETag), singleHeader),
    'If-Range': (generateIfRange, singleHeader),
    'If-Unmodified-Since': (generateDateTime, singleHeader),
    'Max-Forwards': (str, singleHeader),
    # 'Proxy-Authorization': str,  # what is "credentials"
    'Range': (generateRange, singleHeader),
    'Referer': (str, singleHeader),
    'TE': (iteritems, listGenerator(generateAcceptQvalue), singleHeader),
    'User-Agent': (str, singleHeader),
}
1209
# Parsers for response headers: header name -> chain of callables applied
# in order to the raw strings.  Commented-out names have no parser so far.
parser_response_headers = {
    'Accept-Ranges': (tokenize, filterTokens),
    'Age': (last, int),
    'ETag': (tokenize, ETag.parse),
    'Location': (last,),  # TODO: uri object?
    # 'Proxy-Authenticate'
    'Retry-After': (last, parseRetryAfter),
    'Server': (last,),
    'Set-Cookie': (parseSetCookie,),
    'Set-Cookie2': (tokenize, parseSetCookie2),
    'Vary': (tokenize, filterTokens),
    # 'WWW-Authenticate'
}
1223
# Generators for response headers: header name -> chain of callables applied
# in order to the parsed value.  Commented-out names have no generator so far.
generator_response_headers = {
    'Accept-Ranges': (generateList, singleHeader),
    'Age': (str, singleHeader),
    'ETag': (ETag.generate, singleHeader),
    'Location': (str, singleHeader),
    # 'Proxy-Authenticate'
    'Retry-After': (generateRetryAfter, singleHeader),
    'Server': (str, singleHeader),
    'Set-Cookie': (generateSetCookie,),
    'Set-Cookie2': (generateSetCookie2,),
    'Vary': (generateList, singleHeader),
    # 'WWW-Authenticate'
}
1237
# Parsers for entity headers: header name -> chain of callables applied in
# order to the raw strings.  Allow and Content-Type are tokenized with
# foldCase=False, unlike the other tokenized headers in this table.
parser_entity_headers = {
    'Allow': (lambda header: tokenize(header, foldCase=False), filterTokens),
    'Content-Encoding': (tokenize, filterTokens),
    'Content-Language': (tokenize, filterTokens),
    'Content-Length': (last, int),
    'Content-Location': (last,),  # TODO: URI object?
    'Content-MD5': (last, parseContentMD5),
    'Content-Range': (last, parseContentRange),
    'Content-Type': (lambda header: tokenize(header, foldCase=False), parseContentType),
    'Expires': (last, parseExpires),
    'Last-Modified': (last, parseDateTime),
}
1250
# Generators for entity headers: header name -> chain of callables applied
# in order to the parsed value.
generator_entity_headers = {
    'Allow': (generateList, singleHeader),
    'Content-Encoding': (generateList, singleHeader),
    'Content-Language': (generateList, singleHeader),
    'Content-Length': (str, singleHeader),
    'Content-Location': (str, singleHeader),
    # The base64 text has its leading/trailing newlines stripped.
    'Content-MD5': (base64.encodestring, lambda encoded: encoded.strip("\n"), singleHeader),
    'Content-Range': (generateContentRange, singleHeader),
    'Content-Type': (generateContentType, singleHeader),
    'Expires': (generateDateTime, singleHeader),
    'Last-Modified': (generateDateTime, singleHeader),
}
1263
# Fill the default parser registry from the four per-category tables.
# Later updates would override earlier ones for a header name listed twice.
DefaultHTTPParsers.update(parser_general_headers)
DefaultHTTPParsers.update(parser_request_headers)
DefaultHTTPParsers.update(parser_response_headers)
DefaultHTTPParsers.update(parser_entity_headers)

# Same for the default generator registry.
DefaultHTTPGenerators.update(generator_general_headers)
DefaultHTTPGenerators.update(generator_request_headers)
DefaultHTTPGenerators.update(generator_response_headers)
DefaultHTTPGenerators.update(generator_entity_headers)
1273
# Lower-cased header name -> canonically capitalized header name.
header_case_mapping = {}

def casemappingify(d):
    """Record the spelling of every key of d in header_case_mapping,
    indexed by the key's lower-cased form.  d itself is not modified."""
    spellings = {}
    for canonical in d.keys():
        spellings[canonical.lower()] = canonical
    # Merged in one step, only after every key has been processed.
    header_case_mapping.update(spellings)
1280
def lowerify(d):
    """Lower-case all keys of d in place; the dict object stays the same."""
    lowered = {}
    for key, value in d.items():
        lowered[key.lower()] = value
    # Swap the contents rather than the object so existing references to d
    # see the lower-cased keys.
    d.clear()
    d.update(lowered)
1285
1286
# Remember the canonical capitalization of every registered header name.
# This has to happen before lowerify() below, which replaces the
# capitalized keys with lower-cased ones.
casemappingify(DefaultHTTPParsers)
casemappingify(DefaultHTTPGenerators)

# Lower-case the registry keys in place; Headers looks headers up by
# lower-cased name.
lowerify(DefaultHTTPParsers)
lowerify(DefaultHTTPGenerators)
Note: See TracBrowser for help on using the browser.