Commit a4036ced (author name not captured)

Fixed content_types handling of content sniffing so that HTML can still be
recognized when formal DOCTYPE DTD declarations are used. We will need to
revisit this sometime - we are probably asking for trouble by trying to
make too many things automatic as far as content-type detection goes...
parent e7d87a92
...@@ -83,7 +83,7 @@ ...@@ -83,7 +83,7 @@
# #
############################################################################## ##############################################################################
"""A utility module for content-type handling.""" """A utility module for content-type handling."""
__version__='$Revision: 1.7 $'[11:-2] __version__='$Revision: 1.8 $'[11:-2]
src=""" src="""
htm, html: text/html htm, html: text/html
...@@ -101,7 +101,7 @@ tar: application/x-tar ...@@ -101,7 +101,7 @@ tar: application/x-tar
zip: application/x-zip zip: application/x-zip
""" """
from string import split, strip, lower from string import split, strip, lower, find
import ts_regex, mimetypes import ts_regex, mimetypes
content_type={} content_type={}
...@@ -113,9 +113,13 @@ for l in filter(lambda s: s and s[:1] != '#', map(strip, split(src,'\n'))): ...@@ -113,9 +113,13 @@ for l in filter(lambda s: s and s[:1] != '#', map(strip, split(src,'\n'))):
find_binary=ts_regex.compile('[\0-\7]').search find_binary=ts_regex.compile('[\0-\7]').search
html_re=ts_regex.compile('<html>', ts_regex.casefold)
def text_type(s): def text_type(s):
return "text/" + (html_re.search(s) >= 0 and 'html' or 'plain') # Yuk. See if we can figure out the type by content.
if (lower(strip(s)[:6]) == '<html>' or find(s, '</') > 0):
return 'text/html'
return 'text/plain'
# This gives us a hook to add content types that # This gives us a hook to add content types that
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment