Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Z
Zope
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
Zope
Commits
7b4ae31a
Commit
7b4ae31a
authored
Jul 14, 2004
by
Fred Drake
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
remove local version of the HTMLParser module; this is now part of Python
parent
3c86de13
Changes
2
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
0 additions
and
715 deletions
+0
-715
lib/python/TAL/HTMLParser.py
lib/python/TAL/HTMLParser.py
+0
-402
lib/python/TAL/tests/test_htmlparser.py
lib/python/TAL/tests/test_htmlparser.py
+0
-313
No files found.
lib/python/TAL/HTMLParser.py
deleted
100644 → 0
View file @
3c86de13
This diff is collapsed.
Click to expand it.
lib/python/TAL/tests/test_htmlparser.py
deleted
100755 → 0
View file @
3c86de13
#! /usr/bin/env python1.5
"""Tests for HTMLParser.py."""
import
sys
from
TAL.tests
import
utils
import
unittest
from
TAL
import
HTMLParser
class EventCollector(HTMLParser.HTMLParser):
    """Parser subclass that records every callback as an event tuple.

    Each ``handle_*`` callback (and ``unknown_decl``) appends a tuple to
    ``self.events``; the first item names the kind of event and the
    remaining items are the arguments the callback received.
    """

    def __init__(self):
        self.events = []
        # Bound once so the handlers below can record with a single call.
        self.append = self.events.append
        HTMLParser.HTMLParser.__init__(self)

    def get_events(self):
        """Return the recorded events with adjacent "data" events merged.

        Normalizes the list of events so that buffer artefacts don't
        separate runs of contiguous characters.  The normalized events
        replace the contents of ``self.events``, which is also the list
        that is returned.
        """
        merged = []
        prev_kind = None
        for event in self.events:
            kind = event[0]
            if kind == prev_kind == "data":
                merged[-1] = ("data", merged[-1][1] + event[1])
            else:
                merged.append(event)
            prev_kind = kind
        # Update the list in place: ``self.append`` is bound to this very
        # list object, so rebinding ``self.events`` to a new list would make
        # every event recorded after this call disappear.
        self.events[:] = merged
        return self.events

    # structure markup

    def handle_starttag(self, tag, attrs):
        """Record a ("starttag", tag, attrs) event."""
        self.append(("starttag", tag, attrs))

    def handle_startendtag(self, tag, attrs):
        """Record a ("startendtag", tag, attrs) event."""
        self.append(("startendtag", tag, attrs))

    def handle_endtag(self, tag):
        """Record an ("endtag", tag) event."""
        self.append(("endtag", tag))

    # all other markup

    def handle_comment(self, data):
        """Record a ("comment", data) event."""
        self.append(("comment", data))

    def handle_charref(self, data):
        """Record a ("charref", data) event."""
        self.append(("charref", data))

    def handle_data(self, data):
        """Record a ("data", data) event."""
        self.append(("data", data))

    def handle_decl(self, data):
        """Record a ("decl", data) event."""
        self.append(("decl", data))

    def handle_entityref(self, data):
        """Record an ("entityref", data) event."""
        self.append(("entityref", data))

    def handle_pi(self, data):
        """Record a ("pi", data) event."""
        self.append(("pi", data))

    def unknown_decl(self, decl):
        """Record an ("unknown decl", decl) event."""
        self.append(("unknown decl", decl))
class EventCollectorExtra(EventCollector):
    """Collector that additionally logs the text of each start tag."""

    def handle_starttag(self, tag, attrs):
        """Record the usual "starttag" event, then a "starttag_text" event.

        The second event carries whatever ``get_starttag_text()`` reports
        for the tag that was just handled.
        """
        EventCollector.handle_starttag(self, tag, attrs)
        raw_text = self.get_starttag_text()
        self.append(("starttag_text", raw_text))
class TestCaseBase(unittest.TestCase):
    """Shared helpers for checking the event stream produced by a parser."""

    # Constant pieces of source and events
    prologue = ""
    epilogue = ""
    initial_events = []
    final_events = []

    def _run_check(self, source, events, collector=EventCollector):
        """Feed *source* to a fresh collector and compare its events.

        *source* is iterated and each item is fed separately, so a plain
        string is fed one character at a time while a list is fed chunk by
        chunk.  ``self.prologue`` is fed first in one piece and
        ``self.epilogue`` is fed last, item by item.  The collected events
        must equal ``initial_events + events + final_events``.
        """
        parser = collector()
        parser.feed(self.prologue)
        for chunk in source:
            parser.feed(chunk)
        for chunk in self.epilogue:
            parser.feed(chunk)
        parser.close()
        actual = parser.get_events()
        expected = self.initial_events + events + self.final_events
        # assertEqual reports both sides on failure and, unlike the
        # deprecated ``assert_`` alias, is still available in current
        # unittest releases.
        self.assertEqual(actual, expected)

    def _run_check_extra(self, source, events):
        """Run ``_run_check`` using the EventCollectorExtra collector."""
        self._run_check(source, events, EventCollectorExtra)

    def _parse_error(self, source):
        """Assert that parsing *source* raises HTMLParser.HTMLParseError."""
        def parse(text):
            parser = HTMLParser.HTMLParser()
            parser.feed(text)
            parser.close()
        # assertRaises forwards *source* to parse(), so no default-argument
        # binding is needed to get the value into the helper.
        self.assertRaises(HTMLParser.HTMLParseError, parse, source)
class HTMLParserTestCase(TestCaseBase):
    """Event-level checks of the HTML parser.

    Every ``check_*`` method feeds a piece of markup through the helpers
    inherited from TestCaseBase and states either the exact events that
    must be produced or that parsing must fail.
    """

    def check_processing_instruction_only(self):
        self._run_check("<?processing instruction>", [
            ("pi", "processing instruction"),
            ])

    def check_simple_html(self):
        # The character references are spelled out in the input (&#32; and
        # &#x201C;) because the expected events include the matching
        # "charref" entries.
        self._run_check("""
<!DOCTYPE html PUBLIC 'foo'>
<HTML>&entity;&#32;
<!--comment1a
-></foo><bar><<?pi?></foo<bar
comment1b-->
<Img sRc='Bar' isMAP>sample
text
&#x201C;
<!--comment2a-- --comment2b-->
</Html>
""", [
            ("data", "\n"),
            ("decl", "DOCTYPE html PUBLIC 'foo'"),
            ("data", "\n"),
            ("starttag", "html", []),
            ("entityref", "entity"),
            ("charref", "32"),
            ("data", "\n"),
            ("comment", "comment1a\n-></foo><bar><<?pi?></foo<bar\ncomment1b"),
            ("data", "\n"),
            ("starttag", "img", [("src", "Bar"), ("ismap", None)]),
            ("data", "sample\ntext\n"),
            ("charref", "x201C"),
            ("data", "\n"),
            ("comment", "comment2a-- --comment2b"),
            ("data", "\n"),
            ("endtag", "html"),
            ("data", "\n"),
            ])

    def check_unclosed_entityref(self):
        self._run_check("&entityref foo", [
            ("entityref", "entityref"),
            ("data", " foo"),
            ])

    def check_doctype_decl(self):
        inside = """\
DOCTYPE html [
<!ELEMENT html - O EMPTY>
<!ATTLIST html
version CDATA #IMPLIED
profile CDATA 'DublinCore'>
<!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'>
<!ENTITY myEntity 'internal parsed entity'>
<!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'>
<!ENTITY % paramEntity 'name|name|name'>
%paramEntity;
<!-- comment -->
]"""
        self._run_check("<!%s>" % inside, [
            ("decl", inside),
            ])

    def check_bad_nesting(self):
        # Strangely, this *is* supposed to test that overlapping
        # elements are allowed.  HTMLParser is more geared toward
        # lexing the input than parsing the structure.
        self._run_check("<a><b></a></b>", [
            ("starttag", "a", []),
            ("starttag", "b", []),
            ("endtag", "a"),
            ("endtag", "b"),
            ])

    def check_bare_ampersands(self):
        self._run_check("this text & contains & ampersands &", [
            ("data", "this text & contains & ampersands &"),
            ])

    def check_bare_pointy_brackets(self):
        self._run_check("this < text > contains < bare>pointy< brackets", [
            ("data", "this < text > contains < bare>pointy< brackets"),
            ])

    def check_attr_syntax(self):
        # The same attribute list must come out regardless of the quoting
        # style and of the whitespace placed around names, "=" and values.
        output = [
            ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)])
            ]
        self._run_check("""<a b='v' c="v" d=v e>""", output)
        self._run_check("""<a b = 'v' c = "v" d = v e>""", output)
        self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output)
        self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output)

    def check_attr_values(self):
        self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""", [
            ("starttag", "a", [("b", "xxx\n\txxx"),
                               ("c", "yyy\t\nyyy"),
                               ("d", "\txyz\n")])
            ])
        self._run_check("""<a b='' c="">""", [
            ("starttag", "a", [("b", ""), ("c", "")]),
            ])

    def check_attr_entity_replacement(self):
        # The input carries the five entity references; the expected
        # attribute value is their replacement text.
        self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
            ("starttag", "a", [("b", "&><\"'")]),
            ])

    def check_attr_funky_names(self):
        self._run_check("""<a a.b='v' c:d=v e-f=v>""", [
            ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]),
            ])

    def check_illegal_declarations(self):
        self._parse_error('<!spacer type="block" height="25">')

    def check_starttag_end_boundary(self):
        self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])
        self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])])

    def check_buffer_artefacts(self):
        # Each call splits the same tag at a different position to make
        # sure chunk boundaries do not change the outcome.
        output = [("starttag", "a", [("b", "<")])]
        self._run_check(["<a b='<'>"], output)
        self._run_check(["<a ", "b='<'>"], output)
        self._run_check(["<a b", "='<'>"], output)
        self._run_check(["<a b=", "'<'>"], output)
        self._run_check(["<a b='<", "'>"], output)
        self._run_check(["<a b='<'", ">"], output)

        output = [("starttag", "a", [("b", ">")])]
        self._run_check(["<a b='>'>"], output)
        self._run_check(["<a ", "b='>'>"], output)
        self._run_check(["<a b", "='>'>"], output)
        self._run_check(["<a b=", "'>'>"], output)
        self._run_check(["<a b='>", "'>"], output)
        self._run_check(["<a b='>'", ">"], output)

    def check_starttag_junk_chars(self):
        self._parse_error("</>")
        self._parse_error("</$>")
        self._parse_error("</")
        self._parse_error("</a")
        self._parse_error("<a<a>")
        self._parse_error("</a<a>")
        self._parse_error("<!")
        self._parse_error("<a $>")
        self._parse_error("<a")
        self._parse_error("<a foo='bar'")
        self._parse_error("<a foo='bar")
        self._parse_error("<a foo='>'")
        self._parse_error("<a foo='>")
        self._parse_error("<a foo=>")

    def check_declaration_junk_chars(self):
        self._parse_error("<!DOCTYPE foo $ >")

    def check_startendtag(self):
        self._run_check("<p/>", [
            ("startendtag", "p", []),
            ])
        self._run_check("<p></p>", [
            ("starttag", "p", []),
            ("endtag", "p"),
            ])
        self._run_check("<p><img src='foo' /></p>", [
            ("starttag", "p", []),
            ("startendtag", "img", [("src", "foo")]),
            ("endtag", "p"),
            ])

    def check_get_starttag_text(self):
        s = """<foo:bar\none="1"\ttwo=2 >"""
        self._run_check_extra(s, [
            ("starttag", "foo:bar", [("one", "1"), ("two", "2")]),
            ("starttag_text", s)])

    def check_cdata_content(self):
        # Inside <script> both the comment-like text and the ampersand
        # sequence are expected to be reported as plain data.
        s = """<script> <!-- not a comment --> &not-an-entity-ref; </script>"""
        self._run_check(s, [
            ("starttag", "script", []),
            ("data", " <!-- not a comment --> &not-an-entity-ref; "),
            ("endtag", "script"),
            ])
        s = """<script> <not a='start tag'> </script>"""
        self._run_check(s, [
            ("starttag", "script", []),
            ("data", " <not a='start tag'> "),
            ("endtag", "script"),
            ])

    def check_enumerated_attr_type(self):
        s = "<!DOCTYPE doc [<!ATTLIST doc attr (a | b) >]>"
        self._run_check(s, [
            ('decl', 'DOCTYPE doc [<!ATTLIST doc attr (a | b) >]'),
            ])
# Support for the Zope regression test framework:
def test_suite():
    """Return a suite holding every ``check_*`` method of HTMLParserTestCase.

    A TestLoader with its method prefix set to "check_" collects the tests;
    this is what the deprecated ``unittest.makeSuite(cls, prefix)`` helper
    did, without relying on that helper being available.
    """
    loader = unittest.TestLoader()
    loader.testMethodPrefix = "check_"
    suite = unittest.TestSuite()
    suite.addTest(loader.loadTestsFromTestCase(HTMLParserTestCase))
    return suite
if __name__ == "__main__":
    # Run the suite through the project's test helper and turn its result
    # into a process exit status: 1 for a truthy result, 0 otherwise.
    errs = utils.run_suite(test_suite())
    if errs:
        sys.exit(1)
    sys.exit(0)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment