import pytest

from bs4.element import Tag
from bs4.formatter import (
    Formatter,
    HTMLFormatter,
    XMLFormatter,
)
from . import SoupTest


class TestFormatter(SoupTest):
    """Tests for Formatter and its HTML/XML subclasses."""

    # NOTE(review): the markup inside the string literals of this class
    # was restored after it had been stripped from the file; confirm the
    # tag and attribute names against the upstream test suite.

    def test_default_attributes(self):
        """Formatter.attributes() yields a tag's attributes sorted by name."""
        formatter = Formatter()
        tag = Tag(name="tag")
        tag['b'] = 1
        tag['a'] = 2

        # Attributes come out sorted by name. In Python 3, attributes
        # normally come out of a dictionary in the order they were
        # added.
        assert [('a', 2), ('b', 1)] == formatter.attributes(tag)

        # This works even if Tag.attrs is None, though this shouldn't
        # normally happen.
        tag.attrs = None
        assert [] == formatter.attributes(tag)

        assert ' ' == formatter.indent

    def test_sort_attributes(self):
        """A subclass can override attributes() to filter or reorder them."""

        class UnsortedFormatter(Formatter):
            def attributes(self, tag):
                # Remember which tag we were asked about so the test
                # can verify that this override was actually invoked.
                self.called_with = tag
                for k, v in sorted(tag.attrs.items()):
                    if k == 'ignore':
                        continue
                    yield k, v

        soup = self.soup('<p cval="1" aval="2" ignore="ignored"></p>')
        formatter = UnsortedFormatter()
        decoded = soup.decode(formatter=formatter)

        # attributes() was called on the <p> tag. It filtered out one
        # attribute and sorted the other two.
        assert formatter.called_with == soup.p
        assert '<p aval="2" cval="1"></p>' == decoded

    def test_empty_attributes_are_booleans(self):
        """Check empty_attributes_are_booleans and which formatters enable it."""
        for name in ('html', 'minimal', None):
            formatter = HTMLFormatter.REGISTRY[name]
            assert formatter.empty_attributes_are_booleans is False

        formatter = XMLFormatter.REGISTRY[None]
        assert formatter.empty_attributes_are_booleans is False

        formatter = HTMLFormatter.REGISTRY['html5']
        assert formatter.empty_attributes_are_booleans is True

        # Verify that the constructor sets the value.
        formatter = Formatter(empty_attributes_are_booleans=True)
        assert formatter.empty_attributes_are_booleans is True

        # Now demonstrate what it does to markup.
        for markup in (
            "<option selected></option>",
            '<option selected=""></option>',
        ):
            soup = self.soup(markup)
            # NOTE(review): the loop variable is not passed to encode();
            # both assertions always use the 'html' and 'html5'
            # formatters. Kept as-is because substituting the name may
            # not be valid for every entry (e.g. 'xml') -- confirm the
            # intent before changing it.
            for _formatter_name in ('html', 'minimal', 'xml', None):
                assert b'<option selected=""></option>' == soup.option.encode(
                    formatter='html'
                )
                assert b'<option selected></option>' == soup.option.encode(
                    formatter='html5'
                )

    @pytest.mark.parametrize(
        "indent,expect",
        [
            # No indentation at all.
            (None, '<a>\n<b>\ntext\n</b>\n</a>'),
            (-1, '<a>\n<b>\ntext\n</b>\n</a>'),
            (0, '<a>\n<b>\ntext\n</b>\n</a>'),
            ("", '<a>\n<b>\ntext\n</b>\n</a>'),

            # A positive integer is a number of spaces per level.
            (1, '<a>\n <b>\n  text\n </b>\n</a>'),
            (2, '<a>\n  <b>\n    text\n  </b>\n</a>'),

            # A string is repeated once per level.
            ("\t", '<a>\n\t<b>\n\t\ttext\n\t</b>\n</a>'),
            ('abc', '<a>\nabc<b>\nabcabctext\nabc</b>\n</a>'),

            # Some invalid inputs -- the default behavior is used.
            (object(), '<a>\n <b>\n  text\n </b>\n</a>'),
            (b'bytes', '<a>\n <b>\n  text\n </b>\n</a>'),
        ]
    )
    def test_indent(self, indent, expect):
        """Pretty-print a tree with a given indent and verify the result."""
        soup = self.soup("<a><b>text</b></a>")
        formatter = Formatter(indent=indent)
        assert soup.prettify(formatter=formatter) == expect

        # Pretty-printing only happens with prettify(), not
        # encode().
        assert soup.encode(formatter=formatter) != expect

    def test_default_indent_value(self):
        """A Formatter built with no arguments indents by a single space."""
        formatter = Formatter()
        assert formatter.indent == ' '