Hello.

Here is some bolded text", string_containers = { 'b': BString, 'p': PString, } ) # The string before the

tag is a regular NavigableString. assert isinstance(soup.div.contents[0], NavigableString) # The string inside the

tag are empty-element, just because # they have no contents. assert b"
" == xml_br.encode() assert b"

" == xml_p.encode() html_soup = BeautifulSoup("", "html.parser") html_br = html_soup.new_tag("br") html_p = html_soup.new_tag("p") # The HTML builder users HTML's rules about which tags are # empty-element tags, and the new tags reflect these rules. assert b"
" == html_br.encode() assert b"

" == html_p.encode() class TestNewString(SoupTest): """Test the BeautifulSoup.new_string() method.""" def test_new_string_creates_navigablestring(self): soup = self.soup("") s = soup.new_string("foo") assert "foo" == s assert isinstance(s, NavigableString) def test_new_string_can_create_navigablestring_subclass(self): soup = self.soup("") s = soup.new_string("foo", Comment) assert "foo" == s assert isinstance(s, Comment) class TestPickle(SoupTest): # Test our ability to pickle the BeautifulSoup object itself. def test_normal_pickle(self): soup = self.soup("some markup") pickled = pickle.dumps(soup) unpickled = pickle.loads(pickled) assert "some markup" == unpickled.a.string def test_pickle_with_no_builder(self): # We had a bug that prevented pickling from working if # the builder wasn't set. soup = self.soup("some markup") soup.builder = None pickled = pickle.dumps(soup) unpickled = pickle.loads(pickled) assert "some markup" == unpickled.string class TestEncodingConversion(SoupTest): # Test Beautiful Soup's ability to decode and encode from various # encodings. def setup_method(self): self.unicode_data = 'Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!' self.utf8_data = self.unicode_data.encode("utf-8") # Just so you know what it looks like. assert self.utf8_data == b'Sacr\xc3\xa9 bleu!' def test_ascii_in_unicode_out(self): # ASCII input is converted to Unicode. The original_encoding # attribute is set to 'utf-8', a superset of ASCII. chardet = dammit.chardet_dammit logging.disable(logging.WARNING) try: def noop(str): return None # Disable chardet, which will realize that the ASCII is ASCII. dammit.chardet_dammit = noop ascii = b"a" soup_from_ascii = self.soup(ascii) unicode_output = soup_from_ascii.decode() assert isinstance(unicode_output, str) assert unicode_output == self.document_for(ascii.decode()) assert soup_from_ascii.original_encoding.lower() == "utf-8" finally: logging.disable(logging.NOTSET) dammit.chardet_dammit = chardet def test_unicode_in_unicode_out(self): # Unicode input is left alone. The original_encoding attribute # is not set. soup_from_unicode = self.soup(self.unicode_data) assert soup_from_unicode.decode() == self.unicode_data assert soup_from_unicode.foo.string == 'Sacr\xe9 bleu!' assert soup_from_unicode.original_encoding == None def test_utf8_in_unicode_out(self): # UTF-8 input is converted to Unicode. The original_encoding # attribute is set. soup_from_utf8 = self.soup(self.utf8_data) assert soup_from_utf8.decode() == self.unicode_data assert soup_from_utf8.foo.string == 'Sacr\xe9 bleu!' def test_utf8_out(self): # The internal data structures can be encoded as UTF-8. soup_from_unicode = self.soup(self.unicode_data) assert soup_from_unicode.encode('utf-8') == self.utf8_data @skipIf( PYTHON_3_PRE_3_2, "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.") def test_attribute_name_containing_unicode_characters(self): markup = '
' assert self.soup(markup).div.encode("utf8") == markup.encode("utf8")

éé

foo\0bar