From 8227df459a8a9286a4b5e8829b95abad337fefe8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Louis=20V=C3=A9zina?=
 <5130500+morpheus65535@users.noreply.github.com>
Date: Tue, 24 Sep 2019 06:23:11 -0400
Subject: [PATCH] WIP

---
 libs/bs4/__init__.py                    | 24 +++++---
 libs/bs4/builder/__init__.py            |  4 +-
 libs/bs4/builder/_html5lib.py           | 16 ++---
 libs/bs4/builder/_htmlparser.py         | 15 +++--
 libs/bs4/builder/_lxml.py               | 12 ++--
 libs/bs4/dammit.py                      | 13 ++--
 libs/bs4/diagnose.py                    | 58 +++++++++---------
 libs/bs4/element.py                     | 81 +++++++++++++------------
 libs/bs4/testing.py                     | 10 +--
 libs/bs4/tests/test_builder_registry.py |  1 +
 libs/bs4/tests/test_docs.py             |  1 +
 libs/bs4/tests/test_html5lib.py         |  3 +-
 libs/bs4/tests/test_htmlparser.py       |  1 +
 libs/bs4/tests/test_lxml.py             |  6 +-
 libs/bs4/tests/test_soup.py             |  6 +-
 libs/bs4/tests/test_tree.py             | 10 +--
 16 files changed, 148 insertions(+), 113 deletions(-)

diff --git a/libs/bs4/__init__.py b/libs/bs4/__init__.py
index 7a80452f7..b7ea25e2e 100644
--- a/libs/bs4/__init__.py
+++ b/libs/bs4/__init__.py
@@ -20,6 +20,10 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+from __future__ import absolute_import
+from __future__ import print_function
+import six
+from six.moves import range
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
 __version__ = "4.6.0"
 __copyright__ = "Copyright (c) 2004-2017 Leonard Richardson"
@@ -50,7 +54,7 @@ from .element import (
 
 # The very first thing we do is give a useful error if someone is
 # running this code under Python 3 without converting it.
-'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
+'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
 
 class BeautifulSoup(Tag):
     """
@@ -142,18 +146,18 @@ class BeautifulSoup(Tag):
         from_encoding = from_encoding or deprecated_argument(
             "fromEncoding", "from_encoding")
 
-        if from_encoding and isinstance(markup, unicode):
+        if from_encoding and isinstance(markup, six.text_type):
             warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
             from_encoding = None
 
         if len(kwargs) > 0:
-            arg = kwargs.keys().pop()
+            arg = list(kwargs.keys()).pop()
             raise TypeError(
                 "__init__() got an unexpected keyword argument '%s'" % arg)
 
         if builder is None:
             original_features = features
-            if isinstance(features, basestring):
+            if isinstance(features, six.string_types):
                 features = [features]
             if features is None or len(features) == 0:
                 features = self.DEFAULT_BUILDER_FEATURES
@@ -191,13 +195,13 @@ class BeautifulSoup(Tag):
             markup = markup.read()
         elif len(markup) <= 256 and (
                 (isinstance(markup, bytes) and not b'<' in markup)
-                or (isinstance(markup, unicode) and not u'<' in markup)
+                or (isinstance(markup, six.text_type) and not u'<' in markup)
         ):
             # Print out warnings for a couple beginner problems
             # involving passing non-markup to Beautiful Soup.
             # Beautiful Soup will still parse the input as markup,
             # just in case that's what the user really wants.
-            if (isinstance(markup, unicode)
+            if (isinstance(markup, six.text_type)
                 and not os.path.supports_unicode_filenames):
                 possible_filename = markup.encode("utf8")
             else:
@@ -205,13 +209,13 @@ class BeautifulSoup(Tag):
             is_file = False
             try:
                 is_file = os.path.exists(possible_filename)
-            except Exception, e:
+            except Exception as e:
                 # This is almost certainly a problem involving
                 # characters not valid in filenames on this
                 # system. Just let it go.
                 pass
             if is_file:
-                if isinstance(markup, unicode):
+                if isinstance(markup, six.text_type):
                     markup = markup.encode("utf8")
                 warnings.warn(
                     '"%s" looks like a filename, not markup. You should'
@@ -263,7 +267,7 @@ class BeautifulSoup(Tag):
         if isinstance(markup, bytes):
             space = b' '
             cant_start_with = (b"http:", b"https:")
-        elif isinstance(markup, unicode):
+        elif isinstance(markup, six.text_type):
             space = u' '
             cant_start_with = (u"http:", u"https:")
         else:
@@ -526,4 +530,4 @@ class FeatureNotFound(ValueError):
 if __name__ == '__main__':
     import sys
     soup = BeautifulSoup(sys.stdin)
-    print soup.prettify()
+    print(soup.prettify())
diff --git a/libs/bs4/builder/__init__.py b/libs/bs4/builder/__init__.py
index fdb3362fc..4b44ef05f 100644
--- a/libs/bs4/builder/__init__.py
+++ b/libs/bs4/builder/__init__.py
@@ -1,6 +1,7 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+from __future__ import absolute_import
 from collections import defaultdict
 import itertools
 import sys
@@ -10,6 +11,7 @@ from bs4.element import (
     HTMLAwareEntitySubstitution,
     whitespace_re
     )
+import six
 
 __all__ = [
     'HTMLTreeBuilder',
@@ -166,7 +168,7 @@ class TreeBuilder(object):
                     # value is a whitespace-separated list of
                     # values. Split it into a list.
                     value = attrs[attr]
-                    if isinstance(value, basestring):
+                    if isinstance(value, six.string_types):
                         values = whitespace_re.split(value)
                     else:
                         # html5lib sometimes calls setAttributes twice
diff --git a/libs/bs4/builder/_html5lib.py b/libs/bs4/builder/_html5lib.py
index 5f5489358..cf6063b83 100644
--- a/libs/bs4/builder/_html5lib.py
+++ b/libs/bs4/builder/_html5lib.py
@@ -1,6 +1,8 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+from __future__ import absolute_import
+import six
 __all__ = [
     'HTML5TreeBuilder',
     ]
@@ -33,7 +35,7 @@ try:
     # Pre-0.99999999
     from html5lib.treebuilders import _base as treebuilder_base
     new_html5lib = False
-except ImportError, e:
+except ImportError as e:
     # 0.99999999 and up
     from html5lib.treebuilders import base as treebuilder_base
     new_html5lib = True
@@ -64,7 +66,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
         parser = html5lib.HTMLParser(tree=self.create_treebuilder)
 
         extra_kwargs = dict()
-        if not isinstance(markup, unicode):
+        if not isinstance(markup, six.text_type):
             if new_html5lib:
                 extra_kwargs['override_encoding'] = self.user_specified_encoding
             else:
@@ -72,13 +74,13 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
         doc = parser.parse(markup, **extra_kwargs)
 
         # Set the character encoding detected by the tokenizer.
-        if isinstance(markup, unicode):
+        if isinstance(markup, six.text_type):
             # We need to special-case this because html5lib sets
             # charEncoding to UTF-8 if it gets Unicode input.
             doc.original_encoding = None
         else:
             original_encoding = parser.tokenizer.stream.charEncoding[0]
-            if not isinstance(original_encoding, basestring):
+            if not isinstance(original_encoding, six.string_types):
                 # In 0.99999999 and up, the encoding is an html5lib
                 # Encoding object. We want to use a string for compatibility
                 # with other tree builders.
@@ -229,7 +231,7 @@ class Element(treebuilder_base.Node):
 
     def appendChild(self, node):
         string_child = child = None
-        if isinstance(node, basestring):
+        if isinstance(node, six.string_types):
             # Some other piece of code decided to pass in a string
             # instead of creating a TextElement object to contain the
             # string.
@@ -246,7 +248,7 @@ class Element(treebuilder_base.Node):
             child = node.element
             node.parent = self
 
-        if not isinstance(child, basestring) and child.parent is not None:
+        if not isinstance(child, six.string_types) and child.parent is not None:
             node.element.extract()
 
         if (string_child and self.element.contents
@@ -259,7 +261,7 @@ class Element(treebuilder_base.Node):
             old_element.replace_with(new_element)
             self.soup._most_recent_element = new_element
         else:
-            if isinstance(node, basestring):
+            if isinstance(node, six.string_types):
                 # Create a brand new NavigableString from this string.
                 child = self.soup.new_string(node)
 
diff --git a/libs/bs4/builder/_htmlparser.py b/libs/bs4/builder/_htmlparser.py
index 67890b3a3..624028e3c 100644
--- a/libs/bs4/builder/_htmlparser.py
+++ b/libs/bs4/builder/_htmlparser.py
@@ -3,15 +3,18 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
+from __future__ import absolute_import
+from six import unichr
+import six
 __all__ = [
     'HTMLParserTreeBuilder',
     ]
 
-from HTMLParser import HTMLParser
+from six.moves.html_parser import HTMLParser
 
 try:
-    from HTMLParser import HTMLParseError
-except ImportError, e:
+    from six.moves.html_parser import HTMLParseError
+except ImportError as e:
     # HTMLParseError is removed in Python 3.5. Since it can never be
     # thrown in 3.5, we can just define our own class as a placeholder.
     class HTMLParseError(Exception):
@@ -131,7 +134,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
 
         try:
             data = unichr(real_name)
-        except (ValueError, OverflowError), e:
+        except (ValueError, OverflowError) as e:
             data = u"\N{REPLACEMENT CHARACTER}"
 
         self.handle_data(data)
@@ -196,7 +199,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         declared within markup, whether any characters had to be
         replaced with REPLACEMENT CHARACTER).
         """
-        if isinstance(markup, unicode):
+        if isinstance(markup, six.text_type):
             yield (markup, None, None, False)
             return
 
@@ -213,7 +216,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         parser.soup = self.soup
         try:
             parser.feed(markup)
-        except HTMLParseError, e:
+        except HTMLParseError as e:
             warnings.warn(RuntimeWarning(
                 "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
             raise e
diff --git a/libs/bs4/builder/_lxml.py b/libs/bs4/builder/_lxml.py
index d2ca2872d..73f6e2b34 100644
--- a/libs/bs4/builder/_lxml.py
+++ b/libs/bs4/builder/_lxml.py
@@ -1,5 +1,7 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
+from __future__ import absolute_import
+import six
 __all__ = [
     'LXMLTreeBuilderForXML',
     'LXMLTreeBuilder',
@@ -101,12 +103,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         else:
             self.processing_instruction_class = XMLProcessingInstruction
 
-        if isinstance(markup, unicode):
+        if isinstance(markup, six.text_type):
             # We were given Unicode. Maybe lxml can parse Unicode on
             # this system?
             yield markup, None, document_declared_encoding, False
 
-        if isinstance(markup, unicode):
+        if isinstance(markup, six.text_type):
             # No, apparently not. Convert the Unicode to UTF-8 and
             # tell lxml to parse it as UTF-8.
             yield (markup.encode("utf8"), "utf8",
@@ -121,7 +123,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
     def feed(self, markup):
         if isinstance(markup, bytes):
             markup = BytesIO(markup)
-        elif isinstance(markup, unicode):
+        elif isinstance(markup, six.text_type):
             markup = StringIO(markup)
 
         # Call feed() at least once, even if the markup is empty,
@@ -136,7 +138,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
                 if len(data) != 0:
                     self.parser.feed(data)
             self.parser.close()
-        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
             raise ParserRejectedMarkup(str(e))
 
     def close(self):
@@ -249,7 +251,7 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
             self.parser = self.parser_for(encoding)
             self.parser.feed(markup)
             self.parser.close()
-        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
             raise ParserRejectedMarkup(str(e))
 
 
diff --git a/libs/bs4/dammit.py b/libs/bs4/dammit.py
index 7965565f5..fe10691d0 100644
--- a/libs/bs4/dammit.py
+++ b/libs/bs4/dammit.py
@@ -8,10 +8,13 @@ XML or HTML to reflect a new encoding; that's the tree builder's job.
 """
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
+from __future__ import absolute_import
+from six import unichr
+import six
 __license__ = "MIT"
 
 import codecs
-from htmlentitydefs import codepoint2name
+from six.moves.html_entities import codepoint2name
 import re
 import logging
 import string
@@ -274,7 +277,7 @@ class EncodingDetector:
     def strip_byte_order_mark(cls, data):
         """If a byte-order mark is present, strip it and return the encoding it implies."""
         encoding = None
-        if isinstance(data, unicode):
+        if isinstance(data, six.text_type):
             # Unicode data cannot have a byte-order mark.
             return data, encoding
         if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
@@ -352,9 +355,9 @@ class UnicodeDammit:
             markup, override_encodings, is_html, exclude_encodings)
 
         # Short-circuit if the data is in Unicode to begin with.
-        if isinstance(markup, unicode) or markup == '':
+        if isinstance(markup, six.text_type) or markup == '':
             self.markup = markup
-            self.unicode_markup = unicode(markup)
+            self.unicode_markup = six.text_type(markup)
             self.original_encoding = None
             return
 
@@ -438,7 +441,7 @@ class UnicodeDammit:
     def _to_unicode(self, data, encoding, errors="strict"):
         '''Given a string and its encoding, decodes the string into Unicode.
         %encoding is a string recognized by encodings.aliases'''
-        return unicode(data, encoding, errors)
+        return six.text_type(data, encoding, errors)
 
     @property
     def declared_html_encoding(self):
diff --git a/libs/bs4/diagnose.py b/libs/bs4/diagnose.py
index 8768332f5..8cc44833b 100644
--- a/libs/bs4/diagnose.py
+++ b/libs/bs4/diagnose.py
@@ -2,11 +2,15 @@
 
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
+from __future__ import absolute_import
+from __future__ import print_function
+from six.moves import map
+from six.moves import range
 __license__ = "MIT"
 
 import cProfile
 from StringIO import StringIO
-from HTMLParser import HTMLParser
+from six.moves.html_parser import HTMLParser
 import bs4
 from bs4 import BeautifulSoup, __version__
 from bs4.builder import builder_registry
@@ -22,8 +26,8 @@ import cProfile
 
 def diagnose(data):
     """Diagnostic suite for isolating common problems."""
-    print "Diagnostic running on Beautiful Soup %s" % __version__
-    print "Python version %s" % sys.version
+    print("Diagnostic running on Beautiful Soup %s" % __version__)
+    print("Python version %s" % sys.version)
 
     basic_parsers = ["html.parser", "html5lib", "lxml"]
     for name in basic_parsers:
@@ -32,16 +36,16 @@ def diagnose(data):
                 break
         else:
             basic_parsers.remove(name)
-            print (
+            print((
                 "I noticed that %s is not installed. Installing it may help." %
-                name)
+                name))
 
     if 'lxml' in basic_parsers:
         basic_parsers.append(["lxml", "xml"])
         try:
             from lxml import etree
-            print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))
-        except ImportError, e:
+            print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
+        except ImportError as e:
             print (
                 "lxml is not installed or couldn't be imported.")
 
@@ -49,37 +53,37 @@ def diagnose(data):
     if 'html5lib' in basic_parsers:
         try:
             import html5lib
-            print "Found html5lib version %s" % html5lib.__version__
-        except ImportError, e:
+            print("Found html5lib version %s" % html5lib.__version__)
+        except ImportError as e:
             print (
                 "html5lib is not installed or couldn't be imported.")
 
     if hasattr(data, 'read'):
         data = data.read()
     elif os.path.exists(data):
-        print '"%s" looks like a filename. Reading data from the file.' % data
+        print('"%s" looks like a filename. Reading data from the file.' % data)
         with open(data) as fp:
             data = fp.read()
     elif data.startswith("http:") or data.startswith("https:"):
-        print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
-        print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
+        print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
+        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
         return
-    print
+    print()
 
     for parser in basic_parsers:
-        print "Trying to parse your markup with %s" % parser
+        print("Trying to parse your markup with %s" % parser)
         success = False
         try:
             soup = BeautifulSoup(data, parser)
             success = True
-        except Exception, e:
-            print "%s could not parse the markup." % parser
+        except Exception as e:
+            print("%s could not parse the markup." % parser)
             traceback.print_exc()
         if success:
-            print "Here's what %s did with the markup:" % parser
-            print soup.prettify()
+            print("Here's what %s did with the markup:" % parser)
+            print(soup.prettify())
 
-        print "-" * 80
+        print("-" * 80)
 
 def lxml_trace(data, html=True, **kwargs):
     """Print out the lxml events that occur during parsing.
@@ -89,7 +93,7 @@ def lxml_trace(data, html=True, **kwargs):
     """
     from lxml import etree
     for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
-        print("%s, %4s, %s" % (event, element.tag, element.text))
+        print(("%s, %4s, %s" % (event, element.tag, element.text)))
 
 class AnnouncingParser(HTMLParser):
     """Announces HTMLParser parse events, without doing anything else."""
@@ -171,9 +175,9 @@ def rdoc(num_elements=1000):
 
 def benchmark_parsers(num_elements=100000):
     """Very basic head-to-head performance benchmark."""
-    print "Comparative parser benchmark on Beautiful Soup %s" % __version__
+    print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
     data = rdoc(num_elements)
-    print "Generated a large invalid HTML document (%d bytes)." % len(data)
+    print("Generated a large invalid HTML document (%d bytes)." % len(data))
     
     for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
         success = False
@@ -182,24 +186,24 @@ def benchmark_parsers(num_elements=100000):
             soup = BeautifulSoup(data, parser)
             b = time.time()
             success = True
-        except Exception, e:
-            print "%s could not parse the markup." % parser
+        except Exception as e:
+            print("%s could not parse the markup." % parser)
             traceback.print_exc()
         if success:
-            print "BS4+%s parsed the markup in %.2fs." % (parser, b-a)
+            print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))
 
     from lxml import etree
     a = time.time()
     etree.HTML(data)
     b = time.time()
-    print "Raw lxml parsed the markup in %.2fs." % (b-a)
+    print("Raw lxml parsed the markup in %.2fs." % (b-a))
 
     import html5lib
     parser = html5lib.HTMLParser()
     a = time.time()
     parser.parse(data)
     b = time.time()
-    print "Raw html5lib parsed the markup in %.2fs." % (b-a)
+    print("Raw html5lib parsed the markup in %.2fs." % (b-a))
 
 def profile(num_elements=100000, parser="lxml"):
 
diff --git a/libs/bs4/element.py b/libs/bs4/element.py
index 9ef75f814..39f480371 100644
--- a/libs/bs4/element.py
+++ b/libs/bs4/element.py
@@ -1,5 +1,8 @@
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
+from __future__ import absolute_import
+from __future__ import print_function
+import six
 __license__ = "MIT"
 
 import collections
@@ -26,22 +29,22 @@ def _alias(attr):
     return alias
 
 
-class NamespacedAttribute(unicode):
+class NamespacedAttribute(six.text_type):
 
     def __new__(cls, prefix, name, namespace=None):
         if name is None:
-            obj = unicode.__new__(cls, prefix)
+            obj = six.text_type.__new__(cls, prefix)
         elif prefix is None:
             # Not really namespaced.
-            obj = unicode.__new__(cls, name)
+            obj = six.text_type.__new__(cls, name)
         else:
-            obj = unicode.__new__(cls, prefix + ":" + name)
+            obj = six.text_type.__new__(cls, prefix + ":" + name)
         obj.prefix = prefix
         obj.name = name
         obj.namespace = namespace
         return obj
 
-class AttributeValueWithCharsetSubstitution(unicode):
+class AttributeValueWithCharsetSubstitution(six.text_type):
     """A stand-in object for a character encoding specified in HTML."""
 
 class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
@@ -52,7 +55,7 @@ class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
     """
 
     def __new__(cls, original_value):
-        obj = unicode.__new__(cls, original_value)
+        obj = six.text_type.__new__(cls, original_value)
         obj.original_value = original_value
         return obj
 
@@ -75,9 +78,9 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
         match = cls.CHARSET_RE.search(original_value)
         if match is None:
             # No substitution necessary.
-            return unicode.__new__(unicode, original_value)
+            return six.text_type.__new__(six.text_type, original_value)
 
-        obj = unicode.__new__(cls, original_value)
+        obj = six.text_type.__new__(cls, original_value)
         obj.original_value = original_value
         return obj
 
@@ -312,7 +315,7 @@ class PageElement(object):
             raise ValueError("Cannot insert None into a tag.")
         if new_child is self:
             raise ValueError("Cannot insert a tag into itself.")
-        if (isinstance(new_child, basestring)
+        if (isinstance(new_child, six.string_types)
             and not isinstance(new_child, NavigableString)):
             new_child = NavigableString(new_child)
 
@@ -533,7 +536,7 @@ class PageElement(object):
                 result = (element for element in generator
                           if isinstance(element, Tag))
                 return ResultSet(strainer, result)
-            elif isinstance(name, basestring):
+            elif isinstance(name, six.string_types):
                 # Optimization to find all tags with a given name.
                 if name.count(':') == 1:
                     # This is a name with a prefix.
@@ -691,7 +694,7 @@ class PageElement(object):
         return self.parents
 
 
-class NavigableString(unicode, PageElement):
+class NavigableString(six.text_type, PageElement):
 
     PREFIX = ''
     SUFFIX = ''
@@ -709,10 +712,10 @@ class NavigableString(unicode, PageElement):
         passed in to the superclass's __new__ or the superclass won't know
         how to handle non-ASCII characters.
         """
-        if isinstance(value, unicode):
-            u = unicode.__new__(cls, value)
+        if isinstance(value, six.text_type):
+            u = six.text_type.__new__(cls, value)
         else:
-            u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+            u = six.text_type.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
         u.setup()
         return u
 
@@ -723,7 +726,7 @@ class NavigableString(unicode, PageElement):
         return type(self)(self)
 
     def __getnewargs__(self):
-        return (unicode(self),)
+        return (six.text_type(self),)
 
     def __getattr__(self, attr):
         """text.string gives you text. This is for backwards
@@ -1142,8 +1145,8 @@ class Tag(PageElement):
                 else:
                     if isinstance(val, list) or isinstance(val, tuple):
                         val = ' '.join(val)
-                    elif not isinstance(val, basestring):
-                        val = unicode(val)
+                    elif not isinstance(val, six.string_types):
+                        val = six.text_type(val)
                     elif (
                         isinstance(val, AttributeValueWithCharsetSubstitution)
                         and eventual_encoding is not None):
@@ -1151,7 +1154,7 @@ class Tag(PageElement):
 
                     text = self.format_string(val, formatter)
                     decoded = (
-                        unicode(key) + '='
+                        six.text_type(key) + '='
                         + EntitySubstitution.quoted_attribute_value(text))
                 attrs.append(decoded)
         close = ''
@@ -1368,7 +1371,7 @@ class Tag(PageElement):
                 'Final combinator "%s" is missing an argument.' % tokens[-1])
 
         if self._select_debug:
-            print 'Running CSS selector "%s"' % selector
+            print('Running CSS selector "%s"' % selector)
 
         for index, token in enumerate(tokens):
             new_context = []
@@ -1377,11 +1380,11 @@ class Tag(PageElement):
             if tokens[index-1] in self._selector_combinators:
                 # This token was consumed by the previous combinator. Skip it.
                 if self._select_debug:
-                    print '  Token was consumed by the previous combinator.'
+                    print('  Token was consumed by the previous combinator.')
                 continue
 
             if self._select_debug:
-                print ' Considering token "%s"' % token
+                print(' Considering token "%s"' % token)
             recursive_candidate_generator = None
             tag_name = None
 
@@ -1488,14 +1491,14 @@ class Tag(PageElement):
                 next_token = tokens[index+1]
                 def recursive_select(tag):
                     if self._select_debug:
-                        print '    Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs)
-                        print '-' * 40
+                        print('    Calling select("%s") recursively on %s %s' % (next_token, tag.name, tag.attrs))
+                        print('-' * 40)
                     for i in tag.select(next_token, recursive_candidate_generator):
                         if self._select_debug:
-                            print '(Recursive select picked up candidate %s %s)' % (i.name, i.attrs)
+                            print('(Recursive select picked up candidate %s %s)' % (i.name, i.attrs))
                         yield i
                     if self._select_debug:
-                        print '-' * 40
+                        print('-' * 40)
                 _use_candidate_generator = recursive_select
             elif _candidate_generator is None:
                 # By default, a tag's candidates are all of its
@@ -1506,7 +1509,7 @@ class Tag(PageElement):
                         check = "[any]"
                     else:
                         check = tag_name
-                    print '   Default candidate generator, tag name="%s"' % check
+                    print('   Default candidate generator, tag name="%s"' % check)
                 if self._select_debug:
                     # This is redundant with later code, but it stops
                     # a bunch of bogus tags from cluttering up the
@@ -1527,8 +1530,8 @@ class Tag(PageElement):
             count = 0
             for tag in current_context:
                 if self._select_debug:
-                    print "    Running candidate generator on %s %s" % (
-                        tag.name, repr(tag.attrs))
+                    print("    Running candidate generator on %s %s" % (
+                        tag.name, repr(tag.attrs)))
                 for candidate in _use_candidate_generator(tag):
                     if not isinstance(candidate, Tag):
                         continue
@@ -1543,23 +1546,23 @@ class Tag(PageElement):
                             break
                     if checker is None or result:
                         if self._select_debug:
-                            print "     SUCCESS %s %s" % (candidate.name, repr(candidate.attrs))
+                            print("     SUCCESS %s %s" % (candidate.name, repr(candidate.attrs)))
                         if id(candidate) not in new_context_ids:
                             # If a tag matches a selector more than once,
                             # don't include it in the context more than once.
                             new_context.append(candidate)
                             new_context_ids.add(id(candidate))
                     elif self._select_debug:
-                        print "     FAILURE %s %s" % (candidate.name, repr(candidate.attrs))
+                        print("     FAILURE %s %s" % (candidate.name, repr(candidate.attrs)))
 
             current_context = new_context
         if limit and len(current_context) >= limit:
             current_context = current_context[:limit]
 
         if self._select_debug:
-            print "Final verdict:"
+            print("Final verdict:")
             for i in current_context:
-                print " %s %s" % (i.name, i.attrs)
+                print(" %s %s" % (i.name, i.attrs))
         return current_context
 
     # Old names for backwards compatibility
@@ -1612,7 +1615,7 @@ class SoupStrainer(object):
     def _normalize_search_value(self, value):
         # Leave it alone if it's a Unicode string, a callable, a
         # regular expression, a boolean, or None.
-        if (isinstance(value, unicode) or callable(value) or hasattr(value, 'match')
+        if (isinstance(value, six.text_type) or callable(value) or hasattr(value, 'match')
             or isinstance(value, bool) or value is None):
             return value
 
@@ -1625,7 +1628,7 @@ class SoupStrainer(object):
             new_value = []
             for v in value:
                 if (hasattr(v, '__iter__') and not isinstance(v, bytes)
-                    and not isinstance(v, unicode)):
+                    and not isinstance(v, six.text_type)):
                     # This is almost certainly the user's mistake. In the
                     # interests of avoiding infinite loops, we'll let
                     # it through as-is rather than doing a recursive call.
@@ -1637,7 +1640,7 @@ class SoupStrainer(object):
         # Otherwise, convert it into a Unicode string.
         # The unicode(str()) thing is so this will do the same thing on Python 2
         # and Python 3.
-        return unicode(str(value))
+        return six.text_type(str(value))
 
     def __str__(self):
         if self.text:
@@ -1691,7 +1694,7 @@ class SoupStrainer(object):
         found = None
         # If given a list of items, scan it for a text element that
         # matches.
-        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
+        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, six.string_types)):
             for element in markup:
                 if isinstance(element, NavigableString) \
                        and self.search(element):
@@ -1704,7 +1707,7 @@ class SoupStrainer(object):
                 found = self.search_tag(markup)
         # If it's text, make sure the text matches.
         elif isinstance(markup, NavigableString) or \
-                 isinstance(markup, basestring):
+                 isinstance(markup, six.string_types):
             if not self.name and not self.attrs and self._matches(markup, self.text):
                 found = markup
         else:
@@ -1749,7 +1752,7 @@ class SoupStrainer(object):
             return not match_against
 
         if (hasattr(match_against, '__iter__')
-            and not isinstance(match_against, basestring)):
+            and not isinstance(match_against, six.string_types)):
             # We're asked to match against an iterable of items.
             # The markup must be match at least one item in the
             # iterable. We'll try each one in turn.
@@ -1776,7 +1779,7 @@ class SoupStrainer(object):
         # the tag's name and once against its prefixed name.
         match = False
         
-        if not match and isinstance(match_against, unicode):
+        if not match and isinstance(match_against, six.text_type):
             # Exact string match
             match = markup == match_against
 
diff --git a/libs/bs4/testing.py b/libs/bs4/testing.py
index 6ba2506c4..489b1b25a 100644
--- a/libs/bs4/testing.py
+++ b/libs/bs4/testing.py
@@ -2,6 +2,8 @@
 
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
+from __future__ import absolute_import
+import six
 __license__ = "MIT"
 
 import pickle
@@ -645,7 +647,7 @@ class XMLTreeBuilderSmokeTest(object):
         markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
         soup = self.soup(markup)
         self.assertEqual(
-            unicode(soup.rss), markup)
+            six.text_type(soup.rss), markup)
 
     def test_docstring_includes_correct_encoding(self):
         soup = self.soup("<root/>")
@@ -676,17 +678,17 @@ class XMLTreeBuilderSmokeTest(object):
     def test_closing_namespaced_tag(self):
         markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
         soup = self.soup(markup)
-        self.assertEqual(unicode(soup.p), markup)
+        self.assertEqual(six.text_type(soup.p), markup)
 
     def test_namespaced_attributes(self):
         markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
         soup = self.soup(markup)
-        self.assertEqual(unicode(soup.foo), markup)
+        self.assertEqual(six.text_type(soup.foo), markup)
 
     def test_namespaced_attributes_xml_namespace(self):
         markup = '<foo xml:lang="fr">bar</foo>'
         soup = self.soup(markup)
-        self.assertEqual(unicode(soup.foo), markup)
+        self.assertEqual(six.text_type(soup.foo), markup)
 
     def test_find_by_prefixed_name(self):
         doc = """<?xml version="1.0" encoding="utf-8"?>
diff --git a/libs/bs4/tests/test_builder_registry.py b/libs/bs4/tests/test_builder_registry.py
index 90cad8293..d20678285 100644
--- a/libs/bs4/tests/test_builder_registry.py
+++ b/libs/bs4/tests/test_builder_registry.py
@@ -1,5 +1,6 @@
 """Tests of the builder registry."""
 
+from __future__ import absolute_import
 import unittest
 import warnings
 
diff --git a/libs/bs4/tests/test_docs.py b/libs/bs4/tests/test_docs.py
index 5b9f67709..01eb94ef4 100644
--- a/libs/bs4/tests/test_docs.py
+++ b/libs/bs4/tests/test_docs.py
@@ -2,6 +2,7 @@
 
 # pylint: disable-msg=E0611,W0142
 
+from __future__ import absolute_import
 __metaclass__ = type
 __all__ = [
     'additional_tests',
diff --git a/libs/bs4/tests/test_html5lib.py b/libs/bs4/tests/test_html5lib.py
index 0f89d6244..b9e54c9a0 100644
--- a/libs/bs4/tests/test_html5lib.py
+++ b/libs/bs4/tests/test_html5lib.py
@@ -1,11 +1,12 @@
 """Tests to ensure that the html5lib tree builder generates good trees."""
 
+from __future__ import absolute_import
 import warnings
 
 try:
     from bs4.builder import HTML5TreeBuilder
     HTML5LIB_PRESENT = True
-except ImportError, e:
+except ImportError as e:
     HTML5LIB_PRESENT = False
 from bs4.element import SoupStrainer
 from bs4.testing import (
diff --git a/libs/bs4/tests/test_htmlparser.py b/libs/bs4/tests/test_htmlparser.py
index d5cf0253f..94d9ca456 100644
--- a/libs/bs4/tests/test_htmlparser.py
+++ b/libs/bs4/tests/test_htmlparser.py
@@ -1,6 +1,7 @@
 """Tests to ensure that the html.parser tree builder generates good
 trees."""
 
+from __future__ import absolute_import
 from pdb import set_trace
 import pickle
 from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
diff --git a/libs/bs4/tests/test_lxml.py b/libs/bs4/tests/test_lxml.py
index a05870b91..b579b1938 100644
--- a/libs/bs4/tests/test_lxml.py
+++ b/libs/bs4/tests/test_lxml.py
@@ -1,13 +1,15 @@
 """Tests to ensure that the lxml tree builder generates good trees."""
 
+from __future__ import absolute_import
 import re
 import warnings
+import six
 
 try:
     import lxml.etree
     LXML_PRESENT = True
     LXML_VERSION = lxml.etree.LXML_VERSION
-except ImportError, e:
+except ImportError as e:
     LXML_PRESENT = False
     LXML_VERSION = (0,)
 
@@ -62,7 +64,7 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
         # if one is installed.
         with warnings.catch_warnings(record=True) as w:
             soup = BeautifulStoneSoup("<b />")
-        self.assertEqual(u"<b/>", unicode(soup.b))
+        self.assertEqual(u"<b/>", six.text_type(soup.b))
         self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
 
 @skipIf(
diff --git a/libs/bs4/tests/test_soup.py b/libs/bs4/tests/test_soup.py
index f3e69edf3..047d9c42e 100644
--- a/libs/bs4/tests/test_soup.py
+++ b/libs/bs4/tests/test_soup.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 """Tests of Beautiful Soup as a whole."""
 
+from __future__ import absolute_import
 from pdb import set_trace
 import logging
 import unittest
@@ -28,11 +29,12 @@ from bs4.testing import (
     skipIf,
 )
 import warnings
+import six
 
 try:
     from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
     LXML_PRESENT = True
-except ImportError, e:
+except ImportError as e:
     LXML_PRESENT = False
 
 PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
@@ -250,7 +252,7 @@ class TestEncodingConversion(SoupTest):
             ascii = b"<foo>a</foo>"
             soup_from_ascii = self.soup(ascii)
             unicode_output = soup_from_ascii.decode()
-            self.assertTrue(isinstance(unicode_output, unicode))
+            self.assertTrue(isinstance(unicode_output, six.text_type))
             self.assertEqual(unicode_output, self.document_for(ascii.decode()))
             self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
         finally:
diff --git a/libs/bs4/tests/test_tree.py b/libs/bs4/tests/test_tree.py
index c0e7c4080..f4bce3e14 100644
--- a/libs/bs4/tests/test_tree.py
+++ b/libs/bs4/tests/test_tree.py
@@ -10,6 +10,7 @@ same markup, but all Beautiful Soup trees can be traversed with the
 methods tested here.
 """
 
+from __future__ import absolute_import
 from pdb import set_trace
 import copy
 import pickle
@@ -34,6 +35,7 @@ from bs4.testing import (
     SoupTest,
     skipIf,
 )
+import six
 
 XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
 LXML_PRESENT = (builder_registry.lookup("lxml") is not None)
@@ -1111,7 +1113,7 @@ class TestTreeModification(SoupTest):
 <script>baz</script>
 </html>""")
         [soup.script.extract() for i in soup.find_all("script")]
-        self.assertEqual("<body>\n\n<a></a>\n</body>", unicode(soup.body))
+        self.assertEqual("<body>\n\n<a></a>\n</body>", six.text_type(soup.body))
 
 
     def test_extract_works_when_element_is_surrounded_by_identical_strings(self):
@@ -1349,7 +1351,7 @@ class TestPersistence(SoupTest):
         soup = BeautifulSoup(b'<p>&nbsp;</p>', 'html.parser')
         encoding = soup.original_encoding
         copy = soup.__copy__()
-        self.assertEqual(u"<p> </p>", unicode(copy))
+        self.assertEqual(u"<p> </p>", six.text_type(copy))
         self.assertEqual(encoding, copy.original_encoding)
 
     def test_unicode_pickle(self):
@@ -1393,7 +1395,7 @@ class TestPersistence(SoupTest):
         div_copy = copy.copy(div)
 
         # The two tags look the same, and evaluate to equal.
-        self.assertEqual(unicode(div), unicode(div_copy))
+        self.assertEqual(six.text_type(div), six.text_type(div_copy))
         self.assertEqual(div, div_copy)
 
         # But they're not the same object.
@@ -1505,7 +1507,7 @@ class TestSubstitutions(SoupTest):
 
     def test_prettify_outputs_unicode_by_default(self):
         soup = self.soup("<a></a>")
-        self.assertEqual(unicode, type(soup.prettify()))
+        self.assertEqual(six.text_type, type(soup.prettify()))
 
     def test_prettify_can_encode_data(self):
         soup = self.soup("<a></a>")