2019-09-13 19:14:31 +00:00
|
|
|
"""
|
|
|
|
Soup Sieve.
|
|
|
|
|
|
|
|
A CSS selector filter for BeautifulSoup4.
|
|
|
|
|
|
|
|
MIT License
|
|
|
|
|
|
|
|
Copyright (c) 2018 Isaac Muse
|
|
|
|
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
|
|
in the Software without restriction, including without limitation the rights
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
|
|
|
|
The above copyright notice and this permission notice shall be included in all
|
|
|
|
copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
SOFTWARE.
|
|
|
|
"""
|
|
|
|
from .__meta__ import __version__, __version_info__ # noqa: F401
|
|
|
|
from . import css_parser as cp
|
|
|
|
from . import css_match as cm
|
|
|
|
from . import css_types as ct
|
2022-01-24 04:07:52 +00:00
|
|
|
from .util import DEBUG, SelectorSyntaxError # noqa: F401
|
|
|
|
import bs4 # type: ignore[import]
|
|
|
|
from typing import Dict, Optional, Any, List, Iterator, Iterable
|
2019-09-13 19:14:31 +00:00
|
|
|
|
|
|
|
# Public names exported by a star-import of this package.
# NOTE(review): `purge` and `escape` are defined in this module but are not
# listed here -- confirm that the omission is intentional.
__all__ = (
    'DEBUG',
    'SelectorSyntaxError',
    'SoupSieve',
    'closest',
    'compile',
    'filter',
    'iselect',
    'match',
    'select',
    'select_one',
)
|
|
|
|
|
|
|
|
# Re-export the compiled-selector type from `css_match` at package level;
# `compile` also uses this name for its `isinstance` check.
SoupSieve = cm.SoupSieve
|
|
|
|
|
|
|
|
|
2022-01-24 04:07:52 +00:00
|
|
|
def compile(  # noqa: A001
    pattern: str,
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> cm.SoupSieve:
    """
    Compile CSS pattern.

    `pattern` is normally a selector string, which is handed to
    `cp._cached_css_compile` together with the wrapped `namespaces`, the
    wrapped `custom` selectors, and `flags`.

    `pattern` may also be an existing `SoupSieve` object. In that case it is
    returned unchanged, and supplying a truthy `flags`, or a `namespaces` or
    `custom` value other than `None`, raises `ValueError`.

    Additional keyword arguments are accepted but are not used by this function.
    """

    # Wrap the plain dictionaries before anything else is checked.
    wrapped_ns: Optional[ct.Namespaces] = None
    if namespaces is not None:
        wrapped_ns = ct.Namespaces(namespaces)

    wrapped_custom: Optional[ct.CustomSelectors] = None
    if custom is not None:
        wrapped_custom = ct.CustomSelectors(custom)

    # Common case: a selector string that still needs compiling.
    if not isinstance(pattern, SoupSieve):
        return cp._cached_css_compile(pattern, wrapped_ns, wrapped_custom, flags)

    # An already compiled selector cannot take on new options.
    if flags:
        raise ValueError("Cannot process 'flags' argument on a compiled selector list")
    if namespaces is not None:
        raise ValueError("Cannot process 'namespaces' argument on a compiled selector list")
    if custom is not None:
        raise ValueError("Cannot process 'custom' argument on a compiled selector list")

    return pattern
|
2019-09-13 19:14:31 +00:00
|
|
|
|
|
|
|
|
2022-01-24 04:07:52 +00:00
|
|
|
def purge() -> None:
    """
    Purge cached patterns.

    Delegates to `_purge_cache` in the CSS parser module; nothing is returned.
    """

    cp._purge_cache()
|
|
|
|
|
|
|
|
|
2022-01-24 04:07:52 +00:00
|
|
|
def closest(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> 'bs4.Tag':
    """
    Match closest ancestor.

    Compiles `select` with `compile` and returns whatever the compiled
    selector's `closest(tag)` returns.

    `namespaces`, `flags`, `custom`, and any extra keyword arguments are all
    forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it can never appear in
    # `kwargs`; it must be forwarded explicitly or it is silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).closest(tag)
|
|
|
|
|
|
|
|
|
2022-01-24 04:07:52 +00:00
|
|
|
def match(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> bool:
    """
    Match node.

    Compiles `select` with `compile` and returns the result of the compiled
    selector's `match(tag)`.

    `namespaces`, `flags`, `custom`, and any extra keyword arguments are all
    forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it can never appear in
    # `kwargs`; it must be forwarded explicitly or it is silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).match(tag)
|
|
|
|
|
|
|
|
|
2022-01-24 04:07:52 +00:00
|
|
|
def filter(  # noqa: A001
    select: str,
    iterable: Iterable['bs4.Tag'],
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> List['bs4.Tag']:
    """
    Filter list of nodes.

    Compiles `select` with `compile` and returns the result of the compiled
    selector's `filter(iterable)`.

    `namespaces`, `flags`, `custom`, and any extra keyword arguments are all
    forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it can never appear in
    # `kwargs`; it must be forwarded explicitly or it is silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).filter(iterable)
|
|
|
|
|
|
|
|
|
2022-01-24 04:07:52 +00:00
|
|
|
def select_one(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> 'bs4.Tag':
    """
    Select a single tag.

    Compiles `select` with `compile` and returns the result of the compiled
    selector's `select_one(tag)`.

    `namespaces`, `flags`, `custom`, and any extra keyword arguments are all
    forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it can never appear in
    # `kwargs`; it must be forwarded explicitly or it is silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).select_one(tag)
|
|
|
|
|
|
|
|
|
2022-01-24 04:07:52 +00:00
|
|
|
def select(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> List['bs4.Tag']:
    """
    Select the specified tags.

    Compiles `select` with `compile` and returns the result of the compiled
    selector's `select(tag, limit)`; `limit` is passed through unchanged.

    `namespaces`, `flags`, `custom`, and any extra keyword arguments are all
    forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it can never appear in
    # `kwargs`; it must be forwarded explicitly or it is silently dropped.
    return compile(select, namespaces, flags, custom=custom, **kwargs).select(tag, limit)
|
|
|
|
|
|
|
|
|
2022-01-24 04:07:52 +00:00
|
|
|
def iselect(
    select: str,
    tag: 'bs4.Tag',
    namespaces: Optional[Dict[str, str]] = None,
    limit: int = 0,
    flags: int = 0,
    *,
    custom: Optional[Dict[str, str]] = None,
    **kwargs: Any
) -> Iterator['bs4.Tag']:
    """
    Iterate the specified tags.

    This is a generator: `select` is compiled with `compile` once iteration
    starts, and every item produced by the compiled selector's
    `iselect(tag, limit)` is yielded in turn. `limit` is passed through
    unchanged.

    `namespaces`, `flags`, `custom`, and any extra keyword arguments are all
    forwarded to `compile`.
    """

    # `custom` is a named keyword-only parameter, so it can never appear in
    # `kwargs`; it must be forwarded explicitly or it is silently dropped.
    yield from compile(select, namespaces, flags, custom=custom, **kwargs).iselect(tag, limit)
|
|
|
|
|
|
|
|
|
2022-01-24 04:07:52 +00:00
|
|
|
def escape(ident: str) -> str:
    """
    Escape identifier.

    Hands `ident` to the CSS parser module's `escape` and returns its result.
    """

    escaped = cp.escape(ident)
    return escaped
|