mirror of
https://github.com/morpheus65535/bazarr
synced 2024-12-23 08:13:14 +00:00
593 lines
22 KiB
Python
593 lines
22 KiB
Python
import railroad
|
|
import pyparsing
|
|
from pkg_resources import resource_filename
|
|
from typing import (
|
|
List,
|
|
Optional,
|
|
NamedTuple,
|
|
Generic,
|
|
TypeVar,
|
|
Dict,
|
|
Callable,
|
|
Set,
|
|
Iterable,
|
|
)
|
|
from jinja2 import Template
|
|
from io import StringIO
|
|
import inspect
|
|
|
|
# Read the Jinja2 template that sits next to this module (located through
# pkg_resources) once at import time, and compile it for railroad_to_html().
with open(resource_filename(__name__, "template.jinja2"), encoding="utf-8") as fp:
    template = Template(fp.read())
|
|
|
|
# NOTE: a dataclass would also work here; a NamedTuple is kept so instances
# keep their tuple interface (indexing, unpacking, equality with plain tuples).
class NamedDiagram(NamedTuple):
    """
    A simple structure for associating a name with a railroad diagram
    """

    # Title used for the diagram
    name: str
    # The railroad item to render; declared optional, so it may be None
    diagram: "Optional[railroad.DiagramItem]"
    # Sort key for the diagram (to_railroad() sorts its result by this field)
    index: int


T = TypeVar("T")
|
|
|
|
|
|
class EachItem(railroad.Group):
    """
    Railroad item used to draw a pyparsing ``Each``. It is built as:

    - a Group, wrapping
    - a OneOrMore, wrapping
    - a Choice between the elements of the Each

    The group's label signals that every alternative has to be matched.
    """

    all_label = "[ALL]"

    def __init__(self, *items):
        # The first argument to Choice is the index of the last alternative
        last_index = len(items) - 1
        alternatives = railroad.Choice(last_index, *items)
        repeated = railroad.OneOrMore(item=alternatives)
        super().__init__(repeated, label=self.all_label)
|
|
|
|
|
|
class AnnotatedItem(railroad.Group):
    """
    Group subclass whose label is the given text wrapped in square brackets
    """

    def __init__(self, label: str, item):
        bracketed = f"[{label}]"
        super().__init__(item=item, label=bracketed)
|
|
|
|
|
|
class EditablePartial(Generic[T]):
    """
    Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been
    constructed.

    The callable and its arguments are stored as plain, mutable attributes (``func``, ``args``, ``kwargs``) and
    the callable is only invoked when the instance itself is called.
    """

    # We need this here because the railroad constructors actually transform the data, so can't be called until the
    # entire tree is assembled

    def __init__(self, func: Callable[..., T], args: list, kwargs: dict):
        """
        :param func: the callable to invoke when this partial is evaluated
        :param args: positional arguments for ``func``
        :param kwargs: keyword arguments for ``func``
        """
        self.func = func
        self.args = args
        self.kwargs = kwargs

    @classmethod
    def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
        """
        If you call this function in the same way that you would call the constructor, it will store the arguments
        as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
        """
        # Build through ``cls`` so that subclasses get instances of their own type
        return cls(func=func, args=list(args), kwargs=kwargs)

    @property
    def name(self):
        """
        The ``name`` keyword argument stored on this partial

        :raises KeyError: if no ``name`` keyword argument has been stored
        """
        return self.kwargs["name"]

    def __call__(self) -> T:
        """
        Evaluate the partial and return the result
        """
        # Work on copies so that evaluating does not alter the stored arguments
        args = self.args.copy()
        kwargs = self.kwargs.copy()

        # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g.
        # args=['list', 'of', 'things'])
        arg_spec = inspect.getfullargspec(self.func)
        if arg_spec.varargs in self.kwargs:
            args += kwargs.pop(arg_spec.varargs)

        return self.func(*args, **kwargs)
|
|
|
|
|
|
def railroad_to_html(diagrams: List[NamedDiagram], **kwargs) -> str:
    """
    Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams

    Entries whose ``diagram`` is None are skipped, since there is nothing to draw for them.

    :param diagrams: the named diagrams to render
    :params kwargs: kwargs to be passed in to the template
    :returns: the rendered template
    """
    data = []
    for diagram in diagrams:
        # NamedDiagram.diagram is declared Optional; calling writeSvg on None would raise
        if diagram.diagram is None:
            continue

        # Collect the SVG markup in memory through the writer callback
        buffer = StringIO()
        diagram.diagram.writeSvg(buffer.write)

        title = diagram.name
        if diagram.index == 0:
            title += " (root)"
        data.append({"title": title, "text": "", "svg": buffer.getvalue()})

    return template.render(diagrams=data, **kwargs)
|
|
|
|
|
|
def resolve_partial(partial: "EditablePartial[T]") -> T:
    """
    Walk a structure made of EditablePartials, lists and dicts and evaluate every partial found in it

    An EditablePartial has its ``args`` and ``kwargs`` resolved (and stored back on it) before it is called.
    Lists and dicts are rebuilt with their contents resolved. Any other value is returned as it is.
    """
    if isinstance(partial, EditablePartial):
        # Resolve the arguments first, then evaluate the partial itself
        partial.args = resolve_partial(partial.args)
        partial.kwargs = resolve_partial(partial.kwargs)
        return partial()

    if isinstance(partial, list):
        return [resolve_partial(entry) for entry in partial]

    if isinstance(partial, dict):
        return {key: resolve_partial(value) for key, value in partial.items()}

    return partial
|
|
|
|
|
|
def to_railroad(
    element: pyparsing.ParserElement,
    diagram_kwargs: Optional[dict] = None,
    vertical: int = 3,
    show_results_names: bool = False,
) -> List[NamedDiagram]:
    """
    Build the list of railroad diagrams for a pyparsing element tree. Use this entrypoint when you want
    to work with the Railroad tree itself before it is rendered to HTML

    :param element: base element of the parser being diagrammed
    :param diagram_kwargs: kwargs to pass to the Diagram() constructor
    :param vertical: (optional) - int - limit at which number of alternatives should be
       shown vertically instead of horizontally
    :param show_results_names - bool to indicate whether results name annotations should be
       included in the diagram
    :returns: the resolved diagrams, sorted by their ``index``
    """
    # Walk everything below (and including) the root element
    state = ConverterState(diagram_kwargs=diagram_kwargs or {})
    _to_diagram_element(
        element,
        lookup=state,
        parent=None,
        vertical=vertical,
        show_results_names=show_results_names,
    )

    # The root may still be waiting in the lookup; if so, force it out into a diagram
    root_key = id(element)
    if root_key in state:
        root_state = state[root_key]
        if not element.customName:
            root_state.name = ""
        root_state.mark_for_extraction(root_key, state, force=True)

    # Everything is collected, so the intermediate partials can now be turned into Railroad elements
    pending = list(state.diagrams.values())
    if len(pending) > 1:
        # keep only the first diagram seen for each name
        names_seen = set()
        selected = []
        for candidate in pending:
            # SkipTo elements are uninformative as subdiagrams, so they are never extracted
            if candidate.name == "...":
                continue
            if candidate.name is not None and candidate.name not in names_seen:
                names_seen.add(candidate.name)
                selected.append(candidate)
    else:
        # a lone diagram is always shown, whether or not it has a name
        selected = pending

    resolved = [resolve_partial(entry) for entry in selected]
    return sorted(resolved, key=lambda diag: diag.index)
|
|
|
|
|
|
def _should_vertical(
    specification: int, exprs: "Iterable[pyparsing.ParserElement]"
) -> bool:
    """
    Returns true if we should return a vertical list of elements

    :param specification: the ``vertical`` setting. ``None`` and ``False`` mean never vertical, ``True`` means
        always vertical, and an integer is the number of visible expressions at which we switch to vertical
    :param exprs: the child expressions of the element being laid out
    """
    if specification is None:
        return False

    # bool is a subclass of int, so it has to be handled before the numeric comparison. Otherwise
    # ``len(...) >= False`` is always true and False would force a vertical layout.
    if isinstance(specification, bool):
        return specification

    return len(_visible_exprs(exprs)) >= specification
|
|
|
|
|
|
class ElementState:
    """
    Bookkeeping kept for a single pyparsing element while the tree is being converted
    """

    # Note: this should be a dataclass, but we have to support Python 3.5
    def __init__(
        self,
        element: pyparsing.ParserElement,
        converted: EditablePartial,
        parent: EditablePartial,
        number: int,
        name: str = None,
        parent_index: Optional[int] = None,
    ):
        #: The pyparsing element this state belongs to
        self.element: pyparsing.ParserElement = element
        #: The element's name
        self.name: str = name
        #: The Railroad element for it, still in its unevaluated form
        self.converted: EditablePartial = converted
        #: The parent Railroad element, kept so this one can be swapped out of it on extraction
        self.parent: EditablePartial = parent
        #: Discovery order of the element; becomes the diagram's sort index if it is extracted
        self.number: int = number
        #: Position of this element inside its parent
        self.parent_index: Optional[int] = parent_index
        #: True once this element should be pulled out into a subdiagram
        self.extract: bool = False
        #: True once all of this element's children have been filled in
        self.complete: bool = False

    def mark_for_extraction(
        self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
    ):
        """
        Flag this element for extraction into a sub-diagram, which is needed once it has been seen twice

        :param el_id: id of the element
        :param state: element/diagram state tracker
        :param name: name to use for this element's text
        :param force: If true, extract immediately whatever the state of this element. Only useful for the
            root element, once we know the conversion is finished
        """
        self.extract = True

        # Pick a name unless one is already set: the forced name wins, then the element's
        # custom name, and otherwise the name is left empty
        if not self.name:
            self.name = name or self.element.customName or ""

        # Being flagged is not enough to extract right away: the children may not all have been added yet.
        # Trivial elements (a plain string literal etc.) are not extracted at all
        ready = force or (self.complete and _worth_extracting(self.element))
        if ready:
            state.extract_into_diagram(el_id)
|
|
|
|
|
|
class ConverterState:
    """
    State shared by every level of the recursion through the element tree

    Item access (``state[el_id]``, ``el_id in state``, ``del state[el_id]``) operates on the
    per-element ElementState records, keyed by element id.
    """

    def __init__(self, diagram_kwargs: Optional[dict] = None):
        #: ElementState records, keyed by the id of their ParserElement
        self._element_diagram_states: Dict[int, ElementState] = {}
        #: Subdiagrams that have been extracted, keyed by the id of their ParserElement
        self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
        #: Counter behind generate_unnamed()
        self.unnamed_index: int = 1
        #: Counter behind generate_index(); its values are used for sorting
        self.index: int = 0
        #: Keyword arguments shared by every Diagram that gets constructed
        self.diagram_kwargs: dict = diagram_kwargs or {}
        self.extracted_diagram_names: Set[str] = set()

    def __setitem__(self, key: int, value: ElementState):
        self._element_diagram_states[key] = value

    def __getitem__(self, key: int) -> ElementState:
        return self._element_diagram_states[key]

    def __delitem__(self, key: int):
        del self._element_diagram_states[key]

    def __contains__(self, key: int):
        return key in self._element_diagram_states

    def generate_unnamed(self) -> int:
        """
        Advance the unnamed-diagram counter and return its new value
        """
        advanced = self.unnamed_index + 1
        self.unnamed_index = advanced
        return advanced

    def generate_index(self) -> int:
        """
        Advance the diagram index counter and return its new value
        """
        advanced = self.index + 1
        self.index = advanced
        return advanced

    def extract_into_diagram(self, el_id: int):
        """
        Move an element into its own subdiagram. This is used when the same token is met twice
        in one tree: the token's slot in its parent is replaced by a reference to it, and a new
        subdiagram is recorded for the token itself
        """
        record = self[el_id]

        # Point the parent at a NonTerminal instead of the element's full definition
        owner = record.parent
        if owner:
            reference = EditablePartial.from_call(railroad.NonTerminal, text=record.name)
            if "item" in owner.kwargs:
                owner.kwargs["item"] = reference
            elif "items" in owner.kwargs:
                owner.kwargs["items"][record.parent_index] = reference

        # For a group, the diagram is drawn from the group's content; the name carries the title
        converted = record.converted
        if converted.func == railroad.Group:
            body = converted.kwargs["item"]
        else:
            body = converted

        drawing = EditablePartial.from_call(
            railroad.Diagram, body, **self.diagram_kwargs
        )
        self.diagrams[el_id] = EditablePartial.from_call(
            NamedDiagram,
            name=record.name,
            diagram=drawing,
            index=record.number,
        )

        # The element now lives in ``diagrams`` and is no longer tracked as pending
        del self[el_id]
|
|
|
|
|
|
def _worth_extracting(element: pyparsing.ParserElement) -> bool:
    """
    Decide whether an element deserves a sub-diagram of its own. The rule is simple: it does
    when at least one of its children has children itself
    """
    return any(child.recurse() for child in element.recurse())
|
|
|
|
|
|
def _apply_diagram_item_enhancements(fn):
    """
    decorator to ensure enhancements to a diagram item (such as results name annotations)
    get applied on return from _to_diagram_element (we do this since there are several
    returns in _to_diagram_element)

    :param fn: the conversion function to wrap; it is called with the seven arguments of ``_inner``,
        passed positionally
    :returns: the wrapper, which keeps the name and docstring of ``fn``
    """
    # Local import so this block does not rely on the module-level import list
    import functools

    @functools.wraps(fn)
    def _inner(
        element: "pyparsing.ParserElement",
        parent: "Optional[EditablePartial]",
        lookup: "ConverterState" = None,
        vertical: int = None,
        index: int = 0,
        name_hint: str = None,
        show_results_names: bool = False,
    ) -> "Optional[EditablePartial]":

        ret = fn(
            element,
            parent,
            lookup,
            vertical,
            index,
            name_hint,
            show_results_names,
        )

        # apply annotation for results name, if present
        if show_results_names and ret is not None:
            element_results_name = element.resultsName
            if element_results_name:
                # add "*" to indicate if this is a "list all results" name
                element_results_name += "" if element.modalResults else "*"
                # wrap the converted item in a Group labelled with the results name
                ret = EditablePartial.from_call(
                    railroad.Group, item=ret, label=element_results_name
                )

        return ret

    return _inner
|
|
|
|
|
|
def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]):
    """
    Return, as a list, the expressions that have neither a custom name nor a results name and
    that are not instances of one of the excluded element types
    """
    excluded_types = (
        pyparsing.ParseElementEnhance,
        pyparsing.PositionToken,
        pyparsing.And._ErrorStop,
    )

    visible = []
    for candidate in exprs:
        if candidate.customName or candidate.resultsName:
            continue
        if isinstance(candidate, excluded_types):
            continue
        visible.append(candidate)
    return visible
|
|
|
|
|
|
@_apply_diagram_item_enhancements
def _to_diagram_element(
    element: pyparsing.ParserElement,
    parent: Optional[EditablePartial],
    lookup: ConverterState = None,
    vertical: int = None,
    index: int = 0,
    name_hint: str = None,
    show_results_names: bool = False,
) -> Optional[EditablePartial]:
    """
    Recursively converts a PyParsing Element to a railroad Element

    :param element: The pyparsing element to convert
    :param lookup: The shared converter state that keeps track of useful things
    :param index: The index of this element within the parent
    :param parent: The parent of this element in the output tree
    :param vertical: Controls at what point we make a list of elements vertical. If this is an integer (the default),
        it sets the threshold of the number of items before we go vertical. If True, always go vertical, if False, never
        do so
    :param name_hint: If provided, this will override the generated name
    :param show_results_names: bool flag indicating whether to add annotations for results names
    :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed.
        None is returned for elements that are left out of the diagram
    """
    exprs = element.recurse()
    # Name precedence: explicit hint, then the element's custom name, then its class name
    name = name_hint or element.customName or element.__class__.__name__

    # Python's id() is used to provide a unique identifier for elements
    el_id = id(element)

    element_results_name = element.resultsName

    # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram
    if not element.customName:
        if isinstance(
            element,
            (
                pyparsing.TokenConverter,
                # pyparsing.Forward,
                pyparsing.Located,
            ),
        ):
            # However, if this element has a useful custom name, and its child does not, we can pass it on to the child
            if exprs:
                if not exprs[0].customName:
                    propagated_name = name
                else:
                    propagated_name = None

                # Convert the wrapped expression in place of the wrapper (same parent and index)
                return _to_diagram_element(
                    element.expr,
                    parent=parent,
                    lookup=lookup,
                    vertical=vertical,
                    index=index,
                    name_hint=propagated_name,
                    show_results_names=show_results_names,
                )

    # If the element isn't worth extracting, we always treat it as the first time we see it
    if _worth_extracting(element):
        if el_id in lookup:
            # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
            # so we have to extract it into a new diagram.
            looked_up = lookup[el_id]
            looked_up.mark_for_extraction(el_id, lookup, name=name_hint)
            ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name)
            return ret

        elif el_id in lookup.diagrams:
            # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
            # just put in a marker element that refers to the sub-diagram
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )
            return ret

    # Recursively convert child elements
    # Here we find the most relevant Railroad element for matching pyparsing Element
    # We use ``items=[]`` here to hold the place for where the child elements will go once created
    # (single-child containers use ``item=""`` or ``item=None`` as their placeholder instead)
    if isinstance(element, pyparsing.And):
        # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat
        # (all will have the same name, and resultsName)
        if not exprs:
            return None
        if len(set((e.name, e.resultsName) for e in exprs)) == 1:
            ret = EditablePartial.from_call(
                railroad.OneOrMore, item="", repeat=str(len(exprs))
            )
        elif _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Stack, items=[])
        else:
            ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
        if not exprs:
            return None
        if _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
        else:
            ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
    elif isinstance(element, pyparsing.Each):
        if not exprs:
            return None
        ret = EditablePartial.from_call(EachItem, items=[])
    elif isinstance(element, pyparsing.NotAny):
        ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="")
    elif isinstance(element, pyparsing.FollowedBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
    elif isinstance(element, pyparsing.PrecededBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
    elif isinstance(element, pyparsing.Opt):
        ret = EditablePartial.from_call(railroad.Optional, item="")
    elif isinstance(element, pyparsing.OneOrMore):
        ret = EditablePartial.from_call(railroad.OneOrMore, item="")
    elif isinstance(element, pyparsing.ZeroOrMore):
        ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
    elif isinstance(element, pyparsing.Group):
        ret = EditablePartial.from_call(
            railroad.Group, item=None, label=element_results_name
        )
    elif isinstance(element, pyparsing.Empty) and not element.customName:
        # Skip unnamed "Empty" elements
        ret = None
    elif len(exprs) > 1:
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif len(exprs) > 0 and not element_results_name:
        ret = EditablePartial.from_call(railroad.Group, item="", label=name)
    else:
        # Anything else is drawn as a Terminal labelled with the element's default name
        terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName)
        ret = terminal

    if ret is None:
        return

    # Indicate this element's position in the tree so we can extract it if necessary
    lookup[el_id] = ElementState(
        element=element,
        converted=ret,
        parent=parent,
        parent_index=index,
        number=lookup.generate_index(),
    )
    # Elements with a custom name are marked for extraction as soon as they are registered
    if element.customName:
        lookup[el_id].mark_for_extraction(el_id, lookup, element.customName)

    # ``i`` is the slot in ``items`` for the next child; it only advances when a child is kept
    i = 0
    for expr in exprs:
        # Add a placeholder index in case we have to extract the child before we even add it to the parent
        if "items" in ret.kwargs:
            ret.kwargs["items"].insert(i, None)

        item = _to_diagram_element(
            expr,
            parent=ret,
            lookup=lookup,
            vertical=vertical,
            index=i,
            show_results_names=show_results_names,
        )

        # Some elements don't need to be shown in the diagram
        if item is not None:
            if "item" in ret.kwargs:
                ret.kwargs["item"] = item
            elif "items" in ret.kwargs:
                # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
                ret.kwargs["items"][i] = item
                i += 1
        elif "items" in ret.kwargs:
            # If we're supposed to skip this element, remove it from the parent
            del ret.kwargs["items"][i]

    # If all of this item's children are None, show this item as a plain Terminal carrying its name
    if ret and (
        ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0)
        or ("item" in ret.kwargs and ret.kwargs["item"] is None)
    ):
        ret = EditablePartial.from_call(railroad.Terminal, name)

    # Mark this element as "complete", ie it has all of its children
    if el_id in lookup:
        lookup[el_id].complete = True

    # An element that was marked for extraction while its children were still being added is extracted now,
    # and a reference to the new subdiagram is returned in its place
    if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete:
        lookup.extract_into_diagram(el_id)
        if ret is not None:
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )

    return ret
|