# Source code for cassis.typesystem

import re
import warnings
from collections import defaultdict
from copy import copy
from enum import Enum, auto
from io import BytesIO
from itertools import chain, filterfalse
from pathlib import Path
from typing import IO, Any, Callable, Dict, Iterator, List, Optional, Set, Union

import attr
from deprecation import deprecated
from lxml import etree
from more_itertools import unique_everseen
from toposort import toposort_flatten

# Fully qualified name of the root type. Holds the same string as TYPE_NAME_TOP below.
TOP_TYPE_NAME = "uima.cas.TOP"

# Separator between the segments of a fully qualified type name
NAMESPACE_SEPARATOR = "."

# Fully qualified names of the types in the "uima.cas" namespace
NAME_SPACE_UIMA_CAS = "uima" + NAMESPACE_SEPARATOR + "cas"
UIMA_CAS_PREFIX = NAME_SPACE_UIMA_CAS + NAMESPACE_SEPARATOR
TYPE_NAME_TOP = UIMA_CAS_PREFIX + "TOP"
TYPE_NAME_INTEGER = UIMA_CAS_PREFIX + "Integer"
TYPE_NAME_FLOAT = UIMA_CAS_PREFIX + "Float"
TYPE_NAME_STRING = UIMA_CAS_PREFIX + "String"
TYPE_NAME_BOOLEAN = UIMA_CAS_PREFIX + "Boolean"
TYPE_NAME_BYTE = UIMA_CAS_PREFIX + "Byte"
TYPE_NAME_SHORT = UIMA_CAS_PREFIX + "Short"
TYPE_NAME_LONG = UIMA_CAS_PREFIX + "Long"
TYPE_NAME_DOUBLE = UIMA_CAS_PREFIX + "Double"
TYPE_NAME_ARRAY_BASE = UIMA_CAS_PREFIX + "ArrayBase"
TYPE_NAME_FS_ARRAY = UIMA_CAS_PREFIX + "FSArray"
TYPE_NAME_FS_LIST = UIMA_CAS_PREFIX + "FSList"
TYPE_NAME_EMPTY_FS_LIST = UIMA_CAS_PREFIX + "EmptyFSList"
TYPE_NAME_NON_EMPTY_FS_LIST = UIMA_CAS_PREFIX + "NonEmptyFSList"
TYPE_NAME_INTEGER_ARRAY = UIMA_CAS_PREFIX + "IntegerArray"
TYPE_NAME_INTEGER_LIST = UIMA_CAS_PREFIX + "IntegerList"
TYPE_NAME_EMPTY_INTEGER_LIST = UIMA_CAS_PREFIX + "EmptyIntegerList"
TYPE_NAME_NON_EMPTY_INTEGER_LIST = UIMA_CAS_PREFIX + "NonEmptyIntegerList"
TYPE_NAME_FLOAT_ARRAY = UIMA_CAS_PREFIX + "FloatArray"
TYPE_NAME_FLOAT_LIST = UIMA_CAS_PREFIX + "FloatList"
TYPE_NAME_EMPTY_FLOAT_LIST = UIMA_CAS_PREFIX + "EmptyFloatList"
TYPE_NAME_NON_EMPTY_FLOAT_LIST = UIMA_CAS_PREFIX + "NonEmptyFloatList"
TYPE_NAME_STRING_ARRAY = UIMA_CAS_PREFIX + "StringArray"
TYPE_NAME_STRING_LIST = UIMA_CAS_PREFIX + "StringList"
TYPE_NAME_EMPTY_STRING_LIST = UIMA_CAS_PREFIX + "EmptyStringList"
TYPE_NAME_NON_EMPTY_STRING_LIST = UIMA_CAS_PREFIX + "NonEmptyStringList"
TYPE_NAME_BOOLEAN_ARRAY = UIMA_CAS_PREFIX + "BooleanArray"
TYPE_NAME_BYTE_ARRAY = UIMA_CAS_PREFIX + "ByteArray"
TYPE_NAME_SHORT_ARRAY = UIMA_CAS_PREFIX + "ShortArray"
TYPE_NAME_LONG_ARRAY = UIMA_CAS_PREFIX + "LongArray"
TYPE_NAME_DOUBLE_ARRAY = UIMA_CAS_PREFIX + "DoubleArray"
TYPE_NAME_FS_HASH_SET = UIMA_CAS_PREFIX + "FSHashSet"
TYPE_NAME_ANNOTATION_BASE = UIMA_CAS_PREFIX + "AnnotationBase"

# Name of the Sofa type and the base names of its features
TYPE_NAME_SOFA = UIMA_CAS_PREFIX + "Sofa"
FEATURE_BASE_NAME_SOFANUM = "sofaNum"
FEATURE_BASE_NAME_SOFAID = "sofaID"
FEATURE_BASE_NAME_SOFAMIME = "mimeType"
FEATURE_BASE_NAME_SOFAURI = "sofaURI"
FEATURE_BASE_NAME_SOFASTRING = "sofaString"
FEATURE_BASE_NAME_SOFAARRAY = "sofaArray"

# Fully qualified names of the types in the "uima.tcas" namespace, followed by further feature base names
NAME_SPACE_UIMA_TCAS = "uima" + NAMESPACE_SEPARATOR + "tcas"
UIMA_TCAS_PREFIX = NAME_SPACE_UIMA_TCAS + NAMESPACE_SEPARATOR
TYPE_NAME_ANNOTATION = UIMA_TCAS_PREFIX + "Annotation"
TYPE_NAME_DOCUMENT_ANNOTATION = UIMA_TCAS_PREFIX + "DocumentAnnotation"
FEATURE_BASE_NAME_SOFA = "sofa"
FEATURE_BASE_NAME_BEGIN = "begin"
FEATURE_BASE_NAME_END = "end"
FEATURE_BASE_NAME_LANGUAGE = "language"
FEATURE_BASE_NAME_HEAD = "head"
FEATURE_BASE_NAME_TAIL = "tail"
# Holds the same string as TYPE_NAME_DOCUMENT_ANNOTATION
_DOCUMENT_ANNOTATION_TYPE = "uima.tcas.DocumentAnnotation"

# Type names for which `is_predefined` returns True.
# Note that the document annotation type is not a member of this set.
_PREDEFINED_TYPES = {
    "uima.cas.TOP",
    "uima.cas.NULL",
    "uima.cas.Boolean",
    "uima.cas.Byte",
    "uima.cas.Short",
    "uima.cas.Integer",
    "uima.cas.Long",
    "uima.cas.Float",
    "uima.cas.Double",
    "uima.cas.String",
    "uima.cas.ArrayBase",
    "uima.cas.FSArray",
    "uima.cas.FloatArray",
    "uima.cas.IntegerArray",
    "uima.cas.StringArray",
    "uima.cas.ListBase",
    "uima.cas.FSList",
    "uima.cas.EmptyFSList",
    "uima.cas.NonEmptyFSList",
    "uima.cas.FloatList",
    "uima.cas.EmptyFloatList",
    "uima.cas.NonEmptyFloatList",
    "uima.cas.IntegerList",
    "uima.cas.EmptyIntegerList",
    "uima.cas.NonEmptyIntegerList",
    "uima.cas.StringList",
    "uima.cas.EmptyStringList",
    "uima.cas.NonEmptyStringList",
    "uima.cas.BooleanArray",
    "uima.cas.ByteArray",
    "uima.cas.ShortArray",
    "uima.cas.LongArray",
    "uima.cas.DoubleArray",
    "uima.cas.Sofa",
    "uima.cas.AnnotationBase",
    TYPE_NAME_ANNOTATION,
}

# Type names that `is_primitive` treats as primitive (a type also counts if one of its supertypes is listed here)
_PRIMITIVE_TYPES = {
    "uima.cas.Boolean",
    "uima.cas.Byte",
    "uima.cas.Short",
    "uima.cas.Integer",
    "uima.cas.Long",
    "uima.cas.Float",
    "uima.cas.Double",
    "uima.cas.String",
}

# Names of all array and list types, including the base types; consulted by `is_collection`
_COLLECTION_TYPES = {
    "uima.cas.ArrayBase",
    "uima.cas.FSArray",
    "uima.cas.FloatArray",
    "uima.cas.IntegerArray",
    "uima.cas.StringArray",
    "uima.cas.ListBase",
    "uima.cas.FSList",
    "uima.cas.EmptyFSList",
    "uima.cas.NonEmptyFSList",
    "uima.cas.FloatList",
    "uima.cas.EmptyFloatList",
    "uima.cas.NonEmptyFloatList",
    "uima.cas.IntegerList",
    "uima.cas.EmptyIntegerList",
    "uima.cas.NonEmptyIntegerList",
    "uima.cas.StringList",
    "uima.cas.EmptyStringList",
    "uima.cas.NonEmptyStringList",
    "uima.cas.BooleanArray",
    "uima.cas.ByteArray",
    "uima.cas.ShortArray",
    "uima.cas.LongArray",
    "uima.cas.DoubleArray",
}

# Names of the array and list types whose elements are primitives; consulted by `is_primitive_collection`.
# The base types and the FS array/list types are not part of this set.
_PRIMITIVE_COLLECTION_TYPES = {
    "uima.cas.FloatArray",
    "uima.cas.IntegerArray",
    "uima.cas.StringArray",
    "uima.cas.FloatList",
    "uima.cas.EmptyFloatList",
    "uima.cas.NonEmptyFloatList",
    "uima.cas.IntegerList",
    "uima.cas.EmptyIntegerList",
    "uima.cas.NonEmptyIntegerList",
    "uima.cas.StringList",
    "uima.cas.EmptyStringList",
    "uima.cas.NonEmptyStringList",
    "uima.cas.BooleanArray",
    "uima.cas.ByteArray",
    "uima.cas.ShortArray",
    "uima.cas.LongArray",
    "uima.cas.DoubleArray",
}

# Names of the array types with primitive elements; consulted by `is_primitive_array`
_PRIMITIVE_ARRAY_TYPES = {
    "uima.cas.FloatArray",
    "uima.cas.IntegerArray",
    "uima.cas.BooleanArray",
    "uima.cas.ByteArray",
    "uima.cas.ShortArray",
    "uima.cas.LongArray",
    "uima.cas.DoubleArray",
    "uima.cas.StringArray",
}

# Names of the list types with primitive elements; only the three base list names are listed here, the
# Empty*/NonEmpty* variants are not. Consulted by `is_primitive_list`.
_PRIMITIVE_LIST_TYPES = {TYPE_NAME_INTEGER_LIST, TYPE_NAME_FLOAT_LIST, TYPE_NAME_STRING_LIST}

# This is the very same set object as `_PRIMITIVE_ARRAY_TYPES`, not a copy
_INHERITANCE_FINAL_TYPES = _PRIMITIVE_ARRAY_TYPES

# All array types: the primitive arrays plus the FS array; consulted by `is_array`
_ARRAY_TYPES = _PRIMITIVE_ARRAY_TYPES | {TYPE_NAME_FS_ARRAY}

# All list types: the primitive lists plus the FS list; consulted by `is_list`
_LIST_TYPES = _PRIMITIVE_LIST_TYPES | {TYPE_NAME_FS_LIST}


class TypeSystemMode(Enum):
    """How much type system information to include.

    The code that consumes this enum is not part of this section of the file, so the member notes
    below are assumptions derived from the member names only.
    """

    FULL = auto()  # presumably: include the complete type system - TODO confirm against the consumer
    MINIMAL = auto()  # presumably: include only a reduced part of the type system - TODO confirm
    NONE = auto()  # presumably: include no type system information at all - TODO confirm


def array_type_name_for_type(type_: Union[str, "Type"]) -> str:
    """Returns the name of the array type that holds elements of the given type.

    Args:
        type_: The element type (`Type` or name as string)
    Returns:
        The name of the matching primitive array type if the element type is one of the primitive
        types, else the name of the generic FS array type
    """
    element_name = type_.name if not isinstance(type_, str) else type_

    # The table is built on each call so that the constants are looked up when the function runs
    element_to_array = (
        (TYPE_NAME_BYTE, TYPE_NAME_BYTE_ARRAY),
        (TYPE_NAME_FLOAT, TYPE_NAME_FLOAT_ARRAY),
        (TYPE_NAME_DOUBLE, TYPE_NAME_DOUBLE_ARRAY),
        (TYPE_NAME_BOOLEAN, TYPE_NAME_BOOLEAN_ARRAY),
        (TYPE_NAME_INTEGER, TYPE_NAME_INTEGER_ARRAY),
        (TYPE_NAME_SHORT, TYPE_NAME_SHORT_ARRAY),
        (TYPE_NAME_LONG, TYPE_NAME_LONG_ARRAY),
        (TYPE_NAME_STRING, TYPE_NAME_STRING_ARRAY),
    )
    for primitive_name, array_name in element_to_array:
        if element_name == primitive_name:
            return array_name

    # Everything that is not a primitive is stored in a feature structure array
    return TYPE_NAME_FS_ARRAY


def element_type_name_for_array_type(type_: Union[str, "Type"]) -> str:
    """Returns the name of the element type stored in the given array type.

    Args:
        type_: The array type (`Type` or name as string)
    Returns:
        The name of the matching primitive type if the given type is one of the primitive array
        types, else the name of the root type
    """
    array_name = type_.name if not isinstance(type_, str) else type_

    # The table is built on each call so that the constants are looked up when the function runs
    array_to_element = (
        (TYPE_NAME_BYTE_ARRAY, TYPE_NAME_BYTE),
        (TYPE_NAME_FLOAT_ARRAY, TYPE_NAME_FLOAT),
        (TYPE_NAME_DOUBLE_ARRAY, TYPE_NAME_DOUBLE),
        (TYPE_NAME_BOOLEAN_ARRAY, TYPE_NAME_BOOLEAN),
        (TYPE_NAME_INTEGER_ARRAY, TYPE_NAME_INTEGER),
        (TYPE_NAME_SHORT_ARRAY, TYPE_NAME_SHORT),
        (TYPE_NAME_LONG_ARRAY, TYPE_NAME_LONG),
        (TYPE_NAME_STRING_ARRAY, TYPE_NAME_STRING),
    )
    for primitive_array_name, element_name in array_to_element:
        if array_name == primitive_array_name:
            return element_name

    # Any other type name (including the FS array) maps to the root type
    return TYPE_NAME_TOP


def _string_to_valid_classname(name: str):
    return re.sub("[^a-zA-Z0-9_]", "_", name)


def is_predefined(type_: Union[str, "Type"]) -> bool:
    """Checks if the given type is one of the predefined types.

    Args:
        type_: The type to check (`Type` or name as string)
    Returns:
        Returns True if the name of the given type is contained in the set of predefined type names, else False
    """
    if isinstance(type_, str):
        return type_ in _PREDEFINED_TYPES
    return type_.name in _PREDEFINED_TYPES


def is_collection(type_: Union[str, "Type"], feature: "Feature") -> bool:
    """Checks if the given feature of the given type is a collection, e.g. list or array.

    Args:
        type_: The type to which the feature belongs (`Type` or name as string)
        feature: The feature to query for.
    Returns:
        Returns True if the feature is the `elements` feature of a collection type or if the range type of the
        feature is a collection type, else False
    """
    owner_name = type_.name if not isinstance(type_, str) else type_

    # The `elements` feature of a collection type is a collection regardless of its declared range
    owner_holds_elements = owner_name in _COLLECTION_TYPES and feature.name == "elements"
    return owner_holds_elements or feature.rangeType.name in _COLLECTION_TYPES


def is_primitive(type_: "Type") -> bool:
    """Checks if the given type or one of its supertypes is a primitive type.

    Args:
        type_: Type to query for
    Returns:
        Returns True if the type or one of its ancestors is a primitive type, else False
    """
    current = type_

    # Walk up the inheritance chain until a primitive type or the root type is reached
    while True:
        current_name = current.name
        if current_name == TOP_TYPE_NAME:
            return False
        if current_name in _PRIMITIVE_TYPES:
            return True
        current = current.supertype


def is_primitive_collection(type_: "Type") -> bool:
    """Checks if the given type or one of its supertypes is a primitive collection, e.g. list or array of primitives.

    Args:
        type_: Type to query for
    Returns:
        Returns True if the type or one of its ancestors is a primitive collection type, else False
    """
    current = type_

    # Walk up the inheritance chain until a primitive collection type or the root type is reached
    while True:
        current_name = current.name
        if current_name == TOP_TYPE_NAME:
            return False
        if current_name in _PRIMITIVE_COLLECTION_TYPES:
            return True
        current = current.supertype


def is_primitive_array(type_: Union[str, "Type"]) -> bool:
    """Checks if the given type is a primitive array, e.g. array of primitives.

    Args:
        type_: Type to query for (`Type` or name as string)
    Returns:
        Returns `True` if the given type is a primitive array type, else `False`
    """
    candidate = type_.name if not isinstance(type_, str) else type_

    # Only the name itself is looked up, the inheritance hierarchy is not consulted
    return candidate != TOP_TYPE_NAME and candidate in _PRIMITIVE_ARRAY_TYPES


def is_primitive_list(type_: Union[str, "Type"]) -> bool:
    """Checks if the given type is a primitive list, e.g. list of primitives.

    Args:
        type_: Type to query for (`Type` or name as string)
    Returns:
        Returns `True` if the given type is a primitive list type, else `False`
    """
    candidate = type_.name if not isinstance(type_, str) else type_

    # Only the name itself is looked up, the inheritance hierarchy is not consulted
    return candidate != TOP_TYPE_NAME and candidate in _PRIMITIVE_LIST_TYPES


def is_array(type_: Union[str, "Type"]) -> bool:
    """Checks if the given type is an array (primitive array or FS array).

    Args:
        type_: Type to query for (`Type` or name as string)
    Returns:
        Returns `True` if the given type is an array type, else `False`
    """
    candidate = type_.name if not isinstance(type_, str) else type_

    # Only the name itself is looked up, the inheritance hierarchy is not consulted
    return candidate != TOP_TYPE_NAME and candidate in _ARRAY_TYPES


def is_list(type_: Union[str, "Type"]) -> bool:
    """Checks if the given type is a list (primitive list or FS list).

    Args:
        type_: Type to query for (`Type` or name as string)
    Returns:
        Returns `True` if the given type is a list type, else `False`
    """
    candidate = type_.name if not isinstance(type_, str) else type_

    # Only the name itself is looked up, the inheritance hierarchy is not consulted
    return candidate != TOP_TYPE_NAME and candidate in _LIST_TYPES


@attr.s
class TypeCheckError(Exception):
    """Describes a type error of a single feature structure, identified by its xmiID."""

    xmiID: int = attr.ib()  # xmiID of the feature structure with type error
    description: str = attr.ib()  # Description of the type check error


@attr.s
class TypeNotFoundError(Exception):
    """Exception carrying a message; presumably raised when a type lookup fails (raise sites are not visible here)."""

    message: str = attr.ib()  # Description of the error


@attr.s
class AnnotationHasNoSofa(Exception):
    """Raised by `FeatureStructure.get_covered_text` when the `sofa` of the feature structure is `None`."""

    message: str = attr.ib()  # Description of the error


@attr.s(slots=True, hash=False, eq=True, order=True, repr=False)
class FeatureStructure:
    """The base class for all feature structure instances.

    Feature values are stored as plain attributes. In addition, they can be read and written via dotted
    feature paths using `get`/`set` or the subscript syntax, e.g. `fs["a.b"]`.
    """

    type: "Type" = attr.ib()  # Type of this feature structure instance
    xmiID: int = attr.ib(default=None, eq=False)  # xmiID of this feature structure instance

    def value(self, name: str):
        """Returns the value of the feature `name`."""
        return getattr(self, name)

    def get_covered_text(self) -> Optional[str]:
        """Gets the text that is covered by this feature structure iff it is associated with a sofa and has a begin/end.

        Returns:
            The text covered by the annotation or `None` if the sofa has no sofa string

        Raises:
            AnnotationHasNoSofa: If the `sofa` feature is `None`
            NotImplementedError: If this feature structure lacks a `sofa`, `begin` or `end` attribute

        """
        if hasattr(self, "sofa") and hasattr(self, "begin") and hasattr(self, "end"):
            if self.sofa is None:
                raise AnnotationHasNoSofa(
                    "Annotations must have a SofA (be added to a CAS) before get_covered_text() can be called"
                )
            if self.sofa.sofaString is None:
                return None
            return self.sofa.sofaString[self.begin : self.end]
        else:
            raise NotImplementedError()

    def get(self, path: str) -> Optional[Any]:
        """Recursively gets an attribute, e.g. fs.get("a.b.c") would return attribute `c` of `b` of `a`.

        If you have nested feature structures, e.g. a feature structure with feature `a` that has a feature `b` that
        has a feature `c`, some of which can be `None`, then you can use the following:

            fs.get("a.b.c")

        Returns:
            The value at the end of the path or `None` as soon as any step of the path is missing or `None`

        Raises:
            AttributeError: If `path` is not a string
        """
        if not isinstance(path, str):
            raise AttributeError(f"Feature path [{path}] must be a string but is a [{type(path)}]")

        cur = self
        for part in path.split("."):
            cur = getattr(cur, part, None)
            if cur is None:
                return None

        return cur

    def set(self, path: str, value: Any):
        """Recursively sets an attribute, e.g. fs.set("a.b.c", 42) would set attribute `c` of `b` of `a` to `42`.

        Raises:
            AttributeError: If the part of the path before the last dot cannot be resolved to an object
        """

        if "." not in path:
            setattr(self, path, value)
            return

        # Everything before the last dot addresses the object that owns the attribute to set
        owner_path, _, value_name = path.rpartition(".")

        target = self.get(owner_path)

        if target is None:
            # Report the path that could not be resolved; `target` itself is always `None` at this point
            raise AttributeError(
                f"Attribute with name [{value_name}] cannot be set because feature path [{owner_path}] "
                f"does not resolve to an object"
            )

        setattr(target, value_name, value)

    def __getitem__(self, key):
        return self.get(key)

    def __setitem__(self, key, value):
        return self.set(key, value)

    def __hash__(self):
        # NOTE(review): Python requires `__hash__` to return an int, so hashing fails while `xmiID` is still `None`
        return self.xmiID

    # NOTE(review): the class decorator requests `eq=True`; according to the attrs documentation a generated
    # `__eq__` then replaces a hand-written one unless `auto_detect` is enabled - confirm which one is in effect.
    def __eq__(self, other):
        return self.__slots__ == other.__slots__

    def __str__(self):
        def _abbreviate_type_name(type_name: str):
            """Turns long type names like `de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token` to
            something shorter like `d.t.u.d.c.a.s.t.Token`.
            """
            parts = type_name.split(".")
            # `part[:1]` instead of `part[0]` so that an empty segment does not raise an `IndexError`
            abbreviated = [part[:1] for part in parts[:-1]]
            abbreviated.append(parts[-1])
            return ".".join(abbreviated)

        # Collect all features that are set, skipping those that are references to surrounding structures
        values = {}
        for feature in self.type.all_features:
            name = feature.name
            value = getattr(self, name)

            if value is not None and name not in {"sofa", "parent", "type"}:
                values[name] = value

        s = ", ".join(f"{n}={v}" for n, v in sorted(values.items()))
        return f"{_abbreviate_type_name(self.type.name)}({s})"

    def __repr__(self):
        return str(self)



[docs]
@attr.s(slots=True, eq=False, order=False, repr=False)
class Feature:
    """A feature defines one attribute of a feature structure"""

    name: str = attr.ib()  # Name of the feature
    domainType: "Type" = attr.ib()  # Type on which the feature is defined; not part of the equality check
    rangeType: "Type" = attr.ib()  # Type of the values of the feature
    description: str = attr.ib(default=None)
    elementType: "Type" = attr.ib(default=None)  # `None` is treated like the root type when comparing
    multipleReferencesAllowed: bool = attr.ib(default=None)  # `None` is treated like `False` when comparing
    _has_reserved_name: bool = attr.ib(default=False)

    def __eq__(self, other):
        """Compares two features by name, description, range type name, element type name and multiref flag.

        Returns:
            `False` if `other` is not a `Feature` or differs in one of the compared properties, else `True`
        """
        if not isinstance(other, Feature):
            return False

        if self.name != other.name or self.description != other.description:
            return False

        if self.rangeType.name != other.rangeType.name:
            return False

        # If elementType is `None`, then we assume the default is `TOP`
        element_type_name = self.elementType.name if self.elementType else None
        other_element_type_name = other.elementType.name if other.elementType else None
        if (element_type_name or TOP_TYPE_NAME) != (other_element_type_name or TOP_TYPE_NAME):
            return False

        # If multipleReferencesAllowed is `None`, then we assume the default is `False`.
        # The value for `other` has to be read from `other`, else this check could never fail.
        self_multiref = False if self.multipleReferencesAllowed is None else self.multipleReferencesAllowed
        other_multiref = False if other.multipleReferencesAllowed is None else other.multipleReferencesAllowed
        if self_multiref != other_multiref:
            return False

        return True

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        # Features are ordered by name only
        return self.name < other.name

    def __str__(self):
        return f"Feature(name={self.name})"

    def __repr__(self):
        return str(self)




[docs]
@attr.s(slots=True, hash=False, eq=True, repr=False)
class Type:
    """Describes types in a type system.

    Instances of this class should not be created by hand, instead the type
    system's `create_type` should be used.

    """

    name: str = attr.ib()  #: Type name of this type
    supertype: "Type" = attr.ib()  #: The super type (parent) of this type
    description: str = attr.ib(default=None)  #: Description of this type
    typesystem: "TypeSystem" = attr.ib(default=None)  #: The typesystem this type belongs to
    _children: Dict[str, "Type"] = attr.ib(factory=dict)
    _features: Dict[str, Feature] = attr.ib(factory=dict)
    _inherited_features: Dict[str, Feature] = attr.ib(factory=dict)
    _constructor_fn = attr.ib(init=False, eq=False, order=False, repr=False)
    _constructor: Callable[[Dict], FeatureStructure] = attr.ib(default=None, eq=False, order=False, repr=False)
    _cached_all_features = attr.ib(default=None, eq=False, order=False, repr=False)

    def __attrs_post_init__(self):
        """Build the constructor that can create feature structures of this type"""
        name = _string_to_valid_classname(self.name)
        fields = {feature.name: attr.ib(default=None, repr=(feature.name != "sofa")) for feature in self.all_features}
        fields["type"] = attr.ib(default=self)

        # We assign this to a lambda to make it lazy
        # When creating large type systems, almost no types are used so
        # creating them on the fly is on average better
        self._constructor_fn = lambda: attr.make_class(
            name, fields, bases=(FeatureStructure,), slots=True, eq=False, order=False
        )

    def __call__(self, **kwargs) -> FeatureStructure:
        """Creates a feature structure of this type

        When called with keyword arguments whose keys are the feature names and values are the
        respective feature values, then a new feature structure instance is created.

        Returns:
            A new feature structure instance of this type.

        """
        # The class for the feature structures is only built on first use
        if self._constructor is None:
            self._constructor = self._constructor_fn()

        return self._constructor(**kwargs)

    def get_feature(self, name: str) -> Optional[Feature]:
        """Find a feature by name

        This returns `None` if this type does not contain a feature
        with the given `name`.

        Args:
            name: The name of the feature

        Returns:
            The feature with name `name` or `None` if it does not exist.
        """
        # Features defined on this type take precedence over inherited ones
        if name in self._features:
            return self._features[name]
        elif name in self._inherited_features:
            return self._inherited_features[name]
        else:
            return None

    def _add_feature(self, feature: Feature, inherited: bool = False, warn: bool = True):
        """Add the given feature to this type and, as an inherited feature, to all of its children.

        Args:
            feature: The feature
            inherited: Indicates whether this feature is inherited from a parent or not
            warn: Emit a user warning when exactly redefining features

        Raises:
            ValueError: If a feature with the same name already exists but is defined differently

        """
        # Clear the feature cache when adding a new feature. Note that this method is also called by supertypes when
        # a feature is added to them so that the subtypes receive the new feature as an inherited feature.
        self._cached_all_features = None
        target = self._features if not inherited else self._inherited_features

        # Check that feature is not defined in on current type
        if feature.name in target:
            redefined_feature = target[feature.name]

            if redefined_feature != feature:
                msg = "Feature with name [{}] already exists in [{}] but is redefined differently!".format(
                    feature.name, self.name
                )
                raise ValueError(msg)
            elif warn:
                msg = f"Feature with name [{feature.name}] already exists in [{self.name}]!"
                warnings.warn(msg)
            return

        # Check that feature is not redefined on parent type
        if feature.name in self._inherited_features:
            redefined_feature = self._inherited_features[feature.name]

            if redefined_feature != feature:
                msg = f"For type [{self.name}] feature with name [{feature.name}] already exists in parent [{self.supertype.name}] but is redefined!"
                raise ValueError(msg)
            elif warn:
                msg = f"For type [{self.name}] feature with name [{feature.name}] already exists in parent [{self.supertype.name}]!"
                warnings.warn(msg)
            return

        target[feature.name] = feature

        # Recreate constructor to incorporate new features
        self.__attrs_post_init__()

        # Hand the caller's `warn` choice down so that children do not warn when the caller opted out
        for child_type in self._children.values():
            child_type._add_feature(feature, inherited=True, warn=warn)

    @property
    def features(self) -> Iterator[Feature]:
        """Returns an iterator over the features of this type. Inherited features are excluded. To
        find these in addition to this types' own features, use `all_features`.

        Returns:
            An iterator over all features of this type, excluding inherited ones

        """
        return iter(self._features.values())

    @property
    def all_features(self) -> List[Feature]:
        """Returns a list of the features of this type. Inherited features are included. To
        just retrieve immediate features, use `features`.

        Returns:
            A list of all features of this type, including inherited ones. The list is cached, own features
            come before inherited ones.

        """

        # In particular during (de)serialization, this method is called often and it should be fast. Thus we cache
        # the vetted list of all features instead of recalculating it every time, in particular since the type system
        # should be mostly static after the initial setup
        if self._cached_all_features is None:
            # We use `unique_everseen` here, as children could redefine parent types (Issue #56)
            self._cached_all_features = list(
                unique_everseen(chain(self._features.values(), self._inherited_features.values()))
            )

        return self._cached_all_features

    @property
    def children(self) -> Iterator["Type"]:
        """Returns an iterator over the direct children (subtypes) of this type."""
        yield from self._children.values()

    @property
    def descendants(self) -> Iterator["Type"]:
        """
        Returns an iterator of the type and any descendant types (subtypes).
        """
        yield self
        for child in self._children.values():
            yield from child.descendants

    def subsumes(self, other_type: "Type") -> bool:
        """Determines if the type `other_type` is `self` or a descendant of `self`.

        Args:
            other_type: The type to check

        Returns:
            `True` if `self` subsumes `other_type` else `False`
        """
        # The root type subsumes every type
        if self.name == TOP_TYPE_NAME:
            return True

        # Walk up the supertype chain of `other_type` and look for this type by name
        cur = other_type

        while cur:
            if self.name == cur.name:
                return True
            else:
                cur = cur.supertype

        return False

    def __hash__(self):
        return hash(self.name)

    # NOTE(review): the class decorator requests `eq=True`; according to the attrs documentation a generated
    # `__eq__` then replaces a hand-written one unless `auto_detect` is enabled - confirm which one is in effect.
    def __eq__(self, other):
        # Let Python handle the comparison with foreign objects instead of failing on the missing `name`
        if not isinstance(other, Type):
            return NotImplemented
        return self.name == other.name

    def __str__(self):
        return f"Type(name={self.name})"

    def __repr__(self):
        return str(self)




[docs]
class TypeSystem:
    def __init__(self, add_document_annotation_type: bool = True):
        """Creates a type system that is pre-populated with the built-in UIMA types.

        Args:
            add_document_annotation_type: If `True`, `_add_document_annotation_type()` is called after
                the built-in types have been created.
        """
        self._types = {}

        # We store types that are predefined but still defined in the typesystem here
        # In order to restore them when serializing
        self._predefined_types = set()

        # The type system of a UIMA CAS has several predefined types. These are
        # added in the following

        # The root type is put into `_types` directly in order to circumvent the inheritance
        root = Type(name=TOP_TYPE_NAME, supertype=None)
        self._types[root.name] = root

        # cas:NULL followed by the primitive types, all of them are direct children of TOP
        for top_level_name in (
            "uima.cas.NULL",
            "uima.cas.Boolean",
            "uima.cas.Byte",
            "uima.cas.Short",
            "uima.cas.Integer",
            "uima.cas.Long",
            "uima.cas.Float",
            "uima.cas.Double",
            "uima.cas.String",
        ):
            self.create_type(name=top_level_name, supertypeName="uima.cas.TOP")

        # Array
        array_base = self.create_type(name="uima.cas.ArrayBase", supertypeName="uima.cas.TOP")
        # FIXME "elements" is not actually a feature according to the UIMA Java SDK
        self.create_feature(array_base, name="elements", rangeType="uima.cas.TOP", multipleReferencesAllowed=True)

        for array_name in (
            "uima.cas.FSArray",
            "uima.cas.BooleanArray",
            "uima.cas.ByteArray",
            "uima.cas.ShortArray",
            "uima.cas.LongArray",
            "uima.cas.DoubleArray",
            "uima.cas.FloatArray",
            "uima.cas.IntegerArray",
            "uima.cas.StringArray",
        ):
            self.create_type(name=array_name, supertypeName="uima.cas.ArrayBase")

        # Lists
        self.create_type(name="uima.cas.ListBase", supertypeName="uima.cas.TOP")

        # Each row: list type, its empty variant, its non-empty variant, the range type of `head`
        # and the `multipleReferencesAllowed` value used for `head`
        list_families = (
            ("uima.cas.FSList", "uima.cas.EmptyFSList", "uima.cas.NonEmptyFSList", "uima.cas.TOP", True),
            ("uima.cas.FloatList", "uima.cas.EmptyFloatList", "uima.cas.NonEmptyFloatList", "uima.cas.Float", None),
            (
                "uima.cas.IntegerList",
                "uima.cas.EmptyIntegerList",
                "uima.cas.NonEmptyIntegerList",
                "uima.cas.Integer",
                None,
            ),
            (
                "uima.cas.StringList",
                "uima.cas.EmptyStringList",
                "uima.cas.NonEmptyStringList",
                "uima.cas.String",
                None,
            ),
        )
        for list_name, empty_name, non_empty_name, head_range, head_shared in list_families:
            self.create_type(name=list_name, supertypeName="uima.cas.ListBase")
            self.create_type(name=empty_name, supertypeName=list_name)
            non_empty = self.create_type(name=non_empty_name, supertypeName=list_name)
            self.create_feature(non_empty, name="head", rangeType=head_range, multipleReferencesAllowed=head_shared)
            self.create_feature(non_empty, name="tail", rangeType=list_name, multipleReferencesAllowed=True)

        # Sofa
        sofa = self.create_type(name="uima.cas.Sofa", supertypeName="uima.cas.TOP")
        self.create_feature(sofa, name="sofaNum", rangeType="uima.cas.Integer")
        self.create_feature(sofa, name="sofaID", rangeType="uima.cas.String")
        self.create_feature(sofa, name="mimeType", rangeType="uima.cas.String")
        self.create_feature(sofa, name="sofaArray", rangeType="uima.cas.TOP", multipleReferencesAllowed=True)
        self.create_feature(sofa, name="sofaString", rangeType="uima.cas.String")
        self.create_feature(sofa, name="sofaURI", rangeType="uima.cas.String")

        # AnnotationBase
        annotation_base = self.create_type(name="uima.cas.AnnotationBase", supertypeName="uima.cas.TOP")
        self.create_feature(annotation_base, name="sofa", rangeType="uima.cas.Sofa")

        # Annotation
        annotation = self.create_type(name=TYPE_NAME_ANNOTATION, supertypeName="uima.cas.AnnotationBase")
        self.create_feature(annotation, name="begin", rangeType="uima.cas.Integer")
        self.create_feature(annotation, name="end", rangeType="uima.cas.Integer")

        if add_document_annotation_type:
            self._add_document_annotation_type()

    def __iter__(self):
        # Iterating a type system yields what `get_types()` returns with its default arguments
        default_types = self.get_types()
        return default_types


[docs]
    def contains_type(self, typename: str):
        """Tells whether a type named `typename` is registered in this type system.

        Args:
            typename: The name of type whose existence is to be checked.

        Returns:
            `True` if a type with `typename` exists, else `False`.
        """
        known_types = self._types
        return typename in known_types



[docs]
    def create_type(self, name: str, supertypeName: str = TYPE_NAME_ANNOTATION, description: str = None) -> Type:
        """Creates a new type, registers it in this type system and returns it.

        Args:
            name: The name of the new type
            supertypeName: The name of the new types' supertype. Defaults to `TYPE_NAME_ANNOTATION`
            description: The description of the new type

        Returns:
            The newly created type

        Raises:
            ValueError: If `supertypeName` is inheritance final, or if a type called `name` already
                exists and `name` is not a predefined type name.
        """
        if supertypeName in _INHERITANCE_FINAL_TYPES:
            raise ValueError(f"[{name}] cannot inherit from [{supertypeName}] because the latter is inheritance final")

        # Only predefined names may be created although they are already present
        already_known = self.contains_type(name)
        if already_known and not is_predefined(name):
            raise ValueError(f"Type with name [{name}] already exists!")

        parent = self.get_type(supertypeName)
        created = Type(name=name, supertype=parent, description=description, typesystem=self)

        if name != TOP_TYPE_NAME:
            # Hook the new type into the hierarchy and hand down all features of the parent
            parent._children[name] = created
            for parent_feature in parent.all_features:
                created._add_feature(parent_feature, inherited=True)

        self._types[name] = created
        return created



[docs]
    def get_type(self, type_name: str) -> Type:
        """Looks up a type of this type system by its name.

        Args:
            type_name: The name of the type to retrieve

        Returns:
            The type with name `type_name`
        Raises:
            TypeNotFoundError: If no type with `type_name` could be found.
        """
        if not self.contains_type(type_name):
            raise TypeNotFoundError(f"Type with name [{type_name}] not found!")

        return self._types[type_name]



[docs]
    def get_types(self, built_in: bool = False) -> Iterator[Type]:
        """Returns the types of this type system. By default, types whose name is listed in
        `_PREDEFINED_TYPES` are left out.

        Args:
            built_in: Also include the built-in types. In this case the values view of the internal
                type dictionary is returned.

        """
        every_type = self._types.values()
        if built_in:
            return every_type

        def has_predefined_name(candidate):
            return candidate.name in _PREDEFINED_TYPES

        # Lazily drop everything that carries a predefined name
        return filterfalse(has_predefined_name, every_type)



[docs]
    def is_instance_of(self, type_: Union[Type, str], parent: Union[Type, str]) -> bool:
        """Checks whether `type_` is `parent` or inherits (directly or indirectly) from it.

        Args:
            type_: The type to check (`Type` or name as string)
            parent: The potential ancestor (`Type` or name as string)

        Returns:
            `True` if the names are equal or one of the supertypes of `type_` has the name of `parent`,
            else `False`. A falsy `parent` always yields `False`.
        """
        if not parent:
            return False

        if isinstance(type_, str):
            type_name = type_
        else:
            type_name = type_.name

        if isinstance(parent, str):
            parent_name = parent
        else:
            parent_name = parent.name

        if type_name == parent_name:
            return True

        # The root has no supertype left to inspect
        if type_name == TOP_TYPE_NAME:
            return False

        # Climb one level in the hierarchy and try again
        if isinstance(type_, str):
            super_type = self.get_type(type_).supertype
        else:
            super_type = type_.supertype

        if isinstance(parent, str):
            parent_type = self.get_type(parent)
        else:
            parent_type = parent

        return self.is_instance_of(super_type, parent_type)



[docs]
    def is_collection(self, type_: Union[str, "Type"], feature: "Feature") -> bool:
        """Checks if the given feature for the type identified by `type_` is a collection, e.g. list or array.

        A type name is resolved via `get_type` first, the check itself is done by the module-level
        `is_collection` function.

        Args:
            type_: The type to which the feature belongs (`Type` or name as string)
            feature: The feature to query for.
        Returns:
            Returns True if the given feature is a collection type, else False
        """
        resolved_type = type_
        if isinstance(type_, str):
            resolved_type = self.get_type(type_)

        return is_collection(resolved_type, feature)



[docs]
    def is_primitive(self, type_: Union[str, Type]) -> bool:
        """Checks if the type identified by `type_` is a primitive type.

        A type name is resolved via `get_type` first, the check itself is done by the module-level
        `is_primitive` function.

        Args:
            type_: Type to query for (`Type` or name as string)
        Returns:
            Returns True if the type identified by `type_` is a primitive type, else False
        """
        resolved_type = type_
        if isinstance(type_, str):
            resolved_type = self.get_type(type_)

        return is_primitive(resolved_type)



[docs]
    def is_primitive_collection(self, type_: Union[str, Type]) -> bool:
        """Checks if the type identified by `type_` is a primitive collection, e.g. list or array of primitives.

        A type name is resolved via `get_type` first, the check itself is done by the module-level
        `is_primitive_collection` function.

        Args:
            type_: Type to query for (`Type` or name as string)
        Returns:
            Returns True if the type identified by `type_` is a primitive collection type, else False
        """
        resolved_type = type_
        if isinstance(type_, str):
            resolved_type = self.get_type(type_)

        return is_primitive_collection(resolved_type)



[docs]
    def is_primitive_array(self, type_: Union[str, Type]) -> bool:
        """Checks if the type identified by `type_` is a primitive array, e.g. array of primitives.

        Args:
            type_: Type to query for (`Type` or name as string)
        Returns:
            Returns `True` if the type identified by `type_` is a primitive array type, else `False`
        """
        # `type_` is handed unchanged to the module-level function of the same name
        verdict = is_primitive_array(type_)
        return verdict



[docs]
    def is_primitive_list(self, type_: Union[str, Type]) -> bool:
        """Checks if the type identified by `type_` is a primitive list, e.g. list of primitives.

        Args:
            type_: Type to query for (`Type` or name as string)
        Returns:
            Returns `True` if the type identified by `type_` is a primitive list type, else `False`
        """
        # `type_` is handed unchanged to the module-level function of the same name
        verdict = is_primitive_list(type_)
        return verdict



[docs]
    def is_array(self, type_: Union[str, Type]) -> bool:
        """Checks if the type identified by `type_` is an array.

        Args:
            type_: Type to query for (`Type` or name as string)
        Returns:
            Returns `True` if the type identified by `type_` is an array type, else `False`
        """
        # `type_` is handed unchanged to the module-level function of the same name
        verdict = is_array(type_)
        return verdict



[docs]
    def is_list(self, type_: Union[str, Type]) -> bool:
        """Checks if the type identified by `type_` is a list.

        Args:
            type_: Type to query for (`Type` or name as string)
        Returns:
            Returns `True` if the type identified by `type_` is a list type, else `False`
        """
        # `type_` is handed unchanged to the module-level function of the same name
        verdict = is_list(type_)
        return verdict



[docs]
    def subsumes(self, parent: Union[str, Type], child: Union[str, Type]) -> bool:
        """Determines if the type `parent` subsumes the type `child`.

        Names are resolved via `get_type`, the actual check is delegated to `Type.subsumes`.

        Args:
            parent: Parent type (`Type` or name as string)
            child: Child type (`Type` or name as string)

        Returns:
            True if `parent` subsumes `child` else False
        """
        if isinstance(parent, str):
            parent_type = self.get_type(parent)
        else:
            parent_type = parent

        if isinstance(child, str):
            child_type = self.get_type(child)
        else:
            child_type = child

        return parent_type.subsumes(child_type)



[docs]
    def create_feature(
        self,
        domainType: Union[Type, str],
        name: str,
        rangeType: Union[Type, str],
        elementType: Union[Type, str] = None,
        description: str = None,
        multipleReferencesAllowed: bool = None,
    ) -> Feature:
        """Creates a feature and adds it to the given type.

        The names `self` and `type` are reserved: such a feature gets an underscore appended to its
        name and a warning is emitted.

        Args:
            domainType: The type to which the feature will be added (`Type` or name as string)
            name: The name of the new feature
            rangeType: The feature's rangeTypeName specifies the type of value that the feature can take.
            elementType: The elementType of a feature is optional, and applies only when the rangeTypeName
                is uima.cas.FSArray or uima.cas.FSList The elementType specifies what type of value can be
                assigned as an element of the array or list.
            description: The description of the new feature
            multipleReferencesAllowed: Setting this to true indicates that the array or list may be shared,
                so changes to it may affect other objects in the CAS.

        Returns:
            The newly created feature. It is registered on the domain type via `Type._add_feature`.
        """
        has_reserved_name = name in ("self", "type")
        if has_reserved_name:
            # The message mentions the original name, so it is built before the renaming
            msg = (
                f"Trying to add feature `{name}` which is a reserved name in Python, "
                f"renamed accessor to '{name}_' !"
            )
            name = name + "_"
            warnings.warn(msg)

        # Types given by name are looked up in this type system
        resolved_domain_type = domainType
        if isinstance(domainType, str):
            resolved_domain_type = self.get_type(domainType)

        resolved_range_type = rangeType
        if isinstance(rangeType, str):
            resolved_range_type = self.get_type(rangeType)

        resolved_element_type = elementType
        if isinstance(elementType, str):
            resolved_element_type = self.get_type(elementType)

        feature = Feature(
            name=name,
            domainType=resolved_domain_type,
            rangeType=resolved_range_type,
            elementType=resolved_element_type,
            description=description,
            multipleReferencesAllowed=multipleReferencesAllowed,
            has_reserved_name=has_reserved_name,
        )
        resolved_domain_type._add_feature(feature)

        return feature



[docs]
    @deprecated(details="Use create_feature")
    def add_feature(
        self,
        type_: Type,
        name: str,
        rangeTypeName: str,
        elementType: str = None,
        description: str = None,
        multipleReferencesAllowed: bool = None,
    ):
        """Adds a feature to the given type. Deprecated, forwards everything to `create_feature`.

        Args:
            type_: The type to which the feature will be added
            name: The name of the new feature
            rangeTypeName: The feature's rangeTypeName specifies the type of value that the feature can take.
            elementType: The elementType of a feature is optional, and applies only when the rangeTypeName
                is uima.cas.FSArray or uima.cas.FSList The elementType specifies what type of value can be
                assigned as an element of the array or list.
            description: The description of the new feature
            multipleReferencesAllowed: Setting this to true indicates that the array or list may be shared,
                so changes to it may affect other objects in the CAS.
        """
        # The created feature is intentionally not returned, this method has no return value
        self.create_feature(
            type_,
            name,
            rangeTypeName,
            elementType,
            description,
            multipleReferencesAllowed,
        )



[docs]
    def to_xml(self, path: Union[str, Path, None] = None) -> Optional[str]:
        """Creates an XML representation of this type system.

        Args:
            path: Location of the file to write, given as `str` or `Path`. If `None` is provided
                the result is returned as a string.

        Returns:
            If `path` is None, then the XML representation of this type system is returned as a string.

        Raises:
            TypeError: If `path` is neither `None`, a `str` nor a `Path`.
        """
        serializer = TypeSystemSerializer()

        # Without a target we serialize into memory and hand back the decoded document
        if path is None:
            buffer = BytesIO()
            serializer.serialize(buffer, self)
            return buffer.getvalue().decode("utf-8")

        if isinstance(path, str):
            target = open(path, "wb")
        elif isinstance(path, Path):
            target = path.open("wb")
        else:
            raise TypeError(f"`path` needs to be one of [str, None, Path], but was <{type(path)}>")

        with target as f:
            serializer.serialize(f, self)



[docs]
    def typecheck(self, fs: FeatureStructure) -> List[TypeCheckError]:
        """Checks whether a feature structure is type sound.

        Currently only checks `uima.cas.FSArray`: every element of such an array has to be of the
        feature's `elementType` or a subtype thereof. Features without an `elementType` accept any
        type. Unset array features and `None` elements are skipped as there is nothing to check.

        Args:
            fs: The feature structure to type check.

        Returns:
            List of type errors found, empty list of no errors were found.
        """
        errors = []

        t = self.get_type(fs.type.name)
        for f in t.all_features:
            if f.rangeType.name != "uima.cas.FSArray":
                continue

            feature_value = fs.value(f.name)
            # The array feature may not be set at all, or the array may be empty
            if feature_value is None or not feature_value.elements:
                continue

            # We check for every element that it is of type `elementType` or a child thereof.
            # The fallback is resolved to a `Type` so that `element_type.name` is always available.
            element_type = f.elementType or self.get_type(TOP_TYPE_NAME)
            for e in feature_value.elements:
                # A null reference carries no type that could be unsound
                if e is None:
                    continue

                if not self.subsumes(element_type, e.type.name):
                    msg = "Member of [{}] has unsound type: was [{}], need [{}]!".format(
                        f.rangeType.name, e.type.name, element_type.name
                    )
                    errors.append(TypeCheckError(fs.xmiID, msg))

        return errors


    def _defines_predefined_type(self, type_name):
        # Remember the name in `_predefined_types`; the serializer writes these types out explicitly
        redefined = self._predefined_types
        redefined.add(type_name)

    def _add_document_annotation_type(self):
        # Creates the document annotation type below `TYPE_NAME_ANNOTATION` with its `language` feature
        document_annotation = self.create_type(
            name=_DOCUMENT_ANNOTATION_TYPE,
            supertypeName=TYPE_NAME_ANNOTATION,
        )
        self.create_feature(document_annotation, name="language", rangeType=TYPE_NAME_STRING)


[docs]
    def transitive_closure(self, seed_types: Set[Type], built_in: bool = False) -> Set[Type]:
        """Collects all types that are reachable from `seed_types`.

        Starting at the seed types, supertypes as well as the range and element types of all features
        (inherited ones included) are followed.

        Args:
            seed_types: The types to start from
            built_in: If `False`, types whose name is in `_PREDEFINED_TYPES` are neither added to the
                result nor followed any further

        Returns:
            The set of transitively referenced types
        """
        closure = set()
        pending = list(seed_types)

        while pending:
            current = pending.pop(0)

            # Already handled, or a built-in type that should not be part of the result
            if current in closure:
                continue
            if not built_in and current.name in _PREDEFINED_TYPES:
                continue

            closure.add(current)

            parent = current.supertype
            if parent and parent not in closure:
                pending.append(parent)

            for feature in current.all_features:
                range_type = feature.rangeType
                if range_type not in closure:
                    pending.append(range_type)

                element_type = feature.elementType
                if element_type and element_type not in closure:
                    pending.append(element_type)

        return closure




# Deserializing



[docs]
def load_typesystem(source: Union[IO, str, Path]) -> TypeSystem:
    """Loads a type system from a XML source.

    Args:
        source: The XML source. A string is treated as the XML document itself (not as a file name),
                a `Path` is opened and read in binary mode, and anything else is handed to the
                deserializer as it is, which is meant for file-like objects.

    Returns:
        The deserialized type system

    """
    deserializer = TypeSystemDeserializer()

    if isinstance(source, Path):
        with source.open("rb") as src:
            return deserializer.deserialize(src)

    if isinstance(source, str):
        # The deserializer reads from a byte stream, so wrap the encoded document
        xml_bytes = source.encode("utf-8")
        return deserializer.deserialize(BytesIO(xml_bytes))

    return deserializer.deserialize(source)



class TypeSystemDeserializer:
    """Builds a `TypeSystem` from a type system description XML document."""

    def deserialize(self, source: Union[IO, str]) -> TypeSystem:
        """Parses the type system description in `source` and creates a type system from it.

        Args:
            source: a filename or file object containing XML data

        Returns:
            typesystem (TypeSystem): The type system containing the types and features found in `source`

        Raises:
            ValueError: If a predefined type is redefined with a different supertype or with different
                features, or if a `multipleReferencesAllowed` element is neither `true` nor `false`.
        """

        # It can be that the types in the xml are listed out-of-order, that means
        # some type A appears before its supertype. In order to deserialize these
        # files properly without sacrificing the requirement that the supertype
        # of a type needs to already be present, we sort the graph of types and
        # supertypes topologically. This means a supertype will always be inserted
        # before its children. The inheritance relation is expressed in the
        # `type_dependencies` dictionary.
        types = {}
        features = defaultdict(list)
        type_dependencies = defaultdict(set)
        types_to_supertypes = {}

        context = etree.iterparse(source, events=("end",), tag=("{*}typeDescription",))
        for _event, elem in context:
            type_name = self._get_elem_as_str(elem.find("{*}name"))
            description = self._get_elem_as_str(elem.find("{*}description"))
            supertypeName = self._get_elem_as_str(elem.find("{*}supertypeName"))

            # We store the supertype in order to later fill in the real supertype type,
            # not only the supertype name. It can be that it is a builtin or a type in
            # the type system XML is defined before its supertype.
            types_to_supertypes[type_name] = supertypeName
            types[type_name] = Type(name=type_name, supertype=None, description=description)
            type_dependencies[type_name].add(supertypeName)

            # Parse features
            for fd in elem.iterfind("{*}features/{*}featureDescription"):
                feature_name = self._get_elem_as_str(fd.find("{*}name"))
                rangeTypeName = self._get_elem_as_str(fd.find("{*}rangeTypeName"))
                feature_description = self._get_elem_as_str(fd.find("{*}description"))
                multipleReferencesAllowed = self._get_elem_as_bool(fd.find("{*}multipleReferencesAllowed"))
                elementType = self._get_elem_as_str(fd.find("{*}elementType"))

                f = Feature(
                    domainType=type_name,  # value should actually be a Type, but we still need to load these
                    name=feature_name,
                    rangeType=rangeTypeName,  # value should actually be a Type, but we still need to load these
                    description=feature_description,
                    multipleReferencesAllowed=multipleReferencesAllowed,
                    elementType=elementType,  # value should actually be a Type, but we still need to load these
                )
                features[type_name].append(f)

            # Free the XML tree element from memory as it is not needed anymore
            elem.clear()
            while elem.getprevious() is not None:
                del elem.getparent()[0]
        del context

        ts = TypeSystem(add_document_annotation_type=False)

        # DocumentAnnotation is not a predefined UIMA type, but some applications assume that it exists.
        # It can be defined by users with custom fields. In case the loaded type system did not define
        # it, we add the standard DocumentAnnotation type. In case it is already defined, we add it to
        # the list of redefined predefined types so that is written back on serialization.
        if _DOCUMENT_ANNOTATION_TYPE not in types:
            t = Type(name=_DOCUMENT_ANNOTATION_TYPE, supertype=ts.get_type(TYPE_NAME_ANNOTATION))
            features[t.name].append(Feature(domainType=t, name="language", rangeType=TYPE_NAME_STRING))
            types[t.name] = t
            type_dependencies[t.name].add(TYPE_NAME_ANNOTATION)
        else:
            ts._defines_predefined_type(_DOCUMENT_ANNOTATION_TYPE)

        # We fill in the supertypes here now that we parsed and created all types
        for type_name, supertype_name in types_to_supertypes.items():
            t = types[type_name]

            if supertype_name in _PREDEFINED_TYPES:
                supertype = ts.get_type(supertype_name)
            else:
                supertype = types[supertype_name]

            t.supertype = supertype

        def resolve_type(type_: Union[str, Type]):
            # Names of predefined types are resolved against the fresh type system, all other
            # names against the types parsed from the XML. Non-string values are passed through.
            if isinstance(type_, str):
                return ts.get_type(type_) if type_ in _PREDEFINED_TYPES else types[type_]
            return type_

        # Fill in actual types into the features
        for fl in features.values():
            for f in fl:
                f.domainType = resolve_type(f.domainType)
                f.rangeType = resolve_type(f.rangeType)
                f.elementType = resolve_type(f.elementType)

        # Some CAS handling libraries add predefined types to the typesystem XML.
        # Here we check that the redefinition of predefined types adheres to the definition in UIMA
        for type_name, t in types.items():
            if type_name in _PREDEFINED_TYPES:
                pt = ts.get_type(type_name)

                t_features = sorted(features[type_name])
                pt_features = sorted(pt.features)

                if t.supertype != pt.supertype:
                    msg = "Redefining predefined type [{0}] with different superType [{1}], expected [{2}]"
                    raise ValueError(msg.format(type_name, t.supertype, pt.supertype))

                # We check whether the predefined type is defined the same in UIMA and this typesystem
                if t_features == pt_features:
                    # No need to create predefined types, but store them for serialization
                    ts._defines_predefined_type(type_name)
                    continue
                else:
                    msg = "Redefining predefined type [{0}] with different features: {1} - Have to be {2}"
                    raise ValueError(msg.format(type_name, t_features, pt_features))

        # Add the types to the type system in order of dependency (parents before children)
        created_types = []
        for type_name in toposort_flatten(type_dependencies, sort=False):
            # No need to recreate predefined types
            if type_name in _PREDEFINED_TYPES:
                continue

            t = types[type_name]
            created_type = ts.create_type(name=t.name, description=t.description, supertypeName=t.supertype.name)
            created_types.append(created_type)

        # Add the features to the type AFTER we create all the types to not cause circular references
        # between type references in inheritance and type references in range or element type.
        for t in created_types:
            for f in features[t.name]:
                ts.create_feature(
                    t,
                    name=f.name,
                    rangeType=f.rangeType,
                    elementType=f.elementType,
                    description=f.description,
                    multipleReferencesAllowed=f.multipleReferencesAllowed,
                )

        return ts

    def _get_elem_as_str(self, elem: etree.Element) -> Optional[str]:
        """Returns the text of `elem` with surrounding whitespace removed.

        Returns:
            `None` if `elem` is `None` or has no text, else the stripped text.
        """
        if elem is None or elem.text is None:
            return None

        return elem.text.strip()

    def _get_elem_as_bool(self, elem: etree.Element) -> Optional[bool]:
        """Interprets the text of `elem` as a boolean.

        Surrounding whitespace is ignored, in line with `_get_elem_as_str`, so that pretty-printed
        documents like `<multipleReferencesAllowed> true </multipleReferencesAllowed>` can be read.

        Returns:
            `None` if `elem` is `None`, `True` for `true` and `False` for `false`.

        Raises:
            ValueError: If the text is anything else, including a missing text.
        """
        if elem is None:
            return None

        raw_text = elem.text
        text = raw_text.strip() if raw_text is not None else None

        if text == "true":
            return True
        if text == "false":
            return False

        raise ValueError("Cannot parse boolean: " + str(raw_text))


# Serializing


class TypeSystemSerializer:
    """Writes a `TypeSystem` as a type system description XML document."""

    def serialize(self, sink: Union[IO, str], typesystem: TypeSystem):
        """Writes the XML representation of `typesystem` to `sink`.

        Args:
            sink: Target that is handed to `etree.xmlfile`
            typesystem: The type system to serialize
        """
        default_namespace = {None: "http://uima.apache.org/resourceSpecifier"}

        with etree.xmlfile(sink, encoding="utf-8") as xf:
            xf.write_declaration()

            with xf.element("typeSystemDescription", nsmap=default_namespace), xf.element("types"):
                # In order to export the same types that we imported, we
                # also emit the (redundant) predefined types
                for redefined_name in sorted(typesystem._predefined_types):
                    self._serialize_type(xf, typesystem.get_type(redefined_name))

                regular_types = sorted(typesystem.get_types(), key=lambda t: t.name)
                for regular_type in regular_types:
                    # We do not want to serialize our implicitly added DocumentAnnotation.
                    # If it was defined by the user, it is in `typesystem._predefined_types`
                    # and serialized in the loop before.
                    if regular_type.name != _DOCUMENT_ANNOTATION_TYPE:
                        self._serialize_type(xf, regular_type)

    def _serialize_type(self, xf: IO, type_: Type):
        """Writes one `typeDescription` element for `type_` to `xf`."""
        type_node = etree.Element("typeDescription")

        # The child elements are created in the order name, description, supertypeName
        etree.SubElement(type_node, "name").text = type_.name
        etree.SubElement(type_node, "description").text = type_.description
        etree.SubElement(type_node, "supertypeName").text = type_.supertype.name

        # Only create the `features` element if there is at least one feature
        own_features = list(type_.features)
        if own_features:
            features_node = etree.SubElement(type_node, "features")
            for own_feature in own_features:
                self._serialize_feature(features_node, own_feature)

        xf.write(type_node)

    def _serialize_feature(self, features: etree.Element, feature: Feature):
        """Appends one `featureDescription` element for `feature` to the `features` element."""
        feature_node = etree.SubElement(features, "featureDescription")

        # If the feature name is a reserved name like `self`, then we added an
        # underscore to it before so Python can handle it. We now need to remove it.
        if feature._has_reserved_name:
            written_name = feature.name[:-1]
        else:
            written_name = feature.name

        etree.SubElement(feature_node, "name").text = written_name
        etree.SubElement(feature_node, "description").text = feature.description
        etree.SubElement(feature_node, "rangeTypeName").text = feature.rangeType.name

        # The two optional elements are only written when a value is present
        shared = feature.multipleReferencesAllowed
        if shared is not None:
            etree.SubElement(feature_node, "multipleReferencesAllowed").text = "true" if shared else "false"

        element_type = feature.elementType
        if element_type is not None:
            etree.SubElement(feature_node, "elementType").text = element_type.name



[docs]
def merge_typesystems(*typesystems: TypeSystem) -> TypeSystem:
    """Merges several type systems into one.

    If a type is defined in more than one source type system, then the features of all of these definitions are
    joined together in the target type system. The exact rules are outlined in
    https://uima.apache.org/d/uimaj-2.10.4/references.html#ugr.ref.cas.typemerging .

    Args:
        *typesystems: The type systems to merge

    Returns:
        A new type system that is the result of merging all of the type systems together.

    Raises:
        ValueError: If a type is declared with incompatible supertypes, or if some types can never be merged
            because their supertype is neither predefined nor becomes part of the merged type system.
    """

    pending_types = []
    for ts in typesystems:
        pending_types.extend(ts.get_types())

    merged_types = set()
    merged_ts = TypeSystem()

    # A type can only be added if its supertype was added before. We therefore sweep over the pending types
    # repeatedly and defer every type whose supertype is not available yet to the next sweep. If a whole sweep
    # could not merge a single type, then the type systems are not mergeable and we abort with an error.
    while pending_types:
        deferred_types = []
        for t in pending_types:
            # Check whether the type is ready to be added
            if not is_predefined(t.supertype) and t.supertype.name not in merged_types:
                deferred_types.append(t)
                continue

            # The supertype is defined, so we can add the current type to the new type system
            if not merged_ts.contains_type(t.name):
                # Create the type and add its features as it does not exist yet in the merged type system
                created_type = merged_ts.create_type(
                    name=t.name, description=t.description, supertypeName=t.supertype.name
                )

                for feature in t.features:
                    created_type._add_feature(copy(feature), warn=False)
            else:
                # Type is already defined
                existing_type = merged_ts.get_type(t.name)

                # If the supertypes are not the same, we need to check whether they are at
                # least compatible and then patch the hierarchy
                if t.supertype.name != existing_type.supertype.name:
                    if merged_ts.subsumes(existing_type.supertype.name, t.supertype.name):
                        # Existing supertype subsumes newly specified supertype;
                        # reset supertype to the new, more specific type
                        existing_type.supertype = t.supertype
                    elif merged_ts.subsumes(t.supertype.name, existing_type.supertype.name):
                        # Newly specified supertype subsumes old type, this is OK and we don't
                        # need to do anything
                        pass
                    else:
                        msg = "Cannot merge type [{}] with incompatible super types: [{}] - [{}]".format(
                            t.name, t.supertype.name, existing_type.supertype.name
                        )
                        raise ValueError(msg)

                # If the type is already defined, merge features
                for feature in t.features:
                    existing_type._add_feature(copy(feature), warn=False)

            merged_types.add(t.name)

        # If there was no progress in this sweep, then the leftover types cannot be merged. Both sides of the
        # comparison must be lengths; comparing a length against the list itself is never true and would let
        # this loop spin forever on unmergeable input.
        if len(deferred_types) == len(pending_types):
            raise ValueError("Unmergeable types: " + ", ".join([t.name for t in pending_types]))

        # Only the deferred types need another sweep; everything else has been merged already
        pending_types = deferred_types

    # Fix up type references to ensure that only type instances of the merged type system are referenced, not any
    # types from the source type systems
    for t in merged_ts.get_types():
        if t.supertype:
            t.supertype = merged_ts.get_type(t.supertype.name)
        for f in t.features:
            if f.domainType:
                f.domainType = merged_ts.get_type(f.domainType.name)
            if f.rangeType:
                f.rangeType = merged_ts.get_type(f.rangeType.name)
            if f.elementType:
                f.elementType = merged_ts.get_type(f.elementType.name)
    return merged_ts




[docs]
def load_dkpro_core_typesystem() -> TypeSystem:
    """Load the type system stored in the ``dkpro-core-types.xml`` file of the ``resources`` package.

    Returns:
        The result of passing the opened resource to ``load_typesystem``.
    """
    # Resource lookup follows https://stackoverflow.com/a/20885799
    try:
        import importlib.resources as pkg_resources
    except ImportError:
        # Fall back to the `importlib_resources` backport for PY<37.
        import importlib_resources as pkg_resources

    from . import resources  # relative-import the *package* containing the templates

    with pkg_resources.open_binary(resources, "dkpro-core-types.xml") as xml_stream:
        typesystem = load_typesystem(xml_stream)
    return typesystem