# Source code for kumoai.graph.column
from dataclasses import dataclass
from typing import Any, Optional, Union
from kumoapi.table import TimestampUnit
from kumoapi.typing import Dtype, Stype
from kumoai.mixin import CastMixin
# [docs]
@dataclass(init=False)
class Column(CastMixin):
    r"""A column represents metadata information for a column in a Kumo
    :class:`~kumoai.graph.Table`. Columns can be created independent of
    a table, or can be fetched from a table with the
    :meth:`~kumoai.graph.Table.column` method.

    .. code-block:: python

        import kumoai

        # Fetch a column from a `kumoai.Table`:
        table = kumoai.Table(...)
        column = table.column("col_name")
        column = table["col_name"]  # equivalent to the above.

        # Edit a column's data type:
        print("Existing dtype: ", column.dtype)
        column.dtype = "int"

        # Edit a column's semantic type:
        print("Existing stype: ", column.stype)
        column.stype = "ID"

    Args:
        name: The name of this column.
        stype: The semantic type of this column. Semantic types can be
            specified as strings: the list of possible semantic types
            is located at :class:`~kumoai.Stype`.
        dtype: The data type of this column. Data types can be specified
            as strings: the list of possible data types is located at
            :class:`~kumoai.Dtype`.
        timestamp_format: If this column represents a timestamp, the format
            that the timestamp should be parsed in. The format can either be
            a :class:`~kumoapi.table.TimestampUnit` for integer columns or a
            string with a format identifier described
            `here <https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html>`__
            for a SaaS Kumo deployment and
            `here <https://docs.snowflake.com/en/sql-reference/date-time-input-output#about-the-elements-used-in-input-and-output-formats>`__
            for a Snowpark Container Services Kumo deployment. If left empty,
            will be intelligently inferred by Kumo.
    """  # noqa: E501
    name: str
    stype: Optional[Stype] = None
    dtype: Optional[Dtype] = None
    timestamp_format: Optional[Union[str, TimestampUnit]] = None

    def __init__(
        self,
        name: str,
        stype: Optional[Union[Stype, str]] = None,
        dtype: Optional[Union[Dtype, str]] = None,
        timestamp_format: Optional[Union[str, TimestampUnit]] = None,
    ) -> None:
        r"""Creates a column, normalizing string inputs to their typed form.

        Args:
            name: The name of this column.
            stype: The semantic type, as a :class:`Stype` or a string that
                is passed to the :class:`Stype` constructor.
            dtype: The data type, as a :class:`Dtype` or a string that is
                passed to the :class:`Dtype` constructor.
            timestamp_format: A :class:`TimestampUnit`, a value convertible
                to one, or a free-form format string.
        """
        self.name = name
        self.stype = Stype(stype) if stype is not None else None
        self.dtype = Dtype(dtype) if dtype is not None else None
        # Prefer a `TimestampUnit` whenever the value maps onto one. Any
        # value the constructor rejects with `ValueError` (e.g., a free-form
        # format string) is stored unchanged.
        # NOTE(review): presumably `TimestampUnit(None)` raises `ValueError`
        # so that an omitted format stays `None` -- confirm.
        try:
            self.timestamp_format = TimestampUnit(timestamp_format)
        except ValueError:
            self.timestamp_format = timestamp_format

    def __hash__(self) -> int:
        r"""Returns a hash over the name, stype, dtype and timestamp format.

        The hashed attributes (other than ``name``) can be reassigned, so
        the hash of a column changes whenever one of them is edited.
        """
        return hash((self.name, self.stype, self.dtype, self.timestamp_format))

    def __setattr__(self, key: str, value: Any) -> None:
        r"""Assigns an attribute, validating and coercing known fields.

        * ``name`` may be assigned once; re-assigning a different value
          raises :class:`AttributeError`.
        * String values for ``stype`` and ``dtype`` are converted to
          :class:`Stype` and :class:`Dtype`, respectively.
        * String values for ``timestamp_format`` are converted to a
          :class:`TimestampUnit` when possible and kept as a plain string
          otherwise.

        Raises:
            AttributeError: If ``name`` is changed after it has been set.
        """
        # `getattr` falls back to `value` while `name` is still unset, which
        # lets the very first assignment through.
        if key == 'name' and value != getattr(self, key, value):
            raise AttributeError("Attribute 'name' is read-only")
        elif key == 'stype' and isinstance(value, str):
            value = Stype(value)
        elif key == 'dtype' and isinstance(value, str):
            value = Dtype(value)
        elif key == 'timestamp_format' and isinstance(value, str):
            try:
                value = TimestampUnit(value)
            except ValueError:
                # Not a known unit: keep the raw format string.
                pass
        super().__setattr__(key, value)

    def update(self, obj: 'Column', override: bool = True) -> 'Column':
        r"""Copies the non-``None`` public attributes of :obj:`obj` onto
        this column.

        Args:
            obj: The column to read attribute values from.
            override: If :obj:`True`, every non-``None`` attribute of
                :obj:`obj` replaces the current value. If :obj:`False`, only
                attributes that are currently ``None`` on this column are
                filled in.

        Returns:
            This column, to allow call chaining.

        Raises:
            AttributeError: If the update would assign a different ``name``
                (see :meth:`__setattr__`).
        """
        # Iterate over a snapshot of the keys so that the assignments below
        # cannot disturb the iteration.
        for key in tuple(self.__dict__):
            if key.startswith('_'):  # Skip private attributes:
                continue
            value = getattr(obj, key, None)
            if value is None:
                continue
            if override or getattr(self, key, None) is None:
                setattr(self, key, value)
        return self

    def __repr__(self) -> str:
        r"""Returns a readable representation of this column; the timestamp
        format is only included when it is set.
        """
        out = (f'Column(name="{self.name}", stype="{self.stype}", '
               f'dtype="{self.dtype}"')
        if self.timestamp_format is not None:
            out += f', timestamp_format="{self.timestamp_format}"'
        out += ")"
        return out