Source code for kumoai.graph.column

from dataclasses import dataclass
from typing import Any, Optional, Union

from kumoapi.table import TimestampUnit
from kumoapi.typing import Dtype, Stype

from kumoai.mixin import CastMixin


@dataclass(init=False)
class Column(CastMixin):
    r"""Metadata for a single column of a Kumo :class:`~kumoai.graph.Table`.

    A column may be constructed on its own, or retrieved from an existing
    table through the :meth:`~kumoai.graph.Table.column` method.

    .. code-block:: python

        import kumoai

        # Retrieve a column from a `kumoai.Table`:
        table = kumoai.Table(...)
        column = table.column("col_name")
        column = table["col_name"]  # same as the line above.

        # Change the data type of a column:
        print("Existing dtype: ", column.dtype)
        column.dtype = "int"

        # Change the semantic type of a column:
        print("Existing stype: ", column.stype)
        column.stype = "ID"

    Args:
        name: The name of this column. Once set, it cannot be changed to a
            different value.
        stype: The semantic type of this column. A string may be given in
            place of a :class:`~kumoai.Stype`; the list of possible semantic
            types is located at :class:`~kumoai.Stype`.
        dtype: The data type of this column. A string may be given in place
            of a :class:`~kumoai.Dtype`; the list of possible data types is
            located at :class:`~kumoai.Dtype`.
        timestamp_format: If this column represents a timestamp, the format
            in which the timestamp should be parsed. This is either a
            :class:`~kumoapi.table.TimestampUnit` for integer columns, or a
            format string using the identifiers described
            `here <https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html>`__
            for a SaaS Kumo deployment and
            `here <https://docs.snowflake.com/en/sql-reference/date-time-input-output#about-the-elements-used-in-input-and-output-formats>`__
            for a Snowpark Container Services Kumo deployment. If left empty,
            will be intelligently inferred by Kumo.
    """  # noqa: E501

    name: str
    stype: Optional[Stype] = None
    dtype: Optional[Dtype] = None
    timestamp_format: Optional[Union[str, TimestampUnit]] = None

    def __init__(
        self,
        name: str,
        stype: Optional[Union[Stype, str]] = None,
        dtype: Optional[Union[Dtype, str]] = None,
        timestamp_format: Optional[Union[str, TimestampUnit]] = None,
    ) -> None:
        """Create a column, coercing ``stype`` and ``dtype`` when given."""
        self.name = name
        self.stype = None if stype is None else Stype(stype)
        self.dtype = None if dtype is None else Dtype(dtype)

        # Try to interpret the value as a `TimestampUnit`; if that raises
        # `ValueError`, store the value exactly as it was passed in:
        try:
            self.timestamp_format = TimestampUnit(timestamp_format)
        except ValueError:
            self.timestamp_format = timestamp_format

    def __hash__(self) -> int:
        """Hash the column by its name, types and timestamp format."""
        identity = (
            self.name,
            self.stype,
            self.dtype,
            self.timestamp_format,
        )
        return hash(identity)

    def __setattr__(self, key: Any, value: Any) -> None:
        """Assign an attribute, guarding ``name`` and coercing strings.

        * ``name`` may only be re-assigned to a value equal to the current
          one; anything else raises :class:`AttributeError`.
        * String values for ``stype`` and ``dtype`` are converted through
          ``Stype`` and ``Dtype``, respectively.
        * A string ``timestamp_format`` is converted to a ``TimestampUnit``
          when possible, and kept as the given string on ``ValueError``.
        """
        if key == 'name':
            # The very first assignment passes, since `getattr` then falls
            # back to `value` itself:
            if value != getattr(self, key, value):
                raise AttributeError("Attribute 'name' is read-only")
        elif key == 'stype':
            if isinstance(value, str):
                value = Stype(value)
        elif key == 'dtype':
            if isinstance(value, str):
                value = Dtype(value)
        elif key == 'timestamp_format':
            if isinstance(value, str):
                try:
                    value = TimestampUnit(value)
                except ValueError:
                    pass  # Not a unit name: keep the format string.

        super().__setattr__(key, value)

    def update(self, obj: 'Column', override: bool = True) -> 'Column':
        """Copy the non-``None`` public attributes of ``obj`` onto ``self``.

        Args:
            obj: The column to read attribute values from.
            override: If ``True``, values that are already set on this
                column are replaced. If ``False``, only attributes that are
                currently ``None`` on this column are filled in.

        Returns:
            This column, to allow call chaining.

        Raises:
            AttributeError: If an assignment attempts to change ``name`` to
                a different value (e.g., ``override=True`` and ``obj`` has
                another name).
        """
        for attr in vars(self):
            if key_is_private := (attr[0] == '_'):  # Skip private attributes.
                continue

            incoming = getattr(obj, attr, None)
            if incoming is None:
                continue

            if override or getattr(self, attr, None) is None:
                setattr(self, attr, incoming)

        return self

    def __repr__(self) -> str:
        """Return a readable summary; the format is shown only when set."""
        parts = [
            f'name="{self.name}"',
            f'stype="{self.stype}"',
            f'dtype="{self.dtype}"',
        ]
        if self.timestamp_format is not None:
            parts.append(f'timestamp_format="{self.timestamp_format}"')
        return "Column(" + ", ".join(parts) + ")"