kumoai.experimental.rfm.LocalTable#

class kumoai.experimental.rfm.LocalTable[source]#

Bases: Table

A table backed by a pandas.DataFrame.

A LocalTable fully specifies the relevant metadata, i.e. selected columns, column semantic types, primary keys and time columns. LocalTable is used to create a Graph.

import pandas as pd
import kumoai.experimental.rfm as rfm

# Load data from a CSV file:
df = pd.read_csv("data.csv")

# Create a table from a `pandas.DataFrame` and infer its metadata ...
table = rfm.LocalTable(df, name="my_table").infer_metadata()

# ... or create a table explicitly:
table = rfm.LocalTable(
    df=df,
    name="my_table",
    primary_key="id",
    time_column="time",
    end_time_column=None,
)

# Verify metadata:
table.print_metadata()

# Change the semantic type of a column:
table[column].stype = "text"
Parameters:
  • df (DataFrame) – The data frame to create this table from.

  • name (str) – The name of this table.

  • primary_key (MissingType | str | None) – The name of the primary key of this table, if it exists.

  • time_column (Optional[str]) – The name of the time column of this table, if it exists.

  • end_time_column (Optional[str]) – The name of the end time column of this table, if it exists.

__init__(df, name, primary_key=???, time_column=None, end_time_column=None)[source]#
property backend: DataBackend#

The data backend of this table.

add_column(column)#

Adds a column to this table.

Parameters:

column (Union[ColumnSpec, Mapping[str, Any], str]) – The column to add.

Raises:

KeyError – If the column name already exists in this table.

Return type:

Column

add_columns(columns)#

Adds a set of columns to this table.

Parameters:

columns (Sequence[Union[ColumnSpec, Mapping[str, Any], str]]) – The columns to add.

Raises:

KeyError – If any of the column names already exist in this table.

Return type:

None

column(name)#

Returns the data column with the given name in this table.

Parameters:

name (str) – The name of the column.

Raises:

KeyError – If name is not present in this table.

Return type:

Column

property columns: list[Column]#

Returns a list of Column objects that represent the columns in this table.

property end_time_column: Column | None#

The end time column of this table.

The getter returns the end time column of this table, or None if no such end time column is present.

The setter sets a column as an end time column on this table, and raises a ValueError if the end time column has a non-timestamp compatible data type or if the column name does not match a column in the data frame.

has_column(name)#

Returns True if this table holds a column with the given name; False otherwise.

Return type:

bool

has_end_time_column()#

Returns True if this table has an end time column; False otherwise.

Return type:

bool

has_primary_key()#

Returns True if this table has a primary key; False otherwise.

Return type:

bool

has_time_column()#

Returns True if this table has a time column; False otherwise.

Return type:

bool

infer_metadata(verbose=True)#

Infers metadata, i.e., primary keys and time columns, in this table.

Parameters:

verbose (bool) – Whether to print verbose output.

Return type:

Self

infer_primary_key(verbose=True)#

Infers the primary key in this table.

Parameters:

verbose (bool) – Whether to print verbose output.

Return type:

Self

infer_time_column(verbose=True)#

Infers the time column in this table.

Parameters:

verbose (bool) – Whether to print verbose output.

Return type:

Self

property metadata: DataFrame#

Returns a pandas.DataFrame object containing metadata information about the columns in this table.

The returned dataframe has columns "Name", "Data Type", "Semantic Type", "Primary Key", "Time Column" and "End Time Column", which provide an aggregated view of the properties of the columns of this table.

Example

>>> 
>>> import kumoai.experimental.rfm as rfm
>>> table = rfm.LocalTable(df=..., name=...).infer_metadata()
>>> table.metadata
    Name        Data Type  Semantic Type  Primary Key  Time Column  End Time Column
0   CustomerID  float64    ID             True         False        False
property name: str#

The name of this table.

property primary_key: Column | None#

The primary key column of this table.

The getter returns the primary key column of this table, or None if no such primary key is present.

The setter sets a column as a primary key on this table, and raises a ValueError if the primary key has a non-ID compatible data type or if the column name does not match a column in the data frame.

print_metadata()#

Prints the metadata of this table.

Return type:

None

remove_column(name)#

Removes a column from this table.

Parameters:

name (str) – The name of the column.

Raises:

KeyError – If name is not present in this table.

Return type:

Self

property source_name: str#

The source name of this table.

property time_column: Column | None#

The time column of this table.

The getter returns the time column of this table, or None if no such time column is present.

The setter sets a column as a time column on this table, and raises a ValueError if the time column has a non-timestamp compatible data type or if the column name does not match a column in the data frame.