Source code for matrixone.sqlalchemy_ext.table_builder

# Copyright 2021 - 2022 Matrix Origin
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Table builder utilities for MatrixOne vector tables.
"""

from typing import List, Optional, Union

from sqlalchemy import (
    TIMESTAMP,
    BigInteger,
    Boolean,
    Column,
    Date,
    DateTime,
    ForeignKeyConstraint,
    Index,
    Integer,
    MetaData,
    Numeric,
    PrimaryKeyConstraint,
    SmallInteger,
    String,
    Table,
    Text,
    Time,
)
from sqlalchemy.dialects.mysql import BLOB, JSON, LONGBLOB, MEDIUMBLOB, TINYBLOB, TINYINT, VARBINARY

from .vector_type import Vectorf32, Vectorf64, VectorPrecision, VectorType


[docs] class VectorTableBuilder: """ Builder class for creating MatrixOne vector tables with SQLAlchemy. This class provides a fluent interface for building vector tables with proper column definitions, indexes, and constraints. It's designed to work seamlessly with MatrixOne's vector capabilities and SQLAlchemy. Key Features: - Fluent method chaining for table definition - Support for all MatrixOne column types including vectors - Automatic vector index creation - Constraint and foreign key support - Integration with SQLAlchemy metadata Supported Column Types: - Standard types: Integer, String, Text, DateTime, etc. - Vector types: Vectorf32, Vectorf64 with configurable dimensions - MatrixOne-specific types: JSON, BLOB variants Usage Examples .. code-block:: python # Create a simple vector table builder = VectorTableBuilder('documents') table = (builder .add_int_column('id', primary_key=True) .add_string_column('title', length=255) .add_text_column('content') .add_vector_column('embedding', Vectorf32(384)) .build()) # Create a complex table with indexes builder = VectorTableBuilder('products') table = (builder .add_bigint_column('id', primary_key=True) .add_string_column('name', length=100) .add_numeric_column('price', precision=10, scale=2) .add_vector_column('features', Vectorf64(512)) .add_vector_index('idx_features', 'features', 'ivfflat', lists=100) .build()) Note: This builder is primarily used internally by the Client's table creation methods, but can be used directly for advanced use cases. """
[docs] def __init__(self, table_name: str, metadata: MetaData = None): """ Initialize the table builder. Args: table_name: Name of the table to create metadata: SQLAlchemy metadata object """ self.table_name = table_name self.metadata = metadata or MetaData() self.columns = [] self.indexes = [] self.constraints = []
[docs] def add_column(self, name: str, type_, **kwargs): """Add a column to the table.""" column = Column(name, type_, **kwargs) self.columns.append(column) return self
[docs] def add_int_column(self, name: str, primary_key: bool = False, **kwargs): """Add an integer column.""" if primary_key: kwargs["primary_key"] = True return self.add_column(name, Integer, **kwargs)
[docs] def add_bigint_column(self, name: str, primary_key: bool = False, **kwargs): """Add a bigint column.""" if primary_key: kwargs["primary_key"] = True return self.add_column(name, BigInteger, **kwargs)
[docs] def add_string_column(self, name: str, length: int = 255, **kwargs): """Add a string column.""" return self.add_column(name, String(length), **kwargs)
[docs] def add_text_column(self, name: str, **kwargs): """Add a text column.""" return self.add_column(name, Text, **kwargs)
[docs] def add_json_column(self, name: str, **kwargs): """Add a JSON column.""" return self.add_column(name, JSON, **kwargs)
[docs] def add_vector_column(self, name: str, dimension: int, precision: str = VectorPrecision.F32, **kwargs): """Add a vector column.""" if precision == VectorPrecision.F32: vector_type = Vectorf32(dimension=dimension) elif precision == VectorPrecision.F64: vector_type = Vectorf64(dimension=dimension) else: vector_type = VectorType(dimension=dimension, precision=precision) return self.add_column(name, vector_type, **kwargs)
[docs] def add_vecf32_column(self, name: str, dimension: int, **kwargs): """Add a vecf32 column.""" return self.add_vector_column(name, dimension, VectorPrecision.F32, **kwargs)
[docs] def add_vecf64_column(self, name: str, dimension: int, **kwargs): """Add a vecf64 column.""" return self.add_vector_column(name, dimension, VectorPrecision.F64, **kwargs)
[docs] def add_smallint_column(self, name: str, primary_key: bool = False, **kwargs): """Add a smallint column.""" if primary_key: kwargs["primary_key"] = True return self.add_column(name, SmallInteger, **kwargs)
[docs] def add_tinyint_column(self, name: str, primary_key: bool = False, **kwargs): """Add a tinyint column.""" if primary_key: kwargs["primary_key"] = True return self.add_column(name, TINYINT, **kwargs)
[docs] def add_numeric_column( self, name: str, column_type: str, precision: Optional[int] = None, scale: Optional[int] = None, **kwargs ): """Add a numeric column (float, double, decimal, numeric).""" if column_type in ("float", "double"): from sqlalchemy import Float return self.add_column(name, Float(precision=precision), **kwargs) elif column_type in ("decimal", "numeric"): return self.add_column(name, Numeric(precision=precision, scale=scale), **kwargs) else: raise ValueError(f"Unsupported numeric type: {column_type}")
[docs] def add_datetime_column(self, name: str, column_type: str, **kwargs): """Add a datetime column (date, datetime, timestamp, time, year).""" if column_type == "date": return self.add_column(name, Date, **kwargs) elif column_type == "datetime": return self.add_column(name, DateTime, **kwargs) elif column_type == "timestamp": return self.add_column(name, TIMESTAMP, **kwargs) elif column_type == "time": return self.add_column(name, Time, **kwargs) elif column_type == "year": # MySQL YEAR type from sqlalchemy.dialects.mysql import YEAR return self.add_column(name, YEAR, **kwargs) else: raise ValueError(f"Unsupported datetime type: {column_type}")
[docs] def add_boolean_column(self, name: str, **kwargs): """Add a boolean column.""" return self.add_column(name, Boolean, **kwargs)
[docs] def add_binary_column(self, name: str, column_type: str, **kwargs): """Add a binary column (blob, longblob, mediumblob, tinyblob, binary, varbinary).""" if column_type == "blob": return self.add_column(name, BLOB, **kwargs) elif column_type == "longblob": return self.add_column(name, LONGBLOB, **kwargs) elif column_type == "mediumblob": return self.add_column(name, MEDIUMBLOB, **kwargs) elif column_type == "tinyblob": return self.add_column(name, TINYBLOB, **kwargs) elif column_type == "binary": from sqlalchemy import Binary return self.add_column(name, Binary, **kwargs) elif column_type == "varbinary": return self.add_column(name, VARBINARY, **kwargs) else: raise ValueError(f"Unsupported binary type: {column_type}")
[docs] def add_enum_column(self, name: str, column_type: str, values: list, **kwargs): """Add an enum or set column.""" if column_type == "enum": from sqlalchemy import Enum return self.add_column(name, Enum(*values), **kwargs) elif column_type == "set": from sqlalchemy.dialects.mysql import SET return self.add_column(name, SET(*values), **kwargs) else: raise ValueError(f"Unsupported enum type: {column_type}")
[docs] def add_index(self, columns: Union[str, List[str]], name: Optional[str] = None, **kwargs): """Add an index to the table.""" if isinstance(columns, str): columns = [columns] index_name = name or f"idx_{self.table_name}_{'_'.join(columns)}" index = Index(index_name, *columns, **kwargs) self.indexes.append(index) return self
[docs] def add_primary_key(self, columns: Union[str, List[str]]): """Add a primary key constraint.""" if isinstance(columns, str): columns = [columns] constraint = PrimaryKeyConstraint(*columns) self.constraints.append(constraint) return self
[docs] def add_foreign_key( self, columns: Union[str, List[str]], ref_table: str, ref_columns: Union[str, List[str]], name: Optional[str] = None, ): """Add a foreign key constraint.""" if isinstance(columns, str): columns = [columns] if isinstance(ref_columns, str): ref_columns = [ref_columns] constraint_name = name or f"fk_{self.table_name}_{'_'.join(columns)}" constraint = ForeignKeyConstraint(columns, [f"{ref_table}.{col}" for col in ref_columns], name=constraint_name) self.constraints.append(constraint) return self
[docs] def build(self) -> Table: """Build and return the SQLAlchemy Table object.""" return Table(self.table_name, self.metadata, *self.columns, *self.indexes, *self.constraints)
def create_vector_table(table_name: str, metadata: MetaData = None) -> VectorTableBuilder: """ Create a new vector table builder. Args: table_name: Name of the table metadata: SQLAlchemy metadata object Returns: VectorTableBuilder instance """ return VectorTableBuilder(table_name, metadata) # Convenience functions for common table patterns def create_vector_index_table(table_name: str, metadata: MetaData = None) -> VectorTableBuilder: """ Create a table builder for vector index tables. Example: create table vector_index_07(a int primary key, b vecf32(128), c int, key c_k(c)) """ builder = VectorTableBuilder(table_name, metadata) # Add common columns builder.add_int_column("a", primary_key=True) builder.add_vecf32_column("b", dimension=128) builder.add_int_column("c") # Add index on column c builder.add_index("c", name="c_k") return builder def create_document_vector_table(table_name: str, metadata: MetaData = None, vector_dim: int = 384) -> VectorTableBuilder: """ Create a table builder for document vector storage. """ builder = VectorTableBuilder(table_name, metadata) builder.add_int_column("id", primary_key=True, autoincrement=True) builder.add_string_column("document_id", length=100, nullable=False, unique=True) builder.add_string_column("title", length=255) builder.add_text_column("content") builder.add_vecf32_column("embedding", dimension=vector_dim, nullable=False) builder.add_json_column("metadata") # Add indexes builder.add_index("document_id") builder.add_index("title") return builder def create_product_vector_table(table_name: str, metadata: MetaData = None, vector_dim: int = 512) -> VectorTableBuilder: """ Create a table builder for product vector storage. """ builder = VectorTableBuilder(table_name, metadata) builder.add_int_column("id", primary_key=True, autoincrement=True) builder.add_string_column("product_id", length=50, nullable=False, unique=True) builder.add_string_column("name", length=200) builder.add_text_column("description") builder.add_vecf32_column("embedding", dimension=vector_dim, nullable=False) builder.add_string_column("category", length=100) builder.add_string_column("price", length=20) # Add indexes builder.add_index("product_id") builder.add_index("category") return builder