Fulltext Index Extensions

The MatrixOne Python SDK provides advanced fulltext indexing and search capabilities through SQLAlchemy extensions.

FulltextIndex

class matrixone.sqlalchemy_ext.fulltext_index.FulltextIndex(name: str, columns: str | List[str], algorithm: str = 'TF-IDF', parser: str | None = None)[source]

Bases: Index

SQLAlchemy Index for fulltext columns with MatrixOne-specific syntax.

Specialized class for fulltext indexes with type safety and clear API.

Usage Examples

  1. Class Methods (Recommended for one-time operations):

    # Create index using class method
    success = FulltextIndex.create_index(
        engine=engine,
        table_name='my_table',
        name='ftidx_content',
        columns=['title', 'content'],
        algorithm=FulltextAlgorithmType.BM25
    )
    
    # Drop index using class method
    success = FulltextIndex.drop_index(
        engine=engine,
        table_name='my_table',
        name='ftidx_content'
    )
    
  2. Instance Methods (Useful for reusable index configurations):

    # Create index object
    index = FulltextIndex('ftidx_content', ['title', 'content'], algorithm=FulltextAlgorithmType.BM25)
    
    # Create index using instance method
    success = index.create(engine, 'my_table')
    
    # Drop index using instance method
    success = index.drop(engine, 'my_table')
    
  3. SQLAlchemy ORM Integration:

    # In table definition
    class Document(Base):
        __tablename__ = 'documents'
        id = Column(Integer, primary_key=True)
        title = Column(String)
        content = Column(Text)
    
        # Define fulltext index in table
        __table_args__ = (FulltextIndex('ftidx_doc', ['title', 'content']),)
    
    # Or create index separately
    FulltextIndex.create_index(engine, 'documents', 'ftidx_doc', ['title', 'content'])
    
  4. Using client.fulltext_index manager (recommended):

    # Auto-commit mode
    client.fulltext_index.create(
        'my_table', 'ftidx_content', ['title', 'content'],
        algorithm=FulltextAlgorithmType.BM25
    )
    
    # Transaction context mode
    with client.session() as session:
        session.fulltext_index.create(
            'my_table', 'ftidx_content', ['title', 'content']
        )
        session.commit()
    
__init__(name: str, columns: str | List[str], algorithm: str = 'TF-IDF', parser: str | None = None)[source]

Initialize FulltextIndex.

Parameters:
  • name (str) – Index name (e.g., ‘ftidx_content’, ‘idx_search’)

  • columns (str or list) –

    Column(s) to index

    • Single column: “content” or [“content”]

    • Multiple columns: [“title”, “content”]

  • algorithm (str) –

    Fulltext algorithm type (stored but not part of DDL)

    • FulltextAlgorithmType.TF_IDF (default): Traditional TF-IDF

    • FulltextAlgorithmType.BM25: Modern BM25 ranking

    • Note: Set via SET ft_relevancy_algorithm at runtime

  • parser (str, optional) –

    Parser type for specialized content

    • None (default): Standard text parser

    • FulltextParserType.JSON: Parse JSON documents

    • FulltextParserType.NGRAM: N-gram for Chinese/Asian languages

Examples:

# Basic fulltext index (no parser)
index = FulltextIndex("ftidx_content", "content")

# Multiple columns with BM25
index = FulltextIndex("ftidx_search", ["title", "content"],
                     algorithm=FulltextAlgorithmType.BM25)

# JSON parser for JSON content
index = FulltextIndex("ftidx_json", "json_data",
                     parser=FulltextParserType.JSON)

# NGRAM parser for Chinese content
index = FulltextIndex("ftidx_chinese", ["title", "body"],
                     parser=FulltextParserType.NGRAM)

# Combined: Multiple columns with JSON parser
index = FulltextIndex("ftidx_multi_json", ["json1", "json2"],
                     parser=FulltextParserType.JSON)
get_columns()[source]

Get column names as a list

classmethod create_index(bind, table_name: str, name: str, columns: str | List[str], algorithm: str = 'TF-IDF', parser: str | None = None) bool[source]

Create a fulltext index using class method.

This method creates a fulltext index on specified columns with optional parser support for specialized content types (JSON, Chinese text, etc.).

Parameters:
  • bind – SQLAlchemy connection (from session.connection() or engine.begin())

  • table_name (str) – Target table name (e.g., ‘articles’, ‘documents’)

  • name (str) – Index name (e.g., ‘ftidx_content’, ‘idx_search’)

  • columns (str or list) –

    Column(s) to index

    • Single: “content” or [“content”]

    • Multiple: [“title”, “content”, “summary”]

  • algorithm (str) –

    Algorithm type (stored for reference, not in DDL)

    • FulltextAlgorithmType.TF_IDF (default)

    • FulltextAlgorithmType.BM25

    • Set via: SET ft_relevancy_algorithm = “BM25”

  • parser (str, optional) –

    Parser type for specialized content

    • None (default): Standard parser

    • FulltextParserType.JSON: For JSON documents

    • FulltextParserType.NGRAM: For Chinese/Asian languages

Returns:

True if succeeded, False otherwise

Return type:

bool

Examples:

# With session connection (recommended)
with client.session() as session:
    conn = session.connection()
    FulltextIndex.create_index(
        conn, 'articles', 'ftidx_content', 'content'
    )
    session.commit()

# With async session connection
async with async_client.session() as session:
    conn = await session.connection()
    await conn.run_sync(
        lambda sync_conn: FulltextIndex.create_index(
            sync_conn, 'articles', 'ftidx_content', 'content'
        )
    )
    await session.commit()
classmethod drop_index(bind, table_name: str, name: str) bool[source]

Drop a fulltext index using ORM-style method.

Parameters:
  • bind – SQLAlchemy connection (from session.connection() or engine.begin())

  • table_name – Target table name

  • name – Index name

Returns:

True if successful, False otherwise

Return type:

bool

create(engine, table_name: str) bool[source]

Create this fulltext index using ORM-style method.

Parameters:
  • engine – SQLAlchemy engine

  • table_name – Target table name

Returns:

True if successful, False otherwise

Return type:

bool

drop(engine, table_name: str) bool[source]

Drop this fulltext index using ORM-style method.

Parameters:
  • engine – SQLAlchemy engine

  • table_name – Target table name

Returns:

True if successful, False otherwise

Return type:

bool

FulltextSearchBuilder

class matrixone.sqlalchemy_ext.fulltext_index.FulltextSearchBuilder(table_name: str, columns: str | List[str])[source]

Bases: object

Builder class for fulltext search queries.

Provides a fluent interface for building MATCH…AGAINST queries.

__init__(table_name: str, columns: str | List[str])[source]

Initialize FulltextSearchBuilder.

Parameters:
  • table_name – Table to search in

  • columns – Column(s) to search in

property with_score

Get the with_score setting for backward compatibility

property mode

Get the search mode for backward compatibility

property order_by

Get the order by clause for backward compatibility

search(term: str) FulltextSearchBuilder[source]

Set the search term.

Parameters:

term – Search term

Returns:

Self for chaining

Return type:

FulltextSearchBuilder

set_mode(mode: str) FulltextSearchBuilder[source]

Set the search mode.

Parameters:

mode – Search mode (natural language, boolean, query expansion)

Returns:

Self for chaining

Return type:

FulltextSearchBuilder

set_with_score(include_score: bool = True) FulltextSearchBuilder[source]

Include relevance score in results.

Parameters:

include_score – Whether to include score

Returns:

Self for chaining

Return type:

FulltextSearchBuilder

where(condition: str) FulltextSearchBuilder[source]

Add WHERE condition.

Parameters:

condition – WHERE condition

Returns:

Self for chaining

Return type:

FulltextSearchBuilder

set_order_by(column: str, direction: str = 'DESC') FulltextSearchBuilder[source]

Set ORDER BY clause.

Parameters:
  • column – Column to order by

  • direction – Order direction (ASC/DESC)

Returns:

Self for chaining

Return type:

FulltextSearchBuilder

limit(count: int) FulltextSearchBuilder[source]

Set LIMIT clause.

Parameters:

count – Number of rows to limit

Returns:

Self for chaining

Return type:

FulltextSearchBuilder

offset(count: int) FulltextSearchBuilder[source]

Set OFFSET clause.

Parameters:

count – Number of rows to offset

Returns:

Self for chaining

Return type:

FulltextSearchBuilder

build_sql() str[source]

Build the SQL query using unified SQL builder.

Returns:

SQL query string

Return type:

str

execute(connection) Any[source]

Execute the search query.

Parameters:

connection – Database connection

Returns:

Query result

FulltextAlgorithmType

class matrixone.sqlalchemy_ext.fulltext_index.FulltextAlgorithmType[source]

Bases: object

Enum-like class for fulltext algorithm types.

MatrixOne supports two main fulltext relevancy algorithms:

TF_IDF

Term Frequency-Inverse Document Frequency

  • Traditional information retrieval algorithm

  • Good for specific use cases with proven reliability

  • Formula: TF(term) × IDF(term)

  • Use case: Academic search, technical documentation

Type:

str

BM25

Best Matching 25 (Okapi BM25)

  • Modern probabilistic ranking algorithm

  • Generally superior to TF-IDF for diverse content

  • Handles document length normalization better

  • Use case: General-purpose search, modern applications

  • Recommended as default for new applications

Type:

str

Note

The algorithm is set at runtime via SQL command, not in the index DDL.

Examples:

# Set algorithm to BM25
client.execute('SET ft_relevancy_algorithm = "BM25"')

# Create index with BM25 reference
index = FulltextIndex("ftidx_content", ["title", "content"],
                     algorithm=FulltextAlgorithmType.BM25)

# Perform searches with BM25 scoring
result = client.query(Article).filter(
    boolean_match(Article.content).must("search term")
).execute()
TF_IDF = 'TF-IDF'
BM25 = 'BM25'

FulltextModeType

class matrixone.sqlalchemy_ext.fulltext_index.FulltextModeType[source]

Bases: object

Enum-like class for fulltext search modes.

NATURAL_LANGUAGE = 'natural language mode'
BOOLEAN = 'boolean mode'
QUERY_EXPANSION = 'query expansion mode'

FulltextParserType

class matrixone.sqlalchemy_ext.fulltext_index.FulltextParserType[source]

Bases: object

Enum-like class for fulltext parser types.

MatrixOne supports specialized parsers for different content types.

JSON

Parser for JSON documents

  • Indexes JSON values (not keys)

  • Suitable for text/varchar/json columns containing JSON data

  • Use case: Product details, user profiles, metadata

  • Example SQL: CREATE FULLTEXT INDEX idx ON table (col) WITH PARSER json

Type:

str

NGRAM

Parser for Chinese and Asian languages

  • N-gram based tokenization for languages without word delimiters

  • Better word segmentation for Chinese, Japanese, Korean, etc.

  • Use case: Chinese articles, mixed language content

  • Example SQL: CREATE FULLTEXT INDEX idx ON table (col) WITH PARSER ngram

Type:

str

Examples:

# Using JSON parser in ORM
class Product(Base):
    __tablename__ = "products"
    details = Column(Text)
    __table_args__ = (
        FulltextIndex("ftidx_json", "details",
                     parser=FulltextParserType.JSON),
    )

# Using NGRAM parser for Chinese content
class ChineseArticle(Base):
    __tablename__ = "chinese_articles"
    title = Column(String(200))
    body = Column(Text)
    __table_args__ = (
        FulltextIndex("ftidx_chinese", ["title", "body"],
                     parser=FulltextParserType.NGRAM),
    )

# Using parser in create_index method
FulltextIndex.create_index(
    engine, 'products', 'ftidx_json', 'details',
    parser=FulltextParserType.JSON
)
JSON = 'json'
NGRAM = 'ngram'

Usage Examples

Basic Fulltext Index Creation

from matrixone import Client
from matrixone.sqlalchemy_ext import FulltextIndex

client = Client()
client.connect(host='localhost', port=6001, user='root', password='111', database='test')

# Create a fulltext index
client.fulltext_index.create(
    table_name_or_model='articles',
    name='ftidx_content',
    columns=['title', 'content'],
    algorithm='TF-IDF'
)

Advanced Search with Builder

from matrixone.sqlalchemy_ext import FulltextSearchBuilder

# Create a search builder
builder = FulltextSearchBuilder('articles', ['title', 'content'])

# Build a complex search query
sql = (builder
       .search('database management')
       .set_mode('natural language')
       .set_with_score(True)
       .where('id > 0')
       .set_order_by('score', 'DESC')
       .limit(10)
       .build_sql())

result = client.execute(sql)

Async Operations

from matrixone import AsyncClient

async_client = AsyncClient()
await async_client.connect(host='localhost', port=6001, user='root', password='111', database='test')

# Create fulltext index asynchronously
await async_client.fulltext_index.create(
    table_name_or_model='articles',
    name='ftidx_async',
    columns=['title', 'content']
)

# Search asynchronously
result = await async_client.execute(sql)

Fulltext Index with JSON Parser

from matrixone import Client, FulltextParserType
from matrixone.orm import declarative_base
from matrixone.sqlalchemy_ext import FulltextIndex, boolean_match
from sqlalchemy import Column, BigInteger, Text

client = Client()
client.connect(host='localhost', port=6001, user='root', password='111', database='test')
client.fulltext_index.enable_fulltext()

# Define ORM model with JSON parser
Base = declarative_base()

class Product(Base):
    __tablename__ = "products"
    id = Column(BigInteger, primary_key=True)
    details = Column(Text)  # Stores JSON as text

    # Define fulltext index with JSON parser
    __table_args__ = (
        FulltextIndex("ftidx_details", "details", parser=FulltextParserType.JSON),
    )

# Create table (index is created automatically)
client.create_table(Product)

# Insert JSON data
products = [
    {"id": 1, "details": '{"name": "Laptop", "brand": "Dell", "price": 1200}'},
    {"id": 2, "details": '{"name": "Phone", "brand": "Apple", "price": 800}'},
]
client.batch_insert(Product, products)

# Search within JSON content
result = client.query(Product).filter(
    boolean_match(Product.details).must("Dell")
).execute()

for row in result.fetchall():
    print(row.details)

Fulltext Index with NGRAM Parser

from matrixone import Client, FulltextParserType
from matrixone.orm import declarative_base
from matrixone.sqlalchemy_ext import FulltextIndex, natural_match, boolean_match
from sqlalchemy import Column, Integer, String, Text

client = Client()
client.connect(host='localhost', port=6001, user='root', password='111', database='test')
client.fulltext_index.enable_fulltext()

# Define ORM model with NGRAM parser for Chinese content
Base = declarative_base()

class ChineseArticle(Base):
    __tablename__ = "chinese_articles"
    id = Column(Integer, primary_key=True, autoincrement=True)
    title = Column(String(200))
    body = Column(Text)

    # Define fulltext index with NGRAM parser for Chinese tokenization
    __table_args__ = (
        FulltextIndex("ftidx_chinese", ["title", "body"],
                     parser=FulltextParserType.NGRAM),
    )

# Create table (index is created automatically)
client.create_table(ChineseArticle)

# Insert Chinese content
articles = [
    {"id": 1, "title": "神雕侠侣 第一回", "body": "越女采莲秋水畔,窄袖轻罗,暗露双金钏"},
    {"id": 2, "title": "神雕侠侣 第二回", "body": "正自发痴,忽听左首屋中传出一人喝道"},
]
client.batch_insert(ChineseArticle, articles)

# Search Chinese content with natural language mode
result = client.query(ChineseArticle).filter(
    natural_match(ChineseArticle.title, ChineseArticle.body, query="神雕侠侣")
).execute()

rows = result.fetchall()
print(f"Found {len(rows)} articles")

# Boolean mode search
result = client.query(ChineseArticle).filter(
    boolean_match(ChineseArticle.title, ChineseArticle.body).must("杨过")
).execute()

BM25 Algorithm Usage

from matrixone import Client, FulltextAlgorithmType
from matrixone.orm import declarative_base
from matrixone.sqlalchemy_ext import FulltextIndex, boolean_match
from sqlalchemy import Column, Integer, String, Text

client = Client()
client.connect(host='localhost', port=6001, user='root', password='111', database='test')
client.fulltext_index.enable_fulltext()

# Set algorithm to BM25 (affects all subsequent fulltext searches)
client.execute('SET ft_relevancy_algorithm = "BM25"')

# Define ORM model
Base = declarative_base()

class Article(Base):
    __tablename__ = "articles"
    id = Column(Integer, primary_key=True)
    title = Column(String(200))
    content = Column(Text)

    # Create index (algorithm is a runtime setting, not in DDL)
    __table_args__ = (
        FulltextIndex("ftidx_content", ["title", "content"],
                     algorithm=FulltextAlgorithmType.BM25),
    )

client.create_table(Article)

# Insert data
articles = [
    {"id": 1, "title": "Python Guide", "content": "Learn Python programming"},
    {"id": 2, "title": "Java Tutorial", "content": "Java programming basics"},
]
client.batch_insert(Article, articles)

# Search using BM25 scoring
result = client.query(Article).filter(
    boolean_match(Article.title, Article.content).must("programming")
).execute()

# Reset to TF-IDF if needed
client.execute('SET ft_relevancy_algorithm = "TF-IDF"')