Fulltext Index Extensions

The MatrixOne Python SDK provides advanced fulltext indexing and search capabilities through SQLAlchemy extensions.

FulltextIndex

class matrixone.sqlalchemy_ext.fulltext_index.FulltextIndex(name: str, columns: str | List[str], algorithm: str = 'TF-IDF', parser: str | None = None)[source]

Bases: Index

SQLAlchemy Index for fulltext columns with MatrixOne-specific syntax.

Specialized class for fulltext indexes with type safety and clear API.

Usage Examples

Class Methods (Recommended for one-time operations):

# Create index using class method
success = FulltextIndex.create_index(
    engine=engine,
    table_name='my_table',
    name='ftidx_content',
    columns=['title', 'content'],
    algorithm=FulltextAlgorithmType.BM25
)

# Drop index using class method
success = FulltextIndex.drop_index(
    engine=engine,
    table_name='my_table',
    name='ftidx_content'
)

Instance Methods (Useful for reusable index configurations):

# Create index object
index = FulltextIndex('ftidx_content', ['title', 'content'], algorithm=FulltextAlgorithmType.BM25)

# Create index using instance method
success = index.create(engine, 'my_table')

# Drop index using instance method
success = index.drop(engine, 'my_table')

SQLAlchemy ORM Integration:

# In table definition
class Document(Base):
    __tablename__ = 'documents'
    id = Column(Integer, primary_key=True)
    title = Column(String)
    content = Column(Text)

    # Define fulltext index in table
    __table_args__ = (FulltextIndex('ftidx_doc', ['title', 'content']),)

# Or create index separately
FulltextIndex.create_index(engine, 'documents', 'ftidx_doc', ['title', 'content'])

Using client.fulltext_index manager (recommended):

# Auto-commit mode
client.fulltext_index.create(
    'my_table', 'ftidx_content', ['title', 'content'],
    algorithm=FulltextAlgorithmType.BM25
)

# Transaction context mode
with client.session() as session:
    session.fulltext_index.create(
        'my_table', 'ftidx_content', ['title', 'content']
    )
    session.commit()

__init__(name: str, columns: str | List[str], algorithm: str = 'TF-IDF', parser: str | None = None)[source]

Initialize FulltextIndex.

Parameters:

name (str) – Index name (e.g., ‘ftidx_content’, ‘idx_search’)
columns (str or list) –
Column(s) to index
- Single column: “content” or [“content”]
- Multiple columns: [“title”, “content”]
algorithm (str) –
Fulltext algorithm type (stored but not part of DDL)
- FulltextAlgorithmType.TF_IDF (default): Traditional TF-IDF
- FulltextAlgorithmType.BM25: Modern BM25 ranking
- Note: Set via SET ft_relevancy_algorithm at runtime
parser (str, optional) –
Parser type for specialized content
- None (default): Standard text parser
- FulltextParserType.JSON: Parse JSON documents
- FulltextParserType.NGRAM: N-gram for Chinese/Asian languages

Examples:

# Basic fulltext index (no parser)
index = FulltextIndex("ftidx_content", "content")

# Multiple columns with BM25
index = FulltextIndex("ftidx_search", ["title", "content"],
                     algorithm=FulltextAlgorithmType.BM25)

# JSON parser for JSON content
index = FulltextIndex("ftidx_json", "json_data",
                     parser=FulltextParserType.JSON)

# NGRAM parser for Chinese content
index = FulltextIndex("ftidx_chinese", ["title", "body"],
                     parser=FulltextParserType.NGRAM)

# Combined: Multiple columns with JSON parser
index = FulltextIndex("ftidx_multi_json", ["json1", "json2"],
                     parser=FulltextParserType.JSON)

get_columns()[source]: Get column names as a list

classmethod create_index(bind, table_name: str, name: str, columns: str | List[str], algorithm: str = 'TF-IDF', parser: str | None = None) → bool[source]

Create a fulltext index using class method.

This method creates a fulltext index on specified columns with optional parser support for specialized content types (JSON, Chinese text, etc.).

Parameters:

bind – SQLAlchemy connection (from session.connection() or engine.begin())
table_name (str) – Target table name (e.g., ‘articles’, ‘documents’)
name (str) – Index name (e.g., ‘ftidx_content’, ‘idx_search’)
columns (str or list) –
Column(s) to index
- Single: “content” or [“content”]
- Multiple: [“title”, “content”, “summary”]
algorithm (str) –
Algorithm type (stored for reference, not in DDL)
- FulltextAlgorithmType.TF_IDF (default)
- FulltextAlgorithmType.BM25
- Set via: SET ft_relevancy_algorithm = “BM25”
parser (str, optional) –
Parser type for specialized content
- None (default): Standard parser
- FulltextParserType.JSON: For JSON documents
- FulltextParserType.NGRAM: For Chinese/Asian languages

Returns:

True if succeeded, False otherwise

Return type:

bool

Examples:

# With session connection (recommended)
with client.session() as session:
    conn = session.connection()
    FulltextIndex.create_index(
        conn, 'articles', 'ftidx_content', 'content'
    )
    session.commit()

# With async session connection
async with async_client.session() as session:
    conn = await session.connection()
    await conn.run_sync(
        lambda sync_conn: FulltextIndex.create_index(
            sync_conn, 'articles', 'ftidx_content', 'content'
        )
    )
    await session.commit()

classmethod drop_index(bind, table_name: str, name: str) → bool[source]

Drop a fulltext index using ORM-style method.

Parameters:

bind – SQLAlchemy connection (from session.connection() or engine.begin())
table_name – Target table name
name – Index name

Returns:

True if successful, False otherwise

Return type:

bool

create(engine, table_name: str) → bool[source]

Create this fulltext index using ORM-style method.

Parameters:

engine – SQLAlchemy engine
table_name – Target table name

Returns:

True if successful, False otherwise

Return type:

bool

drop(engine, table_name: str) → bool[source]

Drop this fulltext index using ORM-style method.

Parameters:

engine – SQLAlchemy engine
table_name – Target table name

Returns:

True if successful, False otherwise

Return type:

bool

FulltextSearchBuilder

class matrixone.sqlalchemy_ext.fulltext_index.FulltextSearchBuilder(table_name: str, columns: str | List[str])[source]

Bases: object

Builder class for fulltext search queries.

Provides a fluent interface for building MATCH…AGAINST queries.

__init__(table_name: str, columns: str | List[str])[source]

Initialize FulltextSearchBuilder.

Parameters:

table_name – Table to search in
columns – Column(s) to search in

property with_score: Get the with_score setting for backward compatibility

property mode: Get the search mode for backward compatibility

property order_by: Get the order by clause for backward compatibility

search(term: str) → FulltextSearchBuilder[source]

Set the search term.

Parameters:: term – Search term
Returns:: Self for chaining
Return type:: FulltextSearchBuilder

set_mode(mode: str) → FulltextSearchBuilder[source]

Set the search mode.

Parameters:: mode – Search mode (natural language, boolean, query expansion)
Returns:: Self for chaining
Return type:: FulltextSearchBuilder

set_with_score(include_score: bool = True) → FulltextSearchBuilder[source]

Include relevance score in results.

Parameters:: include_score – Whether to include score
Returns:: Self for chaining
Return type:: FulltextSearchBuilder

where(condition: str) → FulltextSearchBuilder[source]

Add WHERE condition.

Parameters:: condition – WHERE condition
Returns:: Self for chaining
Return type:: FulltextSearchBuilder

set_order_by(column: str, direction: str = 'DESC') → FulltextSearchBuilder[source]

Set ORDER BY clause.

Parameters:

column – Column to order by
direction – Order direction (ASC/DESC)

Returns:

Self for chaining

Return type:

FulltextSearchBuilder

limit(count: int) → FulltextSearchBuilder[source]

Set LIMIT clause.

Parameters:: count – Number of rows to limit
Returns:: Self for chaining
Return type:: FulltextSearchBuilder

offset(count: int) → FulltextSearchBuilder[source]

Set OFFSET clause.

Parameters:: count – Number of rows to offset
Returns:: Self for chaining
Return type:: FulltextSearchBuilder

build_sql() → str[source]

Build the SQL query using unified SQL builder.

Returns:: SQL query string
Return type:: str

execute(connection) → Any[source]

Execute the search query.

Parameters:: connection – Database connection
Returns:: Query result

FulltextAlgorithmType

class matrixone.sqlalchemy_ext.fulltext_index.FulltextAlgorithmType[source]

Bases: object

Enum-like class for fulltext algorithm types.

MatrixOne supports two main fulltext relevancy algorithms:

TF_IDF

Term Frequency-Inverse Document Frequency

Traditional information retrieval algorithm
Good for specific use cases with proven reliability
Formula: TF(term) × IDF(term)
Use case: Academic search, technical documentation

Type:: str

BM25

Best Matching 25 (Okapi BM25)

Modern probabilistic ranking algorithm
Generally superior to TF-IDF for diverse content
Handles document length normalization better
Use case: General-purpose search, modern applications
Recommended as default for new applications

Type:: str

Note

The algorithm is set at runtime via SQL command, not in the index DDL.

Examples:

# Set algorithm to BM25
client.execute('SET ft_relevancy_algorithm = "BM25"')

# Create index with BM25 reference
index = FulltextIndex("ftidx_content", ["title", "content"],
                     algorithm=FulltextAlgorithmType.BM25)

# Perform searches with BM25 scoring
result = client.query(Article).filter(
    boolean_match(Article.content).must("search term")
).execute()

TF_IDF = 'TF-IDF'

BM25 = 'BM25'

FulltextModeType

class matrixone.sqlalchemy_ext.fulltext_index.FulltextModeType[source]

Bases: object

Enum-like class for fulltext search modes.

NATURAL_LANGUAGE = 'natural language mode'

BOOLEAN = 'boolean mode'

QUERY_EXPANSION = 'query expansion mode'

FulltextParserType

class matrixone.sqlalchemy_ext.fulltext_index.FulltextParserType[source]

Bases: object

Enum-like class for fulltext parser types.

MatrixOne supports specialized parsers for different content types.

JSON

Parser for JSON documents

Indexes JSON values (not keys)
Suitable for text/varchar/json columns containing JSON data
Use case: Product details, user profiles, metadata
Example SQL: CREATE FULLTEXT INDEX idx ON table (col) WITH PARSER json

Type:: str

NGRAM

Parser for Chinese and Asian languages

N-gram based tokenization for languages without word delimiters
Better word segmentation for Chinese, Japanese, Korean, etc.
Use case: Chinese articles, mixed language content
Example SQL: CREATE FULLTEXT INDEX idx ON table (col) WITH PARSER ngram

Type:: str

Examples:

# Using JSON parser in ORM
class Product(Base):
    __tablename__ = "products"
    details = Column(Text)
    __table_args__ = (
        FulltextIndex("ftidx_json", "details",
                     parser=FulltextParserType.JSON),
    )

# Using NGRAM parser for Chinese content
class ChineseArticle(Base):
    __tablename__ = "chinese_articles"
    title = Column(String(200))
    body = Column(Text)
    __table_args__ = (
        FulltextIndex("ftidx_chinese", ["title", "body"],
                     parser=FulltextParserType.NGRAM),
    )

# Using parser in create_index method
FulltextIndex.create_index(
    engine, 'products', 'ftidx_json', 'details',
    parser=FulltextParserType.JSON
)

JSON = 'json'

NGRAM = 'ngram'

Usage Examples

Basic Fulltext Index Creation

from matrixone import Client
from matrixone.sqlalchemy_ext import FulltextIndex

client = Client()
client.connect(host='localhost', port=6001, user='root', password='111', database='test')

# Create a fulltext index
client.fulltext_index.create(
    table_name_or_model='articles',
    name='ftidx_content',
    columns=['title', 'content'],
    algorithm='TF-IDF'
)

Advanced Search with Builder

from matrixone.sqlalchemy_ext import FulltextSearchBuilder

# Create a search builder
builder = FulltextSearchBuilder('articles', ['title', 'content'])

# Build a complex search query
sql = (builder
       .search('database management')
       .set_mode('natural language')
       .set_with_score(True)
       .where('id > 0')
       .set_order_by('score', 'DESC')
       .limit(10)
       .build_sql())

result = client.execute(sql)

Boolean Mode Search

# Boolean mode search with operators
builder = FulltextSearchBuilder('articles', ['title', 'content'])
sql = (builder
       .search('+database +management -mysql')
       .set_mode('boolean')
       .build_sql())

result = client.execute(sql)

Async Operations

from matrixone import AsyncClient

async_client = AsyncClient()
await async_client.connect(host='localhost', port=6001, user='root', password='111', database='test')

# Create fulltext index asynchronously
await async_client.fulltext_index.create(
    table_name_or_model='articles',
    name='ftidx_async',
    columns=['title', 'content']
)

# Search asynchronously
result = await async_client.execute(sql)

Fulltext Index with JSON Parser

from matrixone import Client, FulltextParserType
from matrixone.orm import declarative_base
from matrixone.sqlalchemy_ext import FulltextIndex, boolean_match
from sqlalchemy import Column, BigInteger, Text

client = Client()
client.connect(host='localhost', port=6001, user='root', password='111', database='test')
client.fulltext_index.enable_fulltext()

# Define ORM model with JSON parser
Base = declarative_base()

class Product(Base):
    __tablename__ = "products"
    id = Column(BigInteger, primary_key=True)
    details = Column(Text)  # Stores JSON as text

    # Define fulltext index with JSON parser
    __table_args__ = (
        FulltextIndex("ftidx_details", "details", parser=FulltextParserType.JSON),
    )

# Create table (index is created automatically)
client.create_table(Product)

# Insert JSON data
products = [
    {"id": 1, "details": '{"name": "Laptop", "brand": "Dell", "price": 1200}'},
    {"id": 2, "details": '{"name": "Phone", "brand": "Apple", "price": 800}'},
]
client.batch_insert(Product, products)

# Search within JSON content
result = client.query(Product).filter(
    boolean_match(Product.details).must("Dell")
).execute()

for row in result.fetchall():
    print(row.details)

Fulltext Index with NGRAM Parser

from matrixone import Client, FulltextParserType
from matrixone.orm import declarative_base
from matrixone.sqlalchemy_ext import FulltextIndex, natural_match, boolean_match
from sqlalchemy import Column, Integer, String, Text

client = Client()
client.connect(host='localhost', port=6001, user='root', password='111', database='test')
client.fulltext_index.enable_fulltext()

# Define ORM model with NGRAM parser for Chinese content
Base = declarative_base()

class ChineseArticle(Base):
    __tablename__ = "chinese_articles"
    id = Column(Integer, primary_key=True, autoincrement=True)
    title = Column(String(200))
    body = Column(Text)

    # Define fulltext index with NGRAM parser for Chinese tokenization
    __table_args__ = (
        FulltextIndex("ftidx_chinese", ["title", "body"],
                     parser=FulltextParserType.NGRAM),
    )

# Create table (index is created automatically)
client.create_table(ChineseArticle)

# Insert Chinese content
articles = [
    {"id": 1, "title": "神雕侠侣 第一回", "body": "越女采莲秋水畔，窄袖轻罗，暗露双金钏"},
    {"id": 2, "title": "神雕侠侣 第二回", "body": "正自发痴，忽听左首屋中传出一人喝道"},
]
client.batch_insert(ChineseArticle, articles)

# Search Chinese content with natural language mode
result = client.query(ChineseArticle).filter(
    natural_match(ChineseArticle.title, ChineseArticle.body, query="神雕侠侣")
).execute()

rows = result.fetchall()
print(f"Found {len(rows)} articles")

# Boolean mode search
result = client.query(ChineseArticle).filter(
    boolean_match(ChineseArticle.title, ChineseArticle.body).must("杨过")
).execute()

BM25 Algorithm Usage

from matrixone import Client, FulltextAlgorithmType
from matrixone.orm import declarative_base
from matrixone.sqlalchemy_ext import FulltextIndex, boolean_match
from sqlalchemy import Column, Integer, String, Text

client = Client()
client.connect(host='localhost', port=6001, user='root', password='111', database='test')
client.fulltext_index.enable_fulltext()

# Set algorithm to BM25 (affects all subsequent fulltext searches)
client.execute('SET ft_relevancy_algorithm = "BM25"')

# Define ORM model
Base = declarative_base()

class Article(Base):
    __tablename__ = "articles"
    id = Column(Integer, primary_key=True)
    title = Column(String(200))
    content = Column(Text)

    # Create index (algorithm is a runtime setting, not in DDL)
    __table_args__ = (
        FulltextIndex("ftidx_content", ["title", "content"],
                     algorithm=FulltextAlgorithmType.BM25),
    )

client.create_table(Article)

# Insert data
articles = [
    {"id": 1, "title": "Python Guide", "content": "Learn Python programming"},
    {"id": 2, "title": "Java Tutorial", "content": "Java programming basics"},
]
client.batch_insert(Article, articles)

# Search using BM25 scoring
result = client.query(Article).filter(
    boolean_match(Article.title, Article.content).must("programming")
).execute()

# Reset to TF-IDF if needed
client.execute('SET ft_relevancy_algorithm = "TF-IDF"')