# Copyright 2021 - 2022 Matrix Origin
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Advanced Fulltext Search Builder for MatrixOne
This module provides an Elasticsearch-like query builder for MatrixOne fulltext search,
with chainable methods and comprehensive search capabilities.
## Column Matching Requirements
**CRITICAL**: The columns specified in MATCH() must exactly match the columns
defined in the FULLTEXT index. This is a MatrixOne requirement.
Examples:
- If your index is: `FULLTEXT(title, content, tags)`
- Your MATCH() must be: `MATCH(title, content, tags) AGAINST(...)`
- NOT: `MATCH(title) AGAINST(...)` or `MATCH(title, content) AGAINST(...)`
## MatrixOne Limitations
1. **Multiple MATCH() Functions**: MatrixOne does not support multiple
MATCH() functions in the same query.
❌ WRONG: `WHERE MATCH(...) AND MATCH(...)`
✅ CORRECT: Use chained filter() calls or combine terms in single MATCH()
2. **Complex Nested Groups**: Some complex nested syntaxes are not supported.
❌ WRONG: `'+learning -basic (+machine AI) (+deep neural)'`
✅ CORRECT: `'+learning -basic +machine +deep'`
## Supported Boolean Mode Operators
### Group-level operators (applied to entire groups):
- `+(group)`: Group must be present
- `-(group)`: Group must not be present
### Element-level operators:
- `+term`: Term must contain (required)
- `-term`: Term must not contain (excluded)
- `term`: Term optional (should contain)
- `"phrase"`: Exact phrase match
- `term*`: Prefix match
### Weight operators (within groups/elements):
- `term`: Optional term with normal positive weight boost
- `>term`: Higher relevance weight for term (high positive boost)
- `<term`: Lower relevance weight for term (low positive boost)
- `~term`: Reduced/suppressed relevance weight (negative or minimal boost)
### Weight Operator Comparison:
- `encourage("tutorial")` → `tutorial` : Encourages documents with "tutorial"
- `discourage("legacy")` → `~legacy` : Discourages documents with "legacy"
Both are optional (don't filter documents) but affect ranking differently.
### MatrixOne Example:
- `'+red -(<blue >is)'`: Must have 'red', must NOT have group containing 'blue' (low weight) and 'is' (high weight)
## Supported Modes
- **NATURAL LANGUAGE**: Default full-text search
- **BOOLEAN**: Advanced boolean operators
- **QUERY EXPANSION**: Automatic query expansion (limited support)
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any, List, Optional
from sqlalchemy import Boolean
from sqlalchemy.sql import and_, or_, text
from sqlalchemy.sql.elements import ColumnElement
from sqlalchemy.ext.compiler import compiles
if TYPE_CHECKING:
from ..client import Client
class FulltextSearchMode:
"""Enum-like class for fulltext search modes."""
NATURAL_LANGUAGE = "natural language mode"
BOOLEAN = "boolean mode"
QUERY_EXPANSION = "query expansion mode"
class FulltextSearchAlgorithm:
"""Enum-like class for fulltext search algorithms."""
TF_IDF = "TF-IDF"
BM25 = "BM25"
class FulltextElement:
"""Represents a single fulltext element (term, phrase, prefix, etc.)."""
def __init__(self, content: str, operator: str = "", weight_modifier: str = ""):
self.content = content
self.operator = operator # "+", "-", "", etc.
self.weight_modifier = weight_modifier # ">", "<", "~", etc.
def build(self) -> str:
"""Build the element string."""
if self.weight_modifier:
return f"{self.operator}{self.weight_modifier}{self.content}"
return f"{self.operator}{self.content}"
class FulltextGroup:
"""Represents a group of fulltext elements for building nested boolean queries.
This class is used to create groups of terms that can be combined with
group-level operators (+, -, ~, no prefix) in MatrixOne's boolean mode.
Element-level Methods (within groups):
- medium(): Add terms with medium weight (no operators)
- high(): Add terms with high weight (>term)
- low(): Add terms with low weight (<term)
- phrase(): Add exact phrase matches ("phrase")
- prefix(): Add prefix matches (term*)
Group Types:
- "or": OR semantics (default) - any term in group can match
- "and": AND semantics - all terms in group must match
- "not": NOT semantics - none of the terms in group can match
Usage with Group-level Operators:
# Create groups and apply group-level operators
query.must(group().medium("java", "kotlin")) # +(java kotlin)
query.encourage(group().medium("tutorial", "guide")) # (tutorial guide)
query.discourage(group().medium("old", "outdated")) # ~(old outdated)
query.must_not(group().medium("spam", "junk")) # -(spam junk)
Element-level Weight Operators (inside groups):
# MatrixOne syntax: '+red -(<blue >is)'
group().low("blue").high("is")
# Used as: query.must("red").must_not(group().low("blue").high("is"))
Important Notes:
- Use medium() for normal terms inside groups (no operators)
- Use high()/low() for element-level weight control
- Group-level operators (+, -, ~) are applied by the parent query builder
"""
def __init__(self, group_type: str = "or"):
self.elements: List[FulltextElement] = []
self.groups: List["FulltextGroup"] = []
self.group_type = group_type # "or", "and", "not"
self.is_tilde = False # Whether this group has tilde weight
def must(self, *terms: str) -> "FulltextGroup":
"""Add required terms (only for top-level, groups should use medium() instead)."""
for term in terms:
# Groups don't use +/- operators on elements, only at group level
if self.group_type in ["or", "not"]:
# Inside groups, elements should not have +/- operators
self.elements.append(FulltextElement(term, ""))
else:
# Top-level (main group) can use + operator
self.elements.append(FulltextElement(term, "+"))
return self
def must_not(self, *terms: str) -> "FulltextGroup":
"""Add excluded terms (only for top-level, groups should use medium() instead)."""
for term in terms:
# Groups don't use +/- operators on elements, only at group level
if self.group_type in ["or", "not"]:
# Inside groups, elements should not have +/- operators
self.elements.append(FulltextElement(term, ""))
else:
# Top-level (main group) can use - operator
self.elements.append(FulltextElement(term, "-"))
return self
def encourage(self, *terms: str) -> "FulltextGroup":
"""Add terms that should be encouraged (normal positive weight).
These terms are optional - documents without them can still match,
but documents containing them will get normal positive scoring boost.
Args::
*terms: Terms to add with normal positive weight
Example::
# Documents with 'python' get normal positive boost
group.encourage("python") # Generates: python
"""
for term in terms:
# Optional terms never have operators
self.elements.append(FulltextElement(term, ""))
return self
def medium(self, *terms: str) -> "FulltextGroup":
"""Add terms with medium/normal weight (no operators)."""
for term in terms:
self.elements.append(FulltextElement(term, ""))
return self
def phrase(self, phrase: str) -> "FulltextGroup":
"""Add a phrase search."""
self.elements.append(FulltextElement(f'"{phrase}"', ""))
return self
def prefix(self, prefix: str) -> "FulltextGroup":
"""Add a prefix search."""
self.elements.append(FulltextElement(f"{prefix}*", ""))
return self
def boost(self, term: str, weight: float) -> "FulltextGroup":
"""Add a boosted term."""
self.elements.append(FulltextElement(f"{term}^{weight}", ""))
return self
def high(self, *terms: str) -> "FulltextGroup":
"""Add terms with high weight (>term)."""
for term in terms:
self.elements.append(FulltextElement(term, "", ">"))
return self
def low(self, *terms: str) -> "FulltextGroup":
"""Add terms with low weight (<term)."""
for term in terms:
self.elements.append(FulltextElement(term, "", "<"))
return self
def add_group(self, *groups: "FulltextGroup") -> "FulltextGroup":
"""Add nested groups."""
for group in groups:
self.groups.append(group)
return self
def add_tilde_group(self, group: "FulltextGroup") -> "FulltextGroup":
"""Add a group with tilde weight (~group)."""
group.is_tilde = True
self.groups.append(group)
return self
def build(self) -> str:
"""Build the group string."""
parts = []
# Add elements
for element in self.elements:
parts.append(element.build())
# Add nested groups with appropriate prefix based on group type
for group in self.groups:
group_str = group.build()
if group_str:
if group.is_tilde:
# For tilde groups, use ~(<content>) format
parts.append(f"~({group_str})")
elif group.group_type == "not":
# For NOT groups, use -(<content>) format
parts.append(f"-({group_str})")
elif group.group_type == "and":
# For AND groups, use +(<content>) format
parts.append(f"+({group_str})")
else: # or (default)
# For OR groups, just use (<content>) format
parts.append(f"({group_str})")
return " ".join(parts)
[docs]
class FulltextQueryBuilder:
"""Builder for constructing fulltext boolean queries.
This class provides a chainable API for building complex fulltext search queries
that are compatible with MatrixOne's MATCH() AGAINST() syntax.
Core Methods:
- must(): Required terms/groups (+ operator)
- must_not(): Excluded terms/groups (- operator)
- encourage(): Optional terms/groups with normal weight (no prefix)
- discourage(): Optional terms/groups with reduced weight (~ operator)
Examples::
# Basic usage
query.must("python") # +python
query.encourage("tutorial") # tutorial
query.discourage("legacy") # ~legacy
query.must_not("deprecated") # -deprecated
# Group usage
query.must(group().medium("java", "kotlin")) # +(java kotlin)
query.encourage(group().medium("tutorial", "guide")) # (tutorial guide)
query.must_not(group().medium("spam", "junk")) # -(spam junk)
Note: Group-level operators (+, -, ~) applied to entire groups. Element-level operators (>, <)
applied within groups using high(), low()
"""
[docs]
def __init__(self):
self.main_group = FulltextGroup("and") # Main group with AND semantics
[docs]
def must(self, *items) -> "FulltextQueryBuilder":
"""Add required terms or groups (+ operator at group level).
Documents MUST contain these terms/groups to match. This is equivalent
to the '+' operator in MatrixOne's boolean mode syntax.
Args::
*items: Can be strings (terms) or FulltextGroup objects
Examples::
# Required term - documents must contain 'python'
query.must("python") # Generates: +python
# Required group - documents must contain either 'java' OR 'kotlin'
query.must(group().medium("java", "kotlin")) # Generates: +(java kotlin)
# Multiple required terms
query.must("python", "programming") # Generates: +python +programming
# Unpack list to search multiple terms
words = ["python", "programming"]
query.must(*words) # Correct: unpacks the list
Raises::
TypeError: If a list or tuple is passed directly without unpacking
Returns::
FulltextQueryBuilder: Self for method chaining
"""
for item in items:
# Validate parameter types and provide friendly error messages
if isinstance(item, (list, tuple)):
raise TypeError(
f"must() received a {type(item).__name__} object, but expected individual terms. "
f"To search multiple terms, use the unpacking operator: must(*terms) instead of must(terms). "
f"Example: must(*{list(item)[:3]}) or must({', '.join(repr(str(t)) for t in list(item)[:3])})"
)
if isinstance(item, FulltextGroup):
item.group_type = "and" # Force group to be required
self.main_group.add_group(item)
else:
self.main_group.must(item)
return self
[docs]
def must_not(self, *items) -> "FulltextQueryBuilder":
"""Add excluded terms or groups (- operator at group level).
Documents MUST NOT contain these terms/groups to match. This is equivalent
to the '-' operator in MatrixOne's boolean mode syntax.
Args::
*items: Can be strings (terms) or FulltextGroup objects
Examples::
# Excluded term - documents must not contain 'deprecated'
query.must_not("deprecated") # Generates: -deprecated
# Excluded group - documents must not contain 'spam' OR 'junk'
query.must_not(group().medium("spam", "junk")) # Generates: -(spam junk)
# Multiple excluded terms
query.must_not("spam", "junk") # Generates: -spam -junk
# Unpack list to exclude multiple terms
words = ["spam", "junk"]
query.must_not(*words) # Correct: unpacks the list
Raises::
TypeError: If a list or tuple is passed directly without unpacking
Returns::
FulltextQueryBuilder: Self for method chaining
"""
for item in items:
# Validate parameter types and provide friendly error messages
if isinstance(item, (list, tuple)):
raise TypeError(
f"must_not() received a {type(item).__name__} object, but expected individual terms. "
f"To exclude multiple terms, use the unpacking operator: must_not(*terms) instead of must_not(terms). "
f"Example: must_not(*{list(item)[:3]}) or must_not({', '.join(repr(str(t)) for t in list(item)[:3])})"
)
if isinstance(item, FulltextGroup):
item.group_type = "not" # Force group to be excluded
self.main_group.add_group(item)
else:
self.main_group.must_not(item)
return self
[docs]
def encourage(self, *items) -> "FulltextQueryBuilder":
"""Add terms or groups that should be encouraged (normal positive weight).
Documents can match without these terms, but containing them will
INCREASE the relevance score. This provides normal positive weight boost.
Args::
*items: Can be strings (terms) or FulltextGroup objects
Examples::
# Encourage documents with 'tutorial'
query.encourage("tutorial") # Generates: tutorial
# Encourage documents with 'beginner' OR 'intro'
query.encourage(group().medium("beginner", "intro")) # Generates: (beginner intro)
# Multiple encouraged terms
query.encourage("tutorial", "guide") # Generates: tutorial guide
# Unpack list to encourage multiple terms
words = ["tutorial", "guide"]
query.encourage(*words) # Correct: unpacks the list
Weight Comparison:
- encourage("term"): Normal positive boost (encourages term)
- discourage("term"): Reduced/negative boost (discourages term)
Raises::
TypeError: If a list or tuple is passed directly without unpacking
Returns::
FulltextQueryBuilder: Self for method chaining
"""
for item in items:
# Validate parameter types and provide friendly error messages
if isinstance(item, (list, tuple)):
raise TypeError(
f"encourage() received a {type(item).__name__} object, but expected individual terms. "
f"To encourage multiple terms, use the unpacking operator: encourage(*terms) "
f"instead of encourage(terms). "
f"Example: encourage(*{list(item)[:3]}) or "
f"encourage({', '.join(repr(str(t)) for t in list(item)[:3])})"
)
if isinstance(item, FulltextGroup):
item.group_type = "or" # Force group to be optional
self.main_group.add_group(item)
else:
self.main_group.encourage(item)
return self
[docs]
def discourage(self, *items) -> "FulltextQueryBuilder":
"""Add terms or groups that should be discouraged (~ operator at group level).
Documents can match without these terms, but containing them will
DECREASE the relevance score. This provides reduced or negative weight boost,
effectively discouraging documents that contain these terms.
Args::
*items: Can be strings (terms) or FulltextGroup objects
Examples::
# Discourage documents with 'legacy'
query.discourage("legacy") # Generates: ~legacy
# Discourage documents with 'old' OR 'outdated'
query.discourage(group().medium("old", "outdated")) # Generates: ~(old outdated)
# Multiple discouraged terms
query.discourage("legacy", "deprecated") # Generates: ~legacy ~deprecated
# Unpack list to discourage multiple terms
words = ["legacy", "deprecated"]
query.discourage(*words) # Correct: unpacks the list
Weight Comparison:
- encourage("term"): Normal positive boost (encourages term)
- discourage("term"): Reduced/negative boost (discourages term)
Use Cases:
# Search Python content, but discourage legacy versions
query.must("python").encourage("3.11").discourage("2.7")
# Find tutorials, but avoid outdated content
query.must("tutorial").discourage(group().medium("old", "deprecated"))
Raises::
TypeError: If a list or tuple is passed directly without unpacking
Returns::
FulltextQueryBuilder: Self for method chaining
"""
for item in items:
# Validate parameter types and provide friendly error messages
if isinstance(item, (list, tuple)):
raise TypeError(
f"discourage() received a {type(item).__name__} object, but expected individual terms. "
f"To discourage multiple terms, use the unpacking operator: discourage(*terms) "
f"instead of discourage(terms). "
f"Example: discourage(*{list(item)[:3]}) or "
f"discourage({', '.join(repr(str(t)) for t in list(item)[:3])})"
)
if isinstance(item, FulltextGroup):
# Apply tilde to the entire group
self.main_group.add_tilde_group(item)
else:
# Apply tilde to individual term
self.main_group.elements.append(FulltextElement(item, "", "~"))
return self
[docs]
def phrase(self, phrase: str) -> "FulltextQueryBuilder":
"""Add a phrase search to the main group."""
self.main_group.phrase(phrase)
return self
[docs]
def prefix(self, prefix: str) -> "FulltextQueryBuilder":
"""Add a prefix search to the main group."""
self.main_group.prefix(prefix)
return self
[docs]
def boost(self, term: str, weight: float) -> "FulltextQueryBuilder":
"""Add a boosted term to the main group."""
self.main_group.boost(term, weight)
return self
[docs]
def group(self, *builders: "FulltextQueryBuilder") -> "FulltextQueryBuilder":
"""Add nested query builders as groups (OR semantics)."""
for builder in builders:
# Convert builder to group and add to main group
group = FulltextGroup("or")
# Add all elements from the builder's main group
group.elements.extend(builder.main_group.elements)
group.groups.extend(builder.main_group.groups)
self.main_group.groups.append(group)
return self
[docs]
def build(self) -> str:
"""Build the final query string."""
return self.main_group.build()
[docs]
def as_sql(
self,
table: str,
columns: List[str],
mode: str = FulltextSearchMode.BOOLEAN,
include_score: bool = False,
select_columns: Optional[List[str]] = None,
where_conditions: Optional[List[str]] = None,
order_by: Optional[str] = None,
limit: Optional[int] = None,
offset: Optional[int] = None,
) -> str:
"""Build a complete SQL query with optional AS score support.
This method generates a full SQL query similar to FulltextSearchBuilder but using
the query built by FulltextQueryBuilder.
Args::
table: Table name to search in
columns: List of columns to search in (must match FULLTEXT index)
mode: Search mode (BOOLEAN, NATURAL_LANGUAGE, etc.)
include_score: Whether to include relevance score in results
select_columns: Columns to select (default: all columns "*")
where_conditions: Additional WHERE conditions
order_by: ORDER BY clause (e.g., "score DESC")
limit: LIMIT value
offset: OFFSET value
Returns::
str: Complete SQL query
Examples::
.. code-block:: python
# Basic query with score
query = FulltextQueryBuilder().must("python").encourage("tutorial")
sql = query.as_sql("articles", ["title", "content"], include_score=True)
# SELECT *, MATCH(title, content) AGAINST('+python tutorial' IN boolean mode) AS score
# FROM articles WHERE MATCH(title, content) AGAINST('+python tutorial' IN boolean mode)
# Query with custom columns and ORDER BY score
sql = query.as_sql("articles", ["title", "content"],
select_columns=["id", "title"], include_score=True,
order_by="score DESC", limit=10)
"""
query_string = self.build()
if not query_string:
raise ValueError("Query is required - add at least one search term")
if not table:
raise ValueError("Table name is required")
if not columns:
raise ValueError("Search columns are required")
# Build columns string for MATCH()
columns_str = ", ".join(columns)
# Build SELECT clause
if select_columns:
select_parts = select_columns.copy()
else:
select_parts = ["*"]
if include_score:
score_expr = f"MATCH({columns_str}) AGAINST('{query_string}' IN {mode}) AS score"
select_parts.append(score_expr)
select_clause = f"SELECT {', '.join(select_parts)}"
# Build FROM clause
from_clause = f"FROM {table}"
# Build WHERE clause
where_parts = []
# Add fulltext search condition
fulltext_condition = f"MATCH({columns_str}) AGAINST('{query_string}' IN {mode})"
where_parts.append(fulltext_condition)
# Add additional WHERE conditions
if where_conditions:
where_parts.extend(where_conditions)
where_clause = f"WHERE {' AND '.join(where_parts)}" if where_parts else ""
# Build ORDER BY clause
order_clause = f"ORDER BY {order_by}" if order_by else ""
# Build LIMIT clause
limit_clause = f"LIMIT {limit}" if limit else ""
# Build OFFSET clause
offset_clause = f"OFFSET {offset}" if offset else ""
# Combine all clauses
sql_parts = [
select_clause,
from_clause,
where_clause,
order_clause,
limit_clause,
offset_clause,
]
return " ".join(filter(None, sql_parts))
[docs]
def as_score_sql(self, table: str, columns: List[str], mode: str = FulltextSearchMode.BOOLEAN) -> str:
"""Convenient method to generate SQL with score included.
This is equivalent to calling as_sql() with include_score=True.
Args::
table: Table name to search in
columns: List of columns to search in
mode: Search mode
Returns::
str: Complete SQL query with AS score
Example::
query = FulltextQueryBuilder().must("python").encourage("tutorial")
sql = query.as_score_sql("articles", ["title", "content"])
# Generates SQL with AS score automatically included
"""
return self.as_sql(table, columns, mode, include_score=True)
[docs]
class FulltextFilter(ColumnElement):
"""Advanced fulltext filter for integrating fulltext search with ORM queries.
This class wraps FulltextQueryBuilder to provide seamless integration
with MatrixOne ORM's filter() method, allowing fulltext search to be
combined with other SQL conditions.
Core Methods (Group-level operators):
- must(): Required terms/groups (+ operator)
- must_not(): Excluded terms/groups (- operator)
- encourage(): Optional terms/groups with normal weight (no prefix)
- discourage(): Optional terms/groups with reduced weight (~ operator)
Parameter Types:
- str: Single term (e.g., "python")
- FulltextGroup: Group of terms (e.g., group().medium("java", "kotlin"))
Usage with ORM:
.. code-block:: python
# Basic fulltext filter
results = client.query(Article).filter(
boolean_match("title", "content").must("python").encourage("tutorial")
).all()
# Combined with other conditions
results = client.query(Article).filter(
boolean_match("title", "content").must("python")
).filter(
Article.category == "Programming"
).all()
# Complex fulltext with groups
results = client.query(Article).filter(
boolean_match("title", "content", "tags")
.must("programming")
.must(group().medium("python", "java"))
.discourage(group().medium("legacy", "deprecated"))
).all()
Weight Operator Examples
.. code-block:: python
# Encourage tutorials, discourage legacy content
boolean_match("title", "content")
.must("python")
.encourage("tutorial") # Boost documents with 'tutorial'
.discourage("legacy") # Lower ranking for 'legacy' documents
Supported MatrixOne Boolean Mode Operators:
Group-level: +, -, ~, (no prefix) - applied to entire groups/terms
Element-level: >, < - applied within groups using high(), low()
Other: "phrase", term* - exact phrases and prefix matching
Complex: +red -(<blue >is) - nested groups with mixed operators
Important MatrixOne Requirements:
**Column Matching**: The columns specified must exactly match
the columns defined in the FULLTEXT index. If your index is
`FULLTEXT(title, content, tags)`, you must include all three columns.
**Limitations**:
- Only one MATCH() function per query is supported
- Complex nested groups may have syntax restrictions
- Use fulltext_and/fulltext_or for combining with other conditions
"""
# Disable SQL compilation caching for this class
# We set this to False because FulltextFilter has complex internal state
# (query_builder, columns, mode) that affects SQL generation, and we haven't
# implemented the cache key generation methods (__visit_name__, _cache_key_traversal)
# Setting to False is the safe choice - it disables caching but ensures correctness
inherit_cache = False
[docs]
def __init__(self, columns: List[str], mode: str = FulltextSearchMode.BOOLEAN):
super().__init__()
self.columns = columns
self.mode = mode
self.query_builder = FulltextQueryBuilder()
self._natural_query = None # Store natural language query separately
self.type = Boolean()
[docs]
def __bool__(self):
"""
Override bool to prevent SQLAlchemy from treating this as a boolean value.
This is important for proper WHERE clause generation.
"""
raise NotImplementedError(
"FulltextFilter cannot be used as a boolean value directly. "
"Use it within SQLAlchemy expressions like select().where()"
)
[docs]
def self_group(self, against=None):
"""Override self_group to return self without wrapping."""
return self
[docs]
def columns(self, *columns: str) -> "FulltextFilter":
"""Set the columns to search in."""
self.columns = list(columns)
return self
[docs]
def must(self, *items) -> "FulltextFilter":
"""Add required terms or groups (+ operator at group level)."""
self.query_builder.must(*items)
return self
[docs]
def must_not(self, *items) -> "FulltextFilter":
"""Add excluded terms or groups (- operator at group level)."""
self.query_builder.must_not(*items)
return self
[docs]
def encourage(self, *items) -> "FulltextFilter":
"""Add terms or groups that should be encouraged (normal positive weight)."""
self.query_builder.encourage(*items)
return self
[docs]
def phrase(self, *phrases: str) -> "FulltextFilter":
"""Add exact phrases - equivalent to "phrase"."""
self.query_builder.phrase(*phrases)
return self
[docs]
def prefix(self, *terms: str) -> "FulltextFilter":
"""Add prefix terms - equivalent to term*."""
self.query_builder.prefix(*terms)
return self
[docs]
def boost(self, term: str, weight: float) -> "FulltextFilter":
"""Add a boosted term (term^weight)."""
self.query_builder.boost(term, weight)
return self
[docs]
def discourage(self, *items) -> "FulltextFilter":
"""Add terms or groups that should be discouraged (~ operator at group level)."""
self.query_builder.discourage(*items)
return self
[docs]
def set_natural_query(self, query: str) -> "FulltextFilter":
"""Set natural language query string (used for NATURAL_LANGUAGE mode)."""
self._natural_query = query
return self
[docs]
def group(self, *filters: "FulltextFilter") -> "FulltextFilter":
"""Add nested query groups (OR semantics)."""
builders = [f.query_builder for f in filters]
self.query_builder.group(*builders)
return self
[docs]
def natural_language(self) -> "FulltextFilter":
"""Set to natural language mode."""
self.mode = FulltextSearchMode.NATURAL_LANGUAGE
return self
[docs]
def boolean_mode(self) -> "FulltextFilter":
"""Set to boolean mode."""
self.mode = FulltextSearchMode.BOOLEAN
return self
[docs]
def query_expansion(self) -> "FulltextFilter":
"""Set to query expansion mode."""
self.mode = FulltextSearchMode.QUERY_EXPANSION
return self
[docs]
def compile(self, compile_kwargs=None):
"""Compile to SQL expression for use in filter() method."""
if not self.columns:
raise ValueError("Columns must be specified")
columns_str = ", ".join(self.columns)
# For natural language mode, use the stored natural query if available
if self.mode == FulltextSearchMode.NATURAL_LANGUAGE and self._natural_query:
query_string = self._natural_query
else:
query_string = self.query_builder.build()
if not query_string:
raise ValueError("Query cannot be empty")
if self.mode == FulltextSearchMode.NATURAL_LANGUAGE:
return f"MATCH({columns_str}) AGAINST('{query_string}')"
elif self.mode == FulltextSearchMode.BOOLEAN:
return f"MATCH({columns_str}) AGAINST('{query_string}' IN BOOLEAN MODE)"
elif self.mode == FulltextSearchMode.QUERY_EXPANSION:
return f"MATCH({columns_str}) AGAINST('{query_string}' WITH QUERY EXPANSION)"
else:
return f"MATCH({columns_str}) AGAINST('{query_string}')"
[docs]
def label(self, name: str):
"""Create a labeled version for use in SELECT clauses.
This allows using fulltext expressions as selectable columns with aliases:
Args::
name: The alias name for the column
Returns::
A SQLAlchemy labeled expression
Examples::
.. code-block:: python
# Use as a SELECT column with score
query(Article, Article.id,
boolean_match("title", "content").must("python").label("score"))
# Multiple fulltext scores
query(Article, Article.id,
boolean_match("title", "content").must("python").label("relevance"),
boolean_match("tags").must("programming").label("tag_score"))
Generated SQL:
.. code-block:: sql
SELECT articles.id,
MATCH(title, content) AGAINST('+python' IN BOOLEAN MODE) AS score
FROM articles
"""
# Create a text expression that can be labeled
sql_text = self.compile()
text_expr = text(sql_text)
# Create a custom labeled expression
class FulltextLabel:
def __init__(self, text_expr, name):
self.text_expr = text_expr
self.name = name
def __str__(self):
# For ORM integration, return only the expression without AS
# The ORM will add the AS alias part
return sql_text
# Make it compatible with SQLAlchemy's compilation
def compile(self, compile_kwargs=None):
# For standalone use, include AS
return f"{sql_text} AS {self.name}"
def _compiler_dispatch(self, visitor, **kw):
# For SQLAlchemy integration, return only the expression
# SQLAlchemy will handle the AS alias
return sql_text
return FulltextLabel(text_expr, name)
[docs]
def __str__(self):
"""String representation for debugging."""
return f"FulltextFilter({self.columns}, mode={self.mode})"
[docs]
def __repr__(self):
"""Detailed representation for debugging."""
return f"FulltextFilter(columns={self.columns}, mode='{self.mode}', query='{self.query_builder.build()}')"
[docs]
def as_text(self):
"""Convert to SQLAlchemy text() object for compatibility with and_(), or_(), etc."""
return text(self.compile())
@classmethod
def _create_and(cls, *conditions):
"""Helper to create AND expressions with FulltextFilter support."""
processed_conditions = []
for condition in conditions:
if isinstance(condition, cls):
processed_conditions.append(condition.as_text())
else:
processed_conditions.append(condition)
return and_(*processed_conditions)
@classmethod
def _create_or(cls, *conditions):
"""Helper to create OR expressions with FulltextFilter support."""
processed_conditions = []
for condition in conditions:
if isinstance(condition, cls):
processed_conditions.append(condition.as_text())
else:
processed_conditions.append(condition)
return or_(*processed_conditions)
# Custom compiler for FulltextFilter to prevent "= 1" wrapping
@compiles(FulltextFilter)
def visit_fulltext_filter(element, compiler, **kw):
"""
Custom compiler for FulltextFilter that generates MATCH() AGAINST() SQL.
This ensures that in WHERE clauses, we get:
WHERE MATCH(...) AGAINST(...)
Instead of:
WHERE MATCH(...) AGAINST(...) = 1
"""
# Get the MATCH() AGAINST() SQL
match_sql = element.compile()
# In WHERE context, SQLAlchemy expects a boolean comparison
# For MATCH() AGAINST(), the expression itself returns a boolean/score
# We need to return it without the "= 1" wrapper
#
# The trick is to check if we're in a boolean context
# If we are, return the expression as-is (it's already a valid boolean expression)
return match_sql
# Convenience functions for common use cases
[docs]
def boolean_match(*columns) -> FulltextFilter:
"""Create a boolean mode fulltext filter for specified columns.
This is the main entry point for creating fulltext search queries that integrate
seamlessly with MatrixOne ORM's filter() method.
Args::
*columns: Column names or SQLAlchemy Column objects to search against
Returns::
FulltextFilter: A chainable filter object
Examples::
# Basic search - must contain 'python'
boolean_match("title", "content").must("python")
# Multiple conditions
boolean_match("title", "content")
.must("python")
.encourage("tutorial")
.discourage("legacy")
# Group search - either 'python' or 'java'
boolean_match("title", "content").must(group().medium("python", "java"))
# Using SQLAlchemy Column objects
boolean_match(Article.title, Article.content).must("python")
Note: The columns specified must exactly match the FULLTEXT index columns. For example, if your
index is FULLTEXT(title, content, tags), you must use boolean_match("title", "content", "tags")
"""
# Convert columns to strings
column_names = []
for col in columns:
if hasattr(col, 'name'):
# SQLAlchemy Column object
column_names.append(col.name)
elif hasattr(col, '__tablename__') and hasattr(col, 'name'):
# Model attribute
column_names.append(col.name)
else:
# String column name
column_names.append(str(col))
return FulltextFilter(column_names, FulltextSearchMode.BOOLEAN)
[docs]
def natural_match(*columns, query: str) -> FulltextFilter:
"""
Create a natural language mode fulltext filter for specified columns.
Natural language mode provides user-friendly search with automatic processing:
- Stopword removal (e.g., 'the', 'a', 'an')
- Stemming and variations
- Relevance scoring based on TF-IDF or BM25 algorithm
- Best for end-user search interfaces
Args:
*columns: Column names or SQLAlchemy Column objects to search against
- Must exactly match the columns in your fulltext index
- Can be strings or Column objects
query: Natural language query string
- User-friendly search terms
- Automatically processed for best results
- Multi-word queries are supported
Important - Column Matching:
The columns specified in MATCH() must exactly match the columns defined in
the FULLTEXT index. Mismatches will cause errors.
Examples:
If index is: FULLTEXT(title, content)
- ✅ natural_match("title", "content", query="...") - Correct
- ❌ natural_match("title", query="...") - Error (partial)
- ❌ natural_match("content", query="...") - Error (partial)
If index is: FULLTEXT(content)
- ✅ natural_match("content", query="...") - Correct
- ❌ natural_match("title", "content", query="...") - Error (extra column)
Parser Compatibility:
Works with all parser types:
- Default parser: Standard text tokenization
- JSON parser: Searches JSON values within documents
- NGRAM parser: Chinese and Asian language tokenization
Returns:
FulltextFilter: A fulltext filter object for use in queries
Examples::
# Basic natural language search
result = client.query("articles.id", "articles.title", "articles.content").filter(
natural_match("title", "content", query="machine learning")
).execute()
# Using with ORM models
result = client.query(Article).filter(
natural_match(Article.title, Article.content, query="artificial intelligence")
).execute()
# Single column search
result = client.query(Article).filter(
natural_match(Article.content, query="python programming")
).execute()
# With relevance scoring
result = client.query(
Article.id,
Article.title,
Article.content,
natural_match(Article.content, query="deep learning").label("score")
).execute()
# JSON parser - searching within JSON documents
result = client.query(Product).filter(
natural_match(Product.details, query="Dell laptop")
).execute()
# NGRAM parser - Chinese content search
result = client.query(ChineseArticle).filter(
natural_match(ChineseArticle.title, ChineseArticle.body, query="神雕侠侣")
).execute()
# Combined with SQL filters
result = client.query(Article).filter(
natural_match(Article.content, query="programming tutorial")
).filter(Article.category == "Education").execute()
"""
# Convert columns to strings
column_names = []
for col in columns:
if hasattr(col, 'name'):
# SQLAlchemy Column object
column_names.append(col.name)
elif hasattr(col, '__tablename__') and hasattr(col, 'name'):
# Model attribute
column_names.append(col.name)
else:
# String column name
column_names.append(str(col))
return FulltextFilter(column_names, FulltextSearchMode.NATURAL_LANGUAGE).set_natural_query(query)
[docs]
def group() -> FulltextGroup:
"""Create a new query group builder with OR semantics between elements.
Creates a group where elements have OR relationship. The group-level semantics
(required, excluded, optional, reduced weight) are determined by how it's used:
- must(group()) → +(...) - group is required
- must_not(group()) → -(...) - group is excluded
- encourage(group()) → (...) - group is optional with normal weight
- discourage(group()) → ~(...) - group is optional with reduced weight
Element-level Methods (use inside groups):
- medium(): Add terms with medium weight (no operators)
- high(): Add terms with high weight (>term)
- low(): Add terms with low weight (<term)
- phrase(): Add exact phrase matches ("phrase")
- prefix(): Add prefix matches (term*)
IMPORTANT: Inside groups, do NOT use must()/must_not() as they add +/- operators.
Use medium() for plain terms or high()/low() for element-level weight control.
Examples
# Required group - must contain 'java' OR 'kotlin'
query.must(group().medium("java", "kotlin")) # +(java kotlin)
# Excluded group - must not contain 'spam' OR 'junk'
query.must_not(group().medium("spam", "junk")) # -(spam junk)
# Optional group with normal weight
query.encourage(group().medium("tutorial", "guide")) # (tutorial guide)
# Optional group with reduced weight
query.discourage(group().medium("old", "outdated")) # ~(old outdated)
# Complex MatrixOne style with element-level weights
query.must("red").must_not(group().low("blue").high("is"))
# Generates: '+red -(<blue >is)'
"""
return FulltextGroup("or")
# Note: logical_and, logical_or, logical_not are no longer needed.
# Since FulltextFilter now inherits from ColumnElement, you can use
# SQLAlchemy's and_(), or_(), not_() directly with FulltextFilter objects.
# Remove old FulltextTerm and FulltextQuery classes as they are replaced by FulltextQueryBuilder
[docs]
class FulltextSearchBuilder:
"""
Elasticsearch-like fulltext search builder for MatrixOne.
Provides a chainable interface for building complex fulltext queries
with support for various search modes, filters, and sorting.
Boolean Mode Operators:
- ``+word`` : Required term (must contain)
- ``-word`` : Excluded term (must not contain)
- ``~word`` : Lower weight term (reduces relevance score)
- ``<word`` : Lower weight term (reduces relevance score)
- ``>word`` : Higher weight term (increases relevance score)
- word : Optional term (may contain)
- ``"phrase"`` : Exact phrase match
- ``word*`` : Wildcard prefix match
- (word1 word2) : Grouping (contains any of the words)
Note: MatrixOne supports all boolean mode operators.
Search Modes:
- NATURAL_LANGUAGE: Automatic stopword removal, stemming, relevance scoring
- BOOLEAN: Full control with operators, no automatic processing
- QUERY_EXPANSION: Not supported in MatrixOne
Examples::
# Natural language search
results = client.fulltext_search()
.table("articles")
.columns(["title", "content"])
.with_mode(FulltextSearchMode.NATURAL_LANGUAGE)
.query("machine learning")
.with_score()
.limit(10)
.execute()
# Boolean search with complex terms
results = client.fulltext_search()
.table("articles")
.columns(["title", "content"])
.with_mode(FulltextSearchMode.BOOLEAN)
.add_term("machine", required=True)
.add_term("learning", required=True)
.where("category = 'AI'")
.order_by("score", "DESC")
.limit(20)
.execute()
"""
[docs]
def __init__(self, client: "Client"):
self.client = client
self._table_name: Optional[str] = None
self._columns: List[str] = []
self._search_mode = FulltextSearchMode.NATURAL_LANGUAGE
self._algorithm = FulltextSearchAlgorithm.BM25
self._query_obj = FulltextQueryBuilder()
self._include_score = False
self._where_conditions: List[str] = []
self._order_by: Optional[str] = None
self._limit_value: Optional[int] = None
self._offset_value: Optional[int] = None
self._select_columns: List[str] = ["*"]
[docs]
def table(self, table_name: str) -> "FulltextSearchBuilder":
"""
Set the target table for the search.
Args::
table_name: Name of the table to search
Returns::
FulltextSearchBuilder: Self for chaining
"""
self._table_name = table_name
return self
[docs]
def columns(self, columns: List[str]) -> "FulltextSearchBuilder":
"""
Set the columns to search in.
Args::
columns: List of column names to search
Returns::
FulltextSearchBuilder: Self for chaining
"""
self._columns = columns
return self
[docs]
def with_mode(self, mode: str) -> "FulltextSearchBuilder":
"""
Set the search mode.
Args::
mode: Search mode
- FulltextSearchMode.NATURAL_LANGUAGE: Automatic processing, user-friendly
- FulltextSearchMode.BOOLEAN: Full control with operators
- FulltextSearchMode.QUERY_EXPANSION: Not supported in MatrixOne
Returns::
FulltextSearchBuilder: Self for chaining
Examples::
# Natural language mode (default)
.with_mode(FulltextSearchMode.NATURAL_LANGUAGE)
# Boolean mode for complex queries
.with_mode(FulltextSearchMode.BOOLEAN)
"""
self._search_mode = mode
return self
[docs]
def with_algorithm(self, algorithm: str) -> "FulltextSearchBuilder":
"""
Set the search algorithm.
Args::
algorithm: Search algorithm
- FulltextSearchAlgorithm.TF_IDF: Traditional TF-IDF scoring
- FulltextSearchAlgorithm.BM25: Modern BM25 scoring (recommended)
Returns::
FulltextSearchBuilder: Self for chaining
Examples::
# Use BM25 algorithm (recommended)
.with_algorithm(FulltextSearchAlgorithm.BM25)
# Use TF-IDF algorithm
.with_algorithm(FulltextSearchAlgorithm.TF_IDF)
"""
self._algorithm = algorithm
return self
[docs]
def query(self, query_string: str) -> "FulltextSearchBuilder":
"""
Set a simple query string (resets previous terms).
Args::
query_string: The search query (natural language or boolean syntax)
Returns::
FulltextSearchBuilder: Self for chaining
Examples::
# Natural language query
.query("machine learning algorithms")
# Boolean query
.query("+machine +learning -java")
Note: This method resets any previously added terms, phrases, or wildcards.
"""
self._query_obj = FulltextQueryBuilder()
self._query_obj.encourage(query_string)
return self
[docs]
def add_term(
self,
term: str,
required: bool = False,
excluded: bool = False,
proximity: Optional[int] = None,
) -> "FulltextSearchBuilder":
"""
Add a search term to the query.
Args::
term: The search term
required: Whether the term is required (+) - must contain this term
excluded: Whether the term is excluded (-) - must not contain this term
proximity: Proximity modifier for boolean mode (not supported in MatrixOne)
Returns::
FulltextSearchBuilder: Self for chaining
Examples::
# Required term: +machine
.add_term("machine", required=True)
# Excluded term: -java
.add_term("java", excluded=True)
# Optional term: learning
.add_term("learning")
# Complex query: +machine +learning -java
.add_term("machine", required=True)
.add_term("learning", required=True)
.add_term("java", excluded=True)
"""
if required:
self._query_obj.must(term)
elif excluded:
self._query_obj.must_not(term)
else:
self._query_obj.encourage(term)
return self
[docs]
def add_phrase(self, phrase: str) -> "FulltextSearchBuilder":
"""
Add an exact phrase to the query.
Args::
phrase: The exact phrase to search for (wrapped in double quotes)
Returns::
FulltextSearchBuilder: Self for chaining
Examples::
# Exact phrase: "machine learning"
.add_phrase("machine learning")
# Multiple phrases
.add_phrase("deep learning")
.add_phrase("neural networks")
"""
self._query_obj.phrase(phrase)
return self
[docs]
def add_wildcard(self, pattern: str) -> "FulltextSearchBuilder":
"""
Add a wildcard pattern to the query.
Args::
pattern: Wildcard pattern with * suffix (e.g., "test*", "neural*")
Returns::
FulltextSearchBuilder: Self for chaining
Examples::
# Prefix match: neural*
.add_wildcard("neural*")
# Multiple wildcards
.add_wildcard("machine*")
.add_wildcard("learn*")
"""
self._query_obj.prefix(pattern.rstrip('*'))
return self
[docs]
def with_score(self, include: bool = True) -> "FulltextSearchBuilder":
"""
Include relevance score in results.
Args::
include: Whether to include the score
Returns::
FulltextSearchBuilder: Self for chaining
"""
self._include_score = include
return self
[docs]
def select(self, columns: List[str]) -> "FulltextSearchBuilder":
"""
Set the columns to select in the result.
Args::
columns: List of column names to select
Returns::
FulltextSearchBuilder: Self for chaining
"""
self._select_columns = columns
return self
[docs]
def where(self, condition: str) -> "FulltextSearchBuilder":
"""
Add a WHERE condition.
Args::
condition: WHERE condition
Returns::
FulltextSearchBuilder: Self for chaining
"""
self._where_conditions.append(condition)
return self
[docs]
def order_by(self, column: str, direction: str = "DESC") -> "FulltextSearchBuilder":
"""
Set ORDER BY clause.
Args::
column: Column to order by
direction: Order direction (ASC/DESC)
Returns::
FulltextSearchBuilder: Self for chaining
"""
self._order_by = f"{column} {direction}"
return self
[docs]
def limit(self, count: int) -> "FulltextSearchBuilder":
"""
Set LIMIT clause.
Args::
count: Number of results to return
Returns::
FulltextSearchBuilder: Self for chaining
"""
self._limit_value = count
return self
[docs]
def offset(self, count: int) -> "FulltextSearchBuilder":
"""
Set OFFSET clause.
Args::
count: Number of results to skip
Returns::
FulltextSearchBuilder: Self for chaining
"""
self._offset_value = count
return self
def _build_sql(self) -> str:
"""Build the final SQL query."""
if not self._table_name:
raise ValueError("Table name is required")
if not self._columns:
raise ValueError("Search columns are required")
query_string = self._query_obj.build()
if not query_string:
raise ValueError("Query is required")
# Build SELECT clause
select_parts = self._select_columns.copy()
if self._include_score:
columns_str = ", ".join(self._columns)
select_parts.append(f"MATCH({columns_str}) AGAINST('{query_string}' IN {self._search_mode}) AS score")
select_clause = f"SELECT {', '.join(select_parts)}"
# Build FROM clause
from_clause = f"FROM {self._table_name}"
# Build WHERE clause
where_parts = []
# Add fulltext search condition
columns_str = ", ".join(self._columns)
fulltext_condition = f"MATCH({columns_str}) AGAINST('{query_string}' IN {self._search_mode})"
where_parts.append(fulltext_condition)
# Add additional WHERE conditions
where_parts.extend(self._where_conditions)
where_clause = f"WHERE {' AND '.join(where_parts)}" if where_parts else ""
# Build ORDER BY clause
order_clause = f"ORDER BY {self._order_by}" if self._order_by else ""
# Build LIMIT clause
limit_clause = f"LIMIT {self._limit_value}" if self._limit_value else ""
# Build OFFSET clause
offset_clause = f"OFFSET {self._offset_value}" if self._offset_value else ""
# Combine all clauses
sql_parts = [
select_clause,
from_clause,
where_clause,
order_clause,
limit_clause,
offset_clause,
]
return " ".join(filter(None, sql_parts))
[docs]
def execute(self) -> Any:
"""
Execute the fulltext search query.
Returns::
Query results
"""
sql = self._build_sql()
return self.client.execute(sql)
[docs]
def explain(self) -> str:
"""
Get the SQL query that would be executed.
Returns::
SQL query string
"""
return self._build_sql()
class FulltextIndexManager:
"""Manager for fulltext index operations."""
def __init__(self, client: "Client"):
self.client = client
def create(
self,
table_name: str,
name: str,
columns: List[str],
algorithm: str = FulltextSearchAlgorithm.BM25,
) -> bool:
"""
Create a fulltext index.
Args::
table_name: Name of the table
name: Name of the index
columns: List of columns to index
algorithm: Search algorithm to use
Returns::
bool: True if successful
"""
from .fulltext_index import FulltextIndex
# Set the algorithm
self.client.execute(f'SET ft_relevancy_algorithm = "{algorithm}"')
# Create the index using session connection
with self.client.session() as session:
conn = session.connection()
success = FulltextIndex.create_index(
bind=conn,
table_name=table_name,
name=name,
columns=columns,
algorithm=algorithm,
)
session.commit()
return success
def drop(self, table_name: str, name: str) -> bool:
"""
Drop a fulltext index.
Args::
table_name: Name of the table
name: Name of the index
Returns::
bool: True if successful
"""
from .fulltext_index import FulltextIndex
# Drop the index using session connection
with self.client.session() as session:
conn = session.connection()
success = FulltextIndex.drop_index(bind=conn, table_name=table_name, name=name)
session.commit()
return success
def search(self) -> "FulltextSearchBuilder":
"""
Create a new fulltext search builder.
Returns::
FulltextSearchBuilder: New search builder instance
"""
return FulltextSearchBuilder(self.client)