In this tutorial, we will be setting up a pre-computed database index using database triggers to speed-up Postgres Full-Text Search in Django.
To avoid the computation of tsvector using SearchVector on the fly, we can pre-compute the result in an indexed column. This column will be of tsvector datatype and will be able to speed up Postgres Full-Text search (FTS) multi-fold.
We’ll be using GIN (Generalized Inverted Index), which is known to be very scalable and isn’t lossy, unlike GIST (Generalized Search Tree), which is often useful for frequently updated fields1.
Steps to support FTS using search_vector
column
Add custom field to models:
from django.db import models
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVectorField
class Article(models.Model):
# search vector field for FTS in title
title = models.CharField(max_length=200)
search_vector = SearchVectorField(null=True)
class Meta:
indexes = (GinIndex(fields=["search_vector"]),)
Next we will need to create a custom migration that will store the pre-computed search vector from the title
field of Article
in search_vector
field. This trigger will take care of updating the search vector when the corresponding title
is updated.
# Generated by Django 3.0.8 on 2021-04-07 10:50
from django.db import migrations
from django.contrib.postgres.search import SearchVector
def compute_search_vector(apps, schema_editor):
Article = apps.get_model("your_app", "Article")
Article.objects.update(search_vector=SearchVector("title"))
class Migration(migrations.Migration):
dependencies = [
("search", "001_auto_20210407_1049"),
]
operations = [
migrations.RunSQL(
sql="""
CREATE TRIGGER search_vector_trigger
BEFORE INSERT OR UPDATE OF title, search_vector
ON maps_article
FOR EACH ROW EXECUTE PROCEDURE
tsvector_update_trigger(
search_vector, 'pg_catalog.english', title
);
UPDATE maps_article SET search_vector = NULL;
""",
reverse_sql="""
DROP TRIGGER IF EXISTS search_vector_trigger
ON maps_article;
""",
),
migrations.RunPython(
compute_search_vector, reverse_code=migrations.RunPython.noop
),
]
NOTE:
Extending the above migration for more text-based fields is easy, but in case we want to also compute an array field as a search vector, we cannot use the tsvector_update_trigger
directly. You’ll need to compute a text version of the array field by using array_to_string
Postgres function in the Postgres trigger like this:
# Generated by Django 3.0.8 on 2021-04-07 10:50
from django.db import migrations
from django.contrib.postgres.search import SearchVector
def compute_search_vector(apps, schema_editor):
Article = apps.get_model("your_app", "Article")
Article.objects.update(search_vector=SearchVector("title", "location", "alternate_names"))
class Migration(migrations.Migration):
dependencies = [
("search", "001_auto_20210407_1049"),
]
operations = [
migrations.RunSQL(
sql="""
CREATE FUNCTION update_trigger() RETURNS trigger AS $$
begin
new.search_vector :=
setweight(to_tsvector('pg_catalog.english', coalesce(new.title,'')), 'A') ||
setweight(to_tsvector('pg_catalog.english', coalesce(new.location,'')), 'A') ||
setweight(to_tsvector('pg_catalog.english', coalesce(array_to_string(new.alternate_names,' '), '')), 'A');
return new;
end
$$ LANGUAGE plpgsql;
CREATE TRIGGER search_vector_trigger
BEFORE INSERT OR UPDATE
ON maps_article
FOR EACH ROW EXECUTE PROCEDURE
update_trigger();
UPDATE maps_article SET search_vector = NULL;
""",
reverse_sql="""
DROP TRIGGER IF EXISTS search_vector_trigger
ON maps_article;
""",
),
migrations.RunPython(
compute_search_vector, reverse_code=migrations.RunPython.noop
),
]
Here, title
, location
are text-based columns where alternate_names
is an array of texts.
This latest migration, although a bit-verbose, takes care of text based columns as well as array based columns.
Now, we can query directly using the search_vector
field:
search_term = "hello world"
Article.objects.filter(search_vector=search_term)
or if you are using DRF’s, you can use it via custom filters:
from rest_framework import filters
class FullTextArticleSearchFilter(filters.BaseFilterBackend):
"""Query Articles on the basis of `search` query param."""
def filter_queryset(self, request, queryset, view):
search_term = request.query_params.get("search", None)
if search_term:
search_term = unquote(search_term)
# We use the computed `search_vector` directly
# to speed up the search.
return queryset.filter(search_vector=search_term)
return queryset