Made the search filter use TrigramStrictWordSimilarity instead of plain TrigramSimilarity, and added a plain SQLite-compatible fallback search for when PostgreSQL is not available

2024-08-17 15:53:35 +03:00
parent 0706fc5dc8
commit 2b8a4863c0
1 changed files with 137 additions and 7 deletions
--- a/starfields_drf_generics/filters.py
+++ b/starfields_drf_generics/filters.py
@@ -6,14 +6,32 @@ from rest_framework.filters import BaseFilterBackend
 from django.template import loader
 from django.utils.translation import gettext_lazy as _
 from django.db import models
-from django.db.models import Q, CharField
+from django.db.models import Q
+from rest_framework.fields import CharField
 from django.db.models.constants import LOOKUP_SEP
 from django.db.models.functions import Concat
-from django.contrib.postgres.search import TrigramSimilarity


 # TODO the dev pages are not done

+def search_smart_split(search_terms):
+    """generator that first splits string by spaces, leaving quoted phrases together,
+    then it splits non-quoted phrases by commas.
+    """
+    split_terms = []
+    for term in smart_split(search_terms):
+        # trim commas to avoid bad matching for quoted phrases
+        term = term.strip(',')
+        if term.startswith(('"', "'")) and term[0] == term[-1]:
+            # quoted phrases are kept together without any other split
+            split_terms.append(unescape_string_literal(term))
+        else:
+            # non-quoted tokens are split by comma, keeping only non-empty ones
+            for sub_term in term.split(','):
+                if sub_term:
+                    split_terms.append(sub_term.strip())
+    return split_terms
+

 class LessThanOrEqualFilter(BaseFilterBackend):
    def get_less_than_field(self, view, request):
@@ -403,24 +421,136 @@ class TrigramSearchFilter(BaseFilterBackend):

        return self.filters_dict

+    def get_search_fields(self, view, request):
+        """
+        Search fields are obtained from the view, but the request is always
+        passed to this method. Sub-classes can override this method to
+        dynamically change the search fields based on request content.
+        """
+        return getattr(view, 'search_fields', None)
+
+    def get_search_query(self, request):
+        """
+        Search terms are set by a ?search=... query parameter,
+        and may be whitespace delimited.
+        """
+        value = request.query_params.get(self.search_param, '')
+        field = CharField(trim_whitespace=False, allow_blank=True)
+        cleaned_value = field.run_validation(value)
+        return cleaned_value
+
+    def construct_search(self, field_name, queryset):
+        """
+        For the sqlite search
+        """
+        lookup = self.lookup_prefixes.get(field_name[0])
+        if lookup:
+            field_name = field_name[1:]
+        else:
+            # Use field_name if it includes a lookup.
+            opts = queryset.model._meta
+            lookup_fields = field_name.split(LOOKUP_SEP)
+            # Go through the fields, following all relations.
+            prev_field = None
+            for path_part in lookup_fields:
+                if path_part == "pk":
+                    path_part = opts.pk.name
+                try:
+                    field = opts.get_field(path_part)
+                except FieldDoesNotExist:
+                    # Use valid query lookups.
+                    if prev_field and prev_field.get_lookup(path_part):
+                        return field_name
+                else:
+                    prev_field = field
+                    if hasattr(field, "path_infos"):
+                        # Update opts to follow the relation.
+                        opts = field.path_infos[-1].to_opts
+                    # django < 4.1
+                    elif hasattr(field, 'get_path_info'):
+                        # Update opts to follow the relation.
+                        opts = field.get_path_info()[-1].to_opts
+            # Otherwise, use the field with icontains.
+            lookup = 'icontains'
+        return LOOKUP_SEP.join([field_name, lookup])
+
+    def must_call_distinct(self, queryset, search_fields):
+        """
+        Return True if 'distinct()' should be used to query the given lookups.
+        """
+        for search_field in search_fields:
+            opts = queryset.model._meta
+            if search_field[0] in self.lookup_prefixes:
+                search_field = search_field[1:]
+            # Annotated fields do not need to be distinct
+            if isinstance(queryset, models.QuerySet) and search_field in queryset.query.annotations:
+                continue
+            parts = search_field.split(LOOKUP_SEP)
+            for part in parts:
+                field = opts.get_field(part)
+                if hasattr(field, 'get_path_info'):
+                    # This field is a relation, update opts to follow the relation
+                    path_info = field.get_path_info()
+                    opts = path_info[-1].to_opts
+                    if any(path.m2m for path in path_info):
+                        # This field is a m2m relation so we know we need to call distinct
+                        return True
+                else:
+                    # This field has a custom __ query transform but is not a relational field.
+                    break
+        return False
+
    def filter_queryset(self, request, queryset, view):
-        search_fields = getattr(view, 'search_fields', None)
+        search_fields = self.get_search_fields(view, request)

        assert search_fields is not None, (
            f"{view.__class__.__name__} should include a `search_fields`"
            "attribute"
        )

-        query = request.query_params.get(self.search_param, '')
+        query = self.get_search_query(request)

-        if query:
+        if not query:
+            return queryset
+
+        try:
+            # Attempt PostgreSQL's trigram similarity search (pg_trgm)
+            from django.contrib.postgres.search import TrigramStrictWordSimilarity
            queryset = queryset.annotate(
                            search_field=Concat(
                                *search_fields,
                                output_field=CharField()
                                )).annotate(
-                        similarity=TrigramSimilarity('search_field', query)
-                    ).filter(similarity__gt=0.05).distinct()
+                        similarity=TrigramStrictWordSimilarity(
+                            'search_field', query)
+                    ).filter(similarity__gt=0.05)
+
+        except ImportError:
+            # Perform very simple sqlite compatible search
+            search_terms = search_smart_split(query)
+
+            orm_lookups = [
+                self.construct_search(str(search_field), queryset)
+                for search_field in search_fields
+            ]
+
+            base = queryset
+            # generator which for each term builds the corresponding search
+            conditions = (
+                reduce(
+                    operator.or_,
+                    (models.Q(**{orm_lookup: term}) for orm_lookup in orm_lookups)
+                ) for term in search_terms
+            )
+            queryset = queryset.filter(reduce(operator.and_, conditions))
+
+        # Remove duplicates from results, if necessary
+        if self.must_call_distinct(queryset, search_fields):
+            # inspired by django.contrib.admin
+            # this is more accurate than .distinct() for M2M relationships
+            # also is cross-database
+            queryset = queryset.filter(pk=models.OuterRef('pk'))
+            queryset = base.filter(models.Exists(queryset))

        return queryset