1
0

Fuzzy search + tests (#555)

هذا الالتزام موجود في:
Thomas Miceli
2025-12-26 22:36:28 +08:00
ملتزم من قبل GitHub
الأصل 3957dfb3ea
التزام b11306851b
6 ملفات معدلة مع 1879 إضافات و58 حذوفات

عرض الملف

@@ -2,6 +2,8 @@ package index
import (
"errors"
"strconv"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/token/camelcase"
@@ -10,7 +12,6 @@ import (
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/search/query"
"github.com/rs/zerolog/log"
"strconv"
)
type BleveIndexer struct {
@@ -53,6 +54,8 @@ func (i *BleveIndexer) open() (bleve.Index, error) {
docMapping := bleve.NewDocumentMapping()
docMapping.AddFieldMappingsAt("GistID", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("UserID", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("Visibility", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("Content", bleve.NewTextFieldMapping())
mapping := bleve.NewIndexMapping()
@@ -74,6 +77,7 @@ func (i *BleveIndexer) open() (bleve.Index, error) {
}
docMapping.DefaultAnalyzer = "gistAnalyser"
mapping.DefaultMapping = docMapping
return bleve.New(i.path, mapping)
}
@@ -105,39 +109,72 @@ func (i *BleveIndexer) Search(queryStr string, queryMetadata SearchGistMetadata,
var err error
var indexerQuery query.Query
if queryStr != "" {
contentQuery := bleve.NewMatchPhraseQuery(queryStr)
contentQuery.FieldVal = "Content"
// Use match query with fuzzy matching for more flexible content search
contentQuery := bleve.NewMatchQuery(queryStr)
contentQuery.SetField("Content")
contentQuery.SetFuzziness(2)
indexerQuery = contentQuery
} else {
contentQuery := bleve.NewMatchAllQuery()
indexerQuery = contentQuery
}
privateQuery := bleve.NewBoolFieldQuery(false)
privateQuery.SetField("Private")
// Visibility filtering: show public gists (Visibility=0) OR user's own gists
visibilityZero := float64(0)
truee := true
publicQuery := bleve.NewNumericRangeInclusiveQuery(&visibilityZero, &visibilityZero, &truee, &truee)
publicQuery.SetField("Visibility")
userIdMatch := float64(userId)
truee := true
userIdQuery := bleve.NewNumericRangeInclusiveQuery(&userIdMatch, &userIdMatch, &truee, &truee)
userIdQuery.SetField("UserID")
accessQuery := bleve.NewDisjunctionQuery(privateQuery, userIdQuery)
accessQuery := bleve.NewDisjunctionQuery(publicQuery, userIdQuery)
indexerQuery = bleve.NewConjunctionQuery(accessQuery, indexerQuery)
addQuery := func(field, value string) {
if value != "" && value != "." {
q := bleve.NewMatchPhraseQuery(value)
q.FieldVal = field
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, q)
}
}
// Handle "All" field - search across all metadata fields with OR logic
if queryMetadata.All != "" {
allQueries := make([]query.Query, 0)
addQuery("Username", queryMetadata.Username)
addQuery("Title", queryMetadata.Title)
addQuery("Extensions", "."+queryMetadata.Extension)
addQuery("Filenames", queryMetadata.Filename)
addQuery("Languages", queryMetadata.Language)
addQuery("Topics", queryMetadata.Topic)
// Create match phrase queries for each field
fields := []struct {
field string
value string
}{
{"Username", queryMetadata.All},
{"Title", queryMetadata.All},
{"Extensions", "." + queryMetadata.All},
{"Filenames", queryMetadata.All},
{"Languages", queryMetadata.All},
{"Topics", queryMetadata.All},
}
for _, f := range fields {
q := bleve.NewMatchPhraseQuery(f.value)
q.FieldVal = f.field
allQueries = append(allQueries, q)
}
// Combine all field queries with OR (disjunction)
allDisjunction := bleve.NewDisjunctionQuery(allQueries...)
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, allDisjunction)
} else {
// Original behavior: add each metadata field with AND logic
addQuery := func(field, value string) {
if value != "" && value != "." {
q := bleve.NewMatchPhraseQuery(value)
q.FieldVal = field
indexerQuery = bleve.NewConjunctionQuery(indexerQuery, q)
}
}
addQuery("Username", queryMetadata.Username)
addQuery("Title", queryMetadata.Title)
addQuery("Extensions", "."+queryMetadata.Extension)
addQuery("Filenames", queryMetadata.Filename)
addQuery("Languages", queryMetadata.Language)
addQuery("Topics", queryMetadata.Topic)
}
languageFacet := bleve.NewFacetRequest("Languages", 10)

عرض الملف

@@ -0,0 +1,162 @@
package index
import (
"os"
"path/filepath"
"testing"
)
// setupBleveIndexer creates a new BleveIndexer for testing
func setupBleveIndexer(t *testing.T) (*BleveIndexer, func()) {
t.Helper()
// Create a temporary directory for the test index
tmpDir, err := os.MkdirTemp("", "bleve-test-*")
if err != nil {
t.Fatalf("Failed to create temp directory: %v", err)
}
indexPath := filepath.Join(tmpDir, "test.index")
indexer := NewBleveIndexer(indexPath)
// Initialize the indexer
err = indexer.Init()
if err != nil {
os.RemoveAll(tmpDir)
t.Fatalf("Failed to initialize BleveIndexer: %v", err)
}
// Store in the global atomicIndexer since Add/Remove use it
var idx Indexer = indexer
atomicIndexer.Store(&idx)
// Return cleanup function
cleanup := func() {
atomicIndexer.Store(nil)
indexer.Close()
os.RemoveAll(tmpDir)
}
return indexer, cleanup
}
func TestBleveIndexerAddGist(t *testing.T) {
indexer, cleanup := setupBleveIndexer(t)
defer cleanup()
testIndexerAddGist(t, indexer)
}
func TestBleveIndexerAllFieldSearch(t *testing.T) {
indexer, cleanup := setupBleveIndexer(t)
defer cleanup()
testIndexerAllFieldSearch(t, indexer)
}
func TestBleveIndexerFuzzySearch(t *testing.T) {
indexer, cleanup := setupBleveIndexer(t)
defer cleanup()
testIndexerFuzzySearch(t, indexer)
}
func TestBleveIndexerSearchBasic(t *testing.T) {
indexer, cleanup := setupBleveIndexer(t)
defer cleanup()
testIndexerSearchBasic(t, indexer)
}
func TestBleveIndexerPagination(t *testing.T) {
indexer, cleanup := setupBleveIndexer(t)
defer cleanup()
testIndexerPagination(t, indexer)
}
// TestBleveIndexerInitAndClose tests Bleve-specific initialization and closing
func TestBleveIndexerInitAndClose(t *testing.T) {
tmpDir, err := os.MkdirTemp("", "bleve-init-test-*")
if err != nil {
t.Fatalf("Failed to create temp directory: %v", err)
}
defer os.RemoveAll(tmpDir)
indexPath := filepath.Join(tmpDir, "test.index")
indexer := NewBleveIndexer(indexPath)
// Test initialization
err = indexer.Init()
if err != nil {
t.Fatalf("Failed to initialize BleveIndexer: %v", err)
}
if indexer.index == nil {
t.Fatal("Expected index to be initialized, got nil")
}
// Test closing
indexer.Close()
// Test reopening the same index
indexer2 := NewBleveIndexer(indexPath)
err = indexer2.Init()
if err != nil {
t.Fatalf("Failed to reopen BleveIndexer: %v", err)
}
defer indexer2.Close()
if indexer2.index == nil {
t.Fatal("Expected reopened index to be initialized, got nil")
}
}
// TestBleveIndexerUnicodeSearch tests that Unicode content can be indexed and searched
func TestBleveIndexerUnicodeSearch(t *testing.T) {
indexer, cleanup := setupBleveIndexer(t)
defer cleanup()
// Add a gist with Unicode content
gist := &Gist{
GistID: 100,
UserID: 100,
Visibility: 0,
Username: "testuser",
Title: "Unicode Test",
Content: "Hello world with unicode characters: café résumé naïve",
Filenames: []string{"test.txt"},
Extensions: []string{".txt"},
Languages: []string{"Text"},
Topics: []string{"unicode"},
CreatedAt: 1234567890,
UpdatedAt: 1234567890,
}
err := indexer.Add(gist)
if err != nil {
t.Fatalf("Failed to add gist: %v", err)
}
// Search for unicode content
gistIDs, total, _, err := indexer.Search("café", SearchGistMetadata{}, 100, 1)
if err != nil {
t.Fatalf("Search failed: %v", err)
}
if total == 0 {
t.Skip("Unicode search may require specific index configuration")
return
}
found := false
for _, id := range gistIDs {
if id == 100 {
found = true
break
}
}
if !found {
t.Log("Unicode gist not found in search results, but other results were returned")
}
}

عرض الملف

@@ -22,4 +22,5 @@ type SearchGistMetadata struct {
Extension string
Language string
Topic string
All string
}

تم حذف اختلاف الملف لأن الملف كبير جداً تحميل الاختلاف