Unverified commit d647bfb9 authored by jjchiw, committed by GitHub

Scoring in RediSearch (#1254)

* Scoring in RediSearch

* Unit Test Added: EXPLAINSCORE
#1254
parent c7276c23
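For context, a rough end-to-end sketch of how the new options fit together once this change is in. The index name, connection string, and Client constructor used here are illustrative assumptions, not part of the diff; any existing index with matching documents will do.

using System;
using NRediSearch;
using StackExchange.Redis;

static class ScoringExample
{
    static void Main()
    {
        // Assumed setup: an existing RediSearch index named "books-idx" on localhost.
        var muxer = ConnectionMultiplexer.Connect("localhost");
        var client = new Client("books-idx", muxer.GetDatabase());

        var query = new Query("petit*").SetScoring("TFIDF").Limit(0, 10);
        query.WithScores = true;    // scores must be requested for the explanation to be returned
        query.ExplainScore = true;  // adds EXPLAINSCORE alongside SCORER

        var result = client.Search(query);
        foreach (var doc in result.Documents)
        {
            Console.WriteLine($"{doc.Id}: {doc.Score}");
            foreach (var line in doc.ScoreExplained ?? Array.Empty<string>())
                Console.WriteLine($"  {line}");
        }
    }
}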
@@ -231,7 +231,7 @@ public SearchResult Search(Query q)
q.SerializeRedisArgs(args);
var resp = (RedisResult[])DbSync.Execute("FT.SEARCH", args);
return new SearchResult(resp, !q.NoContent, q.WithScores, q.WithPayloads);
return new SearchResult(resp, !q.NoContent, q.WithScores, q.WithPayloads, q.ExplainScore);
}
/// <summary>
@@ -248,7 +248,7 @@ public async Task<SearchResult> SearchAsync(Query q)
q.SerializeRedisArgs(args);
var resp = (RedisResult[])await _db.ExecuteAsync("FT.SEARCH", args).ConfigureAwait(false);
return new SearchResult(resp, !q.NoContent, q.WithScores, q.WithPayloads);
return new SearchResult(resp, !q.NoContent, q.WithScores, q.WithPayloads, q.ExplainScore);
}
/// <summary>
@@ -13,6 +13,7 @@ public class Document
public string Id { get; }
public double Score { get; }
public byte[] Payload { get; }
public string[] ScoreExplained { get; private set; }
internal readonly Dictionary<string, RedisValue> _properties;
public Document(string id, double score, byte[] payload) : this(id, null, score, payload) { }
public Document(string id) : this(id, null, 1.0, null) { }
@@ -42,6 +43,16 @@ public static Document Load(string id, double score, byte[] payload, RedisValue[
return ret;
}
public static Document Load(string id, double score, byte[] payload, RedisValue[] fields, string[] scoreExplained)
{
Document ret = Document.Load(id, score, payload, fields);
if (scoreExplained != null)
{
ret.ScoreExplained = scoreExplained;
}
return ret;
}
public RedisValue this[string key]
{
get { return _properties.TryGetValue(key, out var val) ? val : default(RedisValue); }
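A quick note on the contract of the new Load overload (values below are made up; the usual NRediSearch and StackExchange.Redis usings are assumed): passing no explanation leaves the existing behaviour untouched, which is what the BasicScoringUsage test further down relies on when it asserts ScoreExplained is null.

var fields = new RedisValue[] { "title", "Le Petit Prince" };

// Existing overload / no explanation: ScoreExplained stays null.
var plain = Document.Load("1", 1.0, null, fields);

// New overload with an explanation: the flattened strings are attached to the document.
var explained = Document.Load("1", 1.0, null, fields,
    new[] { "Final TFIDF : words TFIDF 1.00 * document score 1.00 / norm 2 / slop 1" });
// explained.ScoreExplained[0] now holds that string; plain.ScoreExplained is still null.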
@@ -141,6 +141,13 @@ public Paging(int offset, int count)
/// Set the query language, for stemming purposes; see http://redisearch.io for documentation on languages and stemming
/// </summary>
public string Language { get; set; }
/// <summary>
/// Set the scoring function used for the query; see https://oss.redislabs.com/redisearch/Scoring.html for documentation on the available scorers
/// </summary>
public string Scoring { get; set; }
/// <summary>
/// If true, and a scorer has been set via <see cref="Scoring"/>, add the EXPLAINSCORE argument so that RediSearch returns a textual explanation of how each document's score was computed; the explanation is exposed as Document.ScoreExplained and is only parsed when WithScores is also set.
/// </summary>
public bool ExplainScore { get; set; }
internal string[] _fields = null;
internal string[] _keys = null;
internal string[] _returnFields = null;
@@ -231,6 +238,17 @@ internal void SerializeRedisArgs(List<object> args)
args.Add((SortAscending ? "ASC" : "DESC").Literal());
}
if (Scoring != null)
{
args.Add("SCORER".Literal());
args.Add(Scoring);
if (ExplainScore)
{
args.Add("EXPLAINSCORE".Literal());
}
}
if (Payload != null)
{
args.Add("PAYLOAD".Literal());
@@ -460,5 +478,28 @@ public Query SetLanguage(string language)
Language = language;
return this;
}
/// <summary>
/// Set the scoring function used to evaluate document relevance. RediSearch ships with a few basic scoring functions, all based on document scores and term frequency, regardless of whether sortable fields are used.
/// A scoring function is selected by adding the SCORER {scorer_name} argument to a search query; custom functions can be added through the Extension API.
/// The pre-bundled scoring functions, each referenced by the registered name that can be passed as the SCORER argument to FT.SEARCH, are:
/// - TFIDF (default) (https://oss.redislabs.com/redisearch/Scoring.html#tfidf_default)
/// - TFIDF.DOCNORM (https://oss.redislabs.com/redisearch/Scoring.html#tfidfdocnorm)
/// - BM25 (https://oss.redislabs.com/redisearch/Scoring.html#bm25)
/// - DISMAX (https://oss.redislabs.com/redisearch/Scoring.html#dismax)
/// - DOCSCORE (https://oss.redislabs.com/redisearch/Scoring.html#docscore)
/// - HAMMING (https://oss.redislabs.com/redisearch/Scoring.html#hamming)
/// </summary>
/// <param name="scoring">The registered name of the scoring function, e.g. "TFIDF" or "BM25".</param>
/// <returns>The query object itself, for fluent chaining.</returns>
public Query SetScoring(string scoring)
{
Scoring = scoring;
return this;
}
}
}
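A small sketch of the fluent setter with the scorers listed in the doc comment above (whether a particular scorer fits a given workload is outside the scope of this change; usings assumed as elsewhere):

// Rank purely by the a-priori document score supplied at indexing time.
var byDocScore = new Query("petit*").SetScoring("DOCSCORE");

// BM25 variation of the default TFIDF scorer.
var byBm25 = new Query("petit*").SetScoring("BM25");

// The scorer name is passed through verbatim, so a custom scorer registered via the
// Extension API can be selected the same way.
var custom = new Query("petit*").SetScoring("MY_CUSTOM_SCORER");  // hypothetical scorer name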
@@ -2,6 +2,7 @@
using StackExchange.Redis;
using System.Collections.Generic;
using System.Linq;
namespace NRediSearch
{
@@ -15,7 +16,7 @@ public class SearchResult
public long TotalResults { get; }
public List<Document> Documents { get; }
internal SearchResult(RedisResult[] resp, bool hasContent, bool hasScores, bool hasPayloads)
internal SearchResult(RedisResult[] resp, bool hasContent, bool hasScores, bool hasPayloads, bool shouldExplainScore)
{
// Calculate the step distance to walk over the results.
// The order of results is id, score (if withScore), payLoad (if hasPayloads), fields
@@ -28,6 +29,7 @@ internal SearchResult(RedisResult[] resp, bool hasContent, bool hasScores, bool
step++;
scoreOffset = 1;
contentOffset++;
}
if (hasContent)
{
@@ -50,10 +52,21 @@ internal SearchResult(RedisResult[] resp, bool hasContent, bool hasScores, bool
double score = 1.0;
byte[] payload = null;
RedisValue[] fields = null;
string[] scoreExplained = null;
if (hasScores)
{
if (shouldExplainScore)
{
// With EXPLAINSCORE, the score entry is itself an array: [score, [nested explanation]]
var scoreResult = (RedisResult[])resp[i + scoreOffset];
score = (double)scoreResult[0];
var redisResultsScoreExplained = (RedisResult[])scoreResult[1];
scoreExplained = FlatRedisResultArray(redisResultsScoreExplained).ToArray();
}
else
{
score = (double)resp[i + scoreOffset];
}
}
if (hasPayloads)
{
payload = (byte[])resp[i + payloadOffset];
@@ -64,7 +77,23 @@ internal SearchResult(RedisResult[] resp, bool hasContent, bool hasScores, bool
fields = (RedisValue[])resp[i + contentOffset];
}
docs.Add(Document.Load(id, score, payload, fields));
docs.Add(Document.Load(id, score, payload, fields, scoreExplained));
}
}
// Depth-first flattens a nested RedisResult array (the EXPLAINSCORE explanation tree) into a flat sequence of strings.
static IEnumerable<string> FlatRedisResultArray(RedisResult[] collection)
{
foreach (var o in collection)
{
if (o.Type == ResultType.MultiBulk)
{
foreach (string t in FlatRedisResultArray((RedisResult[])o))
yield return t;
}
else
{
yield return o.ToString();
}
}
}
}
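To make the parsing above concrete: with WITHSCORES plus EXPLAINSCORE, each per-document score entry in the raw FT.SEARCH reply becomes a two-element array holding the numeric score and a nested explanation tree, which FlatRedisResultArray flattens depth-first. The sketch below mirrors that flattening on plain object[] values instead of RedisResult so it stands alone; the exact nesting RediSearch returns is an assumption inferred from the strings asserted in the tests further down.

using System.Collections.Generic;
using System.Linq;

static class ExplainScoreShape
{
    // Hypothetical mirror of FlatRedisResultArray, operating on plain objects.
    static IEnumerable<string> Flatten(object[] node)
    {
        foreach (var o in node)
        {
            if (o is object[] nested)
            {
                foreach (var s in Flatten(nested)) yield return s;
            }
            else
            {
                yield return o.ToString();
            }
        }
    }

    static void Main()
    {
        // Approximate shape of one score entry when EXPLAINSCORE is on.
        var scoreEntry = new object[]
        {
            0.5,                  // parsed as the double score (scoreResult[0])
            new object[]          // the explanation tree (scoreResult[1])
            {
                "Final TFIDF : words TFIDF 1.00 * document score 1.00 / norm 2 / slop 1",
                new object[]
                {
                    "(Weight 1.00 * total children TFIDF 1.00)",
                    "(TFIDF 1.00 = Weight 1.00 * TF 1 * IDF 1.00)"
                }
            }
        };

        // Yields the three strings in order, matching Document.ScoreExplained in the tests below.
        var explained = Flatten((object[])scoreEntry[1]).ToArray();
    }
}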
@@ -77,5 +77,116 @@ public void BasicUsage()
Assert.Equal("lorem ipsum", (string)item["body"]);
Assert.Equal(1337, (int)item["price"]);
}
[Fact]
public void BasicScoringUsage()
{
var client = GetClient();
try { client.DropIndex(); } catch { } // reset DB
CreateSchema(client);
var term = "petit*";
var query = new NRediSearch.Query(term);
query.Limit(0, 10);
query.WithScores = true;
var searchResult = client.Search(query);
var docResult = searchResult.Documents.FirstOrDefault();
Assert.Equal(1, searchResult.TotalResults);
Assert.NotEqual(0, docResult.Score);
Assert.Equal("1", docResult.Id);
Assert.Null(docResult.ScoreExplained);
}
[Fact]
public void BasicScoringUsageWithExplainScore()
{
var client = GetClient();
try { client.DropIndex(); } catch { } // reset DB
CreateSchema(client);
var term = "petit*";
var query = new NRediSearch.Query(term);
query.Limit(0, 10);
query.WithScores = true;
query.Scoring = "TFIDF";
query.ExplainScore = true;
var searchResult = client.Search(query);
var docResult = searchResult.Documents.FirstOrDefault();
Assert.Equal(1, searchResult.TotalResults);
Assert.NotEqual(0, docResult.Score);
Assert.Equal("1", docResult.Id);
Assert.NotEmpty(docResult.ScoreExplained);
Assert.Equal("Final TFIDF : words TFIDF 1.00 * document score 1.00 / norm 2 / slop 1", docResult.ScoreExplained[0]);
Assert.Equal("(Weight 1.00 * total children TFIDF 1.00)", docResult.ScoreExplained[1]);
Assert.Equal("(TFIDF 1.00 = Weight 1.00 * TF 1 * IDF 1.00)", docResult.ScoreExplained[2]);
}
[Fact]
public void BasicScoringUsageWithExplainScoreDifferentScorer()
{
var client = GetClient();
try { client.DropIndex(); } catch { } // reset DB
CreateSchema(client);
var term = "petit*";
var query = new NRediSearch.Query(term);
query.Limit(0, 10);
query.WithScores = true;
query.Scoring = "TFIDF.DOCNORM";
query.ExplainScore = true;
var searchResult = client.Search(query);
var docResult = searchResult.Documents.FirstOrDefault();
Assert.Equal(1, searchResult.TotalResults);
Assert.NotEqual(0, docResult.Score);
Assert.Equal("1", docResult.Id);
Assert.NotEmpty(docResult.ScoreExplained);
Assert.Equal("Final TFIDF : words TFIDF 1.00 * document score 1.00 / norm 20 / slop 1", docResult.ScoreExplained[0]);
Assert.Equal("(Weight 1.00 * total children TFIDF 1.00)", docResult.ScoreExplained[1]);
Assert.Equal("(TFIDF 1.00 = Weight 1.00 * TF 1 * IDF 1.00)", docResult.ScoreExplained[2]);
}
private void CreateSchema(Client client)
{
var schema = new NRediSearch.Schema();
schema
.AddSortableTextField("title")
.AddTextField("country")
.AddTextField("author")
.AddTextField("aka")
.AddTagField("language");
client.CreateIndex(schema, NRediSearch.Client.IndexOptions.Default);
var doc = new NRediSearch.Document("1");
doc
.Set("title", "Le Petit Prince")
.Set("country", "France")
.Set("author", "Antoine de Saint-Exupéry")
.Set("language", "fr_FR")
.Set("aka", "The Little Prince, El Principito");
client.AddDocument(doc);
}
}
}
@@ -35,13 +35,15 @@ public void SerializeRedisArgs()
NoStopwords = true,
Verbatim = true,
WithPayloads = true,
WithScores = true
WithScores = true,
Scoring = "TFIDF.DOCNORM",
ExplainScore = true
};
var args = new List<object>();
query.SerializeRedisArgs(args);
Assert.Equal(8, args.Count);
Assert.Equal(11, args.Count); // the previous 8 args plus SCORER, its value, and EXPLAINSCORE
Assert.Equal(query.QueryString, (string)args[0]);
Assert.Contains("NOCONTENT".Literal(), args);
Assert.Contains("NOSTOPWORDS".Literal(), args);
@@ -50,9 +52,15 @@ public void SerializeRedisArgs()
Assert.Contains("WITHSCORES".Literal(), args);
Assert.Contains("LANGUAGE".Literal(), args);
Assert.Contains("", args);
Assert.Contains("SCORER".Literal(), args);
Assert.Contains("TFIDF.DOCNORM", args);
Assert.Contains("EXPLAINSCORE".Literal(), args);
var languageIndex = args.IndexOf("LANGUAGE".Literal());
Assert.Equal("", args[languageIndex + 1]);
var scoringIndex = args.IndexOf("SCORER".Literal());
Assert.Equal("TFIDF.DOCNORM", args[scoringIndex + 1]);
}
[Fact]
@@ -168,5 +176,14 @@ public void SummarizeFields()
Assert.Equal(-1, query._summarizeFragmentLen);
Assert.Equal(-1, query._summarizeNumFragments);
}
[Fact]
public void SetScoring()
{
var query = GetQuery();
Assert.Null(query.Scoring);
Assert.Same(query, query.SetScoring("TFIDF.DOCNORM"));
Assert.Equal("TFIDF.DOCNORM", query.Scoring);
}
}
}