In the Solr admin UI there is a spellcheck option, but it does not show any suggestions.
How does spellchecking work with the select query?
If I query the spell URL directly, it gives me the expected result:
http://localhost:8983/solr/prashant1/spell?q=blakc&spellcheck=on&wt=json
Result
{
"responseHeader":{
"status":0,
"QTime":8},
"response":{"numFound":0,"start":0,"docs":[]
},
"spellcheck":{
"suggestions":[
"blakc",{
"numFound":10,
"startOffset":0,
"endOffset":5,
"origFreq":0,
"suggestion":[{
"word":"black",
"freq":65146},
{
"word":"blanc",
"freq":151},
{
"word":"blake",
"freq":10},
{
"word":"blac",
"freq":2},
{
"word":"block",
"freq":1863},
{
"word":"blanca",
"freq":32},
{
"word":"blank",
"freq":31},
{
"word":"blade",
"freq":23},
{
"word":"blacks",
"freq":12},
{
"word":"blanco",
"freq":11}]}],
"correctlySpelled":false,
"collations":[]}}
But I need the same result with the select query which is not working from the SOLR admin.
Solrconfig.xml
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  <!-- Field type used to analyze the text that is being spellchecked. -->
  <str name="queryAnalyzerFieldType">text_general</str>

  <!-- This component may hold several spellcheckers; only "default" is declared here. -->

  <!-- "default": suggestions come from the terms of the Name field in the main index. -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">Name</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <!-- Distance measure; "internal" is the built-in Levenshtein implementation. -->
    <str name="distanceMeasure">internal</str>
    <!-- Lowest accuracy a candidate may have and still count as a valid suggestion. -->
    <float name="accuracy">0.5</float>
    <!-- Upper bound on edits when enumerating candidate terms (1 or 2). -->
    <int name="maxEdits">2</int>
    <!-- Length of the prefix a candidate must share with the query term. -->
    <int name="minPrefix">1</int>
    <!-- Cap on the number of inspections per result. -->
    <int name="maxInspections">5</int>
    <!-- Query terms shorter than this are not considered for correction. -->
    <int name="minQueryLength">4</int>
    <!-- A query term appearing in more than this share of documents is not corrected. -->
    <float name="maxQueryFrequency">0.01</float>
    <!-- Uncomment to require that suggestions occur in at least 1% of the documents:
    <float name="thresholdTokenFrequency">.01</float>
    -->
  </lst>
</searchComponent>
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  <!-- Request defaults: spellchecking is switched on and uses the "default" dictionary. -->
  <lst name="defaults">
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck">on</str>
    <str name="spellcheck.extendedResults">true</str>
    <str name="spellcheck.count">10</str>
    <str name="spellcheck.alternativeTermCount">5</str>
    <str name="spellcheck.maxResultsForSuggest">5</str>
    <!-- Collation settings (re-written queries built from the suggestions). -->
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.collateExtendedResults">true</str>
    <str name="spellcheck.maxCollationTries">10</str>
    <str name="spellcheck.maxCollations">5</str>
  </lst>
  <!-- Append the spellcheck component to this handler's component chain. -->
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
It should work with:
http://localhost:8983/solr/prashant1/select?q=Name%3Ablakc&spellcheck.q=blakc&spellcheck=on
Are there any settings or steps that need to be done?
Try adding the spellcheck component to the standard query handler, like this:
<requestHandler name="/select" class="solr.SearchHandler">
  <!-- Request defaults, now including the spellcheck parameters. -->
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <int name="rows">10</int>
    <str name="spellcheck">on</str>
    <str name="spellcheck.extendedResults">true</str>
    <str name="spellcheck.count">10</str>
  </lst>
  <!-- Append the spellcheck component to the /select component chain. -->
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
You can then call it like this:
http://localhost:8983/solr/select?q=yogik&spellcheck=true
Also don't forget to build the spellcheck dictionary before you use it:
http://localhost:8983/solr/select/?q=*:*&spellcheck=true&spellcheck.build=true
Related
I'm trying to use Solr's spell checking but the results are not working as I expected. For example, when the user's query is "amuxil" Solr returns "amul" as the first result, while "amoxil" is in the index so I expected it should be returned in the first position instead.
solrconfig.xml
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  <!-- Request defaults: spellchecking is switched on and uses the "default" dictionary. -->
  <lst name="defaults">
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck">on</str>
    <str name="spellcheck.extendedResults">true</str>
    <str name="spellcheck.count">10</str>
    <str name="spellcheck.alternativeTermCount">5</str>
    <str name="spellcheck.maxResultsForSuggest">5</str>
    <!-- Collation settings (re-written queries built from the suggestions). -->
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.collateExtendedResults">true</str>
    <str name="spellcheck.maxCollationTries">5000</str>
    <str name="spellcheck.maxCollations">3</str>
    <!-- Disabled: <str name="spellcheck.onlyMorePopular">true</str> -->
  </lst>
  <!-- Append the spellcheck component to this handler's component chain. -->
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  <!-- "default": suggestions come from the terms of the _text_ field in the main index. -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">_text_</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <!-- Distance measure; "internal" is the built-in Levenshtein implementation. -->
    <str name="distanceMeasure">internal</str>
    <!-- Lowest accuracy a candidate may have and still count as a valid suggestion. -->
    <float name="accuracy">0.5</float>
    <!-- Upper bound on edits when enumerating candidate terms (1 or 2). -->
    <int name="maxEdits">2</int>
    <!-- Length of the prefix a candidate must share with the query term. -->
    <int name="minPrefix">1</int>
    <!-- Cap on the number of inspections per result. -->
    <int name="maxInspections">5</int>
    <!-- Query terms shorter than this are not considered for correction. -->
    <int name="minQueryLength">3</int>
    <!-- Suggestion ordering: "freq" ranks by term frequency first instead of by score. -->
    <str name="comparatorClass">freq</str>
    <!-- Disabled: a query term appearing in more than this share of documents is not corrected.
    <float name="maxQueryFrequency">0.01</float>
    -->
    <!-- Disabled: uncomment to require that suggestions occur in at least 1% of the documents.
    <float name="thresholdTokenFrequency">.01</float>
    -->
  </lst>
</searchComponent>
I am using Solr for spell checking, with both DirectSolrSpellChecker and WordBreakSolrSpellChecker enabled. I have the following issues:
A. When I query for "worry", Solr converts the term to "worri" and returns results for that. If a word ends with "y" (e.g. "injury", "worry"), the trailing "y" is replaced with "i".
Example Query:
http://localhost:8983/solr/MY_CORE/spell?df=text&spellcheck.q=worry&spellcheck=true&spellcheck.extendedResults=true&spellcheck.onlyMorePopular=true
Solr Result:
<response>
<lst name="responseHeader">
<int name="status">0</int>
<int name="QTime">5</int>
</lst>
<result name="response" numFound="0" start="0"/>
<lst name="spellcheck">
<lst name="suggestions">
<lst name="worri">
<int name="numFound">9</int>
<int name="startOffset">0</int>
<int name="endOffset">5</int>
<int name="origFreq">5</int>
<arr name="suggestion">
<lst>
<str name="word">wo r ri</str>
<int name="freq">90</int>
</lst>
<lst>
<str name="word">worst</str>
<int name="freq">12</int>
</lst>
<lst>
<str name="word">wo r r i</str>
<int name="freq">5246</int>
</lst>
<lst>
<str name="word">work</str>
<int name="freq">2920</int>
</lst>
<lst>
<str name="word">w o r ri</str>
<int name="freq">530</int>
</lst>
<lst>
<str name="word">worn</str>
<int name="freq">81</int>
</lst>
<lst>
<str name="word">w o r r i</str>
<int name="freq">5246</int>
</lst>
<lst>
<str name="word">wors</str>
<int name="freq">79</int>
</lst>
<lst>
<str name="word">worm</str>
<int name="freq">10</int>
</lst>
</arr>
</lst>
</lst>
<bool name="correctlySpelled">false</bool>
</lst>
</response>
B. The above output also contains words like "w o r r i", and I couldn't find any of those words in the Solr field. I also don't know why Solr is returning such words, where the letters are separated by spaces.
Below is schema file:
<!-- "MY FIELD" is a placeholder for the real field name; it is quoted so the element is well-formed XML. -->
<field name="MY FIELD" type="text_en" multiValued="false" indexed="true" stored="true"/>
Below is the config file:
<!-- "default": suggestions come from the terms of a field in the main index. -->
<lst name="spellchecker">
  <str name="name">default</str>
  <str name="field"> MY FIELD </str>
  <str name="classname">solr.DirectSolrSpellChecker</str>
  <!-- Distance measure; "internal" is the built-in Levenshtein implementation. -->
  <str name="distanceMeasure">internal</str>
  <!-- Lowest accuracy a candidate may have and still count as a valid suggestion. -->
  <float name="accuracy">0.5</float>
  <!-- Upper bound on edits when enumerating candidate terms (1 or 2). -->
  <int name="maxEdits">2</int>
  <!-- Length of the prefix a candidate must share with the query term. -->
  <int name="minPrefix">1</int>
  <!-- Cap on the number of inspections per result. -->
  <int name="maxInspections">5</int>
  <!-- Query terms shorter than this are not considered for correction. -->
  <int name="minQueryLength">4</int>
  <!-- A query term appearing in more than this share of documents is not corrected. -->
  <float name="maxQueryFrequency">0.01</float>
  <!-- Uncomment to require that suggestions occur in at least 1% of the documents:
  <float name="thresholdTokenFrequency">.01</float>
  -->
</lst>
<!-- "wordbreak": a spellchecker able to split or join words; the "/spell" handler below shows its usage.
     Here only splitting is enabled (breakWords=true, combineWords=false). -->
<lst name="spellchecker">
  <str name="name">wordbreak</str>
  <str name="classname">solr.WordBreakSolrSpellChecker</str>
  <str name="field">MY FIELD</str>
  <str name="combineWords">false</str>
  <str name="breakWords">true</str>
  <int name="maxChanges">10</int>
</lst>
</searchComponent>
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <!-- Two dictionaries are selected: "default" and "wordbreak". -->
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck.dictionary">wordbreak</str>
    <str name="spellcheck">on</str>
    <str name="spellcheck.extendedResults">true</str>
    <str name="spellcheck.count">10</str>
    <str name="spellcheck.alternativeTermCount">5</str>
    <str name="spellcheck.maxResultsForSuggest">5</str>
    <!-- Collation is switched off in this configuration. -->
    <str name="spellcheck.collate">false</str>
    <str name="spellcheck.collateExtendedResults">false</str>
    <str name="spellcheck.maxCollationTries">10</str>
    <str name="spellcheck.maxCollations">5</str>
  </lst>
  <!-- Append the spellcheck component to this handler's component chain. -->
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
I would really appreciate if someone can help me regarding this.
Thanks in advance !
The "strange" suggestions such as "wo r r i" appear because you are using WordBreakSolrSpellChecker, which breaks tokens apart while trying to provide spellcheck suggestions. If you remove it, you shouldn't get these kinds of suggestions. Here is a quote from the official documentation:
WordBreakSolrSpellChecker offers suggestions by combining adjacent
query terms and/or breaking terms into multiple words. It is a
SpellCheckComponent enhancement, leveraging Lucene's
WordBreakSpellChecker. It can detect spelling errors resulting from
misplaced whitespace without the use of shingle-based dictionaries and
provides collation support for word-break errors, including cases
where the user has a mix of single-word spelling errors and word-break
errors in the same query. It also provides shard support.
So, basically, in your example you're getting normal suggestions from the Solr index, such as worst, work, worm, worn, and wors. All the others are just the result of WordBreakSolrSpellChecker, and you will never find them in your index.
I'm having an annoying issue with the spellcheck component of solr 6.5.0. If I run a query through the spellcheck request handler, /spell, the query works as expected and I get suggested spelling for the incorrect words.
{
"responseHeader":{
"status":0,
"QTime":42},
"response":{"numFound":0,"start":0,"docs":[]
},
"spellcheck":{
"suggestions":{
"injary":{
"numFound":3,
"startOffset":0,
"endOffset":6,
"origFreq":0,
"suggestion":[{
"word":"injury",
"freq":121},
{
"word":"inward",
"freq":3},
{
"word":"injure",
"freq":1}]}},
"correctlySpelled":false,
"collations":{
"collation":{
"collationQuery":"injury",
"hits":121,
"misspellingsAndCorrections":[
"injary","injury"]},
"collation":{
"collationQuery":"inward",
"hits":3,
"misspellingsAndCorrections":[
"injary","inward"]},
"collation":{
"collationQuery":"injure",
"hits":1,
"misspellingsAndCorrections":[
"injary","injure"]}}}}
But if I run a query through the standard request handler, /select, I get no suggestions.
{
"responseHeader":{
"status":0,
"QTime":0,
"params":{
"q":"injary",
"indent":"on",
"spellcheck":"on",
"wt":"json",
"_":"1492780436450"}},
"response":{"numFound":0,"start":0,"docs":[]
}}
Any help would be greatly appreciated.
I modified the solrconfig.xml to bring the two request handlers into line as follows, the rest is default:
<!-- "default": DirectSolrSpellChecker reading its terms from the content field. -->
<lst name="spellchecker">
  <str name="name">default</str>
  <str name="field">content</str>
  <str name="classname">solr.DirectSolrSpellChecker</str>
  <str name="distanceMeasure">internal</str>
  <float name="accuracy">0.5</float>
  <!-- Limits applied while enumerating and inspecting candidate terms. -->
  <int name="maxEdits">2</int>
  <int name="minPrefix">1</int>
  <int name="maxInspections">5</int>
  <int name="minQueryLength">4</int>
  <!-- Frequency thresholds for the query term and for suggested terms. -->
  <float name="maxQueryFrequency">0.01</float>
  <float name="thresholdTokenFrequency">.0001</float>
</lst>
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
  <lst name="defaults">
    <!-- Only the 'default' spellchecker is selected in this handler.
         Listing a further spellcheck.dictionary entry (for example
         'wordbreak') would make Solr combine the suggestions of both
         spellcheckers, including in collations (re-written queries). -->
    <str name="spellcheck.dictionary">default</str>
    <str name="spellcheck">on</str>
    <str name="spellcheck.extendedResults">true</str>
    <str name="spellcheck.count">10</str>
    <str name="spellcheck.alternativeTermCount">5</str>
    <str name="spellcheck.maxResultsForSuggest">5</str>
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.collateExtendedResults">true</str>
    <str name="spellcheck.maxCollationTries">10</str>
    <str name="spellcheck.maxCollations">5</str>
    <!-- Responses are written as JSON unless the request says otherwise. -->
    <str name="wt">json</str>
  </lst>
  <!-- Append the spellcheck component to this handler's component chain. -->
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
<requestHandler name="/select" class="solr.SearchHandler">
  <!-- Defaults for query parameters; a parameter supplied in the
       request overrides the value given here. -->
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <int name="rows">10</int>
    <str name="df">_text_</str>
    <str name="wt">json</str>
    <!-- Spellcheck parameters. -->
    <str name="spellcheck">true</str>
    <str name="spellcheck.count">5</str>
    <str name="spellcheck.collate">true</str>
    <str name="spellcheck.maxCollationTries">5</str>
  </lst>
  <!-- Append the spellcheck component to the /select component chain. -->
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
It appears the issue was related to my managed-schema file.
I am parsing XML files, and Solr automatically adds the fields of the XML files to the managed-schema file as type strings. When I changed my dictionary field to type text_general, it started working as expected.
I honestly can't see how this worked, but I made no other changes. I deleted my core and started from scratch to make sure I wasn't mistaken, and it worked.
Using the following configuration to run the spellchecking on Solr queries:
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  <!-- Field type used to analyze the text that is being spellchecked. -->
  <str name="queryAnalyzerFieldType">textTitle</str>

  <!-- "default": DirectSolrSpellChecker reading its terms from the text field. -->
  <lst name="spellchecker">
    <str name="name">default</str>
    <str name="field">text</str>
    <str name="classname">solr.DirectSolrSpellChecker</str>
    <str name="distanceMeasure">internal</str>
    <float name="accuracy">0.7</float>
    <!-- Limits applied while enumerating and inspecting candidate terms. -->
    <int name="maxEdits">2</int>
    <int name="minPrefix">1</int>
    <int name="maxInspections">10</int>
    <int name="minQueryLength">3</int>
    <!-- Frequency thresholds for the query term and for suggested terms. -->
    <float name="maxQueryFrequency">0.10</float>
    <float name="thresholdTokenFrequency">.00001</float>
  </lst>
</searchComponent>
<requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
  <!-- Query-parameter defaults: spellchecking and collation are both enabled. -->
  <lst name="defaults">
    <str name="spellcheck">true</str>
    <str name="spellcheck.collate">true</str>
  </lst>
  <!-- Append the spellcheck component to this handler's component chain. -->
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
I get the correct suggestions for misspelled queries. However, is it possible to automatically re-run the query so that it returns the results for the spellchecked terms, instead of just returning the suggestions?
I am trying to set up spellchecker, according to solr documentation. But when I am testing, I don't have any suggestion. My piece of code follows:
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  <!-- Field type used to analyze the text that is being spellchecked. -->
  <str name="queryAnalyzerFieldType">textSpell</str>

  <!-- "default": index-based spellchecker built from the name field;
       its dictionary index is stored under ./spellchecker. -->
  <lst name="spellchecker">
    <str name="classname">solr.IndexBasedSpellChecker</str>
    <str name="name">default</str>
    <str name="field">name</str>
    <str name="spellcheckIndexDir">./spellchecker</str>
  </lst>

  <!-- NOTE: second declaration of queryAnalyzerFieldType; it repeats the one at the top of this component. -->
  <str name="queryAnalyzerFieldType">textSpell</str>
</searchComponent>
<requestHandler name="/spellcheck" class="solr.SearchHandler">
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <!-- Optional; has to match the name of a spellchecker declared in the component. Falls back to "default". -->
    <str name="spellcheck.dictionary">default</str>
    <!-- "Only more popular" mode is switched off. -->
    <str name="spellcheck.onlyMorePopular">false</str>
    <!-- Extended results are switched off. -->
    <str name="spellcheck.extendedResults">false</str>
    <!-- How many suggestions to return. -->
    <str name="spellcheck.count">1</str>
  </lst>
  <!-- Append the spellcheck component to this handler's component chain. -->
  <arr name="last-components">
    <str>spellcheck</str>
  </arr>
</requestHandler>
The query I send to Solr:
q=%2B%28text%3A%28gasal%29%29&suggestField=contentOriginal&ontologySeed=gasal&spellcheck.build=true&spellcheck.q=gasal&spellcheck=true&spellcheck.collate=true&hl=true&hl.snippets=5&hl.fl=text&hl.fl=text&rows=12&start=0&qt=%2Fsuggestprobabilistic
Does anybody know why?? Thanks in advance
First, don't repeat queryAnalyzerFieldType twice in the component configuration.
It is recommended not to use a /spellcheck handler but instead to bind the spellcheck component to the standard query handler (or dismax if it is what you use) like this:
<!-- Sketch: the "..." lines stand for whatever the handler already contains. -->
<requestHandler name="standard" class="solr.SearchHandler" default="true">
  <lst name="defaults">
    ...
  </lst>
  <arr name="last-components">
    <str>spellcheck</str>
    ...
  </arr>
</requestHandler>
You can then call it like this:
http://localhost:8983/solr/select?q=komputer&spellcheck=true
Also don't forget to build the spellcheck dictionary before you use it:
http://localhost:8983/solr/select/?q=*:*&spellcheck=true&spellcheck.build=true
You can force the dictionary to build at each commit by configuring it in the component:
<searchComponent name="spellcheck" class="solr.SpellCheckComponent">
  <!-- Field type used to analyze the text that is being spellchecked. -->
  <str name="queryAnalyzerFieldType">textSpell</str>

  <!-- "default": index-based spellchecker built from the name field. -->
  <lst name="spellchecker">
    <str name="classname">solr.IndexBasedSpellChecker</str>
    <str name="name">default</str>
    <str name="field">name</str>
    <str name="spellcheckIndexDir">./spellchecker1</str>
    <!-- Rebuild the dictionary on every commit. -->
    <str name="buildOnCommit">true</str>
  </lst>
</searchComponent>
Finally, make sure that your name field is really an indexed field of type textSpell and that it contains enough content to build a good dictionary. In my case, I have a field named spellchecker that is populated from a couple of fields of my index (using copyField instructions in the schema).