Scoring profile with weights and a function - azure-cognitive-search

I'm using Azure Search with a scoring profile. I need text fields along with quantity sold to be a part of the scoring profile. I can configure the following profile, but the quantity sold doesn't seem to be factored into the search score when I query the index. I'm thinking this is because quantity sold isn't a string, it's an int. Therefore, I can't make the field searchable? I'm using the new featuresMode parameter in the query, and the quantity sold field doesn't even appear in the scoring breakdown.
"scoringProfiles": [
{
"name": "Product Name",
"functions": [
{
"fieldName": "QuantitySold",
"freshness": null,
"interpolation": "linear",
"magnitude": {
"boostingRangeStart": 0,
"boostingRangeEnd": 100000,
"constantBoostBeyondRange": true
},
"distance": null,
"tag": null,
"type": "magnitude",
"boost": 6
}
],
"functionAggregation": "sum",
"text": {
"weights": {
"ProductName": 4,
"ProductSet": 3,
"ProductDesc": 2
}
}
}
],

What type of boost you should use depends on the datatype. An int like QuantitySold should use type magnitude for boosting. A date would use type freshness and so on.
I recreated a minimal example with the simplest possible index with only two properties: Id and Title.
CREATE INDEX
{
"@odata.context": "https://{{SEARCH_SVC}}.{{DNS_SUFFIX}}/$metadata#indexes/$entity",
"@odata.etag": "\"0x8D8761DCBBCCD00\"",
"name": "{{INDEX}}",
"defaultScoringProfile": null,
"fields": [
{
"name": "Id",
"type": "Edm.String",
"facetable": false,
"filterable": true,
"key": true,
"retrievable": true,
"searchable": true,
"sortable": true,
"analyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
},
{
"name": "Title",
"type": "Edm.String",
"facetable": false,
"filterable": true,
"key": false,
"retrievable": true,
"searchable": true,
"sortable": true,
"analyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
},
{
"name": "QuantitySold",
"type": "Edm.Int32",
"facetable": true,
"filterable": true,
"key": false,
"retrievable": true,
"searchable": false,
"sortable": true,
"analyzer": null,
"searchAnalyzer": null,
"synonymMaps": [],
"fields": []
}
], "scoringProfiles": [
{
"name": "relevance",
"text": {
"weights": {
"Title": 1.5
}
}
},
{
"name": "sales",
"functions": [
{
"type": "magnitude",
"fieldName": "QuantitySold",
"boost": 100,
"interpolation": "linear",
"magnitude": {
"boostingRangeStart": 1,
"boostingRangeEnd": 100000,
"constantBoostBeyondRange": false
}
}
]
} ],"corsOptions": null, "suggesters": [], "analyzers": [], "tokenizers": [], "tokenFilters": [], "charFilters": [], "encryptionKey": null}
UPLOAD MINIMAL
I then submit two products. One called Apple iPhone with a low sales quantity. And Apple Juice with a high sales quantity.
{
"value": [
{
"@search.action": "mergeOrUpload",
"Id": "2",
"Title": "Apple Juice",
"QuantitySold": 10000
},
{
"@search.action": "mergeOrUpload",
"Id": "1",
"Title": "Apple iPhone",
"QuantitySold": 35
}
]
}
QUERY
Without using any scoring profile, I query for apple. As expected, the two items are equally relevant responses to my query. They both match one of two tokens. Both get score 0.25811607
https://{{SEARCH_SVC}}.{{DNS_SUFFIX}}/indexes/{{INDEX}}/docs?search=apple&$count=true&searchMode=all&queryType=full&api-version={{API-VERSION}}&featuresMode=enabled
{
"#odata.count": 2,
"value": [
{
"#search.score": 0.25811607,
"#search.features": {
"Title": {
"uniqueTokenMatches": 1.0,
"similarityScore": 0.25811607,
"termFrequency": 1.0
}
},
"Id": "2",
"Title": "Apple Juice",
"QuantitySold": 10000
},
{
"#search.score": 0.25811607,
"#search.features": {
"Title": {
"uniqueTokenMatches": 1.0,
"similarityScore": 0.25811607,
"termFrequency": 1.0
}
},
"Id": "1",
"Title": "Apple iPhone",
"QuantitySold": 35
}
]
}
QUERY WITH BOOST ON QUANTITY SOLD
I then repeat the query for apple, but this time I boost items with a high QuantitySold by selecting my scoring profile called sales. This boosts our Apple Juice item to the top with a score of 2.813235. The Apple iPhone item has also received a boost, but a much smaller one, with a score of only 0.26680434.
https://{{SEARCH_SVC}}.{{DNS_SUFFIX}}/indexes/{{INDEX}}/docs?search=apple&$count=true&searchMode=all&queryType=full&api-version={{API-VERSION}}&featuresMode=enabled&scoringProfile=sales
{
"#odata.count": 2,
"value": [
{
"#search.score": 2.813235,
"#search.features": {
"Title": {
"uniqueTokenMatches": 1.0,
"similarityScore": 0.25811607,
"termFrequency": 1.0
}
},
"Id": "2",
"Title": "Apple Juice",
"QuantitySold": 10000
},
{
"#search.score": 0.26680434,
"#search.features": {
"Title": {
"uniqueTokenMatches": 1.0,
"similarityScore": 0.25811607,
"termFrequency": 1.0
}
},
"Id": "1",
"Title": "Apple iPhone",
"QuantitySold": 35
}
]
}

Related

Parsing Nested JSON and Manipulating It in Ruby

This is my first attempt at parsing nested JSON with Ruby. I need to go through the JSON to pull out specific values for "_id", "name", and "type" for instance. I then need to create a reference table so that I can refer to each "_id" and associated information. I also need to combine information from multiple JSON responses. I've been able to get basic information and have tried a few things I've found online. I just need a little assistance with a starting point. If anyone has any ideas of where to start with this I'd really appreciate it.
Devices JSON response hash. Each device starts with _id.
{
"api": "1.0",
"error": null,
"id": "60b5d4c3077862123cfa4443",
"result": {
"devices": [
{
"_id": "123456787786211fd31f3dd",
"batteryPowered": true,
"category": "door_lock",
"deviceTypeId": "144_1_1",
"firmware": [
{
"id": "us.144.1_1.0",
"version": "2.6"
}
],
"gatewayId": "1234567807786214fbc6bd4e",
"info": {
"firmware.stack": "3.28",
"hardware": "0",
"manufacturer": "Kwikset",
"model": "912",
"protocol": "zwave",
"zwave.node": "2",
"zwave.smartstart": "no"
},
"name": "Garage Door",
"parentDeviceId": "",
"persistent": false,
"reachable": false,
"ready": true,
"roomId": "1234567807786211fd31f3eb",
"security": "middle",
"status": "idle",
"subcategory": "",
"type": "doorlock"
},
{
"_id": "1234567897786211fd31f3ed",
"batteryPowered": true,
"category": "door_lock",
"deviceTypeId": "59_1_1129",
"firmware": [
{
"id": "us.59.18064.0",
"version": "3.3"
},
{
"id": "us.59.18065.1",
"version": "11.0"
}
],
"gatewayId": "1234567897786214fbc6bd4e",
"info": {
"firmware.stack": "6.3",
"hardware": "3",
"manufacturer": "Schlage",
"model": "BE469ZP",
"protocol": "zwave",
"zwave.node": "3",
"zwave.smartstart": "no"
},
"name": "Front Door",
"parentDeviceId": "",
"persistent": false,
"reachable": true,
"ready": true,
"roomId": "1234567807786211fd31f3ec",
"security": "high",
"status": "idle",
"subcategory": "",
"type": "doorlock"
},
{
"_id": "1234567897786211fd31f40a",
"batteryPowered": false,
"category": "switch",
"deviceTypeId": "57_20562_12344",
"firmware": [
{
"id": "us.57.29240.0",
"version": "5.25"
}
],
"gatewayId": "1234567807786214fbc6bd4e",
"info": {
"firmware.stack": "4.54",
"hardware": "255",
"manufacturer": "Honeywell",
"model": "ZW4103/39337",
"protocol": "zwave",
"zwave.node": "4",
"zwave.smartstart": "no"
},
"name": "Lamp Switch",
"parentDeviceId": "",
"persistent": false,
"reachable": true,
"ready": true,
"roomId": "1234567807786211fd31f416",
"security": "no",
"status": "idle",
"subcategory": "interior_plugin",
"type": "switch.outlet"
},
{
"_id": "1234567b07786211fd31f40e",
"batteryPowered": false,
"category": "dimmable_light",
"deviceTypeId": "57_20548_12339",
"firmware": [
{
"id": "us.57.29747.0",
"version": "5.21"
}
],
"gatewayId": "1234567d07786214fbc6bd4e",
"info": {
"firmware.stack": "4.34",
"hardware": "255",
"manufacturer": "Honeywell",
"model": "39339/ZW3107",
"protocol": "zwave",
"zwave.node": "5",
"zwave.smartstart": "no"
},
"name": "Lamp Dimmer",
"parentDeviceId": "",
"persistent": false,
"reachable": true,
"ready": true,
"roomId": "1234567807786211fd31f416",
"security": "no",
"status": "idle",
"subcategory": "dimmable_plugged",
"type": "dimmer.outlet"
}
]
}
}
There is then also a JSON response that lists the functions for each device in the same format as above. However, instead of "devices"=> it is "items"=>, and each function is again keyed by _id.
I'd like to combine function _id tags and descriptions with the device JSON, so I can create a way to send my script "unlock door lock 1" and it subs the number with the _id of the device and the function _id.
You can start with a very rough navigator function like this:
# Look up a device in the parsed devices response by category and position.
#
# data  - Hash parsed from the devices JSON response (string keys); the
#         device list is read from data['result']['devices'].
# name  - category string to match, e.g. 'door_lock'.
# index - zero-based position among the devices that matched.
#
# Returns the matching device Hash, or nil when nothing matches at index.
def find_device(data, name, index)
  # Parsed JSON objects are plain Hashes, so fields must be read with string
  # keys; calling device.name on a Hash raises NoMethodError.
  devices = data.dig('result', 'devices') || []
  # Filter through the device list for entries whose category matches
  # (the documented call passes 'door_lock', which is a category value).
  matches = devices.select do |device|
    device['category'] == name
  end
  matches[index] # Take indexed entry
end
Where now you can do find_device(data, 'door_lock', 0) to dig up that entry.
Converting "door lock 1" to [ 'door_lock', 0 ] should be pretty trivial:
# Turn a phrase such as "door lock 1" into a lookup pair.
#
# str - whitespace-separated words whose last word is a 1-based position.
#
# Returns [joined_words, zero_based_index], e.g. [ 'door_lock', 0 ].
def to_location(str)
  words = str.split(/\s+/)
  # The final word is the position; everything before it is the name.
  position = words.pop
  # Glue the name words together with underscores and shift to 0-based.
  [ words.join('_'), position.to_i - 1 ]
end

Azure Search Normalized Lowercase Field

I am unable to add a normalized copy of the "Title" field to our search index. Ultimately, I'm trying to use this field for case-insensitive order by. Currently, titles are returned in the following order (with $orderBy=TitleCaseInsensitive):
Abc
Bbc
abc
And instead I want: Abc->abc->Bbc. I have forked the "Title" field out into two fields via a Field Mapping and am then applying a Custom Analyzer with the "lowercase" tokenFilter, to the normalized field. Can someone explain why I am not getting the desired results? Here is the relevant portion of the index definition:
"index":{
"name": "current-local-inventory",
"fields": [
{"name": "TitleCaseInsensitive","indexAnalyzer":"caseInsensitiveAnalyzer","searchAnalyzer":"keyword", "type": "Edm.String","filterable": false, "sortable": true, "facetable": false, "searchable": true},
{"name": "Title", "type": "Edm.String","filterable": true, "sortable": true, "facetable": false, "searchable": true}
],
"analyzers": [
{
"@odata.type":"#Microsoft.Azure.Search.CustomAnalyzer",
"name":"caseInsensitiveAnalyzer",
"charFilters":[],
"tokenizer":"keyword_v2",
"tokenFilters":["lowercase"]
}
]
},
"indexers":[{
"fieldMappings" : [
{"sourceFieldName" : "Title", "targetFieldName" : "Title" },
{"sourceFieldName" : "Title", "targetFieldName" : "TitleCaseInsensitive" }
]
}]
See my answer in the related post Azure Search - Accent insensitive analyzer not working when sorting. When you include the lowercase token filter it only affects search and not sorting. See Azure Search User Voice entry Case-insensitive sorting for string fields
My suggested workaround as I explain in the related post is to create a forked/shadow property. However, using an analyzer with a lowercase token filter won't help. The only way I could get your example working was to include a copy of your Title property that was already lowercased. Notice that I don't use fieldMapping and I don't use different analyzers for indexing and search like you have in your example.
CREATE INDEX
Create the index. Replace the variables wrapped in double curly braces ({{...}}) as suitable for your environment.
{
"@odata.context": "https://{{SEARCH_SVC}}.{{DNS_SUFFIX}}/$metadata#indexes/$entity",
"@odata.etag": "\"0x8D8761DCBBCCD00\"",
"name": "{{INDEX_NAME}}",
"defaultScoringProfile": null,
"fields": [
{"name": "Id", "type": "Edm.String", "searchable": false, "filterable": true, "retrievable": true, "sortable": true, "facetable": false, "key": true, "indexAnalyzer": null, "searchAnalyzer": null, "analyzer": null, "synonymMaps": [] },
{"name": "TitleCaseInsensitive","indexAnalyzer": null, "searchAnalyzer": null, "analyzer": "caseInsensitiveAnalyzer", "type": "Edm.String","filterable": false, "sortable": true, "facetable": false, "searchable": true},
{"name": "Title", "type": "Edm.String","filterable": true, "sortable": true, "facetable": false, "searchable": true}
],
"scoringProfiles": [],
"corsOptions": null,
"suggesters": [],
"analyzers": [ {
"@odata.type":"#Microsoft.Azure.Search.CustomAnalyzer",
"name":"caseInsensitiveAnalyzer",
"charFilters":[],
"tokenizer":"keyword_v2",
"tokenFilters":["lowercase"]
}],
"tokenizers": [],
"tokenFilters": [],
"charFilters": [],
"encryptionKey": null
}
UPLOAD
Upload three sample documents.
{
"value": [
{
"@search.action": "mergeOrUpload",
"Id": "1",
"Title": "Abc",
"TitleCaseInsensitive": "abc"
},
{
"@search.action": "mergeOrUpload",
"Id": "2",
"Title": "abc",
"TitleCaseInsensitive": "abc"
},
{
"@search.action": "mergeOrUpload",
"Id": "3",
"Title": "Bbc",
"TitleCaseInsensitive": "bbc"
}
]
}
QUERY
Then, query with $orderby on your lowercased (normalized) property.
https://{{SEARCH_SVC}}.{{DNS_SUFFIX}}/indexes/{{INDEX_NAME}}/docs?search=*&$count=true&$select=Id,Title,TitleCaseInsensitive&searchMode=all&queryType=full&api-version={{API-VERSION}}&$orderby=TitleCaseInsensitive asc
And you'll get the expected results where Title is sorted in a case-insensitive way.
{
"#odata.context": "https://<your-search-service>.search.windows.net/indexes('dg-test-65526118')/$metadata#docs(*)",
"#odata.count": 3,
"value": [
{
"#search.score": 1.0,
"Id": "2",
"TitleCaseInsensitive": "abc",
"Title": "abc"
},
{
"#search.score": 1.0,
"Id": "1",
"TitleCaseInsensitive": "abc",
"Title": "Abc"
},
{
"#search.score": 1.0,
"Id": "3",
"TitleCaseInsensitive": "bbc",
"Title": "Bbc"
}
]
}
I would love to be corrected with a simple way to accomplish this.
Please check out the Text normalization for case-insensitive filtering, faceting and sorting feature that's in Preview.
You can update your index to use this "normalizer" feature for the fields in which you'd like case-insensitive order-by operations.
You don't need a separate field TitleCaseInsensitive anymore. You can add "normalizer": "lowercase" to the Title field, and $orderBy=Title will sort in the order you'd like, ignoring casing.
The "lowercase" normalizer is pre-defined. If you'd like other filters to be applied, please look at predefined and custom normalizers
"index": {
"name": "current-local-inventory",
"fields": [
{"name": "Title", "type": "Edm.String", "filterable": true, "sortable": true, "facetable": false, "searchable": true, "normalizer":"lowercase"}
]
},
"indexers":[{
"fieldMappings" : [
{"sourceFieldName" : "Title", "targetFieldName" : "Title" }
]
}]

How can I ensure alphabetical order with json schema of json objects in json arrays?

I would like to ensure that json objects within a json array are ordered correctly by a specific property with a json schema.
Is that possible? And if so, how can I create such a json schema?
Schema:
{
"type": "object",
"properties": {
"cities": {
"type": "array",
"items": {
"type": "object",
"properties": {
"shortName": {
"type": "string"
},
"name": {
"type": "string"
},
"showInMap": {
"type": "boolean"
},
"active": {
"type": "boolean"
}
},
"??ORDERBY??": "shortName",
"??ORDER??": "ASC",
"required": [
"shortName"
]
}
}
}
}
I would like to filter out json files which are not correctly ordered.
Example: (invalid)
{
"cities": [
{
"shortName": "NY",
"name": "New York",
"showInMap": true,
"active": true
},
{
"shortName": "LD",
"name": "London",
"showInMap": true,
"active": false
},
{
"shortName": "MO",
"name": "Moscow",
"showInMap": false,
"active": false
}
]
}
And accept json files which are ordered correctly.
Example: (valid)
{
"cities": [
{
"shortName": "LD",
"name": "London",
"showInMap": true,
"active": false
},
{
"shortName": "MO",
"name": "Moscow",
"showInMap": false,
"active": false
},{
"shortName": "NY",
"name": "New York",
"showInMap": true,
"active": true
}
]
}
EDIT: This answer makes no use of json schema.
Here is a minimal solution working on your sample data. It is not safe for production, as it is missing all sorts of checks for undefined, but I think you can enhance it at will.
/**
 * Report whether the objects in `array` are in strictly ascending order of
 * the property named by `sortKey`.
 *
 * Every element is compared with its successor using `<`, so two neighbours
 * with equal keys count as unsorted. Arrays with fewer than two elements are
 * treated as sorted. No checks are made for undefined elements or keys.
 *
 * @param {Array<Object>} array   objects to inspect
 * @param {string}        sortKey property name to compare on
 * @returns {boolean} true when each element's key is less than the next one's
 */
function isSorted(array, sortKey) {
  const lastPosition = array.length - 1;
  return array.every((current, position) => {
    // The final element has no successor to compare against.
    if (position >= lastPosition) {
      return true;
    }
    return current[sortKey] < array[position + 1][sortKey];
  });
}
// Sample input whose shortName values are out of order (NY, LD, MO).
const incorrectCase = [
  { shortName: "NY", name: "New York", showInMap: true, active: true },
  { shortName: "LD", name: "London", showInMap: true, active: false },
  { shortName: "MO", name: "Moscow", showInMap: false, active: false }
];

// The same records arranged in ascending shortName order (LD, MO, NY).
const correctCase = [
  { shortName: "LD", name: "London", showInMap: true, active: false },
  { shortName: "MO", name: "Moscow", showInMap: false, active: false },
  { shortName: "NY", name: "New York", showInMap: true, active: true }
];

// Print the isSorted verdict for each sample.
console.log('incorrect case: ', isSorted(incorrectCase, "shortName"));
console.log('correct case: ', isSorted(correctCase, "shortName"));

Join 2 arrays by key and value (AngularJS)

I have 2 objects
{
"_id": "58b7f36b3354c24630f6f3b0",
"name": "refcode",
"caption": "Reference",
"type": "string",
"search": false,
"required": false,
"table": true,
"expansion": true
},
and
{
"_id": "58b7f36b3354c24630f6f3c8",
"vacancyid": "0",
"refcode": "THIS IS MY REF",
"position": "Test",
"jobtype": "Temp",
"department": "Industrial",
"branch": "Office",
"startdate": "02/12/2013",
"contactname": "Person Name",
"contactemail": "person#domain",
"Q_V_TYP": "Daily",
"score": 0
},
Object one defines what a field should be and what it is called
The second object is a job description.
What I need is to match a field to each key (this even sounds confusing in my head, so here is an example)
{
"_id": "58b7f36b3354c24630f6f3c8",
"vacancyid": "0",
"refcode": {
"_id": "58b7f36b3354c24630f6f3b0",
"name": "refcode",
"caption": "Reference",
"type": "string",
"search": false,
"required": false,
"table": true,
"expansion": true,
"value": "THIS IS MY REF"
},
"position": "Test",
"jobtype": "Temp",
"department": "Industrial",
"branch": "Office",
"startdate": "02/12/2013",
"contactname": "Person Name",
"contactemail": "person#domain",
"Q_V_TYP": "Daily",
"score": 0
},
Here you go:
// Field definition: describes what the "refcode" field is and what it is called.
var def = {
  _id: "58b7f36b3354c24630f6f3b0",
  name: "refcode",
  caption: "Reference",
  type: "string",
  search: false,
  required: false,
  table: true,
  expansion: true
};

// Job description record; its keys are field names.
var jobDesc = {
  _id: "58b7f36b3354c24630f6f3c8",
  vacancyid: "0",
  refcode: "THIS IS MY REF",
  position: "Test",
  jobtype: "Temp",
  department: "Industrial",
  branch: "Office",
  startdate: "02/12/2013",
  contactname: "Person Name",
  contactemail: "person#domain",
  Q_V_TYP: "Daily",
  score: 0
};

// If the job description has a key equal to the definition's name, replace
// the raw value with a copy of the definition that carries that value.
var jobDescKeysArr = Object.keys(jobDesc);
var isDefinedField = jobDescKeysArr.some(function (key) {
  return key === def.name;
});
if (isDefinedField) {
  // A match. `def` itself also receives the `value` property here,
  // before it is copied into the job description.
  def.value = jobDesc[def.name];
  jobDesc[def.name] = Object.assign({}, def);
  console.log(jobDesc)
}

How can I rank exact matches higher in azure search

I have an index in azure search that consists of person data like firstname and lastname.
When I search for 3 letter lastnames with a query like
rau&searchFields=LastName
/indexes/customers-index/docs?api-version=2016-09-01&search=rau&searchFields=LastName
The name rau is found but it is quite far at the end.
{
"#odata.context": "myurl/indexes('customers-index')/$metadata#docs(ID,FirstName,LastName)",
"value": [
{
"#search.score": 8.729204,
"ID": "someid",
"FirstName": "xxx",
"LastName": "Liebetrau"
},
{
"#search.score": 8.729204,
"ID": "someid",
"FirstName": "xxx",
"LastName": "Damerau"
},
{
"#search.score": 8.729204,
"ID": "someid",
"FirstName": "xxx",
"LastName": "Rau"
More to the top are names like "Liebetrau","Damerau".
Is there a way to have exact matches at the top?
EDIT
Querying the index definition using the RestApi
GET https://myproduct.search.windows.net/indexes('customers-index')?api-version=2015-02-28-Preview
returned for LastName
"name": "LastName",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"sortable": true,
"facetable": true,
"key": false,
"indexAnalyzer": "prefix",
"searchAnalyzer": "standard",
"analyzer": null,
"synonymMaps": []
Edit 1
The analyzer definition
"scoringProfiles": [],
"defaultScoringProfile": null,
"corsOptions": null,
"suggesters": [],
"analyzers": [
{
"name": "prefix",
"tokenizer": "standard",
"tokenFilters": [
"lowercase",
"my_edgeNGram"
],
"charFilters": []
}
],
"tokenizers": [],
"tokenFilters": [
{
"name": "my_edgeNGram",
"minGram": 2,
"maxGram": 20,
"side": "back"
}
],
"charFilters": []
Edit 2
In the end, specifying a ScoringProfile that I use when querying did the trick
{
"name": "person-index",
"fields": [
{
"name": "ID",
"type": "Edm.String",
"searchable": false,
"filterable": true,
"retrievable": true,
"sortable": true,
"facetable": true,
"key": true,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null
}
,
{
"name": "LastName",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"sortable": true,
"facetable": true,
"key": false,
"analyzer": "my_standard"
},
{
"name": "PartialLastName",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"sortable": true,
"facetable": true,
"key": false,
"indexAnalyzer": "prefix",
"searchAnalyzer": "standard",
"analyzer": null
}
],
"analyzers":[
{
"name":"my_standard",
"@odata.type":"#Microsoft.Azure.Search.CustomAnalyzer",
"tokenizer":"standard_v2",
"tokenFilters":[ "lowercase", "asciifolding" ]
},
{
"name":"prefix",
"@odata.type":"#Microsoft.Azure.Search.CustomAnalyzer",
"tokenizer":"standard_v2",
"tokenFilters":[ "lowercase", "my_edgeNGram" ]
}
],
"tokenFilters":[
{
"name":"my_edgeNGram",
"@odata.type":"#Microsoft.Azure.Search.EdgeNGramTokenFilterV2",
"minGram":2,
"maxGram":20,
"side": "back"
}
],
"scoringProfiles":[
{
"name":"exactFirst",
"text":{
"weights":{ "LastName":2, "PartialLastName":1 }
}
}
]
}
The analyzer "prefix" set on the LastName field produces the following terms for the name Liebetrau: au, rau, trau, etrau, betrau, ebetrau, iebetrau, liebetrau. These are edge ngrams of length ranging from 2 to 20 starting from the back of the word, as defined in the my_edgeNGram token filter in your index definition. The analyzer will process other names in the same way.
When you search for the name rau, it matches all names as they all end with those characters. That's why all documents in your result set have the same relevance score.
You can test your analyzer configurations using the Analyze API.
To learn more about custom analyzers please go here and here.
Hope that helps

Resources