How to index related SQL tables in Azure Search? - azure-cognitive-search

There are two related tables: categories and products. The products table has a field categoryId, which contains the id of its category. How can I index the products table so that I get the name (or anything else) of its category?
I have two indexers, one for each table, and a single index.
// categories indexer
{
  "name": "categories",
  "dataSourceName": "categories",
  "targetIndexName": "products",
  "fieldMappings": [
    {
      "sourceFieldName": "id",
      "targetFieldName": "id"
    },
    {
      "sourceFieldName": "name",
      "targetFieldName": "name"
    }
  ]
}
// products indexer
{
  "name": "products",
  "dataSourceName": "products",
  "targetIndexName": "products",
  "fieldMappings": [
    {
      "sourceFieldName": "id",
      "targetFieldName": "id"
    },
    {
      "sourceFieldName": "name",
      "targetFieldName": "name"
    },
    {
      "sourceFieldName": "categoryId",
      "targetFieldName": "categoryId"
    }
  ]
}
// index
{
  "name": "products",
  "fields": [
    {
      "name": "id",
      "type": "Edm.String",
      ...
    },
    {
      "name": "categoryId",
      "type": "Edm.String",
      ...
    },
    {
      "name": "category_name",
      "type": "Edm.String",
      ...
    },
    {
      "name": "name",
      "type": "Edm.String",
      ...
    }
    ...
  ]
}

Create a view that joins the categories and products tables, potentially denormalizing all categoryIds into a collection field of a product document. (You can use the same approach for other category data.) Then set up an indexer for that view.
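A minimal sketch of that approach, assuming Azure SQL (the view name, data source name, and connection string are illustrative):

CREATE VIEW dbo.ProductsWithCategory AS
SELECT p.id,
       p.name,
       p.categoryId,
       c.name AS category_name
FROM dbo.products p
LEFT JOIN dbo.categories c ON c.id = p.categoryId;

Then point a single data source (and indexer) at the view instead of at the two tables:

// data source over the view (sketch)
{
  "name": "products-with-category",
  "type": "azuresql",
  "credentials": { "connectionString": "..." },
  "container": { "name": "ProductsWithCategory" }
}

With this, one indexer can fill id, name, categoryId, and category_name in the products index, and the two per-table indexers become unnecessary.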
Another approach is to create two separate search indexes for products and categories, and perform the joins in the search client code.
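A rough sketch of the client-side join, using the Azure Search REST query API (service name, query key, and index names are placeholders; fields used in $filter must be marked filterable in the index):

// Query both indexes and stitch the category name together in client code.
// Assumes an environment with fetch (Node 18+ or a browser).
const base = "https://<service>.search.windows.net";
const headers = { "api-key": "<query-key>" };

async function lookup(index, filter) {
  const url = `${base}/indexes/${index}/docs?api-version=2020-06-30` +
    `&$filter=${encodeURIComponent(filter)}`;
  const res = await fetch(url, { headers });
  return (await res.json()).value;
}

async function productsWithCategoryName(categoryId) {
  const [products, categories] = await Promise.all([
    lookup("products", `categoryId eq '${categoryId}'`),
    lookup("categories", `id eq '${categoryId}'`)
  ]);
  const categoryName = categories.length ? categories[0].name : null;
  return products.map(p => ({ ...p, category_name: categoryName }));
}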

Related

MongoDB nested list search and add element into the list

I have a list of documents, each of which contains a children list; the children are the same document type and have their own children arrays, and so on. I want to search for a document by id, where the match might be a parent, a child, a grandchild, or deeper. How can I do that in MongoDB?
This is what my tree structure looks like:
[
  {
    "_id": { "$oid": "632fcf89b79445a59228851a" },
    "label": "Cow",
    "children": []
  },
  {
    "_id": { "$oid": "632fcf8db79445a59228851b" },
    "label": "Rat",
    "children": [
      {
        "_id": { "$oid": "632fd378d7316cdaf81c18a7" },
        "label": "Cub",
        "children": [
          {
            "_id": { "$oid": "632fd378d7316cdaf81c18a8" },
            "label": "Deer",
            "children": []
          }
        ]
      }
    ]
  },
  {
    "_id": { "$oid": "632fcf8eb79445a59228851c" },
    "label": "Lion",
    "children": []
  }
]
This is what I tried, but it only works one level deep; it does not search the children lists exhaustively.
var toInsert = { _id: ObjectId(), "label": "Tiger", "children": [] };
db.animal.findOneAndUpdate(
  { "_id": ObjectId("632fd378d7316cdaf81c18a7") },
  { $push: { children: toInsert } }
);
db.collection.find({});
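As a sketch of common workarounds (not from this thread; collection and field names follow the question): embedded arrays can be matched at a bounded depth with dot notation, and updated in place with filtered positional operators (arrayFilters, available since MongoDB 3.6). Truly arbitrary depth generally calls for restructuring the tree, e.g. one document per node with a parentId, traversed with $graphLookup.

// Find a node at any of the first three levels (bounded depth).
var id = ObjectId("632fd378d7316cdaf81c18a7");
db.animal.find({
  $or: [
    { "_id": id },
    { "children._id": id },
    { "children.children._id": id }
  ]
});

// Push a new node into the matched child's own children array.
var toInsert = { _id: ObjectId(), label: "Tiger", children: [] };
db.animal.updateOne(
  { "children._id": id },
  { $push: { "children.$[c].children": toInsert } },
  { arrayFilters: [{ "c._id": id }] }
);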

Update all objects in nested array with values from other collection

I have a collection of vehicles with the following car structure:
{
  "_id": {},
  "brand": "",
  "model": "",
  "year": "",
  "suppliers": [
    {
      "name": "",
      "contact": "",
      "supplierId": "",
      "orders": [],  <-- Specific to the vehicles collection
      "info": ""     <-- Specific to the vehicles collection
    }
  ]
}
And a Suppliers collection with a structure like:
{
  "name": "",
  "contact": "",
  "_id": {},
  "internalId": "",
  "address": "",
  ...
}
I need to add a new field to each entry of the suppliers array, within each document in the vehicles collection, holding the internalId from the supplier in the suppliers collection that has the same _id.
If a suppliers array entry has the id 123, I should go to the suppliers collection, look up the supplier with _id 123, retrieve its internalId, and then create the field in the suppliers array entry with that value.
So that I end up with the vehicles collection as:
{
  "_id": {},
  "brand": "",
  "model": "",
  "year": "",
  "suppliers": [
    {
      "name": "",
      "contact": "",
      "supplierId": "",
      "internalId": ""  <-- the new field
    }
  ]
}
Tried:
db.vehicles.aggregate([
  {
    "$unwind": { "path": "$suppliers", "preserveNullAndEmptyArrays": false }
  },
  {
    "$project": { "supplierObjId": { "$toObjectId": "$suppliers.supplierId" } }
  },
  {
    "$lookup": {
      "from": "suppliers",
      "localField": "supplierObjId",
      "foreignField": "_id",
      "as": "supplierInfo"
    }
  },
  {
    "$set": {
      "suppliers.internalId": "$supplierInfo.internalid"
    }
  }
])
But it is adding the new field to the returned values instead of to the array items in the collection.
How can I achieve this?
The .aggregate method does not update documents; it only shapes the result documents. You have to use two queries: first the aggregate, then an update. I assume you want to run this just once, so here is a query you can execute in the mongo shell.
Aggregation query:
- $lookup with a pipeline, passing suppliers.supplierId in let
- $toString to convert the ObjectId to string type
- $match with the $in condition
- $project to show the required fields
- $map to iterate over the suppliers array
- $reduce to iterate over the suppliers_data array and find the matching record by supplierId
- $mergeObjects to merge the current object's properties with the new internalId property
Then loop over the aggregate result using forEach and run an update query on the suppliers array:
db.vehicles.aggregate([
  {
    $lookup: {
      from: "suppliers",
      let: { supplierId: "$suppliers.supplierId" },
      pipeline: [
        {
          $match: {
            $expr: { $in: [{ $toString: "$_id" }, "$$supplierId"] }
          }
        },
        {
          $project: {
            _id: 0,
            supplierId: { $toString: "$_id" },
            internalId: 1
          }
        }
      ],
      as: "suppliers_data"
    }
  },
  {
    $project: {
      suppliers: {
        $map: {
          input: "$suppliers",
          as: "s",
          in: {
            $mergeObjects: [
              "$$s",
              {
                internalId: {
                  $reduce: {
                    input: "$suppliers_data",
                    initialValue: "",
                    in: {
                      $cond: [
                        { $eq: ["$$this.supplierId", "$$s.supplierId"] },
                        "$$this.internalId",
                        "$$value"
                      ]
                    }
                  }
                }
              }
            ]
          }
        }
      }
    }
  }
]).forEach(function (doc) {
  db.vehicles.updateOne({ _id: doc._id }, { $set: { suppliers: doc.suppliers } });
});
It looks like one way to solve this is by using $lookup and $addFields: first flatten the suppliers with $unwind, join the matching supplier with $lookup, add the property with $addFields, then regroup with $group.
Database
Consider the following database structure:
[{
  // Collection
  "vehicles": [
    {
      "_id": "1",
      brand: "ford",
      model: "explorer",
      year: "1999",
      suppliers: [
        { name: "supplier1", contact: "john doe", supplierId: "001" },
        { name: "supplier2", contact: "jane doez", supplierId: "002" }
      ]
    },
    {
      "_id": "2",
      brand: "honda",
      model: "accord",
      year: "2002",
      suppliers: [
        { name: "supplier1", contact: "john doe", supplierId: "001" }
      ]
    }
  ],
  // Collection
  "suppliers": [
    {
      "name": "supplier1",
      "contact": "john doe",
      "_id": "001",
      "internalId": "999-001",
      "address": "111 main street"
    },
    {
      "name": "supplier2",
      "contact": "jane doez",
      "_id": "002",
      "internalId": "999-002",
      "address": "222 north street"
    },
    {
      "name": "ignored_supplier",
      "contact": "doesnt matter",
      "_id": "xxxxxxx",
      "internalId": "xxxxxxx",
      "address": "0987 midtown"
    }
  ]
}]
Query
This is the query that I was able to get working. I'm not sure how efficient it is, or if it can be improved, but this seemed to do the trick:
db.vehicles.aggregate([
  { $unwind: "$suppliers" },
  {
    $lookup: {
      from: "suppliers",
      localField: "suppliers.supplierId",
      foreignField: "_id", // <---- OR MATCH WHATEVER FIELD YOU WANT
      as: "vehicle_suppliers"
    }
  },
  { $unwind: "$vehicle_suppliers" },
  {
    $addFields: { "suppliers.internalId": "$vehicle_suppliers.internalId" }
  },
  {
    $group: {
      _id: "$_id",
      brand: { $first: "$brand" },
      model: { $first: "$model" },
      year: { $first: "$year" },
      suppliers: { $push: "$suppliers" }
    }
  }
])
Results
Which returns:
[
  {
    "_id": "2",
    "brand": "honda",
    "model": "accord",
    "suppliers": [
      {
        "contact": "john doe",
        "internalId": "999-001",
        "name": "supplier1",
        "supplierId": "001"
      }
    ],
    "year": "2002"
  },
  {
    "_id": "1",
    "brand": "ford",
    "model": "explorer",
    "suppliers": [
      {
        "contact": "john doe",
        "internalId": "999-001",
        "name": "supplier1",
        "supplierId": "001"
      },
      {
        "contact": "jane doez",
        "internalId": "999-002",
        "name": "supplier2",
        "supplierId": "002"
      }
    ],
    "year": "1999"
  }
]

JSON Schema: First Item of Array

I need a JSON array of arbitrary length. Each item in the array is a JSON object, and they all have the same keys and types.
But the schema should make one exception: the first object doesn't need all keys, so the schema's required list should be shorter for the first item.
I tried schemas with "items" and "prefixItems" without luck. It seems that "prefixItems" is ignored, regardless of draft version, when used with "items". Because the array can be of arbitrary length, I guess I cannot use multiple schemas with "items".
{
  "description": "Schema for array data",
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "version": "0-0-6",
  "type": "object",
  "required": ["data"],
  "properties": {
    "data": {
      "type": "array",
      "prefixItems": [
        {
          "type": "object",
          "required": ["name"],
          "properties": {
            "name": { "type": "string" },
            "age": { "type": "number" },
            "city": { "type": "string" }
          }
        }
      ],
      "items": {
        "type": "object",
        "required": ["name", "age", "city"],
        "properties": {
          "name": { "type": "string" },
          "age": { "type": "number" },
          "city": { "type": "string" }
        }
      }
    }
  }
}
My data:
{
  "data": [
    { "name": "Tom" },
    { "name": "Ben", "age": 32, "city": "Berlin" },
    { "name": "Mike", "age": 40, "city": "Boston" }
  ]
}
For validation I use:
https://www.jsonschemavalidator.net/
https://jsonlint.com/
Validation of my example gives an error for the first item:
"Required properties are missing from object: age, city."
Use additionalItems together with the array form of items (the draft-07 equivalent of prefixItems plus items).
Schema:
{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "array",
  "items": [
    { "type": "number" },
    { "type": "string" },
    { "enum": ["Street", "Avenue", "Boulevard"] },
    { "enum": ["NW", "NE", "SW", "SE"] }
  ],
  "additionalItems": { "type": "number" }
}
Data that validates:
[
  1600,
  "Pennsylvania",
  "Avenue",
  "NW",
  1
]
The issue you're running into is that these validators only support up to draft 7 of the standard, and using prefixItems together with items this way was only introduced in the 2020-12 draft. It's not supported there yet.
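Applied to the schema in the question, a draft-07 version of the same idea might look like this (a sketch: the array form of items validates the first item with the shorter required list, and additionalItems validates all remaining items):

{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "type": "object",
  "required": ["data"],
  "properties": {
    "data": {
      "type": "array",
      "items": [
        {
          "type": "object",
          "required": ["name"],
          "properties": {
            "name": { "type": "string" },
            "age": { "type": "number" },
            "city": { "type": "string" }
          }
        }
      ],
      "additionalItems": {
        "type": "object",
        "required": ["name", "age", "city"],
        "properties": {
          "name": { "type": "string" },
          "age": { "type": "number" },
          "city": { "type": "string" }
        }
      }
    }
  }
}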

MongoDB Realm issue: schema for namespace (tracker.users) must include partition key "_partition"

I'm trying to add a list of friends to each user as a property, but I don't know why I get this error: MongoDB Realm issue: schema for namespace (tracker.users) must include partition key "_partition". For my project, tasks.users, I have the _id, user_id, and name, and the friends list will contain all three as well.
{
  "title": "User",
  "required": [
    "_id",
    "user_id",
    "name"
  ],
  "properties": {
    "_id": { "bsonType": "objectId" },
    "user_id": { "bsonType": "string" },
    "name": { "bsonType": "string" },
    "_partion": { "bsonType": "string" },
    "friends": {
      "bsonType": ["array"],
      "uniqueItems": true,
      "additionalProperties": false,
      "items": {
        "bsonType": ["object"],
        "required": ["id"],
        "additionalProperties": false,
        "description": "'items' must contain the stated fields.",
        "properties": {
          "name": { "bsonType": "string" },
          "id": { "bsonType": "objectId" },
          "user_id": { "bsonType": "string" },
          "rank": { "bsonType": "int" }
        }
      }
    }
  }
}
If you are using the beta MongoDB Realm Sync, your objects need to include a partition key property:
MongoDB Realm uses partition keys to map MongoDB documents in a synced MongoDB Atlas cluster into Realm objects in realms.
You likely defined a partition key when you set up Sync in the MongoDB Realm console. Every object that you want synced needs to contain it; note that the schema above defines _partion, which looks like a typo for _partition.
This is a really good read on how to use partition keys:
Partition Atlas Data into Realms
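A minimal sketch of the fix, assuming the partition key chosen when Sync was configured is named _partition (rename the misspelled property in the schema's "properties" block):

"_partition": {
  "bsonType": "string"
}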

Elasticsearch document mapping type is different as stored type

Hi, I have an index in Elasticsearch 2 which contains this definition of the contacts property:
...
"contacts": {
  "properties": {
    "domains": {
      "type": "string",
      "index": "not_analyzed"
    },
    "emails": {
      "type": "string",
      "index": "not_analyzed"
    },
    "phones": {
      "type": "string",
      "index": "not_analyzed"
    }
  }
},
...
As far as I can see, the data in this index for the contacts property contains arrays instead of the strings the mapping declares. How is this possible?
"contacts": {
"domains": [
"http://www.xxxxx.sk",
"http://www.sssss.sk",
"http://ddddd.sk",
"http://www.ddddd.sk",
"http://www.wwwww.sk",
"http://www.ffffffff.sk"
],
"phones": [
"123456789",
"987456321",
"852147963"
],
"emails": [
"ccc#cccccc.sk",
"ggggggg#vggggg.sk",
"qqqqqqq#qqqqq.sk",
"sssssssss#sssss.sk",
"nadacia#volkswagen.sk",
"vvvvvv#vvvvvvv.sk",
]
},
From the ES documentation: https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html
In Elasticsearch, there is no dedicated array type. Any field can contain zero or more values by default; however, all values in the array must be of the same datatype.
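For example, both of these documents index fine against a single string field mapping like the one above (a sketch; index and type names are illustrative, using the Elasticsearch 2.x console syntax):

// single value stored in the field
PUT /myindex/contact/1
{ "emails": "a@example.sk" }

// array of values stored in the very same field
PUT /myindex/contact/2
{ "emails": ["a@example.sk", "b@example.sk"] }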
