Mongo aggregate guidance - arrays

I need some guidance please.
I have a data structure that looks like this (note that the certResults field is an array, in this case - apple.com and reuters.com, but typically there are more results within the certResults array):
{
"deviceTag": "",
"clientHostName": "555317e186a0",
"dataFormatVersion": 10,
"certResults":
[
{
"hostname": "apple.com",
"port": 443,
"startTime": "2022/07/01 03:50:57.867716",
"endTime": "2022/07/01 03:50:57.960064",
"queryTime": 92.35,
"certificateInfo":
{
"subject":
{
"countryName": "US",
"stateOrProvinceName": "California",
"localityName": "Cupertino",
"organizationName": "Apple Inc.",
"commonName": "apple.com"
},
"certificateIssuer":
{
"countryName": "US",
"organizationName": "Apple Inc.",
"commonName": "Apple Public EV Server ECC CA 1 - G1"
},
"version": 3,
"serialNumber": "6A1D3FA84A43C329F1051060FF4698BA",
"notBefore": "Apr 26 21:58:37 2022 GMT",
"notAfter": "May 26 21:58:36 2023 GMT",
"OCSP":
[
"http://ocsp.apple.com/ocsp03-apevsecc1g101"
],
"crlDistributionPoints":
[
"http://crl.apple.com/apevsecc1g1.crl"
],
"caIssuers":
[
"http://certs.apple.com/apevsecc1g1.der"
],
"subjectAltName":
{
"DNS0": "apple.com"
}
},
"timeLeft": "10 months, 25 days, 18 hours, 7 minutes, 39 seconds",
"percentageUtilization": 16.52
},
{
"hostname": "reuters.com",
"port": 443,
"startTime": "2022/07/01 03:50:57.962692",
"endTime": "2022/07/01 03:50:58.271235",
"queryTime": 308.54,
"certificateInfo":
{
"subject":
{
"commonName": "reuters.com"
},
"certificateIssuer":
{
"countryName": "US",
"organizationName": "Let's Encrypt",
"commonName": "R3"
},
"version": 3,
"serialNumber": "04203F2F15F8194772481DABC1061E213EAB",
"notBefore": "Jun 6 12:54:06 2022 GMT",
"notAfter": "Sep 4 12:54:05 2022 GMT",
"OCSP":
[
"http://r3.o.lencr.org"
],
"caIssuers":
[
"http://r3.i.lencr.org/"
],
"subjectAltName":
{
"DNS0": "reuters.com"
}
},
"timeLeft": "2 months, 3 days, 9 hours, 3 minutes, 7 seconds",
"percentageUtilization": 27.36
}
]
}
A document with this structure is generated every hour for the same hosts - i.e. each document has the same shape, with updated results in the certResults array.
I'm struggling with the syntax of MongoDB's aggregate function.
Using MongoDB's find function, I can collate entries for a specific host by first filtering the dataFormatVersion:10 and then looking at the certResults.hostname field for "reuters.com":
db.certCollection.find({ dataFormatVersion:10, certResults: {$elemMatch: {hostname: "reuters.com"}} }, {_id: 0, certResults: {"hostname.$": 1, "startTime": 1 , "port": 1, "percentageUtilization": 1 }} );
Which presents this information:
{ "certResults" : [ { "hostname" : "reuters.com", "port" : 443, "startTime" : "2022/06/28 16:31:49.919962", "percentageUtilization" : 24.61 } ] }
{ "certResults" : [ { "hostname" : "reuters.com", "port" : 443, "startTime" : "2022/06/28 16:34:55.868512", "percentageUtilization" : 24.61 } ] }
{ "certResults" : [ { "hostname" : "reuters.com", "port" : 443, "startTime" : "2022/06/28 16:57:38.926443", "percentageUtilization" : 24.63 } ] }
{ "certResults" : [ { "hostname" : "reuters.com", "port" : 443, "startTime" : "2022/06/28 17:00:02.359976", "percentageUtilization" : 24.63 } ] }
etc.
I'm not sure how to aggregate data for the percentageUtilization field.
I'd like to find out the average for the percentageUtilization field across the responding documents for this hostname "reuters.com" only. Is there a way to filter on startTime and endTime fields as well?
Similarly, I'd like to find out the average percentageUtilization across all host entries for all responding documents (i.e. reuters.com and apple.com) within a certain timeframe (based on startTime and endTime fields).
Any help would be greatly appreciated!
Thank you!

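Aha! The trick I was looking for was to use $unwind on the certResults array, so that each array element becomes its own document; the pipeline can then $match on certResults.hostname (and on certResults.startTime / certResults.endTime) and $group with $avg on certResults.percentageUtilization.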

Related

How to get the total number of marks based on sid?

Input is as follows:
[
{
"sid": 101,
"sname": "Rahul",
"sem": 1,
"marks": 9
},
{
"sid": 102,
"sname": "Rahul",
"sem": 2,
"marks": 9.5
},
{
"sid": 102,
"sname": "Rahul",
"sem": 3,
"marks": 8
},
{
"sid": 101,
"sname": "Rahul",
"sem": 4,
"marks": 7
}
]
I need to get the total number of marks based on the sid attribute.
You can group by sid and then, for each sid, sum all the marks.
%dw 2.0
output application/json
---
payload
groupBy ($.sid)
mapObject ($$): sum($.*marks)
Output:
{
"101": 16,
"102": 17.5
}
Note that if you are going to use this information for further processing, it will be more efficient to output to application/java instead of JSON.
Alternatively, if you prefer to have a list with one element per sid, you can use pluck().
%dw 2.0
output application/json
---
payload
groupBy ($.sid)
pluck { sid: ($$), marksTotal: sum($.*marks) }
Output:
[
{
"sid": "101",
"marksTotal": 16
},
{
"sid": "102",
"marksTotal": 17.5
}
]

Querying CosmosDB based on timestamps

I am working with a CosmosDB instance set up by one of my colleagues and connecting to it using a connection string. The database contains several JSON documents with the following schema:
{
"period": "Raw",
"source": "Traffic",
"batchId": "ee737270-0b72-49b7-a2f1-201f642e9c81",
"periodName": "Raw",
"sourceName": "Traffic",
"groupKey": "gc4151_a",
"partitionKey": "traffic-gc4151_a-raw-raw",
"time": "2021-08-05T23:55:10",
"minute": 55,
"hour": 23,
"day": 05,
"month": 08,
"quarter": 3,
"year": 2021,
"minEventTime": "2021-08-05T23:55:09",
"maxEventTime": "2021-08-05T23:55:11",
"meta": {
"siteId": "GC4151_A",
"from": {
"lat": "55.860894822588506",
"long": "-4.284365958508686"
},
"to": {
"lat": "55.86038667864348",
"long": "-4.2826901232101795"
}
},
"measurements": {
"flow": [
{
"calculation": "Raw",
"name": "flow",
"calculationName": "Raw",
"value": 0
}
],
"concentration": [
{
"calculation": "Raw",
"name": "concentration",
"calculationName": "Raw",
"value": 0
}
]
},
"added": "2021-08-05T12:21:32.000819Z",
"updated": "2021-08-05T12:21:32.000819Z",
"id": "d4346f50-543e-4c4d-82cf-835b480914c2",
"_rid": "4RRTAIYVA1AIAAAAAAAAAA==",
"_self": "dbs/4RRTAA==/colls/4RRTAIYVA1A=/docs/4RRTAIYVA1AIAAAAAAAAAA==/",
"_etag": "\"1c0015a1-0000-1100-0000-5f3fbc4c0000\"",
"_attachments": "attachments/",
"_ts": 1598012492
}
I am trying to write a SQL query to select all the records that fall between the current date-time and one week earlier, so I can use these to perform future calculations.
I have attempted to use both of the following:
SELECT *
FROM c
WHERE c.time > date_sub(now(), interval 1 week);
and
SELECT *
FROM c
WHERE c.time >= DATE_ADD(CURDATE(), INTERVAL -7 DAY);
However, both of these return the following error:
Gateway Failed to Retrieve Query Plan: Message: {"errors":[{"severity":"Error","location":{"start":124,"end":125},"code":"SC1001","message":"Syntax error, incorrect syntax near '1'."}]}
ActivityId: 51c3b6f7-e760-4062-bd80-8cc9f8de5352, Microsoft.Azure.Documents.Common/2.14.0, Microsoft.Azure.Documents.Common/2.14.0
My question is what is the issue with my code, and how can I fix it?
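date_sub, now(), CURDATE() and INTERVAL are MySQL syntax, which the Cosmos DB SQL API does not support - hence the syntax error. You can use DateTimeAdd and GetCurrentDateTime() to achieve this instead, e.g.: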
SELECT *
FROM c
WHERE c.time > DateTimeAdd("day",-7,GetCurrentDateTime() )
Let me know if this works for you.

Array within Element within Array in Variant

How can I get the data out of this array stored in a variant column in Snowflake? I don't care if it's a new table, a view or a query. There is a second column of type varchar(256) that contains a unique ID.
If you can just help me read the "confirmed" data and the "editorIds" data I can probably take it from there. Many thanks!
Output example would be
UniqueID ConfirmationID EditorID
u3kd9 xxxx-436a-a2d7 nupd
u3kd9 xxxx-436a-a2d7 9l34c
R3nDo xxxx-436a-a3e4 5rnj
yP48a xxxx-436a-a477 jTpz8
yP48a xxxx-436a-a477 nupd
[
{
"confirmed": {
"Confirmation": "Entry ID=xxxx-436a-a2d7-3525158332f0: Confirmed order submitted.",
"ConfirmationID": "xxxx-436a-a2d7-3525158332f0",
"ConfirmedOrders": 1,
"Received": "8/29/2019 4:31:11 PM Central Time"
},
"editorIds": [
"xxsJYgWDENLoX",
"JR9bWcGwbaymm3a8v",
"JxncJrdpeFJeWsTbT"
] ,
"id": "xxxxx5AvGgeSHy8Ms6Ytyc-1",
"messages": [],
"orderJson": {
"EntryID": "xxxxx5AvGgeSHy8Ms6Ytyc-1",
"Orders": [
{
"DropShipFlag": 1,
"FromAddressValue": 1,
"OrderAttributes": [
{
"AttributeUID": 548
},
{
"AttributeUID": 553
},
{
"AttributeUID": 2418
}
],
"OrderItems": [
{
"EditorId": "aC3f5HsJYgWDENLoX",
"ItemAssets": [
{
"AssetPath": "https://xxxx573043eac521.png",
"DP2NodeID": "10000",
"ImageHash": "000000000000000FFFFFFFFFFFFFFFFF",
"ImageRotation": 0,
"OffsetX": 50,
"OffsetY": 50,
"PrintedFileName": "aC3f5HsJYgWDENLoX-10000",
"X": 50,
"Y": 52.03909266409266,
"ZoomX": 100,
"ZoomY": 93.75
}
],
"ItemAttributes": [
{
"AttributeUID": 2105
},
{
"AttributeUID": 125
}
],
"ItemBookAttribute": null,
"ProductUID": 52,
"Quantity": 1
}
],
"SendNotificationEmailToAccount": true,
"SequenceNumber": 1,
"ShipToAddress": {
"Addr1": "Addr1",
"Addr2": "0",
"City": "City",
"Country": "US",
"Name": "Name",
"State": "ST",
"Zip": "00000"
}
}
]
},
"orderNumber": null,
"status": "order_placed",
"submitted": {
"Account": "350000",
"ConfirmationID": "xxxxx-436a-a2d7-3525158332f0",
"EntryID": "xxxxx-5AvGgeSHy8Ms6Ytyc-1",
"Key": "D83590AFF0CC0000B54B",
"NumberOfOrders": 1,
"Orders": [
{
"LineItems": [],
"Note": "",
"Products": [
{
"Price": "00.30",
"ProductDescription": "xxxxxint 8x10",
"Quantity": 1
},
{
"Price": "00.40",
"ProductDescription": "xxxxxut Black 8x10",
"Quantity": 1
},
{
"Price": "00.50",
"ProductDescription": "xxxxx"
},
{
"Price": "00.50",
"ProductDescription": "xxxscount",
"Quantity": 1
}
],
"SequenceNumber": "1",
"SubTotal": "00.70",
"Tax": "1.01",
"Total": "00.71"
}
],
"Received": "8/29/2019 4:31:10 PM Central Time"
},
"tracking": null,
"updatedOn": 1.598736670503000e+12
}
]
So, this is how I'd query that exact JSON assuming the data is in column var in table x:
SELECT x.var[0]:confirmed:ConfirmationID::varchar as ConfirmationID,
f.value::varchar as EditorID
FROM x,
LATERAL FLATTEN(input => var[0]:editorIds) f
;
Since your sample output doesn't match the JSON that you provided, I will assume that this is what you need.
Also, as a note, your JSON includes outer [ ] which indicates that the entire JSON string is inside an array. This is the reason for var[0] in my query. If you have multiple records inside that array, then you should not hard-code the [0] index, since it only reads the first record. In general, you should exclude the outer brackets and instead load each record into the table separately. I wasn't sure whether you could make that change, so I just wanted to make note of it.

Calling parse function in swift fails

I am working on a food delivery app, which uses parse as its backend. I am facing a problem while calling the placeOrder API through
PFCloud.callFunction(inBackground: PlaceOrder, withParameters: params) { (data, err) in}
Please have a look at the JSON which I need to post below.
{
"source": "card_1EVYuOEynlyM6L4SHgBMJYRQ",
"userId": "YjSZYSXEp7",
"data": {
"menuItems": [{
"id": "QSYa2JDcIm",
"title": "Rice With Tibss(Beef)",
"menuTitle": "Rice With Tibss",
"submenuItem": [{
"id": "zaOo6G4KSV",
"name": "Beef",
"price": 12,
"desc": "Fillings?"
}],
"price": 24,
"qty": 1,
"storeId": "yqBCDmzaDP",
"storeName": "Ibex Ethiopian Cusine and Bar",
"orderType": "takeout",
"taxState": 0.0925,
"storeInfo": {
"cart_storeId": "yqBCDmzaDP",
"cart_storeName": "Ibex Ethiopian Cusine and Bar",
"cart_storeImage": "https://res.cloudinary.com/http-get-tolofood-com/image/upload/c_scale,h_199,q_auto,w_270/v1461575640/Ibex_lopx38.jpg",
"cart_storeCuisine": "Ethiopian",
"cart_storeDescription": "We always serve a quality food. We always serve a quality food. We always serve a quality food. We always serve a quality food.",
"cart_storeRating": 3.33,
"cart_storeDelivery": false,
"takeout": true,
"address": "12255 Greenville Ave,Dallas, TX 75243",
"slugname": "TX_DAL_ibex_ethiopian_cuisine_and_bar",
"multiple_location": false,
"cart_storeDeliveryFee": 15,
"cart_storeServes": "Lunch,Dinner",
"busy": false,
"cart_storeSeoSlug": "ibex-ethiopian-cusine-and-bar"
},
"enable": true,
"voice_read_mi_label": "fbgcb",
"voice_read_mi_option": false,
"menuTypeName": "Standard"
}],
"lastOrderType": "takeout",
"searchedAddress": "takeout",
"timeData": {
"day": "06-05-2019",
"time": "12:55 am",
"tz": "America/Los_Angeles"
}
},
"unavailable_option": "restaurant_recommendation"
}
And below is the Swift code which I have used to pass it.
let storeInfo: Dictionary = [CartStoreId: self.cartStoreId, CartStoreName: self.cartRestaurantName, CartStoreImage: self.cartStoreImage, CartStoreCuisine: self.cartStoreCuisine, CartStoreDescription: self.cartStoreDescription, CartStoreRating: self.cartStoreRating, CartStoreDelivery: self.cartStoreDelivery, Takeout: self.takeOut, Address: self.address, Slugname: self.slugName, MultipleLocation: self.multipleLocation, CartStoreDeliveryFee: self.cartStoreDelivery, CartStoreServes: self.cartStoreServes, Busy: self.busy, CartStoreSeoSlug: self.cartStoreSeoSlug] as Dictionary
let subMenuItem = ["id": "zaOo6G4KSV", "name": "Beef", "price": 12, "desc": "Fillings?", "voice_read_submi_label":"bf", "voice_read_submi_option":false, "disabled": false] as [String: Any]
let ordersDictionary = [
"id" : "1234",
"title" : "Test",
"menuTitle" : "MenuName",
"price" : 23,
"qty" : 2,
"storeId" : 23,
"orderType" : "standard",
"taxState" : 0.22,
"enable" : true,
"menuTypeName" : "Type Name",
"voice_read_mi_label":"fdfs",
"voice_read_mi_option":"false",
"submenuitem": subMenuItem,
"storeInfo": storeInfo
] as Dictionary
let timeData = ["day" : 17-06-2019, "time": "11:00 AM", "tz": "America/Los_Angeles"] as Dictionary
let data = ["menuItems": ordersDictionary, "lastOrderType": "takeout", "searchedAddress": "takeout", "timeData" : timeData] as Dictionary
let params = [UserId: self.userId, "source":"card_1EVYuOEynlyM6L4SHgBMJYRQ", "data": data, "unavailable_option":"restaurant_recommendation","_ApplicationId":"6EuadToYoFGJhI1sX8XnuFBz9tp9l3yH6HxzzXZO", "_JavaScriptKey":"rQkALu9saFtF2oq9yCibyw6mEcs3PVqct3uuP6vg", "_ClientVersion":"js1.6.14", "_InstallationId":"444ec64d-5fcc-7b8e-596e-6be627892c2a",
"_SessionToken":"r:c966376120c8eca77aa63c29d5bebe1a"] as Dictionary
After all this is done I call the parse function like below.
PFCloud.callFunction(inBackground: PlaceOrder, withParameters: params) { (data, err) in
if err != nil {
print(err!)
} else {
print(data!)
}
}
But this gives me an error after a few seconds, saying:
"Error Domain=NSCocoaErrorDomain Code=3840 "JSON text did not start with array or object and option to allow fragments not set." UserInfo={NSDebugDescription=JSON text did not start with array or object and option to allow fragments not set.}"
I have searched the web with the error and made fixes accordingly but still no success. Please help me guys.
I noticed that your params variable does not match the JSON you need to send: it contains extra fields and is also missing some. Moreover, menuItems and submenuItem are arrays in your JSON but objects (dictionaries) in your code. This is probably causing the cloud code function to fail, so you are not receiving valid JSON back. Try the following and check if it works. If it does, just replace the literal values with your variables.
let params = [
"source": "card_1EVYuOEynlyM6L4SHgBMJYRQ",
"userId": "YjSZYSXEp7",
"data": [
"menuItems": [[
"id": "QSYa2JDcIm",
"title": "Rice With Tibss(Beef)",
"menuTitle": "Rice With Tibss",
"submenuItem": [[
"id": "zaOo6G4KSV",
"name": "Beef",
"price": 12,
"desc": "Fillings?"
]],
"price": 24,
"qty": 1,
"storeId": "yqBCDmzaDP",
"storeName": "Ibex Ethiopian Cusine and Bar",
"orderType": "takeout",
"taxState": 0.0925,
"storeInfo": [
"cart_storeId": "yqBCDmzaDP",
"cart_storeName": "Ibex Ethiopian Cusine and Bar",
"cart_storeImage": "https://res.cloudinary.com/http-get-tolofood-com/image/upload/c_scale,h_199,q_auto,w_270/v1461575640/Ibex_lopx38.jpg",
"cart_storeCuisine": "Ethiopian",
"cart_storeDescription": "We always serve a quality food. We always serve a quality food. We always serve a quality food. We always serve a quality food.",
"cart_storeRating": 3.33,
"cart_storeDelivery": false,
"takeout": true,
"address": "12255 Greenville Ave,Dallas, TX 75243",
"slugname": "TX_DAL_ibex_ethiopian_cuisine_and_bar",
"multiple_location": false,
"cart_storeDeliveryFee": 15,
"cart_storeServes": "Lunch,Dinner",
"busy": false,
"cart_storeSeoSlug": "ibex-ethiopian-cusine-and-bar"
],
"enable": true,
"voice_read_mi_label": "fbgcb",
"voice_read_mi_option": false,
"menuTypeName": "Standard"
]],
"lastOrderType": "takeout",
"searchedAddress": "takeout",
"timeData": [
"day": "06-05-2019",
"time": "12:55 am",
"tz": "America/Los_Angeles"
]
],
"unavailable_option": "restaurant_recommendation"
]

How can I check for duplicate nested arrays inside of documents in Mongoose?

Here is an example of a nested document that I have in my collection:
{
"title" : "front-end developer",
"age" : 25,
"name" : "John",
"city" : "London",
"skills" : [
{
"name" : "js",
"project" : "1",
"scores" : [
{
max: 76,
date: date
},
{
max: 56,
date: date
}
]
},
{
"name" : "CSS",
"project" : "5",
"scores" : [
{
max: 86,
date: date
},
{
max: 36,
date: date
},
{
max: 56,
date: date
},
]
}
]
}
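Is there a simple way of determining whether other documents have a skills array that is identical to (a duplicate of) this one, i.e. has the same keys, values and array indexes? Only the skills array needs to match. Any help would be greatly appreciated. Thanks!
You can group the documents by the whole skills array and keep only the groups that contain more than one document: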
collection.aggregate({
"$group": {
"_id": "$skills",
"docs": {
"$push": "$$ROOT"
},
"count": {
$sum: 1
}
}
}, {
$match: {
"count": {
$gt: 1
}
}
})
If you are looking for developers with the same skillset, you can use the $all operator:
var john = db.developers.findOne(...);
var devs = db.developers.find({ 'skills.name': { $all: john.skills.map(x => x.name) } });

Resources