Alternative to the 'coll.CountDocuments' function in MongoDB with Go (aggregation pipeline)

As you can see, I am calling the "coll.CountDocuments" function multiple times. What I want is to write the code without calling "coll.CountDocuments" repeatedly, by aggregating all the filters into a single query.
func NoOfDocumentsInfo(DB string, col string, filters ...bson.D) ([]int64, error) {
    if nil == dbInstance {
        if nil == GetDBInstance() {
            logger.Error("Not connecting to DB")
            err := errors.New("DB connection error")
            return nil, err
        }
    }
    logger.Debugf("%s %s", DB, col)
    coll := dbInstance.Database(DB).Collection(col)

    counts := make([]int64, len(filters))
    for i, filter := range filters {
        count, err := coll.CountDocuments(context.TODO(), filter)
        if err != nil {
            logger.Fatal(err)
            return nil, err
        }
        counts[i] = count
    }
    return counts, nil
}
I have tried to use the aggregation pipeline, but "cur" and "result" are giving null output.
func NoOfDocumentsInfo(DB string, col string, filters ...bson.D) ([]int64, error) {
    if dbInstance == nil {
        if GetDBInstance() == nil {
            logger.Error("Not connecting to DB")
            err := errors.New("DB connection error")
            return nil, err
        }
    }
    logger.Debugf("%s %s", DB, col)
    coll := dbInstance.Database(DB).Collection(col)

    pipeline := make([]bson.M, 0, len(filters)+2)
    pipeline = append(pipeline, bson.M{"$match": bson.M{"$or": filters}})
    pipeline = append(pipeline, bson.M{"$group": bson.M{"_id": nil, "count": bson.M{"$sum": 1}}})
    pipeline = append(pipeline, bson.M{"$group": bson.M{"_id": nil, "count": bson.M{"$first": "$count"}}})

    var result struct {
        Count int64 `bson:"count"`
    }

    cur, err := coll.Aggregate(context.TODO(), pipeline)
    if err != nil {
        logger.Fatal(err)
        return nil, err
    }
    logger.Debugf("cur: %+v", cur)

    err = cur.Decode(&result)
    logger.Debugf("result: %+v, err: %v", result, err)
    if err != nil {
        logger.Fatal(err)
        return nil, err
    }
    return []int64{result.Count}, nil
}

A much simpler approach would be the one that I'm going to share here. Let's start with the code:
package main

import (
    "context"
    "fmt"
    "time"

    "go.mongodb.org/mongo-driver/bson"
    "go.mongodb.org/mongo-driver/mongo"
    "go.mongodb.org/mongo-driver/mongo/options"
)

var (
    dbInstance *mongo.Client
    ctx        context.Context
    cancel     context.CancelFunc
)

func NoOfDocumentsInfo(client *mongo.Client, DB string, col string, filters bson.A) (int64, error) {
    coll := client.Database(DB).Collection(col)

    myFilters := bson.D{
        bson.E{
            Key:   "$and",
            Value: filters,
        },
    }

    counts, err := coll.CountDocuments(ctx, myFilters)
    if err != nil {
        panic(err)
    }
    return counts, nil
}

func main() {
    ctx, cancel = context.WithTimeout(context.Background(), 20*time.Second)
    defer cancel()

    // set MongoDB connection
    clientOptions := options.Client().ApplyURI("mongodb://root:root@localhost:27017")
    mongoClient, err := mongo.Connect(ctx, clientOptions)
    if err != nil {
        panic(err)
    }
    defer mongoClient.Disconnect(ctx)

    // query with filters
    numDocs, err := NoOfDocumentsInfo(mongoClient, "demodb", "myCollection", bson.A{
        bson.D{bson.E{Key: "Name", Value: bson.D{bson.E{Key: "$eq", Value: "John Doe"}}}},
        bson.D{bson.E{Key: "Song", Value: bson.D{bson.E{Key: "$eq", Value: "White Roses"}}}},
    })
    if err != nil {
        panic(err)
    }
    fmt.Println("num docs:", numDocs)
}
Let's see the relevant changes applied to the code:
- Expect a parameter called filters of type bson.A, which is the type for arrays in the MongoDB environment.
- Build the myFilters variable, which is of type bson.D (a slice), containing a single item (bson.E) where:
  - the Key is the logical operator ("$and"), and
  - the Value is the array passed into the function.
- Build the array to pass to the function with all of the needed filters (e.g. two equality conditions: one on the Name key and the other on Song).
Finally, I also made some improvements to how you open the MongoDB connection and how you release the allocated resources.
Let me know if this solves your issue, thanks!

Related

MongoDB aggregation query not matching the structure that is being defined

I have a requirement in my project where I have to perform a DB operation to get the total number of users of a particular type. What I am doing is collecting all the query filters in a slice and passing that slice to my DB function.
This is the code snippet from where I am calling the DB function:
{
    filters = []bson.D{
        {{Key: "Mykey", Value: myvalue}},
        {{Key: "Mykey", Value: myvalue}},
        {{Key: "Mykey", Value: myvalue}},
        {{Key: "Mykey", Value: myvalue}},
    }
    counts, err := dbmain.NoOfDocumentsInfo(MyDBName, myCollectionName, filters...)
}
Below is my called function
func NoOfDocumentsInfo(DB string, col string, filters ...bson.D) ([]int64, error) {
    if nil == dbInstance {
        if nil == GetDBInstance() {
            logger.Error("Not connecting to DB")
            err := errors.New("DB connection error")
            return nil, err
        }
    }
    logger.Debugf("%s %s", DB, col)
    coll := dbInstance.Database(DB).Collection(col)

    counts := make([]int64, len(filters))
    for i, filter := range filters {
        count, err := coll.CountDocuments(context.TODO(), filter)
        if err != nil {
            logger.Fatal(err)
            return nil, err
        }
        counts[i] = count
    }
    return counts, nil
}
As you can see, I am calling the "coll.CountDocuments" function multiple times. What I want is to write the code without calling "coll.CountDocuments" repeatedly, by aggregating all the filters into a single query.
I have tried to use the aggregation pipeline, but my "cur" and "result" are giving null output. If you run the code you will be able to see it.
func NoOfDocumentsInfo(DB string, col string, filters ...bson.D) ([]int64, error) {
    if dbInstance == nil {
        if GetDBInstance() == nil {
            logger.Error("Not connecting to DB")
            err := errors.New("DB connection error")
            return nil, err
        }
    }
    logger.Debugf("%s %s", DB, col)
    coll := dbInstance.Database(DB).Collection(col)

    pipeline := make([]bson.M, 0, len(filters)+2)
    pipeline = append(pipeline, bson.M{"$match": bson.M{"$or": filters}})
    pipeline = append(pipeline, bson.M{"$group": bson.M{"_id": nil, "count": bson.M{"$sum": 1}}})
    pipeline = append(pipeline, bson.M{"$group": bson.M{"_id": nil, "count": bson.M{"$first": "$count"}}})

    var result struct {
        Count int64 `bson:"count"`
    }

    cur, err := coll.Aggregate(context.TODO(), pipeline)
    if err != nil {
        logger.Fatal(err)
        return nil, err
    }
    logger.Debugf("cur: %+v", cur)

    err = cur.Decode(&result)
    logger.Debugf("result: %+v, err: %v", result, err)
    if err != nil {
        logger.Fatal(err)
        return nil, err
    }
    return []int64{result.Count}, nil
}
You have to add a field for each filter in $group; you may use $cond to conditionally increment the given counter. But this may very well end up not using indexes, and thus be even slower than the separate, original count queries. Also note that using $or may likewise result in skipping indexes, and that inside $cond you may have to transform the filters (e.g. add $ to field names).
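For illustration, here is a minimal sketch of what such a single $group could look like, inside a function like NoOfDocumentsInfo above. The two conditions are made-up placeholders (Name/Song equality checks); note that inside $cond they are written as aggregation expressions with "$"-prefixed field paths, not as query filters:
// Sketch: one $group stage that counts two conditions in a single pass.
pipeline := []bson.M{
    {"$group": bson.M{
        "_id": nil,
        "nameCount": bson.M{"$sum": bson.M{"$cond": bson.A{
            bson.M{"$eq": bson.A{"$Name", "John Doe"}}, 1, 0,
        }}},
        "songCount": bson.M{"$sum": bson.M{"$cond": bson.A{
            bson.M{"$eq": bson.A{"$Song", "White Roses"}}, 1, 0,
        }}},
    }},
}

cur, err := coll.Aggregate(context.TODO(), pipeline)
if err != nil {
    return nil, err
}
var res []bson.M
if err := cur.All(context.TODO(), &res); err != nil {
    return nil, err
}
// If the collection is not empty, res[0]["nameCount"] and res[0]["songCount"] hold the two counts.
As noted above, this generally cannot use indexes, so measure it before preferring it over separate counts.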
You'd be better off launching concurrent count queries (using the go statement) for each filter; if the queries are indexed, they will complete fast. This is how it could look:
func docCounts(db string, col string, filters ...bson.D) ([]int64, error) {
    // ... obtain collection
    coll := dbInstance.Database(db).Collection(col)

    counts := make([]int64, len(filters))
    errs := make([]error, len(filters))

    wg := &sync.WaitGroup{}
    wg.Add(len(filters))
    for i := range filters {
        go func(i int) {
            defer wg.Done()
            counts[i], errs[i] = coll.CountDocuments(context.TODO(), filters[i])
        }(i)
    }
    wg.Wait()

    // Produce some kind of error if any of the queries failed.
    var err error
    for _, e := range errs {
        if e != nil {
            err = fmt.Errorf("at least one query failed: %w", e)
            break
        }
    }
    // Note: starting with Go 1.20, you could simply write:
    // err = errors.Join(errs...)

    return counts, err
}
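A call site could then look like this (just a sketch, reusing the placeholder names from the question as string literals):
filters := []bson.D{
    {{Key: "Mykey", Value: "myvalue"}},
    {{Key: "Mykey", Value: "myvalue"}},
}
counts, err := docCounts("MyDBName", "myCollectionName", filters...)
if err != nil {
    log.Printf("at least one count failed: %v", err)
}
fmt.Println(counts) // one count per filter, in the same order as filters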

How to return an array from reading a file?

I have two columns in a CSV file. I am accessing only the first column using the SearchData() function.
The problem is that I want to access the data as an array. When I return the data from the AccessData() function and use products[0] in SearchData(), it gives me all the data with only the brackets [] removed, and when I use products[1], it gives me "runtime error: index out of range [1] with length 1".
Required result:
products[0] = First Item
products[1] = Second Item
... and so on
Code
func AccessData(number int) string {
    content, err := ioutil.ReadFile("products/data1.csv")
    if err != nil {
        log.Fatal(err)
    }
    Data := string(content)
    sliceData := strings.Split(Data, ",")
    return sliceData[number]
}

func SearchData() {
    for i := 0; i <= 34; i = i + 2 {
        products := AccessData(i)
        fmt.Println(products)
    }
}
This should do the trick:
func firstColumns(filename string) []string {
    f, err := os.Open(filename)
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    r := csv.NewReader(f)

    var result []string
    for {
        row, err := r.Read()
        if err != nil {
            if err == io.EOF {
                break
            }
            log.Fatal(err)
        }
        if len(row) > 0 {
            result = append(result, row[0])
        }
    }
    return result
}

func main() {
    data := firstColumns("products/data1.csv")
    fmt.Println(data)
    fmt.Println(data[1])
}
This turns the first column of every row into a []string which can be accessed by index.
The output is:
[First item Second item]
Second item

Record inserted twice into database

I have Go code like below:
package main

import (
    "database/sql"
    "log"

    _ "github.com/lib/pq"
)

const (
    insertLoginSQL = `insert into Logins(id, name, password) values($1, $2, $3)`
)

func main() {
    db, err := sql.Open("postgres", "user=postgres password=admin dbname=Quality sslmode=disable")
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    if err := Insert(db); err != nil {
        log.Println("error with double insert", err)
    }
}

func Insert(db *sql.DB) error {
    tx, err := db.Begin()
    if err != nil {
        return err
    }
    stmt, err := tx.Prepare(insertLoginSQL)
    if err != nil {
        return err
    }
    defer stmt.Close()

    if _, err := stmt.Exec(10, "user", "pwd"); err != nil {
        tx.Rollback()
        return err
    }
    return tx.Commit()
}
When I run the above code, the record is inserted twice into the database. Can someone let me know why duplicate records are inserted? Is there any issue with this code?
Probably the commit is done twice: the first time by one of the previous operations, like stmt.Exec, and the second time when tx.Commit() is executed.
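If it helps to narrow this down, one diagnostic (just a sketch, not part of the original code) is to log how many rows the Exec reports as affected each time Insert runs:
// Inside Insert, replace the Exec call with a version that logs RowsAffected.
res, err := stmt.Exec(10, "user", "pwd")
if err != nil {
    tx.Rollback()
    return err
}
if n, err := res.RowsAffected(); err == nil {
    log.Printf("Insert reached, statement affected %d row(s)", n)
}
If every run logs a single affected row but two rows still appear in the table, the insert path is evidently being executed more than once.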

iterate over a large number of entities on appengine with go

On app engine I have a large number of entities of a particular kind.
I want to run a function on each entity (e.g. edit the entity or copy it).
I would do this in a task queue, but a task queue is limited to 10 minutes of runtime and each function call is prone to many kinds of errors. What is the best way to do this?
Here's my solution, although I'm hoping someone out there has a better one. I also wonder if this is prone to fork bombs, e.g. if the task runs twice, it will set off two chains of iteration! I'm only using it to iterate over a few hundred thousand entities, although the operation on each entity is expensive.
First I create a taskqueue for running each individual function call on an entity one at a time:
queue:
- name: entity-iter
  rate: 100/s
  max_concurrent_requests: 1
  retry_parameters:
    task_retry_limit: 3
    task_age_limit: 30m
    min_backoff_seconds: 200
and then I have an entity-iterating method which, given the kind, will call your delay func on each entity with its key.
package sysadmin

import (
    "golang.org/x/net/context"

    "google.golang.org/appengine/datastore"
    "google.golang.org/appengine/delay"
    "google.golang.org/appengine/log"
    "google.golang.org/appengine/taskqueue"
)

func ForEachEntity(kind string, f *delay.Function) *delay.Function {
    var callWithNextKey *delay.Function // func(c context.Context, depth int, cursorString string) error
    callWithNextKey = delay.Func("something", func(c context.Context, depth int, cursorString string) error {
        q := datastore.NewQuery(kind).KeysOnly()
        if cursorString != "" {
            if curs, err := datastore.DecodeCursor(cursorString); err != nil {
                log.Errorf(c, "error decoding cursor %v", err)
                return err
            } else {
                q = q.Start(curs)
            }
        }
        it := q.Run(c)
        if key, err := it.Next(nil); err != nil {
            if err == datastore.Done {
                log.Infof(c, "Done %v", err)
                return nil
            }
            log.Errorf(c, "datastore error %v", err)
            return err
        } else {
            curs, _ := it.Cursor()
            if t, err := f.Task(key); err != nil {
                return err
            } else if _, err = taskqueue.Add(c, t, "entity-iter"); err != nil {
                log.Errorf(c, "error %v", err)
                return err
            }
            if depth-1 > 0 {
                if err := callWithNextKey.Call(c, depth-1, curs.String()); err != nil {
                    log.Errorf(c, "error2 %v", err)
                    return err
                }
            }
        }
        return nil
    })
    return callWithNextKey
}
example usage:
var DoCopyCourse = delay.Func("something2", CopyCourse)
var DoCopyCourses = ForEachEntity("Course", DoCopyCourse)

func CopyCourses(c context.Context) {
    //sharedmodels.MakeMockCourses(c)
    DoCopyCourses.Call(c, 9999999, "")
}

datastore: invalid entity type on get

Hey, I'm trying to get an entity by passing an id, but it looks like I'm getting a nil pointer.
I tried to initialize the entity in a couple of ways but the result is the same.
I'm trying to structure my server as much as I can like this example: to-do.
What am I missing here?
type UserManager struct {
    users []*Users
    user  *Users
}

func NewUserManager() *UserManager {
    return &UserManager{}
}

func (userManager *UserManager) putUser(c appengine.Context, u *Users) (usreRes *Users, err error) {
    key, err := datastore.Put(c, datastore.NewIncompleteKey(c, "users", nil), u)
    if err != nil {
        return nil, err
    }
    c.Debugf("file key inserted :%#v", key)
    return u, nil
}

func (userManager *UserManager) getUserById(userKey string, c appengine.Context) (usreRes *Users, err error) {
    entity_id_int, err := strconv.ParseInt(userKey, 10, 64)
    if err != nil {
        return userManager.user, err
    }
    k := datastore.NewKey(c, "users", "", entity_id_int, nil)

    userRes := new(Users)
    err = datastore.Get(c, k, &userRes)
    //err = datastore.Get(c, k, &userManager.user)
    if err != nil {
        return userManager.user, err
    }
    //return userManager.user,nil
    return userRes, nil
}
It's because datastore.Get(c, k, ...) expects a pointer to a struct as its third argument, and you are passing a pointer to a pointer (**Users) with &userRes.
userRes := new(Users)
There you are already creating a pointer to the struct.
So you should do:
datastore.Get(c, k, userRes)
without the ampersand, because with the new keyword you have already created a pointer.
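Putting that together, getUserById could look like this (a sketch of the corrected function, otherwise based on the code from the question):
func (userManager *UserManager) getUserById(userKey string, c appengine.Context) (*Users, error) {
    entity_id_int, err := strconv.ParseInt(userKey, 10, 64)
    if err != nil {
        return nil, err
    }
    k := datastore.NewKey(c, "users", "", entity_id_int, nil)

    userRes := new(Users)                                 // userRes is already a *Users
    if err := datastore.Get(c, k, userRes); err != nil {  // pass it directly, without &
        return nil, err
    }
    return userRes, nil
}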
