New to Go. Yesterday I started playing with Go and ported some code that was originally written in PHP; I just wanted to see the difference in performance.
I am doing exactly the same thing as in PHP, and the HTTP response is exactly the same, but the Go version performs really slowly even though it is compiled.
I am trying to understand which things I am using in Go that I shouldn't be, and how I can improve the performance of this piece of code.
I know iterating over a map is slow, but PHP uses hash maps to implement multidimensional arrays as well. I can guarantee the SQL queries I used are exact copies pasted from the PHP version, the machines are the same, and the loop counts are the same in both programs.
package main

import (
    "database/sql"
    "encoding/json"
    "fmt"
    "net/http"
    "reflect"
    "strings"

    _ "github.com/go-sql-driver/mysql"
)

func main() {
    db, err := sql.Open("mysql", "***:****@tcp(****:3306)/****")
    fmt.Println(reflect.TypeOf(db))
    checkErr(err)

    fmt.Println("Handle Request setup... OK")
    http.HandleFunc("/", func(w http.ResponseWriter, req *http.Request) {
        jsonData, err := getListings(db)
        checkErr(err)
        w.Write(jsonData)
    })

    fmt.Println("Starting Server....")
    fmt.Println("Listening on port 8081")
    http.ListenAndServe(":8081", nil)
}

func getListings(db *sql.DB) ([]byte, error) {
    userId := "142" // interpolated into the redacted queries below
    normalListings := sqlToArray(db, `******`)
    manualListings := sqlToArray(db, "******")

    var groupIds []string
    for key := range manualListings {
        groupId := "142," + manualListings[key]["group_id"]
        if !stringInSlice(groupId, groupIds) {
            groupIds = append(groupIds, groupId)
        }
    }
    groupIdsString := strings.Join(groupIds, ", ") // interpolated into the redacted query below
    manualGroups := sqlToArray(db, "*****")

    for key := range manualListings {
        for key2 := range manualGroups {
            groupId := "142," + manualListings[key]["group_id"]
            if groupId == manualGroups[key2]["ticket_id"] {
                entry := make(map[string]string)
                entry["ticket_id"] = manualListings[key]["listing_id"]
                entry["date_created"] = manualGroups[key2]["date_created"]
                normalListings = append(normalListings, entry)
            }
        }
    }
    return json.Marshal(normalListings)
}

func stringInSlice(a string, list []string) bool {
    for _, b := range list {
        if b == a {
            return true
        }
    }
    return false
}

func sqlToArray(db *sql.DB, sqlString string) []map[string]string {
    rows, err := db.Query(sqlString)
    checkErr(err)
    defer rows.Close()

    columns, err := rows.Columns()
    checkErr(err)
    count := len(columns)

    values := make([]interface{}, count)
    valuePtrs := make([]interface{}, count)
    tableData := make([]map[string]string, 0)

    for rows.Next() {
        for i := 0; i < count; i++ {
            valuePtrs[i] = &values[i]
        }
        checkErr(rows.Scan(valuePtrs...))
        entry := make(map[string]string)
        for i, col := range columns {
            if b, ok := values[i].([]byte); ok {
                entry[col] = string(b)
            } else {
                entry[col] = fmt.Sprintf("%v", values[i]) // non-[]byte values (ints, nil, ...) need a generic conversion
            }
        }
        tableData = append(tableData, entry)
    }
    return tableData
}

func checkErr(err error) {
    if err != nil {
        panic(err)
    }
}
Edits:
I changed the code to use statically typed structs instead of maps and identified the problematic piece of code.
New code:
package main

import (
    "database/sql"
    "encoding/json"
    "fmt"
    "net/http"
    "strings"

    _ "github.com/go-sql-driver/mysql"
)

type listingsType struct {
    TicketId    string
    DateCreated string
}

func main() {
    db, err := sql.Open("mysql", "******")
    checkErr(err)

    fmt.Println("Handle Request setup... OK")
    http.HandleFunc("/", func(w http.ResponseWriter, req *http.Request) {
        jsonData, err := getListings(db)
        checkErr(err)
        w.Write(jsonData)
    })

    fmt.Println("Starting Server....")
    fmt.Println("Listening on port 8081")
    http.ListenAndServe(":8081", nil)
}

func getListings(db *sql.DB) ([]byte, error) {
    userId := "142" // interpolated into the redacted queries below
    normalListings := sqlToArray(db, `*****`)
    manualListings := sqlToArray(db, "*****")

    var groupIds []string
    for _, elem := range manualListings {
        groupId := "142," + elem.DateCreated
        if !stringInSlice(groupId, groupIds) {
            groupIds = append(groupIds, groupId)
        }
    }
    groupIdsString := strings.Join(groupIds, ", ")
    fmt.Println(groupIdsString)
    manualGroups := sqlToArray(db, "******")

    for _, manualList := range manualListings {
        for _, manualGroup := range manualGroups {
            groupId := "142," + manualList.DateCreated
            if groupId == manualGroup.TicketId {
                var entry listingsType
                entry.TicketId = manualList.TicketId
                entry.DateCreated = manualGroup.DateCreated
                normalListings = append(normalListings, entry)
            }
        }
    }
    return json.Marshal(normalListings)
}

func stringInSlice(a string, list []string) bool {
    for _, b := range list {
        if b == a {
            return true
        }
    }
    return false
}

func sqlToArray(db *sql.DB, sqlString string) []listingsType {
    rows, err := db.Query(sqlString)
    checkErr(err)
    defer rows.Close()

    tableData := []listingsType{}
    for rows.Next() {
        var entry listingsType
        checkErr(rows.Scan(&entry.TicketId, &entry.DateCreated))
        tableData = append(tableData, entry)
    }
    return tableData
}

func checkErr(err error) {
    if err != nil {
        panic(err)
    }
}
Problematic piece of code
As soon as I comment out the following block of code, everything performs just fine.
Any idea what is wrong with this loop?
for _, manualList := range manualListings {
    for _, manualGroup := range manualGroups {
        groupId := "142," + manualList.DateCreated
        if groupId == manualGroup.TicketId {
            var entry listingsType
            entry.TicketId = manualList.TicketId
            entry.DateCreated = manualGroup.DateCreated
            normalListings = append(normalListings, entry)
        }
    }
}
Profiling Result
OK, I got it fixed. I brought the request time down from 5,000+ ms to 500 ms; now my PHP code, at 900 ms, is finally the slower one.
I got rid of the inner search loop by writing a separate function that loads the SQL data into a different data structure: a map keyed by the value I was searching the slices for. Instead of a linear search over whole arrays, each lookup is now a single map access, which eliminates the second loop that was causing the trouble.
manualGroups := sqlToArraySpecial(db, "****")
for _, manualList := range manualListings {
    //index := stringInSliceArray(manualList.DateCreated, manualGroups)
    groupId := "142," + manualList.DateCreated
    var entry listingsType
    entry.TicketId = manualList.TicketId
    entry.DateCreated = manualGroups[groupId]
    normalListings = append(normalListings, entry)
}
And here is my new SQL function:
func sqlToArraySpecial(db *sql.DB, sqlString string) map[string]string {
    rows, err := db.Query(sqlString)
    checkErr(err)
    defer rows.Close()

    tableData := make(map[string]string)
    for rows.Next() {
        var date_created string
        var ticket_id string
        checkErr(rows.Scan(&ticket_id, &date_created))
        //fmt.Println(ticket_id)
        tableData[ticket_id] = date_created
    }
    return tableData
}
Although this is a dead post, I cannot help but note, since no one else has (explicitly), and it's kinda important to know why:
nested for loops exhibit quadratic running-time complexity,
and, as you stated, searching an array takes linear time,
so, simply put:
the computation time grows with the square of the total number of elements.
Now, to answer why this isn't the case in PHP: well, because you were using a hash map, which,
it can be said, exhibits constant time complexity,
again, simply put, this means that:
look-up time is not related to the number of elements (aka the size of the collection).
see: big-O
With all that being said, please note:
I don't know PHP,
thus do not know the details of how the language implements arrays, and
I'm not an algorithm expert,
so please treat my post as a statement for the general case.
pce
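To make that concrete, here is a minimal Go sketch (with a hypothetical group type and hypothetical IDs, not the OP's actual data) contrasting the two approaches:

type group struct {
    TicketID    string
    DateCreated string
}

// Quadratic: for each listing, scan every group -> O(n*m) comparisons.
func joinByScan(listingIDs []string, groups []group) []string {
    var out []string
    for _, id := range listingIDs {
        for _, g := range groups {
            if g.TicketID == id {
                out = append(out, g.DateCreated)
            }
        }
    }
    return out
}

// Near-linear: build the index once in O(m), then each lookup is O(1) on average.
func joinByMap(listingIDs []string, groups []group) []string {
    byTicket := make(map[string]string, len(groups))
    for _, g := range groups {
        byTicket[g.TicketID] = g.DateCreated
    }
    var out []string
    for _, id := range listingIDs {
        if created, ok := byTicket[id]; ok {
            out = append(out, created)
        }
    }
    return out
}

With 10,000 listings and 10,000 groups, the first version performs on the order of 100,000,000 comparisons, while the second performs roughly 20,000 map operations, which is the same kind of improvement the OP measured.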
Related
I have a requirement in my project to perform a DB operation that gets the total number of users of a particular type. What I am doing is collecting all the queries in a slice and passing that slice to my DB function.
This is the code snippet from where I am calling the DB function
{
    filters := []bson.D{
        {{Key: "Mykey", Value: myvalue}},
        {{Key: "Mykey", Value: myvalue}},
        {{Key: "Mykey", Value: myvalue}},
        {{Key: "Mykey", Value: myvalue}},
    }
    counts, err := dbmain.NoOfDocumentsInfo(MyDBName, myCollectionName, filters...)
}
Below is my called function
func NoOfDocumentsInfo(DB string, col string, filters ...bson.D) ([]int64, error) {
    if nil == dbInstance {
        if nil == GetDBInstance() {
            logger.Error("Not connecting to DB")
            err := errors.New("DB connection error")
            return nil, err
        }
    }
    logger.Debugf("%s %s", DB, col)

    coll := dbInstance.Database(DB).Collection(col)

    counts := make([]int64, len(filters))
    for i, filter := range filters {
        count, err := coll.CountDocuments(context.TODO(), filter)
        if err != nil {
            logger.Fatal(err)
            return nil, err
        }
        counts[i] = count
    }
    return counts, nil
}
As you can see, I am calling coll.CountDocuments multiple times. What I want is to write the code without calling coll.CountDocuments repeatedly, by aggregating all the filters into a single query.
I have tried to use the aggregation pipeline, but my cur and result give null output. If you run the code you will be able to see it.
func NoOfDocumentsInfo(DB string, col string, filters ...bson.D) ([]int64, error) {
    if dbInstance == nil {
        if GetDBInstance() == nil {
            logger.Error("Not connecting to DB")
            err := errors.New("DB connection error")
            return nil, err
        }
    }
    logger.Debugf("%s %s", DB, col)

    coll := dbInstance.Database(DB).Collection(col)

    pipeline := make([]bson.M, 0, len(filters)+2)
    pipeline = append(pipeline, bson.M{"$match": bson.M{"$or": filters}})
    pipeline = append(pipeline, bson.M{"$group": bson.M{"_id": nil, "count": bson.M{"$sum": 1}}})
    pipeline = append(pipeline, bson.M{"$group": bson.M{"_id": nil, "count": bson.M{"$first": "$count"}}})

    var result struct {
        Count int64 `bson:"count"`
    }

    cur, err := coll.Aggregate(context.TODO(), pipeline)
    if err != nil {
        logger.Fatal(err)
        return nil, err
    }
    logger.Debugf("cur: %+v", cur)

    err = cur.Decode(&result)
    logger.Debugf("result: %+v, err: %v", result, err)
    if err != nil {
        logger.Fatal(err)
        return nil, err
    }
    return []int64{result.Count}, nil
}
You have to add a field for each filter in $group; you may use $cond to conditionally increment the given counter. But this may very well end up not using indexes, and thus be even slower than the separate, original count queries. Also note that using $or may likewise result in skipped indexes, and that inside $cond you may have to transform the filters (e.g. add a $ prefix to field names).
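For illustration, a rough sketch of such a $cond-based pipeline for two hypothetical equality filters (myvalue1, myvalue2 are placeholders) could look like this; note the $ prefix on the field name inside the expression, and that the cursor must be advanced with Next before decoding, which is also why the original code above printed a null result:

// Illustrative sketch only: count matches of two filters in a single pass.
pipeline := []bson.M{
    {"$group": bson.M{
        "_id": nil,
        "count0": bson.M{"$sum": bson.M{
            "$cond": []interface{}{bson.M{"$eq": []interface{}{"$Mykey", myvalue1}}, 1, 0},
        }},
        "count1": bson.M{"$sum": bson.M{
            "$cond": []interface{}{bson.M{"$eq": []interface{}{"$Mykey", myvalue2}}, 1, 0},
        }},
    }},
}

var result struct {
    Count0 int64 `bson:"count0"`
    Count1 int64 `bson:"count1"`
}
cur, err := coll.Aggregate(context.TODO(), pipeline)
if err != nil {
    return nil, err
}
defer cur.Close(context.TODO())
if cur.Next(context.TODO()) { // advance the cursor before decoding a document
    if err := cur.Decode(&result); err != nil {
        return nil, err
    }
}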
You'd better launch concurrent count queries (using go) for each filter; if the fields are indexed, the queries will complete fast. This is how it could look:
func docCounts(db string, col string, filters ...bson.D) ([]int64, error) {
    // ... obtain collection
    coll := dbInstance.Database(db).Collection(col)

    counts := make([]int64, len(filters))
    errs := make([]error, len(filters))

    wg := &sync.WaitGroup{}
    wg.Add(len(filters))
    for i := range filters {
        go func(i int) {
            defer wg.Done()
            counts[i], errs[i] = coll.CountDocuments(context.TODO(), filters[i])
        }(i)
    }
    wg.Wait()

    // Produce some kind of error if any of the queries failed.
    var err error
    for _, e := range errs {
        if e != nil {
            err = fmt.Errorf("at least one query failed: %w", e)
            break
        }
    }
    // Note: starting with Go 1.20, you could simply write:
    // err = errors.Join(errs...)
    return counts, err
}
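A hypothetical call site (the database, collection, and field names here are made up for illustration) could look like:

filters := []bson.D{
    {{Key: "type", Value: "admin"}},
    {{Key: "type", Value: "guest"}},
}
counts, err := docCounts("mydb", "users", filters...)
if err != nil {
    log.Println(err)
}
fmt.Println(counts) // one count per filter, in the same order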
I have two columns in a CSV file. I am accessing only the first column, using the SearchData() function.
The problem is that I want to access the data as an array, but when I return a string from the AccessData() function and print products[0] in SearchData(), it gives me all the data with only the bracket signs [] removed; and when I use products[1], it gives me: runtime error: index out of range [1] with length 1.
Required result
products[0] = First Item
products[1] = Second Item
...
so on
Code
func AccessData(number int) string {
    content, err := ioutil.ReadFile("products/data1.csv")
    if err != nil {
        log.Fatal(err)
    }
    Data := string(content)
    sliceData := strings.Split(Data, ",")
    return sliceData[number]
}

func SearchData() {
    for i := 0; i <= 34; i = i + 2 {
        products := AccessData(i)
        fmt.Println(products)
    }
}
This should do the trick:
func firstColumns(filename string) []string {
    f, err := os.Open(filename)
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    r := csv.NewReader(f)

    var result []string
    for {
        row, err := r.Read()
        if err != nil {
            if err == io.EOF {
                break
            }
            log.Fatal(err)
        }
        if len(row) > 0 {
            result = append(result, row[0])
        }
    }
    return result
}

func main() {
    data := firstColumns("products/data1.csv")
    fmt.Println(data)
    fmt.Println(data[1])
}
This turns the first column of every row into a []string which can be accessed by index. Unlike splitting the whole file on commas, the csv.Reader parses record by record, so the second column and the line breaks never leak into the result.
The output is:
[First item Second item]
Second item
I need to compute the hash (md5 is ok) for a large number of files. So, in Go I have this code:
package main

import (
    "crypto/md5"
    "encoding/hex"
    "fmt"
    "io"
    "os"
    "path/filepath"
)

func strSliceRemove(slice []string, str string) []string {
    var tempSlice []string
    for _, item := range slice {
        if item != str {
            tempSlice = append(tempSlice, item)
        }
    }
    return tempSlice
}

func fileMD5(path string) (string, error) {
    var returnMD5String string
    file, err := os.Open(path)
    if err != nil {
        return returnMD5String, err
    }
    defer file.Close()

    hash := md5.New()
    if _, err := io.Copy(hash, file); err != nil {
        return returnMD5String, err
    }
    hashInBytes := hash.Sum(nil)[:16]
    returnMD5String = hex.EncodeToString(hashInBytes)
    return returnMD5String, nil
}

func main() {
    var doRead func(string)
    doRead = func(sd string) {
        filepath.Walk(sd, func(path string, f os.FileInfo, err error) error {
            resolvedPath, resolvedPathErr := filepath.EvalSymlinks(path)
            if resolvedPathErr != nil {
                return nil
            }
            if f.Mode()&os.ModeSymlink == os.ModeSymlink {
                doRead(resolvedPath)
            } else {
                if !f.IsDir() {
                    md5, _ := fileMD5(path)
                    fmt.Printf("%s\n", md5)
                }
            }
            return nil
        })
    }
    doRead("/tmp/electron")
}
It correctly hashes 1400 files in about one second. If I use the OS X md5 command-line utility instead, it takes more than 10 times as long:
for FILE in `find /tmp/electron`; do
    if [ ! -d "$FILE" ]; then
        md5 $FILE;
    fi;
done;
I tried a basic C program that does the same (based on this answer: How to calculate the MD5 hash of a large file in C?) and the time is still more or less 10 seconds.
What kind of strategy / library does crypto/md5 use?
I'm trying to unmarshal a JSON array of the following type:
[
    {"abc's": "n;05881364"},
    {"abcoulomb": "n;13658345"},
    {"abcs": "n;05881364"}
]
into a map[string]string. This question, Golang parse JSON array into data structure, almost answered my problem, but mine is truly a map, not an array of maps. Unmarshaling into a []map[string]string worked, but it leaves me with a slice of map[string]string, not the single map[string]string it should be.
There is no way to do it directly with the json package; you have to do the conversion yourself. This is simple:
package main

import (
    "encoding/json"
    "fmt"
)

func main() {
    data := []byte(`
[
    {"abc's": "n;05881364"},
    {"abcoulomb": "n;13658345"},
    {"abcs": "n;05881364"}
]
`)
    var mapSlice []map[string]string
    if err := json.Unmarshal(data, &mapSlice); err != nil {
        panic(err)
    }
    resultingMap := map[string]string{}
    for _, m := range mapSlice {
        for k, v := range m {
            resultingMap[k] = v
        }
    }
    fmt.Println(resultingMap)
}
Output
map[abc's:n;05881364 abcoulomb:n;13658345 abcs:n;05881364]
An alternative (though very similar) to Alex's answer is to define your own type along with an UnmarshalJSON function.
package main

import (
    "encoding/json"
    "fmt"
)

type myMapping map[string]string

func (mm myMapping) UnmarshalJSON(b []byte) error {
    var temp []map[string]string
    if err := json.Unmarshal(b, &temp); err != nil {
        return err
    }
    for _, m := range temp {
        for k, v := range m {
            mm[k] = v
        }
    }
    return nil
}

func main() {
    data := []byte(`
[
    {"abc's": "n;05881364"},
    {"abcoulomb": "n;13658345"},
    {"abcs": "n;05881364"}
]`)
    resultingMap := myMapping{}
    if err := json.Unmarshal(data, &resultingMap); err != nil {
        panic(err)
    }
    fmt.Println(resultingMap)
}
Playground
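As an aside, the value receiver on UnmarshalJSON only works here because a map is a reference type and resultingMap is initialized (non-nil) before json.Unmarshal is called; for a non-map type you would need a pointer receiver for the decoded data to be visible to the caller.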
Basically, after doing a query I'd like to take the resulting rows and produce a []map[string]interface{}, but I do not see how to do this with the API, since Rows.Scan() needs a specific number of parameters, matching the requested number of columns (and possibly the types as well), to correctly obtain the data.
Again, I'd like to generalize this call: take any query and turn it into a []map[string]interface{}, where each map holds the column names mapped to the values of that row.
This is likely very inefficient, and I plan on changing the structure later so that interface{} is a struct for a single data point.
How would I do this using just the database/sql package, or if necessary the database/sql/driver package?
Look at using sqlx, which can do this a little more easily than the standard database/sql library:
places := []Place{}
err := db.Select(&places, "SELECT * FROM place ORDER BY telcode ASC")
if err != nil {
    fmt.Println(err)
    return
}
You could obviously replace []Place{} with a []map[string]interface{}, but where possible it is better to use a struct if you know the structure of your database. You won't need to undertake any type assertions as you might on an interface{}.
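For illustration, here is a small sketch (with a hypothetical results slice and column name) of the kind of type assertion a []map[string]interface{} forces on every read:

// results is a hypothetical []map[string]interface{} built from a query.
row := results[0]
name, ok := row["name"].(string)
if !ok {
    // Depending on the driver, the value may be []byte, int64, nil, etc.
    log.Fatalf("unexpected type %T for column name", row["name"])
}
fmt.Println(name)

With a struct, the library performs that conversion for you once, at scan time.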
I haven't used it (yet), but I believe the "common" way to do what you are asking (more or less) is to use gorp.
You can create a struct that maintains the mapping from column name to the position in the []interface{} slice. By doing this, you do not need to create a predefined struct. For example:
IDOrder: 0
IsClose: 1
IsConfirm: 2
IDUser: 3
Then, you can use it like this:
// create a fieldbinding object.
var fArr []string
fb := fieldbinding.NewFieldBinding()
if fArr, err = rs.Columns(); err != nil {
    return nil, err
}
fb.PutFields(fArr)

outArr := []interface{}{}
for rs.Next() {
    if err := rs.Scan(fb.GetFieldPtrArr()...); err != nil {
        return nil, err
    }
    fmt.Printf("Row: %v, %v, %v, %s\n", fb.Get("IDOrder"), fb.Get("IsConfirm"), fb.Get("IDUser"), fb.Get("Created"))
    outArr = append(outArr, fb.GetFieldArr())
}
Sample output:
Row: 1, 1, 1, 2016-07-15 10:39:37 +0000 UTC
Row: 2, 1, 11, 2016-07-15 10:42:04 +0000 UTC
Row: 3, 1, 10, 2016-07-15 10:46:20 +0000 UTC
SampleQuery: [{"Created":"2016-07-15T10:39:37Z","IDOrder":1,"IDUser":1,"IsClose":0,"IsConfirm":1},{"Created":"2016-07-15T10:42:04Z","IDOrder":2,"IDUser":11,"IsClose":0,"IsConfirm":1},{"Created":"2016-07-15T10:46:20Z","IDOrder":3,"IDUser":10,"IsClose":0,"IsConfirm":1}]
Please see the full example below or at fieldbinding:
main.go
package main

import (
    "bytes"
    "database/sql"
    "encoding/json"
    "fmt"
)

import (
    _ "github.com/go-sql-driver/mysql"
    "github.com/junhsieh/goexamples/fieldbinding/fieldbinding"
)

var (
    db *sql.DB
)

// Table definition
// CREATE TABLE `salorder` (
//   `IDOrder` int(10) unsigned NOT NULL AUTO_INCREMENT,
//   `IsClose` tinyint(4) NOT NULL,
//   `IsConfirm` tinyint(4) NOT NULL,
//   `IDUser` int(11) NOT NULL,
//   `Created` datetime NOT NULL,
//   `Changed` datetime NOT NULL,
//   PRIMARY KEY (`IDOrder`),
//   KEY `IsClose` (`IsClose`)
// ) ENGINE=InnoDB DEFAULT CHARSET=utf8;

func main() {
    var err error

    // starting database server
    db, err = sql.Open("mysql", "Username:Password@tcp(Host:Port)/DBName?parseTime=true")
    if err != nil {
        panic(err.Error()) // Just for example purposes. You should use proper error handling instead of panic.
    }
    defer db.Close()

    // SampleQuery
    if v, err := SampleQuery(); err != nil {
        fmt.Printf("%s\n", err.Error())
    } else {
        var b bytes.Buffer
        if err := json.NewEncoder(&b).Encode(v); err != nil {
            fmt.Printf("SampleQuery: %v\n", err.Error())
        }
        fmt.Printf("SampleQuery: %v\n", b.String())
    }
}

func SampleQuery() ([]interface{}, error) {
    param := []interface{}{}
    param = append(param, 1)

    sql := "SELECT "
    sql += " SalOrder.IDOrder "
    sql += ", SalOrder.IsClose "
    sql += ", SalOrder.IsConfirm "
    sql += ", SalOrder.IDUser "
    sql += ", SalOrder.Created "
    sql += "FROM SalOrder "
    sql += "WHERE "
    sql += "IsConfirm = ? "
    sql += "ORDER BY SalOrder.IDOrder ASC "

    rs, err := db.Query(sql, param...)
    if err != nil {
        return nil, err
    }
    defer rs.Close()

    // create a fieldbinding object.
    var fArr []string
    fb := fieldbinding.NewFieldBinding()

    if fArr, err = rs.Columns(); err != nil {
        return nil, err
    }
    fb.PutFields(fArr)

    outArr := []interface{}{}
    for rs.Next() {
        if err := rs.Scan(fb.GetFieldPtrArr()...); err != nil {
            return nil, err
        }
        fmt.Printf("Row: %v, %v, %v, %s\n", fb.Get("IDOrder"), fb.Get("IsConfirm"), fb.Get("IDUser"), fb.Get("Created"))
        outArr = append(outArr, fb.GetFieldArr())
    }
    if err := rs.Err(); err != nil {
        return nil, err
    }
    return outArr, nil
}
fieldbinding package:
package fieldbinding

import (
    "sync"
)

// NewFieldBinding ...
func NewFieldBinding() *FieldBinding {
    return &FieldBinding{}
}

// FieldBinding is designed for SQL rows.Scan() queries.
type FieldBinding struct {
    sync.RWMutex // embedded. see http://golang.org/ref/spec#Struct_types
    FieldArr     []interface{}
    FieldPtrArr  []interface{}
    FieldCount   int64
    MapFieldToID map[string]int64
}

func (fb *FieldBinding) put(k string, v int64) {
    fb.Lock()
    defer fb.Unlock()
    fb.MapFieldToID[k] = v
}

// Get ...
func (fb *FieldBinding) Get(k string) interface{} {
    fb.RLock()
    defer fb.RUnlock()
    // TODO: check that the map key exists and that fb.FieldArr is within bounds.
    return fb.FieldArr[fb.MapFieldToID[k]]
}

// PutFields ...
func (fb *FieldBinding) PutFields(fArr []string) {
    fCount := len(fArr)
    fb.FieldArr = make([]interface{}, fCount)
    fb.FieldPtrArr = make([]interface{}, fCount)
    fb.MapFieldToID = make(map[string]int64, fCount)

    for k, v := range fArr {
        fb.FieldPtrArr[k] = &fb.FieldArr[k]
        fb.put(v, int64(k))
    }
}

// GetFieldPtrArr ...
func (fb *FieldBinding) GetFieldPtrArr() []interface{} {
    return fb.FieldPtrArr
}

// GetFieldArr ...
func (fb *FieldBinding) GetFieldArr() map[string]interface{} {
    m := make(map[string]interface{}, fb.FieldCount)
    for k, v := range fb.MapFieldToID {
        m[k] = fb.FieldArr[v]
    }
    return m
}
If you really want a map, which is needed in some cases, have a look at dbr; however, you need to use the fork (the PR was rejected in the original repo). The fork seems more up to date anyway:
https://github.com/mailru/dbr
For info on how to use it:
https://github.com/gocraft/dbr/issues/83
package main

import (
    "fmt"

    "github.com/bobby96333/goSqlHelper"
)

func main() {
    fmt.Println("hello")
    conn, err := goSqlHelper.MysqlOpen("user:password@tcp(127.0.0.1:3306)/dbname")
    checkErr(err)
    row, err := conn.QueryRow("select * from table where col1 = ? and col2 = ?", "123", "abc")
    checkErr(err)
    if *row == nil {
        fmt.Println("no row found")
    } else {
        fmt.Printf("%+v", row)
    }
}

func checkErr(err error) {
    if err != nil {
        panic(err)
    }
}
output:
&map[col1:abc col2:123]