I need to compute the hash (md5 is ok) for a large number of files. So, in Go I have this code:
package main
import (
"io"
"os"
"fmt"
"path/filepath"
"crypto/md5"
"encoding/hex"
)
func strSliceRemove(slice []string, str string) []string {
var tempSlice []string;
for _, item := range slice {
if item != str {
tempSlice = append(tempSlice, item)
}
}
return tempSlice
}
func fileMD5(path string) (string, error) {
var returnMD5String string
file, err := os.Open(path)
if err != nil {
return returnMD5String, err
}
defer file.Close()
hash := md5.New()
if _, err := io.Copy(hash, file); err != nil {
return returnMD5String, err
}
hashInBytes := hash.Sum(nil)[:16]
returnMD5String = hex.EncodeToString(hashInBytes)
return returnMD5String, nil
}
func main() {
var doRead func(string)
doRead = func(sd string) {
filepath.Walk(sd, func(path string, f os.FileInfo, err error) error {
resolvedPath, resolvedPathErr := filepath.EvalSymlinks(path)
if resolvedPathErr != nil {
return nil
}
if f.Mode()&os.ModeSymlink == os.ModeSymlink {
doRead(resolvedPath)
} else {
if !f.IsDir() {
md5, _ := fileMD5(path)
fmt.Printf("%s\n", md5)
}
}
return nil
})
}
doRead("/tmp/electron")
return
}
It hashes correctly 1400 files in almost one second. If I use my OSX md5 command line utility, it takes more than 10 times the time. It is 10 times slower:
for FILE in `find /tmp/electron`; do
if [ ! -d "$FILE" ]; then
md5 $FILE;
fi;
done;
I tried a basic c program that does the same (based on this answer How to calculate the MD5 hash of a large file in C?) and still the time seems more or less 10 seconds.
What kind of strategy / library does crypto/md5 use?
Related
i am trying to learn some new stuff with GoLang, and got a litlebit stuck, probaly the reason is just that i am not very good at using arrays.
So heres what i want to do:
Make variable.
Download with that variable.
Add ++1 for that variable
Download with added 1
and loop it lets say 10 times.
I am all good with points 1 and two, but little stuck with 3 & 4. :).
all the files come in .pdf, thats why i made that strconv there.
I probaly should make somekind of Loop in main, and call DownloadFile function with some array parameters in there?
package main
import (
"fmt"
"io"
"net/http"
"os"
"strconv"
)
func main() {
url_id := strconv.Itoa(23430815+2)
filename := url_id+".pdf"
fileUrl := "https://someurLid="+url_id
if err := DownloadFile(filename, fileUrl); err != nil {
panic(err)
}
fmt.Println(fileUrl)
}
func DownloadFile(filepath string, url string) error {
resp, err := http.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
out, err := os.Create(filepath)
if err != nil {
return err
}
defer out.Close()
_, err = io.Copy(out, resp.Body)
return err
}
try this.
package main
import (
"fmt"
"io"
"net/http"
"os"
"strconv"
)
func main() {
url_id_num := 23430815+2
for i := 0; i < 10; i++ {
url_id := strconv.Itoa(url_id_num+i)
filename := url_id+".pdf"
fileUrl := "https://someurLid="+url_id
if err := DownloadFile(filename, fileUrl); err != nil {
panic(err)
}
fmt.Println(fileUrl)
}
}
func DownloadFile(filepath string, url string) error {
resp, err := http.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
out, err := os.Create(filepath)
if err != nil {
return err
}
defer out.Close()
_, err = io.Copy(out, resp.Body)
return err
}
Cerise Limón gave the answer and thats thats how it worked out.
arr := make([]uint8, 3) //How many times it loops
url_id := 23430815 //Starting from id, filename
for range arr {
filename := strconv.Itoa(url_id)+".pdf"
fileUrl := "https://someurl?id="+strconv.Itoa(url_id)
if err := DownloadFile(filename, fileUrl); err != nil {
panic(err)
}
fmt.Println(fileUrl)
url_id++
}
Thank you for pointing out where i should start! :).
What I'm trying to do:
Transform all arrays of length 1 in a JSON file to non arrays.
e.g.
Input: {"path": [{"secret/foo": [{"capabilities": ["read"]}]}]}
Output: {"path": {"secret/foo": {"capabilities": "read"}}}
I can't use Structs as the JSON format will vary...
Right now I've managed to at least detect the 1 length slices:
package main
import (
"encoding/json"
"fmt"
)
func findSingletons(value interface{}) {
switch value.(type) {
case []interface{}:
if len(value.([]interface{})) == 1 {
fmt.Println("1 length array found!", value)
}
for _, v := range value.([]interface{}) {
findSingletons(v)
}
case map[string]interface{}:
for _, v := range value.(map[string]interface{}) {
findSingletons(v)
}
}
}
func removeSingletonsFromJSON(input string) {
jsonFromInput := json.RawMessage(input)
jsonMap := make(map[string]interface{})
err := json.Unmarshal([]byte(jsonFromInput), &jsonMap)
if err != nil {
panic(err)
}
findSingletons(jsonMap)
fmt.Printf("JSON value of without singletons:%s\n", jsonMap)
}
func main() {
jsonParsed := []byte(`{"path": [{"secret/foo": [{"capabilities": ["read"]}]}]}`)
removeSingletonsFromJSON(string(jsonParsed))
fmt.Println(`Should have output {"path": {"secret/foo": {"capabilities": "read"}}}`)
}
Which outputs
1 length array found! [map[secret/foo:[map[capabilities:[read]]]]]
1 length array found! [map[capabilities:[read]]]
1 length array found! [read]
JSON value of without singletons:map[path:[map[secret/foo:[map[capabilities:[read]]]]]]
Should have output {"path": {"secret/foo": {"capabilities": "read"}}}
But I'm not sure how I can change them into non-arrays...
The type switch is your friend:
switch t := v.(type) {
case []interface{}:
if len(t) == 1 {
data[k] = t[0]
And you may use recursion to remove inside elements, like so:
func removeOneElementSlice(data map[string]interface{}) {
for k, v := range data {
switch t := v.(type) {
case []interface{}:
if len(t) == 1 {
data[k] = t[0]
if v, ok := data[k].(map[string]interface{}); ok {
removeOneElementSlice(v)
}
}
}
}
}
I would do this to convert
{"path":[{"secret/foo":[{"capabilities":["read"]}]}]}
to
{"path":{"secret/foo":{"capabilities":"read"}}}:
package main
import (
"encoding/json"
"fmt"
"log"
)
func main() {
s := `{"path":[{"secret/foo":[{"capabilities":["read"]}]}]}`
fmt.Println(s)
var data map[string]interface{}
if err := json.Unmarshal([]byte(s), &data); err != nil {
panic(err)
}
removeOneElementSlice(data)
buf, err := json.Marshal(data)
if err != nil {
log.Fatal(err)
}
fmt.Println(string(buf)) //{"a":"a","n":7}
}
func removeOneElementSlice(data map[string]interface{}) {
for k, v := range data {
switch t := v.(type) {
case []interface{}:
if len(t) == 1 {
data[k] = t[0]
if v, ok := data[k].(map[string]interface{}); ok {
removeOneElementSlice(v)
}
}
}
}
}
Hi i have a implementation to read a file Line Feed, but it does not work to Carriage Return files the implementation is:
file, err := os.Open(filePath)
if err != nil {
ross := int32(1)
fileValidation = append(fileValidation, p.createPharmacyPanelLoaderResultErr(pharmacyPanel, &ross, err.Error(), err.Error()))
return nil, fileValidation, int32(0)
}
scanner := bufio.NewScanner(file)
for i := 0; scanner.Scan(); i++ {
line := scanner.Text()
}
i want to transform this function to can work with both Carriage Return and Line Feed
I took the ScanLines function from the source code and changed it so it works with \r but now it will only work with \r and not \n or \r\n
package main
import (
"bufio"
"bytes"
"fmt"
"os"
)
func ScanLinesWithCR(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
if i := bytes.IndexByte(data, '\r'); i >= 0 {
// We have a full newline-terminated line.
return i + 1, data[0:i], nil
}
// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), data, nil
}
// Request more data.
return 0, nil, nil
}
func main() {
f, _ := os.Open("test.txt")
defer f.Close()
scanner := bufio.NewScanner(f)
scanner.Split(ScanLinesWithCR)
for scanner.Scan() {
fmt.Println(">", scanner.Text())
}
}
I'm trying to unmarshal a JSON array of the following type:
[
{"abc's": "n;05881364"},
{"abcoulomb": "n;13658345"},
{"abcs": "n;05881364"}
]
into a map[string]string. This question Golang parse JSON array into data structure almost answered my problem, but mine is a truly map, not an array of maps. Unmarshaling into a []map[string]string worked but I now get a map of map[string]string, not a simple map of string as it should be
There is no way to do it directly with the json package; you have to do the conversion yourself. This is simple:
package main
import (
"encoding/json"
"fmt"
)
func main() {
data := []byte(`
[
{"abc's": "n;05881364"},
{"abcoulomb": "n;13658345"},
{"abcs": "n;05881364"}
]
`)
var mapSlice []map[string]string
if err := json.Unmarshal(data, &mapSlice); err != nil {
panic(err)
}
resultingMap := map[string]string{}
for _, m := range mapSlice {
for k, v := range m {
resultingMap[k] = v
}
}
fmt.Println(resultingMap)
}
Output
map[abc's:n;05881364 abcoulomb:n;13658345 abcs:n;05881364]
An alternative (though very similar) to Alex's answer is to define your own type along with an UnmarshalJSON function.
package main
import (
"encoding/json"
"fmt"
)
type myMapping map[string]string
func (mm myMapping) UnmarshalJSON(b []byte) error {
var temp []map[string]string
if err := json.Unmarshal(b, &temp); err != nil {
return err
}
for _, m := range temp {
for k, v := range m {
mm[k] = v
}
}
return nil
}
func main() {
data := []byte(`
[
{"abc's": "n;05881364"},
{"abcoulomb": "n;13658345"},
{"abcs": "n;05881364"}
]`)
resultingMap := myMapping{}
if err := json.Unmarshal(data, &resultingMap); err != nil {
panic(err)
}
fmt.Println(resultingMap)
}
Playground
New to Golang, Yesterday I've started to play with Golang and wrote some code which was actually written in PHP. I just wanted to see difference in performance.
I am doing the exact same thing in PHP response is exact same in http request but the Golang is performing really slow even after compiling it.
I am trying to understand what things that I am using in Golang I shouldn't be using and how can I improve performance in this piece of Code.
I know Iterating over map is slow but PHP using hash maps for implementing multidimentional arrays, well. I can gurantee the sql queries I used were exact same copy pasted from PHP, machines are same, and loop numbers are same in both codes.
package main
import (
"database/sql"
"encoding/json"
"fmt"
_ "github.com/go-sql-driver/mysql"
"net/http"
"reflect"
"strings"
)
func main() {
db, err := sql.Open("mysql", "***:****#tcp(****:3306)/****")
fmt.Println(reflect.TypeOf(db))
checkErr(err)
fmt.Println("Handle Request setup... OK")
http.HandleFunc("/", func(w http.ResponseWriter, req *http.Request) {
jsonData, err := getListings(db)
checkErr(err)
w.Write([]byte(jsonData))
})
fmt.Println("Starting Server....")
fmt.Println("Listening on port 8081")
http.ListenAndServe(":8081", nil)
}
func getListings(db *sql.DB) ([]byte, error) {
var userId string = "142"
normalListings := sqlToArray(db, `******`)
manualListings := sqlToArray(db, "******")
var groupIds []string
for key := range manualListings {
groupId := "142," + manualListings[key]["group_id"]
if !stringInSlice(groupId, groupIds) {
groupIds = append(groupIds, groupId)
}
}
var groupIdsString string
groupIdsString = strings.Join(groupIds, ", ")
manualGroups := sqlToArray(db, "*****")
for key := range manualListings {
for key2 := range manualGroups {
groupId := "142," + manualListings[key]["group_id"]
if groupId == manualGroups[key]["ticket_id"] {
entry := make(map[string]string)
entry["ticket_id"] = manualListings[key]["listing_id"]
entry["date_created"] = manualGroups[key2]["date_created"]
normalListings = append(normalListings, entry)
}
}
}
return json.Marshal(normalListings)
}
func stringInSlice(a string, list []string) bool {
for _, b := range list {
if b == a {
return true
}
}
return false
}
func sqlToArray(db *sql.DB, sqlString string) []map[string]string {
rows, err := db.Query(sqlString)
checkErr(err)
columns, err := rows.Columns()
count := len(columns)
values := make([]interface{}, count)
valuePtrs := make([]interface{}, count)
tableData := make([]map[string]string, 0)
for rows.Next() {
for i := 0; i < count; i++ {
valuePtrs[i] = &values[i]
}
rows.Scan(valuePtrs...)
entry := make(map[string]string)
for i, col := range columns {
val := values[i]
b, ok := val.([]byte)
if ok {
entry[col] = string(b)
} else {
entry[col] = string(b)
}
}
tableData = append(tableData, entry)
}
return tableData
}
func checkErr(err error) {
if err != nil {
panic(err)
}
}
Edits:
Changed the code to use statically typed structs instead of using maps and Identified the problematic piece of code
New code:
package main
import (
"database/sql"
"encoding/json"
"fmt"
_ "github.com/go-sql-driver/mysql"
"net/http"
"strings"
)
type listingsType struct {
TicketId string
DateCreated string
}
func main() {
db, err := sql.Open("mysql", "******")
checkErr(err)
fmt.Println("Handle Request setup... OK")
http.HandleFunc("/", func(w http.ResponseWriter, req *http.Request) {
jsonData, err := getListings(db)
checkErr(err)
w.Write([]byte(jsonData))
})
fmt.Println("Starting Server....")
fmt.Println("Listening on port 8081")
http.ListenAndServe(":8081", nil)
}
func getListings(db *sql.DB) ([]byte, error) {
var userId string = "142"
normalListings := sqlToArray(db, `*****`)
manualListings := sqlToArray(db, "*****")
var groupIds []string
for _, elem := range manualListings {
groupId := "142," + elem.DateCreated
if !stringInSlice(groupId, groupIds) {
groupIds = append(groupIds, groupId)
}
}
var groupIdsString string
groupIdsString = strings.Join(groupIds, ", ")
fmt.Println(groupIdsString)
manualGroups := sqlToArray(db, "******")
for _, manualList := range manualListings {
for _, manualGroup := range manualGroups {
groupId := "142," + manualList.DateCreated
if groupId == manualGroup.TicketId {
var entry listingsType
entry.TicketId = manualList.TicketId
entry.DateCreated = manualGroup.DateCreated
normalListings = append(normalListings, entry)
}
}
}
return json.Marshal(normalListings)
}
func stringInSlice(a string, list []string) bool {
for _, b := range list {
if b == a {
return true
}
}
return false
}
func sqlToArray(db *sql.DB, sqlString string) []listingsType {
rows, err := db.Query(sqlString)
checkErr(err)
tableData := []listingsType{}
for rows.Next() {
var entry listingsType
rows.Scan(&entry.TicketId, &entry.DateCreated)
tableData = append(tableData, entry)
}
return tableData
}
func checkErr(err error) {
if err != nil {
panic(err)
}
}
Problematic piece of code
As soon as I comment the following block of code the my code performs just fine.
Any idea what is wrong with this loop ?
for _, manualList := range manualListings {
for _, manualGroup := range manualGroups {
groupId := "142," + manualList.DateCreated
if groupId == manualGroup.TicketId {
var entry listingsType
entry.TicketId = manualList.TicketId
entry.DateCreated = manualGroup.DateCreated
normalListings = append(normalListings, entry)
}
}
}
Profiling Result
Ok So got it fixed by the way. I brought the request time from 5k+ MS to 500 MS, now finally my PHP code is slower which is 900 MS
I got rid of the inner loop to search by implementing a separate function to get SQL data in a different data structure in key value of maps instead of searching whole arrays I created the value as key which I was looking for in array, This way I got rid of the second loop which was making trouble by linear search on strings.
manualGroups := sqlToArraySpecial(db, "****")
for _, manualList := range manualListings {
//index := stringInSliceArray(manualList.DateCreated, manualGroups)
groupId := "142," + manualList.DateCreated
var entry listingsType
entry.TicketId = manualList.TicketId
entry.DateCreated = manualGroups[groupId]
normalListings = append(normalListings, entry)
}
and here is my new SQL function
func sqlToArraySpecial(db *sql.DB, sqlString string) map[string]string {
rows, err := db.Query(sqlString)
checkErr(err)
tableData := make(map[string]string)
for rows.Next() {
var date_created string
var ticket_id string
rows.Scan(&ticket_id, &date_created)
//fmt.Println(ticket_id)
tableData[ticket_id] = date_created
}
return tableData
}
although this is a dead post, I cannot help but note since no one else has (explicitly), and it's kinda' important to know why:
nested for loops exhibit quadratic running time complexity,
and as you stated, searching an array takes linear time,
so simply put:
the computation time will increase by the square of the total number of elements.
now to answer why this isn't the case in php -- well cause you were using a hash map which:
can be said, exhibits a constant time complexity
again, simply put this means that:
look up time is not related to the number of elements (aka the size of the collection).
see: big-o
with all that being said, please note:
i don't know php,
thus do not the know details regarding how the language implements arrays, and
im not an algorithm expert,
so please treat my post as a statement for the general case.
pce