Related
I have two columns in a CSV file. I am accessing only the first column using the SearchData() function.
The problem is that I want to access the data as an array but when I return an array string in the AccessData() function and write the products[0] in the SearchData(), it gives me all the data by removing the bracket sign [] only and when I write products[1], it gives me runtime error: index out of range [1] with length 1.
Required result
products[0] = First Item
products[1] = Second Item
...
so on
Code
func AccessData(number int) string {
content, err := ioutil.ReadFile("products/data1.csv")
if err != nil {
log.Fatal(err)
}
Data := string(content)
sliceData := strings.Split(Data, ",")
return sliceData[number]
}
func SearchData(){
for i := 0; i <= 34; i = i + 2 {
products := AccessData(i)
fmt.Println(products)
}
}
This should do the trick:
func firstColumns(filename string) []string {
f, err := os.Open(filename)
if err != nil {
log.Fatal(err)
}
defer f.Close()
r := csv.NewReader(f)
var result []string
for {
row, err := r.Read()
if err != nil {
if err == io.EOF {
break
}
log.Fatal(err)
}
if len(row) > 0 {
result = append(result, row[0])
}
}
return result
}
func main() {
data := firstColumns("products/data1.csv")
fmt.Println(data)
fmt.Println(data[1])
}
This turns the the first column of every row into a []string which can be access index.
The output is:
[First item Second item]
Second item
Let's say I have a list of student cities and the size of it could be 100 or 1000, and I want to filter out all duplicates cities.
I want a generic solution that I can use to remove all duplicate strings from any slice.
I am new to Go Language, So I tried to do it by looping and checking if the element exists using another loop function.
Students' Cities List (Data):
studentsCities := []string{"Mumbai", "Delhi", "Ahmedabad", "Mumbai", "Bangalore", "Delhi", "Kolkata", "Pune"}
Functions that I created, and it's doing the job:
func contains(s []string, e string) bool {
for _, a := range s {
if a == e {
return true
}
}
return false
}
func removeDuplicates(strList []string) []string {
list := []string{}
for _, item := range strList {
fmt.Println(item)
if contains(list, item) == false {
list = append(list, item)
}
}
return list
}
My solution test
func main() {
studentsCities := []string{"Mumbai", "Delhi", "Ahmedabad", "Mumbai", "Bangalore", "Delhi", "Kolkata", "Pune"}
uniqueStudentsCities := removeDuplicates(studentsCities)
fmt.Println(uniqueStudentsCities) // Expected output [Mumbai Delhi Ahmedabad Bangalore Kolkata Pune]
}
I believe that the above solution that I tried is not an optimum solution. Therefore, I need help from you guys to suggest the fastest way to remove duplicates from the slice?
I checked StackOverflow, this question is not being asked yet, so I didn't get any solution.
I found Burak's and Fazlan's solution helpful. Based on that, I implemented the simple functions that help to remove or filter duplicate data from slices of strings, integers, or any other types with generic approach.
Here are my three functions, first is generic, second one for strings and last one for integers of slices. You have to pass your data and return all the unique values as a result.
Generic solution: => Go v1.18
func removeDuplicate[T string | int](sliceList []T) []T {
allKeys := make(map[T]bool)
list := []T{}
for _, item := range sliceList {
if _, value := allKeys[item]; !value {
allKeys[item] = true
list = append(list, item)
}
}
return list
}
To remove duplicate strings from slice:
func removeDuplicateStr(strSlice []string) []string {
allKeys := make(map[string]bool)
list := []string{}
for _, item := range strSlice {
if _, value := allKeys[item]; !value {
allKeys[item] = true
list = append(list, item)
}
}
return list
}
To remove duplicate integers from slice:
func removeDuplicateInt(intSlice []int) []int {
allKeys := make(map[int]bool)
list := []int{}
for _, item := range intSlice {
if _, value := allKeys[item]; !value {
allKeys[item] = true
list = append(list, item)
}
}
return list
}
You can update the slice type, and it will filter out all duplicates data for all types of slices.
Here is the GoPlayground link: https://go.dev/play/p/iyb97KcftMa
Adding this answer which worked for me, does require/include sorting, however.
func removeDuplicateStrings(s []string) []string {
if len(s) < 1 {
return s
}
sort.Strings(s)
prev := 1
for curr := 1; curr < len(s); curr++ {
if s[curr-1] != s[curr] {
s[prev] = s[curr]
prev++
}
}
return s[:prev]
}
For fun, I tried using generics! (Go 1.18+ only)
type SliceType interface {
~string | ~int | ~float64 // add more *comparable* types as needed
}
func removeDuplicates[T SliceType](s []T) []T {
if len(s) < 1 {
return s
}
// sort
sort.SliceStable(s, func(i, j int) bool {
return s[i] < s[j]
})
prev := 1
for curr := 1; curr < len(s); curr++ {
if s[curr-1] != s[curr] {
s[prev] = s[curr]
prev++
}
}
return s[:prev]
}
Go Playground Link with tests: https://go.dev/play/p/bw1PP1osJJQ
You can do in-place replacement guided with a map:
processed := map[string]struct{}{}
w := 0
for _, s := range cities {
if _, exists := processed[s]; !exists {
// If this city has not been seen yet, add it to the list
processed[s] = struct{}{}
cities[w] = s
w++
}
}
cities = cities[:w]
reduce memory usage:
package main
import (
"fmt"
"reflect"
)
type void struct{}
func main() {
digits := [6]string{"one", "two", "three", "four", "five", "five"}
set := make(map[string]void)
for _, element := range digits {
set[element] = void{}
}
fmt.Println(reflect.ValueOf(set).MapKeys())
}
p.s. playground
Simple to understand.
func RemoveDuplicate(array []string) []string {
m := make(map[string]string)
for _, x := range array {
m[x] = x
}
var ClearedArr []string
for x, _ := range m {
ClearedArr = append(ClearedArr, x)
}
return ClearedArr
}
If you want to don't waste memory allocating another array for copy the values, you can remove in place the value, as following:
package main
import "fmt"
var studentsCities = []string{"Mumbai", "Delhi", "Ahmedabad", "Mumbai", "Bangalore", "Delhi", "Kolkata", "Pune"}
func contains(s []string, e string) bool {
for _, a := range s {
if a == e {
return true
}
}
return false
}
func main() {
fmt.Printf("Cities before remove: %+v\n", studentsCities)
for i := 0; i < len(studentsCities); i++ {
if contains(studentsCities[i+1:], studentsCities[i]) {
studentsCities = remove(studentsCities, i)
i--
}
}
fmt.Printf("Cities after remove: %+v\n", studentsCities)
}
func remove(slice []string, s int) []string {
return append(slice[:s], slice[s+1:]...)
}
Result:
Cities before remove: [Mumbai Delhi Ahmedabad Mumbai Bangalore Delhi Kolkata Pune]
Cities after remove: [Ahmedabad Mumbai Bangalore Delhi Kolkata Pune]
It can also be done with a set-like map:
ddpStrings := []string{}
m := map[string]struct{}{}
for _, s := range strings {
if _, ok := m[scopeStr]; ok {
continue
}
ddpStrings = append(ddpStrings, s)
m[s] = struct{}{}
}
func UniqueNonEmptyElementsOf(s []string) []string {
unique := make(map[string]bool, len(s))
var us []string
for _, elem := range s {
if len(elem) != 0 {
if !unique[elem] {
us = append(us, elem)
unique[elem] = true
}
}
}
return us
}
send the duplicated splice to the above function, this will return the splice with unique elements.
func main() {
studentsCities := []string{"Mumbai", "Delhi", "Ahmedabad", "Mumbai", "Bangalore", "Delhi", "Kolkata", "Pune"}
uniqueStudentsCities := UniqueNonEmptyElementsOf(studentsCities)
fmt.Println(uniqueStudentsCities)
}
Here's a mapless index based slice's duplicate "remover"/trimmer. It use a sort method.
The n value is always 1 value lower than the total of non duplicate elements that's because this methods compare the current (consecutive/single) elements with the next (consecutive/single) elements and there is no matches after the lasts so you have to pad it to include the last.
Note that this snippet doesn't empty the duplicate elements into a nil value. However since the n+1 integer start at the duplicated item's indexes, you can loop from said integer and nil the rest of the elements.
sort.Strings(strs)
for n, i := 0, 0; ; {
if strs[n] != strs[i] {
if i-n > 1 {
strs[n+1] = strs[i]
}
n++
}
i++
if i == len(strs) {
if n != i {
strs = strs[:n+1]
}
break
}
}
fmt.Println(strs)
Based on Riyaz's solution, you can use generics since Go 1.18
func removeDuplicate[T string | int](tSlice []T) []T {
allKeys := make(map[T]bool)
list := []T{}
for _, item := range tSlice {
if _, value := allKeys[item]; !value {
allKeys[item] = true
list = append(list, item)
}
}
return list
}
Generics minimizes code duplication.
Go Playground link : https://go.dev/play/p/Y3fEtHJpP7Q
So far #snassr has given the best answer as it is the most optimized way in terms of memory (no extra memory) and runtime (nlogn). But one thing I want to emphasis here is if we want to delete any index/element of an array we should loop from end to start as it reduces complexity. If we loop from start to end then if we delete nth index then we will accidentally miss the nth element (which was n+1th before deleting nth element) as in the next iteration we will get the n+1th element.
Example Code
func Dedup(strs []string) {
sort.Strings(strs)
for i := len(strs) - 1; i > 0; i-- {
if strs[i] == strs[i-1] {
strs = append(strs[:i], strs[i+1:]...)
}
}
}
try: https://github.com/samber/lo#uniq
names := lo.Uniq[string]([]string{"Samuel", "John", "Samuel"})
// []string{"Samuel", "John"}
I need to compute the hash (md5 is ok) for a large number of files. So, in Go I have this code:
package main
import (
"io"
"os"
"fmt"
"path/filepath"
"crypto/md5"
"encoding/hex"
)
func strSliceRemove(slice []string, str string) []string {
var tempSlice []string;
for _, item := range slice {
if item != str {
tempSlice = append(tempSlice, item)
}
}
return tempSlice
}
func fileMD5(path string) (string, error) {
var returnMD5String string
file, err := os.Open(path)
if err != nil {
return returnMD5String, err
}
defer file.Close()
hash := md5.New()
if _, err := io.Copy(hash, file); err != nil {
return returnMD5String, err
}
hashInBytes := hash.Sum(nil)[:16]
returnMD5String = hex.EncodeToString(hashInBytes)
return returnMD5String, nil
}
func main() {
var doRead func(string)
doRead = func(sd string) {
filepath.Walk(sd, func(path string, f os.FileInfo, err error) error {
resolvedPath, resolvedPathErr := filepath.EvalSymlinks(path)
if resolvedPathErr != nil {
return nil
}
if f.Mode()&os.ModeSymlink == os.ModeSymlink {
doRead(resolvedPath)
} else {
if !f.IsDir() {
md5, _ := fileMD5(path)
fmt.Printf("%s\n", md5)
}
}
return nil
})
}
doRead("/tmp/electron")
return
}
It hashes correctly 1400 files in almost one second. If I use my OSX md5 command line utility, it takes more than 10 times the time. It is 10 times slower:
for FILE in `find /tmp/electron`; do
if [ ! -d "$FILE" ]; then
md5 $FILE;
fi;
done;
I tried a basic c program that does the same (based on this answer How to calculate the MD5 hash of a large file in C?) and still the time seems more or less 10 seconds.
What kind of strategy / library does crypto/md5 use?
I'm trying to unmarshal a JSON array of the following type:
[
{"abc's": "n;05881364"},
{"abcoulomb": "n;13658345"},
{"abcs": "n;05881364"}
]
into a map[string]string. This question Golang parse JSON array into data structure almost answered my problem, but mine is a truly map, not an array of maps. Unmarshaling into a []map[string]string worked but I now get a map of map[string]string, not a simple map of string as it should be
There is no way to do it directly with the json package; you have to do the conversion yourself. This is simple:
package main
import (
"encoding/json"
"fmt"
)
func main() {
data := []byte(`
[
{"abc's": "n;05881364"},
{"abcoulomb": "n;13658345"},
{"abcs": "n;05881364"}
]
`)
var mapSlice []map[string]string
if err := json.Unmarshal(data, &mapSlice); err != nil {
panic(err)
}
resultingMap := map[string]string{}
for _, m := range mapSlice {
for k, v := range m {
resultingMap[k] = v
}
}
fmt.Println(resultingMap)
}
Output
map[abc's:n;05881364 abcoulomb:n;13658345 abcs:n;05881364]
An alternative (though very similar) to Alex's answer is to define your own type along with an UnmarshalJSON function.
package main
import (
"encoding/json"
"fmt"
)
type myMapping map[string]string
func (mm myMapping) UnmarshalJSON(b []byte) error {
var temp []map[string]string
if err := json.Unmarshal(b, &temp); err != nil {
return err
}
for _, m := range temp {
for k, v := range m {
mm[k] = v
}
}
return nil
}
func main() {
data := []byte(`
[
{"abc's": "n;05881364"},
{"abcoulomb": "n;13658345"},
{"abcs": "n;05881364"}
]`)
resultingMap := myMapping{}
if err := json.Unmarshal(data, &resultingMap); err != nil {
panic(err)
}
fmt.Println(resultingMap)
}
Playground
I found some posts on how to decoding json nested objects in go, I tried to apply the answers to my problem, but I only managed to find a partial solution.
My json file look like this:
{
"user":{
"gender":"male",
"age":"21-30",
"id":"80b1ea88-19d7-24e8-52cc-65cf6fb9b380"
},
"trials":{
"0":{"index":0,"word":"WORD 1","Time":3000,"keyboard":true,"train":true,"type":"A"},
"1":{"index":1,"word":"WORD 2","Time":3000,"keyboard":true,"train":true,"type":"A"},
},
"answers":{
"training":[
{"ans":0,"RT":null,"gtAns":"WORD 1","correct":0},
{"ans":0,"RT":null,"gtAns":"WORD 2","correct":0}
],
"test":[
{"ans":0,"RT":null,"gtAns":true,"correct":0},
{"ans":0,"RT":null,"gtAns":true,"correct":0}
]
}
}
Basically I need to parse the information inside it and save them into go structure. With the code below I managed to extract the user information, but it looks too complicated to me and it won't be easy to apply the same thing to the "answers" fields which contains 2 arrays with more than 100 entries each. Here the code I'm using now:
type userDetails struct {
Id string `json:"id"`
Age string `json:"age"`
Gender string `json:"gender"`
}
type jsonRawData map[string]interface {
}
func getJsonContent(r *http.Request) ( userDetails) {
defer r.Body.Close()
jsonBody, err := ioutil.ReadAll(r.Body)
var userDataCurr userDetails
if err != nil {
log.Printf("Couldn't read request body: %s", err)
} else {
var f jsonRawData
err := json.Unmarshal(jsonBody, &f)
if err != nil {
log.Printf("Error unmashalling: %s", err)
} else {
user := f["user"].(map[string]interface{})
userDataCurr.Id = user["id"].(string)
userDataCurr.Gender = user["gender"].(string)
userDataCurr.Age = user["age"].(string)
}
}
return userDataCurr
}
Any suggestions? Thanks a lot!
You're doing it the hard way by using interface{} and not taking advantage of what encoding/json gives you.
I'd do it something like this (note I assumed there was an error with the type of the "gtAns" field and I made it a boolean, you don't give enough information to know what to do with the "RT" field):
package main
import (
"encoding/json"
"fmt"
"io"
"log"
"strconv"
"strings"
)
const input = `{
"user":{
"gender":"male",
"age":"21-30",
"id":"80b1ea88-19d7-24e8-52cc-65cf6fb9b380"
},
"trials":{
"0":{"index":0,"word":"WORD 1","Time":3000,"keyboard":true,"train":true,"type":"A"},
"1":{"index":1,"word":"WORD 2","Time":3000,"keyboard":true,"train":true,"type":"A"}
},
"answers":{
"training":[
{"ans":0,"RT":null,"gtAns":true,"correct":0},
{"ans":0,"RT":null,"gtAns":true,"correct":0}
],
"test":[
{"ans":0,"RT":null,"gtAns":true,"correct":0},
{"ans":0,"RT":null,"gtAns":true,"correct":0}
]
}
}`
type Whatever struct {
User struct {
Gender Gender `json:"gender"`
Age Range `json:"age"`
ID IDString `json:"id"`
} `json:"user"`
Trials map[string]struct {
Index int `json:"index"`
Word string `json:"word"`
Time int // should this be a time.Duration?
Train bool `json:"train"`
Type string `json:"type"`
} `json:"trials"`
Answers map[string][]struct {
Answer int `json:"ans"`
RT json.RawMessage // ??? what type is this
GotAnswer bool `json:"gtAns"`
Correct int `json:"correct"`
} `json:"answers"`
}
// Using some custom types to show custom marshalling:
type IDString string // TODO custom unmarshal and format/error checking
type Gender int
const (
Male Gender = iota
Female
)
func (g *Gender) UnmarshalJSON(b []byte) error {
var s string
err := json.Unmarshal(b, &s)
if err != nil {
return err
}
switch strings.ToLower(s) {
case "male":
*g = Male
case "female":
*g = Female
default:
return fmt.Errorf("invalid gender %q", s)
}
return nil
}
func (g Gender) MarshalJSON() ([]byte, error) {
switch g {
case Male:
return []byte(`"male"`), nil
case Female:
return []byte(`"female"`), nil
default:
return nil, fmt.Errorf("invalid gender %v", g)
}
}
type Range struct{ Min, Max int }
func (r *Range) UnmarshalJSON(b []byte) error {
// XXX could be improved
_, err := fmt.Sscanf(string(b), `"%d-%d"`, &r.Min, &r.Max)
return err
}
func (r Range) MarshalJSON() ([]byte, error) {
return []byte(fmt.Sprintf(`"%d-%d"`, r.Min, r.Max)), nil
// Or:
b := make([]byte, 0, 8)
b = append(b, '"')
b = strconv.AppendInt(b, int64(r.Min), 10)
b = append(b, '-')
b = strconv.AppendInt(b, int64(r.Max), 10)
b = append(b, '"')
return b, nil
}
func fromJSON(r io.Reader) (Whatever, error) {
var x Whatever
dec := json.NewDecoder(r)
err := dec.Decode(&x)
return x, err
}
func main() {
// Use http.Get or whatever to get an io.Reader,
// (e.g. response.Body).
// For playground, substitute a fixed string
r := strings.NewReader(input)
// If you actually had a string or []byte:
// var x Whatever
// err := json.Unmarshal([]byte(input), &x)
x, err := fromJSON(r)
if err != nil {
log.Fatal(err)
}
fmt.Println(x)
fmt.Printf("%+v\n", x)
b, err := json.MarshalIndent(x, "", " ")
if err != nil {
log.Fatal(err)
}
fmt.Printf("Re-marshalled: %s\n", b)
}
Playground
Of course if you want to reuse those sub-types you could pull them out of the "Whatever" type into their own named types.
Also, note the use of a json.Decoder rather than reading in all the data ahead of time. Usually try and avoid any use of ioutil.ReadAll unless you really need all the data at once.