This is my first Go program. I'm learning the language but it's a bit difficult to understand all the concepts so in order to practice I wrote a code to detect same file. It's a simple program which recursively check for duplicated files in a directory.
but:
how to detect duplicate file in directory files
the matter isn't directory recursively. the matter is how to compare
You could take the hash of each file body and then compare the hashes in a dictionary/map.
package main
import (
"crypto/md5"
"fmt"
"io"
"io/ioutil"
"log"
"os"
)
func main() {
contentHashes := make(map[string]string)
if err := readDir("./", contentHashes); err != nil {
log.Fatal(err)
}
}
func readDir(dirName string, contentHashes map[string]string) (err error) {
filesInfos, err := ioutil.ReadDir(dirName)
if err != nil {
return
}
for _, fi := range filesInfos {
if fi.IsDir() {
err := readDir(dirName+fi.Name()+"/", contentHashes)
if err != nil {
return err
}
} else {
// The important bits for this question
location := dirName + fi.Name()
// open the file
f, err := os.Open(location)
if err != nil {
return err
}
h := md5.New()
// copy the file body into the hash function
if _, err := io.Copy(h, f); err != nil {
return err
}
// Check if a file body with the same hash already exists
key := fmt.Sprintf("%x", h.Sum(nil))
if val, exists := contentHashes[key]; exists {
fmt.Println("Duplicate found", val, location)
} else {
contentHashes[key] = location
}
}
}
return
}
use sha256 to compare files
example:
package main
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"os"
"path/filepath"
"sync"
"flag"
"runtime"
"io"
)
var dir string
var workers int
type Result struct {
file string
sha256 [32]byte
}
func worker(input chan string, results chan<- *Result, wg *sync.WaitGroup) {
for file := range input {
var h = sha256.New()
var sum [32]byte
f, err := os.Open(file)
if err != nil {
fmt.Fprintln(os.Stderr, err)
continue
}
if _, err = io.Copy(h, f); err != nil {
fmt.Fprintln(os.Stderr, err)
f.Close()
continue
}
f.Close()
copy(sum[:], h.Sum(nil))
results <- &Result{
file: file,
sha256: sum,
}
}
wg.Done()
}
func search(input chan string) {
filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
fmt.Fprintln(os.Stderr, err)
} else if info.Mode().IsRegular() {
input <- path
}
return nil
})
close(input)
}
func main() {
flag.StringVar(&dir, "dir", ".", "directory to search")
flag.IntVar(&workers, "workers", runtime.NumCPU(), "number of workers")
flag.Parse()
fmt.Printf("Searching in %s using %d workers...\n", dir, workers)
input := make(chan string)
results := make(chan *Result)
wg := sync.WaitGroup{}
wg.Add(workers)
for i := 0; i < workers; i++ {
go worker(input, results, &wg)
}
go search(input)
go func() {
wg.Wait()
close(results)
}()
counter := make(map[[32]byte][]string)
for result := range results {
counter[result.sha256] = append(counter[result.sha256], result.file)
}
for sha, files := range counter {
if len(files) > 1 {
fmt.Printf("Found %d duplicates for %s: \n", len(files), hex.EncodeToString(sha[:]))
for _, f := range files {
fmt.Println("-> ", f)
}
}
}
}
Related
I am new to go, I am trying to get 3 functions to return them as follows
function 1 - To return memory usage of the system
function 2 - To return disk usage of the system
function 3 - To return CPU usage of the system
So far I am able to do this much only (PS: trying not to use any libs)
func getCPUTrack() (idle, total uint64) {
contents, err := ioutil.ReadFile("/proc/stat")
if err != nil {
return
}
lines := strings.Split(string(contents), "\n")
for _, line := range lines {
fields := strings.Fields(line)
if fields[0] == "cpu" {
numFields := len(fields)
for i := 1; i < numFields; i++ {
val, err := strconv.ParseUint(fields[i], 10, 64)
if err != nil {
fmt.Println("Error: ", i, fields[i], err)
}
total += val // tally up all the numbers to get total ticks
if i == 4 { // idle is the 5th field in the cpu line
idle = val
}
}
return
}
}
return
}
idle0, total0 := getCPUTrack()
time.Sleep(3 * time.Second)
idle1, total1 := getCPUTrack()
idleTicks := float64(idle1 - idle0)
totalTicks := float64(total1 - total0)
cpuUsage := 100 * (totalTicks - idleTicks) / totalTicks
fmt.Printf("CPU usage is %f%% [busy: %f, total: %f]\n", cpuUsage, totalTicks-idleTicks, totalTicks)
Can anyone help me with this?
Thanks
There is a pretty cool library you can use Go-osstat, or see in detail how it is implemented so you can build your own.
I've developed a client that uses this library and runs in the background sending Memory and CPU usage metrics
package main
import (
"fmt"
"os"
"time"
"github.com/mackerelio/go-osstat/cpu"
"github.com/mackerelio/go-osstat/memory"
)
const (
memoryMetric = "memory"
cpuMetric = "cpu"
retries = 10
)
type client struct {
packageName string
memoryIteration int
cpuIteration int
OSClient OSClient
}
type Client interface {
EmitMetrics()
}
type osClient struct{}
type OSClient interface {
GetMemory() (*memory.Stats, error)
GetCPU() (*cpu.Stats, error)
}
func (osc osClient) GetMemory() (*memory.Stats, error) { return memory.Get() }
func (osc osClient) GetCPU() (*cpu.Stats, error) { return cpu.Get() }
func NewClient(packageName string, memoryIteration, cpuIteration int) Client {
return newClient(packageName, memoryIteration, cpuIteration, osClient{})
}
func newClient(packageName string, memoryIteration, cpuIteration int, osclient OSClient) Client {
return &client{
packageName: packageName,
memoryIteration: memoryIteration,
cpuIteration: cpuIteration,
OSClient: osclient,
}
}
func (c *client) EmitMetrics() {
protectFromPanic := func(metric string) {
if r := recover(); r != nil {
fmt.Printf(fmt.Sprintf("Recover from fail sending %s metrics for %s", metric, c.packageName), zap.Any("recover", r))
}
}
c.sendMemoryMetrics(protectFromPanic)
c.sendCPUMetrics(protectFromPanic)
}
func (c *client) sendMemoryMetrics(f func(string)) {
count := 0
go func() {
defer func() {
f(memoryMetric)
}()
for {
memory, err := c.OSClient.GetMemory()
if err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err)
count++
if count == retries {
return
}
} else {
count = 0
EmitMemoryMetrics(c.packageName, memory.Total, memory.Used, memory.Cached, memory.Free)
time.Sleep(time.Duration(c.memoryIteration) * time.Millisecond)
}
}
}()
}
func (c *client) sendCPUMetrics(f func(string)) {
go func() {
defer func() {
f(cpuMetric)
}()
for {
before, err := c.OSClient.GetCPU()
if err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err)
return
}
time.Sleep(time.Duration(c.cpuIteration) * time.Millisecond)
after, err := c.OSClient.GetCPU()
if err != nil {
fmt.Fprintf(os.Stderr, "%s\n", err)
return
}
total := float64(after.Total - before.Total)
EmitCPUMetrics(c.packageName,
total,
float64(after.User-before.User)/total*100,
float64(after.System-before.System)/total*100,
float64(after.Idle-before.Idle)/total*100)
}
}()
}
I have a text file with this content:
192.168.1.2$nick
192.168.1.3$peter
192.168.1.4$mike
192.168.1.5$joe
A web server is running on each IP in the list.
I need to check if the servers are currently available and output a message if not available.
I wrote a small application. It works, but periodically produces incorrect results - it does not output messages for servers that are not actually available.
I can't figure out what's going on and in fact I'm not sure if I'm using http.Client correctly in goroutines.
Help me plese.
package main
import "fmt"
import "os"
import "strings"
import "io/ioutil"
import "net/http"
import "crypto/tls"
import "time"
import "strconv"
func makeGetRequest(URL string, c *http.Client) {
resp, err := c.Get(URL)
if err != nil {
fmt.Println(err)
}
defer resp.Body.Close()
if !((resp.StatusCode >= 200 && resp.StatusCode <= 209)) {
fmt.Printf("%s-%d\n", URL, resp.StatusCode)
}
}
func makeHeadRequestAsync(tasks chan string, done chan bool, c *http.Client) {
for {
URL := <-tasks
if len(URL) == 0 {
break
}
resp, err := c.Head(URL)
if err != nil {
fmt.Println(err)
continue
}
defer resp.Body.Close()
if !((resp.StatusCode >= 200 && resp.StatusCode <= 209)) {
makeGetRequest(URL, c) // !!! Some servers do not support HEAD requests. !!!
}
}
done <- true
}
func main() {
if len(os.Args) < 3 {
fmt.Println("Usage: main <number of threads> <input-file>")
os.Exit(0)
}
threadsNum, err := strconv.Atoi(os.Args[1])
if err != nil {
fmt.Println("Bad first parameter. Exit.")
os.Exit(0)
}
http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
client := &http.Client {
Timeout: 30 * time.Second,
}
file, err := ioutil.ReadFile(os.Args[2])
if err != nil {
fmt.Println(err)
os.Exit(1)
}
fileLines := strings.Split(string(file), "\n")
tasks := make(chan string, threadsNum)
done := make(chan bool)
for i := 0; i < threadsNum; i++ {
go makeHeadRequestAsync(tasks, done, client)
}
for i := 0; i < len(fileLines); i++ {
tasks <- strings.Split(string(fileLines[i]), "$")[0:1][0]
}
for i := 0; i < threadsNum; i++ {
tasks <- ""
<-done
}
}
The program terminates when the main() function returns. The code does not ensure that all goroutines are done before returning from main.
Fix by doing the following:
Use a sync.WaitGroup to wait for the goroutines to complete before exiting the program.
Exit the goroutine when tasks is closed. Close tasks after submitting all work.
Here's the code:
func makeHeadRequestAsync(tasks chan string, wg *sync.WaitGroup, c *http.Client) {
defer wg.Done()
// for range on channel breaks when the channel is closed.
for URL := range tasks {
resp, err := c.Head(URL)
if err != nil {
fmt.Println(err)
continue
}
defer resp.Body.Close()
if !(resp.StatusCode >= 200 && resp.StatusCode <= 209) {
makeGetRequest(URL, c) // !!! Some servers do not support HEAD requests. !!!
}
}
}
func main() {
if len(os.Args) < 3 {
fmt.Println("Usage: main <number of threads> <input-file>")
os.Exit(0)
}
threadsNum, err := strconv.Atoi(os.Args[1])
if err != nil {
fmt.Println("Bad first parameter. Exit.")
os.Exit(0)
}
http.DefaultTransport.(*http.Transport).TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
client := &http.Client{
Timeout: 30 * time.Second,
}
file, err := ioutil.ReadFile(os.Args[2])
if err != nil {
fmt.Println(err)
os.Exit(1)
}
fileLines := strings.Split(string(file), "\n")
tasks := make(chan string)
var wg sync.WaitGroup
wg.Add(threadsNum)
for i := 0; i < threadsNum; i++ {
go makeHeadRequestAsync(tasks, &wg, client)
}
for i := 0; i < len(fileLines); i++ {
tasks <- strings.Split(string(fileLines[i]), "$")[0:1][0]
}
close(tasks)
wg.Wait()
}
My java code below:
public static byte[] gzip(String str) throws Exception{
ByteArrayOutputStream baos = new ByteArrayOutputStream();
GZIPOutputStream gos = new GZIPOutputStream(baos);
gos.write(str.getBytes("UTF-8"));
gos.close();
return baos.toByteArray();
}
How to gzip string and return byte array in golang as my java done?
Here is complete example of gzipString function which uses standard library compress/gzip
package main
import (
"bytes"
"compress/gzip"
"fmt"
)
func gzipString(src string) ([]byte, error) {
var buf bytes.Buffer
zw := gzip.NewWriter(&buf)
_, err := zw.Write([]byte(src))
if err != nil {
return nil, err
}
if err := zw.Close(); err != nil {
return nil, err
}
return buf.Bytes(), nil
}
func main() {
gzippedBytes, err := gzipString("")
if err != nil {
panic(err)
}
fmt.Printf("Zipped out: %v", gzippedBytes)
}
Have a look at following snippet. Playgorund: https://play.golang.org/p/3kXBmQ-c9xE
Golang has everything in its standard library. Check https://golang.org/pkg/compress/gzip
package main
import (
"bytes"
"compress/gzip"
"fmt"
"log"
"strings"
"io"
)
func main() {
s := "Hello, playground"
// Create source reader
src := strings.NewReader(s)
buf := bytes.NewBuffer(nil)
// Create destination writer
dst := gzip.NewWriter(buf)
// copy the content as gzip compressed
_, err := io.Copy(dst, src)
if err != nil {
log.Fatal(err)
}
fmt.Println(buf.String())
}
I am downloading a large file in concurrent Chunks of 10MB using GO as shown below.
package main
import (
"fmt"
"io/ioutil"
"net/http"
"strconv"
)
func main() {
chunkSize := 1024 * 1024 * 10 // 10MB
url := "http://path/to/large/zip/file/zipfile.zip"
filepath := "zipfile.zip"
res, _ := http.Head(url)
maps := res.Header
length, _ := strconv.Atoi(maps["Content-Length"][0]) // Get the content length from the header request
// startByte and endByte determines the positions of the chunk that should be downloaded
var startByte = 0
var endByte = chunkSize - 1
for startByte < length {
if endByte > length {
endByte = length - 1
}
go func(startByte, endByte int) {
client := &http.Client {}
req, _ := http.NewRequest("GET", url, nil)
rangeHeader := fmt.Sprintf("bytes=%d-%d", startByte, endByte)
req.Header.Add("Range", rangeHeader)
resp,_ := client.Do(req)
defer resp.Body.Close()
data, _ := ioutil.ReadAll(resp.Body)
addToFile(filepath, startByte, endByte, data)
}(startByte, endByte)
startByte = endByte + 1
endByte += chunkSize
}
}
func addToFile(filepath string, startByte, endByte int, data []byte) {
// TODO: write to byte range in file
}
How should I go about creating the file, and writing to a specified byte range within the file corresponding to the byte range of the chunk?
For example, if I get the data from the bytes 262144000-272629759, the addToFile function should write to 262144000-272629759 within the zipfile.zip. Then, if data from another range is obtained, that should be written to the respective range in zipfile.zip.
Figured out how to do this. Change the addToFile function as shown below.
func addToFile(filepath string, startByte int, data []byte) {
f, err := os.OpenFile(filepath, os.O_CREATE | os.O_WRONLY, os.ModeAppend)
if err != nil {
panic("File not found")
}
whence := io.SeekStart
_, err = f.Seek(int64(startByte), whence)
f.Write(data)
f.Sync() //flush to disk
f.Close()
}
For example,
package main
import (
"fmt"
"io"
"io/ioutil"
"os"
)
func write(ws io.WriteSeeker, offset int64, p []byte) (n int, err error) {
_, err = ws.Seek(offset, io.SeekStart)
if err != nil {
return 0, err
}
n, err = ws.Write(p)
if err != nil {
return 0, err
}
return n, nil
}
func main() {
filename := `test.file`
f, err := os.Create(filename)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
defer f.Close()
buf := make([]byte, 16)
for i := range buf {
buf[i] = byte('A' + i)
}
_, err = write(f, 16, buf)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
for i := range buf {
buf[i] = byte('a' + i)
}
_, err = write(f, 0, buf)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
err = f.Close()
if err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
data, err := ioutil.ReadFile(filename)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return
}
fmt.Printf("%q\n", data)
}
Output:
"abcdefghijklmnopABCDEFGHIJKLMNOP"
I am trying to write a go function that will read in lines in a text file, sort them (alphabetize), and overwrite them back to the file. Right now, I am able to essentially emulate cat, but I can't seem to be able to manipulate the contents of the elements in read_line.
func sort() {
ff, _ := os.OpenFile(file, os.O_RDWR, 0666)
f := bufio.NewReader(ff)
for {
read_line, _ := f.ReadString('\n')
fmt.Print(read_line)
if read_line == "" {
break
}
}
ff.Close()
}
when i use ReadString, how can i store each line into a slice (or is there a better way to store them so i can manipulate them)? Then I would use the sort package in a manner similar to this:
sorted := sort.Strings(lines)
then, to write to the file, i am using something similar to the following, although i have not included it because i have not yet gotten "sort" to work:
io.WriteString(ff, (lines + "\n"))
Thank you in advance for any suggestions!
For example,
package main
import (
"bufio"
"fmt"
"os"
"sort"
)
func readLines(file string) (lines []string, err os.Error) {
f, err := os.Open(file)
if err != nil {
return nil, err
}
defer f.Close()
r := bufio.NewReader(f)
for {
const delim = '\n'
line, err := r.ReadString(delim)
if err == nil || len(line) > 0 {
if err != nil {
line += string(delim)
}
lines = append(lines, line)
}
if err != nil {
if err == os.EOF {
break
}
return nil, err
}
}
return lines, nil
}
func writeLines(file string, lines []string) (err os.Error) {
f, err := os.Create(file)
if err != nil {
return err
}
defer f.Close()
w := bufio.NewWriter(f)
defer w.Flush()
for _, line := range lines {
_, err := w.WriteString(line)
if err != nil {
return err
}
}
return nil
}
func main() {
file := `lines.txt`
lines, err := readLines(file)
if err != nil {
fmt.Println(err)
os.Exit(1)
}
sort.Strings(lines)
err = writeLines(file, lines)
if err != nil {
fmt.Println(err)
os.Exit(1)
}
}
This is a pretty simple way of doing it.
import (
"bytes"
"io/ioutil"
"sort"
)
// allow [][]byte to implement the sort.Interface interface
type lexicographically [][]byte
// bytes.Compare compares the byte slices lexicographically (alphabetically)
func (l lexicographically) Less(i, j int) bool { return bytes.Compare(l[i], l[j]) < 0 }
func (l lexicographically) Len() int { return len(l) }
func (l lexicographically) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
func SortFile(name string) error {
content, err := ioutil.ReadFile(name)
if err != nil {
return err
}
lines := bytes.Split(content, []byte{'\n'})
sort.Sort(lexicographically(lines))
content = bytes.Join(lines, []byte{'\n'})
return ioutil.WriteFile(name, content, 0644)
}
since you are about to sort the lines, you pretty much need to read the entire file. you can either slurp the file with io/ioutil.ReadAll or you can just write a small slurp function. once you have the lines of the file, sorting them can be done with a call to sort.Strings. i'll add a perhaps overly verbose version which hopefully illustrates how it can be done. i also recomment reading this excellent explanation on how go's sort package works: Go's sort package
package main
import (
"os"
"bufio"
"fmt"
"sort"
)
// slurp file into slice of lines/strings
func slurp(f string) (lines []string, e os.Error) {
var fd *os.File
var line string
var bufRd *bufio.Reader
var keepReading bool = true
fd, e = os.Open(f)
if e != nil {
return nil, e
}
defer fd.Close()
bufRd = bufio.NewReader(fd)
for keepReading {
line, e = bufRd.ReadString('\n')
switch e {
case nil:
lines = append(lines, line)
case os.EOF:
lines = append(lines, line)
keepReading = false
default:
return lines, e
}
}
return lines, nil
}
// test stuff out..
func main() {
if len(os.Args) > 1 {
lines, e := slurp(os.Args[1])
if e != nil {
fmt.Fprintf(os.Stderr,"%s\n", e)
os.Exit(1)
}
fmt.Println("\n----- unsorted -----\n")
for _, line := range lines {
fmt.Printf("%s", line)
}
fmt.Println("\n----- sorted -----\n")
sort.Strings(lines)
for _, line := range lines {
fmt.Printf("%s", line)
}
}
}
note that the sort is in-place, so it does not return anything
Just wondering how convenient is using Unix's sort for this purpose. I know it's not possible to have this code working in many deployment scenarios, but I see it worth it to mention as an option:
package main
import (
"os"
"os/exec"
)
func main() {
file := "file.txt"
command := []string{"sort", file, "-o", file}
cmd := exec.Command(command[0], command[1:]...)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
panic(err)
}
}
Thoughts?