Efficient way to read Mmap - go

I am using syscall to read a byte array out of mmap:
file, e := os.Open(path)
if e != nil {...}
defer file.Close()
fi, e := file.Stat()
if e != nil {...}
data, e := syscall.Mmap(int(file.Fd()), 0, int(fi.Size()), syscall.PROT_READ, syscall.MAP_SHARED)
if e != nil {...}
data is the binary array I need.
I am using || as a delimiter, so I can get slices by using bytes.Split:
slices := bytes.Split(data, []byte("||"))
for _, s := range slices {
    str := string(s[:])
    fmt.Println(str)
}
This works fine. I also store the total number of messages (a uint32, held in the first 8 bytes) at the beginning of the mmap.
When a new message is written in, I can get the total number of messages by reading the first 8 bytes.
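For example, assuming the counter was written little-endian into those 8 bytes (and with "encoding/binary" imported), it can be decoded directly from the mapping:
// assumes the count was stored little-endian in the first 8 bytes
n := binary.LittleEndian.Uint64(data[:8])
fmt.Println("total messages:", n)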
Assuming I have the number of messages as n, I still need to do the following to read the new message:
slices := bytes.Split(data, []byte("||"))
s := slices[n - 1]
str := string(s[:])
fmt.Println(str)
Is there a more efficient way to do this?

Related

Binary Encoding/Decoding File in Golang Gives Different Checksum

I'm working on encoding and decoding files in Go. I specifically do need the 2D array that I'm using; this is just test code to show the point. I'm not entirely sure what I'm doing wrong: I'm attempting to convert the file into a list of uint32 numbers, and then take those numbers and convert them back into a file. The problem is that the resulting file looks fine, but the checksum doesn't line up. I suspect I'm doing something wrong in the conversion to uint32. I have to do the switch/case because I have no way of knowing for sure how many bytes I'll read at the end of a given file.
package main

import (
    "bufio"
    "encoding/binary"
    "fmt"
    "io"
    "os"
)

const (
    headerSeq = 8
    body      = 24
)

type part struct {
    Seq  int
    Data uint32
}

func main() {
    f, err := os.Open("speech.pdf")
    if err != nil {
        panic(err)
    }
    defer f.Close()
    reader := bufio.NewReader(f)
    b := make([]byte, 4)
    o := make([][]byte, 0)
    var value uint32
    for {
        n, err := reader.Read(b)
        if err != nil {
            if err != io.EOF {
                panic(err)
            }
        }
        if n == 0 {
            break
        }
        fmt.Printf("len array %d\n", len(b))
        fmt.Printf("len n %d\n", n)
        switch n {
        case 1:
            value = uint32(b[0])
        case 2:
            value = uint32(uint32(b[1]) | uint32(b[0])<<8)
        case 3:
            value = uint32(uint32(b[2]) | uint32(b[1])<<8 | uint32(b[0])<<16)
        case 4:
            value = uint32(uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24)
        }
        fmt.Println(value)
        bs := make([]byte, 4)
        binary.BigEndian.PutUint32(bs, value)
        o = append(o, bs)
    }
    fo, err := os.OpenFile("test.pdf", os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0600)
    if err != nil {
        panic(err)
    }
    defer fo.Close()
    for _, ba := range o {
        _, err := fo.Write(ba)
        if err != nil {
            panic(err)
        }
    }
}
So, you want to write and read arrays of varying length in a file.
import "encoding/binary"
// You need a consistent byte order for reading and writing multi-byte data types
const order = binary.LittleEndian
var dataToWrite = []byte{ ... ... ... }
var err error
// To write a recoverable array of varying length
var w io.Writer
// First, encode the length of data that will be written
err = binary.Write(w, order, int64(len(dataToWrite)))
// Check error
err = binary.Write(w, order, dataToWrite)
// Check error
// To read a variable length array
var r io.Reader
var dataLen int64
// First, we need to know the length of data to be read
err = binary.Read(r, order, &dataLen)
// Check error
// Allocate a slice to hold the expected amount of data
dataReadIn := make([]byte, dataLen)
err = binary.Read(r, order, dataReadIn)
// Check error
This pattern works not just with byte but with any other fixed-size data type. See binary.Write for specifics about the encoding.
If the size of the encoded data is a big concern, you can save some bytes by storing the array length as a varint with binary.PutVarint and binary.ReadVarint.
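For completeness, here is a minimal end-to-end sketch of this length-prefix pattern, using a bytes.Buffer in place of a real file (an illustration, not your actual setup):

package main

import (
    "bytes"
    "encoding/binary"
    "fmt"
)

func main() {
    order := binary.LittleEndian
    payload := []byte("hello, world")

    // Write: length first, then the data itself.
    var buf bytes.Buffer
    if err := binary.Write(&buf, order, int64(len(payload))); err != nil {
        panic(err)
    }
    if err := binary.Write(&buf, order, payload); err != nil {
        panic(err)
    }

    // Read: length first, then exactly that many bytes.
    var n int64
    if err := binary.Read(&buf, order, &n); err != nil {
        panic(err)
    }
    out := make([]byte, n)
    if err := binary.Read(&buf, order, out); err != nil {
        panic(err)
    }
    fmt.Printf("%s\n", out) // hello, world
}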

Advice on panic error writing buffer to io.Writer

I'm having an issue writing []float32 data in stream order to an io.Writer buffer in Go.
I have the following 16-bit-per-pixel image, noise16.jpeg, which I open and read as follows in my test:
func TestNewFITSImageFrom2DDataWriteFloatData(t *testing.T) {
    f, err := os.Open("../../images/noise16.jpeg")
    if err != nil {
        t.Errorf("Error opening image: %s", err)
    }
    defer f.Close()

    img, err := jpeg.Decode(f)
    if err != nil {
        t.Errorf("Error decoding image: %s", err)
    }

    bounds := img.Bounds()

    ex := make([][]uint32, bounds.Dx())
    for x := 0; x < bounds.Dx(); x++ {
        col := make([]uint32, bounds.Dy())
        ex[x] = col
    }

    for j := 0; j < bounds.Dy(); j++ {
        for i := 0; i < bounds.Dx(); i++ {
            r, g, b, _ := img.At(i, j).RGBA()
            lum := 0.299*float64(r) + 0.587*float64(g) + 0.114*float64(b)
            ex[i][j] = uint32(lum)
        }
    }

    var fit = NewFITSImageFrom2DData(ex, 16, 2, int32(bounds.Dx()), int32(bounds.Dy()))

    var w io.Writer
    err = writeFloat32Array(w, fit.Data, true)
    if err != nil {
        t.Errorf("Error writing float32 array: %s", err)
    }
}
I essentially open the image and read each pixel into an "exposure" array (the []float32 data). I then pass the data to the following method, with an instantiated writer:
// Writes FITS binary body data in network byte order.
// Optionally replaces NaNs with zeros for compatibility with other software.
func writeFloat32Array(w io.Writer, data []float32, replaceNaNs bool) error {
    bufLen := 16 * 1024
    buff := make([]byte, bufLen)

    // Process the data in chunks of BUFFER_LENGTH bytes:
    for block := 0; block < len(data); block += (bufLen >> 2) {
        size := len(data) - block
        if size > (bufLen >> 2) {
            size = (bufLen >> 2)
        }
        for offset := 0; offset < size; offset++ {
            d := data[block+offset]
            if replaceNaNs && math.IsNaN(float64(d)) {
                d = 0
            }
            val := math.Float32bits(d)
            buff[(offset<<2)+0] = byte(val >> 24)
            buff[(offset<<2)+1] = byte(val >> 16)
            buff[(offset<<2)+2] = byte(val >> 8)
            buff[(offset<<2)+3] = byte(val)
        }
        _, err := w.Write(buff[:(size << 2)])
        if err != nil {
            return err
        }
    }

    // Complete the last partial block, for strictly FITS compliant software.
    bytesWritten := len(data) << 2
    lastPartialBlock := bytesWritten % 2880
    if lastPartialBlock != 0 {
        sb := strings.Builder{}
        for i := lastPartialBlock; i < 2880; i++ {
            sb.WriteRune(' ')
        }
        _, err := w.Write([]byte(sb.String()))
        if err != nil {
            fmt.Println(err)
            return err
        }
    }
    return nil
}
My testing is coming back with a panic:
panic: runtime error: invalid memory address or nil pointer dereference [recovered]
panic: runtime error: invalid memory address or nil pointer dereference
[signal SIGSEGV: segmentation violation code=0x1 addr=0x18 pc=0x119101f]
This indicates that the following line, inside the writeFloat32Array function, panics on the first iteration of the for block := 0; block < len(data); block += (bufLen >> 2) loop:
_, err := w.Write(buff[:(size << 2)])
Could anyone shed some light on what I have possibly done wrong? My instinct is that something around bufLen is incorrect; should it perhaps be the number of bytes in the image rather than 16384 bytes?
However, when I set a very large number ... I still see the same error.
I have also tried, as #jdizzle suggested, encoding with the binary package, but I am still seeing the panic:
// Writes FITS binary body data in network byte order.
// Optionally replaces NaNs with zeros for compatibility with other software.
func writeFloat32Array(w io.Writer, data []float32, replaceNaNs bool) error {
    buf := new(bytes.Buffer)
    err := binary.Write(buf, binary.BigEndian, data)
    if err != nil {
        return err
    }
    _, err = w.Write(buf.Bytes())
    if err != nil {
        return err
    }
    return nil
}
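For comparison, a minimal standalone sketch that writes float32 data in big-endian order to a concrete (non-nil) writer; a bytes.Buffer stands in for the destination here, which is an assumption for illustration rather than the original FITS output:

package main

import (
    "bytes"
    "encoding/binary"
    "fmt"
)

func main() {
    // Note: in the test above, `var w io.Writer` never assigns a concrete
    // writer, so w is nil and calling w.Write dereferences a nil interface.
    // Using an initialized writer such as a bytes.Buffer avoids that.
    data := []float32{1.5, 2.25, 3.0}

    var buf bytes.Buffer
    if err := binary.Write(&buf, binary.BigEndian, data); err != nil {
        panic(err)
    }
    fmt.Println(buf.Len(), "bytes written") // 12 bytes written
}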

Why don't goroutines write in parallel using WriteAt?

I'm experimenting a bit with reading from and writing to a file.
To write to a file concurrently I created the following function:
func write(f *os.File, b []byte, off int64, c chan int) {
    var _, err = f.WriteAt(b, off)
    check(err)
    c <- 0
}
I then create a file and 100000 goroutines to perform the write operations.
They each write an array of 16384 bytes to the hard disk:
func main() {
    path := "E:/test"
    f, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE, 0666)
    check(err)

    size := int64(16384)
    ones := make([]byte, size)
    n := int64(100000)
    c := make(chan int, n)

    for i := int64(0); i < size; i++ {
        ones[i] = 1
    }

    // Start timing
    start := time.Now()
    for i := int64(0); i < n; i++ {
        go write(f, ones, size*i, c)
    }
    for i := int64(0); i < n; i++ {
        <-c
    }
    // Check elapsed time
    fmt.Println(time.Now().Sub(start))

    err = f.Sync()
    check(err)
    err = f.Close()
    check(err)
}
In this case about 1.6 GB is written, where each goroutine writes to a non-overlapping byte range. The documentation for the io package states that "Clients of WriteAt can execute parallel WriteAt calls on the same destination if the ranges do not overlap."
So what I expect to see is that go write(f, ones, 0, c) would take much longer, since all write operations would target the same byte range.
However, after testing this, my results were quite unexpected:
Using go write(f, ones, size*i, c) took an average of about 3s.
But using go write(f, ones, 0, c) took an average of only about 480ms.
Am I using the WriteAt function in the wrong way? How could I achieve concurrent writing to non-overlapping byte ranges?
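For reference, the same non-overlapping WriteAt pattern can also be expressed with a sync.WaitGroup instead of a channel; a minimal sketch, with a smaller goroutine count and an example path, both of which are assumptions for illustration:

package main

import (
    "bytes"
    "log"
    "os"
    "sync"
)

func main() {
    f, err := os.OpenFile("test.bin", os.O_RDWR|os.O_CREATE, 0666)
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    const size = 16384
    const n = 1000
    ones := bytes.Repeat([]byte{1}, size)

    var wg sync.WaitGroup
    for i := int64(0); i < n; i++ {
        wg.Add(1)
        go func(off int64) {
            defer wg.Done()
            // Each goroutine writes to its own non-overlapping range.
            if _, err := f.WriteAt(ones, off); err != nil {
                log.Println(err)
            }
        }(int64(size) * i)
    }
    wg.Wait()

    if err := f.Sync(); err != nil {
        log.Fatal(err)
    }
}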

Newbie: Properly sizing a []byte in Go (chunking)

Go Newbie alert!
I'm not quite sure how to do this: I want to make a "file chunker" that grabs fixed-size slices out of a binary file for later upload, as a learning project.
I currently have this:
type (
    fileChunk  []byte
    fileChunks []fileChunk
)

func NumChunks(fi os.FileInfo, chunkSize int) int {
    chunks := fi.Size() / int64(chunkSize)
    if rem := fi.Size() % int64(chunkSize) != 0; rem {
        chunks++
    }
    return int(chunks)
}

// left out err checks for brevity
func chunker(filePtr *string) fileChunks {
    f, err := os.Open(*filePtr)
    defer f.Close()
    // create the initial container to hold the slices
    file_chunks := make(fileChunks, 0)
    fi, err := f.Stat()
    // show me how big the original file is
    fmt.Printf("File Name: %s, Size: %d\n", fi.Name(), fi.Size())
    // let's partition it into 10000 byte pieces
    chunkSize := 10000
    chunks := NumChunks(fi, chunkSize)
    fmt.Printf("Need %d chunks for this file", chunks)
    for i := 0; i < chunks; i++ {
        b := make(fileChunk, chunkSize) // allocate a chunk, 10000 bytes
        n1, err := f.Read(b)
        fmt.Printf("Chunk: %d, %d bytes read\n", i, n1)
        // add chunk to "container"
        file_chunks = append(file_chunks, b)
    }
    fmt.Println(len(file_chunks))
    return file_chunks
}
This all works mostly fine, but here's what happens if my file size is 31234 bytes: I end up with three slices holding the first 30000 bytes from the file, and the final "chunk" consists of 1234 "file bytes" followed by padding up to the 10000-byte chunk size. I'd like the remainder filechunk ([]byte) to be sized to 1234, not the full capacity. What would be the proper way to do this? On the receiving side I would then "stitch" together all the pieces to recreate the original file.
You need to re-slice the remainder chunk to be just the length of the last chunk read:
n1, err := f.Read(b)
fmt.Printf("Chunk: %d, %d bytes read\n", i, n1)
b = b[:n1]
This does the re-slicing for all chunks. Normally, n1 will be 10000 for all the non-remainder chunks, but there is no guarantee. The docs say "Read reads up to len(b) bytes from the File." So it's good to pay attention to n1 all the time.
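For instance, a minimal self-contained sketch of the chunk loop with that re-slice applied (the input file name is an example, not from the original post):

package main

import (
    "fmt"
    "io"
    "log"
    "os"
)

func main() {
    f, err := os.Open("input.bin")
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()

    const chunkSize = 10000
    var chunks [][]byte
    for {
        b := make([]byte, chunkSize)
        n, err := f.Read(b)
        if n > 0 {
            chunks = append(chunks, b[:n]) // keep only the bytes actually read
        }
        if err == io.EOF {
            break
        }
        if err != nil {
            log.Fatal(err)
        }
    }
    fmt.Println("chunks:", len(chunks))
}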

limitation on bytes.Buffer?

I am trying to gzip a slice of bytes using the package "compress/gzip". I am writing to a bytes.Buffer, and I am writing 45976 bytes. When I try to uncompress the content using a gzip.Reader and its Read function, I find that not all of the content is recovered. Is there some limitation on bytes.Buffer? Is there a way to bypass or alter this? Here is my code (edit):
func compress_and_uncompress() {
    var buf bytes.Buffer
    w := gzip.NewWriter(&buf)
    i, err := w.Write([]byte(long_string))
    if err != nil {
        log.Fatal(err)
    }
    w.Close()

    b2 := make([]byte, 80000)
    r, _ := gzip.NewReader(&buf)
    j, err := r.Read(b2)
    if err != nil {
        log.Fatal(err)
    }
    r.Close()

    fmt.Println("Wrote:", i, "Read:", j)
}
The output from testing (with a chosen string as long_string) is:
Wrote: 45976, Read 32768
Continue reading to get the remaining 13208 bytes. The first read returns 32768 bytes, the second read returns 13208 bytes, and the third read returns zero bytes and EOF.
For example,
package main

import (
    "bytes"
    "compress/gzip"
    "fmt"
    "io"
    "log"
)

func compress_and_uncompress() {
    var buf bytes.Buffer
    w := gzip.NewWriter(&buf)
    i, err := w.Write([]byte(long_string))
    if err != nil {
        log.Fatal(err)
    }
    w.Close()

    b2 := make([]byte, 80000)
    r, _ := gzip.NewReader(&buf)
    j := 0
    for {
        n, err := r.Read(b2[:cap(b2)])
        b2 = b2[:n]
        j += n
        if err != nil {
            if err != io.EOF {
                log.Fatal(err)
            }
            if n == 0 {
                break
            }
        }
        fmt.Println(len(b2))
    }
    r.Close()

    fmt.Println("Wrote:", i, "Read:", j)
}

var long_string string

func main() {
    long_string = string(make([]byte, 45976))
    compress_and_uncompress()
}
Output:
32768
13208
Wrote: 45976 Read: 45976
Use ioutil.ReadAll. The contract for io.Reader says it doesn't have to return all the data, and there is a good reason for that, to do with the sizes of internal buffers. ioutil.ReadAll keeps reading from an io.Reader until EOF.
E.g. (untested):
import "io/ioutil"
func compress_and_uncompress() {
var buf bytes.Buffer
w := gzip.NewWriter(&buf)
i,err := w.Write([]byte(long_string))
if err!=nil {
log.Fatal(err)
}
w.Close()
r, _ := gzip.NewReader(&buf)
b2, err := ioutil.ReadAll(r)
if err!=nil {
log.Fatal(err)
}
r.Close()
fmt.Println("Wrote:", i, "Read:", len(b2))
}
If the read from the gzip.Reader does not return the whole expected slice, you can just keep re-reading until you have received all the data in the buffer.
Regarding your problem where, on re-reading, subsequent reads did not append to the end of the slice but instead landed at the beginning: each call to gzip's Read fills the supplied slice p starting at index zero (see the Read implementation linked below, which works through p[0:n]), so passing the same full slice on every call overwrites the data from the previous read.
This can be solved in the following manner:
func compress_and_uncompress(long_string string) {
    // Writer
    var buf bytes.Buffer
    w := gzip.NewWriter(&buf)
    i, err := w.Write([]byte(long_string))
    if err != nil {
        log.Fatal(err)
    }
    w.Close()

    // Reader
    var j, k int
    b2 := make([]byte, 80000)
    r, _ := gzip.NewReader(&buf)
    for j = 0; ; j += k {
        k, err = r.Read(b2[j:]) // Add the offset here
        if err != nil {
            if err != io.EOF {
                log.Fatal(err)
            } else {
                break
            }
        }
    }
    r.Close()

    fmt.Println("Wrote:", i, "Read:", j)
}
The result will be:
Wrote: 45976 Read: 45976
Also, after testing with a string of 45976 characters, I can confirm that the output is exactly the same as the input, with the second part correctly appended after the first part.
Source for gzip.Read: http://golang.org/src/pkg/compress/gzip/gunzip.go?s=4633:4683#L189
