I am trying to send a frame to another process in order to display it. I'm using dranger's tutorial02.
My plan is to serialize the SDL_Overlay struct to bytes after the call to sws_scale, send it to the other process, deserialize it there, and call SDL_DisplayYUVOverlay to display it.
Do you think this is my best choice?
If so, I'm having a hard time serializing the struct. Here's the code:
size_t size_of_Overlay(SDL_Overlay *bmp) {
    /*
     * typedef struct {
     *
     *     Uint32 format;
     *     int w, h;
     *     int planes;
     *     Uint16 *pitches;
     *     Uint8 **pixels;
     *     Uint32 hw_overlay:1; <- can I ignore it? can't take a pointer to a bit-field..
     *
     * } SDL_Overlay;
     */
    //     w, h, planes        format          pitches                 pixels
    return sizeof(int)*3 + sizeof(Uint32) + sizeof(Uint16)*bmp->w + sizeof(Uint8)*bmp->h*3;
}

void overlay_to_buf(SDL_Overlay* bmp, char* buf) {
    if(!bmp || !buf) {
        perror("overlay_to_buf");
        exit(1);
    }
    memcpy(buf, &bmp->format, sizeof(Uint32));
    buf += sizeof(Uint32);
    memcpy(buf, &bmp->w, sizeof(int));
    buf += sizeof(int);
    memcpy(buf, &bmp->h, sizeof(int));
    buf += sizeof(int);
    memcpy(buf, &bmp->planes, sizeof(int));
    buf += sizeof(int);
    memcpy(buf, bmp->pitches, sizeof(Uint16)*bmp->w);
    buf += sizeof(Uint16)*bmp->w;
    memcpy(buf, bmp->pixels[0], sizeof(Uint8)*bmp->h);
    buf += sizeof(Uint8)*bmp->h;
    memcpy(buf, bmp->pixels[1], sizeof(Uint8)*bmp->h);
    buf += sizeof(Uint8)*bmp->h;
    memcpy(buf, bmp->pixels[2], sizeof(Uint8)*bmp->h);
    buf += sizeof(Uint8)*bmp->h;
}

void buf_to_overlay(SDL_Overlay *bmp, char* buf) {
    if(!bmp || !buf) {
        perror("buf_to_overlay");
        exit(1);
    }
    memcpy(&bmp->format, buf, sizeof(Uint32));
    buf += sizeof(Uint32);
    memcpy(&bmp->w, buf, sizeof(int));
    buf += sizeof(int);
    memcpy(&bmp->h, buf, sizeof(int));
    buf += sizeof(int);
    memcpy(&bmp->planes, buf, sizeof(int));
    buf += sizeof(int);
    bmp->pitches = (Uint16*)malloc(sizeof(Uint16)*bmp->w);
    memcpy(bmp->pitches, buf, sizeof(Uint16)*bmp->w);
    buf += sizeof(Uint16)*bmp->w;
    bmp->pixels[0] = (Uint8*)malloc(sizeof(Uint8)*bmp->h);
    memcpy(bmp->pixels[0], buf, sizeof(Uint8)*bmp->h);
    buf += sizeof(Uint8)*bmp->h;
    bmp->pixels[1] = (Uint8*)malloc(sizeof(Uint8)*bmp->h);
    memcpy(bmp->pixels[1], buf, sizeof(Uint8)*bmp->h);
    buf += sizeof(Uint8)*bmp->h;
    bmp->pixels[2] = (Uint8*)malloc(sizeof(Uint8)*bmp->h);
    memcpy(bmp->pixels[2], buf, sizeof(Uint8)*bmp->h);
    buf += sizeof(Uint8)*bmp->h;
}
I've managed to serialize it and display it in the other process.
My original serialization was wrong; here's the corrected code:
size_t size_of_Overlay(SDL_Overlay *bmp) {
    /*
     * typedef struct {
     *
     *     Uint32 format;
     *     int w, h;
     *     int planes;
     *     Uint16 *pitches;
     *     Uint8 **pixels;
     *     Uint32 hw_overlay:1; <- not accounted for
     *
     * } SDL_Overlay;
     */
    return sizeof(int)*3 + sizeof(Uint32) + sizeof(Uint16)*3 + sizeof(Uint8)*bmp->h*bmp->w*3;
}

void overlay_to_buf(SDL_Overlay* bmp, char* buf) {
    if(!bmp || !buf) {
        perror("overlay_to_buf");
        exit(-1);
    }
    memcpy(buf, &bmp->format, sizeof(Uint32));
    buf += sizeof(Uint32);
    memcpy(buf, &bmp->w, sizeof(int));
    buf += sizeof(int);
    memcpy(buf, &bmp->h, sizeof(int));
    buf += sizeof(int);
    memcpy(buf, &bmp->planes, sizeof(int));
    buf += sizeof(int);
    memcpy(buf, &bmp->pitches[0], sizeof(Uint16));
    buf += sizeof(Uint16);
    memcpy(buf, &bmp->pitches[1], sizeof(Uint16));
    buf += sizeof(Uint16);
    memcpy(buf, &bmp->pitches[2], sizeof(Uint16));
    buf += sizeof(Uint16);
    memcpy(buf, bmp->pixels[0], sizeof(Uint8)*bmp->h*bmp->w);
    buf += sizeof(Uint8)*bmp->h*bmp->w;
    memcpy(buf, bmp->pixels[1], sizeof(Uint8)*bmp->h*bmp->w);
    buf += sizeof(Uint8)*bmp->h*bmp->w;
    memcpy(buf, bmp->pixels[2], sizeof(Uint8)*bmp->h*bmp->w);
}

void buf_to_overlay(SDL_Overlay *bmp, char* buf) {
    if(!bmp || !buf) {
        perror("buf_to_overlay");
        exit(-1);
    }
    memcpy(&bmp->format, buf, sizeof(Uint32));
    buf += sizeof(Uint32);
    memcpy(&bmp->w, buf, sizeof(int));
    buf += sizeof(int);
    memcpy(&bmp->h, buf, sizeof(int));
    buf += sizeof(int);
    memcpy(&bmp->planes, buf, sizeof(int));
    buf += sizeof(int);
    memcpy(&bmp->pitches[0], buf, sizeof(Uint16));
    buf += sizeof(Uint16);
    memcpy(&bmp->pitches[1], buf, sizeof(Uint16));
    buf += sizeof(Uint16);
    memcpy(&bmp->pitches[2], buf, sizeof(Uint16));
    buf += sizeof(Uint16);
    /* pixels are allocated outside this function;
     * just one allocation for the whole video, since
     * the screen size doesn't change.
     */
    memcpy(bmp->pixels[0], buf, sizeof(Uint8)*bmp->h*bmp->w);
    buf += sizeof(Uint8)*bmp->h*bmp->w;
    memcpy(bmp->pixels[1], buf, sizeof(Uint8)*bmp->h*bmp->w);
    buf += sizeof(Uint8)*bmp->h*bmp->w;
    memcpy(bmp->pixels[2], buf, sizeof(Uint8)*bmp->h*bmp->w);
}
I'm trying to implement a Gaussian blur on Go image.Image objects. For the following image:
[input image: dog.jpeg]
The generated output image is:
[output image: blurred, with unprocessed borders]
As one can see, the output image contains some unprocessed borders, which correspond to the deliberate implementation decision not to process the edges. This leads me to think I messed up the calculations somehow (what I mean is, that part of the implementation works, so I can rule out off-by-one errors while iterating through image pixels). I've reviewed this code many times, but I can't find my mistake. I would really appreciate some help and any considerations on the implementation that could help me solve the problem. The code is contained below. If any edits or clarifications are necessary, please let me know!
package main

import (
    "image"
    "image/color"
    "image/draw"
    "image/jpeg"
    "math"
    "os"
)

func main() {
    f, err := os.Open("dog.jpeg")
    if err != nil {
        panic(err)
    }
    img, err := jpeg.Decode(f)
    if err != nil {
        panic(err)
    }
    newImg := gaussianBlur(img, 3)
    out, err := os.Create("dog-blurred.jpeg")
    if err != nil {
        panic(err)
    }
    err = jpeg.Encode(out, newImg, nil)
    if err != nil {
        panic(err)
    }
}

func applyGaussianFunction(x, y, stdDev float64) float64 {
    // eFactor := 1 / (2 * math.Pi * stdDev * stdDev)
    ePowNominator := -(x*x + y*y)
    ePowDenominator := 2 * stdDev * stdDev
    return math.Pow(math.E, ePowNominator/ePowDenominator)
}

func generateKernel(radius int) [][]float64 {
    size := 1 + (radius * 2)
    kernel := make([][]float64, size)
    stdDev := math.Max(float64(radius/2), 1)
    sum := float64(0)
    for i := 0; i < size; i++ {
        kernel[i] = make([]float64, size)
    }
    for i := -radius; i < radius+1; i++ {
        for j := -radius; j < radius+1; j++ {
            val := applyGaussianFunction(float64(j), float64(i), stdDev)
            kernel[i+radius][j+radius] = val
            sum += val
        }
    }
    for i := 0; i < size; i++ {
        for j := 0; j < size; j++ {
            kernel[i][j] /= sum
        }
    }
    return kernel
}

func makeImageRGBA(src image.Image) *image.RGBA {
    b := src.Bounds().Size()
    rgba := image.NewRGBA(image.Rect(0, 0, b.X, b.Y))
    draw.Draw(rgba, rgba.Bounds(), src, image.Pt(0, 0), draw.Src)
    return rgba
}

func gaussianBlur(img image.Image, radius int) image.Image {
    size := img.Bounds().Size()
    rgbaImg := image.NewRGBA(image.Rect(0, 0, size.X, size.Y))
    kernel := generateKernel(radius)
    for y := radius; y < size.Y-radius; y++ {
        for x := radius; x < size.X-radius; x++ {
            var nr, ng, nb, na float64 = 0, 0, 0, 0
            for i := -radius; i < radius+1; i++ {
                for j := -radius; j < radius+1; j++ {
                    // NEW: get pixels from the original image
                    pr, pg, pb, pa := img.At(x-j, y-i).RGBA()
                    nr += float64(pr) * kernel[i+radius][j+radius]
                    ng += float64(pg) * kernel[i+radius][j+radius]
                    nb += float64(pb) * kernel[i+radius][j+radius]
                    na += float64(pa) * kernel[i+radius][j+radius]
                }
            }
            // Handle overflow by using 64-bit alpha-premultiplied values
            rgbaImg.Set(x, y, color.RGBA64{uint16(nr), uint16(ng), uint16(nb), uint16(na)})
        }
    }
    return rgbaImg
}
EDITS
I modified the code so that pixels are read from the original image, not from rgbaImg.
I've also commented out eFactor in the applyGaussianFunction function, since I'm already normalizing the kernel with the sum variable.
I modified the .Set call to use the 64-bit color.RGBA64 struct.
This is the newly generated image:
[image: output after the edits, showing black borders]
Those black borders are easy to solve; I'm already working them out. They are not part of the problem anymore.
You're reading from the same image that you're writing to. You should read from the original image instead:
pr, pg, pb, pa := img.At(x+j, y+i).RGBA()
EDIT:
Additionally, Image.At returns a color.Color whose RGBA method returns channel values in the 0 to 0xFFFF range, while the color.RGBA constructor expects them in the 0 to 255 range. You may want to use color.RGBA64 when writing the result:
rgbaImg.Set(x, y, color.RGBA64{uint16(nr), uint16(ng), uint16(nb), uint16(na)})
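A quick way to see the range mismatch, as a minimal self-contained sketch using only the standard library:

package main

import (
    "fmt"
    "image/color"
)

func main() {
    // RGBA() expands each 8-bit channel into the 0..0xFFFF range.
    c := color.RGBA{R: 255, G: 0, B: 0, A: 255}
    r, _, _, a := c.RGBA()
    fmt.Println(r, a) // prints 65535 65535, not 255 255
}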
I want to retrieve an image from a specific application and convert it to an image.Image for later use.
What I have now is an HBITMAP from a Windows API call. After a lot of trying, I can't manage to convert the created HBITMAP to an image.Image (or at least a []byte).
rc := w32.GetClientRect(hwnd)
if rc != nil {
    // create
    HDCScreen := w32.GetWindowDC(hwnd)
    hdc := w32.CreateCompatibleDC(HDCScreen)
    hbmp := w32.CreateCompatibleBitmap(HDCScreen, int(rc.Right)-int(rc.Left), int(rc.Bottom)-int(rc.Top))
    w32.SelectObject(hdc, w32.HGDIOBJ(hbmp))
    // Print to memory hdc
    w32.PrintWindow(hwnd, hdc, 0x00000002)
    // ------------------------------------------------
    var bmpInfo *w32.BITMAPINFO = &w32.BITMAPINFO{}
    bmpInfo.BmiHeader.BiSize = uint32(unsafe.Sizeof(bmpInfo.BmiHeader))
    firstDIBits := w32.GetDIBits(HDCScreen, hbmp, 0, 0, nil, bmpInfo, w32.DIB_RGB_COLORS)
    fmt.Printf("firstDIBits: %v\n", firstDIBits)
    var lpPixels *[]byte
    bmpInfo.BmiHeader.BiBitCount = 32
    bmpInfo.BmiHeader.BiCompression = w32.BI_RGB
    bmpInfo.BmiHeader.BiHeight = int32(math.Abs(float64(bmpInfo.BmiHeader.BiHeight)))
    bmpInfo.BmiHeader.BiCompression = w32.BI_RGB
    secondDIBits := w32.GetDIBits(hdc, hbmp, 0, uint(bmpInfo.BmiHeader.BiHeight), unsafe.Pointer(lpPixels), bmpInfo, w32.DIB_RGB_COLORS)
    fmt.Printf("secondDIBits: %v\n", secondDIBits)
    fmt.Printf("lpPixels: %v\n", lpPixels)
    // ------------------------------------------------
    // copy to clipboard
    w32.OpenClipboard(0)
    w32.EmptyClipboard()
    w32.SetClipboardData(w32.CF_BITMAP, w32.HANDLE(hbmp))
    w32.CloseClipboard()
    // release
    w32.DeleteDC(hdc)
    w32.DeleteObject(w32.HGDIOBJ(hbmp))
    w32.ReleaseDC(0, HDCScreen)
}
Both GetDIBits() calls return 1, but lpPixels is always nil.
The Bitmap class inherits from the Image class, so in my opinion you shouldn't need the conversion.
According to the docs for the GetDIBits function:
[out] lpvBits
A pointer to a buffer to receive the bitmap data. If this parameter is NULL, the function passes the dimensions and format of the bitmap to the BITMAPINFO structure pointed to by the lpbi parameter.
I don't know much about the Go language, but the lpvBits parameter of your GetDIBits call seems incorrect. I suggest you refer to these threads:
Using GetDIBits to load a bitmap
How should I use the GetDIBits function
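In Go terms, var lpPixels *[]byte declares a nil pointer, so unsafe.Pointer(lpPixels) passes NULL and the second call behaves like another query. A hypothetical fix sketch, assuming the same w32 wrapper signatures used in the question; the buffer size follows from the 32-bpp header filled in by the first call:

// Hypothetical sketch: allocate the pixel buffer first,
// then pass a pointer to its first element instead of NULL.
lpPixels := make([]byte, int(bmpInfo.BmiHeader.BiWidth)*int(bmpInfo.BmiHeader.BiHeight)*4) // 32 bpp = 4 bytes/pixel
secondDIBits := w32.GetDIBits(hdc, hbmp, 0, uint(bmpInfo.BmiHeader.BiHeight),
    unsafe.Pointer(&lpPixels[0]), bmpInfo, w32.DIB_RGB_COLORS)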
Finally made it. Posting the code in case it can help someone else. :)
rect := image.Rect(0, 0, 1920, 1080)
img := image.NewRGBA(rect)
rc := w32.GetClientRect(hwnd)
if rc != nil {
    width := int(rc.Right) - int(rc.Left)
    height := int(rc.Bottom) - int(rc.Top)
    hwndwin := win.HWND(hwnd)
    hdc := win.GetDC(hwndwin)
    if hdc == 0 {
        panic("GetDC failed")
    }
    defer win.ReleaseDC(hwndwin, hdc)
    memory_device := win.CreateCompatibleDC(hdc)
    if memory_device == 0 {
        panic("CreateCompatibleDC failed")
    }
    defer win.DeleteDC(memory_device)
    bitmap := win.CreateCompatibleBitmap(hdc, int32(width), int32(height))
    if bitmap == 0 {
        panic("CreateCompatibleBitmap failed")
    }
    defer win.DeleteObject(win.HGDIOBJ(bitmap))
    var header win.BITMAPINFOHEADER
    header.BiSize = uint32(unsafe.Sizeof(header))
    header.BiPlanes = 1
    header.BiBitCount = 32
    header.BiWidth = int32(width)
    header.BiHeight = int32(-height)
    header.BiCompression = win.BI_RGB
    header.BiSizeImage = 0
    // GetDIBits balks at using Go memory on some systems. The MSDN example uses GlobalAlloc
    // https://docs.microsoft.com/en-gb/windows/desktop/gdi/capturing-an-image
    bitmapDataSize := uintptr(((int64(width)*int64(header.BiBitCount) + 31) / 32) * 4 * int64(height))
    hmem := win.GlobalAlloc(win.GMEM_MOVEABLE, bitmapDataSize)
    defer win.GlobalFree(hmem)
    memptr := win.GlobalLock(hmem)
    defer win.GlobalUnlock(hmem)
    old := win.SelectObject(memory_device, win.HGDIOBJ(bitmap))
    if old == 0 {
        panic("SelectObject failed")
    }
    defer win.SelectObject(memory_device, old)
    if !win.BitBlt(memory_device, 0, 0, int32(width), int32(height), hdc, int32(0), int32(0), win.SRCCOPY) {
        panic("BitBlt failed")
    }
    if win.GetDIBits(hdc, bitmap, 0, uint32(height), (*uint8)(memptr), (*win.BITMAPINFO)(unsafe.Pointer(&header)), win.DIB_RGB_COLORS) == 0 {
        panic("GetDIBits failed")
    }
    i := 0
    src := uintptr(memptr)
    for y := 0; y < height; y++ {
        for x := 0; x < width; x++ {
            v0 := *(*uint8)(unsafe.Pointer(src))
            v1 := *(*uint8)(unsafe.Pointer(src + 1))
            v2 := *(*uint8)(unsafe.Pointer(src + 2))
            // BGRA => RGBA, and set A to 255
            img.Pix[i], img.Pix[i+1], img.Pix[i+2], img.Pix[i+3] = v2, v1, v0, 255
            i += 4
            src += 4
        }
    }
}
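One caveat, in case it trips anyone up: the snippet above fixes the image at 1920x1080 while the copy loop runs over the window's actual client size, so the img.Pix indexing only lines up when the two match. Sizing the image from the client rect keeps them in step; a sketch reusing the names above (after the rc != nil check):

// Size the destination image from the actual client rect
// so the img.Pix stride matches the copy loop.
width := int(rc.Right) - int(rc.Left)
height := int(rc.Bottom) - int(rc.Top)
img := image.NewRGBA(image.Rect(0, 0, width, height))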
When entering array data from the user side: after entering n = 5 and hitting Enter, a[0] is automatically assigned 0 and skipped, and input continues with a[1]. I tried other machines and Replit, and it seems to happen only on my computer. I also tried uninstalling and reinstalling Go, but that didn't fix it.
package main

import (
    "fmt"
)

func main() {
    var a = [100]int{}
    var n int
    fmt.Print("N = ")
    fmt.Scanf("%v", &n)
    for i := 0; i < n; i++ {
        fmt.Printf("a[%v] = ", i)
        fmt.Scanf("%v", &a[i])
    }
    for i := 0; i < n; i++ {
        fmt.Printf("%v ", a[i])
    }
    fmt.Println()
}
This should solve the problem for you; checking the error returned by Scanf is important when debugging input code in Go.
var a = make([]int, 100)
var n int = 5
fmt.Print("N = ")
fmt.Scanf("%v \n", &n)
for i := 0; i < n; i++ {
    fmt.Printf("a[%v] = ", i)
    _, err := fmt.Scanf("%v \n", &a[i])
    if err != nil {
        fmt.Println("error", err)
    }
}
for i := 0; i < n; i++ {
    fmt.Printf("%v ", a[i])
}
fmt.Println()
https://pkg.go.dev/fmt#pkg-overview
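The trailing space-and-newline in the format string is what makes Scanf consume the line ending; the skipped a[0] is most likely the well-known CRLF handling of fmt.Scanf on Windows. If Scanf keeps misbehaving, fmt.Fscan on a buffered reader treats newlines as ordinary spaces and sidesteps the issue entirely; a minimal sketch:

package main

import (
    "bufio"
    "fmt"
    "os"
)

func main() {
    in := bufio.NewReader(os.Stdin)
    var n int
    fmt.Print("N = ")
    fmt.Fscan(in, &n) // Fscan treats newlines (including \r\n) as spaces
    a := make([]int, n)
    for i := 0; i < n; i++ {
        fmt.Printf("a[%v] = ", i)
        fmt.Fscan(in, &a[i])
    }
    for i := 0; i < n; i++ {
        fmt.Printf("%v ", a[i])
    }
    fmt.Println()
}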
When recording the microphone, the recorded chunks were in raw PCM8 format, and I was able to send and play them (by changing bitDepthInBytes = 2) without any noise. But when I sent encoded Opus frames through the network and decoded them to PCM16, I couldn't play them unless I converted them to PCM8, and that was noisy. Here is my code:
const sampleRate = 48000
const channels = 1
....
....
dec, err := opus.NewDecoder(sampleRate, channels)
if err != nil {
    fmt.Println(err)
    return
}
var frameSizeMs float32 = 20
frameSize := int(channels * frameSizeMs * sampleRate / 1000)
pcm := make([]int16, frameSize)
// (sampleRate int, channelNum int, bitDepthInBytes int, bufferSizeInBytes int)
context, err := oto.NewContext(sampleRate, channels, 1, frameSize*2)
if err != nil {
    log.Fatal(err)
}
player := context.NewPlayer()
...
...
_, err := dec.Decode(data, pcm)
if err != nil {
    fmt.Println(err)
}
var mask uint16 = 0x8000
pcm8 := make([]byte, frameSize)
for i := 0; i < frameSize; i++ {
    // using this works and plays sound, but it has noise
    pcm8[i] = byte((uint16(pcm[i]) ^ mask) >> 8)
}
_, _ = player.Write(pcm8)
By reading this, I learned how to format a PCM buffer into playable audio bytes: https://github.com/philfrei/AudioCue/blob/master/src/main/java/com/adonax/audiocue/AudioCue.java. This is the snippet I used:
public static byte[] fromBufferToAudioBytes(byte[] audioBytes, float[] buffer)
{
    for (int i = 0, n = buffer.length; i < n; i++)
    {
        buffer[i] *= 32767;
        audioBytes[i*2] = (byte) buffer[i];
        audioBytes[i*2 + 1] = (byte)((int)buffer[i] >> 8);
    }
    return audioBytes;
}
And this is what I updated in my code:
pcm8 := make([]byte, frameSize*2)
for i := 0; i < frameSize; i++ {
    // pcm[i] *= 32767 // uncomment when the pcm array is float instead of int16
    pcm8[i*2] = byte(uint16(pcm[i]))
    pcm8[i*2+1] = byte(uint16(pcm[i]) >> 8)
}
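The same conversion can also be written with encoding/binary, which makes the little-endian byte order explicit; a sketch assuming the same pcm []int16 buffer as above:

// import "encoding/binary"
// Little-endian 16-bit PCM: low byte first, matching the manual shifts above.
pcm8 := make([]byte, len(pcm)*2)
for i, s := range pcm {
    binary.LittleEndian.PutUint16(pcm8[i*2:], uint16(s))
}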
I am new to Go and having difficulty converting the *_Ctype_float datatype into []float32. Is there something I am missing? I even thought of converting *_Ctype_float into a string, but even that was not successful.
I have this C function named predictInstance, which returns a float*. I am calling this function from Go with:
predictionValues := C.predictInstance(
    handle,
    (*C.float)(unsafe.Pointer(&req.FlatInput[0])),
)
When I look at the type of predictionValues, it says it is *_Ctype_float, and I want to convert it into []float32.
I have a C function which returns a float* array which I wish to convert to []float32. I am calling this
function from Go with a float* array argument.
A working example:
package main

/*
#include <stdlib.h>
#include <float.h>

float *reverse(float *f, int len) {
    float *g = calloc(len, sizeof(float));
    for (int i = 0; i < len; i++) {
        g[i] = f[len-1-i];
    }
    return g;
}
*/
import "C"

import (
    "fmt"
    "math"
    "unsafe"
)

func main() {
    a := []float32{3.14159, 2.718, 1}
    r := make([]float32, len(a))
    fmt.Println("a:", a, "r:", r)
    c := C.reverse((*C.float)(&a[0]), C.int(len(a)))
    copy(r, (*[1 << 20]float32)(unsafe.Pointer(c))[:])
    C.free(unsafe.Pointer(c))
    fmt.Println("a:", a, "r:", r)
}

var okCFloat = func() bool {
    if C.sizeof_float != unsafe.Sizeof(float32(0)) {
        panic("C float != Go float32")
    }
    if C.FLT_MAX != math.MaxFloat32 {
        panic("C float != Go float32")
    }
    return true
}()
Output:
a: [3.14159 2.718 1] r: [0 0 0]
a: [3.14159 2.718 1] r: [1 2.718 3.14159]
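On Go 1.17 and later, unsafe.Slice tidies up the copy step and replaces the big-array cast; a sketch using the same c pointer and lengths as in the example above:

// View the C array as a []float32 of the right length, then copy.
cs := unsafe.Slice((*float32)(unsafe.Pointer(c)), len(a))
copy(r, cs)
C.free(unsafe.Pointer(c))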