The Go code is here:
package main
func add(a, b int) int {
sum := 0
sum = a + b
return sum
}
func main() {
println(add(1, 2))
}
The Go version is
$ go version
go version go1.19.1 darwin/amd64
I use the following command to get assembly:
$ go tool compile -N -l -S main.go
main.add STEXT nosplit size=70 args=0x10 locals=0x18 funcid=0x0 align=0x0
0x0000 00000 (main.go:3) TEXT main.add(SB), NOSPLIT|ABIInternal, $24-16
0x0000 00000 (main.go:3) SUBQ $24, SP
0x0004 00004 (main.go:3) MOVQ BP, 16(SP)
0x0009 00009 (main.go:3) LEAQ 16(SP), BP
0x000e 00014 (main.go:3) FUNCDATA $0, gclocals·g2BeySu+wFnoycgXfElmcg==(SB)
0x000e 00014 (main.go:3) FUNCDATA $1, gclocals·g2BeySu+wFnoycgXfElmcg==(SB)
0x000e 00014 (main.go:3) FUNCDATA $5, main.add.arginfo1(SB)
0x000e 00014 (main.go:3) MOVQ AX, main.a+32(SP)
0x0013 00019 (main.go:3) MOVQ BX, main.b+40(SP)
0x0018 00024 (main.go:3) MOVQ $0, main.~r0(SP)
0x0020 00032 (main.go:4) MOVQ $0, main.sum+8(SP)
0x0029 00041 (main.go:5) MOVQ main.a+32(SP), AX
0x002e 00046 (main.go:5) ADDQ main.b+40(SP), AX
0x0033 00051 (main.go:5) MOVQ AX, main.sum+8(SP)
0x0038 00056 (main.go:6) MOVQ AX, main.~r0(SP)
0x003c 00060 (main.go:6) MOVQ 16(SP), BP
0x0041 00065 (main.go:6) ADDQ $24, SP
0x0045 00069 (main.go:6) RET
0x0000 48 83 ec 18 48 89 6c 24 10 48 8d 6c 24 10 48 89 H...H.l$.H.l$.H.
0x0010 44 24 20 48 89 5c 24 28 48 c7 04 24 00 00 00 00 D$ H.\$(H..$....
0x0020 48 c7 44 24 08 00 00 00 00 48 8b 44 24 20 48 03 H.D$.....H.D$ H.
0x0030 44 24 28 48 89 44 24 08 48 89 04 24 48 8b 6c 24 D$(H.D$.H..$H.l$
0x0040 10 48 83 c4 18 c3 .H....
main.main STEXT size=86 args=0x0 locals=0x20 funcid=0x0 align=0x0
0x0000 00000 (main.go:8) TEXT main.main(SB), ABIInternal, $32-0
0x0000 00000 (main.go:8) CMPQ SP, 16(R14)
0x0004 00004 (main.go:8) PCDATA $0, $-2
0x0004 00004 (main.go:8) JLS 79
0x0006 00006 (main.go:8) PCDATA $0, $-1
0x0006 00006 (main.go:8) SUBQ $32, SP
0x000a 00010 (main.go:8) MOVQ BP, 24(SP)
0x000f 00015 (main.go:8) LEAQ 24(SP), BP
0x0014 00020 (main.go:8) FUNCDATA $0, gclocals·g2BeySu+wFnoycgXfElmcg==(SB)
0x0014 00020 (main.go:8) FUNCDATA $1, gclocals·g2BeySu+wFnoycgXfElmcg==(SB)
0x0014 00020 (main.go:9) MOVL $1, AX
0x0019 00025 (main.go:9) MOVL $2, BX
0x001e 00030 (main.go:9) PCDATA $1, $0
0x001e 00030 (main.go:9) NOP
0x0020 00032 (main.go:9) CALL main.add(SB)
0x0025 00037 (main.go:9) MOVQ AX, main..autotmp_0+16(SP)
0x002a 00042 (main.go:9) CALL runtime.printlock(SB)
0x002f 00047 (main.go:9) MOVQ main..autotmp_0+16(SP), AX
0x0034 00052 (main.go:9) CALL runtime.printint(SB)
0x0039 00057 (main.go:9) CALL runtime.printnl(SB)
0x003e 00062 (main.go:9) NOP
0x0040 00064 (main.go:9) CALL runtime.printunlock(SB)
0x0045 00069 (main.go:10) MOVQ 24(SP), BP
0x004a 00074 (main.go:10) ADDQ $32, SP
0x004e 00078 (main.go:10) RET
0x004f 00079 (main.go:10) NOP
0x004f 00079 (main.go:8) PCDATA $1, $-1
0x004f 00079 (main.go:8) PCDATA $0, $-2
0x004f 00079 (main.go:8) CALL runtime.morestack_noctxt(SB)
0x0054 00084 (main.go:8) PCDATA $0, $-1
0x0054 00084 (main.go:8) JMP 0
0x0000 49 3b 66 10 76 49 48 83 ec 20 48 89 6c 24 18 48 I;f.vIH.. H.l$.H
0x0010 8d 6c 24 18 b8 01 00 00 00 bb 02 00 00 00 66 90 .l$...........f.
0x0020 e8 00 00 00 00 48 89 44 24 10 e8 00 00 00 00 48 .....H.D$......H
0x0030 8b 44 24 10 e8 00 00 00 00 e8 00 00 00 00 66 90 .D$...........f.
0x0040 e8 00 00 00 00 48 8b 6c 24 18 48 83 c4 20 c3 e8 .....H.l$.H.. ..
0x0050 00 00 00 00 eb aa ......
rel 33+4 t=7 main.add+0
rel 43+4 t=7 runtime.printlock+0
rel 53+4 t=7 runtime.printint+0
rel 58+4 t=7 runtime.printnl+0
rel 65+4 t=7 runtime.printunlock+0
rel 80+4 t=7 runtime.morestack_noctxt+0
go.cuinfo.producer.<unlinkable> SDWARFCUINFO dupok size=0
0x0000 2d 4e 20 2d 6c 20 72 65 67 61 62 69 -N -l regabi
go.cuinfo.packagename.main SDWARFCUINFO dupok size=0
0x0000 6d 61 69 6e main
main..inittask SNOPTRDATA size=24
0x0000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................
0x0010 00 00 00 00 00 00 00 00 ........
gclocals·g2BeySu+wFnoycgXfElmcg== SRODATA dupok size=8
0x0000 01 00 00 00 00 00 00 00 ........
main.add.arginfo1 SRODATA static dupok size=5
0x0000 00 08 08 08 ff .....
In my understanding, in the processing of the function main call function add, the stack would be(SP is the top of function add):
0~8: ~ro, the return value of add
8~16: sum, the local variable sum
16~24: BP, use to return caller main
32~40: a, the parameter a of add
40~456: b, the parameter b of add
But what is stored in the 24~32? I can not get it by reading assembly.
This question already has an answer here:
Assembly do we need the endings? [duplicate]
(1 answer)
Closed 1 year ago.
Recently, I read some books about computer science. I wrote some C code, and disassembled them, using gcc and objdump.
The following C code:
#include <stdio.h>
#include <stdbool.h>
int dojob()
{
static short num[ ][4] = { {2, 9, -1, 5}, {3, 8, 2, -6}};
static short *pn[ ] = {num[0], num[1]};
static short s[2] = {0, 0};
int i, j;
for (i=0; i<2; i++) {
for (j=0; j<4; j++){
s[i] += *pn[i]++;
}
printf ("sum of line %d: %d\n", i+1, s[i]);
}
return 0;
}
int main ( )
{
dojob();
}
got the following assembly code (AT&T syntex; only assembly of function dojob and some data is list):
00401350 <_dojob>:
401350: 55 push %ebp
401351: 89 e5 mov %esp,%ebp
401353: 83 ec 28 sub $0x28,%esp
401356: c7 45 f4 00 00 00 00 movl $0x0,-0xc(%ebp)
40135d: eb 75 jmp 4013d4 <_dojob+0x84>
40135f: c7 45 f0 00 00 00 00 movl $0x0,-0x10(%ebp)
401366: eb 3c jmp 4013a4 <_dojob+0x54>
401368: 8b 45 f4 mov -0xc(%ebp),%eax
40136b: 8b 04 85 00 20 40 00 mov 0x402000(,%eax,4),%eax
401372: 8d 48 02 lea 0x2(%eax),%ecx
401375: 8b 55 f4 mov -0xc(%ebp),%edx
401378: 89 0c 95 00 20 40 00 mov %ecx,0x402000(,%edx,4)
40137f: 0f b7 10 movzwl (%eax),%edx
401382: 8b 45 f4 mov -0xc(%ebp),%eax
401385: 0f b7 84 00 08 50 40 movzwl 0x405008(%eax,%eax,1),%eax
40138c: 00
40138d: 89 c1 mov %eax,%ecx
40138f: 89 d0 mov %edx,%eax
401391: 01 c8 add %ecx,%eax
401393: 89 c2 mov %eax,%edx
401395: 8b 45 f4 mov -0xc(%ebp),%eax
401398: 66 89 94 00 08 50 40 mov %dx,0x405008(%eax,%eax,1)
40139f: 00
4013a0: 83 45 f0 01 addl $0x1,-0x10(%ebp)
4013a4: 83 7d f0 03 cmpl $0x3,-0x10(%ebp)
4013a8: 7e be jle 401368 <_dojob+0x18>
4013aa: 8b 45 f4 mov -0xc(%ebp),%eax
4013ad: 0f b7 84 00 08 50 40 movzwl 0x405008(%eax,%eax,1),%eax
4013b4: 00
4013b5: 98 cwtl
4013b6: 8b 55 f4 mov -0xc(%ebp),%edx
4013b9: 83 c2 01 add $0x1,%edx
4013bc: 89 44 24 08 mov %eax,0x8(%esp)
4013c0: 89 54 24 04 mov %edx,0x4(%esp)
4013c4: c7 04 24 24 30 40 00 movl $0x403024,(%esp)
4013cb: e8 50 08 00 00 call 401c20 <_printf>
4013d0: 83 45 f4 01 addl $0x1,-0xc(%ebp)
4013d4: 83 7d f4 01 cmpl $0x1,-0xc(%ebp)
4013d8: 7e 85 jle 40135f <_dojob+0xf>
4013da: b8 00 00 00 00 mov $0x0,%eax
4013df: c9 leave
4013e0: c3 ret
Disassembly of section .data:
00402000 <__data_start__>:
402000: 08 20 or %ah,(%eax)
402002: 40 inc %eax
402003: 00 10 add %dl,(%eax)
402005: 20 40 00 and %al,0x0(%eax)
Disassembly of section .bss:
...
00405008 <_s.1927>:
405008: 00 00 add %al,(%eax)
...
I have two questions:
I don't understand the difference between mov and movl instruction? Why the compiler generate mov for some code, and movl for others?
I completely understand the meaning of the C code, but not the assembly that the compiler generated. Who can make some comments for it for me to understand? I will thank a lot.
The MOVL instruction was generated because you put two int (i and j variables), MOVL will perform a MOV of 32 bits, and integer' size is 32 bits.
a non exhaustive list of all MOV* exist (like MOVD for doubleword or MOVQ for quadword) to allow to optimize your code and use the better expression to gain most time as possible.
PS: may be the -M intel objdump's argument can help you to have a better comprehension of the disassembly, a lot of man on the Intel syntax can may be find easily.
I have an elf file, and I would like to know if it's possible to get some infos about, where an variable (string) is used in the executable.
If I print out the strings of the .elf I find an interesting string, and I would like to know, in which function it is used, is this somehow possible?
Thank you!
Let's consider the following example:
test.c
#include <stdlib.h>
#include <stdio.h>
int
main(int argc, char *argv[])
{
char *str_a = "a";
char *str_abc = "abc";
printf("%s\n", str_a);
printf("%s\n", str_abc);
exit(EXIT_SUCCESS);
}
Let's compile it: gcc -Wall -pedantic-errors -o ~/test ~/test.c.
This will produce the file called test.
Let's examine .rodata section: readelf -x .rodata ./test.
This output is as follows:
Hex dump of section '.rodata':
0x00400610 01000200 61006162 6300 ....a.abc.
The starting address 0x00400610 is displayed. The four dots are displayed before the first string constant and thus the address of the string a will be 0x00400614. Skipping a and a null byte (delimiter) skips 2 bytes, and the address of abc will be 0x00400616.
So, at this point, the two addresses are known.
Next, let's perform objdump -M intel -d ./test.
Here is the listing of main within .text section:
0000000000400546 <main>:
400546: 55 push rbp
400547: 48 89 e5 mov rbp,rsp
40054a: 48 83 ec 20 sub rsp,0x20
40054e: 89 7d ec mov DWORD PTR [rbp-0x14],edi
400551: 48 89 75 e0 mov QWORD PTR [rbp-0x20],rsi
400555: 48 c7 45 f8 14 06 40 mov QWORD PTR [rbp-0x8],0x400614
40055c: 00
40055d: 48 c7 45 f0 16 06 40 mov QWORD PTR [rbp-0x10],0x400616
400564: 00
400565: 48 8b 45 f8 mov rax,QWORD PTR [rbp-0x8]
400569: 48 89 c7 mov rdi,rax
40056c: e8 9f fe ff ff call 400410 <puts#plt>
400571: 48 8b 45 f0 mov rax,QWORD PTR [rbp-0x10]
400575: 48 89 c7 mov rdi,rax
400578: e8 93 fe ff ff call 400410 <puts#plt>
40057d: bf 00 00 00 00 mov edi,0x0
400582: e8 b9 fe ff ff call 400440 <exit#plt>
400587: 66 0f 1f 84 00 00 00 nop WORD PTR [rax+rax*1+0x0]
40058e: 00 00
So, it is quite clear from the listing how the two strings are put on the screen. The listing mentions the two addresses found above - they are highlighted.
So, all in all, you may examine .rodata section, find the location address of a particular string and then just grep for the address found within .text section.
I am currently writing various implementations of a color to black/white image converter. I would like to do a :
Simple C++ implementation
Self made ASM implementation
Self made ASM implementation with AVX vector instructions.
The goal is to benchmark each one of these and analyse the performance improvement I get.
The following snippet of code is the C++ implementation. It only treats a single portion of the image, because I also want to do multithreaded computing.
void CBwConverter::run(const CImg<uint8_t> &src, CImg<uint8_t> &dst, uint32_t pixel, size_t size) const {
const uint8_t *rC = src.data(0,pixel,0,0);
const uint8_t *gC = src.data(0,pixel,0,1);
const uint8_t *bC = src.data(0,pixel,0,2);
uint8_t *mC = dst.data(0,pixel,0,0);
for(size_t c = 0; c < size; c++, rC++, gC++, bC++, mC++) {
*mC = (uint8_t)(0.299f*(*rC) + 0.587f*(*gC) + 0.114f*(*bC));
}
}
Now, before starting the ASM version, I had my C++ code compiled and disassembled just to see how it looks like. After compiling with gcc -std=c++11 -g -O2 -c CBwConverter.cc, I obtained the following output with objdump -d CBwConvert.o :
0000000000000000 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm>:
0: 53 push %rbx
1: 8b 3e mov (%rsi),%edi
3: 89 c8 mov %ecx,%eax
5: 44 8b 56 04 mov 0x4(%rsi),%r10d
9: 44 8b 5e 08 mov 0x8(%rsi),%r11d
d: 89 c9 mov %ecx,%ecx
f: 48 8b 5e 18 mov 0x18(%rsi),%rbx
13: 0f af c7 imul %edi,%eax
16: 4c 0f af d7 imul %rdi,%r10
1a: 4b 8d 34 1b lea (%r11,%r11,1),%rsi
1e: 4c 8d 0c 03 lea (%rbx,%rax,1),%r9
22: 4c 89 d7 mov %r10,%rdi
25: 49 0f af fb imul %r11,%rdi
29: 4c 0f af d6 imul %rsi,%r10
2d: 48 01 c7 add %rax,%rdi
30: 4c 01 d0 add %r10,%rax
33: 48 01 df add %rbx,%rdi
36: 48 8d 34 03 lea (%rbx,%rax,1),%rsi
3a: 8b 02 mov (%rdx),%eax
3c: 48 0f af c8 imul %rax,%rcx
40: 48 03 4a 18 add 0x18(%rdx),%rcx
44: 4d 85 c0 test %r8,%r8
47: 74 6b je b4 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0xb4>
49: 31 d2 xor %edx,%edx
4b: f3 0f 10 25 00 00 00 movss 0x0(%rip),%xmm4 # 53 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x53>
52: 00
53: f3 0f 10 1d 00 00 00 movss 0x0(%rip),%xmm3 # 5b <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x5b>
5a: 00
5b: f3 0f 10 15 00 00 00 movss 0x0(%rip),%xmm2 # 63 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x63>
62: 00
63: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
68: 41 0f b6 04 11 movzbl (%r9,%rdx,1),%eax
6d: 66 0f ef c0 pxor %xmm0,%xmm0
71: f3 0f 2a c0 cvtsi2ss %eax,%xmm0
75: 0f b6 04 17 movzbl (%rdi,%rdx,1),%eax
79: 0f 28 c8 movaps %xmm0,%xmm1
7c: 66 0f ef c0 pxor %xmm0,%xmm0
80: f3 0f 59 cc mulss %xmm4,%xmm1
84: f3 0f 2a c0 cvtsi2ss %eax,%xmm0
88: 0f b6 04 16 movzbl (%rsi,%rdx,1),%eax
8c: f3 0f 59 c3 mulss %xmm3,%xmm0
90: f3 0f 58 c1 addss %xmm1,%xmm0
94: 66 0f ef c9 pxor %xmm1,%xmm1
98: f3 0f 2a c8 cvtsi2ss %eax,%xmm1
9c: f3 0f 59 ca mulss %xmm2,%xmm1
a0: f3 0f 58 c1 addss %xmm1,%xmm0
a4: f3 0f 2c c0 cvttss2si %xmm0,%eax
a8: 88 04 11 mov %al,(%rcx,%rdx,1)
ab: 48 83 c2 01 add $0x1,%rdx
af: 49 39 d0 cmp %rdx,%r8
b2: 75 b4 jne 68 <_ZNK12CBwConverter3runERKN12cimg_library4CImgIhEERS2_jm+0x68>
b4: 5b pop %rbx
b5: c3 retq
I can already tell that the for-loop start at 68 and ends at b2.
Something bothers me in the disassembled program. Why does the compiler decide to set registers %xmm0 and %xmm1 to 0, typically at 6d with instruction pxor ? These registers are overwritten just after with instruction cvtsi2ss which loads an integer and converts it to a single-precision number and then finally stores it into them. Why set them to 0 when they are overwritten just after ? If the compiler does it, am I supposed to do the same when writing my own asm version ?
I'am trying to inverse a matrix(actually it is a s-box for AES). The dimensions are 10*16 (10 rows and 16 columns)
The box is filled up with numbers and I want to inverse this box
like this :
a[0][0]=63 (for example in the first row/column the value is 63)
after inverse operation:
a[6][3]=00 i want change it with this
I tried some algorithms but didn't work
Well, in the page you linked the matrix is 16x16 and it's stored in a single 1D array of hexadecimal values. The inversion is the easiest part of that algorythm. I'll give you a c++ code for example:
#include <iostream>
#include <iomanip>
#define ROWS 16
#define COLS 16
#define N_ELEMENTS 256
void Invert(unsigned char * in, unsigned char * out) {
for ( int i = 0; i < N_ELEMENTS; i++ ) out[in[i]] = i;
}
void showMatrix(unsigned char * m) {
for ( int i = 0; i < ROWS; i++) {
for (int j = 0; j < COLS; j++) {
std::cout << " " << std::setbase(16) << std::setfill('0') << std::setw(2)
<< (unsigned int)m[i*COLS + j];
}
std::cout << std::endl;
}
}
int main() {
unsigned char s[256] = {
0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16
};
unsigned char inv[256];
Invert(s,inv);
std::cout << "This is the original matrix:\n";
showMatrix(s);
std::cout << "\nThis is the inverted matrix:\n";
showMatrix(inv);
return 0;
}
The output is this:
This is the original matrix:
63 7c 77 7b f2 6b 6f c5 30 01 67 2b fe d7 ab 76
ca 82 c9 7d fa 59 47 f0 ad d4 a2 af 9c a4 72 c0
b7 fd 93 26 36 3f f7 cc 34 a5 e5 f1 71 d8 31 15
04 c7 23 c3 18 96 05 9a 07 12 80 e2 eb 27 b2 75
09 83 2c 1a 1b 6e 5a a0 52 3b d6 b3 29 e3 2f 84
53 d1 00 ed 20 fc b1 5b 6a cb be 39 4a 4c 58 cf
d0 ef aa fb 43 4d 33 85 45 f9 02 7f 50 3c 9f a8
51 a3 40 8f 92 9d 38 f5 bc b6 da 21 10 ff f3 d2
cd 0c 13 ec 5f 97 44 17 c4 a7 7e 3d 64 5d 19 73
60 81 4f dc 22 2a 90 88 46 ee b8 14 de 5e 0b db
e0 32 3a 0a 49 06 24 5c c2 d3 ac 62 91 95 e4 79
e7 c8 37 6d 8d d5 4e a9 6c 56 f4 ea 65 7a ae 08
ba 78 25 2e 1c a6 b4 c6 e8 dd 74 1f 4b bd 8b 8a
70 3e b5 66 48 03 f6 0e 61 35 57 b9 86 c1 1d 9e
e1 f8 98 11 69 d9 8e 94 9b 1e 87 e9 ce 55 28 df
8c a1 89 0d bf e6 42 68 41 99 2d 0f b0 54 bb 16
This is the inverted matrix:
52 09 6a d5 30 36 a5 38 bf 40 a3 9e 81 f3 d7 fb
7c e3 39 82 9b 2f ff 87 34 8e 43 44 c4 de e9 cb
54 7b 94 32 a6 c2 23 3d ee 4c 95 0b 42 fa c3 4e
08 2e a1 66 28 d9 24 b2 76 5b a2 49 6d 8b d1 25
72 f8 f6 64 86 68 98 16 d4 a4 5c cc 5d 65 b6 92
6c 70 48 50 fd ed b9 da 5e 15 46 57 a7 8d 9d 84
90 d8 ab 00 8c bc d3 0a f7 e4 58 05 b8 b3 45 06
d0 2c 1e 8f ca 3f 0f 02 c1 af bd 03 01 13 8a 6b
3a 91 11 41 4f 67 dc ea 97 f2 cf ce f0 b4 e6 73
96 ac 74 22 e7 ad 35 85 e2 f9 37 e8 1c 75 df 6e
47 f1 1a 71 1d 29 c5 89 6f b7 62 0e aa 18 be 1b
fc 56 3e 4b c6 d2 79 20 9a db c0 fe 78 cd 5a f4
1f dd a8 33 88 07 c7 31 b1 12 10 59 27 80 ec 5f
60 51 7f a9 19 b5 4a 0d 2d e5 7a 9f 93 c9 9c ef
a0 e0 3b 4d ae 2a f5 b0 c8 eb bb 3c 83 53 99 61
17 2b 04 7e ba 77 d6 26 e1 69 14 63 55 21 0c 7d
Something like this should do the trick, although there's probably a more efficient way.
Matrix Invert(Matrix in)
{
Matrix out = new Matrix[10][16];
for(int i = 0; i < 10; i++)
{
for(int j = 0; j < 16, j++)
{
int row = Math.Floor(in[i][j] / 16);
int column = in[i][j] % 16;
out[row][column] = i * 16 + j;
}
}
return out;
}