I’m having a mind blank on this one. I am receiving strings of the format..
AB1234
ABC1234
ABC123
AB12
etc etc. Essentially, flight numbers
They could have one or two letters and anything from 1 to 5 numbers. I want to split the string so that I end up with two strings, one with the numbers and one with the letters.
Any ideas? I’ve looked through these but can’t see one that would do the job
https://www.dotnetperls.com/split-go
Update:
Just found and will use this unless there’s a better option. Delete all letters / numbers to create the strings needed https://golangcode.com/how-to-remove-all-non-alphanumerical-characters-from-a-string/
You could take advantage of the fact that Go is a programming language and write a simple Go function. For example,
package main
import (
"fmt"
)
func parseFlight(s string) (letters, numbers string) {
var l, n []rune
for _, r := range s {
switch {
case r >= 'A' && r <= 'Z':
l = append(l, r)
case r >= 'a' && r <= 'z':
l = append(l, r)
case r >= '0' && r <= '9':
n = append(n, r)
}
}
return string(l), string(n)
}
func main() {
flights := []string{"AB1234", "ABC1234", "ABC123", "AB12"}
for _, flight := range flights {
letters, numbers := parseFlight(flight)
fmt.Printf("%q: %q %q\n", flight, letters, numbers)
}
}
Playground: https://play.golang.org/p/pDrsqntAP6E
Output:
"AB1234": "AB" "1234"
"ABC1234": "ABC" "1234"
"ABC123": "ABC" "123"
"AB12": "AB" "12"
It looks like Go's regex syntax does not support lookahead, so you will have to match the two parts and extract them manually, rather than using a split method.
package main
import (
"regexp"
"fmt"
)
var reFlightNumbers = regexp.MustCompile("([A-Z]+)([0-9]+)")
func main() {
matches := reFlightNumbers.FindStringSubmatch("ABC123")
fmt.Println(matches[1])
fmt.Println(matches[2])
}
You can use strings.IndexAny:
package flight
import "strings"
func split(s string) (string, string) {
n := strings.IndexAny(s, "0123456789")
return s[:n], s[n:]
}
or strings.IndexFunc:
package flight
import (
"strings"
"unicode"
)
func split(s string) (string, string) {
n := strings.IndexFunc(s, unicode.IsDigit)
return s[:n], s[n:]
}
https://golang.org/pkg/strings#IndexAny
https://golang.org/pkg/strings#IndexFunc
Related
I have the below code, where I want the user to enter some key words, then find what from these words are exisiting in a given string, but the resulting matches slice is an empty slice of a length equal to text to be checked
playground
package main
import (
"fmt"
"regexp"
)
func main() {
p := []string{}
p = append(p, "engineer")
p = append(p, "doctor")
var skills string
for _, z := range p {
skills += `|` + z
}
fmt.Println(skills)
re := regexp.MustCompile(`(?i)` + skills)
matches := re.FindAllString("I'm an engineer not a doctor", -1)
fmt.Println(matches)
for i, j := range matches {
fmt.Println(i, j)
}
}
Thanks to the comments provided, I got is as:
package main
import (
"fmt"
"regexp"
"strings"
)
func main() {
p := []string{}
p = append(p, "engineer")
p = append(p, "doctor")
p = append(p, "chemical (permit)")
skills := strings.Join(p, "|")
fmt.Println(skills)
re := regexp.MustCompile(`(?i)` + skills)
matches := re.FindAllString("I'm an engineer not a doctor who is getting chemical permits", -1)
fmt.Println(matches, len(matches))
for i, j := range matches {
fmt.Println(i, j)
}
}
The output is:
engineer|doctor|chemical (permit)
[engineer doctor chemical permit] 3
0 engineer
1 doctor
2 chemical permit
For example, if the input was this
1 3 4 5
all separated by a space, I want to apply the function of squaring each individual number then adding it.
I just don't know how to apply the function to each number. All I can figure is that I have to put the numbers into a slice then apply the function to each of the numbers. I have looked everywhere and can't find out how to do this.
in Python I just do it like this and I already put the values into a list called "n".
#The list is pasted from the initial puzzle
n=[10, 10, 9, 8, 10, 10, 10]
# The list is first squared
b = (list(map(lambda x:x**2,n)))
b becomes the new list where the function is done to each number.
You can do it like this if your integers are actually a string separated by spaces.
package main
import "fmt"
import "strings"
import "strconv"
func main() {
numbers := "1 3 4 5"
var n []int
for _, v := range strings.Fields(numbers) {
i, err := strconv.Atoi(v)
if err != nil {
fmt.Println(err.Error())
break
}
n = append(n, i*i)
}
fmt.Println(n)
}
https://play.golang.org/p/JcivNd29Gzg
package main
import (
"strconv"
"fmt"
"strings"
)
func main() {
stringwithnumbers := "1 2 3 4 5"
numberarray := strings.Split(stringwithnumbers, " ")
stringwithnumbers = ""
for _, number := range numberarray {
numbernew,err := strconv.Atoi(number)
if err != nil{
return
}
numbernew = numbernew * 2
stringwithnumbers += strconv.Itoa(numbernew)
stringwithnumbers += " "
}
stringwithnumbers = strings.Trim(stringwithnumbers, " ")
//You can check the result...
fmt.Print(stringwithnumbers)
}
You can check the code and your changes here:
https://play.golang.org/
When printing out some values from a map of structs. I see certain float64 values with alternative notation. The test passes but how do you read this notation (4e-06). Is this value indeed the same as "0.000004"?
package main
import (
"fmt"
"strconv"
"testing"
)
func TestXxx(t *testing.T) {
num := fmt.Sprintf("%f", float64(1.225788)-float64(1.225784)) // 0.000004
f, _ := strconv.ParseFloat(num, 64)
if f == 0.000004 {
t.Log("Success")
} else {
t.Error("Not Equal", num)
}
if getFloat(f) == 0.000004 {
t.Log("Success")
}else{
t.Error("Fail", getFloat(f))
}
}
func getFloat(f float64) float64 {
fmt.Println("My Float:",f) // 4e-06
return f
}
The notation is called Scientific notation, and it is a convenient way to print very small or very large numbers in compact, short form.
It has the form of
m × 10n
(m times ten raised to the power of n)
In programming languages it is written / printed as:
men
See Spec: Floating-point literals.
Your number: 4e-06, where m=4 and n=-6, which means 4*10-6 which equals to 0.000004.
In order to print your floats in a regular way you can do something like this example:
package main
import (
"fmt"
"strconv"
)
func main() {
a, _ := strconv.ParseFloat("0.000004", 64)
b, _ := strconv.ParseFloat("0.0000000004", 64)
c := fmt.Sprintf("10.0004")
cc, _ := strconv.ParseFloat(c, 64)
fmt.Printf("%.6f\n", a) // 6 numbers after the point
fmt.Printf("%.10f\n", b) // 10 numbers afer the point
fmt.Printf("%.4f\n", cc) // 4 numbers after the point
}
Output:
0.000004
0.0000000004
10.0004
It is the same number. You can use fmt.Printf("My Float: %.6f\n",f) if you don't like the scientific notation. (This format requests that 6 digits will be printed after the decimal point.)
I have a large dataset where I needed to do some string manipulation (I know strings are immutable). The Replace() function in the strings package does exactly what I need, except I need it to search in reverse.
Say I have this string: AA-BB-CC-DD-EE
Run this script:
package main
import (
"fmt"
"strings"
)
func main() {
fmt.Println(strings.Replace("AA-BB-CC-DD-EE", "-", "", 1))
}
It outputs: AABB-CC-DD-EE
What I need is: AA-BBCCDDEE, where the first instance of the search key is found, and the rest discarded.
Splitting the string, inserting the dash, and joining it back together works. But, I'm thinking there is a more performant way to achieve this.
String slices!
in := "AA-BB-CC-DD-EE"
afterDash := strings.Index(in, "-") + 1
fmt.Println(in[:afterDash] + strings.Replace(in[afterDash:], "-", "", -1))
(might require some tweaking to get the behavior you want in the case that the input has no dashes).
This can be another solution
package main
import (
"strings"
"fmt"
)
func Reverse(s string) string {
n := len(s)
runes := make([]rune, n)
for _, rune := range s {
n--
runes[n] = rune
}
return string(runes[n:])
}
func main() {
S := "AA-BB-CC-DD-EE"
S = Reverse(strings.Replace(Reverse(S), "-", "", strings.Count(S, "-")-1))
fmt.Println(S)
}
Another solution:
package main
import (
"fmt"
"strings"
)
func main() {
S := strings.Replace("AA-BB-CC-DD-EE", "-", "*", 1)
S = strings.Replace(S, "-", "", -1)
fmt.Println(strings.Replace( S, "*", "-", 1))
}
I think you want to use strings.Map rather than rigging things with compositions of functions. It's basically meant for this scenario: character replacement with more complex requirements than Replace and cousins can handle. The definition:
Map returns a copy of the string s with all its characters modified according to the mapping function. If mapping returns a negative value, the character is dropped from the string with no replacement.
Your mapping function can be built with a fairly simple closure:
func makeReplaceFn(toReplace rune, skipCount int) func(rune) rune {
count := 0
return func(r rune) rune {
if r == toReplace && count < skipCount {
count++
} else if r == toReplace && count >= skipCount {
return -1
}
return r
}
}
From there, it's a very straightforward program:
strings.Map(makeReplaceFn('-', 1), "AA-BB-CC-DD-EE")
Playground, this produces the desired output:
AA-BBCCDDEE
Program exited.
I'm not sure whether this is faster or slower than other solutions without benchmarking, because on one hand it has to call a function for each rune in the string, while on the other hand it doesn't have to convert (and thus copy) between a []byte/[]rune and string between each function call (though the subslicing answer by hobbs is probably overall the best).
In addition, the method can be easily adapted to other scenarios (e.g. retaining every other dash), with the caveat that strings.Map can only do rune to rune mapping, and not rune to string mapping like strings.Replace does.
This was a fun question to answer. While the solutions offered work neatly, splitting and replacing, to say nothing of calling Replace 3 times doesn't seem likely to be performant.
The answer? Don't reinvent the wheel, the go standard library has already almost solved this problem with Replace(), let's tweak it. I stumbled a bit over how the API of our new function should work, finally settling on leaving the signature unchanged, but deciding on minimal change from strings.Replace:
func ReplaceAfter(s,old,new string,skip int) string
The variable skip replaces n to clarify what it does since the caller will specify how many instances of old to skip replacing. skip==0 is defined as replacing every instance and skip==-1 is defined as replacing no instances.
From here there were really only a few bits of the function that needed changing.
func ReplaceAfter(s, old, new string, skip int) string {
if old == new || skip == -1 { // changed
return s // avoid allocation
}
// Compute number of replacements.
m := strings.Count(s, old)
if m == 0 || m < skip { // changed
return s // avoid allocation
} // changed (removed else if)
// Apply replacements to buffer.
n := m - skip // changed, n means the same thing but is calculated
t := make([]byte, len(s)+n*(len(new)-len(old))) // longer buffer
w := 0
start := 0
for i := 0; i < m; i++ {
j := start
if len(old) == 0 {
if i > 0 {
_, wid := utf8.DecodeRuneInString(s[start:])
j += wid
}
} else {
j += strings.Index(s[start:], old)
}
if i >= skip { // changed, replace
w += copy(t[w:], s[start:j])
w += copy(t[w:], new)
} else { // changed, skip ahead
w += copy(t[w:], s[start:j+len(old)])
}
start = j + len(old)
}
w += copy(t[w:], s[start:])
return string(t[0:w])
}
Here's a playground link with a working demo. If you're interested, I also copied and adapted the relevant Test functions from go/src/strings/, to make sure that the function as written behaved itself predictably.
I need to read [100]byte to transfer a bunch of string data.
Because not all of the strings are precisely 100 characters long, the remaining part of the byte array is padded with 0s.
If I convert [100]byte to string by: string(byteArray[:]), the tailing 0s are displayed as ^#^#s.
In C, the string will terminate upon 0, so what's the best way to convert this byte array to string in Go?
Methods that read data into byte slices return the number of bytes read. You should save that number and then use it to create your string. If n is the number of bytes read, your code would look like this:
s := string(byteArray[:n])
To convert the full string, this can be used:
s := string(byteArray[:len(byteArray)])
This is equivalent to:
s := string(byteArray[:])
If for some reason you don't know n, you could use the bytes package to find it, assuming your input doesn't have a null character embedded in it.
n := bytes.Index(byteArray[:], []byte{0})
Or as icza pointed out, you can use the code below:
n := bytes.IndexByte(byteArray[:], 0)
Use:
s := string(byteArray[:])
Simplistic solution:
str := fmt.Sprintf("%s", byteArray)
I'm not sure how performant this is though.
For example,
package main
import "fmt"
func CToGoString(c []byte) string {
n := -1
for i, b := range c {
if b == 0 {
break
}
n = i
}
return string(c[:n+1])
}
func main() {
c := [100]byte{'a', 'b', 'c'}
fmt.Println("C: ", len(c), c[:4])
g := CToGoString(c[:])
fmt.Println("Go:", len(g), g)
}
Output:
C: 100 [97 98 99 0]
Go: 3 abc
The following code is looking for '\0', and under the assumptions of the question the array can be considered sorted since all non-'\0' precede all '\0'. This assumption won't hold if the array can contain '\0' within the data.
Find the location of the first zero-byte using a binary search, then slice.
You can find the zero-byte like this:
package main
import "fmt"
func FirstZero(b []byte) int {
min, max := 0, len(b)
for {
if min + 1 == max { return max }
mid := (min + max) / 2
if b[mid] == '\000' {
max = mid
} else {
min = mid
}
}
return len(b)
}
func main() {
b := []byte{1, 2, 3, 0, 0, 0}
fmt.Println(FirstZero(b))
}
It may be faster just to naively scan the byte array looking for the zero-byte, especially if most of your strings are short.
When you do not know the exact length of non-nil bytes in the array, you can trim it first:
string(bytes.Trim(arr, "\x00"))
Use this:
bytes.NewBuffer(byteArray).String()
Only use for performance tuning.
package main
import (
"fmt"
"reflect"
"unsafe"
)
func BytesToString(b []byte) string {
return *(*string)(unsafe.Pointer(&b))
}
func StringToBytes(s string) []byte {
return *(*[]byte)(unsafe.Pointer(&s))
}
func main() {
b := []byte{'b', 'y', 't', 'e'}
s := BytesToString(b)
fmt.Println(s)
b = StringToBytes(s)
fmt.Println(string(b))
}
Though not extremely performant, the only readable solution is:
// Split by separator and pick the first one.
// This has all the characters till null, excluding null itself.
retByteArray := bytes.Split(byteArray[:], []byte{0}) [0]
// OR
// If you want a true C-like string, including the null character
retByteArray := bytes.SplitAfter(byteArray[:], []byte{0}) [0]
A full example to have a C-style byte array:
package main
import (
"bytes"
"fmt"
)
func main() {
var byteArray = [6]byte{97,98,0,100,0,99}
cStyleString := bytes.SplitAfter(byteArray[:], []byte{0}) [0]
fmt.Println(cStyleString)
}
A full example to have a Go style string excluding the nulls:
package main
import (
"bytes"
"fmt"
)
func main() {
var byteArray = [6]byte{97, 98, 0, 100, 0, 99}
goStyleString := string(bytes.Split(byteArray[:], []byte{0}) [0])
fmt.Println(goStyleString)
}
This allocates a slice of slice of bytes. So keep an eye on performance if it is used heavily or repeatedly.
Use slices instead of arrays for reading. For example, io.Reader accepts a slice, not an array.
Use slicing instead of zero padding.
Example:
buf := make([]byte, 100)
n, err := myReader.Read(buf)
if n == 0 && err != nil {
log.Fatal(err)
}
consume(buf[:n]) // consume() will see an exact (not padded) slice of read data
Here is an option that removes the null bytes:
package main
import "golang.org/x/sys/windows"
func main() {
b := []byte{'M', 'a', 'r', 'c', 'h', 0}
s := windows.ByteSliceToString(b)
println(s == "March")
}
https://pkg.go.dev/golang.org/x/sys/unix#ByteSliceToString
https://pkg.go.dev/golang.org/x/sys/windows#ByteSliceToString