Building clean and flexible binary trees in Rust - data-structures

I'm using binary trees to create a simple computation graph. I understand that linked lists are a pain in Rust, but it's a very convenient data structure for what I'm doing. I tried using Box and Rc<RefCell> for the children nodes, but it didn't work out how I wanted, so I used unsafe:
use std::ops::{Add, Mul};
/// Node of a binary tree: an `i32` value plus optional raw pointers to two child nodes.
///
/// NOTE(review): raw pointers carry no lifetime, so nothing in this type guarantees that a
/// child is still alive when it is dereferenced.
#[derive(Debug, Copy, Clone)]
struct MyStruct {
/// Value stored in this node.
value: i32,
/// Raw pointer to the left child, or `None` when there is none.
lchild: Option<*mut MyStruct>,
/// Raw pointer to the right child, or `None` when there is none.
rchild: Option<*mut MyStruct>,
}
impl MyStruct {
unsafe fn print_tree(&mut self, set_to_zero: bool) {
if set_to_zero {
self.value = 0;
}
println!("{:?}", self);
let mut nodes = vec![self.lchild, self.rchild];
while nodes.len() > 0 {
let child;
match nodes.pop() {
Some(popped_child) => child = popped_child.unwrap(),
None => continue,
}
if set_to_zero {
(*child).value = 0;
}
println!("{:?}", *child);
if !(*child).lchild.is_none() {
nodes.push((*child).lchild);
}
if !(*child).rchild.is_none() {
nodes.push((*child).rchild);
}
}
println!("");
}
}
// `a + b` builds a node whose value is the sum of both operands' values and whose child
// pointers are the addresses of the two operands as seen inside `add`.
impl Add for MyStruct {
type Output = Self;
fn add(self, other: Self) -> MyStruct {
MyStruct{
value: self.value + other.value,
// NOTE(review): `self` and `other` are by-value parameters, i.e. locals of this call, so
// these addresses refer to storage that is gone once `add` returns; dereferencing the
// stored pointers afterwards is undefined behaviour.
lchild: Some(&self as *const _ as *mut _),
rchild: Some(&other as *const _ as *mut _),
}
}
}
// `a * b` builds a node whose value is the product of both operands' values and whose child
// pointers are the addresses of the two operands as seen inside `mul`.
impl Mul for MyStruct {
type Output = Self;
fn mul(self, other: Self) -> Self {
MyStruct{
value: self.value * other.value,
// NOTE(review): `self` and `other` are by-value parameters, i.e. locals of this call, so
// these addresses refer to storage that is gone once `mul` returns; dereferencing the
// stored pointers afterwards is undefined behaviour.
lchild: Some(&self as *const _ as *mut _),
rchild: Some(&other as *const _ as *mut _),
}
}
}
// Builds `e = c * d` (with `c` and `d` both created as `a + b`) inside an inner scope,
// prints and zeroes the tree through the raw child pointers, then copies `e` out of the
// scope into `tree` and prints it once more.
// NOTE(review): every `print_tree` call dereferences child pointers that were taken from the
// by-value parameters of `+` / `*`; whatever is printed therefore relies on undefined
// behaviour, and the trailing "correct values" remarks should be read with that in mind.
fn main() {
let mut tree: MyStruct;
{
let a = MyStruct{ value: 10, lchild: None, rchild: None };
let b = MyStruct{ value: 20, lchild: None, rchild: None };
// `MyStruct` is `Copy`, so `a` and `b` stay usable after being passed to `+`.
let c = a + b;
println!("c.value: {}", c.value); // 30
let mut d = a + b;
println!("d.value: {}", d.value); // 30
d.value = 40;
println!("d.value: {}", d.value); // 40
let mut e = c * d;
println!("e.value: {}", e.value); // 1200
unsafe {
e.print_tree(false); // correct values
e.print_tree(true); // all zeros
e.print_tree(false); // all zeros, everything is set correctly
}
// Bitwise copy of the root node only; the child pointers are copied as-is.
tree = e;
}
unsafe { tree.print_tree(false); } // same here, only zeros
}
Link to the playground
I honestly don't mind using unsafe that much, but is there a safe way of doing it? How bad is the use of unsafe here?

You can just box both of the children, since you have a unidirectional tree:
use std::ops::{Add, Mul};
use std::fmt;
/// Binary-tree node that owns its children: each child, when present, lives in its own `Box`.
#[derive(Clone)]
struct MyStruct {
    /// Value stored in this node.
    value: i32,
    /// Owned left subtree, if any.
    lchild: Option<Box<MyStruct>>,
    /// Owned right subtree, if any.
    rchild: Option<Box<MyStruct>>,
}
impl fmt::Debug for MyStruct {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
f.debug_struct("MyStruct")
.field("value", &self.value)
.field("lchild", &self.lchild.as_deref())
.field("rchild", &self.rchild.as_deref())
.finish()
}
}
impl MyStruct {
fn print_tree(&mut self, set_to_zero: bool) {
if set_to_zero {
self.value = 0;
}
println!("MyStruct {{ value: {:?}, lchild: {:?}, rchild: {:?} }}", self.value, &self.lchild as *const _, &self.rchild as *const _);
if let Some(child) = &mut self.lchild {
child.print_tree(set_to_zero);
}
if let Some(child) = &mut self.rchild {
child.print_tree(set_to_zero);
}
}
}
impl Add for MyStruct {
    type Output = Self;

    /// Consumes both operands and returns a parent node holding their sum, with the
    /// operands boxed as its left and right children.
    fn add(self, other: Self) -> MyStruct {
        let sum = self.value + other.value;
        let (left, right) = (Box::new(self), Box::new(other));
        MyStruct {
            value: sum,
            lchild: Some(left),
            rchild: Some(right),
        }
    }
}
impl Mul for MyStruct {
    type Output = Self;

    /// Consumes both operands and returns a parent node holding their product, with the
    /// operands boxed as its left and right children.
    fn mul(self, other: Self) -> Self {
        let product = self.value * other.value;
        let (left, right) = (Box::new(self), Box::new(other));
        MyStruct {
            value: product,
            lchild: Some(left),
            rchild: Some(right),
        }
    }
}
fn main() {
let tree = {
let a = MyStruct {
value: 10,
lchild: None,
rchild: None,
};
let b = MyStruct {
value: 20,
lchild: None,
rchild: None,
};
let c = a.clone() + b.clone();
println!("c.value: {}", c.value); // 30
let mut d = a.clone() + b.clone();
println!("d.value: {}", d.value); // 30
d.value = 40;
println!("d.value: {}", d.value); // 40
let mut e = c * d;
println!("e.value: {}", e.value); // 1200
println!("");
e.print_tree(false); // correct values
println!("");
e.print_tree(true); // all zeros
println!("");
e.print_tree(false); // all zeros, everything is set correctly
println!("");
e
};
dbg!(tree);
}
I implemented Debug manually and reimplemented print_tree recursively. I don't know if there is a way to implement print_tree as mutable like that without recursion, but it's certainly possible if you take &self instead (removing the set_to_zero stuff).
playground
Edit: Turns out it is possible to mutably iterate over the tree values without recursion. The following code is derived from the playground in this comment by @Shepmaster.
impl MyStruct {
fn zero_tree(&mut self) {
let mut node_stack = vec![self];
let mut value_stack = vec![];
// collect mutable references to each value
while let Some(MyStruct { value, lchild, rchild }) = node_stack.pop() {
value_stack.push(value);
if let Some(child) = lchild {
node_stack.push(child);
}
if let Some(child) = rchild {
node_stack.push(child);
}
}
// iterate over mutable references to values
for value in value_stack {
*value = 0;
}
}
}

Related

Why does my program print a UTF-8 BOM on Windows?

I have the following program that works fine on Linux:
Cargo.toml
[package]
name = "ansi-color-codec"
authors = ["Richard Neumann <mail@richard-neumann.de>"]
description = "Encode bytes as ANSI background colors"
license-file = "LICENSE"
homepage = "https://github.com/conqp/ansi-color-codec/"
repository = "https://github.com/conqp/ansi-color-codec/"
readme = "README.md"
documentation = "https://docs.rs/ansi-color-codec"
keywords = [ "ANSI", "color", "encoding"]
categories = ["command-line-utilities", "encoding"]
version = "0.3.8"
edition = "2021"
exclude = [
".gitignore",
"input.txt",
]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
clap = { version = "4.0.23", features = ["derive"] }
ctrlc = "3.2.3"
[profile.release]
strip = true
lto = true
codegen-units = 1
panic = "abort"
src/lib.rs
use std::iter::FlatMap;
// Selects the low four bits of a byte.
const MASK_LOW: u8 = 0b00001111;
// Selects the high four bits of a byte.
const MASK_HIGH: u8 = 0b11110000;
// Shift distance between the high and the low four-bit half of a byte.
const MASK_BITS: u8 = 4;
// Low three bits (0b0111): the part of a four-bit value that picks a code within a range.
const MASK_TRIPLET: u8 = MASK_LOW >> 1;
// First color-code number of the low range (four-bit values 0..=7 map onto it).
const COLOR_OFFSET_LOW: u8 = 40;
// First color-code number of the high range (four-bit values 8..=15 map onto it).
const COLOR_OFFSET_HIGH: u8 = 100;
// Largest four-bit value that still maps into the low range.
const COLOR_CODE_LOW_MAX: u8 = MASK_TRIPLET;
// Largest four-bit value that can be encoded at all.
const COLOR_CODE_MAX: u8 = MASK_LOW;
// Bit added back when a high-range code is normalized to its four-bit value.
const COLOR_CODE_HIGH_BIT: u8 = 0b1000;
// Maximum number of decimal digits accepted for one color-code number.
const MAX_DIGITS: u8 = 3;
// ESC, the first byte of every escape sequence read by the decoder.
const CODE_START: u8 = 0x1b;
// Character expected directly after ESC.
const NUMBER_PREFIX: char = '[';
// Character that terminates the color-code number.
const NUMBER_SUFFIX: char = 'm';
const UNEXPECTED_TERMINATION_MSG: &str = "Byte stream terminated unexpectedly";
// Iterator type returned by `ansi_color_encode`: every input byte expands to two color codes.
type ColorCodes<T> = FlatMap<T, [ColorCode; 2], fn(u8) -> [ColorCode; 2]>;
/// Extension methods that turn a byte iterator into color codes and back.
pub trait ColorCodec<T: Iterator<Item = u8>> {
    /// Encodes every byte as two color codes.
    fn ansi_color_encode(self) -> ColorCodes<T>;

    /// Parses the bytes as escape sequences and reassembles the original bytes.
    fn ansi_color_decode(self) -> ColorCodesToBytes<ColorCodesFromBytes<T>>;
}
impl<T> ColorCodec<T> for T
where
T: Iterator<Item = u8>,
{
fn ansi_color_encode(self) -> ColorCodes<T> {
self.flat_map(|byte| byte.to_color_codes())
}
fn ansi_color_decode(self) -> ColorCodesToBytes<ColorCodesFromBytes<T>> {
ColorCodesToBytes::from(ColorCodesFromBytes::from(self))
}
}
/// One color code, stored as the number that appears inside the escape sequence.
#[derive(Debug, Eq, PartialEq)]
pub struct ColorCode {
    /// Raw code number as written between `[` and `m`.
    number: u8,
}
impl ColorCode {
    /// Wraps a raw code number after validating it.
    ///
    /// Valid numbers are the low range `COLOR_OFFSET_LOW..=COLOR_OFFSET_LOW + COLOR_CODE_LOW_MAX`
    /// and the high range `COLOR_OFFSET_HIGH..=COLOR_OFFSET_HIGH + COLOR_CODE_LOW_MAX`.
    ///
    /// # Errors
    /// Returns `Invalid color code: <number>` for every other number.
    pub fn new(number: u8) -> Result<Self, String> {
        // The low range must start at its offset, not at 0: numbers below the offset used
        // to be accepted here and then underflowed in `normalized`.
        let low_range = COLOR_OFFSET_LOW..=COLOR_OFFSET_LOW + COLOR_CODE_LOW_MAX;
        let high_range = COLOR_OFFSET_HIGH..=COLOR_OFFSET_HIGH + COLOR_CODE_LOW_MAX;
        if low_range.contains(&number) || high_range.contains(&number) {
            Ok(Self { number })
        } else {
            Err(format!("Invalid color code: {}", number))
        }
    }

    /// Maps the code back to the four-bit value it encodes: low-range codes yield
    /// `0..=COLOR_CODE_LOW_MAX`, high-range codes additionally get `COLOR_CODE_HIGH_BIT`.
    pub fn normalized(&self) -> u8 {
        if self.number < COLOR_OFFSET_HIGH {
            self.number - COLOR_OFFSET_LOW
        } else {
            self.number - COLOR_OFFSET_HIGH + COLOR_CODE_HIGH_BIT
        }
    }
}
impl TryFrom<u8> for ColorCode {
    type Error = String;

    /// Converts a four-bit value into its color code: values up to `COLOR_CODE_LOW_MAX` land
    /// in the low range, values up to `COLOR_CODE_MAX` in the high range.
    ///
    /// # Errors
    /// Returns `Value out of bounds for color code: <value>` for anything larger.
    fn try_from(value: u8) -> Result<Self, Self::Error> {
        match value {
            low if low <= COLOR_CODE_LOW_MAX => Self::new(low + COLOR_OFFSET_LOW),
            // Drop the high bit; the high offset takes its place.
            high if high <= COLOR_CODE_MAX => Self::new((high & MASK_TRIPLET) + COLOR_OFFSET_HIGH),
            _ => Err(format!("Value out of bounds for color code: {}", value)),
        }
    }
}
/// Renders the code as the escape sequence `ESC [ <number> m` followed by one space.
///
/// Implementing `Display` (rather than `ToString` directly) keeps `to_string()` available
/// through the standard blanket impl and additionally allows `{}` formatting.
impl std::fmt::Display for ColorCode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "\x1b[{}m ", self.number)
    }
}
/// Conversion between a value and the pair of color codes that represents it.
trait ColorEncodable {
    /// Splits the value into its two color codes (first element: high half).
    fn to_color_codes(&self) -> [ColorCode; 2];

    /// Reassembles a value from its two color codes (first element: high half).
    fn from_color_codes(color_codes: [ColorCode; 2]) -> Self;
}
impl ColorEncodable for u8 {
    /// Encodes the high and the low four bits of the byte as one color code each.
    fn to_color_codes(&self) -> [ColorCode; 2] {
        let high_half = (self & MASK_HIGH) >> MASK_BITS;
        let low_half = self & MASK_LOW;
        // Both halves are at most `MASK_LOW`, which `try_from` accepts.
        [
            ColorCode::try_from(high_half).unwrap(),
            ColorCode::try_from(low_half).unwrap(),
        ]
    }

    /// Combines the normalized values of both codes back into one byte.
    fn from_color_codes(color_codes: [ColorCode; 2]) -> Self {
        let [high, low] = &color_codes;
        (high.normalized() << MASK_BITS) + low.normalized()
    }
}
/// Iterator adapter that parses color codes out of a stream of bytes.
#[derive(Debug, Eq, PartialEq)]
pub struct ColorCodesFromBytes<T: Iterator<Item = u8>> {
    /// Underlying byte source.
    bytes: T,
}
impl<T> ColorCodesFromBytes<T>
where
    T: Iterator<Item = u8>,
{
    /// Consumes the two-byte sequence header (`CODE_START` followed by `NUMBER_PREFIX`).
    ///
    /// Returns `None` when the stream is already exhausted, `Some(Ok(()))` for a valid
    /// header and `Some(Err(..))` for a wrong byte or a stream that ends after `CODE_START`.
    fn next_header(&mut self) -> Option<Result<(), String>> {
        let start = self.bytes.next()?;
        if start != CODE_START {
            return Some(Err(format!("Invalid start byte: {}", start)));
        }
        let outcome = match self.bytes.next() {
            None => Err(UNEXPECTED_TERMINATION_MSG.to_string()),
            Some(prefix) if prefix as char == NUMBER_PREFIX => Ok(()),
            Some(prefix) => Err(format!("Invalid number prefix: {}", prefix)),
        };
        Some(outcome)
    }

    /// Reads between one and `MAX_DIGITS` ASCII digits terminated by `NUMBER_SUFFIX` and
    /// returns the digits (without the suffix).
    fn read_digits(&mut self) -> Result<String, String> {
        let mut digits = String::new();
        // One extra round so that the suffix after `MAX_DIGITS` digits is still consumed.
        for count in 0..=MAX_DIGITS {
            let byte = self
                .bytes
                .next()
                .ok_or_else(|| UNEXPECTED_TERMINATION_MSG.to_string())?;
            if byte.is_ascii_digit() {
                if count >= MAX_DIGITS {
                    return Err(format!("Expected at most {} digits", MAX_DIGITS));
                }
                digits.push(byte as char);
                continue;
            }
            if byte as char != NUMBER_SUFFIX {
                return Err(format!("Encountered Unexpected byte \"{}\"", byte));
            }
            if digits.is_empty() {
                return Err("Expected at least one digit".to_string());
            }
            return Ok(digits);
        }
        Ok(digits)
    }

    /// Reads the code number, skips the single byte that follows the sequence and parses
    /// the digits as `u8`.
    fn parse_color_code(&mut self) -> Result<u8, String> {
        let digits = self.read_digits()?;
        self.bytes.next(); // Discard bg-color encoded char
        digits
            .parse::<u8>()
            .map_err(|_| format!("Could not parse u8 from {}", digits))
    }
}
impl<T> From<T> for ColorCodesFromBytes<T>
where
T: Iterator<Item = u8>,
{
fn from(bytes: T) -> Self {
Self { bytes }
}
}
impl<T> Iterator for ColorCodesFromBytes<T>
where
    T: Iterator<Item = u8>,
{
    type Item = Result<ColorCode, String>;

    /// Parses the next escape sequence into a `ColorCode`.
    ///
    /// Iteration ends when the byte stream is exhausted or when a sequence with code
    /// number 0 is read; malformed input is reported as `Some(Err(..))`.
    fn next(&mut self) -> Option<Self::Item> {
        match self.next_header()? {
            Ok(()) => {}
            Err(msg) => return Some(Err(msg)),
        }
        match self.parse_color_code() {
            // Code number 0 ends the sequence of color codes.
            Ok(0) => None,
            Ok(number) => Some(ColorCode::new(number)),
            Err(msg) => Some(Err(format!("{} while parsing color code", msg))),
        }
    }
}
#[derive(Debug, Eq, PartialEq)]
pub struct ColorCodesToBytes<T>
where
T: Iterator<Item = Result<ColorCode, String>>,
{
codes: T,
}
impl<T> From<T> for ColorCodesToBytes<T>
where
T: Iterator<Item = Result<ColorCode, String>>,
{
fn from(codes: T) -> Self {
Self { codes }
}
}
impl<T> Iterator for ColorCodesToBytes<T>
where
    T: Iterator<Item = Result<ColorCode, String>>,
{
    type Item = Result<u8, String>;

    /// Takes two color codes (high half first) and combines them into one byte.
    ///
    /// Ends when no first code is available; a parse error from either code is passed on,
    /// and a missing second code is reported as an error.
    fn next(&mut self) -> Option<Self::Item> {
        let high = match self.codes.next()? {
            Ok(code) => code,
            Err(msg) => return Some(Err(msg)),
        };
        let byte = match self.codes.next() {
            None => Err("Missing second color code block".to_string()),
            Some(Err(msg)) => Err(msg),
            Some(Ok(low)) => Ok(u8::from_color_codes([high, low])),
        };
        Some(byte)
    }
}
src/main.rs
use ansi_color_codec::ColorCodec;
use clap::Parser;
use ctrlc::set_handler;
use std::io::{stdin, stdout, Read, Write};
use std::process::exit;
use std::sync::{
atomic::{AtomicBool, Ordering},
Arc,
};
const STDOUT_WRITE_ERR: &str = "Could not write bytes to STDOUT";
// Command-line flags, parsed through clap's `Parser` derive.
// Plain `//` comments are used on purpose: clap turns `///` doc comments into help text,
// which would change the program's output.
#[derive(Parser)]
#[clap(about, author, version)]
struct Args {
// When set, `main` decodes STDIN instead of encoding it.
#[clap(short, long, name = "decode")]
pub decode: bool,
// When set, `encode` does not print the trailing `ESC[0m` line.
#[clap(short, long, name = "no-clear")]
pub no_clear: bool,
}
fn main() {
let args = Args::parse();
let running = Arc::new(AtomicBool::new(true));
let bytes = stream_stdin(running.clone());
set_handler(move || {
running.store(false, Ordering::SeqCst);
})
.expect("Error setting Ctrl-C handler");
if args.decode {
decode(bytes)
} else {
encode(bytes, !args.no_clear)
}
}
/// Decodes color codes read from `bytes` and writes the resulting bytes to STDOUT.
///
/// On the first decoding error the message is printed to STDERR and the process exits with
/// status 1.
///
/// # Panics
/// Panics if writing to or flushing STDOUT fails.
fn decode(bytes: impl Iterator<Item = u8>) {
    // Lock STDOUT once instead of re-acquiring the lock for every decoded byte.
    let stdout = stdout();
    let mut out = stdout.lock();
    for result in bytes.ansi_color_decode() {
        match result {
            Ok(byte) => {
                out.write_all(&[byte]).expect(STDOUT_WRITE_ERR);
            }
            Err(msg) => {
                eprintln!("{}", msg);
                exit(1);
            }
        }
    }
    out.flush().expect("Could not flush STDOUT")
}
/// Encodes `bytes` as color codes on STDOUT; when `clear` is set, a final `ESC[0m` line is
/// printed afterwards.
///
/// # Panics
/// Panics if writing to STDOUT fails.
fn encode(bytes: impl Iterator<Item = u8>, clear: bool) {
    {
        // Lock STDOUT once instead of re-acquiring the lock for every color code.
        let stdout = stdout();
        let mut out = stdout.lock();
        for code in bytes.ansi_color_encode() {
            out.write_all(code.to_string().as_bytes())
                .expect(STDOUT_WRITE_ERR);
        }
    }
    if clear {
        println!("\x1b[0m ");
    }
}
/// Lazily yields the bytes of STDIN until the first read error or until `running` is
/// observed to be `false`. A read error ends the stream silently.
fn stream_stdin(running: Arc<AtomicBool>) -> impl Iterator<Item = u8> {
    stdin().bytes().map_while(move |read| match read {
        // The flag is only consulted for successfully read bytes.
        Ok(byte) if running.load(Ordering::SeqCst) => Some(byte),
        _ => None,
    })
}
However, when I run
> echo "Windows doing Windows stuff" | ansi-color-codec | ansi-color-codec -d
on Windows, the program fails with
Invalid start byte: 239
When I inspect the first three bytes, I can see that ansi-color-codec -d receives the UTF-8 BOM from ansi-color-codec. But why? My program does not print it and only puts raw bytes onto STDOUT.
Found the answer on Reddit:
EDIT : [SOLVED]
CAUSE : Windows powershell (at least) uses different code page from external programs ( rust program in this case ) which caused inconsistent inter process communication.
SOLUTION : Set the following environment variable for consistent communication with rust programs.
$OutputEncoding = [console]::InputEncoding = [console]::OutputEncoding = New-Object System.Text.UTF8Encoding

strange process names when trying to get a PID in rust with the Windows api

Hello, my goal is to create a Rust function that takes a process name as a string and returns its PID.
I came up with this function:
/// Walks a process snapshot and returns the ID of the process whose executable name
/// equals `proc_name`, or 0 when no entry matched.
///
/// NOTE(review): marked `unsafe` because it calls the raw snapshot functions; the exact
/// contract depends on the `windows` crate version in use — TODO confirm.
pub unsafe fn get_proc_id(proc_name: String) -> u32 {
let mut proc_id: u32 = 0;
let mut h_snap = windows::Win32::System::Diagnostics::ToolHelp::CreateToolhelp32Snapshot(
TH32CS_SNAPPROCESS,
0,
);
let h_snap = match h_snap {
Ok(t) => t,
Err(e) => panic!("eror {}", e),
};
let mut proc_entry: PROCESSENTRY32 = PROCESSENTRY32 {
..PROCESSENTRY32::default()
};
// The entry carries its own size in `dwSize`; it is set before the first query.
proc_entry.dwSize = std::mem::size_of::<PROCESSENTRY32>() as u32;
let entry_ptr = &mut proc_entry as *mut PROCESSENTRY32;
if windows::Win32::System::Diagnostics::ToolHelp::Process32First(h_snap, entry_ptr).as_bool() {
loop {
let mut proc_exe_string: String = String::new();
// NOTE(review): `proc_exe_string` is still empty at this point, so this comparison can only
// succeed for an empty `proc_name`; the name is filled in by the loop further down.
if proc_exe_string.eq(&proc_name) {
proc_id = proc_entry.th32ProcessID;
break;
}
// NOTE(review): this copies every non-zero element of the buffer instead of stopping at
// the first zero. Anything stored after the terminator — presumably leftovers of a longer
// name from an earlier entry — is appended as well, which would match output such as
// `ac_client.exeexe.exeee`. TODO confirm.
for e in proc_entry.szExeFile {
if e.0 != 0 {
proc_exe_string.push(e.0 as char)
}
}
println!("{}", proc_exe_string);
if !Process32Next(h_snap, entry_ptr).as_bool() {
break;
}
}
}
CloseHandle(h_snap);
return proc_id; }
The function prints some strange process names. For example, I'm looking for "ac_client.exe", but the function shows this process as ac_client.exeexe.exeee, and it is similar for most process names. Some other examples:
chrome.exexeexe.exeee
Discord.exer.exee.exee

How do I implement a trait for an enum and its respective variants?

I'm trying to use enum variants to capture data which is heterogeneous in nature (has different collections of fields) but which is of the same "type" from a protocol perspective. However, I'm not sure how to implement subtype-specific methods and traits. Here is a minimal example of how I can create an enumeration of Data and I can use enum variant constructors to specify the types, but if I implement a trait on the variant, calling that function is not something I've figured out how to do.
use std::fmt;
/// Data of one protocol-level "type" whose variants carry different sets of fields.
enum Data {
    /// Numeric payload.
    N(NData),
    /// String payload.
    S(SData),
}
/// Payload consisting of a single number.
struct NData {
    numeric: u32,
}

/// Displays the payload as its number in decimal.
impl fmt::Display for NData {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let NData { numeric } = self;
        write!(f, "{}", numeric)
    }
}
/// Payload consisting of a list of strings.
struct SData {
    stringy: Vec<String>,
}
// Demonstrates the problem: the variant payload `NData` can be printed with `{}`, the
// enum wrapping it cannot, because only `NData` implements `fmt::Display`.
fn main() {
let d_n: Data = Data::N(NData { numeric: 0x0 });
let n = NData { numeric: 0xff };
// Fails, fmt::Display not implemented for Data
println!("{}", d_n);
// Just fine!
println!("{}", n);
}
One possible solution could be to implement your trait for the variants as well as for the enum, which as you can see here only calls the specific implementations of the variants:
use std::fmt;
/// Payload consisting of a single number.
struct NData {
    numeric: u32,
}

/// Displays the payload as its number in decimal.
impl fmt::Display for NData {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let NData { numeric } = self;
        write!(f, "{}", numeric)
    }
}
/// Payload consisting of a list of strings.
struct SData {
    strings: Vec<String>,
}

/// Displays the payload using the `Debug` form of the list, e.g. `["hello", "world"]`.
impl fmt::Display for SData {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let SData { strings } = self;
        write!(f, "{:?}", strings)
    }
}
/// Data of one protocol-level "type" whose variants carry different sets of fields.
enum Data {
    N(NData),
    S(SData),
}

/// Displays whichever payload the variant holds by delegating to that payload's `Display`.
impl fmt::Display for Data {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let payload: &dyn fmt::Display = match self {
            Data::N(n_data) => n_data,
            Data::S(s_data) => s_data,
        };
        payload.fmt(f)
    }
}
/// Prints both payloads directly and then again wrapped in their `Data` variants.
fn main() {
    let n = NData { numeric: 0xff };
    let s = SData {
        strings: ["hello", "world"].iter().map(|word| word.to_string()).collect(),
    };
    println!("{}", n);
    println!("{}", s);

    // Wrapping moves the payloads into the enum; the output stays the same.
    for wrapped in &[Data::N(n), Data::S(s)] {
        println!("{}", wrapped);
    }
}
Which will produce the following output:
255
["hello", "world"]
255
["hello", "world"]

Idiomatic way to create static iterable collection of named structs?

What is the idiomatic way to create static iterable collection of named structs? I have n instances of a struct, where n is known at compile time and is less than 20. I would like to be able to iterate over all the entries and also be able to refer to each entry by a name instead of an index. All the data is known at compile time.
I could use an array or enum, along with hand written constants which map the labels to indexes; but this seems finicky.
/// Behaviour shared by every entry: prints the value without a trailing newline.
fn common_behaviour(x: f64) {
    print!("{}", x);
}
/// Index of the "add" entry in the array.
const ADD: usize = 0;
/// Index of the "subtract" entry in the array.
const SUBTRACT: usize = 1;

/// Fills the two named slots, runs the shared behaviour on each and then applies the
/// slot-specific update.
fn main() {
    let mut foos = [0.0_f64; 2];
    foos[ADD] = 4.0;
    foos[SUBTRACT] = 2.0;

    for &foo in &foos {
        common_behaviour(foo);
    }

    foos[ADD] += 1.0;
    foos[SUBTRACT] -= 1.0;
}
Alternatively, I could just pay the performance cost and use a HashMap as the hashing overhead might not actually matter that much, but this seems suboptimal as well.
Perhaps I could refactor my code to use function pointers instead of special-casing the different cases.
/// Behaviour shared by every entry: prints the value without a trailing newline.
fn common_behaviour(x: f64) {
    print!("{}", x);
}
/// Entry-specific step for "add": returns `x` increased by one.
fn add(x: f64) -> f64 {
    let step = 1.0;
    x + step
}
/// Entry-specific step for "subtract": returns `x` decreased by one.
fn subtract(x: f64) -> f64 {
    let step = 1.0;
    x - step
}
/// A value together with the function that performs its entry-specific update.
struct Foo {
    /// Current value.
    data: f64,
    /// Update applied to `data`.
    special: fn(f64) -> f64,
}

impl Foo {
    /// Pairs an initial value with its update function.
    fn new(data: f64, special: fn(f64) -> f64) -> Foo {
        Self { data: data, special: special }
    }
}
/// Runs the shared behaviour on every entry and then applies the entry's own update.
fn main() {
    let mut foos = [Foo::new(4.0, add), Foo::new(2.0, subtract)];
    for entry in foos.iter_mut() {
        common_behaviour(entry.data);
        let update = entry.special;
        entry.data = update(entry.data);
    }
}
What is most idiomatic way to handle this situation?
Looking at:
/// Runs the shared behaviour on every entry and then applies the entry's own update.
fn main() {
    let mut foos = [Foo::new(4.0, add), Foo::new(2.0, subtract)];
    for entry in foos.iter_mut() {
        common_behaviour(entry.data);
        let update = entry.special;
        entry.data = update(entry.data);
    }
}
I see a Command Pattern struggling to emerge, and Rust is great at expressing this pattern, thanks to enum:
/// A command carrying the value it operates on.
enum Foo {
    /// Increases the value by one when applied.
    Add(f64),
    /// Decreases the value by one when applied.
    Sub(f64),
}

impl Foo {
    /// Prints the current value, then applies the variant's step to it.
    fn apply(&mut self) {
        // Both variants do the same thing except for the sign of the step.
        let (value, step) = match self {
            Foo::Add(x) => (x, 1.0),
            Foo::Sub(x) => (x, -1.0),
        };
        Self::common(*value);
        *value += step;
    }

    /// Behaviour shared by all variants: prints the value without a trailing newline.
    fn common(x: f64) {
        print!("{}", x);
    }
}
And your example becomes:
/// Applies every command in the array once.
fn main() {
    let mut foos = [Foo::Add(4.0), Foo::Sub(2.0)];
    foos.iter_mut().for_each(Foo::apply);
}

Is there a way to do a for loop that is neither iterative nor linear?

Can I write a Rust for loop equivalent to this C code:
for(int i = 2; i <= 128; i=i*i){
//do something
}
I'm only seeing things like
for i in 0..128 { /* do something */ }
or
let v = vec![0, 1, 2, /* ... */ ];
for i in v.iter() { /* do something */ }
Should I just use a while loop?
You can always create a custom iterator that does whatever unique sequence you need:
/// Iterator over `current`, `2 * current`, `4 * current`, … for as long as the value does
/// not exceed `max`.
struct Doubling {
    /// Next value to yield.
    current: u64,
    /// Inclusive upper bound.
    max: u64,
}

impl Iterator for Doubling {
    type Item = u64;

    /// Yields the current value and doubles it; ends once the value exceeds `max` or can
    /// no longer be doubled without overflowing `u64`.
    fn next(&mut self) -> Option<Self::Item> {
        if self.current > self.max {
            return None;
        }
        let value = self.current;
        match value.checked_mul(2) {
            Some(doubled) => self.current = doubled,
            None => {
                // Doubling would overflow (a panic in debug builds). No `u64` exceeds a
                // `max` of `u64::MAX`, so mark the iterator as exhausted by making
                // `current > max` hold.
                self.current = 1;
                self.max = 0;
            }
        }
        Some(value)
    }
}
/// Collects the doubling sequence from 2 up to 128 and prints it.
fn main() {
    let values = Doubling { current: 2, max: 128 }.collect::<Vec<_>>();
    println!("{:?}", values);
}
It's important to recognize that this logic (like the original C!) has nasty edge cases when the value is doubled beyond the size of the type.
In this particular case, you can also recognize that you have an exponential series:
/// Prints the powers of two with exponents 1 through 7.
fn main() {
    let values: Vec<i32> = (1..8u32).map(|exponent| 2i32.pow(exponent)).collect();
    println!("{:?}", values);
}
If you want to get really experimental, check out Lazy sequence generation in Rust. Adapted here:
#![feature(generators, generator_trait, conservative_impl_trait)]
use std::ops::{Generator, GeneratorState};
/// Returns an iterator that yields `start`, then repeatedly doubles it, for as long as the
/// value is `<= max`.
///
/// NOTE(review): built on the unstable generator features enabled at the top of this
/// snippet, so it needs a nightly toolchain that still provides them — verify before use.
fn doubling(mut start: u64, max: u64) -> impl Iterator<Item = u64> {
GeneratorIteratorAdapter(move || {
while start <= max {
yield start;
start *= 2;
}
})
}
/// Collects the generated sequence from 2 up to 128 and prints it.
fn main() {
    let values: Vec<_> = doubling(2, 128).collect();
    println!("{:?}", values);
}
/* copy-pasta */
/// Wraps a generator so that it can be used as an `Iterator`.
struct GeneratorIteratorAdapter<G>(G);
/// Each `next` call resumes the generator once: a yielded value becomes `Some(value)`,
/// completion becomes `None`.
///
/// NOTE(review): `resume()` is called here without arguments on the plain value; this
/// matches the unstable API the snippet was written against and may not match current
/// nightlies — verify.
impl<G> Iterator for GeneratorIteratorAdapter<G>
where
G: Generator<Return = ()>,
{
type Item = G::Yield;
fn next(&mut self) -> Option<Self::Item> {
match self.0.resume() {
GeneratorState::Yielded(x) => Some(x),
GeneratorState::Complete(_) => None,
}
}
}
can I write a for loop equivalent to this C code:
That specifically, yes:
extern crate itertools;
// Starts at 2 and feeds each value into the closure to obtain the next one (`i * i`, as in
// the C loop); the loop stops at the first value above 128.
for i in itertools::iterate(2, |&i| i*i).take_while(|&i| i <= 128) {
// do something
}
But in general, no. There is no single, direct equivalent to all possible uses of C's for loop. If there's no way to write it using iterators then yes, you need to use a more general loop form:
// Direct translation of the C loop: the extra block limits the scope of `i`, and the
// update step `i = i*i` runs at the end of every iteration.
{
let mut i = 2;
while i <= 128 {
// do something
i = i*i;
}
}

Resources