Trying to read MacOS clipboard contents - cocoa

On my adventure to learn Rust I decided to try printing the contents of the clipboard to the CLI. I've done this before in Swift, so I thought I wouldn't have many issues in Rust.
However I'm having a hard time printing the contents of the returned NSArray. I've spent a few hours playing around with different functions but haven't made much progress.
The Swift code I have that works:
import Foundation
import AppKit
let pasteboard = NSPasteboard.general
/// Collects the plain-text representation of every item on the general pasteboard.
///
/// - Returns: One string per pasteboard item that offers a
///   `public.utf8-plain-text` representation; items without one are skipped.
///   The result is empty when the pasteboard reports no items.
func reload() -> [String] {
    let plainText = NSPasteboard.PasteboardType(rawValue: "public.utf8-plain-text")
    // `pasteboardItems` is optional; fall back to an empty list rather than
    // force-unwrapping, which would crash when it is nil.
    let items = pasteboard.pasteboardItems ?? []
    return items.compactMap { $0.string(forType: plainText) }
}
// Poll the clipboard once per second and print every text item found.
while true {
    let clipboardTexts = reload()
    print(clipboardTexts)
    sleep(1)
}
Here is the Rust code:
use cocoa::appkit::{NSApp, NSPasteboard, NSPasteboardReading, NSPasteboardTypeString};
use cocoa::foundation::NSArray;
// First attempt: fetches the general pasteboard, then debug-prints the first
// pasteboard item and the result of `stringForType`.
fn main() {
unsafe {
let app = NSApp();
let pid = NSPasteboard::generalPasteboard(app);
// NOTE(review): `changec` is read but never used afterwards.
let changec = pid.changeCount();
let pid_item = pid.pasteboardItems();
if pid_item.count() != 0 {
// Dereferences the first array element and prints it with `{:?}`; per the
// output quoted below, this shows the object's description rather than
// the clipboard text.
let items = &*pid_item.objectAtIndex(0);
println!("{:?}", items);
}
println!("{:?}", *pid.stringForType(NSPasteboardTypeString));
}
}
The code above produces: *<NSPasteboardItem: 0x6000021a3de0>*
EDIT:
I've made a little progress but stuck on one last bit. I've managed to get the first UTF8 char out of the clipboard.
The issue I have is if I copy the text: World the system will loop the correct amount of times for the word length but will only print the first letter, in this case W. Output below:
TEXT 'W'
TEXT 'W'
TEXT 'W'
TEXT 'W'
TEXT 'W'
The bit I'm trying to get my head around is how to move to the next i8. I can't seem to find a way to point to the next i8.
The NSString function UTF8String() returns *const i8. I'm scratching my head with how one would walk the text.
use cocoa::appkit::{NSApp, NSPasteboard, NSPasteboardTypeString};
use cocoa::foundation::{NSArray, NSString};
// Second attempt: walks every pasteboard item and tries to print its text one
// character at a time.
fn main() {
unsafe {
let app = NSApp();
let pid = NSPasteboard::generalPasteboard(app);
// NOTE(review): `changec` is read but never used afterwards.
let changec = pid.changeCount();
let nsarray_ptr = pid.pasteboardItems();
if nsarray_ptr.count() != 0 {
for i in 0..NSArray::count(nsarray_ptr) {
let raw_item_ptr = NSArray::objectAtIndex(nsarray_ptr, i);
let itm = raw_item_ptr.stringForType(NSPasteboardTypeString);
// Runs once per unit of `itm.len()`, but the loop variable `u` is never used
// to offset the pointer below.
for u in 0..itm.len() {
// `UTF8String()` is fetched afresh on every iteration, and `*stri` reads the
// byte at the start of that buffer, so the same first character is printed
// each time.
let stri = itm.UTF8String();
println!("TEXT {:?}", *stri as u8 as char);
}
}
}
}
}
To everyone who's looked/commented on this so far thank you.

After reading some tests provided by cocoa I figured out what I needed to do.
The code below prints the contents of the clipboard. Thanks to those who pointed me in the right direction.
use cocoa::appkit::{NSApp, NSPasteboard, NSPasteboardTypeString};
use cocoa::foundation::{NSArray, NSString};
use std::{str, slice};
/// Prints the plain-text contents of every item on the general pasteboard,
/// one item per line.
///
/// Items that have no string representation are skipped.
///
/// # Panics
///
/// Panics if the bytes returned by `UTF8String` are not valid UTF-8.
fn main() {
    // SAFETY: NOTE(review) — this relies on the `cocoa` bindings returning
    // valid (or nil) Objective-C objects for each call; the one pointer that is
    // dereferenced on the Rust side is null-checked before use.
    unsafe {
        let app = NSApp();
        let pasteboard = NSPasteboard::generalPasteboard(app);
        let items = pasteboard.pasteboardItems();
        // A zero count makes the range empty, so no separate emptiness check
        // is needed.
        for i in 0..items.count() {
            let item = items.objectAtIndex(i);
            let text = item.stringForType(NSPasteboardTypeString);
            let bytes = text.UTF8String() as *const u8;
            // An item without a string representation yields no string, and
            // the resulting pointer is null. `slice::from_raw_parts` must never
            // be given a null pointer (even with length 0), so skip the item.
            if bytes.is_null() {
                continue;
            }
            // Assumes `text.len()` is the UTF-8 byte length of the buffer that
            // `UTF8String` returned — TODO confirm against the cocoa crate.
            let clipboard = str::from_utf8(slice::from_raw_parts(bytes, text.len()))
                .expect("UTF8String should yield valid UTF-8");
            println!("{}", clipboard);
        }
    }
}

Related

Slow Rust Performance for a SSH Log Parsing project

I'm a student who is interested in learning rust. For a class project I wrote a rust script that parses a SSH log file, which specifically captured dates and IP addresses in the log.
When I first finished the project, the script took 3 minutes to run through a log file with 655147 entries. After major optimizations I got the processing down to 30 seconds. This is fine, but other students' python programs did it in 3 seconds. So I know it's definitely my fault and I want to know how to write it better. Could someone show me where I went wrong?
Here are the structs I made for reference:
/// Login counters for a single calendar date.
struct DateLogins {
/// The date these counters belong to.
date: NaiveDate,
/// Incremented by `parse_ip` for each address on an "Accepted password" line of this date.
success: i32,
/// Incremented by `parse_ip` for each address on a "Failed password" line of this date.
failure: i32,
}
/// Per-IP-address authentication record, stored in `MinedReport::unique_addrs`.
struct IpAuth {
/// Incremented by `parse_ip` when this address appears on an "Accepted password" line.
success: i32,
/// Incremented by `parse_ip` when this address appears on a "Failed password" line.
failure: i32,
/// NOTE(review): presumably the timestamp of the first line mentioning this
/// address; it is set inside `IpAuth::new`, which is not shown here.
first_attempt: NaiveDateTime,
/// Overwritten by `parse_ip` with the line's timestamp on every accepted login.
successful_attempt: NaiveDateTime,
/// Set to `true` by `parse_ip` when a non-login line matches the reverse-lookup regex.
failed_reverse: bool,
/// Set to `true` by `parse_ip` when a non-login line matches the break-in regex.
break_in_attempt: bool,
/// NOTE(review): presumably the address text itself; populated by `IpAuth::new`.
ip_addr: String,
}
/// Aggregated results accumulated while parsing the log.
struct MinedReport {
/// Replaced by `parse_time` with the first parsed timestamp (while it still
/// equals the placeholder origin date).
start_date: NaiveDateTime,
/// Overwritten by `parse_time` with the timestamp of every parsed line.
end_date: NaiveDateTime,
/// Incremented by `parse_ip` for each address on an "Accepted password" line.
total_success: i32,
/// Incremented by `parse_ip` for each address on a "Failed password" line.
total_failure: i32,
/// Incremented by `parse_ip` for every address occurrence captured, not only
/// for addresses seen for the first time.
total_addrs: i32,
/// Per-date counters, keyed by the date's `to_string()` form.
login_attempts: HashMap<String, DateLogins>,
/// Per-address records, keyed by the captured IP address text.
unique_addrs: HashMap<String, IpAuth>,
}
And here is the main processing logic:
// Regexes are compiled once, on first use, and shared by every parsed line.
lazy_static! {
    // Captures a dotted-quad IPv4 address: four groups of 1-3 digits separated by
    // dots. Octet values above 255 are not rejected.
    static ref IP_RGX: Regex = Regex::new(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})").unwrap(); // regex for capturing the IP address of a message.
    static ref LOGIN_GOOD_RGX: Regex = Regex::new(r"Accepted password").unwrap(); // regex for successful login attempts -- for total, date, and IP address.
    static ref LOGIN_FAIL_RGX: Regex = Regex::new(r"Failed password").unwrap(); // regex for failed login attempts -- for total, date, and IP address
    static ref REVERSE_LOOK_RGX: Regex = Regex::new(r"reverse mapping checking getaddrinfo").unwrap(); // regex for a failed reverse lookup
    static ref BREAK_IN_RGX: Regex = Regex::new(r"POSSIBLE BREAK-IN ATTEMPT!").unwrap(); // regex for a break in attempt -- for IP address
}
fn main() {
let start: std::time::Instant;
let duration: std::time::Duration;
let file: File = File::open("./SSH.log").expect("Could not open log file!");
let reader: BufReader<File> = BufReader::new(file);
let origin_date: NaiveDateTime = NaiveDateTime::parse_from_str("0000 01 01 00:00:00", "%Y %m %d %H:%M:%S").expect("Could not parse start time!");
let mut report: MinedReport = MinedReport::new(origin_date.clone(), origin_date.clone());
start = Instant::now();
for line in reader.lines() {
let l = line.expect("Could not read a line!");
parse_line(l, &mut report, origin_date);
}
duration = start.elapsed();
store_log(report, "./report.txt");
println!("Total time elapsed: {:?}\n", duration);
}
/// Splits one log line into its 15-byte timestamp prefix and the remaining
/// message, then feeds both into the report.
///
/// # Panics
///
/// Panics if `line` is shorter than 15 bytes, if byte 15 is not on a character
/// boundary, or if the timestamp cannot be parsed.
fn parse_line(line: String, report: &mut MinedReport, origin_date: NaiveDateTime) {
    let (stamp, rest) = line.split_at(15);
    // The timestamp must be handled first: it registers the line's date in the report.
    let when = parse_time(stamp, report, origin_date).unwrap();
    parse_ip(rest, report, origin_date, when);
}
/// Parses the timestamp prefix of a log line and records it in `report`.
///
/// The log carries no year, so a placeholder is prepended: `0000` for `Dec`
/// entries and `0001` for everything else. (This only keeps ordering correct
/// because the log contains December and January entries exclusively.)
///
/// Side effects on `report`: `start_date` is set the first time a line is
/// parsed (while it still equals `origin_date`), `end_date` is always
/// overwritten, and an entry for the line's date is created in
/// `login_attempts` if none exists yet.
///
/// # Errors
///
/// Returns the parse error when `time_cap` does not match `%b %d %H:%M:%S`;
/// `report` is left untouched in that case.
/// NOTE(review): assumes `ParseError` is chrono's parse error type so `?` can
/// propagate it — confirm against the file's imports.
fn parse_time(time_cap: &str, report: &mut MinedReport, origin_date: NaiveDateTime) -> Result<NaiveDateTime, ParseError> {
    // `starts_with` cannot panic on short input, unlike slicing `[..3]`.
    let year = if time_cap.starts_with("Dec") { "0000 " } else { "0001 " };
    let full_date = format!("{}{}", year, time_cap);

    let date_time = NaiveDateTime::parse_from_str(&full_date, "%Y %b %d %H:%M:%S")?;
    let date = date_time.date();

    if report.start_date == origin_date {
        report.start_date = date_time;
    }
    report.end_date = date_time;

    // Single lookup: insert the per-date counters only when the date is new.
    report
        .login_attempts
        .entry(date.to_string())
        .or_insert_with(|| DateLogins::new(date));

    Ok(date_time)
}
/// Records every IP address captured in `message` and updates the counters
/// that the line's content calls for.
///
/// For each captured address: `total_addrs` is incremented, an `IpAuth` record
/// is created on first sight, and then either the failure counters, the
/// success counters, or the reverse-lookup / break-in flags are updated,
/// depending on which pattern `message` matches.
///
/// # Panics
///
/// Panics if the date of `current_date` has no entry in
/// `report.login_attempts` (the entry is created by `parse_time`).
fn parse_ip(message: &str, report: &mut MinedReport, origin_date: NaiveDateTime, current_date: NaiveDateTime) {
    let mut addresses = IP_RGX.captures_iter(message).peekable();
    // Lines without an address need no classification at all.
    if addresses.peek().is_none() {
        return;
    }

    // The classification depends only on `message`, so it is computed once per
    // line instead of once per captured address.
    let failed = LOGIN_FAIL_RGX.is_match(message);
    let accepted = !failed && LOGIN_GOOD_RGX.is_match(message);
    let neither = !failed && !accepted;
    let failed_reverse = neither && REVERSE_LOOK_RGX.is_match(message);
    let break_in = neither && BREAK_IN_RGX.is_match(message);

    // The date key is likewise identical for every address on this line.
    let day = current_date.date().to_string();
    let login_date = report
        .login_attempts
        .get_mut(&day)
        .expect("parse_time registers the date before parse_ip runs");

    for capture in addresses {
        let addr = &capture[1];
        report.total_addrs += 1;

        // Look up by `&str`, so an owned `String` is allocated only for
        // addresses that have not been seen before.
        if !report.unique_addrs.contains_key(addr) {
            report
                .unique_addrs
                .insert(addr.to_owned(), IpAuth::new(addr.to_owned(), origin_date, current_date));
        }
        let unique_ip = report
            .unique_addrs
            .get_mut(addr)
            .expect("address was inserted just above if it was missing");

        if failed {
            report.total_failure += 1;
            login_date.failure += 1;
            unique_ip.failure += 1;
        } else if accepted {
            report.total_success += 1;
            login_date.success += 1;
            unique_ip.success += 1;
            unique_ip.successful_attempt = current_date;
        } else {
            if failed_reverse {
                unique_ip.failed_reverse = true;
            }
            if break_in {
                unique_ip.break_in_attempt = true;
            }
        }
    }
}
Like I said I'm new to rust, and programming in general, so there may be something I just don't know about. I already switched to using a hash map from a vector, but maybe there's something better I can use? I don't know. I have also wondered if the chrono or regex crates are my issue here and maybe there's a faster alternative. Either way, thanks to anyone who tries to understand and correct my code!

Ownership question (case with immutable and mutable borrow)

I have a newbie question about ownership, I'm trying to update (+= 1) on the last bytes and print out the UTF-8 characters.
But I have mutable borrow to the String s in order to change the last byte thus I can't print it (using immutable borrow).
What would be the Rustacean way to do so?
Note: I'm aware I'm not doing it properly, I'm at learning stage, thanks.
// Question code: tries to bump the last byte of a 4-byte UTF-8 sequence and
// print the resulting character after each step. It is rejected by the borrow
// checker as annotated below.
fn main() {
let s = vec![240, 159, 140, 145];
let mut s = unsafe {
String::from_utf8_unchecked(s)
};
unsafe {
let bytes = s.as_bytes_mut(); // mutable borrow occurs here
for _ in 0..7 {
println!("{}", s); // compile error: immutable borrow while `bytes` (mutable borrow) is still in use
bytes[3] += 1;
}
}
println!("{}", s);
}
You can use std::str::from_utf8 to make a &str from bytes to print it as a string.

Insert into hashmap in a loop

I'm opening a CSV file and reading it using BufReader and splitting each line into a vector. Then I try to insert or update the count in a HashMap using a specific column as key.
// Question code: counts how often each value of column `c` occurs.
// The map is declared with borrowed `&str` keys, which is what the compiler
// errors quoted below are about.
let mut map: HashMap<&str, i32> = HashMap::new();
let reader = BufReader::new(input_file);
for line in reader.lines() {
// `s` is created anew for every line and dropped at the end of the iteration.
let s = line.unwrap().to_string();
let tokens: Vec<&str> = s.split(&d).collect(); // <-- `s` does not live long enough
if tokens.len() > c {
println!("{}", tokens[c]);
// Passes an owned `String` although the map's key type is `&str`.
let count = map.entry(tokens[c].to_string()).or_insert(0);
*count += 1;
}
}
The compiler kindly tells me that s is short-lived. The question "Storing from inside a loop a borrowed value to container in outer scope?" suggests "owning" the string, so I tried to change
let count = map.entry(tokens[c]).or_insert(0);
to
let count = map.entry(tokens[c].to_string()).or_insert(0);
but I get the error
expected `&str`, found struct `std::string::String`
help: consider borrowing here: `&tokens[c].to_string()`
When I prepend ampersand (&) the error is
creates a temporary which is freed while still in use
note: consider using a `let` binding to create a longer lived
There is some deficiency in my Rust knowledge about borrowing. How can I make the hashmap own the string passed as key?
The easiest way for this to work is for your map to own the keys. This means that you must change its type from HashMap<&str, i32> (which borrows the keys) to HashMap<String, i32>. At that point you can call to_string to convert your tokens into owned strings:
// The map owns its keys, so they may outlive the per-line buffer `s`.
let mut map: HashMap<String, i32> = HashMap::new();
let reader = BufReader::new(input_file);
for line in reader.lines() {
    // `lines()` already yields an owned `String`; no extra `to_string()` copy is needed.
    let s = line.unwrap();
    let tokens: Vec<&str> = s.split(&d).collect();
    if tokens.len() > c {
        println!("{}", tokens[c]);
        // `to_string()` turns the borrowed token into an owned key for the map.
        let count = map.entry(tokens[c].to_string()).or_insert(0);
        *count += 1;
    }
}
Note however that this means that tokens[c] will be duplicated even if it was already present in the map. You can avoid the extra duplication by trying to modify the counter with get_mut first, but this requires two lookups when the key is missing:
// The map owns its keys, so they may outlive the per-line buffer `s`.
let mut map: HashMap<String, i32> = HashMap::new();
let reader = BufReader::new(input_file);
for line in reader.lines() {
    // `lines()` already yields an owned `String`; no extra `to_string()` copy is needed.
    let s = line.unwrap();
    let tokens: Vec<&str> = s.split(&d).collect();
    if tokens.len() > c {
        println!("{}", tokens[c]);
        // Look up with the borrowed token first; an owned key is allocated only
        // when the token is not in the map yet.
        if let Some(count) = map.get_mut(tokens[c]) {
            *count += 1;
        } else {
            map.insert(tokens[c].to_string(), 1);
        }
    }
}
I don't know of a solution that would only copy the key when there was no previous entry but still do a single lookup.

What does Some() do on the left hand side of a variable assignment?

I was reading some Rust code and I came across this line
if let Some(path) = env::args().nth(1) {
Inside of this function
/// Converts the `.cue` file named by the first command-line argument, or
/// prints a usage hint when no argument was given.
///
/// # Panics
///
/// Panics if the file cannot be opened or read, or if the conversion fails.
fn main() {
    if let Some(path) = env::args().nth(1) {
        // Try reading the file provided by the path.
        let mut file = File::open(path).expect("Failed reading file.");
        let mut content = String::new();
        // A failed read must not be ignored: converting partial or empty
        // content would only hide the real error.
        file.read_to_string(&mut content)
            .expect("Failed reading file.");
        perform_conversion(content.as_str()).expect("Conversion failed.");
    } else {
        println!(
            "provide a path to a .cue file to be converted into a MusicBrainz compatible tracklist."
        )
    }
}
The line seems to be assigning the env argument to the variable path but I can't work out what the Some() around it is doing.
I took a look at the documentation for Option and I understand how it works when used on the right hand side of = but on the left hand side I am a little confused.
Am I right in thinking this line is equivalent to
if let path = Some(env::args().nth(1)) {
From the reference :
An if let expression is semantically similar to an if expression but
in place of a condition expression it expects the keyword let followed
by a refutable pattern, an = and an expression. If the value of the
expression on the right hand side of the = matches the pattern, the
corresponding block will execute, otherwise flow proceeds to the
following else block if it exists. Like if expressions, if let
expressions have a value determined by the block that is evaluated.
The important part here is refutability. A refutable pattern is one that may fail to match the value, and it can take different forms. For example:
// Example enum with three differently shaped variants, used to illustrate
// refutable patterns: a pattern for one variant fails to match the others.
enum Test {
First(String, i32, usize),
Second(i32, usize),
Third(i32),
}
You can check the x's value for a value for 3 different pattern like :
/// Checks one value against three refutable patterns; only the pattern for
/// the variant actually stored in `x` matches.
fn main() {
    let x = Test::Second(14, 55);
    // Match through a reference: binding `First`'s `String` by value would
    // partially move `x` and make the later checks on `x` invalid.
    if let Test::First(a, b, c) = &x {}
    if let Test::Second(a, b) = &x {} //This block will be executed
    if let Test::Third(a) = &x {}
}
This is called refutability. But consider your code like this:
// Counter-example: `Test` has a single variant here, so the `if let` pattern
// below can never fail to match, i.e. it is irrefutable.
enum Test {
Second(i32, usize),
}
fn main() {
let x = Test::Second(14, 55);
if let Test::Second(a, b) = x {}
}
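The compiler objects to this code because the pattern is irrefutable: Test has a single variant, so the pattern always matches. (Older compilers reject it with an error; newer ones emit an irrefutable_let_patterns warning instead.)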
You can get more information from the reference of refutability.
Also, you are not right in thinking this:
if let path = Some(env::args().nth(1)) {
The compiler will complain about an irrefutable if-let pattern because, as the reference says, it expects the "keyword let followed by a refutable pattern", and here there is no refutable pattern after "let". This code actually tries to create a variable named path which is an Option, and that makes no sense because no "if" is needed.
Instead Rust expects from you to write like this:
let path = Some(env::args().nth(1)); // This will be seem like Some(Some(value))
The other answers go into a lot of detail, which might be more than you need to know.
Essentially, this:
if let Some(path) = env::args().nth(1) {
// Do something with path
} else {
// otherwise do something else
}
is identical to this:
match env::args().nth(1) {
Some(path) => { /* Do something with path */ }
_ => { /* otherwise do something else */ }
}

How to read one single char in Rust? [duplicate]

I want to run an executable that blocks on stdin and when a key is pressed that same character is printed immediately without Enter having to be pressed.
How can I read one character from stdin without having to hit Enter? I started with this example:
/// Prompts the user, reads one full line from stdin, and prints the value
/// returned by `read_line` (the number of bytes read), not the text itself.
fn main() {
    println!("Type something!");
    let mut buffer = String::new();
    let bytes_read = std::io::stdin()
        .read_line(&mut buffer)
        .expect("Failed to read line");
    println!("{}", bytes_read);
}
I looked through the API and tried replacing read_line() with bytes(), but everything I try requires me to hit Enter before read occurs.
This question was asked for C/C++, but there seems to be no standard way to do it: Capture characters from standard input without waiting for enter to be pressed
It might not be doable in Rust considering it's not simple in C/C++.
While @Jon's solution using ncurses works, ncurses clears the screen by design. I came up with this solution that uses the termios crate for my little project to learn Rust. The idea is to clear the ECHO and ICANON flags by calling tcsetattr through the termios bindings.
extern crate termios;
use std::io;
use std::io::Read;
use std::io::Write;
use termios::{Termios, TCSANOW, ECHO, ICANON, tcsetattr};
/// Reads exactly one byte from stdin without waiting for Enter and prints it.
///
/// Canonical (line-buffered) mode and echo are switched off for the duration
/// of the read and the original terminal settings are restored afterwards,
/// even when the read itself fails.
///
/// # Panics
///
/// Panics if the terminal attributes cannot be read or written, if stdout
/// cannot be flushed, or if reading the byte fails.
fn main() {
    let stdin = 0; // couldn't get std::os::unix::io::FromRawFd to work
                   // on /dev/stdin or /dev/tty
    let termios = Termios::from_fd(stdin).unwrap();
    let mut new_termios = termios.clone(); // make a mutable copy of termios
                                           // that we will modify
    new_termios.c_lflag &= !(ICANON | ECHO); // disable canonical mode and echo

    // Show the prompt before touching the terminal, so a failing flush cannot
    // leave the terminal in the modified mode.
    let stdout = io::stdout();
    print!("Hit a key! ");
    stdout.lock().flush().unwrap();

    tcsetattr(stdin, TCSANOW, &new_termios).unwrap();
    let mut reader = io::stdin();
    let mut buffer = [0; 1]; // read exactly one byte
    let read_result = reader.read_exact(&mut buffer);
    // Restore the original settings BEFORE inspecting the result: unwrapping
    // first would skip the reset on a read error.
    tcsetattr(stdin, TCSANOW, &termios).unwrap();

    read_result.unwrap();
    println!("You have hit: {:?}", buffer);
}
One advantage of reading a single byte is capturing arrow keys, ctrl etc. Extended F-keys are not captured (although ncurses can capture these).
This solution is intended for UNIX-like platforms. I have no experience with Windows, but according to this forum perhaps something similar can be achieved using SetConsoleMode in Windows.
Use one of the 'ncurses' libraries now available, for instance this one.
Add the dependency in Cargo
[dependencies]
ncurses = "5.86.0"
and include in main.rs:
extern crate ncurses;
use ncurses::*; // watch for globs
Follow the examples in the library to initialize ncurses and wait for single character input like this:
// Minimal ncurses session: initialise, draw a message, wait for one key
// press, then shut ncurses down again.
initscr();
/* Print to the back buffer. */
printw("Hello, world!");
/* Update the screen. */
refresh();
/* Wait for a key press. */
getch();
/* Terminate ncurses. */
endwin();
You can also use termion, but you will have to enable the raw TTY mode which changes the behavior of stdout as well. See the example below (tested with Rust 1.34.0). Note that internally, it also wraps the termios UNIX API.
Cargo.toml
[dependencies]
termion = "1.5.2"
main.rs
use std::io;
use std::io::Write;
use std::thread;
use std::time;
use termion;
use termion::input::TermRead;
use termion::raw::IntoRawMode;
/// Polls the keyboard every 50 ms and shows the most recent key press at the
/// top-left corner of a cleared screen; pressing `q` exits.
fn main() {
    // Raw mode lets stdin be read one key at a time. The guard is kept in a
    // binding because all output below is written through it.
    let mut stdout = io::stdout().into_raw_mode().unwrap();
    // Asynchronous stdin: `next()` yields `None` when no key is pending.
    let mut keys = termion::async_stdin().keys();

    loop {
        if let Some(Ok(key)) = keys.next() {
            // 'q' quits; every other key is echoed.
            if let termion::event::Key::Char('q') = key {
                break;
            }
            write!(
                stdout,
                "{}{}Key pressed: {:?}",
                termion::clear::All,
                termion::cursor::Goto(1, 1),
                key
            )
            .unwrap();
            stdout.lock().flush().unwrap();
        }
        thread::sleep(time::Duration::from_millis(50));
    }
}
Here's a lightweight solution that uses only the libc crate, based on some code from the console crate:
/// Switches the controlling terminal into raw mode while keeping its current
/// output flags.
///
/// Uses stdin when it is a terminal; otherwise opens `/dev/tty`. The previous
/// terminal settings are not saved, so this function offers no way to restore
/// them.
///
/// # Errors
///
/// Returns the error from opening `/dev/tty`, or the last OS error when
/// `tcgetattr` or `tcsetattr` fails.
fn setup_raw_terminal() -> io::Result<()> {
    // SAFETY: NOTE(review) — `fd` refers to stdin or to the `/dev/tty` handle
    // that stays open until this block ends, and `attrs` is read via
    // `assume_init` only after `tcgetattr` reported success; confirm this is
    // sufficient.
    unsafe {
        // Declared up front so the `/dev/tty` handle (if opened) outlives `fd`.
        let tty_file;
        let fd = if libc::isatty(libc::STDIN_FILENO) == 1 {
            libc::STDIN_FILENO
        } else {
            tty_file = fs::File::open("/dev/tty")?;
            tty_file.as_raw_fd()
        };

        let mut attrs = core::mem::MaybeUninit::uninit();
        if libc::tcgetattr(fd, attrs.as_mut_ptr()) != 0 {
            return Err(io::Error::last_os_error());
        }
        let mut attrs = attrs.assume_init();

        // `cfmakeraw` also rewrites the output flags; put the original ones back.
        let saved_oflag = attrs.c_oflag;
        libc::cfmakeraw(&mut attrs);
        attrs.c_oflag = saved_oflag;

        if libc::tcsetattr(fd, libc::TCSADRAIN, &attrs) != 0 {
            return Err(io::Error::last_os_error());
        }
    }
    Ok(())
}
It needs to be called before reading stdin:
// Reads whatever bytes are available on stdin after the terminal has been put
// into raw mode and prints each chunk.
let mut buf = [0u8; 1024];
let mut stdin = io::stdin();
setup_raw_terminal()?;
loop {
    let size = stdin.read(&mut buf)?;
    // A zero-length read means end of input; stop instead of looping forever.
    if size == 0 {
        break;
    }
    let data = &buf[..size];
    // `&[u8]` has no `Display` implementation, so the bytes are shown with `{:?}`.
    println!("stdin data: {:?}", data);
}

Resources