converting UTF-8 string to ASCII in pure LUA - utf-8

I have a question about sending and receiving data with special chars. (German Umlauts)
When I send the string "Café Zeezicht" with the code below, then on the server-side the string is oke.
But how can I receive and decode the receiving data that containing the same chars? Now it look likes "Caf? Zeezicht"
I am searching for a pure LUA function, because I have no ability to load libraries.
------------------------------------------------------------
-- Function voor converting ASCII naar UTF8
------------------------------------------------------------
-- return char as utf8 string
local function CodeToUTF8 (Unicode)
if (Unicode == nil) then
return ""
end
if (Unicode < 0x20) then return ' '; end;
if (Unicode <= 0x7F) then return string.char(Unicode); end;
if (Unicode <= 0x7FF) then
local Byte0 = 0xC0 + math.floor(Unicode / 0x40);
local Byte1 = 0x80 + (Unicode % 0x40);
return string.char(Byte0, Byte1);
end;
if (Unicode <= 0xFFFF) then
local Byte0 = 0xE0 + math.floor(Unicode / 0x1000);
local Byte1 = 0x80 + (math.floor(Unicode / 0x40) % 0x40);
local Byte2 = 0x80 + (Unicode % 0x40);
return string.char(Byte0, Byte1, Byte2);
end;
return ""; -- ignore UTF-32 for the moment
end;
-- convert ascii string to utf8 string
function AsciiToUTF8(str)
result = ""
for i = 1, #str do
result = result .. CodeToUTF8(string.byte(str, i, i+1))
end
return result
end
------------------------------------------------------------
-- Einde Function voor converting ASCII naar UTF8
------------------------------------------------------------

local char, byte, pairs, floor = string.char, string.byte, pairs, math.floor
local table_insert, table_concat = table.insert, table.concat
local unpack = table.unpack or unpack
local function unicode_to_utf8(code)
-- converts numeric UTF code (U+code) to UTF-8 string
local t, h = {}, 128
while code >= h do
t[#t+1] = 128 + code%64
code = floor(code/64)
h = h > 32 and 32 or h/2
end
t[#t+1] = 256 - 2*h + code
return char(unpack(t)):reverse()
end
local function utf8_to_unicode(utf8str, pos)
-- pos = starting byte position inside input string (default 1)
pos = pos or 1
local code, size = utf8str:byte(pos), 1
if code >= 0xC0 and code < 0xFE then
local mask = 64
code = code - 128
repeat
local next_byte = utf8str:byte(pos + size) or 0
if next_byte >= 0x80 and next_byte < 0xC0 then
code, size = (code - mask - 2) * 64 + next_byte, size + 1
else
code, size = utf8str:byte(pos), 1
end
mask = mask * 32
until code < mask
end
-- returns code, number of bytes in this utf8 char
return code, size
end
local map_1252_to_unicode = {
[0x80] = 0x20AC,
[0x81] = 0x81,
[0x82] = 0x201A,
[0x83] = 0x0192,
[0x84] = 0x201E,
[0x85] = 0x2026,
[0x86] = 0x2020,
[0x87] = 0x2021,
[0x88] = 0x02C6,
[0x89] = 0x2030,
[0x8A] = 0x0160,
[0x8B] = 0x2039,
[0x8C] = 0x0152,
[0x8D] = 0x8D,
[0x8E] = 0x017D,
[0x8F] = 0x8F,
[0x90] = 0x90,
[0x91] = 0x2018,
[0x92] = 0x2019,
[0x93] = 0x201C,
[0x94] = 0x201D,
[0x95] = 0x2022,
[0x96] = 0x2013,
[0x97] = 0x2014,
[0x98] = 0x02DC,
[0x99] = 0x2122,
[0x9A] = 0x0161,
[0x9B] = 0x203A,
[0x9C] = 0x0153,
[0x9D] = 0x9D,
[0x9E] = 0x017E,
[0x9F] = 0x0178,
[0xA0] = 0x00A0,
[0xA1] = 0x00A1,
[0xA2] = 0x00A2,
[0xA3] = 0x00A3,
[0xA4] = 0x00A4,
[0xA5] = 0x00A5,
[0xA6] = 0x00A6,
[0xA7] = 0x00A7,
[0xA8] = 0x00A8,
[0xA9] = 0x00A9,
[0xAA] = 0x00AA,
[0xAB] = 0x00AB,
[0xAC] = 0x00AC,
[0xAD] = 0x00AD,
[0xAE] = 0x00AE,
[0xAF] = 0x00AF,
[0xB0] = 0x00B0,
[0xB1] = 0x00B1,
[0xB2] = 0x00B2,
[0xB3] = 0x00B3,
[0xB4] = 0x00B4,
[0xB5] = 0x00B5,
[0xB6] = 0x00B6,
[0xB7] = 0x00B7,
[0xB8] = 0x00B8,
[0xB9] = 0x00B9,
[0xBA] = 0x00BA,
[0xBB] = 0x00BB,
[0xBC] = 0x00BC,
[0xBD] = 0x00BD,
[0xBE] = 0x00BE,
[0xBF] = 0x00BF,
[0xC0] = 0x00C0,
[0xC1] = 0x00C1,
[0xC2] = 0x00C2,
[0xC3] = 0x00C3,
[0xC4] = 0x00C4,
[0xC5] = 0x00C5,
[0xC6] = 0x00C6,
[0xC7] = 0x00C7,
[0xC8] = 0x00C8,
[0xC9] = 0x00C9,
[0xCA] = 0x00CA,
[0xCB] = 0x00CB,
[0xCC] = 0x00CC,
[0xCD] = 0x00CD,
[0xCE] = 0x00CE,
[0xCF] = 0x00CF,
[0xD0] = 0x00D0,
[0xD1] = 0x00D1,
[0xD2] = 0x00D2,
[0xD3] = 0x00D3,
[0xD4] = 0x00D4,
[0xD5] = 0x00D5,
[0xD6] = 0x00D6,
[0xD7] = 0x00D7,
[0xD8] = 0x00D8,
[0xD9] = 0x00D9,
[0xDA] = 0x00DA,
[0xDB] = 0x00DB,
[0xDC] = 0x00DC,
[0xDD] = 0x00DD,
[0xDE] = 0x00DE,
[0xDF] = 0x00DF,
[0xE0] = 0x00E0,
[0xE1] = 0x00E1,
[0xE2] = 0x00E2,
[0xE3] = 0x00E3,
[0xE4] = 0x00E4,
[0xE5] = 0x00E5,
[0xE6] = 0x00E6,
[0xE7] = 0x00E7,
[0xE8] = 0x00E8,
[0xE9] = 0x00E9,
[0xEA] = 0x00EA,
[0xEB] = 0x00EB,
[0xEC] = 0x00EC,
[0xED] = 0x00ED,
[0xEE] = 0x00EE,
[0xEF] = 0x00EF,
[0xF0] = 0x00F0,
[0xF1] = 0x00F1,
[0xF2] = 0x00F2,
[0xF3] = 0x00F3,
[0xF4] = 0x00F4,
[0xF5] = 0x00F5,
[0xF6] = 0x00F6,
[0xF7] = 0x00F7,
[0xF8] = 0x00F8,
[0xF9] = 0x00F9,
[0xFA] = 0x00FA,
[0xFB] = 0x00FB,
[0xFC] = 0x00FC,
[0xFD] = 0x00FD,
[0xFE] = 0x00FE,
[0xFF] = 0x00FF,
}
local map_unicode_to_1252 = {}
for code1252, code in pairs(map_1252_to_unicode) do
map_unicode_to_1252[code] = code1252
end
function string.fromutf8(utf8str)
local pos, result_1252 = 1, {}
while pos <= #utf8str do
local code, size = utf8_to_unicode(utf8str, pos)
pos = pos + size
code = code < 128 and code or map_unicode_to_1252[code] or ('?'):byte()
table_insert(result_1252, char(code))
end
return table_concat(result_1252)
end
function string.toutf8(str1252)
local result_utf8 = {}
for pos = 1, #str1252 do
local code = str1252:byte(pos)
table_insert(result_utf8, unicode_to_utf8(map_1252_to_unicode[code] or code))
end
return table_concat(result_utf8)
end
Usage:
local str1252 = "1\128" -- "one euro" in latin-1
local str_utf8 = str1252:toutf8() -- "1\226\130\172" -- one euro in utf-8
local str1252_2 = str_utf8:fromutf8()

Related

"for limit must be a number" error in roblox (Data Saving and Loading)

I leveled up my character and got some items, when i rejoined, it didn't load my previous data, It gave me level 0 and didn't load any items (it didn't give any output for level problem, but it gave this output for item problem: 'for' limit must be a number), but PLD.ItemNumber is a number, because It is from "ItemNumber" variable in the saving function. How to fix it? Code:
local DataStoreService = game:GetService("DataStoreService");
local PD = DataStoreService:GetDataStore("PlayerData");
function SaveData(Player)
local TableSave = {};
TableSave.Level = Player.PlayerData.Stats.Level.Value;
local TableItems = {};
local ItemNumber = 0;
for i,v in pairs(Player.PlayerData.Items:GetChildren()) do
ItemNumber = ItemNumber + 1;
TableItems["Item"..ItemNumber]={};
local TI = TableItems["Item"..ItemNumber];
TI.ItemName = v.Name;
TI.Level = v.Level.Value;
TI.Damage = v.Damage.Value;
end
table.insert(TableSave,TableItems);
TableSave.ItemNumber = ItemNumber;
PD:SetAsync(Player.userId,TableSave);
end
function LoadData(Player)
local success, err = pcall(function()
PLD = PD:GetAsync(Player.userId);
end)
if success then
Player.PlayerData.Stats.Level.Value = PLD.Level;
for i = 1,PLD.ItemNumber do
local CurrentItem = Instance.new("Folder");
CurrentItem.Name = PLD[1]["Item"..i].ItemName;
local Lv = Instance.new("IntValue");
Lv.Name = "Level";
Lv.Value = PLD[1]["Item"..i].Level;
Lv.Parent = CurrentItem;
local Dm = Instance.new("IntValue");
Dm.Name = "Damage";
Dm.Value = PLD[1]["Item"..i].Damage;
Dm.Parent = CurrentItem;
CurrentItem.Parent=Player.PlayerData.Items;
end
else
print("ERROR IN GET ASYNC");
end
end
game.Players.PlayerRemoving:Connect(function(plr)
repeat wait() until plr.PlayerData;
SaveData(plr);
end)
game.Players.PlayerAdded:Connect(function(plr)
repeat wait() until plr.PlayerData;
LoadData(plr);
end)

Getting the disk signature as negative

import wmi
wmi_connector = wmi.WMI()
def get_win_drive_mappings_locally(drivemappings):
for physical_disk in wmi_connector.Win32_DiskDrive():
for partition in physical_disk.associators("Win32_DiskDriveToDiskPartition"):
for logical_disk in partition.associators("Win32_LogicalDiskToPartition"):
print (physical_disk.Signature)
I am using wmi to get information of disks and signature.
when i print the instance of physical_disk the output is as below:
instance of Win32_DiskDrive
{
BytesPerSector = 512;
Capabilities = {3, 4};
CapabilityDescriptions = {"Random Access", "Supports Writing"};
Caption = "XXXXX SCSI Disk Device";
ConfigManagerErrorCode = 0;
ConfigManagerUserConfig = FALSE;
CreationClassName = "Win32_DiskDrive";
Description = "Disk drive";
DeviceID = "\\\\.\\PHYSICALDRIVE1";
FirmwareRevision = "0 ";
Index = 1;
InterfaceType = "SCSI";
Manufacturer = "(Standard disk drives)";
MediaLoaded = TRUE;
MediaType = "Fixed hard disk media";
Model = "XXXX SCSI Disk Device";
Name = "\\\\.\\PHYSICALDRIVE1";
Partitions = 1;
PNPDeviceID = "SCSI\\DISK&XXXXX&PROD_K\\4&5393C0A&0&000100";
SCSIBus = 0;
SCSILogicalUnit = 0;
SCSIPort = 2;
SCSITargetId = 1;
SectorsPerTrack = 63;
SerialNumber = "XXXXX";
Signature = **3908409726**;
Size = "107372805120";
Status = "OK";
SystemCreationClassName = "Win32_ComputerSystem";
SystemName = "SQLSERVER";
TotalCylinders = "13054";
TotalHeads = 255;
TotalSectors = "209712510";
TotalTracks = "3328770";
TracksPerCylinder = 255;
};
But when i print physical_disk.Signature the output is:
-386557570, i am not able to understand where its going wrong,expected output is 3908409726
-386557570 is indeed 3908409726 interpreted as a 32 bit signed integer (in 2's complement arithmetic); probably the Python WMI connector interprets all 32 bit values as signed.
To interpret it as an unsigned value, check if it's negative, and in that case add 1<<32.
def as_uint32(v):
if v<0:
return v + (1<<32)
return v
# ...
print (as_uint32(physical_disk.Signature))

tensorflow: After adding a rnn the whole work doesn't work

I have downloaded a code of FCN for image segmentation and it ran well. Now I want to add a rnn layer attempting to refine the result according to the work "ReSeg: A Recurrent Neural Network-Based Model for Semantic Segmentation". My code shows as follows:
This part is for the inference:
def inference(image, keep_prob):
"""
Semantic segmentation network definition
:param image: input image. Should have values in range 0-255
:param keep_prob:
:return:
"""
print("setting up vgg initialized conv layers ...")
#model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)
model_data = scipy.io.loadmat("H:/Deep Learning/FCN.tensorflow-master/imagenet-vgg-verydeep-19.mat")
mean = model_data['normalization'][0][0][0]
mean_pixel = np.mean(mean, axis=(0, 1))
weights = np.squeeze(model_data['layers'])
processed_image = utils.process_image(image, mean_pixel)
with tf.variable_scope("inference"):
image_net = vgg_net(weights, processed_image)
conv_final_layer = image_net["conv5_3"]
pool5 = utils.max_pool_2x2(conv_final_layer)
W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
b6 = utils.bias_variable([4096], name="b6")
conv6 = utils.conv2d_basic(pool5, W6, b6)
relu6 = tf.nn.relu(conv6, name="relu6")
if FLAGS.debug:
utils.add_activation_summary(relu6)
relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)
W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
b7 = utils.bias_variable([4096], name="b7")
conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
relu7 = tf.nn.relu(conv7, name="relu7")
if FLAGS.debug:
utils.add_activation_summary(relu7)
relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)
W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)
# annotation_pred1 = tf.argmax(conv8, dimension=3, name="prediction1")
# now to upscale to actual image size
deconv_shape1 = image_net["pool4"].get_shape()
W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"]))
#fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")
deconv_shape2 = image_net["pool3"].get_shape()
W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
conv_t2 = utils.conv2d_transpose_strided(conv_t1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"]))
#fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")
shape = tf.shape(image)
deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
W_t3 = utils.weight_variable([16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3")
b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
conv_t3 = utils.conv2d_transpose_strided(conv_t2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)
/////////////////////////////////////////////////////this is from where i added the rnn
shape_5 = tf.shape(image)
W_a = 224
H_a = 224
p_size_a = NUM_OF_CLASSESS
# x = tf.reshape(conv_t1, [shape_5[0],H_a,W_a, p_size_a])
x = tf.transpose(conv_t3, perm=[0,2,1,3])
x = tf.reshape(x,[-1,H_a,p_size_a])
mat = tf.unstack(x, H_a, 1)
lstm_fw_cell = rnn.BasicLSTMCell(N_HIDDEN, forget_bias=1.0)
lstm_bw_cell = rnn.BasicLSTMCell(N_HIDDEN, forget_bias=1.0)
#with tf.variable_scope('rnn1_1'):
try:
outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, mat,
dtype=tf.float32,scope='rnn1_1')
except Exception: # Old TensorFlow version only returns outputs not states
outputs = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, mat,
dtype=tf.float32)
outputs1 = tf.reshape(outputs,[H_a, shape_5[0], W_a, 2 * N_HIDDEN])
outputs1 = tf.transpose(outputs1,(1,0,2,3))
x_1 = tf.reshape(outputs1,[-1,W_a,2 * N_HIDDEN])
mat_1 = tf.unstack(x_1, W_a, 1)
lstm_lw_cell = rnn.BasicLSTMCell(N_HIDDEN, forget_bias=1.0)
lstm_rw_cell = rnn.BasicLSTMCell(N_HIDDEN, forget_bias=1.0)
#with tf.variable_scope('rnn1_2'):
try:
outputs2, _, _ = rnn.static_bidirectional_rnn(lstm_lw_cell, lstm_rw_cell, mat_1,
dtype=tf.float32,scope = 'rnn1_2')
except Exception: # Old TensorFlow version only returns outputs not states
outputs2 = rnn.static_bidirectional_rnn(lstm_lw_cell, lstm_rw_cell, mat_1,
dtype=tf.float32)
outputs2 = tf.reshape(outputs,[W_a, shape_5[0], H_a, 2 * N_HIDDEN])
outputs2 = tf.transpose(outputs2,(1,2,0,3))
///////////////////////////////////////////////////till here
annotation_pred = tf.argmax(outputs2, dimension=3, name="prediction")
return tf.expand_dims(annotation_pred, dim=3), outputs2
and this part is for the training:
def train(loss_val, var_list):
optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
grads = optimizer.compute_gradients(loss_val, var_list=var_list)
if FLAGS.debug:
# print(len(var_list))
for grad, var in grads:
utils.add_gradient_summary(grad, var)
return optimizer.apply_gradients(grads)
def main(argv=None):
keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image")
annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")
pred_annotation, logits = inference(image, keep_probability)
tf.summary.image("input_image", image, max_outputs=2)
tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)
loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
labels=tf.squeeze(annotation, squeeze_dims=[3]),
name="entropy")))
tf.summary.scalar("entropy", loss)
trainable_var = tf.trainable_variables()
if FLAGS.debug:
for var in trainable_var:
utils.add_to_regularization_and_summary(var)
train_op = train(loss, trainable_var)
print("Setting up summary op...")
summary_op = tf.summary.merge_all()
print("Setting up image reader...")
train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
print(len(train_records))
print(len(valid_records))
print("Setting up dataset reader")
image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
if FLAGS.mode == 'train':
train_dataset_reader = dataset.BatchDatset(train_records, image_options)
validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)
sess = tf.Session()
print("Setting up Saver...")
saver = tf.train.Saver()
summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)
sess.run(tf.global_variables_initializer())
ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
print("Model restored...")
if FLAGS.mode == "train":
for itr in xrange(MAX_ITERATION):
train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85}
sess.run(train_op, feed_dict=feed_dict)
if itr % 10 == 0:
train_loss, summary_str = sess.run([loss, summary_op], feed_dict=feed_dict)
print("Step: %d, Train_loss:%g" % (itr, train_loss))
summary_writer.add_summary(summary_str, itr)
if itr % 500 == 0:
valid_images, valid_annotations = validation_dataset_reader.next_batch(FLAGS.batch_size)
valid_loss = sess.run(loss, feed_dict={image: valid_images, annotation: valid_annotations,
keep_probability: 1.0})
print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)
elif FLAGS.mode == "visualize":
valid_images, valid_annotations = validation_dataset_reader.get_random_batch(FLAGS.batch_size)
pred = sess.run(pred_annotation, feed_dict={image: valid_images, annotation: valid_annotations,
keep_probability: 1.0})
valid_annotations = np.squeeze(valid_annotations, axis=3)
pred = np.squeeze(pred, axis=3)
for itr in range(FLAGS.batch_size):
utils.save_image(valid_images[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr))
utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr))
utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="pred_" + str(5+itr))
print("Saved image: %d" % itr)
The error was described as:
Not found: Key inference/rnn1_2/fw/basic_lstm_cell/weights not found in checkpoint
So i think there must be something wrong with the variables.
I'll be very appreciate if someone could tell me how to fix it!
looking forward to your help!

Formula to convert IPv6 Address to IP number

I'm looking for a formula to convert IPV6 address to IP number. This is required to map with geoip location information we have.
Input IPV6 address : 2001:0db8:0000:0000:0000:ff00:0042:8329
Output IP Number converted : 42540766411282592856904265327123268393
Thanks...
Below are the sample codes in multiple languages to convert IPv6 address to number taken from http://lite.ip2location.com/faqs
PHP
$ipv6 = '2404:6800:4001:805::1006';
$int = inet_pton($ipv6);
$bits = 15;
$ipv6long = 0;
while($bits >= 0){
$bin = sprintf("%08b", (ord($int[$bits])));
if($ipv6long){
$ipv6long = $bin . $ipv6long;
}
else{
$ipv6long = $bin;
}
$bits--;
}
$ipv6long = gmp_strval(gmp_init($ipv6long, 2), 10);
Java
java.math.BigInteger Dot2LongIP(String ipv6) {
java.net.InetAddress ia = java.net.InetAddress.getByName(ipv6);
byte byteArr[] = ia.getAddress();
if (ia instanceof java.net.Inet6Address) {
java.math.BigInteger ipnumber = new java.math.BigInteger(1, byteArr);
return ipnumber;
}
}
C#
string strIP = "2404:6800:4001:805::1006";
System.Net.IPAddress address;
System.Numerics.BigInteger ipnum;
if (System.Net.IPAddress.TryParse(strIP, out address)) {
byte[] addrBytes = address.GetAddressBytes();
if (System.BitConverter.IsLittleEndian) {
System.Collections.Generic.List byteList = new System.Collections.Generic.List(addrBytes);
byteList.Reverse();
addrBytes = byteList.ToArray();
}
if (addrBytes.Length > 8) {
//IPv6
ipnum = System.BitConverter.ToUInt64(addrBytes, 8);
ipnum <<= 64;
ipnum += System.BitConverter.ToUInt64(addrBytes, 0);
} else {
//IPv4
ipnum = System.BitConverter.ToUInt32(addrBytes, 0);
}
}
VB.NET
Dim strIP As String = "2404:6800:4001:805::1006"
Dim address As System.Net.IPAddress
Dim ipnum As System.Numerics.BigInteger
If System.Net.IPAddress.TryParse(strIP, address) Then
Dim addrBytes() As Byte = address.GetAddressBytes()
If System.BitConverter.IsLittleEndian Then
Dim byteList As New System.Collections.Generic.List(Of Byte)(addrBytes)
byteList.Reverse()
addrBytes = byteList.ToArray()
End If
If addrBytes.Length > 8 Then
'IPv6
ipnum = System.BitConverter.ToUInt64(addrBytes, 8)
ipnum <<= 64
ipnum += System.BitConverter.ToUInt64(addrBytes, 0)
Else
'IPv4
ipnum = System.BitConverter.ToUInt32(addrBytes, 0)
End If
End If

What's the best way to extract Excel cell comments using Perl or Ruby?

I've been parsing Excel documents in Perl successfully with Spreadhsheet::ParseExcel (as recommended in What's the best way to parse Excel file in Perl?), but I can't figure out how to extract cell comments.
Any ideas? A solution in Perl or Ruby would be ideal.
The Python xlrd library will parse cell comments (if you turn on xlrd.sheet.OBJ_MSO_DEBUG, you'll see them), but it doesn't expose them from the API. You could either parse the dump or hack on it a bit so you can get to them programmatically. Here's a start (tested extremely minimally):
diff --git a/xlrd/sheet.py b/xlrd/sheet.py
--- a/xlrd/sheet.py
+++ b/xlrd/sheet.py
## -206,6 +206,7 ##
self._dimncols = 0
self._cell_values = []
self._cell_types = []
+ self._cell_notes = []
self._cell_xf_indexes = []
self._need_fix_ragged_rows = 0
self.defcolwidth = None
## -252,6 +253,7 ##
return Cell(
self._cell_types[rowx][colx],
self._cell_values[rowx][colx],
+ self._cell_notes[rowx][colx],
xfx,
)
## -422,12 +424,14 ##
if self.formatting_info:
self._cell_xf_indexes[nrx].extend(aa('h', [-1]) * nextra)
self._cell_values[nrx].extend([''] * nextra)
+ self._cell_notes[nrx].extend([None] * nextra)
if nc > self.ncols:
self.ncols = nc
self._need_fix_ragged_rows = 1
if nr > self.nrows:
scta = self._cell_types.append
scva = self._cell_values.append
+ scna = self._cell_notes.append
scxa = self._cell_xf_indexes.append
fmt_info = self.formatting_info
xce = XL_CELL_EMPTY
## -436,6 +440,7 ##
for _unused in xrange(self.nrows, nr):
scta([xce] * nc)
scva([''] * nc)
+ scna([None] * nc)
if fmt_info:
scxa([-1] * nc)
else:
## -443,6 +448,7 ##
for _unused in xrange(self.nrows, nr):
scta(aa('B', [xce]) * nc)
scva([''] * nc)
+ scna([None] * nc)
if fmt_info:
scxa(aa('h', [-1]) * nc)
self.nrows = nr
## -454,6 +460,7 ##
aa = array_array
s_cell_types = self._cell_types
s_cell_values = self._cell_values
+ s_cell_notes = self._cell_notes
s_cell_xf_indexes = self._cell_xf_indexes
s_dont_use_array = self.dont_use_array
s_fmt_info = self.formatting_info
## -465,6 +472,7 ##
nextra = ncols - rlen
if nextra > 0:
s_cell_values[rowx][rlen:] = [''] * nextra
+ s_cell_notes[rowx][rlen:] = [None] * nextra
if s_dont_use_array:
trow[rlen:] = [xce] * nextra
if s_fmt_info:
## -600,6 +608,7 ##
bk_get_record_parts = bk.get_record_parts
bv = self.biff_version
fmt_info = self.formatting_info
+ txos = {}
eof_found = 0
while 1:
# if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position
## -877,13 +886,23 ##
break
elif rc == XL_OBJ:
# handle SHEET-level objects; note there's a separate Book.handle_obj
- self.handle_obj(data)
+ obj = self.handle_obj(data)
+ if obj:
+ obj_id = obj.id
+ else:
+ obj_id = None
elif rc == XL_MSO_DRAWING:
self.handle_msodrawingetc(rc, data_len, data)
elif rc == XL_TXO:
- self.handle_txo(data)
+ txo = self.handle_txo(data)
+ if txo and obj_id:
+ txos[obj_id] = txo
+ obj_id = None
elif rc == XL_NOTE:
- self.handle_note(data)
+ note = self.handle_note(data)
+ txo = txos.get(note.object_id)
+ if txo:
+ self._cell_notes[note.rowx][note.colx] = txo.text
elif rc == XL_FEAT11:
self.handle_feat11(data)
elif rc in bofcodes: ##### EMBEDDED BOF #####
## -1387,19 +1406,16 ##
def handle_obj(self, data):
- if not OBJ_MSO_DEBUG:
- return
- DEBUG = 1
if self.biff_version < 80:
return
o = MSObj()
data_len = len(data)
pos = 0
- if DEBUG:
+ if OBJ_MSO_DEBUG:
fprintf(self.logfile, "... OBJ record ...\n")
while pos < data_len:
ft, cb = unpack('<HH', data[pos:pos+4])
- if DEBUG:
+ if OBJ_MSO_DEBUG:
hex_char_dump(data, pos, cb, base=0, fout=self.logfile)
if ft == 0x15: # ftCmo ... s/b first
assert pos == 0
## -1430,16 +1446,14 ##
else:
# didn't break out of while loop
assert pos == data_len
- if DEBUG:
+ if OBJ_MSO_DEBUG:
o.dump(self.logfile, header="=== MSOBj ===", footer= " ")
+ return o
def handle_note(self, data):
- if not OBJ_MSO_DEBUG:
- return
- DEBUG = 1
if self.biff_version < 80:
return
- if DEBUG:
+ if OBJ_MSO_DEBUG:
fprintf(self.logfile, '... NOTE record ...\n')
hex_char_dump(data, 0, len(data), base=0, fout=self.logfile)
o = MSNote()
## -1453,13 +1467,11 ##
o.original_author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2)
assert endpos == data_len - 1
o.last_byte = data[-1]
- if DEBUG:
+ if OBJ_MSO_DEBUG:
o.dump(self.logfile, header="=== MSNote ===", footer= " ")
+ return o
def handle_txo(self, data):
- if not OBJ_MSO_DEBUG:
- return
- DEBUG = 1
if self.biff_version < 80:
return
o = MSTxo()
## -1477,8 +1489,9 ##
rc3, data3_len, data3 = self.book.get_record_parts()
assert rc3 == XL_CONTINUE
# ignore the formatting runs for the moment
- if DEBUG:
+ if OBJ_MSO_DEBUG:
o.dump(self.logfile, header="=== MSTxo ===", footer= " ")
+ return o
def handle_feat11(self, data):
if not OBJ_MSO_DEBUG:
## -1638,11 +1651,12 ##
class Cell(BaseObject):
- __slots__ = ['ctype', 'value', 'xf_index']
+ __slots__ = ['ctype', 'value', 'note', 'xf_index']
- def __init__(self, ctype, value, xf_index=None):
+ def __init__(self, ctype, value, note=None, xf_index=None):
self.ctype = ctype
self.value = value
+ self.note = note
self.xf_index = xf_index
def __repr__(self):
Then you could write something like:
import xlrd
xlrd.sheet.OBJ_MSO_DEBUG = True
xls = xlrd.open_workbook('foo.xls')
for sheet in xls.sheets():
print 'sheet %s (%d x %d)' % (sheet.name, sheet.nrows, sheet.ncols)
for rownum in xrange(sheet.nrows):
for cell in sheet.row(rownum):
print cell, cell.note
One option is to use Ruby's win32ole library.
The following (somewhat verbose) example connects to an open Excel worksheet and gets the comment text from cell B2.
require 'win32ole'
xl = WIN32OLE.connect('Excel.Application')
ws = xl.ActiveSheet
cell = ws.Range('B2')
comment = cell.Comment
text = comment.Text
More info and examples of using Ruby's win32ole library to automate Excel can be found here:
http://rubyonwindows.blogspot.com/search/label/excel

Resources