Performance difference when reading/writing many files with EPPlus versus Spreadsheet Gear - performance

I've made a simple performance test between EPPlus and Spreadsheet Gear to see if there is any significant difference that would justify buying Spreadsheet Gear.
I am no expert at either application so it's possible the tests aren't written the most efficient way.
The test does the following:
1. Opens an existing Excel-file with 1000 rows and 3 columns. Saves the three values into an entity that is saved into a List<>.
2. Open a new Excel-object
3. Create a header row (bold) with the title of each column.
4. Write back the 1000 entities.
5. Save the new Excelfile.
If I run this test once, EPPlus comes out the winner (approx. times are EPPlus = 280ms, SG = 500ms). If I run the test 10 times in a row instead (a for-loop opening, copying, saving 10 separate times), Spreadsheet Gear is faster (approx. times per file: EPPlus = 165ms, SG = 95ms). For 20 tests the approx. times are EPPlus = 160ms / file and SG = 60ms / file.
It seems like (to a certain extent at least) Spreadsheet Gear gets faster and faster the more files I create.
Could anyone explain why EPPlus is the slower one when running consecutive tests? And can I make changes to the code to change this?
EPPlus test function:
// EPPlus benchmark body: reads (Id, Title, Value) rows from an existing workbook,
// writes them to a new workbook under a bold header row, and returns the elapsed
// time in milliseconds.
var timer = Stopwatch.StartNew();
var data = new List<Item>();
using (var excelIn = new ExcelPackage(new FileInfo(folder + fileIn)))
{
    var sheet = excelIn.Workbook.Worksheets[1];
    // Reading starts at row 2 and stops at the first row whose column-1 cell is empty.
    for (var row = 2; sheet.Cells[row, 1].Value != null; row++)
    {
        data.Add(new Item()
        {
            Id = int.Parse(sheet.Cells[row, 1].Text),
            Title = sheet.Cells[row, 2].Text,
            Value = int.Parse(sheet.Cells[row, 3].Text)
        });
    }
}
using (var excelOut = new ExcelPackage())
{
    var sheet = excelOut.Workbook.Worksheets.Add("Out");
    // Write the header first ...
    sheet.Cells[1, 1].Value = "Id";
    sheet.Cells[1, 2].Value = "Title";
    sheet.Cells[1, 3].Value = "Value";
    sheet.Cells[1, 1, 1, 3].Style.Font.Bold = true;
    // ... then load the items directly below it. Loading at A1 and calling
    // InsertRow(1, 1) afterwards forces every loaded row to be shifted down,
    // which is avoidable work inside the timed region.
    sheet.Cells[2, 1].LoadFromCollection(data);
    excelOut.SaveAs(new FileInfo(folder + "EPPlus_" + Guid.NewGuid() + ".xlsx"));
}
timer.Stop();
return timer.ElapsedMilliseconds;
Spreadsheet Gear:
// SpreadsheetGear benchmark body: copies (Id, Title, Value) rows from the input
// workbook into a list, writes them to a new worksheet named "Out" under a bold
// header row, saves the result and returns the elapsed time in milliseconds.
var stopwatch = Stopwatch.StartNew();
var items = new List<Item>();

var sourceBook = Factory.GetWorkbook(folder + fileIn);
var sourceSheet = sourceBook.Worksheets[0];
// Indices are zero-based here: reading starts at row index 1 and stops at the
// first row whose first cell has no value.
for (var r = 1; sourceSheet.Cells[r, 0].Value != null; r++)
{
    items.Add(new Item()
    {
        Id = int.Parse(sourceSheet.Cells[r, 0].Text),
        Title = sourceSheet.Cells[r, 1].Text,
        Value = int.Parse(sourceSheet.Cells[r, 2].Text)
    });
}
sourceBook.Close();

var targetBook = Factory.GetWorkbook();
var targetSheet = targetBook.Worksheets.Add();
targetSheet.Name = "Out";

// Header row (row index 0), bold across the three columns.
var nextRow = 0;
targetSheet.Cells[nextRow, 0, nextRow, 2].Font.Bold = true;
targetSheet.Cells[nextRow, 0].Value = "Id";
targetSheet.Cells[nextRow, 1].Value = "Title";
targetSheet.Cells[nextRow, 2].Value = "Value";
nextRow++;

// One worksheet row per item, directly below the header.
foreach (var entry in items)
{
    targetSheet.Cells[nextRow, 0].Value = entry.Id;
    targetSheet.Cells[nextRow, 1].Value = entry.Title;
    targetSheet.Cells[nextRow, 2].Value = entry.Value;
    nextRow++;
}

targetBook.SaveAs(folder + "SpreadsheetGear_" + Guid.NewGuid() + ".xlsx", FileFormat.OpenXMLWorkbook);
targetBook.Close();
stopwatch.Stop();
return stopwatch.ElapsedMilliseconds;
Main function
// Benchmark driver: runs each test `runs` times, accumulates the elapsed
// milliseconds and prints the total and the per-file average for each library.
// NOTE(review): every iteration is timed, including the first one, so with a
// small `runs` the averages include any one-time start-up cost.
var runs = 1;
var msE = 0.0;
var msG = 0.0;
// A fresh tester instance is created for every run; the previously declared
// (and never used) testerG/testerE instances have been removed.
for (var i = 0; i < runs; ++i)
{
    msG += new TestSpreadsheetGear().Run(folder, originalFile);
}
for (var i = 0; i < runs; ++i)
{
    msE += new TestEpPlus().Run(folder, originalFile);
}
Console.WriteLine("Spreadsheet time: " + msG + ". Per file: " + msG / runs);
Console.WriteLine("EP Plus time: " + msE + ". Per file: " + msE / runs);
Console.ReadKey();

I believe that the reason for the results you are seeing is the fact that on the first run the .NET CLR must JIT the code. Since SpreadsheetGear is a complete spreadsheet engine under the hood (as opposed to a read / write library) there is more code to JIT - thus the first run is taking longer for SpreadsheetGear than EPPlus (I am speculating here but have a great deal of experience in benchmarking .NET code over the last 10 years).
I do not have EPPlus installed, but I did write a test which tries to do the same thing you are doing with SpreadsheetGear 2012. Since I don't have your starting workbook, I first build the workbook. Then I used more optimal SpreadsheetGear APIs. The first time I run it I get 141 milliseconds for SpreadsheetGear 2012. After the first run I get 9 or 10 milliseconds for each run on an overclocked Core i7-980x running Win7 x86, using a release build run without the debugger.
I have pasted my code below (just paste it into a .NET 4.0 C# console application).
One more thought I have is that this is a very small test case. To really see the performance of SpreadsheetGear 2012 try this with 100,000 rows or even 1 million rows.
Disclaimer: I own SpreadsheetGear LLC
using System;
using System.Collections.Generic;
using System.Diagnostics;
using SpreadsheetGear;
namespace SGvsEPPlus
{
/// <summary>
/// Console benchmark for SpreadsheetGear: builds a workbook with the requested
/// number of rows, saves it, then times the open / copy-to-list / write / save
/// round trip and prints each phase's duration in milliseconds.
/// </summary>
class Program
{
    /// <summary>One data row (Id, Title, Value) copied between workbooks.</summary>
    internal struct Item
    {
        internal Item(int id, string title, int value)
        {
            Id = id;
            Title = title;
            Value = value;
        }
        internal int Id;
        internal string Title;
        internal int Value;
    }

    /// <summary>
    /// Runs one full benchmark pass and writes the per-phase timings to the console.
    /// </summary>
    /// <param name="rows">Number of data rows to generate in the input workbook.</param>
    static void Test(int rows)
    {
        // Verbatim string literal: the prefix must be '@' ('#' does not compile).
        string filename = @"C:\tmp\MyWorkbook.xlsx";
        Console.Write("Test({0})...", rows);
        var timer = new Stopwatch();

        // Create workbook since we don't have poster's original workbook.
        timer.Restart();
        var workbook = Factory.GetWorkbook();
        // The worksheet is cast to IValues so values are set directly instead of
        // going through the Cells range indexer.
        var values = (SpreadsheetGear.Advanced.Cells.IValues)workbook.Worksheets[0];
        // Row index 0 is left empty here; data occupies row indices 1..rows.
        for (int row = 1; row <= rows; row++)
        {
            values.SetNumber(row, 0, row);
            values.SetText(row, 1, "Title " + row);
            values.SetNumber(row, 2, row * 10);
        }
        Console.Write("Create workbook={0:0}...", timer.Elapsed.TotalMilliseconds);

        // Save workbook
        timer.Restart();
        workbook.SaveAs(filename, FileFormat.OpenXMLWorkbook);
        Console.Write("Save workbook={0:0}...", timer.Elapsed.TotalMilliseconds);

        // Track total time of original test.
        var totalTimer = Stopwatch.StartNew();

        // Open workbook
        timer.Restart();
        var excelIn = Factory.GetWorkbook(filename);
        Console.Write("Open excelIn={0:0}...", timer.Elapsed.TotalMilliseconds);

        // Copy workbook to list
        timer.Restart();
        var sheetIn = excelIn.Worksheets[0];
        var valuesIn = (SpreadsheetGear.Advanced.Cells.IValues)sheetIn;
        var rowIn = 1;
        var data = new List<Item>(rows);
        // Stop at the first row whose first cell has no value.
        while (valuesIn[rowIn, 0] != null)
        {
            data.Add(new Item(
                (int)valuesIn[rowIn, 0].Number,
                valuesIn[rowIn, 1].Text,
                (int)valuesIn[rowIn, 2].Number));
            rowIn++;
        }
        excelIn.Close(); // Not necessary but left for consistency.
        Console.Write("excelIn->data={0:0}...", timer.Elapsed.TotalMilliseconds);

        // Write the list into a new workbook under a bold header row.
        timer.Restart();
        var excelOut = Factory.GetWorkbook();
        var sheetOut = excelOut.Worksheets[0];
        var valuesOut = (SpreadsheetGear.Advanced.Cells.IValues)sheetOut;
        sheetOut.Name = "Out";
        var rowOut = 0;
        sheetOut.Cells[rowOut, 0, rowOut, 2].Font.Bold = true;
        sheetOut.Cells[rowOut, 0].Value = "Id";
        sheetOut.Cells[rowOut, 1].Value = "Title";
        sheetOut.Cells[rowOut++, 2].Value = "Value";
        foreach (var item in data)
        {
            valuesOut.SetNumber(rowOut, 0, item.Id);
            valuesOut.SetText(rowOut, 1, item.Title);
            valuesOut.SetNumber(rowOut, 2, item.Value);
            rowOut++;
        }
        Console.Write("data->excelOut={0:0}...", timer.Elapsed.TotalMilliseconds);

        // Save the output workbook under a unique file name.
        timer.Restart();
        excelOut.SaveAs(@"C:\tmp\SpreadsheetGear_" + Guid.NewGuid() + ".xlsx", FileFormat.OpenXMLWorkbook);
        excelOut.Close(); // Again - not necessary.
        Console.WriteLine("Save excelOut={0:0}...", timer.Elapsed.TotalMilliseconds);
        Console.WriteLine(" Total={0:0}", totalTimer.Elapsed.TotalMilliseconds);
    }

    static void Main(string[] args)
    {
        // Do it three times with 1000 rows. Note that the first
        // time takes longer because code must be JITted.
        Test(1000);
        Test(1000);
        Test(1000);
    }
}
}

Related

Filter & Delete rows of data based off of column value fast! (Google Sheets)

Is there a way to filter the data in column Q of my Google Sheet faster than reading the lines one by one? Each day there are about 400+ lines it needs to scan through, and I need to delete every row where the value in column Q is less than 1. Right now, watching it run, it takes about 10+ minutes.
/**
 * Deletes every row of the 'CancelRawData' sheet whose column Q value is less
 * than 1, then shows a confirmation alert.
 *
 * Column Q is read with a single getValues() call instead of one getValue()
 * call per row, which removes one service round-trip per row.
 */
function UpdateLog() {
  var returnSheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('CancelRawData');
  var rowCount = returnSheet.getLastRow();
  // getRange() rejects a zero-row range, so only read when the sheet has content.
  if (rowCount > 0) {
    // Column Q is column 17; the result is a rowCount x 1 array of cell values.
    var columnQ = returnSheet.getRange(1, 17, rowCount, 1).getValues();
    // Walk bottom-up so deleting a row never shifts rows still to be checked.
    for (var i = rowCount; i > 0; i--) {
      if (columnQ[i - 1][0] < 1) {
        returnSheet.deleteRow(i);
      }
    }
  }
  SpreadsheetApp.getUi().alert("🎉 Congratulations, your data has been updated", SpreadsheetApp.getUi().ButtonSet.OK);
}
Try it this way:
/**
 * Deletes every row of 'Sheet0' whose column Q value (array index 16) is
 * numeric and less than 1. All values are read once up front; rows are then
 * deleted top-down, compensating for rows already removed.
 */
function UpdateLog() {
  const sheet = SpreadsheetApp.getActive().getSheetByName('Sheet0');
  const rows = sheet.getDataRange().getValues();
  let removed = 0;
  for (const [index, row] of rows.entries()) {
    const q = row[16];
    if (isNaN(q) || !(q < 1)) {
      continue;
    }
    // Array index -> 1-based row number, shifted up by the rows deleted so far.
    sheet.deleteRow(index + 1 - removed);
    removed += 1;
  }
}
This is a bit quicker
/**
 * Removes every row of 'Sheet0' whose column Q value (array index 16) is
 * numeric and less than 1, by rewriting the sheet with only the surviving rows.
 *
 * NOTE: the sheet is rewritten from getValues(), so any formulas in the data
 * range are replaced by their computed values.
 */
function UpdateLog() {
  const ss = SpreadsheetApp.getActive();
  const sh = ss.getSheetByName('Sheet0');
  // Keep the rows that must NOT be deleted. The previous predicate kept exactly
  // the rows with Q < 1, i.e. the ones that were supposed to be removed.
  const vs = sh.getDataRange().getValues().filter(r => !(!isNaN(r[16]) && r[16] < 1));
  sh.clearContents();
  // With no surviving rows there is nothing to write back (and vs[0] is undefined).
  if (vs.length > 0) {
    sh.getRange(1, 1, vs.length, vs[0].length).setValues(vs);
  }
}

How to Dump latest list in LinqPad?

So the following code will do a dump of the whole list every second.
// Adds one item per second to the list and dumps the whole list after each
// addition (this is the behavior the question wants to replace).
var list = new List<object>();
// One generator for the whole loop instead of a new Random per item.
var random = new Random();
for (int i = 0; i < 100; i++)
{
    list.Add(new { A = i.ToString(), B = random.Next() });
    list.Dump(); // How to DumpLatest()?
    Thread.Sleep(1000);
}
But how can I make it just update the existing dump output instead of adding a new one each time?
There is a related Q/A here but it doesn't work for me.
The DumpLatest() extension method only applies to IObservable<T>; there's no way to detect that an item is added to a List<T>, so LinqPad can't display the last value added.
Instead you can use a DumpContainer and change its content explicitly:
// Dumps a single DumpContainer once, then replaces its content with the most
// recently added item on every iteration.
var list = new List<object>();
var container = new DumpContainer();
container.Dump();
// One generator for the whole loop instead of a new Random per item.
var random = new Random();
for (int i = 0; i < 100; i++)
{
    var item = new { A = i.ToString(), B = random.Next() };
    list.Add(item);
    container.Content = item;
    Thread.Sleep(1000);
}
You could also achieve the same result with a Subject<T> (arguably more elegant):
// Pushes one item per second into a Subject; DumpLatest() is called on the
// subject once, before the loop starts.
var subject = new Subject<object>();
subject.DumpLatest();
// One generator for the whole loop instead of a new Random per item.
var random = new Random();
for (int i = 0; i < 100; i++)
{
    var item = new { A = i.ToString(), B = random.Next() };
    subject.OnNext(item);
    Thread.Sleep(1000);
}
EDIT: OK, I thought you wanted to see only the last item. To print the whole list, just use subject.Dump(), as mentioned by Joe in the comments. If you use the first approach, put the list itself in the DumpContainer, and call Refresh() on it in the loop.
Basically the same as Thomas Levesque's answer, just a little shorter.
// Emits one item per second, 100 items in total, and dumps the sequence.
// One generator is shared by all items instead of a new Random per item.
var random = new Random();
Observable.Interval(TimeSpan.FromSeconds(1))
    .Select(t => new { A = t.ToString(), B = random.Next() })
    .Take(100)
    .Dump(); // all 100
    //.DumpLatest(); //only latest one

OPserver: SQL CPU Graph Not Displayed

I am using the Opserver tool to monitor SQL performance. Everything works fine except that it does not display the CPU spark graphs, for both standalone and cluster instances.
I replaced the SQLCPUSpark function with this code:
/// <summary>
/// Builds an axis-less area chart from the last hour of CPU history of the
/// named SQL instance and returns it as the action result.
/// </summary>
/// <param name="node">Name used to look up the SQL instance.</param>
/// <returns>
/// The chart result, or a not-found result when no instance matches <paramref name="node"/>.
/// </returns>
public ActionResult SQLCPUSpark(string node)
{
    var sqlInstance = SQLInstance.Get(node);
    if (sqlInstance == null)
    {
        return ContentNotFound("SQLNode not found with name = '" + node + "'");
    }

    var history = sqlInstance.CPUHistoryLastHour;
    var sparkChart = new Chart();

    // Chart area: both axes hidden, Y axis capped at 100.
    var sparkArea = new ChartArea
    {
        AxisX = { Enabled = AxisEnabled.False },
        AxisY = { Enabled = AxisEnabled.False, Maximum = 100 }
    };
    sparkChart.ChartAreas.Add(sparkArea);

    // Data series: one point per history sample (event time vs. process utilization).
    var cpuSeries = new Series();
    foreach (var sample in history.Data)
    {
        cpuSeries.Points.AddXY(sample.EventTime.ToOADate(), sample.ProcessUtilization);
    }
    cpuSeries.Label = "";
    cpuSeries.Font = new Font("Segoe UI", 8.0f, FontStyle.Bold);
    cpuSeries.ChartType = SeriesChartType.Area;
    sparkChart.Series.Add(cpuSeries);

    return sparkChart.ToResult();
}

Reading Excel spreadsheet using EPPlus

Could someone point me in the right direction on how to read an Excel spreadsheet and loop through all the rows and columns to retrieve values using EPPlus and MVC? So far I have seen examples that create a spreadsheet, but I did not find any that open an Excel file and read values from it. Any help would be appreciated.
TIA
Sue..
Simple example
// Minimal EPPlus read example: opens the workbook at filePath and reads the
// value of cell [1, 1] of its first worksheet.
// NOTE: this excerpt is incomplete - the closing braces of the using/if blocks
// are not part of the snippet.
// Get the file we are going to process
var existingFile = new FileInfo(filePath);
// Open and read the XLSX file; the package is disposed when the using block ends.
using (var package = new ExcelPackage(existingFile))
{
// Get the workbook in the file
var workBook = package.Workbook;
if (workBook != null)
{
// Only continue when the workbook has at least one worksheet
if (workBook.Worksheets.Count > 0)
{
// Get the first worksheet
var currentWorksheet = workBook.Worksheets.First();
// Read some data: the raw value of cell [1, 1]
object col1Header = currentWorksheet.Cells[1, 1].Value;
A simple example of how you can read an Excel file using EPPlus in .NET 4.5:
/// <summary>
/// Prints the row, column and trimmed value of every cell inside the used
/// range of the workbook's first worksheet to the console.
/// </summary>
/// <param name="FilePath">Path of the Excel file to read.</param>
public void readXLS(string FilePath)
{
    FileInfo existingFile = new FileInfo(FilePath);
    using (ExcelPackage package = new ExcelPackage(existingFile))
    {
        // Get the first worksheet in the workbook.
        ExcelWorksheet worksheet = package.Workbook.Worksheets[1];

        // Dimension is null when the worksheet contains no cells at all;
        // in that case there is nothing to print.
        if (worksheet.Dimension == null)
        {
            return;
        }

        int colCount = worksheet.Dimension.End.Column; // last used column
        int rowCount = worksheet.Dimension.End.Row;    // last used row
        for (int row = 1; row <= rowCount; row++)
        {
            for (int col = 1; col <= colCount; col++)
            {
                // Empty cells inside the used range have a null Value; print
                // them as an empty string instead of throwing.
                object value = worksheet.Cells[row, col].Value;
                string text = value == null ? string.Empty : value.ToString().Trim();
                Console.WriteLine(" Row:" + row + " column:" + col + " Value:" + text);
            }
        }
    }
}

Add table to powerpoint slide using Open XML

I got the code for the Table in the powerpoint using the code generator, but I am not able to add the table to an existing powerpoint document.
I tried adding another table to the intended slide and doing the following:
// Empties the first table found on the slide, then refills it with deep
// copies of the children of a freshly created table.
var targetTable = slidePart.Slide.Descendants<Table>().First();
targetTable.RemoveAllChildren();

var sourceTable = CreateTable();
// Deep-clone each child so the copy has no parent and can be attached to the target.
var clonedChildren = sourceTable.ChildElements
    .Select(child => child.CloneNode(true))
    .ToList();
foreach (var clone in clonedChildren)
{
    targetTable.AppendChild(clone);
}
But that didn't work.
I am out of ideas on this issue.
My original target is to add a table with dynamic number of columns and fixed number of row to my presentation.
I know it's been a long time since this question was posted, but just in case someone needs working code to create a table in a pptx:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Presentation;
using A = DocumentFormat.OpenXml.Drawing;
using System.IO;
namespace ANF.Slides.TestEngine
{
/// <summary>
/// Fills the table on the last slide of a template presentation with one row
/// per product, cloning that slide whenever the accumulated row height exceeds
/// the page border.
/// </summary>
class Program
{
    // Maximum number of characters placed in the first paragraph of a cell;
    // longer text continues in a second paragraph.
    const int MaxFirstLineLength = 25;

    // Counter used to build a unique part id ("newSlide" + index) per cloned slide.
    static int index = 1;

    static void Main(string[] args)
    {
        Console.WriteLine("Preparing Presentation");
        PopulateData();
        Console.WriteLine("Completed Presentation");
        Console.ReadLine();
    }

    /// <summary>
    /// Copies the template to the output path and appends one table row per
    /// product, starting a cloned slide after a slide's rows overflow.
    /// </summary>
    private static void PopulateData()
    {
        var overflow = false;
        // Height budget per slide, in the same units as TableRow.Height.
        const int pageBorder = 3000000;
        // NOTE(review): if AdventureWorksEntities is disposable (typical for a
        // data context) it should be wrapped in a using block - confirm.
        var db = new AdventureWorksEntities();
        var products = db.Products;
        // Verbatim string literals: the prefix must be '@' ('#' does not compile).
        const string outputFile = @"E:\openxml\output.pptx";
        File.Copy(@"E:\OpenXml\Template.pptx", outputFile, true);
        using (var myPres = PresentationDocument.Open(outputFile, true))
        {
            var presPart = myPres.PresentationPart;
            var slideIdList = presPart.Presentation.SlideIdList;
            var list = slideIdList.ChildElements
                .Cast<SlideId>()
                .Select(x => presPart.GetPartById(x.RelationshipId))
                .Cast<SlidePart>();
            // The last slide in the deck is the one holding the table.
            var tableSlidePart = list.Last();
            var current = tableSlidePart;
            long totalHeight = 0;
            foreach (var product in products)
            {
                if (overflow)
                {
                    // Previous slide is full: continue on a clone of the table slide.
                    current = CloneSlidePart(presPart, tableSlidePart);
                    overflow = false;
                    totalHeight = 0;
                }
                var tbl = current.Slide.Descendants<A.Table>().First();
                var tr = new A.TableRow();
                tr.Height = 200000;
                tr.Append(CreateTextCell(product.Name));
                tr.Append(CreateTextCell(product.ProductNumber));
                tr.Append(CreateTextCell(product.Size));
                tr.Append(CreateTextCell(String.Format("{0:00}", product.ListPrice)));
                tr.Append(CreateTextCell(product.SellStartDate.ToShortDateString()));
                tbl.Append(tr);
                totalHeight += tr.Height;
                if (totalHeight > pageBorder)
                    overflow = true;
            }
        }
    }

    /// <summary>
    /// Adds a new slide part whose content is fed from the template slide's
    /// stream and registers it in the presentation's slide id list.
    /// </summary>
    /// <param name="presentationPart">Presentation that receives the new slide.</param>
    /// <param name="slideTemplate">Slide whose content and layout are reused.</param>
    /// <returns>The newly created slide part.</returns>
    static SlidePart CloneSlidePart(PresentationPart presentationPart, SlidePart slideTemplate)
    {
        // Create a new slide part in the presentation.
        SlidePart newSlidePart = presentationPart.AddNewPart<SlidePart>("newSlide" + index);
        index++;
        // Add the slide template content into the new slide; the source stream
        // is disposed as soon as it has been copied.
        using (Stream templateStream = slideTemplate.GetStream(FileMode.Open))
        {
            newSlidePart.FeedData(templateStream);
        }
        // Make sure the new slide references the proper slide layout.
        newSlidePart.AddPart(slideTemplate.SlideLayoutPart);
        // Get the list of slide ids.
        SlideIdList slideIdList = presentationPart.Presentation.SlideIdList;
        // Find the slide with the highest id; the new slide is inserted after it.
        uint maxSlideId = 1;
        SlideId prevSlideId = null;
        foreach (SlideId slideId in slideIdList.ChildElements)
        {
            if (slideId.Id > maxSlideId)
            {
                maxSlideId = slideId.Id;
                prevSlideId = slideId;
            }
        }
        maxSlideId++;
        SlideId newSlideId = slideIdList.InsertAfter(new SlideId(), prevSlideId);
        // Make sure id and relationship id are set appropriately.
        newSlideId.Id = maxSlideId;
        newSlideId.RelationshipId = presentationPart.GetIdOfPart(newSlidePart);
        return newSlidePart;
    }

    /// <summary>
    /// Creates a table cell containing <paramref name="text"/>. Text longer than
    /// <see cref="MaxFirstLineLength"/> characters is split into two paragraphs;
    /// null or empty text yields a cell with a single empty paragraph.
    /// </summary>
    /// <param name="text">Cell text; may be null or empty.</param>
    /// <returns>The populated table cell.</returns>
    private static A.TableCell CreateTextCell(string text)
    {
        var lines = new List<string>(2);
        if (!string.IsNullOrEmpty(text))
        {
            if (text.Length > MaxFirstLineLength)
            {
                lines.Add(text.Substring(0, MaxFirstLineLength));
                // Continue exactly where the first chunk ended. Starting at
                // index 26 silently dropped the character at index 25.
                lines.Add(text.Substring(MaxFirstLineLength));
            }
            else
            {
                lines.Add(text);
            }
        }

        A.TextBody textBody = new A.TextBody();
        textBody.Append(new A.BodyProperties());
        textBody.Append(new A.ListStyle());
        foreach (var line in lines)
        {
            A.Run run = new A.Run();
            run.Append(new A.RunProperties() { Language = "en-US", Dirty = false, SmartTagClean = false });
            run.Append(new A.Text { Text = line });
            A.Paragraph paragraph = new A.Paragraph();
            paragraph.Append(run);
            textBody.Append(paragraph);
        }
        if (lines.Count == 0)
        {
            // A text body needs at least one paragraph, so an empty cell still
            // gets an empty one.
            textBody.Append(new A.Paragraph());
        }

        A.TableCell tableCell = new A.TableCell();
        tableCell.Append(textBody);
        tableCell.Append(new A.TableCellProperties());
        return tableCell;
    }
}
}

Resources