Put value in datatable group by week - linq

I have a DataTable that contains values from 1 January to March, something like this:
DATE Employer Job1 Job2
1/4/2013 A 1.3 2
1/4/2013 B 2.5 6
1/6/2013 C 3.7 2.4
1/7/2013 D 11
1/7/2013 F 334 0
1/8/2013 A 1.87 1
1/8/2013 B 6.85 2
1/9/2013 C 58 226
1/16/2013 A 9.43 1.45
1/16/2013 B 5.27 0.6
1/22/2013 C 45.4 5
1/23/2013 A 44 4.78
1/29/2013 B 45 40
2/2/2013 C 45 54.12
2/2/2013 D 7 4.4587
2/3/2013 F 265 11.486
Update:
DataTable datatable = new DataTable("Employee");
datatable.Columns.Add("Date", typeof(string));
datatable.Columns.Add("Employee", typeof(string));
datatable.Columns.Add("Job1", typeof(double));
datatable.Columns.Add("Job2", typeof(double));
datatable.Rows.Add(new Object[] { "1/4/2013", "A", 1.3, 2 });
datatable.Rows.Add(new Object[] { "1/4/2013", "B", 2.5, 6 });
datatable.Rows.Add(new Object[] { "1/6/2013", "C", 3.7, 2.4 });
datatable.Rows.Add(new Object[] { "1/7/2013", "D", 11, 0.0 });
datatable.Rows.Add(new Object[] { "1/7/2013", "F", 334, 0 });
datatable.Rows.Add(new Object[] { "1/8/2013", "A", 1.87, 1 });
datatable.Rows.Add(new Object[] { "1/8/2013", "B", 6.85, 2 });
datatable.Rows.Add(new Object[] { "1/9/2013", "C", 58, 226 });
datatable.Rows.Add(new Object[] { "1/16/2013", "A", 9.43, 1.45 });
datatable.Rows.Add(new Object[] { "1/16/2013", "B", 5.27, 0.6 });
datatable.Rows.Add(new Object[] { "1/22/2013", "C", 45.4, 5 });
datatable.Rows.Add(new Object[] { "1/23/2013", "A", 44, 4.78 });
datatable.Rows.Add(new Object[] { "1/29/2013", "B", 45, 40 });
datatable.Rows.Add(new Object[] { "2/2/2013", "C", 45, 54.12 });
datatable.Rows.Add(new Object[] { "2/2/2013", "D", 7, 4.4587 });
datatable.Rows.Add(new Object[] { "2/3/2013", "F", "265", 11.486 });
datatable.Rows.Add(new Object[] { "3/3/2013", "A", "25", 28.124 });
I want to sum the values for Job1 per week, where a week runs from Monday to Sunday. This is the code I have written so far:
DateTime minDate = datatable.AsEnumerable()
.Min(r => DateTime.Parse(r.Field<string>("DATE")));
DateTime startDate = minDate.Date.Date.AddDays(+((6 + minDate.DayOfWeek
- DayOfWeek.Monday) % 7));
DateTime nextDate = startDate.AddDays(6);
DateTime maxDate = datatable.AsEnumerable()
.Max(r => DateTime.Parse(r.Field<string>("DATE")));
while (nextDate < maxDate)
{
var weekEmpGroups = datatable.AsEnumerable()
.Select(r => new
{
Row = r,
Employee = r.Field<String>("Employee"),
Date = DateTime.Parse(r.Field<string>("DATE"))
// week = minDate.Date.Date.AddDays(+((6 + minDate.DayOfWeek
- DayOfWeek.Monday) % 7))
})
.GroupBy(x => x.Employee);
DataTable dtWeeklyResults = new DataTable();
dtWeeklyResults.Columns.Add("Employee", typeof(string));
var dtf = System.Globalization.CultureInfo.CurrentCulture.DateTimeFormat;
double weekCount = 0.0;
string expression;
DataRow[] foundRows;
foreach (var empGroup in weekEmpGroups)
{
string employee = empGroup.Key;
var newRow = dtWeeklyResults.Rows.Add();
newRow["Employee"] = employee;
expression = "Employee=" + employee + " AND Date Between " + startDate
+ " And " + nextDate;
foundRows = datatable.Select(expression);
if (foundRows.Length > 0)
{
// add values using linq
}
}
Please suggest whether this is the correct way to do this, and also how to add up all the values week by week. The result should look like this for Job1:
Employee 1/7-1/13 1/14-1/20 1/21-1/27 1/28-2/3 and so on...
A sum of values for this 7 days
B
C
D
Can anybody suggest how to achieve this by LINQ?

Helper methods
/// <summary>
/// Builds the "M/d-M/d" caption (Monday to Sunday) for a week number of the given year.
/// </summary>
/// <param name="weekNumber">Week number, expected to come from <c>GetWeekOfYear</c> so that it follows the week rule of the shared culture <c>ci</c>.</param>
/// <param name="year">Calendar year the week belongs to. Defaults to 2013, the value that used to be hard-coded, so existing callers are unaffected.</param>
/// <returns>The start and end date of the week, formatted with <c>ci</c> and joined by a dash.</returns>
private static string GetColumnName(int weekNumber, int year = 2013)
{
    DateTime jan1 = new DateTime(year, 1, 1);

    // Shift January 1st to the Monday given by the weekday difference; for every
    // weekday except Sunday this is a Monday on or before January 1st.
    int daysOffset = DayOfWeek.Monday - jan1.DayOfWeek;
    DateTime firstMonday = jan1.AddDays(daysOffset);

    int firstWeek = ci.Calendar.GetWeekOfYear(
        firstMonday,
        ci.DateTimeFormat.CalendarWeekRule,
        ci.DateTimeFormat.FirstDayOfWeek);

    // When that Monday is already counted as week 1 the numbering is one ahead of
    // the offset computed below, so compensate.
    if (firstWeek <= 1)
    {
        weekNumber -= 1;
    }

    DateTime weekStart = firstMonday.AddDays((weekNumber - 1) * 7);
    return string.Format("{0}-{1}", weekStart.ToString("M/d", ci), weekStart.AddDays(6).ToString("M/d", ci));
}
/// <summary>
/// Returns the week-of-year number of <paramref name="value"/>, using the calendar,
/// week rule and first day of week of the shared culture <c>ci</c>.
/// </summary>
private static int GetWeekOfYear(DateTime value)
{
    var calendar = ci.Calendar;
    var format = ci.DateTimeFormat;
    return calendar.GetWeekOfYear(value, format.CalendarWeekRule, format.FirstDayOfWeek);
}
CultureInfo instance
static CultureInfo ci = new CultureInfo("en-us");
Logic
// Parse every source row once into a typed (Date, Employee, Value) projection.
var data = dt.AsEnumerable()
    .Select(row => new
    {
        Date = DateTime.Parse(row.Field<string>("Date"), ci),
        Employee = row.Field<string>("Employee"),
        Value = row.Field<double>("Job1")
    })
    .ToList();

// Date range of the data and the week numbers it spans.
var minDateTime = data.Min(entry => entry.Date);
var maxDateTime = data.Max(entry => entry.Date);
var minWeekNumber = GetWeekOfYear(minDateTime);
var maxWeekNumber = GetWeekOfYear(maxDateTime);

// Result table: one "Employee" column plus one double column per week number.
// The week columns are temporarily named after their week number.
var resultDt = new DataTable("Job1");
resultDt.Columns.Add("Employee", typeof(string));
for (int week = minWeekNumber; week <= maxWeekNumber; week++)
{
    resultDt.Columns.Add(week.ToString(), typeof(double));
}

// Per employee, the sum of Value for every week that has data (evaluated lazily).
var employeeData = data
    .GroupBy(entry => entry.Employee)
    .Select(byEmployee => new
    {
        Employee = byEmployee.Key,
        Items = byEmployee
            .GroupBy(entry => GetWeekOfYear(entry.Date))
            .Select(byWeek => new
            {
                Week = byWeek.Key,
                Value = byWeek.Sum(entry => entry.Value)
            })
    });

// One result row per employee; weeks without data stay DBNull.
foreach (var employee in employeeData)
{
    var newRow = resultDt.NewRow();
    newRow["Employee"] = employee.Employee;
    foreach (var weekTotal in employee.Items)
    {
        newRow[weekTotal.Week.ToString()] = weekTotal.Value;
    }
    resultDt.Rows.Add(newRow);
}

// Replace the numeric column names with "start-end" captions; the non-numeric
// "Employee" column is left untouched because it does not parse as an int.
foreach (DataColumn column in resultDt.Columns)
{
    int weekNumber;
    if (int.TryParse(column.ColumnName, out weekNumber))
    {
        column.ColumnName = GetColumnName(weekNumber);
    }
}
Results:
Job1
Employee 1/7-1/13 1/14-1/20 1/21-1/27 1/28-2/3 2/4-2/10 2/11-2/17 2/18-2/24 2/25-3/3 3/4-3/10 3/11-3/
A 1,3 1,87 9,43 44 2
B 2,5 6,85 5,27 45
C 61,7 45,4 45
D 11 7
F 334 265

If anybody has the same requirement, i.e. showing the results week by week, here is the code:
/// <summary>
/// Pivots <paramref name="datatable"/> into one row per employee and one string column per
/// Monday-to-Sunday week; each cell holds the sum of the <paramref name="resultFor"/> column
/// for that employee and week.
/// </summary>
/// <param name="datatable">Source table. Reads the string column "DATE" (parsed with the current culture), the string column "Employee" and the column named by <paramref name="resultFor"/>.</param>
/// <param name="resultFor">Name of the column whose values are summed (for example "Job1").</param>
/// <returns>
/// A new table with an "Employee" column followed by one column per week, starting at the first
/// Monday on or after the earliest date. Rows dated before that Monday are not reported.
/// A cell is an empty string when the week has no rows for the employee, or when the total is 0
/// and at least one contributing value was blank. An empty source yields a table with only the
/// "Employee" column.
/// </returns>
private static DataTable GetWeeklyColumnsAndData(DataTable datatable, string resultFor)
{
    // Parse every row once. The previous version re-filtered the table's DefaultView for each
    // cell, which overwrote the caller's RowFilter and depended on a hand-built filter string
    // (breaking on names containing an apostrophe and on cultures whose date separator is not '/').
    var entries = datatable.AsEnumerable()
        .Select(r => new
        {
            Date = DateTime.Parse(r.Field<string>("DATE")).Date,
            Employee = r.Field<string>("Employee"),
            Raw = r[resultFor].ToString() // DBNull becomes "", which is treated as blank below
        })
        .ToList();

    DataTable resultDt = new DataTable();
    resultDt.Columns.Add("Employee", typeof(string));
    if (entries.Count == 0)
    {
        return resultDt;
    }

    DateTime minDate = entries.Min(e => e.Date);
    DateTime maxDate = entries.Max(e => e.Date);

    // GetNextWeekday returns its argument unchanged when it is already a Monday.
    DateTime startingMonday = GetNextWeekday(minDate, DayOfWeek.Monday);

    // Add one column per week. "<=" (not "<") so that a week starting exactly on the
    // latest date still gets a column.
    for (DateTime monday = startingMonday; monday <= maxDate; monday = monday.AddDays(7))
    {
        string weekName = string.Format("{0}-{1}", monday.ToString("M/d", ci), monday.AddDays(6).ToString("M/d", ci));
        resultDt.Columns.Add(weekName, typeof(string));
    }

    // GroupBy keeps employees in order of first appearance.
    foreach (var employeeRows in entries.GroupBy(e => e.Employee))
    {
        DataRow newRow = resultDt.NewRow();
        newRow[0] = employeeRows.Key;

        for (int col = 1; col < resultDt.Columns.Count; col++)
        {
            DateTime weekStart = startingMonday.AddDays(7 * (col - 1));
            DateTime weekEnd = weekStart.AddDays(6);

            bool isBlank = true; // stays true when the week has no rows at all
            bool sawRow = false;
            double total = 0;
            foreach (var entry in employeeRows.Where(e => e.Date >= weekStart && e.Date <= weekEnd))
            {
                if (!sawRow)
                {
                    sawRow = true;
                    isBlank = false;
                }

                if (entry.Raw != "")
                {
                    total += Convert.ToDouble(entry.Raw);
                }
                else
                {
                    isBlank = true;
                }
            }

            if (total == 0 && isBlank)
            {
                newRow[col] = "";
            }
            else
            {
                newRow[col] = total;
            }
        }

        resultDt.Rows.Add(newRow);
    }

    return resultDt;
}
/// <summary>
/// Returns the first date on or after <paramref name="start"/> that falls on <paramref name="day"/>.
/// When <paramref name="start"/> already falls on that weekday it is returned unchanged.
/// </summary>
public static DateTime GetNextWeekday(DateTime start, DayOfWeek day)
{
    int target = (int)day;
    int current = (int)start.DayOfWeek;

    // Adding 7 before the modulo keeps the offset within 0..6 even when target < current.
    int offset = (target - current + 7) % 7;
    return start.AddDays(offset);
}

Related

get sum from list of objects in linq C#

I have list of objects as described below:
List<Maths> mObjs = new List<Maths>();
mObjs.Add(new Maths{ Name = "Jack", M1 = 10, M2 = 5, M3 = 0, M4 = 2, M5 =1 });
mObjs.Add(new Maths { Name = "Jill", M1 = 2, M2 = 3, M3 = 4, M4 = 1, M5 = 0 });
mObjs.Add(new Maths { Name = "Michel", M1 = 12, M2 = 15, M3 = 10, M4 = 12, M5 = 11 });
Now I need to calculated the total aggregated value for all three people.
I need to get the below results, probably a new other class
List<Results> mRes = new List<Results>();
public class Results{
public string Name { get; set; }
public int TotalValue { get; set; }
}
mRes.Name = "M1"
mRes.TotalValue = 24;
mRes.Name = "M2"
mRes.TotalValue = 23;
mRes.Name = "M3"
mRes.TotalValue = 14;
mRes.Name = "M4"
mRes.TotalValue = 15;
mRes.Name = "M5"
mRes.TotalValue = 12;
How can I get this data from mObjs using a LINQ query? I know it can be done with a for loop, but I would like to know whether there is a better way using LINQ, because that would reduce the amount of code. I have similar requirements in many other places and don't want to write several foreach or for loops every time.
You can use a pre selection list to list both the name and the field to select
var lookups = new Dictionary<string,Func<Maths,int>> {
{"M1", x => x.M1 },
{"M2", x => x.M2 },
{"M3", x => x.M3 },
{"M4", x => x.M4 },
{"M5", x => x.M5 },
};
Then you can simply do
// Turn every (name, selector) pair of the lookup table into a Results entry whose
// total is that selector summed over all items in mObjs.
var mRes = lookups.Select(x => new Results {
    Name = x.Key,
    TotalValue = mObjs.Sum(x.Value)
}).ToList();
BEGIN UPDATED*
In response to comments
The lambda expression is just a function from your source class to an int.
For example
// Nested holder used to show that a lookup selector can reach into sub-objects.
// Members are public so that selector lambdas defined outside the class can read them.
class Sub1 {
    public string M3 {get;set;}
    public int M4 {get;set;}
}
// Variant of the source class whose values are mostly stored as strings.
class Math2 {
    public string Name {get;set;}
    public string M1 {get;set;}
    public string M2 {get;set;}
    public Sub1 Sub {get;set;}
}
// Each selector converts one member of Math2 to an int; string members are parsed.
var lookups = new Dictionary<string,Func<Math2,int>> {
    { "M1", x => int.Parse(x.M1) },
    { "M2", x => int.Parse(x.M2) },
    { "M3", x => int.Parse(x.Sub.M3) },
    // Sub1.M4 is already an int, so it is returned as-is instead of being parsed.
    { "M4", x => x.Sub.M4 }
};
Or if you want to put a little error checking in, you can either use functions or embed the code.
// Parses source as an int; returns 0 when source is null or is not a valid integer.
int GetInt(string source) {
    int parsed;
    if (source != null && int.TryParse(source, out parsed)) {
        return parsed;
    }
    return 0;
}
// Error-tolerant selectors: values that are missing or do not parse count as 0.
var lookups = new Dictionary<string,Func<Math2,int>> {
    // Inline variant: the parsing code is embedded directly in the lambda.
    { "M1", x => {
            int result;
            return x == null ? 0 : (int.TryParse(x.M1, out result) ? result : 0);
        }
    },
    // Helper variant: GetInt handles null and unparsable strings.
    { "M2", x => GetInt(x.M2) },
    { "M3", x => x.Sub == null ? 0 : GetInt(x.Sub.M3) },
    { "M4", x => x.Sub == null ? 0 : x.Sub.M4 }
};
END UPDATED
If you want to go further you could use reflection to build the lookups dictionary.
Here is a helper function that will generate the lookups for all Integer properties of a class.
// Builds a name -> compiled getter table for every public int property of T.
public Dictionary<string,Func<T,int>> GenerateLookups<T>() where T: class {
    // A single parameter expression is shared by all generated getters.
    var instance = Expression.Parameter(typeof(T));
    var lookups = new Dictionary<string,Func<T,int>>();
    foreach (var property in typeof(T).GetProperties()) {
        // Only int properties are picked up; adjust this test to add your own filter.
        if (property.PropertyType != typeof(int)) {
            continue;
        }
        var getter = Expression.Lambda<Func<T,int>>(Expression.Property(instance, property), instance);
        lookups.Add(property.Name, getter.Compile());
    }
    return lookups;
}
Now you can just do:
var mRes=GenerateLookups<Maths>().Select( x => new Results
{
Name = x.Key,
TotalValue = mObjs.Sum(x.Value)
}).ToList();
Not very smart but efficient and readable:
int m1Total= 0;
int m2Total= 0;
int m3Total= 0;
int m4Total= 0;
int m5Total= 0;
foreach(Maths m in mObjs)
{
m1Total += m.M1;
m2Total += m.M2;
m3Total += m.M3;
m4Total += m.M4;
m5Total += m.M5;
}
List<Results> mRes = new List<Results>
{
new Results{ Name = "M1", TotalValue = m1Total },
new Results{ Name = "M2", TotalValue = m2Total },
new Results{ Name = "M3", TotalValue = m3Total },
new Results{ Name = "M4", TotalValue = m4Total },
new Results{ Name = "M5", TotalValue = m5Total },
};
Result:
Name: "M1" TotalValue: 24
Name: "M2" TotalValue: 23
Name: "M3" TotalValue: 14
Name: "M4" TotalValue: 15
Name: "M5" TotalValue: 12
Edit: since you've explicitly asked for LINQ, if the properties are always these five i don't see why you need to use LINQ at all. If the number can change i would use a different structure.
You could for example use
a single List<Measurement> instead of multiple properties where Measurement is another class that stores the name and the value or you could use
a Dictionary<string, int> for efficient lookup.
You can try out some thing like this :
mRes.Add(new Results() { Name = "M1", TotalValue = mObjs.Sum(x => x.M1) });
To programmatically iterate through all the class properties, you might need to employ reflection.

PIVOT with LINQ from Datatable [duplicate]

I have a collection of items that contain an Enum (TypeCode) and a User object, and I need to flatten it out to show in a grid. It's hard to explain, so let me show a quick example.
Collection has items like so:
TypeCode | User
---------------
1 | Don Smith
1 | Mike Jones
1 | James Ray
2 | Tom Rizzo
2 | Alex Homes
3 | Andy Bates
I need the output to be:
1 | 2 | 3
Don Smith | Tom Rizzo | Andy Bates
Mike Jones | Alex Homes |
James Ray | |
I've tried doing this using foreach, but I can't do it that way because I'd be inserting new items to the collection in the foreach, causing an error.
Can this be done in Linq in a cleaner fashion?
I'm not saying it is a great way to pivot - but it is a pivot...
// sample data
var data = new[] {
new { Foo = 1, Bar = "Don Smith"},
new { Foo = 1, Bar = "Mike Jones"},
new { Foo = 1, Bar = "James Ray"},
new { Foo = 2, Bar = "Tom Rizzo"},
new { Foo = 2, Bar = "Alex Homes"},
new { Foo = 3, Bar = "Andy Bates"},
};
// group into columns, and select the rows per column
var grps = from d in data
group d by d.Foo
into grp
select new {
Foo = grp.Key,
Bars = grp.Select(d2 => d2.Bar).ToArray()
};
// find the total number of (data) rows
int rows = grps.Max(grp => grp.Bars.Length);
// output columns
foreach (var grp in grps) {
Console.Write(grp.Foo + "\t");
}
Console.WriteLine();
// output data
for (int i = 0; i < rows; i++) {
foreach (var grp in grps) {
Console.Write((i < grp.Bars.Length ? grp.Bars[i] : null) + "\t");
}
Console.WriteLine();
}
Marc's answer gives sparse matrix that can't be pumped into Grid directly.
I tried to expand the code from the link provided by Vasu as below:
/// <summary>
/// Pivots <paramref name="source"/> into a dictionary of rows (first key) whose values are
/// dictionaries of columns (second key). Every row contains every distinct second key, so the
/// result is a dense matrix; the aggregate is called with an empty sequence for combinations
/// that have no items.
/// </summary>
/// <remarks>The source is re-scanned for every cell.</remarks>
public static Dictionary<TKey1, Dictionary<TKey2, TValue>> Pivot3<TSource, TKey1, TKey2, TValue>(
    this IEnumerable<TSource> source
    , Func<TSource, TKey1> key1Selector
    , Func<TSource, TKey2> key2Selector
    , Func<IEnumerable<TSource>, TValue> aggregate)
{
    var rows = source.GroupBy(key1Selector).Select(rowGroup =>
    {
        var cells = source.GroupBy(key2Selector).Select(columnGroup =>
        {
            // Items that belong to both the current row key and the current column key.
            var matching = source.Where(item =>
                key1Selector(item).Equals(rowGroup.Key)
                && key2Selector(item).Equals(columnGroup.Key));
            return new { Z = columnGroup.Key, V = aggregate(matching) };
        });
        return new { X = rowGroup.Key, Y = cells.ToDictionary(e => e.Z, o => o.V) };
    });
    return rows.ToDictionary(e => e.X, o => o.Y);
}
/// <summary>
/// Sample record used to demonstrate the Pivot3 extension method.
/// </summary>
internal class Employee
{
/// <summary>Employee name.</summary>
public string Name { get; set; }
/// <summary>Department name; used as a pivot key in the sample.</summary>
public string Department { get; set; }
/// <summary>Job function; used as a pivot key in the sample.</summary>
public string Function { get; set; }
/// <summary>Salary; summed per cell in the sample.</summary>
public decimal Salary { get; set; }
}
// Demonstrates Pivot3 on a small employee list: once summing salaries per
// department/function, once counting employees per function/department.
public void TestLinqExtenions()
{
    var staff = new List<Employee>
    {
        new Employee { Name = "Fons", Department = "R&D", Function = "Trainer", Salary = 2000 },
        new Employee { Name = "Jim", Department = "R&D", Function = "Trainer", Salary = 3000 },
        new Employee { Name = "Ellen", Department = "Dev", Function = "Developer", Salary = 4000 },
        new Employee { Name = "Mike", Department = "Dev", Function = "Consultant", Salary = 5000 },
        new Employee { Name = "Jack", Department = "R&D", Function = "Developer", Salary = 6000 },
        new Employee { Name = "Demy", Department = "Dev", Function = "Consultant", Salary = 2000 }
    };

    // Rows = department, columns = function, cell = total salary.
    var result5 = staff.Pivot3(
        person => person.Department,
        person => person.Function,
        members => members.Sum(person => person.Salary));

    // Rows = function, columns = department, cell = head count.
    var result6 = staff.Pivot3(
        person => person.Function,
        person => person.Department,
        members => members.Count());
}
* can't say anything about the performance though.
You can use Linq's .ToLookup to group in the manner you are looking for.
var lookup = data.ToLookup(d => d.TypeCode, d => d.User);
Then it's a matter of putting it into a form that your consumer can make sense of. For instance:
//Warning: untested code
// One enumerator per column (lookup group); each pass of the outer loop produces one output row.
var enumerators = lookup.Select(g => g.GetEnumerator()).ToList();
int columns = enumerators.Count; // number of columns that still have data
while(columns > 0)
{
    for(int i = 0; i < enumerators.Count; ++i)
    {
        var enumerator = enumerators[i];
        if(enumerator == null) continue;
        if(!enumerator.MoveNext())
        {
            // This column is exhausted: release it and mark it so later rows get null here.
            --columns;
            enumerator.Dispose();
            enumerators[i] = null;
        }
    }
    // When the last column ran out during this pass there is no data left, so do not
    // emit a row consisting only of nulls.
    if(columns > 0)
    {
        // Snapshot the row with ToList(): a lazy Select would read Current only when the
        // consumer enumerates it, by which time the enumerators may have moved on.
        yield return enumerators.Select(e => (e != null) ? e.Current : null).ToList();
    }
}
Put that in an IEnumerable<> method and it will (probably) return a collection (rows) of collections (column) of User where a null is put in a column that has no data.
I guess this is similar to Marc's answer, but I'll post it since I spent some time working on it. The results are separated by " | " as in your example. It also uses the IGrouping<int, string> type returned from the LINQ query when using a group by instead of constructing a new anonymous type. This is tested, working code.
var Items = new[] {
    new { TypeCode = 1, UserName = "Don Smith"},
    new { TypeCode = 1, UserName = "Mike Jones"},
    new { TypeCode = 1, UserName = "James Ray"},
    new { TypeCode = 2, UserName = "Tom Rizzo"},
    new { TypeCode = 2, UserName = "Alex Homes"},
    new { TypeCode = 3, UserName = "Andy Bates"}
};
// One group per TypeCode; materialized once because it is enumerated several times below.
var Columns = (from i in Items
               group i.UserName by i.TypeCode).ToList();
Dictionary<int, List<string>> Rows = new Dictionary<int, List<string>>();
// Number of data rows = size of the longest column.
int RowCount = Columns.Max(g => g.Count());
for (int i = 0; i <= RowCount; i++) // Row 0 is the header row.
{
    Rows.Add(i, new List<string>());
}
int RowIndex;
foreach (IGrouping<int, string> c in Columns)
{
    Rows[0].Add(c.Key.ToString());
    RowIndex = 1;
    foreach (string user in c)
    {
        Rows[RowIndex].Add(user);
        RowIndex++;
    }
    // Pad the rest of this column with blanks up to the last data row. The bound must be
    // the row count; the number of columns only matched it by coincidence in the sample.
    for (int r = RowIndex; r <= RowCount; r++)
    {
        Rows[r].Add(string.Empty);
    }
}
foreach (List<string> row in Rows.Values)
{
    Console.WriteLine(row.Aggregate((current, next) => current + " | " + next));
}
Console.ReadLine();
I also tested it with this input:
var Items = new[] {
new { TypeCode = 1, UserName = "Don Smith"},
new { TypeCode = 3, UserName = "Mike Jones"},
new { TypeCode = 3, UserName = "James Ray"},
new { TypeCode = 2, UserName = "Tom Rizzo"},
new { TypeCode = 2, UserName = "Alex Homes"},
new { TypeCode = 3, UserName = "Andy Bates"}
};
Which produced the following results showing that the first column doesn't need to contain the longest list. You could use OrderBy to get the columns ordered by TypeCode if needed.
1 | 3 | 2
Don Smith | Mike Jones | Tom Rizzo
| James Ray | Alex Homes
| Andy Bates |
#Sanjaya.Tio I was intrigued by your answer and created this adaptation which minimizes keySelector execution. (untested)
/// <summary>
/// Pivots <paramref name="source"/> into a dense row/column dictionary while evaluating the
/// key selectors only once per item (through a lookup keyed on both keys). Combinations with
/// no items receive <c>default(TValue)</c> instead of an aggregate over an empty sequence.
/// </summary>
public static Dictionary<TKey1, Dictionary<TKey2, TValue>> Pivot3<TSource, TKey1, TKey2, TValue>(
    this IEnumerable<TSource> source
    , Func<TSource, TKey1> key1Selector
    , Func<TSource, TKey2> key2Selector
    , Func<IEnumerable<TSource>, TValue> aggregate)
{
    // Index the items by the (Key1, Key2) pair.
    var byBothKeys = source.ToLookup(x => new {Key1 = key1Selector(x), Key2 = key2Selector(x)});
    var rowKeys = byBothKeys.Select(g => g.Key.Key1).Distinct().ToList();
    var columnKeys = byBothKeys.Select(g => g.Key.Key2).Distinct().ToList();

    var pivot = new Dictionary<TKey1, Dictionary<TKey2, TValue>>();
    foreach (TKey1 rowKey in rowKeys)
    {
        foreach (TKey2 columnKey in columnKeys)
        {
            // A lookup returns an empty sequence for a key pair that never occurred.
            var cellItems = byBothKeys[new {Key1 = rowKey, Key2 = columnKey}];
            TValue cellValue = cellItems.Any() ? aggregate(cellItems) : default(TValue);

            Dictionary<TKey2, TValue> row;
            if (!pivot.TryGetValue(rowKey, out row))
            {
                row = new Dictionary<TKey2, TValue>();
                pivot[rowKey] = row;
            }
            row[columnKey] = cellValue;
        }
    }
    return pivot;
}

Adding Sum per row in PIVOT using LINQ

I have following function that converts normal DataTable to Pivot Table.
But I have 1 more requirement, it should also show Value column i.e sum of all the Pivot column cell.
e.g. : ORDER | SUM | PLANDATE
1 50 10-Oct-2012
1 10 10-Oct-2012
2 15 12-Oct-2012
After PIVOT it should be like
ORDER | SUM | 10-Oct-2012 | 12-Oct-2012
1 60 50 10
2 15 15
my pivot function is :
/// <summary>
/// Pivots <paramref name="dt"/>: every distinct value of <paramref name="pivotColumn"/> becomes a
/// column, and each cell receives the <paramref name="pivotValue"/> of the last source row that
/// matches the key columns and that pivot value ("latest wins").
/// </summary>
/// <param name="dt">Source table; it is not modified.</param>
/// <param name="pivotColumn">Column whose distinct values (as text) become the new column names.</param>
/// <param name="pivotValue">Column whose values fill the pivoted cells.</param>
/// <returns>A table keyed on all remaining columns, followed by one column per pivot value.</returns>
DataTable Pivot(DataTable dt, DataColumn pivotColumn, DataColumn pivotValue) {
    // find primary key columns
    //(i.e. everything but pivot column and pivot value)
    DataTable temp = dt.Copy();
    temp.Columns.Remove( pivotColumn.ColumnName );
    temp.Columns.Remove( pivotValue.ColumnName );
    string[] pkColumnNames = temp.Columns.Cast<DataColumn>()
        .Select( c => c.ColumnName )
        .ToArray();

    // prep results table: the distinct key combinations (ToTable already returns a new table)
    DataTable result = temp.DefaultView.ToTable(true, pkColumnNames);
    result.PrimaryKey = result.Columns.Cast<DataColumn>().ToArray();

    // One column per distinct pivot value. The cells hold pivotValue data, so the columns
    // must use pivotValue's type; with pivotColumn's type a DateTime pivot column could not
    // store numeric values.
    var pivotNames = dt.AsEnumerable()
        .Select(r => r[pivotColumn.ColumnName].ToString())
        .Distinct()
        .ToList();
    foreach (string pivotName in pivotNames) {
        result.Columns.Add(pivotName, pivotValue.DataType);
    }

    // load it
    foreach( DataRow row in dt.Rows ) {
        // find row to update
        DataRow aggRow = result.Rows.Find(
            pkColumnNames
                .Select( c => row[c] )
                .ToArray() );
        // the aggregate used here is LATEST
        // adjust the next line if you want (SUM, MAX, etc...)
        aggRow[row[pivotColumn.ColumnName].ToString()] = row[pivotValue.ColumnName];
    }
    return result;
}
Thanks; any help will be appreciated.
Sample Data :
OrderStatus Qty Dated
New 100 10-Oct-2012
New 150 10-Oct-2012
New 100 10-Oct-2012
Conf 200 12-Oct-2012
New 100 12-Oct-2012
Reconf 300 12-Oct-2012
New 200 15-Oct-2012
New 200 15-Oct-2012
Reconf 100 18-Oct-2012
New 150 18-Oct-2012
Conf 150 18-Oct-2012
New 100 20-Oct-2012
Reconf 500 20-Oct-2012
New 100 30-Oct-2012
New 700 30-Oct-2012
New 100 30-Oct-2012
Conf 100 10-Oct-2012
Update Code :
/// <summary>
/// Pivots <paramref name="dtNormalTable"/>: every distinct value of <paramref name="pivotColumn"/>
/// becomes a text column holding the summed <paramref name="valueColumn"/> per key row, and a
/// column named like <paramref name="valueColumn"/> holds the formatted total of each row.
/// </summary>
/// <param name="dtNormalTable">Source table (must contain at least one row).</param>
/// <param name="pivotColumn">Column whose distinct values become column names; date values are shown as "dd-MMM-yyyy".</param>
/// <param name="valueColumn">Column whose values are summed.</param>
/// <returns>The pivoted table, or <c>null</c> when any error occurs (the error is logged).</returns>
private DataTable DataTableToPivot(DataTable dtNormalTable, DataColumn pivotColumn, DataColumn valueColumn)
{
    cTracing.WriteTrace("Inside DataTableToPivot", "DataTableToPivot");
    try
    {
        // Row total, stored as text so it can carry the configured number format.
        DataColumn sumColumn = new DataColumn();
        sumColumn.ColumnName = valueColumn.ColumnName;
        sumColumn.DataType = typeof(string);
        sumColumn.DefaultValue = "0";

        // Work on a copy in which DBNull cells are replaced by empty strings.
        // NOTE(review): the previous code read a `dt` that is not declared in this method and
        // never used the dtNormalTable argument; the argument is assumed to be the intended
        // source - confirm against the enclosing class.
        DataTable source = dtNormalTable;
        dtNormalTable = source.AsEnumerable()
            .Select(delegate(DataRow r)
            {
                DataRow nr = source.NewRow();
                foreach (DataColumn item in source.Columns)
                {
                    nr.SetField(item.ColumnName, r[item.ColumnName] is DBNull ? string.Empty : r[item.ColumnName]);
                }
                return nr;
            })
            .CopyToDataTable();

        // find primary key columns
        //(i.e. everything but pivot column and pivot value)
        DataTable tempPivot = dtNormalTable.Copy();
        tempPivot.Columns.Remove(pivotColumn.ColumnName);
        tempPivot.Columns.Remove(valueColumn.ColumnName);
        string[] pkColumnNames = tempPivot.Columns
            .Cast<DataColumn>()
            .Select(c => c.ColumnName)
            .ToArray();

        // prep results table: the distinct key combinations
        DataTable dtPivot = tempPivot.DefaultView.ToTable(true, pkColumnNames);
        dtPivot.PrimaryKey = dtPivot.Columns.Cast<DataColumn>().ToArray();

        // include the sum column
        dtPivot.Columns.Add(sumColumn);

        // One text column per distinct pivot caption. The caption comes from the same helper
        // that is used when loading rows, so both steps always agree on the column name.
        var pivotNames = dtNormalTable.AsEnumerable()
            .Select(r => GetPivotColumnName(r[pivotColumn.ColumnName]))
            .Distinct()
            .ToList();
        foreach (string pivotName in pivotNames)
        {
            dtPivot.Columns.Add(pivotName, typeof(string));
        }

        // Format of the row total; numberFormat selects the number of decimals.
        string sumFormat = numberFormat == "3" ? "#0.000"
            : numberFormat == "2" ? "#0.00"
            : numberFormat == "1" ? "#0.0"
            : "#0";

        // load it
        foreach (DataRow row in dtNormalTable.Rows)
        {
            // find row to update
            DataRow aggRow = dtPivot.Rows.Find(
                pkColumnNames
                    .Select(c => row[c])
                    .ToArray());

            string columnName = GetPivotColumnName(row[pivotColumn.ColumnName]);
            decimal value = Convert.ToDecimal(row[valueColumn.ColumnName]);

            // the aggregate used here is SUM
            if (aggRow.IsNull(columnName))
            {
                aggRow[columnName] = value;
            }
            else
            {
                aggRow[columnName] = Convert.ToDecimal(aggRow[columnName]) + value;
            }

            // add the value to the row total
            aggRow[sumColumn] = (Convert.ToDecimal(aggRow[sumColumn].ToString()) + value).ToString(sumFormat);
        }

        // The table is handed to the caller, so it must not be disposed here.
        return dtPivot;
    }
    catch (Exception ex)
    {
        cTracing.WriteTrace("Error inside DataTableToPivot", "DataTableToPivot");
        cLogging.WriteLog(ex);
        return null;
    }
}

/// <summary>
/// Returns the column caption for a pivot value: dates (DateTime values or strings that parse
/// as a date in the current culture) are shown as "dd-MMM-yyyy"; anything else as its text.
/// </summary>
private static string GetPivotColumnName(object value)
{
    if (value is DateTime)
    {
        return ((DateTime)value).ToString("dd-MMM-yyyy");
    }

    string text = value as string;
    DateTime parsed;
    if (text != null && DateTime.TryParse(text, out parsed))
    {
        return parsed.ToString("dd-MMM-yyyy");
    }

    return value.ToString();
}
enter code here
How about adding each row's value to the sum column in the corresponding aggregate row?
/// <summary>
/// Pivots <paramref name="dt"/>: every distinct value of <paramref name="pivotColumn"/> becomes a
/// column (ordered by date) holding the summed <paramref name="pivotValue"/> per key row, and a
/// "SUM" column holds the total of each row.
/// </summary>
/// <param name="dt">Source table; it is not modified.</param>
/// <param name="pivotColumn">Column whose distinct values become column names; their text must be convertible to a date for the ordering.</param>
/// <param name="pivotValue">Column whose values are summed; the values are unboxed as int.</param>
/// <returns>A table keyed on all remaining columns, followed by "SUM" and one column per pivot value.</returns>
DataTable Pivot(DataTable dt, DataColumn pivotColumn, DataColumn pivotValue) {
    DataColumn sumColumn = new DataColumn();
    sumColumn.ColumnName = "SUM";
    sumColumn.DataType = typeof(int);
    sumColumn.DefaultValue = 0;

    // find primary key columns
    //(i.e. everything but pivot column and pivot value)
    DataTable temp = dt.Copy();
    temp.Columns.Remove( pivotColumn.ColumnName );
    temp.Columns.Remove( pivotValue.ColumnName );
    string[] pkColumnNames = temp.Columns.Cast<DataColumn>()
        .Select( c => c.ColumnName )
        .ToArray();

    // prep results table: the distinct key combinations (ToTable already returns a new table)
    DataTable result = temp.DefaultView.ToTable(true, pkColumnNames);
    result.PrimaryKey = result.Columns.Cast<DataColumn>().ToArray();

    // include the sum column
    result.Columns.Add(sumColumn);

    // One column per distinct pivot value. The cells hold pivotValue data, so the columns
    // must use pivotValue's type; with pivotColumn's type a DateTime pivot column could not
    // store the numeric totals.
    var pivotNames = dt.AsEnumerable()
        .Select(r => r[pivotColumn.ColumnName].ToString())
        .Distinct()
        .OrderBy(c => Convert.ToDateTime(c)) // Order by the date
        .ToList();
    foreach (string pivotName in pivotNames) {
        result.Columns.Add(pivotName, pivotValue.DataType);
    }

    // load it
    foreach( DataRow row in dt.Rows ) {
        // find row to update
        DataRow aggRow = result.Rows.Find(
            pkColumnNames
                .Select( c => row[c] )
                .ToArray() );

        string columnName = row[pivotColumn.ColumnName].ToString();
        int value = (int)row[pivotValue.ColumnName];

        // the aggregate used here is SUM
        if(aggRow.IsNull(columnName))
        {
            aggRow[columnName] = value;
        }
        else
        {
            aggRow[columnName] = Convert.ToInt32(aggRow[columnName]) + value;
        }

        // add the value to the row total
        aggRow[sumColumn] = (int)aggRow[sumColumn] + value;
    }
    return result;
}

Optimize queries for Union, Except, Join with LINQ and C#

I have two objects (lists loaded from XML), report and database (shown below in the code), and I need to analyse them and mark each item with 0, 1, 2 or 3 according to the following conditions:
TransactionResultCode = 0; // SUCCESS (all fields are equivalents: [Id, AccountNumber, Date, Amount])
TransactionResultCode = 1; // Exists in report but Not in database
TransactionResultCode = 2; // Exists in database but Not in report
TransactionResultCode = 3; // Field [Id] are equals but other fields [AccountNumber, Date, Amount] are different.
I'd be happy if somebody could find the time to suggest how to optimize some of the queries.
Below is the code:
THANK YOU!!!
//TransactionResultCode = 0 - SUCCESS
//JOIN on all fields
var result0 = from d in database
from r in report
where (d.TransactionId == r.MovementID) &&
(d.TransactionAccountNumber == long.Parse(r.AccountNumber)) &&
(d.TransactionDate == r.MovementDate) &&
(d.TransactionAmount == r.Amount)
orderby d.TransactionId
select new TransactionList()
{
TransactionId = d.TransactionId,
TransactionAccountNumber = d.TransactionAccountNumber,
TransactionDate = d.TransactionDate,
TransactionAmount = d.TransactionAmount,
TransactionResultCode = 0
};
//*******************************************
//JOIN on [Id] field
var joinedList = from d in database
from r in report
where d.TransactionId == r.MovementID
select new TransactionList()
{
TransactionId = d.TransactionId,
TransactionAccountNumber = d.TransactionAccountNumber,
TransactionDate = d.TransactionDate,
TransactionAmount = d.TransactionAmount
};
//Difference report - database
var onlyReportID = report.Select(r => r.MovementID).Except(joinedList.Select(d => d.TransactionId));
//TransactionResultCode = 1 - Not Found in database
var result1 = from o in onlyReportID
from r in report
where (o == r.MovementID)
orderby r.MovementID
select new TransactionList()
{
TransactionId = r.MovementID,
TransactionAccountNumber = long.Parse(r.AccountNumber),
TransactionDate = r.MovementDate,
TransactionAmount = r.Amount,
TransactionResultCode = 1
};
//*******************************************
//Difference database - report
var onlyDatabaseID = database.Select(d => d.TransactionId).Except(joinedList.Select(d => d.TransactionId));
//TransactionResultCode = 2 - Not Found in report
var result2 = from o in onlyDatabaseID
from d in database
where (o == d.TransactionId)
orderby d.TransactionId
select new TransactionList()
{
TransactionId = d.TransactionId,
TransactionAccountNumber = d.TransactionAccountNumber,
TransactionDate = d.TransactionDate,
TransactionAmount = d.TransactionAmount,
TransactionResultCode = 2
};
//*******************************************
var qwe = joinedList.Select(j => j.TransactionId).Except(result0.Select(r => r.TransactionId));
//TransactionResultCode = 3 - Transaction Results are different (Amount, AccountNumber, Date, )
var result3 = from j in joinedList
from q in qwe
where j.TransactionId == q
select new TransactionList()
{
TransactionId = j.TransactionId,
TransactionAccountNumber = j.TransactionAccountNumber,
TransactionDate = j.TransactionDate,
TransactionAmount = j.TransactionAmount,
TransactionResultCode = 3
};
you may try something like below:
// Classifies every item of the report and database lists with a result code:
//   0 - identical item present in both lists
//   1 - present in the report only
//   2 - present in the database only
//   3 - same ID in both lists but different field values
// Relies on Item implementing value equality (Equals/GetHashCode).
public void Test()
{
    var report = new[] {new Item(1, "foo", "boo"), new Item(2, "foo2", "boo2"), new Item(3, "foo3", "boo3")};
    var dataBase = new[] {new Item(1, "foo", "boo"), new Item(2, "foo22", "boo2"), new Item(4, "txt", "rt")};

    // True when an equal item exists in both lists.
    Func<Item, bool> inBothLists = (i) => report.Contains(i) && dataBase.Contains(i);
    // True when the list contains any item with the same ID, whatever its other fields.
    Func<IEnumerable<Item>, Item, bool> containsWithID = (e, i) => e.Select(_ => _.ID).Contains(i.ID);

    Func<Item, int> getCode = i =>
    {
        if (inBothLists(i))
        {
            return 0;
        }
        if (containsWithID(report, i) && containsWithID(dataBase, i))
        {
            return 3;
        }
        // The item's ID exists on one side only. Report-only items are code 1 and
        // database-only items are code 2 (the two codes used to be swapped).
        if (report.Contains(i))
        {
            return 1;
        }
        return 2;
    };

    // Union and Distinct remove the duplicates of items that appear in both lists.
    var result = (from item in dataBase.Union(report) select new {Code = getCode(item), Item = item}).Distinct();
}
/// <summary>
/// Sample record with value equality: two items are equal when ID, Text1 and Text2 all match.
/// Contains, Union and Distinct in the sample rely on this.
/// </summary>
public class Item
{
    public Item(int id, string text1, string text2)
    {
        ID = id;
        Text1 = text1;
        Text2 = text2;
    }

    public int ID { get; set; }
    public string Text1 { get; set; }
    public string Text2 { get; set; }

    /// <summary>Compares all three fields; any non-Item object (or null) is never equal.</summary>
    public override bool Equals(object obj)
    {
        var other = obj as Item;
        return other != null
            && ID == other.ID
            && Text1 == other.Text1
            && Text2 == other.Text2;
    }

    /// <summary>
    /// Combines the same fields that Equals compares. The properties are settable, so do not
    /// change an item while it is stored in a hash-based collection.
    /// </summary>
    public override int GetHashCode()
    {
        unchecked
        {
            int hash = 17;
            hash = hash * 31 + ID;
            hash = hash * 31 + (Text1 == null ? 0 : Text1.GetHashCode());
            hash = hash * 31 + (Text2 == null ? 0 : Text2.GetHashCode());
            return hash;
        }
    }
}
Note that you need to either implement Equals() (and GetHashCode()) for your items, or implement an IEqualityComparer<> and pass it to the Contains() methods.

How to get the Max() of a Count() with LINQ

I'm new to LINQ and I have this situation. I have this table:
ID Date Range
1 10/10/10 9-10
2 10/10/10 9-10
3 10/10/10 9-10
4 10/10/10 8-9
5 10/11/10 1-2
6 10/11/10 1-2
7 10/12/10 5-6
I just want to list the maximum number of rows per date by range, like this:
Date Range Total
10/10/10 9-10 3
10/11/10 1-2 2
10/12/10 5-6 1
I want to do this by using LINQ, do you have any ideas of how to do this?
I think something along these lines should work:
List<MyTable> items = GetItems();
var orderedByMax = from i in items
group i by i.Date into g
let q = g.GroupBy(i => i.Range)
.Select(g2 => new {Range = g2.Key, Count = g2.Count()})
.OrderByDescending(i => i.Count)
let max = q.FirstOrDefault()
select new {
Date = g.Key,
Range = max.Range,
Total = max.Count
};
Using extension methods:
List<MyTable> items = GetItems();
var rangeTotals = items.GroupBy(x => new { x.Date, x.Range }) // Group by Date + Range
.Select(g => new {
Date = g.Key.Date,
Range = g.Key.Range,
Total = g.Count() // Count total of identical ranges per date
});
var rangeMaxTotals = rangeTotals.Where(rt => !rangeTotals.Any(z => z.Date == rt.Date && z.Total > rt.Total)); // Get maximum totals for each date
unfortunately I can't test this at the moment but give this a try:
List<MyTable> items = GetItems();
items.Max(t=>t.Range.Distinct().Count());
This approach:
1) Groups by Date
2) For each Date, groups by Range and calculates the Total
3) For each Date, selects the item with the greatest Total
4) You end up with your result
/// <summary>
/// Sample: for every date, prints the range that occurs most often together with its count.
/// </summary>
public sealed class Program
{
    public static void Main(string[] args)
    {
        var items = new[]
        {
            new { ID = 1, Date = new DateTime(10, 10, 10), Range = "9-10" },
            new { ID = 2, Date = new DateTime(10, 10, 10), Range = "9-10" },
            new { ID = 3, Date = new DateTime(10, 10, 10), Range = "9-10" },
            new { ID = 4, Date = new DateTime(10, 10, 10), Range = "8-9" },
            new { ID = 5, Date = new DateTime(10, 10, 11), Range = "1-2" },
            new { ID = 6, Date = new DateTime(10, 10, 11), Range = "1-2" },
            new { ID = 7, Date = new DateTime(10, 10, 12), Range = "5-6" },
        };

        var itemsWithTotals = items
            .GroupBy(entry => entry.Date)
            .Select(sameDate =>
            {
                // Count the rows of every range within this date...
                var totalsPerRange = sameDate
                    .GroupBy(entry => entry.Range)
                    .Select(sameRange => new
                    {
                        Date = sameDate.Key,
                        Range = sameRange.Key,
                        Total = sameRange.Count()
                    });

                // ...and keep only the range with the highest count.
                return totalsPerRange.MaxElement(candidate => candidate.Total);
            });

        foreach (var winner in itemsWithTotals)
        {
            Console.WriteLine("{0} {1} {2}", winner.Date.ToShortDateString(), winner.Range, winner.Total);
        }

        Console.Read();
    }
}
/// <summary>
/// From the book LINQ in Action, Listing 5.35.
/// </summary>
static class ExtensionMethods
{
    /// <summary>
    /// Returns the element of <paramref name="source"/> whose selected value is the greatest.
    /// On ties the first such element wins; an empty sequence yields <c>default(TElement)</c>.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> or <paramref name="selector"/> is null.</exception>
    public static TElement MaxElement<TElement, TData>(this IEnumerable<TElement> source, Func<TElement, TData> selector) where TData : IComparable<TData>
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        if (selector == null)
        {
            throw new ArgumentNullException("selector");
        }

        using (var cursor = source.GetEnumerator())
        {
            if (!cursor.MoveNext())
            {
                return default(TElement);
            }

            // The first element is the initial best candidate; it is never compared to anything.
            TElement best = cursor.Current;
            TData bestKey = selector(best);

            while (cursor.MoveNext())
            {
                TElement current = cursor.Current;
                TData key = selector(current);

                // Strictly greater, so earlier elements win ties.
                if (key.CompareTo(bestKey) > 0)
                {
                    best = current;
                    bestKey = key;
                }
            }

            return best;
        }
    }
}
According to LINQ in Action (Chapter 5.3.3 - Will LINQ to Objects hurt the performance of my code?), using the MaxElement extension method is one of the most efficient approaches. I think the performance would be O(4n): one pass for the first GroupBy, a second for the second GroupBy, a third for the Count(), and a fourth for the loop within MaxElement.
DrDro's approach is going to be more like O(n^2) since it loops the entire list for each item in the list.
StriplingWarrior's approach is going to be closer to O(n log n) because it sorts the items. Though I'll admit, there may be some crazy magic in there that I don't understand.

Resources