using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using Microsoft.AnalysisServices.AdomdServer;
namespace AccuracyEx
{
/// <summary>
/// Server-side stored procedure that runs cross-validation on a mining model over
/// progressively larger slices of the training data and reports, for each slice,
/// the mean and standard deviation of every requested accuracy measure.
/// </summary>
public class ModelingDataEvaluation
{
    /// <summary>
    /// Evaluates a mining model at increasing training-set sizes.
    /// The requested row count starts at 10 and doubles until
    /// <see cref="EvaluateData"/> reports that there is not enough data; a final
    /// pass is then made with a row count of 0 (10 partitions).
    /// </summary>
    /// <param name="model">Name of the mining model, looked up in <c>Context.MiningModels</c>.</param>
    /// <param name="targetAttribute">Target attribute passed to the cross-validation call.</param>
    /// <param name="measure">
    /// Name of the single measure to report (compared case-insensitively), or
    /// null/empty to report every measure returned by cross-validation.
    /// </param>
    /// <returns>
    /// A table with columns TrainingSetSize (Int32), Measure (string), Mean (double)
    /// and StdDev (double). When executing for prepare, only the empty schema is returned.
    /// </returns>
    /// <exception cref="InvalidOperationException">
    /// The parent mining structure of the model has not been processed.
    /// </exception>
    [SafeToPrepare(true)]
    public DataTable EvaluateModelingData(string model, string targetAttribute, string measure)
    {
        DataTable tblReturn = new DataTable();
        tblReturn.Columns.Add("TrainingSetSize", typeof(Int32));
        tblReturn.Columns.Add("Measure", typeof(string));
        tblReturn.Columns.Add("Mean", typeof(double));
        tblReturn.Columns.Add("StdDev", typeof(double));

        // During prepare only the result schema is needed; do not touch the model.
        if (Context.ExecuteForPrepare)
        {
            return tblReturn;
        }

        MiningModel targetModel = Context.MiningModels[model];
        MiningStructure parentStructure = targetModel.Parent;
        if (!parentStructure.IsProcessed)
        {
            throw new InvalidOperationException("The mining structure must be processed before executing this procedure");
        }

        // Double the requested training-set size until the data runs out.
        int iCurrentRowCount = 10;
        while (EvaluateData(targetModel, targetAttribute, iCurrentRowCount, measure, tblReturn))
        {
            iCurrentRowCount *= 2;
        }

        // Final pass: a row count of 0 switches EvaluateData to 10 partitions.
        EvaluateData(targetModel, targetAttribute, 0, measure, tblReturn);
        return tblReturn;
    }

    /// <summary>
    /// Runs <c>SystemGetCrossValidationResults</c> for the model, aggregates the
    /// returned values per measure, and appends one row per measure to
    /// <paramref name="tblReturn"/>.
    /// </summary>
    /// <param name="model">Mining model to cross-validate; its parent structure name is also used.</param>
    /// <param name="targetAttribute">Target attribute passed as a string literal to the call.</param>
    /// <param name="currentRowCount">
    /// Requested training-set size. Twice this value is passed as the fourth argument
    /// of the call with 2 partitions; a value of 0 uses 10 partitions instead.
    /// </param>
    /// <param name="measure">Measure to keep (case-insensitive), or null/empty to keep all measures.</param>
    /// <param name="tblReturn">Result table that receives one row per aggregated measure.</param>
    /// <returns>
    /// <c>false</c> if the effective training-set size, computed as
    /// (partitions - 1) * largest partition size, is smaller than
    /// <paramref name="currentRowCount"/>; otherwise <c>true</c>.
    /// </returns>
    protected bool EvaluateData(MiningModel model, string targetAttribute, int currentRowCount, string measure, DataTable tblReturn)
    {
        int iPartitions = (currentRowCount == 0) ? 10 : 2;

        // Invariant culture keeps the numeric arguments machine-readable regardless
        // of the server locale; names and the string literal are escaped so that
        // ']' or '\'' inside them cannot break out of the statement.
        string command = string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            "CALL SystemGetCrossValidationResults([{0}], [{1}], {2}, {3}, '{4}')",
            EscapeBracketedName(model.Parent.Name),
            EscapeBracketedName(model.Name),
            iPartitions,
            currentRowCount * 2,
            EscapeStringLiteral(targetAttribute));

        bool bAllMeasures = string.IsNullOrEmpty(measure);
        Dictionary<string, MeasureAggregation> dictMeasures = new Dictionary<string, MeasureAggregation>();
        int iTrainingSetSize = 0;

        AdomdCommand cmd = new AdomdCommand();
        cmd.CommandText = command;
        AdomdDataReader rdr = cmd.ExecuteReader();
        try
        {
            // NOTE(review): the original code subtracts 1 from every GetOrdinal
            // result and that is preserved here. Presumably it compensates for an
            // ordinal offset in the server-side reader - confirm before changing.
            int iSizeColumn = rdr.GetOrdinal("PartitionSize") - 1;
            int iMeasureColumn = rdr.GetOrdinal("Measure") - 1;
            int iValueColumn = rdr.GetOrdinal("Value") - 1;

            while (rdr.Read())
            {
                // Track the largest partition size seen in the result set.
                int iSize = rdr.GetInt32(iSizeColumn);
                if (iTrainingSetSize < iSize)
                {
                    iTrainingSetSize = iSize;
                }

                string strMeasure = rdr.GetString(iMeasureColumn);
                if (bAllMeasures || string.Equals(strMeasure, measure, StringComparison.OrdinalIgnoreCase))
                {
                    MeasureAggregation agg;
                    if (!dictMeasures.TryGetValue(strMeasure, out agg))
                    {
                        agg = new MeasureAggregation();
                        dictMeasures[strMeasure] = agg;
                    }
                    agg.AddValue(rdr.GetDouble(iValueColumn));
                }
            }
        }
        finally
        {
            // Always release the reader, even if reading a row throws.
            rdr.Close();
        }

        // One partition is held out, so the rest form the training set.
        iTrainingSetSize = (iPartitions - 1) * iTrainingSetSize;

        foreach (KeyValuePair<string, MeasureAggregation> pair in dictMeasures)
        {
            tblReturn.Rows.Add(iTrainingSetSize, pair.Key, pair.Value.Mean, pair.Value.StdDev);
        }

        return iTrainingSetSize >= currentRowCount;
    }

    /// <summary>Doubles ']' so the name can be placed inside a [bracketed] identifier.</summary>
    private static string EscapeBracketedName(string name)
    {
        return (name == null) ? name : name.Replace("]", "]]");
    }

    /// <summary>Doubles single quotes so the value can be placed inside a '...' literal.</summary>
    private static string EscapeStringLiteral(string value)
    {
        return (value == null) ? value : value.Replace("'", "''");
    }
}
/// <summary>
/// Collects a series of numeric observations and reports their arithmetic mean
/// and population standard deviation.
/// </summary>
class MeasureAggregation
{
    /// <summary>Observations recorded so far, in the order they were added.</summary>
    protected List<double> values = new List<double>();

    public MeasureAggregation()
    {
    }

    /// <summary>Records one observation.</summary>
    /// <param name="dVal">The value to add to the series.</param>
    public void AddValue(double dVal)
    {
        values.Add(dVal);
    }

    /// <summary>
    /// Arithmetic mean of the recorded values, or 0.0 when nothing has been recorded.
    /// </summary>
    public double Mean
    {
        get
        {
            if (values.Count == 0)
            {
                return 0.0;
            }

            double dSum = 0.0;
            foreach (double d in values)
            {
                dSum += d;
            }
            return dSum / values.Count;
        }
    }

    /// <summary>
    /// Population standard deviation of the recorded values (the sum of squared
    /// deviations is divided by N, not N - 1), or 0.0 when nothing has been recorded.
    /// </summary>
    public double StdDev
    {
        get
        {
            if (values.Count == 0)
            {
                return 0.0;
            }

            double dMean = Mean;
            double dSumSquares = 0.0;
            foreach (double d in values)
            {
                double dDelta = d - dMean;
                dSumSquares += dDelta * dDelta;
            }
            return Math.Sqrt(dSumSquares / values.Count);
        }
    }
}
}