using System;
using System.Collections.Generic;
using System.Data;
using System.Globalization;
using System.Text;
using Microsoft.AnalysisServices.AdomdServer;

namespace AccuracyEx
{
    /// <summary>
    /// Server-side stored procedure that reports how cross-validation measures of a
    /// mining model change as the number of cases used for cross-validation grows.
    /// </summary>
    public class ModelingDataEvaluation
    {
        /// <summary>Row count used for the first cross-validation run.</summary>
        private const int InitialRowCount = 10;

        /// <summary>Partition count passed to cross-validation for the sampled (doubling) runs.</summary>
        private const int SampledRunPartitions = 2;

        /// <summary>Partition count passed to cross-validation for the final run (row count 0).</summary>
        private const int FinalRunPartitions = 10;

        /// <summary>
        /// Largest row count that may still be doubled: <see cref="EvaluateData"/> passes
        /// <c>currentRowCount * 2</c> to the server, so the doubled value must itself be
        /// safe to double without overflowing <see cref="int"/>.
        /// </summary>
        private const int MaxRowCountBeforeDoubling = int.MaxValue / 4;

        /// <summary>
        /// Runs cross-validation repeatedly, doubling the requested row count each time
        /// (10, 20, 40, ...) until <see cref="EvaluateData"/> reports that there is not
        /// enough training data, then runs it once more with a row count of 0.
        /// </summary>
        /// <param name="model">Name of the mining model, looked up in <c>Context.MiningModels</c>.</param>
        /// <param name="targetAttribute">Target attribute passed to the cross-validation call.</param>
        /// <param name="measure">
        /// Name of the single measure to report (compared case-insensitively); null or empty
        /// reports every measure returned by the server.
        /// </param>
        /// <returns>
        /// A table with columns TrainingSetSize, Measure, Mean and StdDev, one row per
        /// measure per run. When executed for prepare, only the empty schema is returned.
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The parent mining structure of the model has not been processed.
        /// </exception>
        [SafeToPrepare(true)]
        public DataTable EvaluateModelingData(string model, string targetAttribute, string measure)
        {
            DataTable tblReturn = new DataTable();
            tblReturn.Columns.Add("TrainingSetSize", typeof(Int32));
            tblReturn.Columns.Add("Measure", typeof(string));
            tblReturn.Columns.Add("Mean", typeof(double));
            tblReturn.Columns.Add("StdDev", typeof(double));

            // During prepare the caller only needs the result schema.
            if (Context.ExecuteForPrepare)
            {
                return tblReturn;
            }

            MiningModel targetModel = Context.MiningModels[model];
            MiningStructure parentStructure = targetModel.Parent;
            if (!parentStructure.IsProcessed)
            {
                throw new InvalidOperationException(
                    "The mining structure must be processed before executing this procedure");
            }

            int currentRowCount = InitialRowCount;
            while (EvaluateData(targetModel, targetAttribute, currentRowCount, measure, tblReturn))
            {
                // Stop doubling before currentRowCount (or the doubled value sent to the
                // server) could overflow a 32-bit integer.
                if (currentRowCount > MaxRowCountBeforeDoubling)
                {
                    break;
                }

                currentRowCount *= 2;
            }

            // Final run with a row count of 0 and the larger partition count.
            EvaluateData(targetModel, targetAttribute, 0, measure, tblReturn);
            return tblReturn;
        }

        /// <summary>
        /// Executes <c>SystemGetCrossValidationResults</c> for the model, aggregates the
        /// returned values per measure, and appends one row per measure (training set
        /// size, measure name, mean, standard deviation) to <paramref name="tblReturn"/>.
        /// </summary>
        /// <param name="model">Mining model to evaluate; its parent structure name is also used.</param>
        /// <param name="targetAttribute">Target attribute passed to the cross-validation call.</param>
        /// <param name="currentRowCount">
        /// Requested training row count. Twice this value is sent as the fourth argument of
        /// the call; 0 switches the partition count from 2 to 10 and sends 0.
        /// </param>
        /// <param name="measure">
        /// Measure name filter (case-insensitive); null or empty keeps every measure.
        /// </param>
        /// <param name="tblReturn">Result table that receives the aggregated rows.</param>
        /// <returns>
        /// <c>false</c> when the computed training set size, (partitions - 1) times the
        /// largest reported PartitionSize, is smaller than <paramref name="currentRowCount"/>;
        /// otherwise <c>true</c>.
        /// </returns>
        protected bool EvaluateData(MiningModel model, string targetAttribute, int currentRowCount, string measure, DataTable tblReturn)
        {
            int partitions = (currentRowCount == 0) ? FinalRunPartitions : SampledRunPartitions;

            // Names and the attribute literal are escaped so that a ']' or '\'' inside them
            // cannot terminate the identifier/literal early. Invariant culture keeps the
            // numeric arguments free of culture-specific formatting.
            string command = string.Format(
                CultureInfo.InvariantCulture,
                "CALL SystemGetCrossValidationResults([{0}], [{1}], {2}, {3}, '{4}')",
                EscapeIdentifier(model.Parent.Name),
                EscapeIdentifier(model.Name),
                partitions,
                currentRowCount * 2,
                EscapeStringLiteral(targetAttribute));

            bool allMeasures = string.IsNullOrEmpty(measure);
            Dictionary<string, MeasureAggregation> aggregations =
                new Dictionary<string, MeasureAggregation>();
            int largestPartitionSize = 0;

            AdomdCommand cmd = new AdomdCommand();
            cmd.CommandText = command;

            // 'using' guarantees the reader is released even if reading a row throws.
            using (AdomdDataReader rdr = cmd.ExecuteReader())
            {
                // NOTE(review): the original code subtracts 1 from every GetOrdinal result;
                // presumably the server-side reader reports ordinals offset by one. The
                // adjustment is preserved as-is -- confirm against the AdomdServer version in use.
                int sizeColumn = rdr.GetOrdinal("PartitionSize") - 1;
                int measureColumn = rdr.GetOrdinal("Measure") - 1;
                int valueColumn = rdr.GetOrdinal("Value") - 1;

                while (rdr.Read())
                {
                    // Track the largest partition to estimate the training set size below.
                    int partitionSize = rdr.GetInt32(sizeColumn);
                    if (largestPartitionSize < partitionSize)
                    {
                        largestPartitionSize = partitionSize;
                    }

                    string measureName = rdr.GetString(measureColumn);
                    if (!allMeasures &&
                        !string.Equals(measureName, measure, StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    MeasureAggregation aggregation;
                    if (!aggregations.TryGetValue(measureName, out aggregation))
                    {
                        aggregation = new MeasureAggregation();
                        aggregations[measureName] = aggregation;
                    }

                    aggregation.AddValue(rdr.GetDouble(valueColumn));
                }
            }

            // All partitions but one are counted as training data.
            int trainingSetSize = (partitions - 1) * largestPartitionSize;
            bool hasEnoughTrainingData = trainingSetSize >= currentRowCount;

            foreach (KeyValuePair<string, MeasureAggregation> entry in aggregations)
            {
                tblReturn.Rows.Add(
                    new object[] { trainingSetSize, entry.Key, entry.Value.Mean, entry.Value.StdDev });
            }

            return hasEnoughTrainingData;
        }

        /// <summary>Doubles ']' so the name can be embedded inside a [bracketed] identifier.</summary>
        private static string EscapeIdentifier(string name)
        {
            return name == null ? string.Empty : name.Replace("]", "]]");
        }

        /// <summary>Doubles single quotes so the text can be embedded inside a '...' literal.</summary>
        private static string EscapeStringLiteral(string text)
        {
            return text == null ? string.Empty : text.Replace("'", "''");
        }
    }

    /// <summary>
    /// Collects a series of values for one measure and exposes their mean and
    /// standard deviation.
    /// </summary>
    class MeasureAggregation
    {
        /// <summary>Values added so far, in insertion order.</summary>
        protected List<double> values = new List<double>();

        public MeasureAggregation()
        {
        }

        /// <summary>Adds one observation to the aggregation.</summary>
        /// <param name="dVal">Value to record.</param>
        public void AddValue(double dVal)
        {
            values.Add(dVal);
        }

        /// <summary>Arithmetic mean of the recorded values; 0.0 when no value was added.</summary>
        public double Mean
        {
            get
            {
                if (values.Count == 0)
                {
                    return 0.0;
                }

                double sum = 0.0;
                foreach (double d in values)
                {
                    sum += d;
                }

                return sum / values.Count;
            }
        }

        /// <summary>
        /// Standard deviation of the recorded values, dividing the sum of squared
        /// deviations by the value count (not count - 1); 0.0 when no value was added.
        /// </summary>
        public double StdDev
        {
            get
            {
                if (values.Count == 0)
                {
                    return 0.0;
                }

                double mean = Mean;
                double sumOfSquares = 0.0;
                foreach (double d in values)
                {
                    double deviation = d - mean;
                    sumOfSquares += deviation * deviation;
                }

                return Math.Sqrt(sumOfSquares / values.Count);
            }
        }
    }
}