Any one please convert this C++ codes into matlab code
2 views (last 30 days)
Show older comments
#include "naivebayes.h"
using namespace std;
// PUBLIC METHODS
/*
* Returns the created network.
*/
DSL_network* NaiveBayes::getNetwork(){
return network;
}
/*
* Learns the network structure base on the Naive Bayesian approach.
*/
void NaiveBayes::LearnNetwork(DSL_dataset *ds, AlgArguments *args){
network = new DSL_network();
int classifierNode = args->classifierNode;
DSL_naiveBayes naive;
bool wentWell = true;
cout << "Naive Bayes Learning Started ..." << endl;
DSL_variableInfo vi;
ds->GetVariableInfo(classifierNode,vi);
naive.classVariableId = vi.id;
if (naive.Learn(*ds,*network) == DSL_OKAY){
network->UpdateBeliefs();
std::cout << "Successfully learnt " << ds->NumVariables() << " variables using Naive Bayes, with node " << classifierNode << " as classifier." << std::endl;
}
else{
std::cout << "Learning failed for " << classifierNode << std::endl;
}
}
-------------------------
K2 Algorithm
#include "k2.h"
#include "algorithm.h"
using namespace std;
// PRIVATE METHODS
/*
* Creates the arcs from the parents to the current node.
* Note that the parents will always exits as this is due
* to the node ordering, which ensures that all parents are
* created before their children are.
*/
void K2::CreateArcs(int node, vector<int> *parents){
int trainNode = (*nodeOrder)[node];
string nodeID = train->GetId(trainNode);
int handle = network->AddNode(DSL_CPT, nodeID.c_str());
network->GetNode(node)->Definition()->SetNumberOfOutcomes(numStates);
for(int p =0; p < parents->size(); p++){
network->AddArc((*parents)[p], handle);
}
}
/*
* Creates all the possible combinations of the node
* and its parents given the number of states. These
* matricies are used to find the corrilations between
* the states of the current node compared to the
* states of its parents in the ScoreNode method. Eg:
* a node with one parent and two states would generate
* a 2D matrix containing: 0 0, 0 1, 1 0, and 1 1. This
* can be though of as counting using the number of
* states as the base number.
*/
void K2::CreateMatrices(int numStates, int maxNumParents){
com = new vector<int**>;
for(int i =0; i < maxNumParents+1; i++){
int rows = i + 1;
int cols = Power(numStates, rows);
int **d = new int* [rows];
for(int k=0; k < rows; k++){
d[k] = new int[cols];
for(int p=0; p < cols; p++){
d[k][p] = 0;
}
}
com->push_back(d);
}
vector<int> *b = new vector<int>;
for(int i = maxNumParents; i > -1; i--){
b->push_back(Power (numStates, i));
}
for(int i = 0; i < com->size(); i++){
int rows = i+1;
int cols = Power(numStates, rows);
for(int c =0; c < cols; c++){
int val =c;
for(int j = b->size()-rows; j < b->size(); j++){
int base = (*b)[j];
if(val >= base){
int cntr = 0;
while(val >= base){
val -= base;
cntr++;
}
int m = j-(b->size()-rows);
(*com)[i][m][c] = cntr;
}
}
}
}
b->clear();
}
/*
* Calculates the factorial of a number,
* and returns the answer.
*/
double K2::Factorial(double num){
double factorial = 1;
for(long i = 1; i < num+1; i++){
factorial *= i;
}
return factorial;
}
/*
* Calculates the power of a base to an exponent, and
* returns the answer.
*/
int K2::Power(int base, int exponent){
int result = 1;
for(int i =0; i < exponent; i++){
result *= base;
}
return result;
}
/*
* Removes all instances of the "nodeToRemove" node from the vector "parents".
*/
void K2::RemoveNodeFromParents(int nodeToRemove, vector<int> **parents){
vector<int> *temp = new vector<int>;
for(int i =0; i < (*parents)->size(); i++){
if((**parents)[i] != nodeToRemove){
temp->push_back((**parents)[i]);
}
}
(*parents)->clear();
*parents = temp;
}
/*
* This is the implementation of the scoring function proposed by Cooper and Herskovits.
* The function works in by comparing the current node and its parents to a 2D array
* (implemented as com). However note the adaption of the alpha value from representing
* a count in the origional, to representing a percentage in this implementation. This
* was due to the calculation of the factorial of alpha, which is dependant on the
* training set size. When the training set grows above around 100 elements, then its
* factorial can't be reliably calculated. So the count is rather represented a
* percentage of the total training size. Note that the node is also parameterized as
* including this step now reduced construction time significantly.
*/
double K2::ScoreNode(int node, vector<int> *parents, DSL_Dmatrix **dm){
// Sort the parents into accending order first
sort(parents->begin(), parents->end());
int rows = parents->size() + 1;
int cols = Power(numStates, rows);
vector<int> Alpha_ijk;
vector<double> params;
vector<double> d;
vector<int> N_ij;
// Observe at least 1 occourance which therefore prevents the probabiity of
// an unobserved state being zero.
Alpha_ijk.resize(cols, 1);
params.resize(cols, 1);
N_ij.resize(cols/numStates);
d.resize(cols/numStates);
int trainNode = (*nodeOrder)[node];
vector<DSL_dataElement> nodeData = train->GetVariableData(trainNode);
DSL_intArray dim;
for(int y=0; y < rows; y++){
dim.Add(numStates);
}
*dm = new DSL_Dmatrix(dim);
int trainSize = train->NumRecords();
for(int d =0; d < trainSize; d++){
for(int c =0; c < cols; c++){
int inParents = 0;
for(int r = 0; r < parents->size(); r++){
int parentValue = (*com)[parents->size()][r][c];
int trainParent = (*parents)[r];
vector<DSL_dataElement> parentData = train->GetVariableData(trainParent);
if(parentValue == parentData[d].i){
inParents++;
}
}
int nodeValue = (*com)[parents->size()][rows-1][c];
bool found = false;
if(nodeValue == nodeData[d].i){
found = true;
}
if(found == true && inParents == parents->size()){
Alpha_ijk[c]++;
params[c]++;
}
}
}
for(int i =0; i < N_ij.size(); i++){
int start = i * numStates;
int end = start + numStates;
for(int j = start ; j < end; j++){
// Alpha represented as a percentage
Alpha_ijk[j] = (double)Alpha_ijk[j]/(double)trainSize*100;
N_ij[i] += Alpha_ijk[j];
d[i] += params[j];
}
}
// Parametrizing the node with its potential parents
for(int m =0; m < params.size(); m++){
int k = m/numStates;
if (d[k] == 0) {
params[m] = 1/(float)numStates;
}
else{
params[m] = params[m]/(double)d[k];
}
(**dm)[m] = params[m];
}
// Calculating the score
double numerator = Factorial(numStates-1);
double score = 1;
for(int i =0; i < N_ij.size(); i++){
double denominator = Factorial(N_ij[i] + numStates -1);
if(denominator == 0) denominator = 1;
double fraction = numerator/denominator;
double factorial = 1;
int start = i * numStates;
int end = start + numStates;
for(int j = start ; j < end; j++){
factorial *= Factorial(Alpha_ijk[j]);
}
score *= fraction * factorial;
}
return score;
}
// PUBLIC METHODS
/*
* Returns the created network.
*/
DSL_network* K2::GetNetwork(){
network->UpdateBeliefs();
return network;
}
/*
* This is the implementation of the K2 algorithm proposed by Cooper and Herskovits.
* The scoring fuction is implemented in the ScoreNode method. Note that we have
* included the parameterization step in the ScoreNode function aswell, as this
* was found to reduce construction time.
*/
void K2::LearnNetwork(DSL_dataset *train, AlgArguments *args){
// Creating the node order, which is currently the order in the training dataset.
for(int i = 0; i < nodeOrder->size();i++){
(*nodeOrder)[i] = i;
}
numStates = args->numStates;
double networkScore = 1;
int maxNumParents = args->maxNumParents;
CreateMatrices(numStates, maxNumParents);
// The implementation of the K2 algorithm
for(int i = 0 ; i < maxNumNodes; i++){
vector<int> * parents = new vector<int>;
DSL_Dmatrix *bestDM = NULL;
DSL_Dmatrix *oldBestDM = new DSL_Dmatrix;
double oldScore = ScoreNode(i, parents, &bestDM);
bool OkToProceed = true;
double maxScore = -1;
while( OkToProceed && parents->size() < maxNumParents && i > 0){
int bestNode = -1;
*oldBestDM = *bestDM;
bestDM == NULL;
for(int k = 0; k < i; k++){
// A check if the potential parent is already a parent.
bool alreadyAParent = false;
for(int p =0; p < parents->size(); p++){
if( k == (*parents)[p]){
alreadyAParent = true;
}
}
if(alreadyAParent == false){
parents->push_back(k);
DSL_Dmatrix *dm = NULL;
double newScore = ScoreNode(i, parents, &dm);
if(newScore > maxScore){
maxScore = newScore;
bestNode = k;
*bestDM = *dm;
}
dm->CleanUp();
RemoveNodeFromParents(k, &parents);
}
}
// If a parent node was found to maximize the score of the current node
// then it becomes a parent of the current node, else if no parent was
// found to maximize the score, then move to calculate the score of the
// next node. Note, that there is also a limit on the maximum number of
// parents, which is found in the parameters of the while loop.
if(maxScore > oldScore){
oldScore = maxScore;
parents->push_back(bestNode);
}
else{
OkToProceed = false;
*bestDM = *oldBestDM;
}
}
CreateArcs(i, parents);
network->GetNode(i)->Definition()->SetDefinition(*bestDM);
networkScore *= oldScore;
cout << "Final score for node " << i+1 << " was = " << oldScore << " ";
cout << "Final num Parents : " << parents->size() << ", with nodes: ";
for(int m =0; m < parents->size();m++){
cout << (*parents)[m]+1 << " ";
}
cout << endl;
}
cout << "Final network score of " << networkScore << ", number of parents = " << maxNumParents << endl;
}
-----------------------
greedy thick thinning
#include "greedythickthinning.h"
using namespace std;
/*
* The default implementation of the Greedy Think Thinning algorithm.
*/
void GreedyThickThinning::LearnNetwork(DSL_dataset *train, AlgArguments *args){
network = new DSL_network;
DSL_greedyThickThinning gtt;
gtt.maxParents = args->maxNumParents;
int msg = gtt.Learn(*(train),*network);
if(msg == DSL_OKAY){
network->UpdateBeliefs();
cout << "Successfully learnt a network using the GreedyThickThinning algorithm" << endl;
}
else{
cout << "Error: There was an error with learning the network using the GreedyThickThinning algorithm" << endl;
}
}
DSL_network* GreedyThickThinning::GetNetwork(){
return network;
}
3 Comments
Answers (1)
See Also
Community Treasure Hunt
Find the treasures in MATLAB Central and discover how the community can help you!
Start Hunting!