/* Page header of the source document: "数据仓库与数据挖掘" (Data Warehousing and Data Mining). */
}
dEntropy += entropy((double)iTrue1,(double)iFalse1,(double)l); iTrue1 = 0; iFalse1 = 0; m++; }
double dSplitinfo = splitinfo(iRecord,(double)l); if (-1 == iTestAttribute) {
iTestAttribute = k; dGainratio =
(info((double)iTrue,(double)iFalse)-dEntropy)/dSplitinfo; } else {
test =
(info((double)iTrue,(double)iFalse)-dEntropy)/dSplitinfo; if (dGainratio < test) {
iTestAttribute = k; dGainratio = test; } } } }
return iTestAttribute; }
double info(double dTrue,double dFalse)
{
double dInfo = 0.0; dInfo =
((dTrue/(dTrue+dFalse))*(log(dTrue/(dTrue+dFalse))/log(2.0))+(dFalse/(dTrue+dFalse))*(log(dFalse/(dTrue+dFalse))/log(2.0)))*(-1); return dInfo; }
/*
 * entropy - info() of one partition, scaled by the partition's share of dAll.
 *
 * dTrue:  count of the first class inside the partition.
 * dFalse: count of the second class inside the partition.
 * dAll:   total the partition size is divided by (presumably the number
 *         of samples in the set being split - confirm against the caller).
 *
 * Returns (dTrue + dFalse) * info(dTrue, dFalse) / dAll.
 * dAll is not checked for zero.
 */
double entropy(double dTrue, double dFalse, double dAll)
{
    const double dPartitionSize = dTrue + dFalse;
    const double dPartitionInfo = info(dTrue, dFalse);

    /* Same evaluation order as (size * info) / dAll. */
    return dPartitionSize * dPartitionInfo / dAll;
}
/* Page header of the source document: "数据仓库与数据挖掘" (Data Warehousing and Data Mining). */
double splitinfo(int* list,double dAll)
{
int k = 0;
double dSplitinfo = 0.0; while (0!=list[k]) {
dSplitinfo -=
((double)list[k]/(double)dAll)*(log((double)list[k]/(double)dAll)); k++; }
return dSplitinfo; }
/*
 * check_samples - report whether a sample subset is mixed with respect to
 * the last column of iInput.
 *
 * iSamples: row indices into iInput, terminated by -1; at most j entries
 *           are read.
 *
 * Returns 1 as soon as a listed row differs from the first listed row in
 * column i-1 (presumably the class label - confirm against the data
 * layout), 0 when all listed rows agree or the list is empty.
 *
 * The rows are looked up through iSamples[k], as check_ordinary() and
 * get_attributes() do.  Indexing iInput with the loop counter itself
 * would inspect the first rows of the whole table rather than the subset.
 */
int check_samples(int *iSamples)
{
    int k;

    if (j <= 0 || -1 == iSamples[0]) {
        return 0;
    }

    /* Bound is tested before iSamples[k] is read. */
    for (k = 1; (k < j) && (-1 != iSamples[k]); k++) {
        if (iInput[iSamples[k]][i-1] != iInput[iSamples[0]][i-1]) {
            return 1;
        }
    }

    return 0;
}
int check_ordinary(int *iSamples) {
int k = 0;
int iTrue = 0; int iFalse = 0;
while ((-1 != iSamples[k])&&(k < i)) {
if (0 == iInput[iSamples[k]][i-1]) {
iFalse++; } else {
数据仓库与数据挖掘
iTrue++; } k++; }
if (iTrue >= iFalse) {
return 1; } else {
return 0; } }
/*
 * check_attribute_null - test whether an attribute list holds only -1.
 *
 * iAttribute: array of which the first i-1 entries are examined.
 *
 * Returns 1 when every examined entry equals -1 (also when i-1 <= 0, so
 * nothing is examined), 0 as soon as any other value is found.
 */
int check_attribute_null(int *iAttribute)
{
    int idx;

    for (idx = 0; idx < (i-1); ++idx) {
        if (iAttribute[idx] != -1) {
            return 0;
        }
    }

    return 1;
}
/*
 * get_attributes - collect the distinct values one column takes over a
 * sample subset.
 *
 * iSamples:        row indices into iInput, terminated by -1; at most j
 *                  entries are read.
 * iAttributeValue: output list, scanned up to its first -1 entry.  Each
 *                  value not already present overwrites that -1.
 *                  NOTE(review): this relies on the caller having filled
 *                  the array with -1 and sized it for every distinct value
 *                  plus a trailing -1; the scan has no other bound.
 * iAttribute:      column of iInput to read.
 */
void get_attributes(int *iSamples,int *iAttributeValue,int iAttribute)
{
    int k;
    int l;

    /* Bound is tested before iSamples[k] is read. */
    for (k = 0; (k < j) && (-1 != iSamples[k]); k++) {
        /* Look for the row's value among those collected so far. */
        l = 0;
        while ((-1 != iAttributeValue[l]) &&
               (iInput[iSamples[k]][iAttribute] != iAttributeValue[l])) {
            l++;
        }

        /* Reached the end marker: the value is new, so append it. */
        if (-1 == iAttributeValue[l]) {
            iAttributeValue[l] = iInput[iSamples[k]][iAttribute];
        }
    }
}