ClusterMain.cpp
上传用户:szb0815
上传日期:2007-06-13
资源大小:338k
文件大小:150k
- if (Dist[i][j] < minval[i])
- {
- minval[i] = Dist[i][j];
- minpair[i] = j;
- }
- if (Dist[i][j] < minval[j])
- {
- minval[j] = Dist[i][j];
- minpair[j] = i;
- }
- }
- StatusBar->SimpleText = "Calculating Distances " + AnsiString(i*(i+1)/2) + " of "
- + AnsiString(TotalDistance);
- Application->ProcessMessages();
- }
- TStringList *MinList = new TStringList();
- for (i=0;i<ClusterRows;i++)
- {
- MinList->Add(AnsiString(minval[i]));
- }
- MinList->SaveToFile("minlist.txt");
- delete MinList;
- bool *Active;
- Active = new bool[2*ClusterRows-1];
- for (i=0;i<ClusterRows;i++)
- {
- Active[i] = true;
- }
- for (i=ClusterRows;i<2*ClusterRows-1;i++)
- {
- Active[i] = false;
- }
- unsigned short minminval;
- int min1, min2;
- int nodeindex;
- //int node;
- /* Now we join nodes */
- /* If we are going to return TNode, need to set up TNodes */
- TNode **Nodes;
- if (ReturnTNode)
- {
- Nodes = new TNode*[2*ClusterRows-1];
- for (i=0;i<2*ClusterRows-1;i++)
- {
- Nodes[i] = new TNode();
- if (i < ClusterRows)
- {
- Nodes[i]->IsNode = false;
- Nodes[i]->ID = AnsiString(i);
- }
- else
- {
- Nodes[i]->IsNode = true;
- }
- }
- }
- for (nodeindex=ClusterRows; nodeindex<2*ClusterRows-1;nodeindex++)
- {
- ID[nodeindex] = "NODE" + AnsiString(nodeindex-ClusterRows+1) + "X";
- minminval = 32769;
- for (i=0;i<2*ClusterRows-1;i++)
- {
- if (Active[i] == true)
- {
- if (minval[i] < minminval)
- {
- minminval = minval[i];
- min1 = i;
- min2 = minpair[i];
- }
- }
- }
- /* When we get here min1 and min2 are the elements that are to be joined */
- Active[min1] = false;
- Active[min2] = false;
- if (Order[min1] > Order[min2])
- {
- NodeElement[nodeindex][0] = min1;
- NodeElement[nodeindex][1] = min2;
- }
- else
- {
- NodeElement[nodeindex][1] = min1;
- NodeElement[nodeindex][0] = min2;
- }
- NodeDistance[nodeindex] = ((double) minminval)/16384.0;
- NodeDistance[nodeindex] = max(NodeDistance[nodeindex],NodeDistance[min1]);
- NodeDistance[nodeindex] = max(NodeDistance[nodeindex],NodeDistance[min2]);
- if (ReturnTNode)
- {
- Nodes[nodeindex]->Child1 = Nodes[NodeElement[nodeindex][0]];
- Nodes[nodeindex]->Child2 = Nodes[NodeElement[nodeindex][1]];
- Nodes[nodeindex]->Length = 1.0 - NodeDistance[nodeindex];
- }
- if (CalcInferredDistances == true)
- {
- int elem1,elem2,ielem1,ielem2;
- for (elem1=0;elem1<MemberList[min1]->Count;elem1++)
- {
- for (elem2=0;elem2<MemberList[min2]->Count;elem2++)
- {
- ielem1 = (MemberList[min1]->Strings[elem1]).ToInt();
- ielem2 = (MemberList[min2]->Strings[elem2]).ToInt();
- TreeDist[ielem1][ielem2] = (NodeDistance[nodeindex] * 16384);
- TreeDist[ielem2][ielem1] = (NodeDistance[nodeindex] * 16384);
- }
- }
- MemberList[nodeindex]->AddStrings(MemberList[min1]);
- MemberList[nodeindex]->AddStrings(MemberList[min2]);
- delete MemberList[min1];
- delete MemberList[min2];
- }
- OutString = ID[nodeindex] + AnsiString("t") + ID[NodeElement[nodeindex][0]]
- + AnsiString("t") + ID[NodeElement[nodeindex][1]] + AnsiString("t")
- + AnsiString(1.0 - NodeDistance[nodeindex]);
- TreeFile->Add(OutString);
- StatusBar->SimpleText = "Generating Node " + AnsiString(nodeindex-ClusterRows+1) + " of " + AnsiString(ClusterRows-1);
- Application->ProcessMessages();
- elements[nodeindex] = elements[min1] + elements[min2];
- for (k=0;k<ClusterColumns;k++)
- {
- if ( (ClusterMask[min1][k] == true) && (ClusterMask[min2][k] == true) )
- {
- ClusterData[nodeindex][k] =
- (elements[min1] * ClusterData[min1][k] +
- elements[min2] * ClusterData[min2][k]) /
- (elements[min1] + elements[min2]);
- ClusterMask[nodeindex][k] = true;
- }
- else if (ClusterMask[min1][k] == true)
- {
- ClusterData[nodeindex][k] = ClusterData[min1][k];
- ClusterMask[nodeindex][k] = true;
- }
- else if (ClusterMask[min2][k] == true)
- {
- ClusterData[nodeindex][k] = ClusterData[min2][k];
- ClusterMask[nodeindex][k] = true;
- }
- else
- {
- ClusterMask[nodeindex][k] = false;
- }
- }
- Order[nodeindex] = (elements[min1]*Order[min1] + elements[min2]*Order[min2])/(elements[min1] + elements[min2]);
- Dist[nodeindex] = new unsigned short[2*ClusterRows-1];
- if (SaveDistances == false)
- {
- delete Dist[min1];
- delete Dist[min2];
- }
- List1->Clear();
- Nodes[nodeindex]->SetList(List1);
- int NumList1 = List1->Count;
- for (int i1=0;i1<NumList1;i1++)
- {
- iList1[i1] = List1->Strings[i1].ToInt();
- }
- for (l=0;l<2*ClusterRows-1;l++)
- {
- if (Active[l] == true)
- {
- // OK. Here's where it gets tricky
- List2->Clear();
- if (l < ClusterRows)
- {
- List2->Add(AnsiString(l));
- }
- else
- {
- Nodes[l]->SetList(List2);
- }
- int NumList2 = List2->Count;
- for (int i2=0;i2<NumList2;i2++)
- {
- iList2[i2] = List2->Strings[i2].ToInt();
- }
- Dist[nodeindex][l] = 0;
- for (int i1=0; i1 < NumList1; i1++)
- {
- for (int i2=0; i2 < NumList2; i2++)
- {
- Dist[nodeindex][l] = max(Dist[nodeindex][l],Dist[iList1[i1]][iList2[i2]]);
- }
- }
- Dist[l][nodeindex] = Dist[nodeindex][l];
- if (Dist[nodeindex][l] < minval[nodeindex])
- {
- minval[nodeindex] = Dist[nodeindex][l];
- minpair[nodeindex] = l;
- }
- //Rescan for min if minpair was one of the fused elements
- if ( (minpair[l] == min1) || (minpair[l] == min2) )
- {
- minval[l] = 32769;
- for (m=0;m<2*ClusterRows-1;m++)
- {
- if ( (Active[m] == true) && (l != m) )
- {
- if (Dist[l][m] < minval[l])
- {
- minval[l] = Dist[l][m];
- minpair[l] = m;
- }
- }
- }
- }
- //otherwise check new distance to see if it is new min
- else if (Dist[nodeindex][l] < minval[l])
- {
- minval[l] = Dist[nodeindex][l];
- minpair[l] = nodeindex;
- }
- }
- }
- Active[nodeindex] = true;
- }
- if (SaveDistances == false)
- {
- delete Dist[2*ClusterRows-2];
- }
- TList *NewList = new TList();
- TList *OutList = new TList();
- int **Elements;
- Elements = new int*[2*ClusterRows-1];
- for (i=0;i<2*ClusterRows-1;i++)
- {
- Elements[i] = new int;
- *Elements[i] = i;
- }
- OutList->Clear();
- OutList->Add((void *)Elements[2*ClusterRows-2]);
- int Position;
- bool Replaced;
- Replaced = true;
- while (Replaced == true)
- {
- Replaced = false;
- for (Position = 0; Position < OutList->Count; Position ++)
- {
- nodeindex = *(int *) OutList->Items[Position];
- if (nodeindex >= 0)
- {
- if (nodeindex < ClusterRows)
- {
- NewList->Add((void *) Elements[nodeindex]);
- }
- else
- {
- element1 = NodeElement[nodeindex][0];
- element2 = NodeElement[nodeindex][1];
- NewList->Add((void *) Elements[element1]);
- NewList->Add((void *) Elements[element2]);
- Replaced = true;
- }
- }
- }
- OutList->Clear();
- for (i=0;i<NewList->Count;i++)
- {
- OutList->Add(NewList->Items[i]);
- }
- NewList->Clear();
- }
- int index;
- ClusterOrder->Clear();
- if (SaveDistances == true)
- {
- /*
- if (DistFileDialogBox->Execute())
- {
- TFileStream *DistFile = new TFileStream(DistFileDialogBox->FileName,fmCreate);
- int ii, ij;
- for (i=0;i<OutList->Count;i++)
- {
- ii = *(int *)OutList->Items[i];
- for (j=0;j<OutList->Count;j++)
- {
- {
- ij = *(int *)OutList->Items[j];
- DistFile->Write(&Dist[ii][ij],2);
- }
- }
- }
- delete DistFile;
- } */
- for (i=0;i<2*ClusterRows-1;i++)
- {
- delete Dist[i];
- }
- }
- if (CalcInferredDistances == true)
- {
- if (DistFileDialogBox->Execute())
- {
- TFileStream *DistFile = new TFileStream(DistFileDialogBox->FileName,fmCreate);
- int ii, ij;
- for (i=0;i<OutList->Count;i++)
- {
- ii = *(int *)OutList->Items[i];
- for (j=0;j<OutList->Count;j++)
- {
- {
- ij = *(int *)OutList->Items[j];
- DistFile->Write(&TreeDist[ii][ij],2);
- }
- }
- }
- delete DistFile;
- for (i=0;i<ClusterRows;i++)
- {
- delete TreeDist[i];
- }
- }
- delete TreeDist;
- }
- for (i=0;i<OutList->Count;i++)
- {
- index = *(int *)OutList->Items[i];
- ClusterOrder->Add(index);
- }
- for (i=0;i<2*ClusterRows-1;i++)
- {
- delete NodeElement[i];
- delete Elements[i];
- }
- if (CalculateWeights == true)
- {
- for (i=0;i<ClusterRows;i++)
- {
- RowWeight[i] = 1/CalcWeight[i];
- }
- }
- delete Dist;
- delete NodeElement;
- delete Elements;
- delete NodeDistance;
- delete elements;
- delete minval;
- delete minpair;
- delete NewList;
- delete OutList;
- delete Active;
- delete CalcWeight;
- delete iList1;
- delete iList2;
- delete List1;
- delete List2;
- }
- void __fastcall TMainForm::SingleLinkageClusterButtonClick(TObject *Sender)
- {
- int i,j;
- StatusBar1->SimpleText = "Initializing";
- // Get ClusterID for file names from user input
- JobName = AnsiString(ClusterName);
- if (JobNameEdit->Text.Length() > 0)
- {
- JobName = JobNameEdit->Text;
- }
- bool ClusterGenes = ClusterGenesCheckBox->Checked;
- bool ClusterArrays = ClusterArraysCheckBox->Checked;
- bool CalculateGeneWeights = CalculateGeneWeightsCheckBox->Checked;
- bool CalculateArrayWeights = CalculateArrayWeightsCheckBox->Checked;
- int *ColumnOrder = new int[Columns];
- AnsiString *ArrayID = new AnsiString[2*Columns-1];
- for (i=0;i<Columns;i++)
- {
- ColumnOrder[i] = i;
- ArrayID[i] = "ARRY" + AnsiString(i) + "X";
- }
- if (CalculateGeneWeights == true)
- {
- // Generate array data structures
- // This is a wasteful way of doing it, but makes
- // the coding easier for me
- double **ArrayData = new double*[2*Columns-1];
- bool **ArrayMask = new bool*[2*Columns-1];
- for (i=0;i<2*Columns-1;i++)
- {
- ArrayData[i] = new double[Rows];
- ArrayMask[i] = new bool[Rows];
- if (i<Columns)
- {
- for (j=0;j<Rows;j++)
- {
- ArrayData[i][j] = GeneData[j][i];
- ArrayMask[i][j] = GeneMask[j][i];
- }
- }
- }
- TStringList *ArrayTreeFile = new TStringList();
- TStringList *ArrayClusterOrder = new TStringList();
- SingleCluster(ArrayData,ArrayMask,Columns,Rows,
- ArrayTreeFile,ArrayOrder,ArrayWeight,GeneWeight,ArrayID,
- ArrayMetricComboBox->ItemIndex,
- true,ArrayWeightCutoff,ArrayWeightExp,
- ArrayClusterOrder,StatusBar1,false,NULL);
- delete ArrayClusterOrder;
- for (i=0;i<2*Columns-1;i++)
- {
- delete ArrayData[i];
- delete ArrayMask[i];
- }
- delete ArrayData;
- delete ArrayMask;
- }
- int *RowOrder = new int[Rows];
- AnsiString *GeneID = new AnsiString[2*Rows-1];
- for (i=0;i<Rows;i++)
- {
- RowOrder[i] = i;
- GeneID[i] = AnsiString("GENE") + AnsiString(i) + AnsiString("X");
- }
- if (ClusterGenes == true)
- {
- TStringList *GeneTreeFile = new TStringList();
- TStringList *GeneClusterOrder = new TStringList();
- if (CalculateArrayWeights == true)
- {
- SingleCluster(GeneData,GeneMask,Rows,Columns,
- GeneTreeFile,GeneOrder,GeneWeight, ArrayWeight,GeneID,
- GeneMetricComboBox->ItemIndex,
- true,GeneWeightCutoff,GeneWeightExp,
- GeneClusterOrder,StatusBar1,false,NULL);
- }
- else
- {
- double *NullGeneWeight = new double[Rows];
- SingleCluster(GeneData,GeneMask,Rows,Columns,
- GeneTreeFile,GeneOrder,NullGeneWeight,ArrayWeight,GeneID,
- GeneMetricComboBox->ItemIndex,
- false,1.0,1.0,
- GeneClusterOrder,StatusBar1,false,NULL);
- delete NullGeneWeight;
- }
- for (i=0;i<GeneClusterOrder->Count;i++)
- {
- RowOrder[i] = GeneClusterOrder->Strings[i].ToInt();
- }
- AnsiString TreeFileName = JobName + ".gtr";
- GeneTreeFile->SaveToFile(TreeFileName);
- }
- if (ClusterArrays == true)
- {
- // Generate array data structures
- // This is a wasteful way of doing it, but makes
- // the coding easier for me
- double **ArrayData = new double*[2*Columns-1];
- bool **ArrayMask = new bool*[2*Columns-1];
- for (i=0;i<2*Columns-1;i++)
- {
- ArrayData[i] = new double[Rows];
- ArrayMask[i] = new bool[Rows];
- if (i<Columns)
- {
- for (j=0;j<Rows;j++)
- {
- ArrayData[i][j] = GeneData[j][i];
- ArrayMask[i][j] = GeneMask[j][i];
- }
- }
- }
- TStringList *ArrayTreeFile = new TStringList();
- TStringList *ArrayClusterOrder = new TStringList();
- SingleCluster(ArrayData,ArrayMask,Columns,Rows,
- ArrayTreeFile,ArrayOrder,ArrayWeight,GeneWeight,ArrayID,
- ArrayMetricComboBox->ItemIndex,
- false,1.0,1.0,ArrayClusterOrder,StatusBar1,false,NULL);
- for (i=0;i<ArrayClusterOrder->Count;i++)
- {
- ColumnOrder[i] = ArrayClusterOrder->Strings[i].ToInt();
- }
- delete ArrayClusterOrder;
- for (i=0;i<2*Columns-1;i++)
- {
- delete ArrayData[i];
- delete ArrayMask[i];
- }
- delete ArrayData;
- delete ArrayMask;
- AnsiString ArrayTreeFileName = JobName + ".atr";
- ArrayTreeFile->SaveToFile(ArrayTreeFileName);
- delete ArrayTreeFile;
- }
- AnsiString OutString;
- TStringList *DataFile = new TStringList();
- // Now make output .cdt file
- OutString = "";
- if (ClusterGenes == true)
- {
- OutString += AnsiString("GID") + AnsiString("t");
- }
- OutString += Headers->Strings[0] + AnsiString("t");
- OutString += AnsiString("NAME") + AnsiString("t");
- OutString += "GWEIGHT";
- // Now add headers for data columns
- for (i=0;i<Columns;i++)
- {
- OutString += "t";
- OutString += AnsiString(Headers->Strings[InColumn[ColumnOrder[i]]]);
- }
- DataFile->Add(OutString);
- if (ClusterArrays == true)
- {
- OutString = AnsiString("AID");
- if (ClusterGenes == true)
- {
- OutString += AnsiString("t");
- }
- OutString += AnsiString("t");
- OutString += AnsiString("t");
- for (i=0;i<Columns;i++)
- {
- OutString += "t";
- OutString += ArrayID[ColumnOrder[i]];
- }
- DataFile->Add(OutString);
- }
- {
- OutString = AnsiString("EWEIGHT");
- if (ClusterGenes == true)
- {
- OutString += AnsiString("t");
- }
- OutString += AnsiString("t");
- OutString += AnsiString("t");
- for (i=0;i<Columns;i++)
- {
- OutString += "t";
- OutString += ArrayWeight[ColumnOrder[i]];
- }
- }
- DataFile->Add(OutString);
- int index;
- TFloatFormat Format = ffGeneral;
- for (i=0;i<Rows;i++)
- {
- index = RowOrder[i];
- OutString = "";
- if (ClusterGenes == true)
- {
- OutString += GeneID[index] + "t";
- }
- OutString += AnsiString(UniqID[index]) + "t" + AnsiString(GeneName[index]);
- OutString += "t" + AnsiString(GeneWeight[index]);
- for (j=0;j<Columns;j++)
- {
- if (GeneMask[index][ColumnOrder[j]] == true)
- {
- OutString += "t" + AnsiString(FloatToStrF(GeneData[index][ColumnOrder[j]],Format,4,2));
- }
- else
- {
- OutString += "t";
- }
- }
- DataFile->Add(OutString);
- }
- AnsiString DataFileName = JobName + ".cdt";
- DataFile->SaveToFile(DataFileName);
- for (i=0;i<2*Columns-1;i++)
- {
- ArrayID[i] = "";
- }
- for (i=0;i<2*Rows-1;i++)
- {
- GeneID[i] = "";
- }
- delete ArrayID;
- delete GeneID;
- delete ColumnOrder;
- delete RowOrder;
- StatusBar1->SimpleText = "Done Clustering";
- }
- //---------------------------------------------------------------------------
- /*Single Linkage Clustering Code */
- void TMainForm::SingleCluster(double **ClusterData, bool **ClusterMask,
- int ClusterRows, int ClusterColumns,
- TStringList *TreeFile, double *Order, double *RowWeight, double *ColumnWeight,
- AnsiString *ID, int DistFunction,
- bool CalculateWeights, double WeightCutoff, double WeightPower,
- TStringList *ClusterOrder, TStatusBar *StatusBar,
- bool ReturnTNode, TNode *TopNode)
- {
- int i,j,k,l,m;
- ReturnTNode = true;
- unsigned short *minval;
- int *minpair;
- int *elements;
- AnsiString OutString;
- unsigned short **Dist;
- int **NodeElement;
- double *NodeDistance;
- double *CalcWeight;
- int element1, element2;
-
- TStringList *List1 = new TStringList();
- TStringList *List2 = new TStringList();
- Dist = new unsigned short*[2*ClusterRows-1];
- minval = new unsigned short[2*ClusterRows-1];
- minpair = new int[2*ClusterRows-1];
- elements = new int[2*ClusterRows-1];
- NodeElement = new int*[2*ClusterRows-1];
- NodeDistance = new double[2*ClusterRows-1];
- CalcWeight = new double[2*ClusterRows-1];
- int *iList1 = new int[ClusterRows];
- int *iList2 = new int[ClusterRows];
- bool SaveDistances = true;
- double WeightDist;
- for (i=0;i<ClusterRows;i++)
- {
- Dist[i] = new unsigned short[2*ClusterRows-1];
- }
- for (i=0;i<2*ClusterRows-1;i++)
- {
- minval[i] = 32769;
- elements[i] = 1;
- NodeElement[i] = new int[2];
- CalcWeight[i] = 1;
- NodeDistance[i] = 0.0;
- }
- bool Centered = false;
- if ((DistFunction == 1) || (DistFunction == 3) )
- {
- Centered = true;
- }
- bool Absolute = false;
- if ((DistFunction == 3) || (DistFunction == 4) )
- {
- Absolute = true;
- }
- bool CalcInferredDistances = false;
- TStringList **MemberList;
- unsigned short **TreeDist;
- if (CalcInferredDistances == true)
- {
- MemberList = new TStringList*[2*ClusterRows-1];
- TreeDist = new unsigned short*[ClusterRows];
- for (i=0;i<2*ClusterRows-1;i++)
- {
- MemberList[i] = new TStringList();
- if (i<ClusterRows)
- {
- MemberList[i]->Add(AnsiString(i));
- TreeDist[i] = new unsigned short[ClusterRows];
- }
- }
- }
- int TotalDistance = ClusterRows * (ClusterRows - 1) / 2;
- /* First step is to compute the distance matrix
- This is the slowest step */
- for (i=0;i<ClusterRows;i++)
- {
- Dist[i][i] = 0.0;
- for (j=0;j<i;j++)
- {
- Dist[i][j] = Distance(DistFunction,ClusterData,ClusterMask,ColumnWeight,
- i,j,ClusterColumns);
- Dist[j][i] = Dist[i][j];
- if (CalculateWeights == true)
- {
- WeightDist = ( (double) (32768 - Dist[i][j])) /32768.0;
- WeightDist = max(0.0, (WeightDist - WeightCutoff)/(1.0 - WeightCutoff));
- WeightDist = pow(WeightDist,WeightPower);
- CalcWeight[i] += WeightDist;
- CalcWeight[j] += WeightDist;
- }
- if (Dist[i][j] < minval[i])
- {
- minval[i] = Dist[i][j];
- minpair[i] = j;
- }
- if (Dist[i][j] < minval[j])
- {
- minval[j] = Dist[i][j];
- minpair[j] = i;
- }
- }
- StatusBar->SimpleText = "Calculating Distances " + AnsiString(i*(i+1)/2) + " of "
- + AnsiString(TotalDistance);
- Application->ProcessMessages();
- }
- TStringList *MinList = new TStringList();
- for (i=0;i<ClusterRows;i++)
- {
- MinList->Add(AnsiString(minval[i]));
- }
- MinList->SaveToFile("minlist.txt");
- delete MinList;
- bool *Active;
- Active = new bool[2*ClusterRows-1];
- for (i=0;i<ClusterRows;i++)
- {
- Active[i] = true;
- }
- for (i=ClusterRows;i<2*ClusterRows-1;i++)
- {
- Active[i] = false;
- }
- unsigned short minminval;
- int min1, min2;
- int nodeindex;
- //int node;
- /* Now we join nodes */
- /* If we are going to return TNode, need to set up TNodes */
- TNode **Nodes;
- //if (ReturnTNode)
- {
- Nodes = new TNode*[2*ClusterRows-1];
- for (i=0;i<2*ClusterRows-1;i++)
- {
- Nodes[i] = new TNode();
- if (i < ClusterRows)
- {
- Nodes[i]->IsNode = false;
- Nodes[i]->ID = AnsiString(i);
- }
- else
- {
- Nodes[i]->IsNode = true;
- }
- }
- }
- for (nodeindex=ClusterRows; nodeindex<2*ClusterRows-1;nodeindex++)
- {
- ID[nodeindex] = "NODE" + AnsiString(nodeindex-ClusterRows+1) + "X";
- minminval = 32769;
- for (i=0;i<2*ClusterRows-1;i++)
- {
- if (Active[i] == true)
- {
- if (minval[i] < minminval)
- {
- minminval = minval[i];
- min1 = i;
- min2 = minpair[i];
- }
- }
- }
- /* When we get here min1 and min2 are the elements that are to be joined */
- Active[min1] = false;
- Active[min2] = false;
- if (Order[min1] > Order[min2])
- {
- NodeElement[nodeindex][0] = min1;
- NodeElement[nodeindex][1] = min2;
- }
- else
- {
- NodeElement[nodeindex][1] = min1;
- NodeElement[nodeindex][0] = min2;
- }
- NodeDistance[nodeindex] = ((double) minminval)/16384.0;
- NodeDistance[nodeindex] = max(NodeDistance[nodeindex],NodeDistance[min1]);
- NodeDistance[nodeindex] = max(NodeDistance[nodeindex],NodeDistance[min2]);
- if (ReturnTNode)
- {
- Nodes[nodeindex]->Child1 = Nodes[NodeElement[nodeindex][0]];
- Nodes[nodeindex]->Child2 = Nodes[NodeElement[nodeindex][1]];
- Nodes[nodeindex]->Length = 1.0 - NodeDistance[nodeindex];
- }
- if (CalcInferredDistances == true)
- {
- int elem1,elem2,ielem1,ielem2;
- for (elem1=0;elem1<MemberList[min1]->Count;elem1++)
- {
- for (elem2=0;elem2<MemberList[min2]->Count;elem2++)
- {
- ielem1 = (MemberList[min1]->Strings[elem1]).ToInt();
- ielem2 = (MemberList[min2]->Strings[elem2]).ToInt();
- TreeDist[ielem1][ielem2] = (NodeDistance[nodeindex] * 16384);
- TreeDist[ielem2][ielem1] = (NodeDistance[nodeindex] * 16384);
- }
- }
- MemberList[nodeindex]->AddStrings(MemberList[min1]);
- MemberList[nodeindex]->AddStrings(MemberList[min2]);
- delete MemberList[min1];
- delete MemberList[min2];
- }
- OutString = ID[nodeindex] + AnsiString("t") + ID[NodeElement[nodeindex][0]]
- + AnsiString("t") + ID[NodeElement[nodeindex][1]] + AnsiString("t")
- + AnsiString(1.0 - NodeDistance[nodeindex]);
- TreeFile->Add(OutString);
- StatusBar->SimpleText = "Generating Node " + AnsiString(nodeindex-ClusterRows+1) + " of " + AnsiString(ClusterRows-1);
- Application->ProcessMessages();
- elements[nodeindex] = elements[min1] + elements[min2];
- for (k=0;k<ClusterColumns;k++)
- {
- if ( (ClusterMask[min1][k] == true) && (ClusterMask[min2][k] == true) )
- {
- ClusterData[nodeindex][k] =
- (elements[min1] * ClusterData[min1][k] +
- elements[min2] * ClusterData[min2][k]) /
- (elements[min1] + elements[min2]);
- ClusterMask[nodeindex][k] = true;
- }
- else if (ClusterMask[min1][k] == true)
- {
- ClusterData[nodeindex][k] = ClusterData[min1][k];
- ClusterMask[nodeindex][k] = true;
- }
- else if (ClusterMask[min2][k] == true)
- {
- ClusterData[nodeindex][k] = ClusterData[min2][k];
- ClusterMask[nodeindex][k] = true;
- }
- else
- {
- ClusterMask[nodeindex][k] = false;
- }
- }
- Order[nodeindex] = (elements[min1]*Order[min1] + elements[min2]*Order[min2])/(elements[min1] + elements[min2]);
- Dist[nodeindex] = new unsigned short[2*ClusterRows-1];
- if (SaveDistances == false)
- {
- delete Dist[min1];
- delete Dist[min2];
- }
- List1->Clear();
- Nodes[nodeindex]->SetList(List1);
- int NumList1 = List1->Count;
- for (int i1=0;i1<NumList1;i1++)
- {
- iList1[i1] = List1->Strings[i1].ToInt();
- }
- for (l=0;l<2*ClusterRows-1;l++)
- {
- if (Active[l] == true)
- {
- // OK. Here's where it gets tricky
- List2->Clear();
- if (l < ClusterRows)
- {
- List2->Add(AnsiString(l));
- }
- else
- {
- Nodes[l]->SetList(List2);
- }
- int NumList2 = List2->Count;
- for (int i2=0;i2<NumList2;i2++)
- {
- iList2[i2] = List2->Strings[i2].ToInt();
- }
- Dist[nodeindex][l] = 32768;
- for (int i1=0; i1 < NumList1; i1++)
- {
- for (int i2=0; i2 < NumList2; i2++)
- {
- Dist[nodeindex][l] = min(Dist[nodeindex][l],Dist[iList1[i1]][iList2[i2]]);
- }
- }
- Dist[l][nodeindex] = Dist[nodeindex][l];
- if (Dist[nodeindex][l] < minval[nodeindex])
- {
- minval[nodeindex] = Dist[nodeindex][l];
- minpair[nodeindex] = l;
- }
- //Rescan for min if minpair was one of the fused elements
- if ( (minpair[l] == min1) || (minpair[l] == min2) )
- {
- minval[l] = 32769;
- for (m=0;m<2*ClusterRows-1;m++)
- {
- if ( (Active[m] == true) && (l != m) )
- {
- if (Dist[l][m] < minval[l])
- {
- minval[l] = Dist[l][m];
- minpair[l] = m;
- }
- }
- }
- }
- //otherwise check new distance to see if it is new min
- else if (Dist[nodeindex][l] < minval[l])
- {
- minval[l] = Dist[nodeindex][l];
- minpair[l] = nodeindex;
- }
- }
- }
- Active[nodeindex] = true;
- }
- if (SaveDistances == false)
- {
- delete Dist[2*ClusterRows-2];
- }
- TList *NewList = new TList();
- TList *OutList = new TList();
- int **Elements;
- Elements = new int*[2*ClusterRows-1];
- for (i=0;i<2*ClusterRows-1;i++)
- {
- Elements[i] = new int;
- *Elements[i] = i;
- }
- OutList->Clear();
- OutList->Add((void *)Elements[2*ClusterRows-2]);
- int Position;
- bool Replaced;
- Replaced = true;
- while (Replaced == true)
- {
- Replaced = false;
- for (Position = 0; Position < OutList->Count; Position ++)
- {
- nodeindex = *(int *) OutList->Items[Position];
- if (nodeindex >= 0)
- {
- if (nodeindex < ClusterRows)
- {
- NewList->Add((void *) Elements[nodeindex]);
- }
- else
- {
- element1 = NodeElement[nodeindex][0];
- element2 = NodeElement[nodeindex][1];
- NewList->Add((void *) Elements[element1]);
- NewList->Add((void *) Elements[element2]);
- Replaced = true;
- }
- }
- }
- OutList->Clear();
- for (i=0;i<NewList->Count;i++)
- {
- OutList->Add(NewList->Items[i]);
- }
- NewList->Clear();
- }
- int index;
- ClusterOrder->Clear();
- if (SaveDistances == true)
- {
- /*
- if (DistFileDialogBox->Execute())
- {
- TFileStream *DistFile = new TFileStream(DistFileDialogBox->FileName,fmCreate);
- int ii, ij;
- for (i=0;i<OutList->Count;i++)
- {
- ii = *(int *)OutList->Items[i];
- for (j=0;j<OutList->Count;j++)
- {
- {
- ij = *(int *)OutList->Items[j];
- DistFile->Write(&Dist[ii][ij],2);
- }
- }
- }
- delete DistFile;
- } */
- for (i=0;i<2*ClusterRows-1;i++)
- {
- delete Dist[i];
- }
- }
- if (CalcInferredDistances == true)
- {
- if (DistFileDialogBox->Execute())
- {
- TFileStream *DistFile = new TFileStream(DistFileDialogBox->FileName,fmCreate);
- int ii, ij;
- for (i=0;i<OutList->Count;i++)
- {
- ii = *(int *)OutList->Items[i];
- for (j=0;j<OutList->Count;j++)
- {
- {
- ij = *(int *)OutList->Items[j];
- DistFile->Write(&TreeDist[ii][ij],2);
- }
- }
- }
- delete DistFile;
- for (i=0;i<ClusterRows;i++)
- {
- delete TreeDist[i];
- }
- }
- delete TreeDist;
- }
- for (i=0;i<OutList->Count;i++)
- {
- index = *(int *)OutList->Items[i];
- ClusterOrder->Add(index);
- }
- for (i=0;i<2*ClusterRows-1;i++)
- {
- delete NodeElement[i];
- delete Elements[i];
- }
- if (CalculateWeights == true)
- {
- for (i=0;i<ClusterRows;i++)
- {
- RowWeight[i] = 1/CalcWeight[i];
- }
- }
- delete Dist;
- delete NodeElement;
- delete Elements;
- delete NodeDistance;
- delete elements;
- delete minval;
- delete minpair;
- delete NewList;
- delete OutList;
- delete Active;
- delete CalcWeight;
- delete iList1;
- delete iList2;
- delete List1;
- delete List2;
- }
- /* Accessory Code for Clustering Dialog */
- /* Handle Clicks on CheckBoxes */
- void __fastcall TMainForm::CalculateGeneWeightsCheckBoxClick(
- TObject *Sender)
- {
- if (CalculateGeneWeightsCheckBox->Checked == true)
- {
- ArrayWeightOptionsGroupBox->Visible = true;
- }
- else
- {
- ArrayWeightOptionsGroupBox->Visible = false;
- }
- }
- void __fastcall TMainForm::CalculateArrayWeightsCheckBoxClick(
- TObject *Sender)
- {
- if (CalculateArrayWeightsCheckBox->Checked == true)
- {
- GeneWeightOptionsGroupBox->Visible = true;
- }
- else
- {
- GeneWeightOptionsGroupBox->Visible = false;
- }
- }
- /* Handle Edit Exits to Check Values */
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::GeneWeightExpEditExit(TObject *Sender)
- {
- double Val;
- try
- {
- Val = GeneWeightExpEdit->Text.ToDouble();
- GeneWeightExp = Val;
- }
- catch (EConvertError &E)
- {
- GeneWeightExpEdit->Text = GeneWeightExp;
- }
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::GeneWeightCutoffEditExit(TObject *Sender)
- {
- double Val;
- try
- {
- Val = GeneWeightCutoffEdit->Text.ToDouble();
- GeneWeightCutoff = Val;
- }
- catch (EConvertError &E)
- {
- GeneWeightCutoffEdit->Text = GeneWeightCutoff;
- }
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::ArrayWeightExpEditExit(TObject *Sender)
- {
- double Val;
- try
- {
- Val = ArrayWeightExpEdit->Text.ToDouble();
- ArrayWeightExp = Val;
- }
- catch (EConvertError &E)
- {
- ArrayWeightExpEdit->Text = ArrayWeightExp;
- }
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::ArrayWeightCutoffEditExit(TObject *Sender)
- {
- double Val;
- try
- {
- Val = ArrayWeightCutoffEdit->Text.ToDouble();
- ArrayWeightCutoff = Val;
- }
- catch (EConvertError &E)
- {
- ArrayWeightCutoffEdit->Text = ArrayWeightCutoff;
- }
- }
- /* This is an invisible button that handles a particular type
- of column and row reordering. It was put in for one use. */
- void __fastcall TMainForm::ReorderType1ButtonClick(TObject *Sender)
- {
- for (int row=0; row<Rows; row++)
- {
- GeneOrder[row] = 0;
- int count = 0;
- for (int col=0; col<Columns; col++)
- {
- if (GeneMask[row][col] == true)
- {
- GeneOrder[row] += col * GeneData[row][col];
- count++;
- }
- }
- if (count > 0)
- {
- GeneOrder[row] /= count;
- }
- }
- for (int col=0; col<Columns; col++)
- {
- ArrayOrder[col] = 0;
- int count = 0;
- for (int row=0; row<Rows; row++)
- {
- if (GeneMask[row][col] == true)
- {
- ArrayOrder[col] += row * GeneData[row][col];
- count++;
- }
- }
- if (count > 0)
- {
- ArrayOrder[col] /= count;
- }
- }
- }
- //---------------------------------------------------------------------------
- /* Self-Organizing Maps ala Kohonen: Note this makes use of NR Code */
- double*** __fastcall TMainForm::SOM(int X, int Y, double **SOMData, bool **SOMMask,
- int SOMRows, int SOMColumns, int SOMIterations)
- {
- int i,j,k,l,n;
- double ***Nodes;
- Nodes = new double**[X];
- for (i=0;i<X;i++)
- {
- Nodes[i] = new double*[Y];
- for (j=0; j<Y; j++)
- {
- Nodes[i][j] = new double[SOMColumns];
- double NodeMag = 0;
- while (NodeMag == 0)
- {
- for (k=0;k<SOMColumns;k++)
- {
- Nodes[i][j][k] = ((double) random(10000))/10000.0;
- NodeMag += pow(Nodes[i][j][k],2.0);
- }
- NodeMag = sqrt(NodeMag);
- }
- for (k=0;k<SOMColumns;k++)
- {
- Nodes[i][j][k] /= NodeMag;
- }
- }
- }
- bool *SOMUse = new bool[SOMRows];
- double *Mag = new double[SOMRows];
- int CountSOMUse = 0;
- for (i=0;i<SOMRows;i++)
- {
- Mag[i] = 0;
- double Min = 1000;
- double Max = -1000;
- int DataPoints = 0;
- for (j=0;j<SOMColumns;j++)
- {
- if (SOMMask[i][j])
- {
- DataPoints++;
- Min = min(Min,SOMData[i][j]);
- Max = max(Max,SOMData[i][j]);
- Mag[i] += pow(SOMData[i][j],2.0);
- }
- }
- Mag[i] = sqrt(Mag[i]);
- if ( (2 * DataPoints > SOMColumns) && ( (Max - Min) > 0.0) )
- {
- SOMUse[i] = true;
- CountSOMUse++;
- }
- else
- {
- SOMUse[i] = false;
- }
- }
- int *SOMUseRow = ivector(1,CountSOMUse);
- float *SOMUseRandom = vector(1,CountSOMUse);
- CountSOMUse = 0;
- for (i=0;i<SOMRows;i++)
- {
- if (SOMUse[i])
- {
- CountSOMUse++;
- SOMUseRow[CountSOMUse] = i;
- SOMUseRandom[CountSOMUse] = rand();
- }
- }
- sort2fi(CountSOMUse,SOMUseRandom,SOMUseRow);
- double MaxDist = sqrt(pow(X-1,2.0)+pow(Y-1,2.0));
- //MaxDist = 3; // Try this for compatibility with Tamayo et al.
- for (i=0;i<SOMIterations;i++)
- {
- StatusBar1->SimpleText = "SOM Iteration " + AnsiString(i) +" of " + AnsiString(SOMIterations);
- Application->ProcessMessages();
- double BestCorr = -100;
- int BestX, BestY;
- /* n = random(SOMRows);
- while(SOMUse[n] == false)
- {
- n = random(SOMRows);
- } */
- n = SOMUseRow[(i%CountSOMUse)+1];
- for (j=0;j<X;j++)
- {
- for (k=0;k<Y;k++)
- {
- double Corr = Correlation(Nodes,SOMData,SOMMask,false,j,k,n,SOMColumns);
- if (Corr > BestCorr)
- {
- BestCorr = Corr;
- BestX = j;
- BestY = k;
- }
- }
- }
- for (j=0;j<X;j++)
- {
- for (k=0;k<Y;k++)
- {
- double Corr = Correlation(Nodes,SOMData,SOMMask,false,j,k,n,SOMColumns);
- if (Corr > BestCorr)
- {
- BestCorr = Corr;
- BestX = j;
- BestY = k;
- }
- }
- }
- double DistCut = (MaxDist*(0.75-((0.75*i)/SOMIterations)));
- //DistCut = MaxDist * (1.0 - ((double)i/(double)SOMIterations));
- double Tau;
- if (i < 1000)
- {
- Tau = (0.9 * (1.0 - i/1000.0));
- }
- else
- {
- Tau = (0.02 * SOMIterations)/(SOMIterations+(100*i));
- }
- //Tau = (0.02 * SOMIterations)/(SOMIterations+(100*i));
- for (j=0;j<X;j++)
- {
- for (k=0;k<Y;k++)
- {
- double NodeDist = sqrt(pow(BestX - j,2.0) + pow(BestY - k,2.0));
- if (NodeDist < DistCut)
- {
- for (l=0;l<SOMColumns;l++)
- {
- if (SOMMask[n][l] == true)
- {
- Nodes[j][k][l] += Tau * ((SOMData[n][l]/Mag[n]) - Nodes[j][k][l]);
- }
- }
- }
- double NodeMag = 0;
- for (l=0;l<SOMColumns;l++)
- {
- NodeMag += pow(Nodes[j][k][l],2.0);
- }
- NodeMag = sqrt(NodeMag);
- for (l=0;l<SOMColumns;l++)
- {
- Nodes[j][k][l] /= NodeMag;
- }
- }
- }
- }
- delete Mag;
- delete SOMUse;
- free_vector(SOMUseRandom,1,CountSOMUse);
- free_ivector(SOMUseRow,1,CountSOMUse);
- StatusBar1->SimpleText = "Done Making SOM";
- return Nodes;
- }
- /* Correlation Function for SOM */
- double TMainForm::Correlation(double ***Nodes, double **SOMData, bool **SOMMask,
- bool Centered, int j, int k, int n, int SOMColumns)
- {
- int l;
- double Sum1 = 0;
- double Sum2 = 0;
- double Sum11 = 0;
- double Sum12 = 0;
- double Sum22 = 0;
- double Count = 0;
- double Ave1,Ave2;
- double Norm1 = 0;
- double Norm2 = 0;
- double Corr = -100;
- for (l=0;l<SOMColumns;l++)
- {
- if (SOMMask[n][l] == true)
- {
- Sum1 += SOMData[n][l];
- Sum2 += Nodes[j][k][l];
- Sum11 += SOMData[n][l] * SOMData[n][l];
- Sum22 += Nodes[j][k][l]* Nodes[j][k][l];
- Sum12 += SOMData[n][l] * Nodes[j][k][l];
- Count ++;
- }
- }
- if (Count > 0)
- {
- if (Centered)
- {
- Ave1 = Sum1/Count;
- Ave2 = Sum2/Count;
- }
- else
- {
- Ave1 = 0;
- Ave2 = 0;
- }
- try
- {
- Norm1 = sqrt(max(0.0,Sum11 - 2 * Ave1 * Sum1 + Count * Ave1 * Ave1));
- Norm2 = sqrt(max(0.0,Sum22 - 2 * Ave2 * Sum2 + Count * Ave2 * Ave2));
- }
- catch (Exception &E)
- {
- }
- if ( (Norm1 > 0) && (Norm2 > 0) )
- {
- Corr = (Sum12 - Sum1 * Ave2 - Sum2 * Ave1 + Count * Ave1 * Ave2)
- / (Norm1 * Norm2);
- }
- }
- return Corr;
- }
- /* SOM Dialog Controls */
/*
 * Click handler for the "organize genes" SOM check box: the gene-SOM
 * parameter edits and their labels are visible exactly when the box is
 * checked.
 */
void __fastcall TMainForm::SOMOrganizeGenesCheckBoxClick(TObject *Sender)
{
    const bool Show = (SOMOrganizeGenesCheckBox->Checked == true);

    SOMGenesXDimEdit->Visible = Show;
    SOMGenesYDimEdit->Visible = Show;
    SOMGenesTauEdit->Visible = Show;
    SOMGenesIterationsEdit->Visible = Show;

    SOMGenesXDimLabel->Visible = Show;
    SOMGenesYDimLabel->Visible = Show;
    SOMGenesTauLabel->Visible = Show;
    SOMGenesIterationsLabel->Visible = Show;
}
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::SOMOrganizeArraysCheckBoxClick(TObject *Sender)
- {
- if (SOMOrganizeArraysCheckBox->Checked == true)
- {
- SOMArraysXDimEdit->Visible = true;
- SOMArraysYDimEdit->Visible = true;
- SOMArraysTauEdit->Visible = true;
- SOMArraysIterationsEdit->Visible = true;
- SOMArraysXDimLabel->Visible = true;
- SOMArraysYDimLabel->Visible = true;
- SOMArraysTauLabel->Visible = true;
- SOMArraysIterationsLabel->Visible = true;
- }
- else
- {
- SOMArraysXDimEdit->Visible = false;
- SOMArraysYDimEdit->Visible = false;
- SOMArraysTauEdit->Visible = false;
- SOMArraysIterationsEdit->Visible = false;
- SOMArraysXDimLabel->Visible = false;
- SOMArraysYDimLabel->Visible = false;
- SOMArraysTauLabel->Visible = false;
- SOMArraysIterationsLabel->Visible = false;
- }
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::About1Click(TObject *Sender)
- {
- AboutForm->Show();
- }
- void __fastcall TMainForm::MakeSOMButtonClick(TObject *Sender)
- {
- MakeSOM();
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::MakeSOM()
- {
- int i,j,y;
- double Best, Val;
- AnsiString DataFileName = AnsiString(ClusterName) + "_SOM";
- int *GeneGroup = ivector(1,Rows);
- int *SOMGeneOrder = ivector(1,Rows);
- double ***GeneNodes;
- if (SOMOrganizeGenesCheckBox->Checked)
- {
- DataFileName += "_G" + AnsiString(SOMGenesXDim) + "-" + AnsiString(SOMGenesYDim);
- GeneNodes = SOM(SOMGenesXDim, SOMGenesYDim, GeneData, GeneMask, Rows, Columns, SOMGenesIterations);
- for (i=0;i<Rows;i++)
- {
- GeneGroup[i+1] = SOMGenesYDim;
- SOMGeneOrder[i+1] = i;
- Best = 0.4;
- Best = -100;
- for (y=0;y<SOMGenesYDim;y++)
- {
- Val = Correlation(GeneNodes,GeneData,GeneMask,false,0,y,i,Columns);
- if (Val > Best)
- {
- GeneGroup[i+1] = y;
- Best = Val;
- }
- }
- GeneOrder[i] = GeneGroup[i+1];
- }
- sort2i(Rows,GeneGroup,SOMGeneOrder);
- }
- else
- {
- for (i=0;i<Rows;i++)
- {
- GeneGroup[i+1] = 0;
- SOMGeneOrder[i+1] = i;
- }
- }
- int *ArrayGroup = ivector(1,Columns);
- int *SOMArrayOrder = ivector(1,Columns);
- double ***ArrayNodes;
- if (SOMOrganizeArraysCheckBox->Checked)
- {
- double **ArrayData;
- bool **ArrayMask;
- ArrayData = new double*[Columns];
- ArrayMask = new bool*[Columns];
- for (i=0;i<Columns;i++)
- {
- ArrayData[i] = new double[Rows];
- ArrayMask[i] = new bool[Rows];
- for (j=0;j<Rows;j++)
- {
- ArrayData[i][j] = GeneData[j][i];
- ArrayMask[i][j] = GeneMask[j][i];
- }
- }
- DataFileName += "_A" + AnsiString(SOMArraysXDim) + "-" + AnsiString(SOMArraysYDim);
- ArrayNodes = SOM(SOMArraysXDim, SOMArraysYDim, ArrayData, ArrayMask, Columns, Rows, SOMArraysIterations);
- for (i=0;i<Columns;i++)
- {
- ArrayGroup[i+1] = SOMArraysYDim;
- SOMArrayOrder[i+1] = i;
- Best = 0.4;
- Best = -100;
- for (y=0;y<SOMArraysYDim;y++)
- {
- Val = Correlation(ArrayNodes,ArrayData,ArrayMask,false,0,y,i,Rows);
- if (Val > Best)
- {
- ArrayGroup[i+1] = y;
- Best = Val;
- }
- }
- ArrayOrder[i] = ArrayGroup[i+1];
- }
- sort2i(Columns,ArrayGroup,SOMArrayOrder);
- for (i=0;i<Columns;i++)
- {
- delete ArrayData[i];
- delete ArrayMask[i];
- }
- delete ArrayData;
- delete ArrayMask;
- }
- else
- {
- for (i=0;i<Columns;i++)
- {
- ArrayGroup[i+1] = 0;
- SOMArrayOrder[i+1] = i;
- }
- }
- if (SOMOrganizeGenesCheckBox->Checked)
- {
- TStringList *GeneOutList = new TStringList();
- AnsiString OutString = "NODE";
- for (i=0;i<Columns;i++)
- {
- OutString += "t";
- OutString += AnsiString(Headers->Strings[InColumn[SOMArrayOrder[i+1]]]);
- }
- GeneOutList->Add(OutString);
- for (i=0;i<SOMGenesYDim;i++)
- {
- AnsiString TmpString = "NODE" + AnsiString(i);
- for (j=0;j<Columns;j++)
- {
- TmpString += "t" + AnsiString(GeneNodes[0][i][SOMArrayOrder[j+1]]);
- }
- GeneOutList->Add(TmpString);
- }
- AnsiString GeneNodeFileName = DataFileName + ".GNF";
- GeneOutList->SaveToFile(GeneNodeFileName);
- delete GeneOutList;
- for (i=0;i<SOMGenesXDim;i++)
- {
- for (j=0; j<SOMGenesYDim; j++)
- {
- delete GeneNodes[i][j];
- }
- delete GeneNodes[i];
- }
- delete GeneNodes;
- }
- if (SOMOrganizeArraysCheckBox->Checked)
- {
- TStringList *ArrayOutList = new TStringList();
- AnsiString OutString = Headers->Strings[0] + "tNAME";
- for (i=0;i<SOMArraysYDim;i++)
- {
- OutString += "tNode" + AnsiString(i);
- }
- ArrayOutList->Add(OutString);
- for (i=0;i<Rows;i++)
- {
- int index = SOMGeneOrder[i+1];
- AnsiString TmpString = AnsiString(UniqID[index]) + "t" + AnsiString(GeneName[index]);
- for (j=0;j<SOMArraysYDim;j++)
- {
- TmpString += "t" + AnsiString(ArrayNodes[0][j][SOMGeneOrder[i+1]]);
- }
- ArrayOutList->Add(TmpString);
- }
- AnsiString ArrayNodeFileName = DataFileName + ".ANF";
- ArrayOutList->SaveToFile(ArrayNodeFileName);
- for (i=0;i<SOMArraysXDim;i++)
- {
- for (j=0; j<SOMArraysYDim; j++)
- {
- delete ArrayNodes[i][j];
- }
- delete ArrayNodes[i];
- }
- delete ArrayNodes;
- delete ArrayOutList;
- }
- /*
- float *Set1 = vector(1,Columns);
- float *Set2 = vector(1,Columns);
- float KSD,KSP;
- double *Prob = new double[Rows];
- int Count1, Count2;
- for (i=0;i<Rows;i++)
- {
- Count1 = 0;
- Count2 = 0;
- for (j=0;j<Columns;j++)
- {
- if (GeneMask[i][j])
- {
- if (ArrayGroup[j+1] < SOMArraysYDim/2)
- {
- Count1++;
- Set1[Count1] = GeneData[i][j];
- }
- else
- {
- Count2++;
- Set2[Count2] = GeneData[i][j];
- }
- }
- }
- kstwo(Set1,Count1,Set2,Count2,&KSD,&KSP);
- Prob[i] = KSP;
- } */
- TStringList *DataFile = new TStringList();
- AnsiString OutString = "";
- OutString += Headers->Strings[0] + AnsiString("t");
- OutString += AnsiString("NAME") + AnsiString("t");
- OutString += "GWEIGHT";
- // Now add headers for data columns
- for (i=0;i<Columns;i++)
- {
- OutString += "t";
- OutString += AnsiString(Headers->Strings[InColumn[SOMArrayOrder[i+1]]]);
- }
- DataFile->Add(OutString);
- {
- OutString = AnsiString("EWEIGHT");
- OutString += AnsiString("t");
- OutString += AnsiString("t");
- for (i=0;i<Columns;i++)
- {
- OutString += "t" + AnsiString(ArrayGroup[i+1]);
- }
- }
- DataFile->Add(OutString);
- int index, colindex;
- for (i=0;i<Rows;i++)
- {
- index = SOMGeneOrder[i+1];
- OutString = "";
- OutString += AnsiString(UniqID[index]) + "t" + AnsiString(GeneName[index]);
- OutString += "t" + AnsiString(GeneGroup[i+1]);
- //OutString += "t" + AnsiString(Prob[index]);
- for (j=0;j<Columns;j++)
- {
- colindex = SOMArrayOrder[j+1];
- if (GeneMask[index][colindex] == true)
- {
- OutString += "t" + AnsiString(GeneData[index][colindex]);
- }
- else
- {
- OutString += "t";
- }
- }
- DataFile->Add(OutString);
- }
- DataFileName += ".txt";
- DataFile->SaveToFile(DataFileName);
- free_ivector(SOMGeneOrder,1,Rows);
- free_ivector(SOMArrayOrder,1,Columns);
- free_ivector(GeneGroup,1,Rows);
- free_ivector(ArrayGroup,1,Columns);
- //free_vector(Set1,1,Columns);
- //free_vector(Set2,1,Columns);
- //delete Prob;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::SOMGenesXDimEditExit(TObject *Sender)
- {
- int TempVal = SOMGenesXDim;
- try
- {
- SOMGenesXDim = SOMGenesXDimEdit->Text.ToInt();
- }
- catch (EConvertError &E)
- {
- SOMGenesXDim = TempVal;
- }
- SOMGenesXDimEdit->Text = SOMGenesXDim;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::SOMGenesYDimEditExit(TObject *Sender)
- {
- int TempVal = SOMGenesYDim;
- try
- {
- SOMGenesYDim = SOMGenesYDimEdit->Text.ToInt();
- }
- catch (EConvertError &E)
- {
- SOMGenesYDim = TempVal;
- }
- SOMGenesYDimEdit->Text = SOMGenesYDim;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::SOMGenesTauEditExit(TObject *Sender)
- {
- double TempVal = SOMGenesTau;
- try
- {
- SOMGenesTau = SOMGenesTauEdit->Text.ToDouble();
- }
- catch (EConvertError &E)
- {
- SOMGenesTau = TempVal;
- }
- SOMGenesTauEdit->Text = SOMGenesTau;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::SOMGenesIterationsEditExit(TObject *Sender)
- {
- int TempVal = SOMGenesIterations;
- try
- {
- SOMGenesIterations = SOMGenesIterationsEdit->Text.ToInt();
- }
- catch (EConvertError &E)
- {
- SOMGenesIterations = TempVal;
- }
- SOMGenesIterationsEdit->Text = SOMGenesIterations;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::SOMArraysXDimEditExit(TObject *Sender)
- {
- int TempVal = SOMArraysXDim;
- try
- {
- SOMArraysXDim = SOMArraysXDimEdit->Text.ToInt();
- }
- catch (EConvertError &E)
- {
- SOMArraysXDim = TempVal;
- }
- SOMArraysXDimEdit->Text = SOMArraysXDim;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::SOMArraysYDimEditExit(TObject *Sender)
- {
- int TempVal = SOMArraysYDim;
- try
- {
- SOMArraysYDim = SOMArraysYDimEdit->Text.ToInt();
- }
- catch (EConvertError &E)
- {
- SOMArraysYDim = TempVal;
- }
- SOMArraysYDimEdit->Text = SOMArraysYDim;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::SOMArraysTauEditExit(TObject *Sender)
- {
- double TempVal = SOMArraysTau;
- try
- {
- SOMArraysTau = SOMArraysTauEdit->Text.ToDouble();
- }
- catch (EConvertError &E)
- {
- SOMArraysTau = TempVal;
- }
- SOMArraysTauEdit->Text = SOMArraysTau;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::SOMArraysIterationsEditExit(TObject *Sender)
- {
- int TempVal = SOMArraysIterations;
- try
- {
- SOMArraysIterations = SOMArraysIterationsEdit->Text.ToInt();
- }
- catch (EConvertError &E)
- {
- SOMArraysIterations = TempVal;
- }
- SOMArraysIterationsEdit->Text = SOMArraysIterations;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::DoSVDButtonClick(TObject *Sender)
- {
- MakeSVD();
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::MakeSVD()
- {
- StatusBar1->SimpleText = "Processing Data for SVD";
- float **u;
- float **v;
- float *w;
- int m,n;
- float cutoff;
- n = Columns;
- m = Rows;
- u = matrix(1,m,1,n);
- v = matrix(1,n,1,n);
- w = vector(1,n);
- float *magn = vector(1,m);
- int i,j,k;
- float Mag;
- for (int Row=0;Row<Rows;Row++)
- {
- Mag = 0;
- for (int Column=0;Column<Columns;Column++)
- {
- if (GeneMask[Row][Column])
- {
- Mag += pow(GeneData[Row][Column],2.0);
- }
- }
- Mag = sqrt(Mag);
- if (Mag == 0)
- {
- Mag = 1;
- }
- magn[Row+1] = Mag;
- for (int Column=0;Column<Columns;Column++)
- {
- if (GeneMask[Row][Column])
- {
- u[Row+1][Column+1] = GeneData[Row][Column] / Mag;
- }
- else
- {
- u[Row+1][Column+1] = 0;
- }
- }
- }
- StatusBar1->SimpleText = "Making SVD";
- svdcmp(u,m,n,w,v);
- int *svdorder = ivector(1,n);
- float *svdsortval = vector(1,n);
- for (i=1;i<=n;i++)
- {
- svdorder[i] = i;
- svdsortval[i] = -w[i];
- }
- sort2fi(n,svdsortval,svdorder);
- StatusBar1->SimpleText = "Saving SVD Files";
- TStringList *OutList = new TStringList();
- AnsiString OutString;
- AnsiString FileName;
- OutList->Clear();
- OutString = "";
- OutString += "EIGVALUE" + AnsiString("t");
- OutString += AnsiString("NAME") + AnsiString("t");
- OutString += "GWEIGHT";
- for (int Column=0;Column<Columns;Column++)
- {
- OutString += "t";
- OutString += AnsiString(Headers->Strings[InColumn[Column]]);
- }
- OutList->Add(OutString);
- for (int Row=1;Row<=Columns;Row++)
- {
- OutString = "";
- OutString += AnsiString(w[svdorder[Row]]) + "t" + AnsiString(w[svdorder[Row]]) + "t1";
- for (int Column=1;Column<=Columns;Column++)
- {
- OutString += "t" + AnsiString(v[svdorder[Column]][svdorder[Row]]);
- }
- OutList->Add(OutString);
- }
- FileName = JobNameEdit->Text + "_svv.txt";
- OutList->SaveToFile(FileName);
- OutList->Clear();
- OutString = "";
- OutString += Headers->Strings[0] + AnsiString("t");
- OutString += AnsiString("NAME") + AnsiString("t");
- OutString += "GWEIGHT";
- for (int Column=1;Column<=Columns;Column++)
- {
- OutString += "t";
- OutString += AnsiString(w[svdorder[Column]]);
- }
- OutList->Add(OutString);
- for (int Row=1;Row<=Rows;Row++)
- {
- OutString = "";
- OutString += UniqID[Row-1] + "t" + GeneName[Row-1] + "t1";
- for (int Column=1;Column<=Columns;Column++)
- {
- OutString += "t" + AnsiString(u[Row][svdorder[Column]]);
- }
- OutList->Add(OutString);
- }
- FileName = JobNameEdit->Text + "_svu.txt";
- OutList->SaveToFile(FileName);
- double Sum2 = 0;
- for (int Column=1; Column<=Columns; Column++)
- {
- Sum2 += pow(w[Column],2.0);
- }
- double Fractal = 0;
- for (int Column=1; Column<=Columns; Column++)
- {
- double Val = pow(w[Column],2.0)/Sum2;
- Fractal += Val * log(Val);
- }
- Fractal /= -log(Columns);
- FractalLabel->Caption = Fractal;
- free_matrix(u,1,m,1,n);
- free_matrix(v,1,n,1,n);
- free_vector(w,1,n);
- free_vector(magn,1,m);
- free_ivector(svdorder,1,n);
- free_vector(svdsortval,1,n);
- StatusBar1->SimpleText = "Done Computing SVD";
- }
- /* KMeans/KMediods Clustering */
- void __fastcall TMainForm::KExecuteClick(TObject *Sender)
- {
- int i,j,y;
- double Best, Val;
- AnsiString DataFileName = AnsiString(ClusterName) + "_K";
- int *GeneOrder;
- if (KOrganizeGenesCheckBox->Checked)
- {
- DataFileName += "_G" + AnsiString(GenesK);
- GeneOrder = DoKMeans(GenesK,GeneData,GeneMask,Rows,Columns,GMaxKCycles,GKMethod->ItemIndex);
- }
- else
- {
- GeneOrder = new int[Rows];
- for (i=0;i<Rows;i++)
- {
- GeneOrder[i] = i;
- }
- }
- int *ArrayOrder;
- if (KOrganizeArraysCheckBox->Checked)
- {
- double **ArrayData;
- bool **ArrayMask;
- ArrayData = new double*[Columns];
- ArrayMask = new bool*[Columns];
- for (i=0;i<Columns;i++)
- {
- ArrayData[i] = new double[Rows];
- ArrayMask[i] = new bool[Rows];
- for (j=0;j<Rows;j++)
- {
- ArrayData[i][j] = GeneData[j][i];
- ArrayMask[i][j] = GeneMask[j][i];
- }
- }
- DataFileName += "_A" + AnsiString(ArraysK);
- ArrayOrder = DoKMeans(ArraysK,ArrayData,ArrayMask,Columns,Rows,AMaxKCycles,AKMethod->ItemIndex);
- for (i=0;i<Columns;i++)
- {
- delete ArrayData[i];
- delete ArrayMask[i];
- }
- delete ArrayData;
- delete ArrayMask;
- }
- else
- {
- ArrayOrder = new int[Columns];
- for (i=0;i<Columns;i++)
- {
- ArrayOrder[i] = i;
- }
- }
- TStringList *DataFile = new TStringList();
- AnsiString OutString = "";
- OutString += Headers->Strings[0] + AnsiString("t");
- OutString += AnsiString("NAME") + AnsiString("t");
- OutString += "GWEIGHT";
- // Now add headers for data columns
- for (i=0;i<Columns;i++)
- {
- OutString += "t";
- OutString += AnsiString(Headers->Strings[InColumn[ArrayOrder[i]]]);
- }
- DataFile->Add(OutString);
- {
- OutString = AnsiString("EWEIGHT");
- OutString += AnsiString("t");
- OutString += AnsiString("t");
- for (i=0;i<Columns;i++)
- {
- OutString += "t" + 1;
- }
- }
- DataFile->Add(OutString);
- int index, colindex;
- for (i=0;i<Rows;i++)
- {
- index = GeneOrder[i];
- OutString = "";
- OutString += AnsiString(UniqID[index]) + "t" + AnsiString(GeneName[index]);
- OutString += "t" + AnsiString(1);
- //OutString += "t" + AnsiString(Prob[index]);
- for (j=0;j<Columns;j++)
- {
- colindex = ArrayOrder[j];
- if (GeneMask[index][colindex] == true)
- {
- OutString += "t" + AnsiString(GeneData[index][colindex]);
- }
- else
- {
- OutString += "t";
- }
- }
- DataFile->Add(OutString);
- }
- DataFileName += ".txt";
- DataFile->SaveToFile(DataFileName);
- delete DataFile;
- delete GeneOrder;
- delete ArrayOrder;
- StatusBar1->SimpleText = "Done";
- }
- int * __fastcall TMainForm::DoKMeans(int K, double **KData, bool **KMask,
- int KRows, int KColumns, int KMaxIterations, int Method)
- {
- double **NodeData = new double *[K];
- int **NodeDataCount = new int *[K];
- for (int i=0; i<K; i++)
- {
- NodeData[i] = new double[KColumns];
- NodeDataCount[i] = new int[KColumns];
- }
- int *NodeMap = new int[KRows];
- for (int i=0; i<KRows; i++)
- {
- NodeMap[i] = random(K);
- }
- int Iterations = 0;
- int NumMoved = 1;
- float **MedMatrix;
- int *MedCount;
- if (Method == 1)
- {
- MedMatrix = matrix(1,K,1,KRows);
- MedCount = ivector(1,K);
- }
- while ((NumMoved > 0) && (Iterations < KMaxIterations) )
- {
- NumMoved = 0;
- Iterations ++;
- for (int i=0; i<K; i++)
- {
- for (int j=0; j<KColumns; j++)
- {
- NodeData[i][j] = 0;
- NodeDataCount[i][j] = 0;
- }
- }
- if (Method == 0)
- {
- for (int i=0; i<KRows; i++)
- {
- for (int j=0; j<KColumns; j++)
- {
- if (KMask[i][j] == true)
- {
- NodeData[NodeMap[i]][j] += KData[i][j];
- NodeDataCount[NodeMap[i]][j] ++;
- }
- }
- }
- for (int i=0; i<K; i++)
- {
- for (int j=0; j<KColumns; j++)
- {
- if (NodeDataCount[i][j]> 0)
- {
- NodeData[i][j] /= NodeDataCount[i][j];
- }
- }
- }
- }
- else
- {
- for (int j=0; j<KColumns; j++)
- {
- for (int i=0; i<K; i++)
- {
- MedCount[i+1] = 0;
- }
- for (int i=0; i<KRows; i++)
- {
- if (KMask[i][j] == true)
- {
- MedCount[NodeMap[i]+1]++;
- MedMatrix[NodeMap[i]+1][MedCount[NodeMap[i]+1]] = KData[i][j];
- }
- }
- for (int i=0; i<K; i++)
- {
- if (MedCount[i+1] > 0)
- {
- int index = (MedCount[i+1]+1)/2;
- NodeData[i][j] = select(index,MedCount[i+1],MedMatrix[i+1]);
- }
- else
- {
- NodeData[i][j] = 0;
- }
- }
- }
- }
- for (int i=0; i<KRows; i++)
- {
- double BestCorr = -1;
- int BestNode = NodeMap[i];
- if (i % 100 == 0)
- {
- StatusBar1->SimpleText = "Last Iteration (#" + AnsiString(Iterations) + ") Num Moved = " +
- AnsiString(NumMoved) + ": Processed " + AnsiString(i) + " of " + AnsiString(KRows);
- Application->ProcessMessages();
- }
- for (int j=0; j<K; j++)
- {
- double Sum11 = 0;
- double Sum12 = 0;
- double Sum22 = 0;
- for (int l=0; l<KColumns; l++)
- {
- if (KMask[i][l] == true)
- {
- Sum11 += pow(KData[i][l],2.0);
- Sum22 += pow(NodeData[j][l],2.0);
- Sum12 += KData[i][l] * NodeData[j][l];
- }
- }
- double Denom = sqrt(Sum11 * Sum22);
- if (Denom > 0)
- {
- double Corr = Sum12 / Denom;
- if (Corr > BestCorr)
- {
- BestNode = j;
- BestCorr = Corr;
- }
- }
- }
- if (BestNode != NodeMap[i])
- {
- NodeMap[i] = BestNode;
- NumMoved++;
- }
- }
- StatusBar1->SimpleText = "Last Iteration (#" + AnsiString(Iterations) + ") Num Moved = " +
- AnsiString(NumMoved);
- Application->ProcessMessages();
- }
- for (int i=0; i<K; i++)
- {
- delete NodeData[i];
- delete NodeDataCount[i];
- }
- delete NodeData;
- delete NodeDataCount;
- if (Method == 1)
- {
- free_matrix(MedMatrix,1,K,1,KRows);
- free_ivector(MedCount,1,K);
- }
- float * FinNode = vector(1,KRows);
- int * FinPos = ivector(1,KRows);
- for (int i=0; i<KRows; i++)
- {
- FinNode[i+1] = NodeMap[i];
- FinPos[i+1] = i;
- }
- sort2fi(KRows,FinNode,FinPos);
- for (int i=0; i<KRows; i++)
- {
- NodeMap[i] = FinPos[i+1];
- }
- free_vector(FinNode,1,KRows);
- free_ivector(FinPos,1,KRows);
- return NodeMap;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::GenesKEditExit(TObject *Sender)
- {
- int TempVal = GenesK;
- try
- {
- GenesK = GenesKEdit->Text.ToInt();
- }
- catch (EConvertError &E)
- {
- GenesK = TempVal;
- }
- GenesKEdit->Text = GenesK;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::ArraysKEditExit(TObject *Sender)
- {
- int TempVal = ArraysK;
- try
- {
- ArraysK = ArraysKEdit->Text.ToInt();
- }
- catch (EConvertError &E)
- {
- ArraysK = TempVal;
- }
- ArraysKEdit->Text = ArraysK;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::GMaxKCyclesEditExit(TObject *Sender)
- {
- int TempVal = GMaxKCycles;
- try
- {
- GMaxKCycles = GMaxKCyclesEdit->Text.ToInt();
- }
- catch (EConvertError &E)
- {
- GMaxKCycles = TempVal;
- }
- GMaxKCyclesEdit->Text = GMaxKCycles;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::AMaxKCyclesEditExit(TObject *Sender)
- {
- int TempVal = AMaxKCycles;
- try
- {
- AMaxKCycles = AMaxKCyclesEdit->Text.ToInt();
- }
- catch (EConvertError &E)
- {
- AMaxKCycles = TempVal;
- }
- AMaxKCyclesEdit->Text = AMaxKCycles;
- }
- //---------------------------------------------------------------------------
- void __fastcall TMainForm::AboutButtonClick(TObject *Sender)
- {
- AboutForm->Show();
- }