FP-growth挖掘算法
步骤一
扫描数据库,扫描数据库一次,得到频繁1-项集,把项按支持度递减排序,再一次扫描数据库,建立FP-tree
步骤二
对每个项,生成它的 条件模式库
步骤三
用条件模式库构造对应的条件FP-tree,递归构造条件 FP-trees 同时增长其包含的频繁集,如果条件FP-tree直包含一个路径,则直接生成所包含的频繁集
C++源码
1 #include<bits/stdc++.h> 2 #include<string> 3 #include<algorithm> 4 #include<vector> 5 #include<map> 6 #include<sstream> 7 #define suport 0.001//最小支持度 8 using namespace std; 9 struct FPtreeNode{//孩子兄弟链表法存储FP-tree 10 int data; 11 int count; 12 FPtreeNode *father; 13 FPtreeNode *children; 14 FPtreeNode *brother; 15 FPtreeNode *next; 16 FPtreeNode(){ 17 data=0; 18 count=0; 19 father=NULL; 20 children=NULL; 21 brother=NULL; 22 next=NULL; 23 } 24 }; 25 struct resultNode{//结果 26 string data; 27 int count; 28 resultNode(){ 29 count=0; 30 } 31 }; 32 struct listNode{//头表 33 int data; 34 int count; 35 FPtreeNode *next; 36 }; 37 struct fptreeNode{//递归过程中的子FP-tree 38 int data; 39 int count; 40 fptreeNode(){ 41 data=0; 42 count=0; 43 } 44 }; 45 int line=0; 46 vector<string> Getfile(){ 47 vector<string> file; 48 ifstream ifile("D://retail.txt"); 49 if(!ifile){ 50 cout<<"open file error"<<endl; 51 } 52 else{ 53 string temp; 54 while(getline(ifile,temp)){ 55 line++; 56 file.insert(file.end(),temp); 57 } 58 } 59 ifile.close(); 60 return file; 61 } 62 bool cmp( listNode &a,listNode &b){ 63 return a.count>b.count; 64 } 65 66 vector<listNode> Getheadlist(vector<string> &file){ 67 vector<listNode>L1; 68 map<string,int>r1; 69 string temp; 70 string lk; 71 for(int f=0;f<file.size();f++){//第一次扫描数据库 72 temp=file[f]; 73 int i; 74 for( i=0;i<temp.size();i++){ 75 while(temp[i]!=' '&&temp[i]!='\n'&&i!=temp.size()){ 76 lk+=temp[i]; 77 i++; 78 } 79 if(r1.find(lk)!=r1.end()) 80 r1[lk]++; 81 else 82 r1[lk]=1; 83 lk.clear();// 84 } 85 } 86 temp.clear(); 87 map<string,int>::iterator it; 88 for(it=r1.begin();it!=r1.end();it++){//待删除 89 90 if(it->second>=ceil(suport*line)){ 91 string s=it->first; 92 int x=atoi(s.c_str());//转换成整数 93 listNode xx; 94 xx.data=x; 95 xx.count=it->second; 96 xx.next=NULL; 97 L1.insert(L1.end(),xx); 98 } 99 } 100 sort(L1.begin(),L1.end(),cmp); 101 return L1; 102 } 103 bool Isin(string temp,string str){ 104 int j; 105 string s; 106 for( j=0;j<temp.size();j++){//遍历temp 107 while(temp[j]!=' '&&j<temp.size()&&temp[j]!='\n'){ 108 s.insert(s.end(),temp[j]); 109 j++; 110 } 111 if(s==str) 112 break; 113 s.clear(); 114 } 115 if(j>=temp.size()) 116 return 0; 117 else 118 return 1; 119 120 } 121 vector<vector<int> > Get_FPfile(vector<string> &file,vector<listNode> &L1){//第二次扫描数据库 删除非频繁一项 122 string temp; 123 vector<vector<int> > rfile; 124 vector<int >ri; 125 for(int f=0;f<file.size();f++){ 126 temp=file[f]; 127 for(int k=0;k<L1.size();k++){ 128 string s; 129 int n=L1[k].data; 130 131 stringstream ss; 132 ss<<n; 133 ss>>s; 134 if(Isin(temp,s)){ 135 ri.push_back(n); 136 } 137 } 138 if(ri.size()>0){ 139 rfile.push_back(ri); 140 ri.clear(); 141 } 142 temp.clear(); 143 } 144 file.clear(); 145 return rfile; 146 } 147 int c=0; 148 void Linknext(FPtreeNode *newNode,vector<listNode> &L1){ 149 for(int m=0;m<L1.size();m++){ 150 if(L1[m].data==newNode->data){ 151 if(L1[m].next==NULL){ 152 L1[m].next=newNode; 153 } 154 else{ 155 FPtreeNode *t1=L1[m].next; 156 FPtreeNode *t2=L1[m].next; 157 while(t1){ 158 t2=t1; 159 t1=t1->next; 160 } 161 t2->next=newNode; 162 } 163 break; 164 } 165 } 166 167 } 168 FPtreeNode* Buildtree( vector<vector<int> > &rfile,vector<listNode> &L1){ 169 FPtreeNode *head=new FPtreeNode; 170 FPtreeNode *p=head; 171 FPtreeNode *q=head; 172 int flag=0; 173 for(int i=0;i<rfile.size();i++){ 174 p=head; 175 int j=0; 176 while(j<rfile[i].size()){ 177 flag=0; 178 if(i==0){//第一条 179 FPtreeNode *newNode=new FPtreeNode; 180 c++; 181 newNode->count=1; 182 newNode->data=rfile[i][j]; 183 newNode->father=p; 184 p->children=newNode; 185 p=p->children; 186 j++; 187 for(int m=0;m<L1.size();m++){ 188 if(L1[m].data==newNode->data){ 189 L1[m].next=newNode; 190 break; 191 } 192 } 193 } 194 else{ 195 p=p->children; 196 while(p&&j<rfile[i].size()){ 197 if(p->data==rfile[i][j]){ 198 p->count++; 199 q=p;//q->chilren=p; 200 p=p->children; 201 j++; 202 flag=1; 203 } 204 else{// 205 q=p;//q->brother=p; 206 p=p->brother; 207 flag=2; 208 } 209 210 } 211 if(flag==1){ 212 while(j<rfile[i].size()){ 213 FPtreeNode *newNode=new FPtreeNode; 214 c++; 215 newNode->count=1; 216 newNode->father=q; 217 q->children=newNode; 218 newNode->data=rfile[i][j]; 219 q=q->children; 220 j++; 221 //Linknext(); 222 Linknext(newNode,L1); 223 } 224 225 226 } 227 else if(flag==2){ 228 FPtreeNode *newNode=new FPtreeNode;c++; 229 newNode->count=1; 230 newNode->data=rfile[i][j]; 231 q->brother=newNode; 232 newNode->father=q->father; 233 q=q->brother; 234 j++; 235 Linknext(newNode,L1); 236 while(j<rfile[i].size()){ 237 FPtreeNode *newNode=new FPtreeNode; 238 c++; 239 newNode->count=1; 240 newNode->father=q; 241 q->children=newNode; 242 newNode->data=rfile[i][j]; 243 q=q->children; 244 j++; 245 //Linknext(); 246 Linknext(newNode,L1); 247 } 248 249 250 } 251 } 252 } 253 } 254 return head; 255 } 256 vector<string> GetFrequentItems(listNode &lk,FPtreeNode* &head){//生成条件模式基 rfile 257 FPtreeNode *p=lk.next; 258 vector<string> rfile; 259 while(p){ 260 FPtreeNode* q=p; 261 vector<string> temp; 262 while(q->father!=head){ 263 q=q->father; 264 stringstream ss; 265 string x; 266 int n; 267 n=q->data; 268 269 ss<<n; 270 ss>>x; 271 272 temp.push_back(x+" "); 273 } 274 reverse(temp.begin(),temp.end()); 275 string s; 276 277 for(int j=0;j<temp.size();j++){ 278 s+=temp[j]; 279 } 280 for(int i=0;i<p->count;i++){ 281 if(s.size()>0) 282 rfile.push_back(s); 283 } 284 s.clear(); 285 p=p->next; 286 } 287 return rfile; 288 } 289 vector<resultNode> result; 290 void Getresult(vector<listNode> &headlist,FPtreeNode* &head,string &base,vector<resultNode> &result){ 291 if(headlist.empty()){ 292 return; 293 } 294 for(auto p = headlist.rbegin(); p != headlist.rend(); p++){ 295 resultNode temp; 296 int n; 297 n=p->data;//int to string 298 stringstream ss; 299 string x; 300 ss<<n; 301 ss>>x; 302 string xx=base+" "+x; 303 temp.data=xx; 304 temp.count=p->count; 305 result.push_back(temp); 306 /*****递归******/ 307 //产生条件模式基 308 vector<string> file1=GetFrequentItems(*p,head); 309 310 311 vector<listNode>headlist1= Getheadlist(file1);//getL1 312 313 vector<vector<int> > rfile1=Get_FPfile(file1,headlist1); 314 315 //建树 316 FPtreeNode* Tree=Buildtree(rfile1,headlist1); 317 318 string s=base+" "+x; 319 //产生结果 320 Getresult(headlist1,Tree,s,result); 321 } 322 323 } 324 void Print(){ 325 for (auto p =result.cbegin(); p != result.cend(); p++) 326 { 327 cout << p->data << " "<<"("<<p->count<<")"<< endl; 328 } 329 } 330 int main(){ 331 vector<string> file=Getfile(); 332 vector<listNode> headlist=Getheadlist(file); 333 334 vector<vector<int> >rfile=Get_FPfile(file,headlist); 335 336 FPtreeNode* head=Buildtree(rfile,headlist); 337 string base=" "; 338 339 Getresult(headlist,head,base,result); 340 Print(); 341 cout<<result.size(); 342 return 0; 343 }
所有评论(0)