FP-growth挖掘算法

步骤一

扫描数据库,扫描数据库一次,得到频繁1-项集,把项按支持度递减排序,再一次扫描数据库,建立FP-tree

步骤二

对每个项,生成它的 条件模式库

步骤三

用条件模式库构造对应的条件FP-tree,递归构造条件 FP-trees 同时增长其包含的频繁集,如果条件FP-tree直包含一个路径,则直接生成所包含的频繁集

 

C++源码

  1 #include<bits/stdc++.h> 
  2 #include<string>
  3 #include<algorithm>
  4 #include<vector>
  5 #include<map> 
  6 #include<sstream> 
  7 #define suport 0.001//最小支持度
  8 using namespace std;
  9  struct FPtreeNode{//孩子兄弟链表法存储FP-tree
 10     int  data;
 11     int count;
 12     FPtreeNode *father;
 13     FPtreeNode *children;
 14     FPtreeNode *brother;
 15     FPtreeNode *next;
 16     FPtreeNode(){
 17         data=0;
 18         count=0;
 19         father=NULL;
 20         children=NULL;
 21         brother=NULL;
 22         next=NULL;
 23     }
 24 };
 25 struct resultNode{//结果
 26     string data;
 27     int count;
 28     resultNode(){
 29         count=0;
 30     } 
 31 };
 32 struct listNode{//头表
 33     int data;
 34     int count;
 35     FPtreeNode *next;
 36 };
 37 struct fptreeNode{//递归过程中的子FP-tree 
 38     int data;
 39     int count;
 40     fptreeNode(){
 41         data=0;
 42         count=0;
 43     }
 44 };
 45 int line=0;
 46 vector<string> Getfile(){
 47     vector<string> file;
 48         ifstream ifile("D://retail.txt");
 49     if(!ifile){
 50         cout<<"open file error"<<endl;
 51     }
 52     else{
 53         string temp;
 54         while(getline(ifile,temp)){
 55             line++;
 56             file.insert(file.end(),temp);
 57         }
 58     }
 59     ifile.close();
 60     return file;
 61 } 
 62 bool cmp( listNode &a,listNode &b){
 63     return a.count>b.count;
 64 }    
 65 
 66 vector<listNode> Getheadlist(vector<string> &file){
 67         vector<listNode>L1;
 68         map<string,int>r1;
 69         string temp;
 70             string lk;
 71         for(int f=0;f<file.size();f++){//第一次扫描数据库 
 72             temp=file[f];
 73             int i;
 74             for( i=0;i<temp.size();i++){
 75                 while(temp[i]!=' '&&temp[i]!='\n'&&i!=temp.size()){
 76                 lk+=temp[i];
 77                     i++;
 78                 }
 79                 if(r1.find(lk)!=r1.end())
 80                 r1[lk]++;
 81                 else
 82                 r1[lk]=1;
 83                 lk.clear();//
 84             }
 85         }
 86         temp.clear(); 
 87         map<string,int>::iterator it;
 88         for(it=r1.begin();it!=r1.end();it++){//待删除 
 89         
 90             if(it->second>=ceil(suport*line)){
 91             string s=it->first;
 92             int x=atoi(s.c_str());//转换成整数
 93             listNode xx;
 94             xx.data=x;
 95             xx.count=it->second;
 96             xx.next=NULL; 
 97             L1.insert(L1.end(),xx);
 98             }
 99         }
100         sort(L1.begin(),L1.end(),cmp);
101         return L1;
102 }
103 bool Isin(string temp,string str){
104     int j;
105     string s;
106      for( j=0;j<temp.size();j++){//遍历temp
107                          while(temp[j]!=' '&&j<temp.size()&&temp[j]!='\n'){
108                              s.insert(s.end(),temp[j]);
109                              j++;
110                          }
111                         if(s==str)
112                         break;
113                         s.clear();
114                      }
115     if(j>=temp.size())
116     return 0;
117     else
118     return 1;                 
119     
120 }
121 vector<vector<int> > Get_FPfile(vector<string> &file,vector<listNode> &L1){//第二次扫描数据库 删除非频繁一项 
122     string temp;
123     vector<vector<int> > rfile;
124     vector<int >ri;
125     for(int f=0;f<file.size();f++){
126         temp=file[f];
127             for(int k=0;k<L1.size();k++){
128                 string s;
129                 int n=L1[k].data;
130                 
131                 stringstream ss;
132                 ss<<n;
133                 ss>>s;
134                 if(Isin(temp,s)){
135                     ri.push_back(n);
136                 }
137             }
138             if(ri.size()>0){
139             rfile.push_back(ri);
140             ri.clear();
141             }
142             temp.clear();
143         }
144         file.clear();
145         return rfile;
146 }
147 int c=0; 
148 void Linknext(FPtreeNode *newNode,vector<listNode> &L1){
149                     for(int m=0;m<L1.size();m++){
150                     if(L1[m].data==newNode->data){
151                             if(L1[m].next==NULL){
152                             L1[m].next=newNode;
153                             }
154                             else{
155                             FPtreeNode *t1=L1[m].next;
156                             FPtreeNode *t2=L1[m].next;
157                             while(t1){
158                             t2=t1;
159                             t1=t1->next;
160                             }
161                             t2->next=newNode;        
162                             }    
163                             break;
164                         }
165                     } 
166     
167 }
168 FPtreeNode*  Buildtree(    vector<vector<int> > &rfile,vector<listNode> &L1){
169     FPtreeNode *head=new FPtreeNode;
170     FPtreeNode *p=head;
171     FPtreeNode *q=head;
172     int flag=0;
173     for(int i=0;i<rfile.size();i++){
174                 p=head;
175                 int j=0;
176         while(j<rfile[i].size()){
177             flag=0;        
178             if(i==0){//第一条 
179                 FPtreeNode *newNode=new FPtreeNode;
180                 c++;
181                 newNode->count=1;
182                 newNode->data=rfile[i][j];
183                 newNode->father=p;
184                 p->children=newNode;
185                 p=p->children; 
186                 j++; 
187                 for(int m=0;m<L1.size();m++){
188                     if(L1[m].data==newNode->data){    
189                         L1[m].next=newNode;
190                         break;
191                     }
192                 } 
193             }    
194             else{
195                     p=p->children;
196                 while(p&&j<rfile[i].size()){        
197                     if(p->data==rfile[i][j]){
198                     p->count++;
199                     q=p;//q->chilren=p;
200                     p=p->children;
201                     j++;
202                     flag=1;
203                     }
204                     else{//
205                     q=p;//q->brother=p;
206                     p=p->brother;
207                     flag=2;                    
208                     }
209                     
210             }
211                 if(flag==1){    
212                 while(j<rfile[i].size()){
213                     FPtreeNode *newNode=new FPtreeNode;
214                     c++;
215                     newNode->count=1;
216                     newNode->father=q;
217                     q->children=newNode;
218                     newNode->data=rfile[i][j];
219                     q=q->children;
220                     j++;
221                         //Linknext();
222                     Linknext(newNode,L1);    
223                 }
224                 
225 
226             }
227             else  if(flag==2){
228                 FPtreeNode *newNode=new FPtreeNode;c++;
229                 newNode->count=1;
230                 newNode->data=rfile[i][j];
231                 q->brother=newNode;
232                 newNode->father=q->father;
233                 q=q->brother;
234                 j++;
235                 Linknext(newNode,L1);
236                 while(j<rfile[i].size()){
237                     FPtreeNode *newNode=new FPtreeNode;
238                     c++;
239                     newNode->count=1;
240                     newNode->father=q;
241                     q->children=newNode;
242                     newNode->data=rfile[i][j];
243                     q=q->children;
244                     j++;
245                             //Linknext();
246                     Linknext(newNode,L1);
247                 }
248             
249 
250                 }
251             }
252         }
253     }
254     return head;
255 }
256 vector<string> GetFrequentItems(listNode &lk,FPtreeNode* &head){//生成条件模式基 rfile
257     FPtreeNode *p=lk.next;
258     vector<string> rfile;
259     while(p){
260         FPtreeNode* q=p;
261         vector<string> temp;
262         while(q->father!=head){            
263             q=q->father;
264             stringstream ss;
265             string x;
266             int n;
267             n=q->data;
268             
269             ss<<n;
270             ss>>x;
271         
272             temp.push_back(x+" ");
273         }
274         reverse(temp.begin(),temp.end());
275         string s;
276     
277         for(int j=0;j<temp.size();j++){
278                 s+=temp[j];
279             }
280         for(int i=0;i<p->count;i++){
281             if(s.size()>0)
282             rfile.push_back(s);    
283         }
284         s.clear();
285         p=p->next;
286     }
287     return rfile;
288 }
289 vector<resultNode> result;
290 void Getresult(vector<listNode> &headlist,FPtreeNode* &head,string &base,vector<resultNode> &result){
291     if(headlist.empty()){
292         return;
293     }
294     for(auto p = headlist.rbegin(); p != headlist.rend(); p++){
295         resultNode temp;
296         int n;
297         n=p->data;//int to string
298         stringstream ss;
299         string x;
300         ss<<n;
301         ss>>x;
302         string xx=base+" "+x;
303         temp.data=xx;
304         temp.count=p->count;
305         result.push_back(temp);
306         /*****递归******/
307         //产生条件模式基
308         vector<string> file1=GetFrequentItems(*p,head);
309 
310         
311         vector<listNode>headlist1= Getheadlist(file1);//getL1
312     
313         vector<vector<int> > rfile1=Get_FPfile(file1,headlist1);
314         
315         //建树
316         FPtreeNode* Tree=Buildtree(rfile1,headlist1);
317          
318         string s=base+" "+x;
319         //产生结果
320         Getresult(headlist1,Tree,s,result);
321     }
322     
323 }
324 void Print(){
325     for (auto p =result.cbegin(); p != result.cend(); p++)
326     {
327         cout << p->data << " "<<"("<<p->count<<")"<< endl;
328     }
329 }
330 int main(){
331     vector<string> file=Getfile();
332     vector<listNode> headlist=Getheadlist(file);
333     
334     vector<vector<int> >rfile=Get_FPfile(file,headlist);
335     
336     FPtreeNode* head=Buildtree(rfile,headlist);
337     string base=" ";
338 
339     Getresult(headlist,head,base,result);
340     Print();
341     cout<<result.size(); 
342     return 0;
343 }
View Code

 

转载于:https://www.cnblogs.com/bucthuge/p/10479944.html

Logo

腾讯云面向开发者汇聚海量精品云计算使用和开发经验,营造开放的云计算技术生态圈。

更多推荐