#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <malloc.h>
#include "auth.h"
#include "network.h"
#include "convert.h"
#include "convert.h"
struct problemlist
{
char *title;
char *rate;
int view;
struct problemlist *next;
};
/* Neither of these two is referenced in the visible part of this file. */
FILE *debug;
clock_t start;

/* Worker bookkeeping; the three arrays are indexed by worker slot. */
int threadc[50];                 /* slot number handed to each worker as its argument */
struct problemlist *threads[50]; /* node most recently claimed by each worker */
pthread_t threadpool[50];        /* thread handles filled in by threadini() */
int threadnumber=0;              /* workers created by threadini() that have not finished yet */
pthread_mutex_t cs;              /* initialised in main(); never locked in the visible code */
pthread_mutex_t tcs;             /* held around accesses to threadnumber */
pthread_mutex_t hcs;             /* held while a worker advances newlist */
int action=0;                    /* main() sets this to 1 to release the waiting workers */
int pagecount=0;                 /* number of nodes query() has appended to the list */
struct problemlist **sort;       /* scratch array built by sortini() and reduced by mergesort() */

/* Command-line values; ini() points these at argv entries. */
char *starttime=0,*endtime=0;
char *projectname=0;
char *outputfile=0;
char *user=0,*passwd=0;

/* head: first node of the article list.
   newlist: next node not yet claimed by a worker (also reused as a cursor by sortini()). */
struct problemlist *head=0,*newlist=0;

/*
 * Prototypes for every function defined in this file, so that none of them is
 * called through an implicit declaration (main() calls ini(), query() and
 * threadini(), and threadfunc() calls proceedchild(), before their definitions).
 */
int ini(int argc,char *argv[]);
int query(char *cat,char *rate);
void sortini();
int threadini(int count);
int proceedchild(struct problemlist *p);
void show(struct problemlist *head);
int mergesort(struct problemlist **tg,int number);
struct problemlist *mergelist(struct problemlist *a,struct problemlist *b);
void *threadfunc(void *c);
void pagesort();
void useage();
/*
 * Program entry point.
 *
 * Parses the command line with ini(), logs in, queries one category per
 * quality grade to build the article list, lets the worker threads fill in the
 * view counts, then sorts the list and writes it out with show().
 *
 * Returns 0 on success and -1 when the arguments are incomplete, the project
 * name does not fit the category-name buffer, or no worker thread could be
 * started for a non-empty list.
 */
int main(int argc,char *argv[])
{
    /* Category-name infix for each grade, paired with the label passed to query(). */
    static char *grades[][2]=
    {
        {"小作品级","Stub"},
        {"初级","Start"},
        {"丙级","C"},
        {"乙级","B"},
        {"优良级","GA"},
        {"甲级","A"},
        {"特色级","FA"},
    };
    char name[512];
    size_t g;
    int len;
    int count=0;

    if(ini(argc,argv))
    {
        useage();
        return -1;
    }
    login(user,passwd);

    for(g=0;g<sizeof grades/sizeof grades[0];g++)
    {
        /* Bounded formatting: projectname comes straight from argv. */
        len=snprintf(name,sizeof name,"Category:%s%s条目",grades[g][0],projectname);
        if(len<0||(size_t)len>=sizeof name)
        {
            fprintf(stderr,"project name is too long\n");
            return -1;
        }
        query(name,grades[g][1]);
    }

    newlist=head;
    fflush(stdout);
    pthread_mutex_init(&cs,NULL);
    pthread_mutex_init(&tcs,NULL);
    pthread_mutex_init(&hcs,NULL);
    threadini(50);
    printf("Create %d threads to go through the problem list.\n",threadnumber);
    fflush(stdout);

    /* Without any worker nobody would ever drain the list and the wait below
       would never end. Workers are still parked on `action` here, so reading
       threadnumber and newlist is safe. */
    if(threadnumber==0&&newlist!=NULL)
    {
        fprintf(stderr,"could not create any worker thread\n");
        return -1;
    }

    action=1;

    /* Wait until every node has been claimed. newlist is advanced by the
       workers under hcs, so it is read under the same lock. */
    for(;;)
    {
        int drained;
        pthread_mutex_lock(&hcs);
        drained=(newlist==NULL);
        pthread_mutex_unlock(&hcs);
        if(drained) break;
        sleep(1);
    }

    /* Give the workers up to 20 seconds to finish the nodes they already hold.
       After that the program carries on even if some are still running. */
    for(count=0;count<20;count++)
    {
        int running;
        pthread_mutex_lock(&tcs);
        running=threadnumber;
        pthread_mutex_unlock(&tcs);
        if(running<=0) break;
        printf("waiting for all threads to exit. Current thread number: %d\n",running);
        fflush(stdout);
        sleep(1);
    }

    pagesort();
    show(head);
    return 0;
}
/* Writes the one-line command-line synopsis to standard output. */
void useage()
{
    static const char synopsis[]=
        "useage: pstat -u username -p password -f filename -s starttime -e endtime -n projectname\n";
    fputs(synopsis,stdout);
}
/*
 * Parses "-x value" pairs from the command line into the option globals.
 *
 * Recognised flags: -u user, -p passwd, -f outputfile, -s starttime,
 * -e endtime, -n projectname. A flag is only taken when it is followed by an
 * argument that does not itself start with '-'; the value is stored as a
 * pointer into argv (no copy). Unknown flags are skipped together with
 * their value.
 *
 * Returns 0 when all six options were supplied, 1 otherwise.
 */
int ini(int argc,char *argv[])
{
    int hasu=0,hasp=0,hasn=0,hasf=0,hass=0,hase=0;
    int i;

    for(i=1;i<argc;i++)
    {
        /* argv[argc] is NULL, so looking at argv[i+1] is always valid here. */
        if(argv[i][0]!='-'||!argv[i+1]||argv[i+1][0]=='-') continue;

        switch(argv[i][1])
        {
        case 'u':
            user=argv[i+1];
            hasu=1;
            break;
        case 'f':
            outputfile=argv[i+1];
            hasf=1;
            break;
        case 'p':
            passwd=argv[i+1];
            hasp=1;
            break;
        case 's':
            starttime=argv[i+1];
            hass=1;
            break;
        case 'e':
            /* This case used to fall through into 'n': it never recorded
               that -e was given and overwrote projectname with the end time. */
            endtime=argv[i+1];
            hase=1;
            break;
        case 'n':
            projectname=argv[i+1];
            hasn=1;
            break;
        default:
            break;
        }
        i++; /* step over the value that was just consumed */
    }

    return (hasu&&hasf&&hasp&&hasn&&hass&&hase)?0:1;
}
/*
 * Appends one list node per talk page found in category `cat`.
 *
 * The category is requested from the zh.wikipedia.org API
 * (list=categorymembers, namespace 1, XML format). Whenever a <continue>
 * element is seen, its cmcontinue value is used to request the next page
 * until no continuation is reported.
 *
 * For every <cm> element whose title starts with "Talk:", a node is appended
 * to the global list: title is a heap copy of the page title without the
 * "Talk:" prefix and with spaces turned into underscores, rate is the `rate`
 * pointer itself (not copied), view is 0. pagecount is incremented per node.
 *
 * cat:  category title, e.g. "Category:...".
 * rate: rating label stored in each new node; must outlive the list.
 *
 * Returns 0 on success, -1 if the request URL does not fit its buffer or a
 * node could not be allocated.
 */
int query(char *cat,char *rate)
{
    HTTP f;
    int i=0;
    int len;
    struct problemlist *temp,*pre;
    char title[3000];
    char url[5000];
    char offset[2000]={0},off[2000]={0};
    char line[5000]={0};
    char talk[3000]={0};
    size_t baselen;
    int next=0;
    char *ctn[]={"cmcontinue"};
    char *ctv[1];
    char *mtn[]={"title"};
    char *mtv[1];
    mtv[0]=talk;
    ctv[0]=off;

    /* Locate the current tail. This function is called once per category, so
       from the second call on the list already exists and new nodes must be
       linked behind its last element. */
    pre=head;
    while(pre&&pre->next) pre=pre->next;

    URLEncode(cat,strlen(cat),title,2990);
    len=snprintf(url,sizeof url,"https://zh.wikipedia.org/w/api.php?action=query&list=categorymembers&format=xml&cmnamespace=1&cmlimit=5000&cmtitle=%s",title);
    if(len<0||(size_t)len>=sizeof url) return -1;
    baselen=(size_t)len;

    do
    {
        if(next)
        {
            /* Rewrite the continuation parameter from the end of the base
               URL so that earlier cmcontinue values do not accumulate. */
            len=snprintf(url+baselen,sizeof url-baselen,"&cmcontinue=%s",offset);
            if(len<0||(size_t)len>=sizeof url-baselen) return -1;
        }
        f=hopen();
        get(url,1,f);
        skipresponseheader(f);
        next=0;
        do
        {
            xmlparsetag(f,line);
            if(!next&&!strcmp("continue",line))
            {
                xmlparsearg(f,1,ctn,ctv);
                URLEncode(off,strlen(off),offset,1490);
                next=1;
            }
            if(!strcmp("cm",line))
            {
                xmlparsearg(f,1,mtn,mtv);
                if(!strncmp("Talk:",talk,5))
                {
                    temp=calloc(1,sizeof *temp);
                    if(!temp)
                    {
                        hclose(f);
                        return -1;
                    }
                    temp->rate=rate;
                    for(i=0;talk[i];i++) {if(talk[i]==' ') talk[i]='_';}
                    temp->title=malloc(strlen(&talk[5])+1);
                    if(!temp->title)
                    {
                        free(temp);
                        hclose(f);
                        return -1;
                    }
                    strcpy(temp->title,&talk[5]);
                    pagecount++;
                    if(!head) head=temp;
                    else pre->next=temp;
                    pre=temp;
                }
            }
        }while(!heof(f));
        hclose(f);
    }while(next);
    return 0;
}
/*
 * Builds the global `sort` array from the linked list starting at `head`.
 *
 * Each of the first pagecount nodes is detached (its next pointer cleared) and
 * stored in its own slot, so every slot holds a one-element list ready for
 * mergesort(). The array gets one extra slot that stays NULL: mergesort()
 * merges slots in pairs and may look one slot past the last element when the
 * count is odd. `newlist` is used as the cursor and is left pointing at
 * whatever follows the last node taken (NULL for a consistent list).
 *
 * Terminates the program if the array cannot be allocated.
 */
void sortini()
{
    int i;

    sort=calloc((size_t)pagecount+1,sizeof *sort);
    if(!sort)
    {
        perror("calloc");
        exit(EXIT_FAILURE);
    }

    newlist=head;
    /* Also stop at the end of the list in case it is shorter than pagecount;
       the remaining slots then stay NULL. */
    for(i=0;i<pagecount&&newlist;i++)
    {
        sort[i]=newlist;
        newlist=newlist->next;
        sort[i]->next=0;
    }
}
/*
 * Bottom-up merge of an array of sorted lists into a single list.
 *
 * tg:     array whose first `number` slots each hold a list (or NULL).
 * number: how many slots are in use.
 *
 * Neighbouring slots are merged pairwise with mergelist() and the results are
 * packed at the front of the array; this repeats until one list is left in
 * tg[0]. After each pass the slot just behind the results is set to NULL.
 *
 * Returns 0 when there was nothing to merge (number <= 1), 1 otherwise.
 */
int mergesort(struct problemlist **tg,int number)
{
    int count,pos;

    /* Zero or one list: already done. The empty case must stop here, since
       a pass over zero slots would never reduce the count. */
    if(number<=1) return 0;

    while(number>1)
    {
        for(count=0,pos=0;pos<number;count++,pos+=2)
        {
            struct problemlist *a=tg[pos];
            /* With an odd count the last list has no partner; do not read
               past the slots that are in use. */
            struct problemlist *b=(pos+1<number)?tg[pos+1]:NULL;
            tg[count]=mergelist(a,b);
        }
        /* count is at most number-1 here, so this stays inside the array. */
        tg[count]=0;
        number=count;
    }
    return 1;
}
/*
 * Merges two lists that are each ordered by descending view count and
 * returns the head of the combined list. Nodes are re-linked in place.
 * When both front nodes have the same view count, the one from `b` is
 * taken first. If either list is empty, the other is returned as is.
 */
struct problemlist *mergelist(struct problemlist *a,struct problemlist *b)
{
    struct problemlist *merged=0;
    struct problemlist **link=&merged; /* where the next chosen node gets attached */

    if(a==0) return b;
    if(b==0) return a;

    while(a!=0&&b!=0)
    {
        /* Pick the list whose front node goes next. */
        struct problemlist **src=(a->view>b->view)?&a:&b;

        *link=*src;            /* attach the node */
        *src=(*src)->next;     /* advance the list it came from */
        link=&(*link)->next;
        *link=0;               /* keep the result terminated */
    }

    /* One list ran out; the rest of the other follows unchanged. */
    *link=(a!=0)?a:b;
    return merged;
}
/*
 * Sorts the global list by view count (highest first, as ordered by
 * mergelist()) and stores the new first node in `head`.
 *
 * An empty list is left untouched: there is nothing to sort and no array
 * slot to read the result from.
 */
void pagesort()
{
    if(pagecount<=0||!head) return;

    sortini();
    mergesort(sort,pagecount);
    head=sort[0];
}
/*
 * Writes the list as a wikitable to the file named by `outputfile`.
 *
 * head: first node to print; nodes are numbered from 1 in list order.
 *
 * The file is created or truncated. The first line holds the start and end
 * time, followed by the table header and one row per node with rank, title
 * (as a wiki link), rating and view count. If the file cannot be opened or
 * closed cleanly, a message is printed to stderr; nothing is written when
 * opening fails.
 */
void show(struct problemlist *head)
{
    FILE *f;
    int count=0;

    f=fopen(outputfile,"w+");
    if(!f)
    {
        perror(outputfile);
        return;
    }

    fprintf(f,"%s - %s \n",starttime,endtime);
    fprintf(f,"{| class=\"wikitable sortable\" style=\"text-align: right;\"\n! 排名\n! 条目名\n! 评级\n! 访问量\n");
    for(;head;head=head->next)
    {
        count++;
        fprintf(f,"|-\n| %d\n| style='text-align: left;' | [[%s]]\n| %s\n| %d\n",count,head->title,head->rate,head->view);
    }
    fprintf(f,"|}");

    /* fclose flushes buffered output, so a failure here means lost data. */
    if(fclose(f)!=0) perror(outputfile);
}
/*
 * Starts up to `count` detached worker threads running threadfunc().
 *
 * count: number of workers requested; limited to the number of slots in
 *        threadpool (threadc and threads have the same size).
 *
 * Each worker receives a pointer to its own slot number in threadc.
 * threadnumber is reset and then counts the threads that were actually
 * created, so callers can tell how many workers exist.
 *
 * Returns 0, or -1 if the thread attributes could not be initialised (in
 * which case no thread is started and threadnumber is 0).
 */
int threadini(int count)
{
    pthread_attr_t a;
    const int slots=(int)(sizeof threadpool/sizeof threadpool[0]);
    int i;

    threadnumber=0;
    if(count>slots) count=slots; /* never index past the per-worker arrays */

    if(pthread_attr_init(&a)!=0) return -1;
    pthread_attr_setdetachstate(&a,PTHREAD_CREATE_DETACHED);

    for(i=0;i<count;i++)
    {
        threadc[i]=i;
        /* pthread_create returns 0 on success. */
        if(pthread_create(&threadpool[i],&a,threadfunc,(void *)(threadc+i))==0) threadnumber++;
    }

    pthread_attr_destroy(&a);
    return 0;
}
void *threadfunc(void *c)
{
int i=*(int *)c;
int exit=0;
while(!action) sleep(1);
while(1)
{
pthread_mutex_lock(&hcs);
if(newlist)
{
threads[i]=newlist;
newlist=newlist->next;
}
else exit=1;
pthread_mutex_unlock(&hcs);
if(exit) break;
else
{
proceedchild(threads[i]);
}
}
pthread_mutex_lock(&tcs);
threadnumber--;
pthread_mutex_unlock(&tcs);
return NULL;
}
/*
 * Fetches the page-view data for one article and stores the total in p->view.
 *
 * p: node whose title is looked up; only p->view is modified.
 *
 * The request path is built from the URL-encoded title and the global
 * starttime/endtime. The response body is scanned character by character for
 * every occurrence of "views": and the decimal number directly after each
 * occurrence is added to the total.
 *
 * Returns 0 on success. Returns -1, leaving p->view untouched, if the request
 * path does not fit its buffer or getrest() reports a failure.
 */
int proceedchild(struct problemlist *p)
{
    static const char match[]="\"views\":";
    const int mtl=(int)(sizeof match-1);
    char title[3000];
    char url[5000];
    int num=0;      /* running total of all numbers found */
    int numpos=0;   /* number currently being read */
    int mtc=0;      /* how many characters of match have been seen */
    int status=0;   /* 0: searching, 1: inside match, 2: reading digits */
    int len;
    HTTP h;
    char ch;

    URLEncode(p->title,strlen(p->title),title,2990);
    /* Bounded formatting: starttime and endtime come from the command line. */
    len=snprintf(url,sizeof url,"/api/rest_v1/metrics/pageviews/per-article/zh.wikipedia/all-access/user/%s/daily/%s/%s",title,starttime,endtime);
    if(len<0||(size_t)len>=sizeof url) return -1;

    h=hopen();
    if(getrest(url,h))
    {
        hclose(h);
        return -1;
    }
    skipresponseheader(h);

    while(!heof(h))
    {
        ch=hgetc(h);
        switch(status)
        {
        case 0:
            if(ch==match[0])
            {
                status=1;
                mtc=1;
            }
            break;
        case 1:
            if(ch==match[mtc])
            {
                mtc++;
                if(mtc==mtl)
                {
                    numpos=0;
                    mtc=0;
                    status=2;
                }
            }
            else if(ch==match[0])
            {
                /* The character that broke the match can itself be the
                   start of a new one; do not throw it away. */
                mtc=1;
            }
            else
            {
                status=0;
                mtc=0;
            }
            break;
        case 2:
            if(ch>='0'&&ch<='9')
            {
                numpos=numpos*10+(ch-'0');
            }
            else
            {
                num+=numpos;
                status=0;
            }
            break;
        default:
            break;
        }
    }

    /* The stream ended while a number was still being read: count it too. */
    if(status==2) num+=numpos;

    p->view=num;
    hclose(h);
    return 0;
}