/*
 * 跳转到内容
 *
 * 用户:Antigng-bot/projectstat
 *
 * 维基百科,自由的百科全书
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <malloc.h>
#include <pthread.h>
#include <unistd.h>
#include "auth.h"
#include "network.h"
#include "convert.h"
#include "convert.h"
/* One assessed article in the singly linked work list. */
struct problemlist
{
    char *title;              /* article title; query() stores it with ' ' replaced by '_' */
    char *rate;               /* rating label handed to query(), e.g. "Stub" or "GA" */
    int view;                 /* page-view total written by proceedchild() */
    struct problemlist *next; /* following node, or 0 at the end of the list */
};
/* File-scope state shared by main() and the worker threads. */
FILE *debug;                      /* not referenced in the visible code */
clock_t start;                    /* not referenced in the visible code */
int threadc[50];                  /* per-worker slot index handed to threadfunc() */
struct problemlist *threads[50];  /* node each worker is currently processing */
pthread_t threadpool[50];         /* handles filled in by threadini() */
int threadnumber=0;               /* live worker count; decremented under tcs when a worker exits */
pthread_mutex_t cs;               /* initialised in main(); not otherwise used in the visible code */
pthread_mutex_t tcs;              /* guards threadnumber */
pthread_mutex_t hcs;              /* guards newlist while workers claim nodes */
int action=0;                     /* workers poll this and start once main() sets it to 1 */
int pagecount=0;                  /* number of nodes appended by query() */
struct problemlist **sort;        /* scratch array built by sortini() for mergesort() */
char *starttime=0,*endtime=0;     /* -s / -e command-line values */
char *projectname=0;              /* -n command-line value */
char *outputfile=0;               /* -f command-line value */
char *user=0,*passwd=0;           /* -u / -p command-line values */
struct problemlist *head=0,*newlist=0; /* list head; next node not yet claimed by a worker */

/*
 * Prototypes for every function defined in this file. ini(), query(),
 * threadini() and proceedchild() are called before their definitions, so
 * they must be declared here (implicit declarations are invalid since C99).
 */
int ini(int argc,char *argv[]);
int query(char *cat,char *rate);
void sortini();
int threadini(int count);
int proceedchild(struct problemlist *p);
void show(struct problemlist *head);
int mergesort(struct problemlist **tg,int number);
struct problemlist *mergelist(struct problemlist *a,struct problemlist *b);
void *threadfunc(void *c);
void pagesort();
void useage();
/*
 * Program entry point.
 *
 * Parses the command line, logs in, collects the talk pages of every rating
 * category of the project into the global list, lets the worker threads fill
 * in the view counts, then sorts the list and writes the report.
 *
 * Returns 0 on success, -1 when the command line is incomplete or the
 * project name does not fit into the category-name buffer.
 */
int main(int argc,char *argv[])
{
    /* Category-name prefix of each rating class and the label recorded for
       it, in the order the categories are queried. */
    static const struct { const char *prefix; char *rate; } classes[]={
        {"小作品级","Stub"},
        {"初级","Start"},
        {"丙级","C"},
        {"乙级","B"},
        {"优良级","GA"},
        {"甲级","A"},
        {"特色级","FA"},
    };
    char name[512];
    size_t k;
    int count=0;
    int pending=1;
    int alive=0;

    if(ini(argc,argv))
    {
        useage();
        return -1;
    }
    login(user,passwd);

    for(k=0;k<sizeof classes/sizeof classes[0];k++)
    {
        /* Bounded formatting: projectname comes straight from argv. */
        int len=snprintf(name,sizeof name,"Category:%s%s条目",classes[k].prefix,projectname);
        if(len<0||(size_t)len>=sizeof name)
        {
            fprintf(stderr,"project name too long: %s\n",projectname);
            return -1;
        }
        query(name,classes[k].rate);
    }

    newlist=head;
    fflush(stdout);
    pthread_mutex_init(&cs,NULL);
    pthread_mutex_init(&tcs,NULL);
    pthread_mutex_init(&hcs,NULL);
    threadini(50);
    printf("Create %d threads to go through the problem list.\n",threadnumber);
    fflush(stdout);
    action=1;

    /* Wait until the workers have claimed every node. newlist is advanced
       by the workers under hcs, so it is read under the same lock. */
    while(pending)
    {
        pthread_mutex_lock(&hcs);
        pending=(newlist!=NULL);
        pthread_mutex_unlock(&hcs);
        if(pending) sleep(1);
    }

    /* Give the workers up to 20 seconds to finish their last request. */
    for(count=0;count<20;count++)
    {
        pthread_mutex_lock(&tcs);
        alive=threadnumber;
        pthread_mutex_unlock(&tcs);
        if(alive<=0) break;
        printf("waiting for all threads to exit. Current thread number: %d\n",alive);
        fflush(stdout);
        sleep(1);
    }

    pagesort();
    show(head);
    return 0;
}
/* Print the one-line command-line synopsis to standard output. */
void useage()
{
    static const char synopsis[]=
        "useage: pstat -u username -p password -f filename -s starttime -e endtime -n projectname\n";

    fputs(synopsis,stdout);
}
/*
 * Parse the command line into the global option variables.
 *
 * Recognised options, each followed by a value that does not itself start
 * with '-': -u user, -p passwd, -f outputfile, -s starttime, -e endtime,
 * -n projectname. The globals point directly into argv.
 *
 * Returns 0 when all six options were supplied, 1 otherwise.
 */
int ini(int argc,char *argv[])
{
    int hasu=0,hasp=0,hasn=0,hasf=0,hass=0,hase=0;
    int i=0;

    for(i=1;i<argc;i++)
    {
        /* argv[argc] is a null pointer, so argv[i+1] is always readable here. */
        if(argv[i][0]!='-'||!argv[i+1]||argv[i+1][0]=='-') continue;

        switch(argv[i][1])
        {
            case 'u':
                user=argv[i+1];
                hasu=1;
                break;
            case 'f':
                outputfile=argv[i+1];
                hasf=1;
                break;
            case 'p':
                passwd=argv[i+1];
                hasp=1;
                break;
            case 's':
                starttime=argv[i+1];
                hass=1;
                break;
            case 'e':
                /* Previously fell through into 'n': hase was never set and
                   projectname was overwritten with the end time. */
                endtime=argv[i+1];
                hase=1;
                break;
            case 'n':
                projectname=argv[i+1];
                hasn=1;
                break;
            default:
                break;
        }
        i++; /* skip the value that was just consumed */
    }

    if(hasu&&hasf&&hasp&&hasn&&hass&&hase) return 0;
    return 1;
}

/*
 * Append every talk page of category `cat` to the global list.
 *
 * cat  - category title (not URL-encoded), e.g. "Category:...".
 * rate - rating label stored by pointer in every node created; it must
 *        outlive the list.
 *
 * Pages are requested through the categorymembers API and followed across
 * result pages with the cmcontinue value from each response. For each
 * title starting with "Talk:" a node is created holding the title without
 * that prefix and with spaces turned into underscores; pagecount is
 * incremented per node.
 *
 * Returns 0 on success, -1 if a URL does not fit its buffer or an
 * allocation fails (nodes appended so far stay in the list).
 */
int query(char *cat,char *rate)
{
    HTTP f;
    int i=0;
    int len=0;
    size_t baselen=0;
    struct problemlist *temp,*pre;
    char title[3000];
    char url[5000];
    char offset[2000]={0},off[2000]={0};
    char line[5000];
    char talk[3000]={0};
    int next=0;
    char *ctn[]={"cmcontinue"};
    char *ctv[1];
    char *mtn[]={"title"};
    char *mtv[1];
    mtv[0]=talk;
    ctv[0]=off;

    /* Start appending at the current tail. `pre` is a local, so on every
       call after the first it used to be read uninitialised. */
    pre=head;
    while(pre&&pre->next) pre=pre->next;

    URLEncode(cat,strlen(cat),title,2990);
    len=snprintf(url,sizeof url,"https://zh.wikipedia.org/w/api.php?action=query&list=categorymembers&format=xml&cmnamespace=1&cmlimit=5000&cmtitle=%s",title);
    if(len<0||(size_t)len>=sizeof url) return -1;
    baselen=(size_t)len;

    do
    {
        if(next)
        {
            /* Rewrite the continuation after the fixed part instead of
               appending yet another one, which grew the URL on every page. */
            len=snprintf(url+baselen,sizeof url-baselen,"&cmcontinue=%s",offset);
            if(len<0||(size_t)len>=sizeof url-baselen) return -1;
        }
        f=hopen();
        get(url,1,f);
        skipresponseheader(f);
        next=0;
        do
        {
            xmlparsetag(f,line);
            if(!next&&!strcmp("continue",line))
            {
                xmlparsearg(f,1,ctn,ctv);
                URLEncode(off,strlen(off),offset,1490);
                next=1;
            }
            if(!strcmp("cm",line))
            {
                xmlparsearg(f,1,mtn,mtv);
                if(!strncmp("Talk:",talk,5))
                {
                    temp=calloc(1,sizeof *temp);
                    if(!temp)
                    {
                        hclose(f);
                        return -1;
                    }
                    temp->next=0;
                    temp->rate=rate;
                    temp->view=0;
                    for(i=0;talk[i];i++) {if(talk[i]==' ') talk[i]='_';}
                    /* Only the part after "Talk:" is kept. */
                    temp->title=malloc(strlen(&talk[5])+1);
                    if(!temp->title)
                    {
                        free(temp);
                        hclose(f);
                        return -1;
                    }
                    strcpy(temp->title,&talk[5]);
                    pagecount++;
                    if(!head)
                    {
                        head=pre=temp;
                    }
                    else
                    {
                        pre->next=temp;
                        pre=temp;
                    }
                }
            }
        }while(!heof(f));
        hclose(f);
    }while(next);
    return 0;
}
/*
 * Break the global list into single-node lists stored in the global `sort`
 * array, one node per slot, ready for mergesort().
 *
 * The array gets one extra zeroed slot after the last node, so that an
 * access to sort[pagecount] stays inside the allocation. Leaves newlist
 * pointing past the last node taken. Terminates the program if the array
 * cannot be allocated.
 */
void sortini()
{
    int i=0;
    size_t slots=(pagecount>0?(size_t)pagecount:0)+1;

    sort=calloc(slots,sizeof *sort);
    if(!sort)
    {
        perror("calloc");
        exit(EXIT_FAILURE);
    }
    newlist=head;
    /* Stop early if the list is shorter than pagecount; unused slots stay 0. */
    for(i=0;i<pagecount&&newlist;i++)
    {
        sort[i]=newlist;
        newlist=newlist->next;
        sort[i]->next=0; /* detach: every slot holds a one-element list */
    }
}
/*
 * Bottom-up merge of `number` sorted lists held in tg[0..number-1].
 *
 * Neighbouring lists are merged pairwise with mergelist() and the results
 * packed at the front of tg; this repeats until a single list remains in
 * tg[0]. After each pass the slot following the last result is set to 0.
 *
 * Returns 0 when there was nothing to merge (number <= 1), 1 otherwise.
 * A count of 0 used to recurse without end, and an odd count used to read
 * tg[number], one element past the lists supplied.
 */
int mergesort(struct problemlist **tg,int number)
{
    int count=0;
    int pos=0;
    struct problemlist *a,*b;

    if(number<=1) return 0;

    while(number>1)
    {
        for(count=0,pos=0;pos<number;count++,pos+=2)
        {
            a=tg[pos];
            /* The last list of an odd-sized pass has no partner. */
            b=(pos+1<number)?tg[pos+1]:NULL;
            tg[count]=mergelist(a,b);
        }
        /* count is at most number-1 here, so this stays inside tg. */
        tg[count]=0;
        number=count;
    }
    return 1;
}

/*
 * Merge two lists that are each ordered by descending view count into one
 * list in the same order and return its first node. A node of `a` is taken
 * only when its view count is strictly greater, so ties take the node of
 * `b` first. Either argument may be 0, in which case the other is returned
 * unchanged.
 */
struct problemlist *mergelist(struct problemlist *a,struct problemlist *b)
{
    struct problemlist *merged=0;
    struct problemlist **link=&merged; /* where the next chosen node is hooked in */
    struct problemlist **pick;

    if(a==0) return b;
    if(b==0) return a;

    while(a!=0&&b!=0)
    {
        pick=(a->view>b->view)?&a:&b;
        *link=*pick;           /* append the chosen node */
        *pick=(*pick)->next;   /* advance the list it came from */
        link=&(*link)->next;
    }
    /* Exactly one input is exhausted; attach what is left of the other. */
    *link=(a!=0)?a:b;
    return merged;
}
/*
 * Sort the global list by descending view count and store the new first
 * node in `head`.
 *
 * Does nothing when no pages were collected: building the scratch array
 * and merging with a count of 0 is not safe, and an empty list is already
 * sorted.
 */
void pagesort()
{
    if(pagecount<=0) return;
    sortini();
    mergesort(sort,pagecount);
    head=sort[0];
}

/*
 * Write the report to the file named by the global `outputfile`: a line
 * with the time range, then a sortable wikitable with one row per node
 * (rank, linked title, rating, view count) in list order.
 *
 * head - first node to print; the parameter hides the global of the same
 *        name.
 *
 * If the file cannot be opened the error is reported on stderr and nothing
 * is written.
 */
void show(struct problemlist *head)
{
    FILE *f;
    int count=0;

    f=fopen(outputfile,"w+");
    if(!f)
    {
        /* fprintf() on a null stream is undefined; bail out instead. */
        perror(outputfile);
        return;
    }
    fprintf(f,"%s - %s \n",starttime,endtime);
    fprintf(f,"{| class=\"wikitable sortable\" style=\"text-align: right;\"\n! 排名\n! 条目名\n! 评级\n! 访问量\n");
    while(head)
    {
        count++;
        fprintf(f,"|-\n| %d\n| style='text-align: left;' | [[%s]]\n| %s\n| %d\n",count,head->title,head->rate,head->view);
        head=head->next;
    }
    fprintf(f,"|}");
    /* fclose() flushes the buffered output, so a failure here means data loss. */
    if(fclose(f)!=0) perror(outputfile);
}

/*
 * Start up to `count` detached worker threads running threadfunc().
 *
 * count - number of workers requested; capped at the capacity of the
 *         global threadpool/threadc arrays so the loop cannot write past
 *         them.
 *
 * threadnumber is reset and then counts the threads that were actually
 * created. Always returns 0.
 */
int threadini(int count)
{
    const int capacity=(int)(sizeof threadpool/sizeof threadpool[0]);
    pthread_attr_t a;
    int i=0;

    if(count>capacity) count=capacity;

    pthread_attr_init(&a);
    pthread_attr_setdetachstate(&a,PTHREAD_CREATE_DETACHED);
    threadnumber=0;
    for(i=0;i<count;i++)
    {
        /* Each worker gets a pointer to its own slot index, which stays
           valid for the lifetime of the program. */
        threadc[i]=i;
        if(pthread_create(&threadpool[i],&a,threadfunc,(void *)(threadc+i))==0)
        {
            threadnumber++;
        }
    }
    pthread_attr_destroy(&a);

    return 0;
}
void *threadfunc(void *c)
{
     int i=*(int *)c;
    int exit=0;
      while(!action) sleep(1);

      while(1)
      {
            pthread_mutex_lock(&hcs);
            if(newlist)
            {
                  threads[i]=newlist;
                  newlist=newlist->next;
            }
            else exit=1;
 pthread_mutex_unlock(&hcs);
            if(exit) break;
            else
            {
                proceedchild(threads[i]);
            }
    }
      pthread_mutex_lock(&tcs);
      threadnumber--;
      pthread_mutex_unlock(&tcs);
      return NULL;
}


/*
 * Advance a substring match by one input character.
 *
 * pat     - pattern being searched for.
 * matched - number of leading pattern bytes that matched the input read so
 *           far; must be less than the pattern length.
 * ch      - next input character.
 *
 * Returns the number of leading pattern bytes matched once ch is included.
 * On a mismatch it falls back to the longest shorter prefix that still
 * fits, so a possible match starting inside the failed one is not lost.
 */
static int viewsmatchstep(const char *pat,int matched,char ch)
{
    int k;

    for(k=matched+1;k>0;k--)
    {
        if(pat[k-1]==ch&&memcmp(pat,pat+(matched-(k-1)),(size_t)(k-1))==0) return k;
    }
    return 0;
}

/*
 * Fetch the page-view data of one article and store the total in p->view.
 *
 * p - node whose title is looked up and whose view field is written.
 *
 * Requests the per-article daily pageviews resource for the range given by
 * the globals starttime and endtime, then scans the response body for each
 * occurrence of "views": and adds up the decimal numbers that follow.
 *
 * Returns 0 on success. Returns -1, leaving p->view untouched, when the
 * request path does not fit its buffer or getrest() reports failure.
 */
int proceedchild(struct problemlist *p)
{
    static const char match[]="\"views\":";
    const int mtl=(int)(sizeof match-1);
    char title[3000];
    char url[5000];
    int num=0;       /* running total of all numbers found */
    int numpos=0;    /* number currently being read */
    int mtc=0;       /* pattern bytes matched so far */
    int innumber=0;  /* non-zero while reading the digits after the pattern */
    int len=0;
    HTTP h;
    char ch;

    URLEncode(p->title,strlen(p->title),title,2990);
    /* starttime/endtime come from the command line, so bound the write. */
    len=snprintf(url,sizeof url,"/api/rest_v1/metrics/pageviews/per-article/zh.wikipedia/all-access/user/%s/daily/%s/%s",title,starttime,endtime);
    if(len<0||(size_t)len>=sizeof url) return -1;

    h=hopen();
    if(getrest(url,h))
    {
        hclose(h);
        return -1;
    }
    skipresponseheader(h);
    while(!heof(h))
    {
        ch=hgetc(h);
        if(innumber)
        {
            if(ch>='0'&&ch<='9')
            {
                numpos=numpos*10+(ch-'0');
            }
            else
            {
                num+=numpos;
                numpos=0;
                innumber=0;
                /* The terminating character may itself start a new match. */
                mtc=viewsmatchstep(match,0,ch);
            }
        }
        else
        {
            mtc=viewsmatchstep(match,mtc,ch);
            if(mtc==mtl)
            {
                numpos=0;
                mtc=0;
                innumber=1;
            }
        }
    }
    /* Count a number that runs up to the very end of the response. */
    if(innumber) num+=numpos;

    p->view=num;
    hclose(h);
    return 0;
}