#include <stdio.h>
#include <string.h>
#include <process.h>
#include <windows.h>
#include "mem.h"
#include "network.h"
#include "convert.h"
#include "auth.h"
struct problemlist
{
unsigned long pageid;
struct problemlist *next;
};
/* Description of one edit request, filled in by pagecheck() and consumed by smartedit(). */
struct neditargv
{
const char *id; /* page id as text; inserted after "pageid=" in the edit URL */
HTTP newtext; /* buffer with the request body; pagecheck() starts it with "&text=" */
const char *time; /* revision timestamp; inserted after "basetimestamp=" in the edit URL */
};
/* ---- shared state used by the worker threads ---- */
int threadc[1024]; /* threadini() stores index i in slot i and passes its address to thread i */
SRWLOCK rwcs; /* taken shared/exclusive around accesses to hastoken/token (smartedit, main) */
CRITICAL_SECTION tcs; /* held while threadnumber is decremented (threadfunc) or polled (main) */
CRITICAL_SECTION hcs; /* held while the pending-page list pbl is popped or polled */
int threadnumber=0; /* worker threads started by threadini() that have not yet exited */
unsigned char action=0; /* start flag: workers Sleep() in a loop until main() sets it to 1 */
struct problemlist *pbl=NULL; /* head of the list of pages still to be checked */
int *nextm=NULL; /* KMP table for matchstring, built by kmpini() and read by kmp() */
int pagenum=0; /* number of nodes added to pbl by the query functions */
/* ---- command-line options, filled in by parsearg() ---- */
const char *username=NULL; /* -u value after G2U() conversion */
const char *passwd=NULL; /* -p value, used as given */
const char *rpl=NULL; /* -r value after G2U() conversion; NULL when -r was not given */
char *replacestring=NULL; /* URL-encoded form of rpl; written into the edit body by kmp() */
int replacelength=0; /* strlen(replacestring) */
int matchlength=0; /* strlen(matchstring) */
int maxquerytimes=0; /* -m value; decremented after each list query when the limit is active */
int hasquerytimeslimit=0; /* set to 1 when -m was given with a value > 0 */
int hideredirect=0; /* -h: ask the API to leave out redirects */
char *matchstring=NULL; /* -s value after G2U() conversion; the text kmp() searches for */
char *matchstring_e=NULL; /* URL-encoded form of matchstring (not read elsewhere in this file) */
const char *matchstring_ori=NULL; /* -s value exactly as typed; shown by confirm() */
const char *replacestring_ori=NULL; /* -r value exactly as typed; shown by confirm() */
const char *ns=NULL; /* -n value; parsearg() substitutes "0" when absent */
int searchfortransclude=0; /* -t: use templatequery() instead of allpagequery() */
const char *templatename=NULL; /* -t value after G2U() conversion */
int dosort=0; /* -S: sort the page list with pagesort() after querying */
int maxthread=256; /* -T value; parsearg() replaces values outside 1..1024 with 32 */
/*
 * Explain the status word produced by parsearg() on stdout.
 *
 * Bits 0x1, 0x4 and 0x10 mean "user name / search string / password was
 * accepted", so a message is printed when they are CLEAR.  Bits 0x2, 0x8 and
 * 0x40 flag an over-long user name / password / namespace, so a message is
 * printed when they are SET.  Messages appear in ascending bit order.
 */
static void displayerr(unsigned int code)
{
    /* One row per diagnostic, in the order in which they are reported. */
    static const struct
    {
        unsigned int mask;    /* bit examined in `code` */
        int report_when_set;  /* 1: the bit itself is the error, 0: its absence is */
        const char *text;     /* text written to stdout */
    } diagnostics[] = {
        { 0x1, 0,
          "\tNo username.\n"
          "\t\tA valid username must be specified via \"-u\".\n" },
        { 0x2, 1,
          "\tUsername too long.\n"
          "\t\tThe username should not be longer than 64 bytes.\n" },
        { 0x4, 0,
          "\tNo search string.\n"
          "\t\tA search string should be set via \"-s\".\n" },
        { 0x8, 1,
          "\tPassword too long.\n"
          "\t\tThe password should not be longer than 64 bytes.\n" },
        { 0x10, 0,
          "\tNo password.\n"
          "\t\tA valid password must be specified via \"-p\".\n" },
        { 0x40, 1,
          "\tns string too long.\n"
          "\t\tThe ns string should not be longer than 32 bytes.\n" },
    };
    size_t row;

    for (row = 0; row < sizeof diagnostics / sizeof diagnostics[0]; row++)
    {
        int bit_is_set = (code & diagnostics[row].mask) != 0;

        if (bit_is_set == diagnostics[row].report_when_set)
        {
            /* The texts contain no conversion specifiers, so fputs prints them verbatim. */
            fputs(diagnostics[row].text, stdout);
        }
    }
}
/*
 * Parse the command line into the file-level option variables.
 *
 * Options that take a value use the following argument, which must not start
 * with '-':
 *   -u name      user name, converted with G2U(); at most 64 bytes
 *   -p password  at most 64 bytes
 *   -s text      search string (required, must not be empty)
 *   -r text      replacement string
 *   -n ns        namespace, at most 32 bytes; "0" is used when absent
 *   -T n         worker thread count; values outside 1..1024 become 32
 *   -m n         number of list queries to issue; only limits when n > 0
 *   -t title     only look at pages that transclude this template
 * Options without a value:
 *   -h           leave out redirects
 *   -S           sort the page list
 *
 * Status bits collected in `err`: 0x1 user name accepted, 0x2 user name too
 * long, 0x4 search string accepted, 0x8 password too long, 0x10 password
 * accepted, 0x40 namespace too long.
 *
 * Returns 0 when exactly the three "accepted" bits are set; otherwise prints
 * the status word, explains it through displayerr() and returns 1.
 */
static int parsearg(int argc,const char *argv[])
{
    unsigned int err = 0;
    int cur_arg;

    for (cur_arg = 1; cur_arg < argc; cur_arg++)
    {
        const char *arg = argv[cur_arg];
        const char *value = argv[cur_arg + 1]; /* argv[argc] is NULL, so this read is in bounds */
        int takes_no_value;

        if (arg[0] != '-')
        {
            continue;
        }
        takes_no_value = (arg[1] == 'h' || arg[1] == 'S');
        if (!takes_no_value && !(value && value[0] != '-'))
        {
            /* Option without a usable value: ignored, exactly like an unknown word. */
            continue;
        }
        switch (arg[1])
        {
            case 'u':
                username = G2U(value);
                if (strlen(username) > 64)
                {
                    err |= 0x2;
                }
                else
                {
                    err |= 0x1;
                }
                cur_arg++;
                break;
            case 'p':
                passwd = value;
                if (strlen(passwd) > 64)
                {
                    err |= 0x8;
                }
                else
                {
                    err |= 0x10;
                }
                cur_arg++;
                break;
            case 's':
            {
                size_t len;

                matchstring_ori = value;
                matchstring = G2U(matchstring_ori);
                len = strlen(matchstring);
                matchstring_e = (char *)s_malloc(len * 4 + 1);
                URLEncode(matchstring, len, matchstring_e, len * 4 + 1);
                matchlength = (int)len;
                /*
                 * An empty search string matches everywhere: kmpini() would
                 * get a zero-length table and kmp() would never terminate.
                 * Leaving 0x4 clear reports it as "No search string".
                 */
                if (len > 0)
                {
                    err |= 0x4;
                }
                cur_arg++;
                break;
            }
            case 'n':
                ns = value;
                if (strlen(ns) > 32)
                {
                    err |= 0x40;
                }
                cur_arg++;
                break;
            case 'T':
                maxthread = atoi(value);
                if (maxthread < 1 || maxthread > 1024)
                {
                    maxthread = 32;
                }
                cur_arg++;
                break;
            case 'r':
            {
                size_t len;

                replacestring_ori = value;
                rpl = G2U(replacestring_ori);
                len = strlen(rpl);
                replacestring = (char *)s_malloc(len * 4 + 1);
                URLEncode(rpl, len, replacestring, 4 * len + 1);
                replacelength = (int)strlen(replacestring);
                cur_arg++;
                break;
            }
            case 'm':
                maxquerytimes = atoi(value);
                if (maxquerytimes > 0)
                {
                    hasquerytimeslimit = 1;
                }
                cur_arg++;
                break;
            case 'h':
                hideredirect = 1;
                break;
            case 'S':
                dosort = 1;
                break;
            case 't':
                searchfortransclude = 1;
                templatename = G2U(value);
                cur_arg++;
                break;
            default:
                /* Unknown option letter: skipped; its value is not consumed. */
                break;
        }
    }
    if (!ns)
    {
        ns = "0";
    }
    if (err == (0x1 | 0x4 | 0x10))
    {
        return 0;
    }
    printf("Error code 0x%x:\n", err);
    displayerr(err);
    return 1;
}
static int smartedit(struct neditargv *p,const char *reason, const char *tags)
{
HTTP res;
char line[2048],url[4096]={0};
char reason_e[512];
char tags_e[256];
char aft[1024],statusline[128];
char cur_token[128];
char err_type[8192];
char *erm[]={"code"};
char *erv[1];
int find=0;
int has_err=0,token_err=0,filtered=0;
int retry=0;
erv[0]=err_type;
if(reason) URLEncode(reason,strlen(reason),reason_e,510);
if(tags) URLEncode(tags,strlen(tags),tags_e,254);
sprintf(url,"/w/api.php?action=edit&pageid=%s&basetimestamp=%s",p->id,p->time);
find=sprintf(aft,"%s%s&summary=%s&bot=1&minor=1&nocreate=1&format=xml&token=",tags?"&tags=":"",tags?tags_e:"",reason_e);
if(find<0) return -4;
do
{
res=hopen();;
while(1)
{
AcquireSRWLockShared(&rwcs);
if(hastoken) break;
else ReleaseSRWLockShared(&rwcs);
Sleep(100);
}
aft[find]=0;
strcat(aft,token);
ReleaseSRWLockShared(&rwcs);
hrewind(p->newtext);
if(smartpost(url,p->newtext,aft,8888,1,res))
{
hclose(res);
return -1;
}
hgets(statusline,127,res);
if(!strstr(statusline," 200"))
{
hclose(res);
return -2;
}
skipresponseheader(res);
filtered=token_err=has_err=0;
while(!heof(res))
{
if(xmlparsetag(res,line)==XML_HAS_VALUE)
{
if(!strcmp(line,"error"))
{
has_err=1;
xmlparsearg(res,1,erm,erv);
if((!strcmp(err_type,"notoken"))||(!strcmp(err_type,"badtoken")))
{
token_err=1;
}
else if(!strcmp(err_type,"abusefilter-warning"))
{
filtered=1;
}
break;
}
}
}
if(token_err)
{
AcquireSRWLockExclusive(&rwcs);
if(!strcmp(aft+find,cur_token)) hastoken=0;
ReleaseSRWLockExclusive(&rwcs);
}
retry++;
hclose(res);
}while((token_err||filtered)&&(retry<3));
if(has_err) return -3;
else return 0;
}
/* Edit summary handed to smartedit() for every replacement; built by kmpini(). */
char *replace_reason=NULL;

/*
 * Prepare the search: build the KMP table `nextm` for matchstring and compose
 * the edit summary in `replace_reason`.
 *
 * nextm[0] is -1; for i > 0, nextm[i] is the position in matchstring at which
 * comparison resumes after a mismatch at position i.
 *
 * The table always has at least one slot, so an empty matchstring cannot
 * cause a write into a zero-sized allocation.  When no replacement was given
 * (rpl is NULL) the summary shows an empty replacement instead of passing
 * NULL to "%s".
 *
 * Always returns 0.
 */
static int kmpini()
{
    const char *shown_replacement = rpl ? rpl : "";
    size_t slots = matchlength > 0 ? (size_t)matchlength : 1;
    int i = 0;
    int j = -1;

    nextm = (int *)s_malloc(slots * sizeof(int));
    nextm[0] = -1;
    while (i < matchlength - 1)
    {
        if (j == -1)
        {
            /* No prefix left to extend: the next position restarts at 0. */
            j = 0;
            i++;
            nextm[i] = 0;
        }
        else if (matchstring[i] == matchstring[j])
        {
            i++;
            j++;
            nextm[i] = j;
        }
        else
        {
            j = nextm[j];
        }
    }
#ifdef _DEBUG
    for (i = 0; i < matchlength; i++)
    {
        printf("%d ", nextm[i]);
    }
    printf("\n");
#endif
    /* 64 bytes cover the fixed text of the summary and the terminator. */
    replace_reason = (char *)s_calloc(sizeof(char) * (64 + strlen(matchstring) + strlen(shown_replacement)), 1);
    sprintf(replace_reason, "bot: massreplace: \"%s\" ->\"%s\"", matchstring, shown_replacement);
    return 0;
}
/*
 * Copy the text pulled from `h` into `result`, substituting every occurrence
 * of matchstring.
 *
 * Characters are fetched one at a time with xmlpulltext() and compared with
 * matchstring using the nextm[] table built by kmpini().  Text that turns out
 * not to belong to a match is written to `result` through smartURLEncode();
 * each complete match is replaced by replacestring, or dropped when
 * replacestring is NULL.
 *
 * Returns non-zero when at least one match was found and the final
 * xmlpulltext() call reports XML_TEXT_END; returns 0 otherwise.
 */
static int kmp(HTTP h,HTTP result)
{
char ch,*pch;
/* i: number of characters of matchstring currently matched, or -1 when the
   current character has already been written out and a new one is needed. */
int i=0,j=0,k=0,todo=0,exit=0;
/* pch is not used again in this function. */
pch=&ch;
/* Prime `ch` with the first character; the return value is not checked here. */
xmlpulltext(h,&ch);
do
{
while(i<matchlength)
{
if(i==-1)
{
/* The previous character was flushed; fetch the next one and restart at 0. */
if(xmlpulltext(h,&ch)!=XML_TEXT_CONTINUE)
{
exit=1;
break;
}
i=0;
}
else if(ch==matchstring[i])
{
/* One more character matched; advance and fetch the following one. */
i++;
if(xmlpulltext(h,&ch)!=XML_TEXT_CONTINUE)
{
exit=1;
break;
}
}
else
{
/* kmpini() stores -1 only in nextm[0], so this branch is a mismatch at i==0. */
if(nextm[i]==-1)
{
for(k=0;k<i;k++)
{
smartURLEncode(matchstring[k],result);
}
/* The current character cannot start a match: emit it and ask for a new one. */
smartURLEncode(ch,result);
i=-1;
}
else
{
/* Keep the last nextm[i] matched characters as the new partial match and
   emit the j characters in front of them, which can no longer be matched. */
j=i-nextm[i];
for(k=0;k<j;k++)
{
smartURLEncode(matchstring[k],result);
}
i=i-j;
}
}
}
if(i==matchlength)
{
/* Complete match: remember that an edit is needed and write the replacement. */
todo=1;
i=0;
if(replacestring) hputs(replacestring,replacelength,result);
}
}while(!exit);
/* Input ended inside a partial match: those characters are ordinary text. */
if(i>0)
{
for(k=0;k<i;k++)
{
smartURLEncode(matchstring[k],result);
}
}
/* Discard the result unless the text is reported as properly terminated. */
if(xmlpulltext(h,&ch)!=XML_TEXT_END) todo=0;
return todo;
}
/*
 * Run the search/replace over one page and submit the edit when needed.
 *
 * pageid, timestamp  identify the page revision; forwarded to smartedit()
 * f                  response stream positioned at the page text
 *
 * The new page text is collected by kmp() in a temporary buffer that starts
 * with "&text=".  smartedit() is called only when kmp() reports a change; its
 * result is not examined.  Always returns 0.
 */
static int pagecheck(const char *pageid,const char *timestamp,HTTP f)
{
    HTTP body = hopen();

    hputs("&text=", 6, body);
    if (kmp(f, body) != 0)
    {
        struct neditargv request;

        request.id = pageid;
        request.time = timestamp;
        request.newtext = body;
        smartedit(&request, replace_reason, NULL);
    }
    hclose(body);
    return 0;
}
/*
 * Fetch the current revision of a batch of pages and hand each wikitext page
 * to pagecheck().
 *
 * ids  page ids joined with '|', inserted after "pageids=" in the query URL
 *
 * The XML answer is walked with a small state machine:
 *   state 0  looking for <page> with a positive "pageid"
 *   state 1  the next valued tag must be <rev>; its "timestamp" is read
 *   state 2  the next valued tag must be <slot>; when its model/format is
 *            wikitext / text/x-wiki the page is passed to pagecheck()
 *   state 3  parse failure
 * Parsing stops at the first unexpected tag or parse error.
 *
 * Returns  0  the response was consumed and parsing ended in state 0
 *         -1  ids is NULL
 *         -2  the request failed
 *         -3  the status line lacks " 200" or the header could not be skipped
 *         -4  parsing stopped in any state other than 0
 */
static int proceedchild(const char *ids)
{
    char url[4096];
    char buf[8192];
    char pageid[256];
    char timestamp[256];
    char contentmodel[64], contentformat[64];
    const char *ttm[] = {"pageid"};
    const char *tmm[] = {"timestamp"};
    const char *cmm[] = {"contentmodel", "contentformat"};
    char *ttv[1];
    char *tmv[1];
    char *cmv[2];
    int state = 0;
    HTTP h;

    if (ids == NULL)
    {
        return -1;
    }
    ttv[0] = pageid;
    tmv[0] = timestamp;
    cmv[0] = contentmodel;
    cmv[1] = contentformat;

    sprintf(url, "/w/api.php?action=query&format=xml&prop=revisions&rvprop=content|timestamp&pageids=%s&rvslots=main", ids);
    h = hopen();
    if (get(url, 8888, 1, h))
    {
        hclose(h);
        return -2;
    }
    hgets(buf, 4096, h);
    /* The header is skipped only after the status line has been accepted. */
    if (strstr(buf, " 200") == NULL || skipresponseheader(h))
    {
        hclose(h);
        return -3;
    }

    while (!heof(h))
    {
        int tag = xmlparsetag(h, buf);

        if (tag == XML_HAS_VALUE)
        {
            if (state == 0)
            {
                if (strcmp(buf, "page") == 0
                    && xmlparsearg(h, 1, ttm, ttv) == XML_HAS_VALUE
                    && atoi(pageid) > 0)
                {
                    state = 1;
                }
            }
            else if (state == 1)
            {
                if (strcmp(buf, "rev") != 0)
                {
                    break; /* unexpected tag; state stays 1 */
                }
                if (xmlparsearg(h, 1, tmm, tmv) != XML_HAS_VALUE)
                {
                    state = 3;
                    break;
                }
                state = 2;
            }
            else if (state == 2)
            {
                if (strcmp(buf, "slot") != 0)
                {
                    break; /* unexpected tag; state stays 2 */
                }
                if (xmlparsearg(h, 2, cmm, cmv) != XML_HAS_VALUE)
                {
                    state = 3;
                    break;
                }
                if (strcmp(contentmodel, "wikitext") == 0 && strcmp(contentformat, "text/x-wiki") == 0)
                {
                    pagecheck(pageid, timestamp, h);
                }
                state = 0;
            }
        }
        else if (tag == XML_PARSE_ERROR)
        {
            state = 3;
            break;
        }
    }
    hclose(h);
    return state ? -4 : 0;
}
/*
 * Worker thread body.
 *
 * Waits until main() sets `action`, then repeatedly takes up to 10 pages from
 * the shared list `pbl` (under `hcs`), joins their ids with '|' and passes
 * the batch to proceedchild().  The thread ends when it finds the list empty,
 * and decrements `threadnumber` (under `tcs`) on the way out.
 *
 * A batch that holds fewer than 10 pages because the list ran out is still
 * processed; only a completely empty batch ends the thread.
 *
 * c  pointer to this thread's slot in threadc[]; not needed by the body
 */
static void threadfunc(void *c)
{
    char pageid[10][64];
    char ids[4096];
    struct problemlist *node;
    int count;
    int k;

    (void)c;
    while (!action)
    {
        Sleep(1);
    }
    for (;;)
    {
        /* Only the list manipulation needs the lock; ids is thread-local. */
        EnterCriticalSection(&hcs);
        for (count = 0; count < 10 && pbl; count++)
        {
            node = pbl;
            pbl = node->next;
            /* pageid is unsigned long, so it must be printed with %lu. */
            sprintf(pageid[count], "%lu", node->pageid);
            s_free(node);
        }
        LeaveCriticalSection(&hcs);

        if (count == 0)
        {
            break; /* nothing left to do */
        }

        /* Join the ids last-to-first, separated by '|'. */
        strcpy(ids, pageid[count - 1]);
        for (k = count - 2; k >= 0; k--)
        {
            strcat(ids, "|");
            strcat(ids, pageid[k]);
        }
        /* The result is not acted upon: a failed batch is simply skipped. */
        (void)proceedchild(ids);
    }
    EnterCriticalSection(&tcs);
    threadnumber--;
    LeaveCriticalSection(&tcs);
    return;
}
/*
 * Start up to `count` worker threads running threadfunc().
 *
 * Thread i receives the address of threadc[i], so `count` is capped at the
 * size of that array.  `threadnumber` ends up as the number of threads that
 * were actually created.
 *
 * _beginthread() returns a uintptr_t handle and -1 on failure, so the result
 * is kept in that type and compared against -1 rather than narrowed to int.
 *
 * Always returns 0.
 */
static int threadini(int count)
{
    const int capacity = (int)(sizeof threadc / sizeof threadc[0]);
    uintptr_t handle;
    int i;

    if (count > capacity)
    {
        count = capacity;
    }
    threadnumber = 0;
    for (i = 0; i < count; i++)
    {
        threadc[i] = i;
        handle = _beginthread(threadfunc, 0, (void *)(threadc + i));
        if (handle != (uintptr_t)-1L)
        {
            threadnumber++;
        }
    }
    return 0;
}
/* Work array for pagesort(): one single-node list per page plus a NULL terminator. */
static struct problemlist **sort;

/*
 * Split the list `pbl` into `pagenum` single-node lists stored in sort[0..pagenum-1]
 * and terminate the array with a NULL entry at sort[pagenum].
 *
 * The array is allocated with pagenum + 1 pointer-sized slots so that the
 * terminator (which mergesort() also reads as the partner of the last entry
 * of an odd-sized pass) lies inside the allocation for any pointer size.
 */
static void sortini()
{
    struct problemlist *node = pbl;
    int i;

    sort = (struct problemlist **)s_calloc(((size_t)pagenum + 1) * sizeof(struct problemlist *), 1);
    for (i = 0; i < pagenum; i++)
    {
        sort[i] = node;
        node = node->next;
        sort[i]->next = 0; /* detach: every entry is a list of length one */
    }
    sort[i] = 0;
}
/*
 * Merge two lists that are each ordered by descending pageid into one list
 * with the same ordering and return its head.
 *
 * The node from `a` is taken only when its pageid is strictly greater; on
 * equal ids the node from `b` comes first.  Either argument may be NULL, in
 * which case the other list is returned unchanged.  Nodes are relinked in
 * place; nothing is allocated or freed.
 */
static struct problemlist *mergelist(struct problemlist *a,struct problemlist *b)
{
    struct problemlist *head = 0;
    struct problemlist **tail = &head; /* link that receives the next node */

    if (!a) return b;
    if (!b) return a;

    while (a && b)
    {
        if (a->pageid > b->pageid)
        {
            *tail = a;
            a = a->next;
        }
        else
        {
            *tail = b;
            b = b->next;
        }
        tail = &(*tail)->next;
    }
    /* One input is exhausted; append whatever remains of the other. */
    *tail = a ? a : b;
    return head;
}
/*
 * Bottom-up merge sort over an array of lists.
 *
 * tg      array of `number` list heads followed by a NULL entry at
 *         tg[number]; on return tg[0] holds the single merged list
 * number  number of lists in tg
 *
 * Each pass merges neighbouring pairs with mergelist() and compacts the
 * results to the front of the array, re-terminating it with NULL, until one
 * list is left.  With an odd count the last list is paired with the NULL
 * terminator.  Zero or one list needs no work; the previous recursive form
 * never terminated for number == 0.
 *
 * Returns 1 when at least one merge pass ran, 0 otherwise.
 */
static int mergesort(struct problemlist **tg,int number)
{
    int merged_any = 0;

    while (number > 1)
    {
        int out = 0;
        int pos;

        for (pos = 0; pos < number; pos += 2)
        {
            /* out <= pos, so the slot written has already been read. */
            struct problemlist *pair = mergelist(tg[pos], tg[pos + 1]);

            tg[out] = pair;
            out++;
        }
        tg[out] = 0;
        number = out;
        merged_any = 1;
    }
    return merged_any;
}
/*
 * Sort the pending-page list `pbl` (pagenum nodes) using sortini() and
 * mergesort(), leaving the sorted list in `pbl`.
 *
 * Lists with fewer than two pages are already sorted and are left untouched;
 * in particular an empty result set must not reach mergesort().
 */
static void pagesort()
{
    if (pagenum < 2)
    {
        return;
    }
    sortini();
    mergesort(sort, pagenum);
    pbl = sort[0];
    s_free(sort);
    sort = 0;
}
/*
 * Collect the ids of all pages in namespace `ns` through "list=allpages".
 *
 * Every <p> element with a positive "pageid" is prepended to `pbl` and
 * counted in `pagenum`.  When the answer contains a <continue> element, its
 * "apcontinue" value is URL-encoded and appended to the next request.  Each
 * request is attempted up to 20 times.  When a query limit is active,
 * maxquerytimes is decremented after every answer and the loop stops once it
 * reaches zero.  The list is sorted afterwards when `dosort` is set.
 *
 * Returns 0 on completion, 1 when a request failed 20 times in a row (the
 * list is then left unsorted).
 */
static int allpagequery(const char *ns)
{
    char tag[2048] = {0};
    char url[4096] = {0};
    char idtext[512] = {0};
    char title[512] = {0};
    char cont_encoded[2048] = {0};
    char cont_raw[512] = {0};
    char statusline[128];
    char *cont_names[] = {"apcontinue"};
    char *cont_values[1];
    char *page_names[] = {"pageid", "title"};
    char *page_values[2];
    char *url_tail; /* end of the fixed part of the URL */
    struct problemlist *node = 0;
    int more = 0;

    cont_values[0] = cont_raw;
    page_values[0] = idtext;
    page_values[1] = title;
    url_tail = url + sprintf(url, "/w/api.php?action=query&format=xml&list=allpages&apnamespace=%s&aplimit=5000&apfilterredir=%s", ns, hideredirect ? "nonredirects" : "all");

    for (;;)
    {
        HTTP f;
        int attempt;

        /* Rewrite only the variable tail of the URL. */
        if (more)
        {
            sprintf(url_tail, "&apcontinue=%s", cont_encoded);
        }
        else
        {
            *url_tail = 0;
        }

        f = hopen();
        for (attempt = 0; attempt < 20; attempt++)
        {
            if (!get(url, 8888, 1, f))
            {
                hgets(statusline, 127, f);
                if (strstr(statusline, "200"))
                {
                    break;
                }
            }
            /* Failed attempt: start over with a fresh buffer. */
            hclose(f);
            f = hopen();
        }
        if (attempt == 20)
        {
            hclose(f);
            return 1;
        }

        skipresponseheader(f);
        more = 0;
        do
        {
            xmlparsetag(f, tag);
            if (!more && !strcmp(tag, "continue"))
            {
                xmlparsearg(f, 1, cont_names, cont_values);
                URLEncode(cont_raw, strlen(cont_raw), cont_encoded, 2047);
                more = 1;
            }
            if (!strcmp(tag, "p"))
            {
                int pageid;

                xmlparsearg(f, 2, page_names, page_values);
                pageid = atoi(idtext);
                if (pageid > 0)
                {
                    node = (struct problemlist *)s_malloc(sizeof(struct problemlist));
                    node->pageid = pageid;
                    node->next = pbl;
                    pbl = node;
                    pagenum++;
                }
            }
        } while (!heof(f));
        hclose(f);

        if (hasquerytimeslimit && --maxquerytimes <= 0)
        {
            break;
        }
        if (!more)
        {
            break;
        }
    }
    if (dosort)
    {
        pagesort();
    }
    return 0;
}
/*
 * Collect the ids of all pages in namespace `ns` that transclude
 * `templatename`, through "prop=transcludedin".
 *
 * Every <ti> element with a positive "pageid" is prepended to `pbl` and
 * counted in `pagenum`.  When the answer contains a <continue> element, its
 * "ticontinue" value is URL-encoded and appended to the next request.  Each
 * request is attempted up to 20 times.  When a query limit is active,
 * maxquerytimes is decremented after every answer and the loop stops once it
 * reaches zero.  The list is sorted afterwards when `dosort` is set.
 *
 * The encoded template name and the namespace are checked against the size
 * of the URL buffer before the URL is built; a request that would not fit is
 * refused instead of overflowing the buffer.
 *
 * Returns 0 on completion; 1 when an argument is missing, the URL would not
 * fit, or a request failed 20 times in a row.
 */
static int templatequery(const char *templatename,const char *ns)
{
    /* Room kept free in url[]: fixed query text (< 128 bytes), "&ticontinue="
       (12 bytes) and the encoded continuation value (at most 2047 bytes). */
    enum { URL_RESERVED = 128 + 12 + 2047 };
    HTTP f;
    char line[2048] = {0}, url[4096] = {0}, id[512] = {0}, title[512] = {0}, sroffset[2048] = {0}, offseto[512] = {0};
    char templatename_e[8192] = {0};
    char statusline[128];
    int next = 0, retry = 0, pageid = 0;
    struct problemlist *temp = 0;
    char *ctm[] = {"ticontinue"};
    char *ctv[1];
    char *idm[] = {"pageid", "title"};
    char *idv[2];
    char *ptourl = url;

    if (!templatename || !ns)
    {
        return 1;
    }
    ctv[0] = offseto;
    idv[0] = id;
    idv[1] = title;
    URLEncode(templatename, strlen(templatename), templatename_e, 8191);
    if (strlen(templatename_e) + strlen(ns) + URL_RESERVED >= sizeof url)
    {
        printf("Template name too long.\n");
        return 1;
    }
    ptourl += sprintf(url, "/w/api.php?action=query&format=xml&prop=transcludedin&titles=%s&tinamespace=%s&tilimit=5000&tishow=%s", templatename_e, ns, hideredirect ? "!redirect" : "");
    do
    {
        /* Rewrite only the variable tail of the URL. */
        if (next)
        {
            sprintf(ptourl, "&ticontinue=%s", sroffset);
        }
        else
        {
            *ptourl = 0;
        }
        f = hopen();
        for (retry = 0; retry < 20; retry++)
        {
            if (!get(url, 8888, 1, f))
            {
                hgets(statusline, 127, f);
                if (strstr(statusline, "200"))
                {
                    break;
                }
            }
            /* Failed attempt: start over with a fresh buffer. */
            hclose(f);
            f = hopen();
        }
        if (retry == 20)
        {
            hclose(f);
            return 1;
        }
        skipresponseheader(f);
        next = 0;
        do
        {
            xmlparsetag(f, line);
            if (!next && !strcmp(line, "continue"))
            {
                xmlparsearg(f, 1, ctm, ctv);
                URLEncode(offseto, strlen(offseto), sroffset, 2047);
                next = 1;
            }
            if (!strcmp(line, "ti"))
            {
                xmlparsearg(f, 2, idm, idv);
                if ((pageid = atoi(id)) > 0)
                {
                    temp = (struct problemlist *)s_malloc(sizeof(struct problemlist));
                    temp->pageid = pageid;
                    temp->next = pbl;
                    pbl = temp;
                    pagenum++;
                }
            }
        } while (!heof(f));
        hclose(f);
        if (hasquerytimeslimit && --maxquerytimes <= 0)
        {
            break;
        }
    } while (next);
    if (dosort)
    {
        pagesort();
    }
    return 0;
}
/*
 * Ask the user to confirm the replacement on the console.
 *
 * The prompt is repeated until the first character of an input line is 'Y'
 * or 'N'.  The rest of each line is read and discarded explicitly, because
 * fflush() is only defined for output streams.  End of input counts as an
 * abort so that a closed or redirected stdin cannot cause an endless loop.
 * A missing replacement string is shown as empty.
 *
 * Returns 0 to continue, 1 to abort.
 */
static int confirm()
{
    int answer;
    int rest;

    for (;;)
    {
        printf("Replace \"%s\" with \"%s\", OK? Press \'Y\' to continue, \'N\' to abort.\n",
               matchstring_ori ? matchstring_ori : "",
               replacestring_ori ? replacestring_ori : "");
        answer = getchar();
        if (answer == EOF)
        {
            return 1;
        }
        /* Drop the remainder of the line, including its newline. */
        rest = answer;
        while (rest != '\n' && rest != EOF)
        {
            rest = getchar();
        }
        if (answer == 'Y')
        {
            return 0;
        }
        if (answer == 'N')
        {
            return 1;
        }
        printf("Invalid command.\n");
    }
}
/*
 * Program entry point.
 *
 * Sequence: parse options, ask for confirmation, initialise the locks and
 * buckini(), log in, build the search tables, start the token thread, query
 * the page list, start the workers, wait until the list is empty and the
 * workers have exited (at most 150 one-second polls), then clean up.
 *
 * Exit codes: 0 done, -1 bad arguments, -2 aborted by the user, -3 login
 * failed, -4 the query produced no pages.
 */
int main(int argc,char *argv[])
{
    int count = 0;
    HANDLE tk_thread;

    /* parsearg() takes const char *[]; char ** does not convert to that
       implicitly, so the conversion is spelled out. */
    if (parsearg(argc, (const char **)argv))
    {
        printf("usage: -u username -p passwd -s searchstring [-T concurrency -r replacestring -n namespace -m maxquery -h hideredirect -S sortpage -t transclude]\n");
        return -1;
    }
    if (confirm())
    {
        printf("Aborted!\n");
        return -2;
    }
    InitializeSRWLock(&rwcs);
    InitializeCriticalSection(&tcs);
    InitializeCriticalSection(&hcs);
    buckini(20);
    if (login(username, passwd))
    {
        printf("Login error!\n");
        return -3;
    }
    hastoken = 0;
    printf("Login complete.\n");
    fflush(stdout);
    kmpini();
    /* NOTE(review): a _beginthread() handle is closed by the runtime when the
       thread ends; waiting on it below may need _beginthreadex() - confirm. */
    tk_thread = (HANDLE)_beginthread(tokenmanage, 0, 0);
    if (searchfortransclude)
    {
        templatequery(templatename, ns);
    }
    else
    {
        allpagequery(ns);
    }
    if (pbl == NULL)
    {
        printf("No page!\n");
        return -4;
    }
    printf("Query complete. %d pages found. Checking...\n", pagenum);
    /* Workers are created first and released together by setting `action`. */
    action = 0;
    threadini(maxthread);
    action = 1;
    /* Poll once per second until the workers have emptied the list. */
    while (1)
    {
        EnterCriticalSection(&hcs);
        if (pbl != NULL)
        {
            LeaveCriticalSection(&hcs);
            Sleep(1000);
        }
        else
        {
            LeaveCriticalSection(&hcs);
            break;
        }
    }
    /* Give the workers up to 150 seconds to finish their last batch. */
    count = 0;
    while (count < 150)
    {
        count++;
        EnterCriticalSection(&tcs);
        if (threadnumber > 0)
        {
            printf("Waiting for all threads to exit. Current thread number: %d\n", threadnumber);
            LeaveCriticalSection(&tcs);
        }
        else
        {
            LeaveCriticalSection(&tcs);
            break;
        }
        fflush(stdout);
        Sleep(1000);
    }
    /* Clean up only when no worker can still be using the shared objects. */
    if (!threadnumber)
    {
        printf("Cleanup..\n");
        DeleteCriticalSection(&tcs);
        DeleteCriticalSection(&hcs);
        /* hastoken is set to -1 before waiting for the token thread;
           presumably its stop signal - confirm in tokenmanage(). */
        AcquireSRWLockExclusive(&rwcs);
        hastoken = -1;
        ReleaseSRWLockExclusive(&rwcs);
        WaitForSingleObject(tk_thread, INFINITE);
        buckdestroy();
    }
    printf("---------------Ok done.---------------\n");
    fflush(stdout);
    system("PAUSE");
    return 0;
}