Gebruiker:Sumurai8/antispam
Naar navigatie springen
Naar zoeken springen
Deze pagina wordt als het goed is gerefereerd in de requests van deze bot. De bot voert beperkte controles uit en blokkeert de huidige generatie spambots. De code van de bot is niet mooi, is waarschijnlijk niet vrij van fouten, maar voldoet voor haar taak. Code is beschikbaar onder cc-by-sa, maar is in de huidige vorm onbruikbaar. Je blaast een van je cpu's op of DDoS't de site wanneer je dit zonder kennis van zaken draait.
Code
import urllib, urllib2, cookielib import time, re from BeautifulSoup import BeautifulStoneSoup openthing = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.LWPCookieJar())) openthing.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.7 Safari/535.19 Python 2.7.2'), ('Referer', 'http://wikikids.wiki.kennisnet.nl/antispam')] urllib2.install_opener(openthing) APIURL = 'http://wikikids.wiki.kennisnet.nl/api.php' #login1 x = urllib2.urlopen(APIURL, urllib.urlencode({'action':'login', 'lgname':'PLACEHOLDER', 'lgpassword':'PLACEHOLDER', 'format':'xml'})) y = BeautifulStoneSoup(x.read()) logintoken = y('login')[0]['token'] #login2 x = urllib2.urlopen(APIURL, urllib.urlencode({'action':'login', 'lgname':'PLACEHOLDER', 'lgpassword':'PLACEHOLDER', 'lgtoken':logintoken, 'format':'xml'})) y = BeautifulStoneSoup(x.read()) if not y('login')[0]['result'] == "Success": import sys print "Login failed with result:", y('login')[0]['result'] sys.exit(1) rcstart = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) lasttitle = "" while True: try: x = urllib2.urlopen(APIURL + "?action=query&list=recentchanges&rcstart=$rcstart&rclimit=10&rcdir=newer&rcprop=user|timestamp|title|ids&rctype=new&rcnamespace=0|2|3&format=xml".replace("$rcstart", rcstart)) y = BeautifulStoneSoup(x.read()) for entry in y('rc'): if not entry['title'] == lasttitle: rcstart = entry['timestamp'] print ">>> Checking", entry['title'], "by", entry['user'] if (re.match(r'[A-Z][a-z]+[A-Z][a-z]+[0-9]{1,4}', entry['user']) or re.match(r'[A-Z]{2}[a-z]+[A-Z][a-z]+', entry['user'])) and ((entry['ns'] in ["2", "3"] and entry['user'] in entry['title']) or (entry['ns'] == "0" and len(entry['title']) > 20)): print " I believe it's spam! More checks coming up..." 
xx = urllib2.urlopen(APIURL + "?action=query&prop=revisions&revids=$revid&rvprop=content&format=xml".replace("$revid", entry['revid'])) yy = BeautifulStoneSoup(xx.read()) rex = re.search(r'http[s]?://.*?( |\/|\.[ \n])', str(yy('rev')[0])) rex = rex.group() if rex[rex.rfind('.'):-1] not in ['.nl', '.nl.']: print " Contains url: %s" % rex xx = urllib2.urlopen(APIURL + "?" + urllib.urlencode({'action':'query', 'prop':'info', 'titles':entry['title'], 'intoken':'delete', 'format':'xml'})) yy = BeautifulStoneSoup(xx.read()) if yy('page')[0]['lastrevid'] == entry['revid']: deltoken = yy('page')[0]['deletetoken'] xx = urllib2.urlopen(APIURL, urllib.urlencode({'action':'delete', 'title':entry['title'], 'token':deltoken, 'reason':'Automatisch verwijderd vanwege vermoeden spam', 'format':'xml'})) yy = BeautifulStoneSoup(xx.read()) print " ", yy('delete')[0]['reason'] print " Let's try to block them too!" xx = urllib2.urlopen(APIURL + "?" + urllib.urlencode({'action':'query', 'list':'usercontribs', 'ucuser':entry['user'], 'uclimit':'2', 'format':'xml'})) yy = BeautifulStoneSoup(xx.read()) if len(yy('item')) == 0: print " No visible contribs; really blocking now" xx = urllib2.urlopen(APIURL, urllib.urlencode({'action':'query', 'prop':'info', 'intoken':'block', 'titles':'User:'+ entry['user'], 'format':'xml'})) yy = BeautifulStoneSoup(xx.read()) blocktoken = yy('page')[0]['blocktoken'] xx = urllib2.urlopen(APIURL, urllib.urlencode({'action':'block', 'user':entry['user'], 'expiry':'100000 hours', 'reason':'Automatische blokkade vermoedelijke spambot - indien onterecht geblokkeerd svp [[Special:ListUsers/sysop|moderator]] emailen', 'token':blocktoken, 'nocreate':'', 'autoblock':''})) yy = BeautifulStoneSoup(xx.read()) print " See ya!" else: print "xxx Not blocking; too much edits" else: print "xxx Abort! Revids don't match!" else: print "xxx Couldn't find url" lasttitle = entry['title'] #time.sleep(30) except urllib2.URLError: print ">>> Urlerror" #time.sleep(120)