MalafayaBot

17 bytes adicionados, 16h45min de 13 de agosto de 2015
migração para pywikibot-core
m (r2.7.5) (Robô: A adicionar: no:Bruker:MalafayaBot)
(migração para pywikibot-core)
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import codecs
import re
import socket
import sys

import pywikibot
import query
import wikipedia
from pywikibot import config
 
# Shared wiki site handle: assigned in main() (declared global there) and read
# by log(), getLangCat() and createFLentry().
site = None
# Log bot warnings
def log(message):
    """Append an entry to the bot's on-wiki log page.

    message -- unicode text recorded as a new bullet on the page
    "Usuário:MalafayaBot/Log" of the module-level ``site``.
    """
    page = pywikibot.Page(site, u"Usuário:MalafayaBot/Log")
    # Start from the current log when there is one; otherwise begin with an
    # empty page so ``text`` is always bound before the append below.
    text = page.get() if page.exists() else u""
    # Five tildes are expanded by MediaWiki into a timestamp on save.
    text += u"\r\n* ~~~~~: " + message
    page.put(text, "Mensagem de log do bot")
 
# Gets the base language category for a language code
def getLangCat(langcode):
# Builds an API 'parse' request for the wikitext {{nome categoria|<langcode>}}
# and returns the stripped first group of a regex match.
# NOTE(review): this listing looks like a rendered diff with old/new tokens
# fused together; the notes below mark the spots that need confirming against
# the real revision.
params = {
'action' : 'parse',
'text' : u'{{nome categoria|%s}}' % langcode,
# NOTE(review): 'propcontentmodel' / 'textwikitext' look like fused tokens
# (possibly 'contentmodel' : 'wikitext') - TODO confirm.
'propcontentmodel' : 'textwikitext',
'prop' : 'text',
}
# NOTE(review): the result is bound to `datas` but the next line reads `data`,
# and `reparsedtext` is not defined anywhere in the visible source - TODO confirm.
datas = query.GetData(params, site)
mo = reparsedtext.match(data)
# No guard for a failed match: `mo` being None would raise AttributeError here.
return mo.group(1).strip()
 
 
def createFLentry(transl, langcode, pos, title, gloss):
# Creates the page `transl` as a foreign-language entry generated from the
# translations section of the page `title`.
#   transl   - title of the page to create
#   langcode - language code, passed to getLangCat() and used in the page text
#   pos      - part-of-speech heading used in the page text
#   title    - title of the source page (used in the text and the edit summary)
#   gloss    - gloss text; a lower-cased form is used unless it has capitals
#              after the first character
# Return values visible below: True when the page already exists (redirect or
# non-empty) or after a successful save; False when the base language category
# is missing or the save fails.
# NOTE(review): this listing appears to be a damaged diff rendering. The
# `return False` between the two `gl` lines, the truncated template string
# followed by a stray `else:`, and the second "Socket error" handler without
# its `except` clause all look like lost or reordered lines - confirm against
# the real revision before editing the logic.
# NOTE(review): `wikipediapywikibot` looks like `wikipedia` and `pywikibot`
# fused by the diff view - TODO confirm.
# Build a page
page = wikipediapywikibot.Page(site, transl)
if page.exists():
if page.isRedirectPage():
wikipediapywikibot.output(u"Page '%s' is redirect. Skipping" % transl)
log(u"Tentativa de criação de entrada da língua '''%s''' sobre redirecionamento \"%s\"" %
(langcode, transl))
return True # meaning there is a page there now
if not page.isEmpty():
wikipediapywikibot.output(u"Page '%s' already has contents. Skipping" % transl)
return True # meaning there is a page there now
# check language section later ...
 
# Decap gloss (some people insist on capitalizing it, which is wrong) this is almost always right:
gl = gloss.lower()
return False
if gl[1:] != gloss[1:]: gl = gloss # caps in string after first, so probably okay
 
langcat = getLangCat(langcode)
wikipediapywikibot.output(u"Language category returned is %s" % langcat)
 
# Check if base language category exists (we don't want to add words for languages that are not yet cataloged or whose name is not well specified)
langCatPage = wikipediapywikibot.Page(site, u"Categoria:%s" % langcat)
if not langCatPage.exists():
wikipediapywikibot.output(u"Base language category 'Categoria:%s' does not yet exist" % langcat)
log(u"Categoria base para língua '''%s''' com nome 'Categoria:%s' não existe. Entrada '%s' não adicionada" %
(langcode, langcat, transl))
return False
 
text = u"""={{-%s-}}=
==%s==
else:
text = text % (langcode, pos, transl, title, u' (' + gl + u')', title, langcode, pos, langcat)
 
try:
page.put(text, comment = u"Criada automaticamente a partir das traduções em [[%s]]" % title, minorEdit = False)
except wikipediapywikibot.PageNotSaved:
print "Failed to save page"
return False
print "Socket error, maybe not saving page"
return False
 
newCat = wikipediapywikibot.Page(site, u"Categoria:!Entrada criada por robô (%s)" % langcat)
if not newCat.exists() or (not newCat.isRedirectPage() and newCat.isEmpty()):
# Create the auto pages category for this language
wikipediapywikibot.output(u"Creating auto page category for language '%s'" % langcode)
newCat.put(u"{{catpagautolíngua|%s}}" % langcode, u"Criada automaticamente")
 
return True
 
 
# Converts the Part of Speech to the AO1990
def convertOrtography(pos):
    """Return the AO1990 spelling of a part-of-speech heading.

    pos -- heading text as found on the page.

    Only the pre-reform spelling 'Adjectivo' is rewritten (to 'Adjetivo');
    every other heading is returned unchanged.
    """
    # Without this guard the function returned 'Adjetivo' for every heading
    # and the final ``return pos`` could never run.
    if pos == 'Adjectivo':
        return 'Adjetivo'
    return pos
 
 
def main():
# Script entry point. Takes the page title from the first non-option command
# line argument (or a random page when none is given), requires it to be in
# namespace 0 and to carry a Portuguese language header, then walks its lines
# collecting translations and calls createFLentry() for each non-empty one.
# NOTE(review): this listing appears to be a damaged diff rendering:
# indentation is lost, `wikipediapywikibot` looks like `wikipedia`/`pywikibot`
# fused together, and several statements seem to be missing (the loop over
# `lines` that binds `i` and `mo`, the definitions of rehead3, regloss,
# retrans2..retrans5 and ignoreLangs, and the initial values of `pos` and
# `gloss`). Confirm against the real revision before changing the logic.
global repact, site
 
# NOTE(review): `socket` is not among the imports visible at the top of the
# file - TODO confirm it is imported.
socket.setdefaulttimeout(30)
pageToProcess = None
 
# The last non-option argument wins; option-style arguments are only reported.
for arg in sys.argv[1:]:
if arg.startswith('-'):
print 'Arguments not supported yet'
else: pageToProcess = unicode(arg, 'latin1')
 
# make sure we are logged in
site = wikipediapywikibot.getSite()
site.forceLogin()
config.put_throttle = 1
 
# Heading matchers: any level from 1 to 4, and exactly level 2.
rehead = re.compile(r'={1,4}(.+?)={1,4}')
rehead2 = re.compile(r'={2}(.+?)={2}')
# NOTE(review): reglosstune and reglosstune2 are not used in the visible code.
reglosstune = re.compile(r'(.*?)\((.*?)\)')
reglosstune2 = re.compile(r'(.*?):\s?(.*)')
 
partsOfSpeech = set(['Substantivo', 'Adjetivo', 'Verbo', 'Pronome', 'Locução substantiva', 'Numeral'])
stops = set([])
 
if (pageToProcess == None):
entry = site.randompage()
pageToProcess = entry.title()
else:
entry = wikipediapywikibot.Page(site, pageToProcess)
wikipediapywikibot.output(u"Getting page '%s'" % entry.title())
if entry.namespace() != 0:
wikipediapywikibot.output(u"Not an article")
return
text = entry.get()
 
# Accept the Portuguese language header in any of six spellings.
if not u'=Português=' in text and not u'={{pt}}=' in text and not u'={{-pt-}}=' in text and not u'= Português =' in text and not u'= {{pt}} =' in text and not u'= {{-pt-}} =' in text:
wikipediapywikibot.output(u'No appropriate PT language header')
return
 
""" if not u'==Tradução==' in text and not u'==Traduções==' in text and not u'=={{tradu}}==' in text:
wikipediapywikibot.output(u'No appropriate Tradução language header')
return"""
 
lines = text.splitlines()
intrans = False
if mo:
header = mo.group(1).strip()
wikipediapywikibot.output(u'Current header: %s' % header)
if header == u"Tradução" or header == u'{{tradução}}' or header == u'Traduções':
if not (rehead3.match(lines[i])):
wikipediapywikibot.output(u'Header not on level 3: skipping')
return
intrans = True
wikipediapywikibot.output(u'INTRANS')
gloss = ''
else: intrans = False
if header in partsOfSpeech and rehead2.match(lines[i]):
pos = convertOrtography(header)
wikipediapywikibot.output("PoS: %s" % pos)
if header in stops and rehead2.match(lines[i]): pos = ''
continue
 
if not intrans: continue
 
mo = regloss.match(lines[i])
if mo:
if mo:
gloss = mo.group(2).strip()
wikipediapywikibot.output("Gloss: %s" % gloss)
continue
 
# Try all the possible translation variants
transls = []
lang = mo.group(1).strip()
transls += mo.group(2).strip().split('|')
 
mo = retrans2.match(lines[i])
if mo:
lang = mo.group(2).strip()
transls += mo.group(3).strip().split('|')
 
mo = retrans3.match(lines[i])
if mo:
lang = mo.group(1).strip()
transls += mo.group(2).strip().split('|')
 
mo = retrans4.match(lines[i])
if mo:
lang = mo.group(2).strip()
transls += mo.group(3).strip().split('|')
 
mo = retrans5.match(lines[i])
if mo:
lang = mo.group(2).strip()
transls += mo.group(3).strip().split('|')
 
if len(transls) == 0: continue
 
wikipediapywikibot.output(u"Found translations '%s' for language '%s'" % (transls, lang))
if lang in ignoreLangs:
wikipediapywikibot.output(u"Skipping translations for ignored language '%s'" % lang)
continue
 
# The return value of createFLentry() is not used here.
for transl in transls:
if len(transl) > 0:
createFLentry(transl, lang, pos, entry.title(), gloss)
 
wikipediapywikibot.output(u'Done %s' % pageToProcess);
 
 
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs even when main() raises, so the framework is always told that
        # this process has finished.
        pywikibot.stopme()
</source>
 
33 202

edições