Cabeçalhos de idioma

replace.py -regex -ns:0 -always -pt:1 -multiline -catr:Papiamento "^\=(\s*?)(\{\{pap\}\}|Papiamento)(\s*?)\=" "={{-pap-}}="

Shortcut

# Register the same bot account on every Wiktionary language edition listed
# below (presumably a pywikibot user-config.py fragment; `usernames` is
# expected to be provided by the surrounding configuration).
#
# The codes keep the order of the original one-assignment-per-line listing;
# 'ko' used to be assigned twice and now appears only once.
_BOT_ACCOUNT = 'Malafaya'
_WIKTIONARY_CODES = (
    'af', 'ang', 'an', 'ar', 'ast', 'ay', 'az',
    'be', 'bs', 'br', 'bg',
    'ca', 'co', 'cs', 'csb', 'cy',
    'da', 'de',
    'et', 'el', 'es', 'eo', 'en', 'eu',
    'fa', 'fi', 'fo', 'fr', 'fy',
    'ga', 'gv', 'gl', 'gu',
    'ka', 'kl', 'km', 'kn', 'ko',
    'he', 'hi', 'hr', 'hsb', 'hy',
    'id', 'io', 'is', 'it', 'iu',
    'ja',
    'kk', 'ku', 'kw', 'ky',
    'lo', 'la', 'lb', 'ln', 'lt', 'li', 'lv',
    'hu',
    'mg', 'ml', 'mk', 'mn', 'ms', 'my',
    'na', 'nah', 'nds', 'ne', 'nl', 'no',
    'oc', 'om',
    'pa', 'pl', 'pt',
    'ro', 'roa-rup', 'ru',
    'scn', 'sh', 'si', 'simple', 'sk', 'sl', 'sq', 'sr', 'su', 'sv',
    'ta', 'te', 'tg', 'th', 'tl', 'tk', 'tpi', 'tr', 'tt',
    'ug', 'uk', 'uz',
    'vi', 'vo',
    'wa',
    'yi',
    'za', 'zh', 'zh-min-nan', 'zu',
)
for _code in _WIKTIONARY_CODES:
    usernames['wiktionary'][_code] = _BOT_ACCOUNT
# Remove the temporary names so nothing but `usernames` is left behind in
# the namespace this snippet runs in.
del _code, _WIKTIONARY_CODES, _BOT_ACCOUNT

Tbot.py

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import sys
import re
import socket
import pywikibot
import codecs
import query
from pywikibot import config
from scripts import interwiki

# Wiki the bot works on; stays None until main() creates the Site object.
site = None

# Applied to the HTML returned by the "parse" API action: group 2 is the
# content of the <p> that directly follows the mw-parser-output <div>.
reparsedtext = re.compile(
    '(.*)<div class="mw-parser-output"><p>(.*?)</p>',
    flags=re.S,
)

# Language codes whose translations never get an entry created.
ignoreLangs = [
    'ine',
    'ine-pro',
]

def log(message):
    """Append *message* as a new bullet to the bot's on-wiki log page.

    The current content of "Usuário:MalafayaBot/Log" is read when the page
    exists (otherwise the log starts empty), a bullet made of the wiki
    timestamp markup ``~~~~~`` and *message* is appended, and the page is
    saved back.
    """
    log_page = pywikibot.Page(site, u"Usuário:MalafayaBot/Log")
    previous = log_page.get() if log_page.exists() else ''
    log_page.put(previous + u"\r\n* ~~~~~: " + message, "Mensagem de log do bot")

def getLangCat(langcode):
    """Return the base category name for the language code *langcode*.

    The wiki is asked, through the ``parse`` API action, to render
    ``{{nome categoria|<langcode>}}``; the text captured by group 2 of
    ``reparsedtext`` in the returned HTML is stripped and returned.

    Raises:
        AttributeError: when the returned HTML does not match
            ``reparsedtext`` (the match object is then None).
    """
    request = pywikibot.data.api.Request(
        site,
        parameters={
            'action': 'parse',
            'text': u'{{nome categoria|%s}}' % langcode,
            'contentmodel': 'wikitext',
            'prop': 'text',
        },
    )
    html = request.submit()['parse']['text']['*']
    return reparsedtext.match(html).group(2).strip()


def createFLentry(transl, langcode, pos, title, gloss):
    """Create the foreign-language entry *transl* from a translation table.

    Args:
        transl: title of the page to create (the translated word).
        langcode: language code of the translation.
        pos: part-of-speech header text used for the entry's section.
        title: title of the page the translation was found on.
        gloss: gloss of the translation table; may be empty.

    Returns:
        True when a page exists at *transl* afterwards (it was already a
        redirect, already had content, or was just saved); False when the
        entry was skipped (the gloss contains "translation", the base
        language category does not exist) or saving failed.
    """
    target = pywikibot.Page(site, transl)
    if target.exists():
        if target.isRedirectPage():
            pywikibot.output(u"Page '%s' is redirect. Skipping" % transl)
            log(u"Tentativa de criação de entrada da língua '''%s''' sobre redirecionamento \"%s\"" %
                (langcode, transl))
            return True
        if not interwiki.page_empty_check(target):
            # The language section itself is not inspected here.
            pywikibot.output(u"Page '%s' already has contents. Skipping" % transl)
            return True

    lowered = gloss.lower()
    if "translation" in lowered:
        log("word 'translation' in gloss, skipped")
        return False
    # Use the lower-cased gloss only when nothing but (at most) the first
    # character changed; capitals further in are kept as written.
    shown_gloss = lowered if lowered[1:] == gloss[1:] else gloss

    langcat = getLangCat(langcode)
    pywikibot.output(u"Language category returned is %s" % langcat)

    # Entries are only added for languages whose base category already
    # exists on the wiki.
    if not pywikibot.Page(site, u"Categoria:%s" % langcat).exists():
        pywikibot.output(u"Base language category 'Categoria:%s' does not yet exist" % langcat)
        log(u"Categoria base para língua '''%s''' com nome 'Categoria:%s' não existe. Entrada '%s' não adicionada" %
            (langcode, langcat, transl))
        return False

    template = u"""={{-%s-}}=
==%s==
'''%s'''
# [[%s]]%s

{{página-automática|{{subst:CURRENTMONTHNAME}}|{{subst:CURRENTYEAR}}|%s|%s}}

[[Categoria:%s (%s)]]
"""
    gloss_suffix = (u' (' + shown_gloss + u')') if shown_gloss != '' else ''
    wikitext = template % (langcode, pos, transl, title, gloss_suffix,
                           title, langcode, pos, langcat)

    edit_summary = u"Criada automaticamente a partir das traduções em [[%s]]" % title
    try:
        target.put(wikitext, summary=edit_summary, minor=False)
    except pywikibot.PageNotSaved:
        pywikibot.output("Failed to save page")
        return False
    except socket.timeout:
        pywikibot.output("Socket timeout, maybe not saving page")
        return False
    except socket.error:
        pywikibot.output("Socket error, maybe not saving page")
        return False

    # Make sure the per-language category of bot-created entries exists.
    auto_cat = pywikibot.Page(site, u"Categoria:!Entrada criada por robô (%s)" % langcat)
    if auto_cat.exists() and (auto_cat.isRedirectPage()
                              or not interwiki.page_empty_check(auto_cat)):
        return True
    pywikibot.output(u"Creating auto page category for language '%s'" % langcode)
    auto_cat.put(u"{{catpagautolíngua|%s}}" % langcode, u"Criada automaticamente")
    return True


def convertOrtography(pos):
    """Return *pos* spelled according to the AO1990 orthography.

    Only 'Adjectivo' is rewritten (to 'Adjetivo'); any other value is
    returned unchanged.
    """
    return 'Adjetivo' if pos == 'Adjectivo' else pos


# Any of these substrings marks a page as having a Portuguese section.
_PT_LANGUAGE_HEADERS = (
    u'=Português=', u'={{pt}}=', u'={{-pt-}}=',
    u'= Português =', u'= {{pt}} =', u'= {{-pt-}} =',
)

# Header texts that open a translation table.
_TRANSLATION_HEADERS = (u"Tradução", u'{{tradução}}', u'Traduções')

# Section headers. Because they are used with match(), the level-2 pattern
# also accepts deeper headers and the level-3 pattern also accepts level 4.
_RE_HEADER = re.compile(r'={1,4}(.+?)={1,4}')
_RE_HEADER_L2 = re.compile(r'={2}(.+?)={2}')
_RE_HEADER_L3 = re.compile(r'={3}(.+?)={3}')

# Gloss line of a translation table and the two ways of trimming it.
_RE_GLOSS = re.compile(r'\{\{tradini\|(.*?)}}')
_RE_GLOSS_PARENS = re.compile(r'(.*?)\((.*?)\)')
_RE_GLOSS_COLON = re.compile(r'(.*?):\s?(.*)')

# Supported translation templates, tried in this order on every line:
# (pattern, group holding the language code, group holding the translations)
_TRANSLATION_PATTERNS = (
    (re.compile(r'\*\s?\{\{trad\|(.*?)\|(.*?)}}'), 1, 2),
    (re.compile(r'\*(.*?)\s?\{\{xlatio\|(.*?)\|(.*?)(\|.*)?}}'), 2, 3),
    (re.compile(r'\*\s?\{\{trad-\|(.*?)\|(.*?)(\|.*)?}}'), 1, 2),
    (re.compile(r'\*(.*?)\s?\{\{t\|(.*?)\|(.*?)(\|.*)?}}'), 2, 3),
    (re.compile(r'\*(.*?)\s?\{\{t\+\|(.*?)\|(.*?)(\|.*)?}}'), 2, 3),
)


def _tune_gloss(raw):
    """Reduce the argument of ``{{tradini|...}}`` to the bare gloss text."""
    gloss = raw.strip()
    # Prefer text inside parentheses, then text after a colon.
    for pattern in (_RE_GLOSS_PARENS, _RE_GLOSS_COLON):
        found = pattern.match(gloss)
        if found:
            return found.group(2).strip()
    return gloss


def _parse_translations(line):
    """Return ``(lang, translations)`` for one line of a translation table.

    Every supported template is tried; translations accumulate across
    matching templates and ``lang`` is the code from the last match.
    ``translations`` is empty (and ``lang`` None) when nothing matches.
    """
    lang = None
    translations = []
    for pattern, lang_group, words_group in _TRANSLATION_PATTERNS:
        hit = pattern.match(line)
        if hit:
            lang = hit.group(lang_group).strip()
            translations += hit.group(words_group).strip().split('|')
    return lang, translations


def main():
    """Create foreign-language entries from one page's translation tables.

    The page title is the last command-line argument that does not start
    with '-'; without one, a random page is used. The page must be in
    namespace 0 and contain a Portuguese language header. Every translation
    found under a translation header is handed to createFLentry() together
    with the current part of speech and gloss.
    """
    # createFLentry(), getLangCat() and log() read the module-level `site`.
    global site

    socket.setdefaulttimeout(30)
    pageToProcess = None

    for arg in sys.argv[1:]:
        if arg.startswith('-'):
            pywikibot.output('Arguments not supported yet')
        else:
            # sys.argv already holds decoded text in Python 3. Re-encoding
            # it as UTF-8 and decoding with the console encoding garbled
            # non-ASCII titles on non-UTF-8 consoles and failed outright
            # when sys.stdout.encoding is None.
            pageToProcess = arg

    # make sure we are logged in
    site = pywikibot.Site()
    site.login()
    config.put_throttle = 1

    # NOTE(review): 'Adjectivo' is not listed, so convertOrtography() below
    # never actually rewrites anything - confirm whether pages still using
    # the old spelling should be accepted.
    partsOfSpeech = {'Substantivo', 'Adjetivo', 'Verbo', 'Pronome',
                     'Locução substantiva', 'Numeral'}
    # Level-2 headers that would reset the part of speech; none configured.
    stops = set()

    if pageToProcess is None:
        entry = list(site.randompages(1))[0]
        print(entry)
        pageToProcess = entry.title()
    else:
        entry = pywikibot.Page(site, pageToProcess)
    pywikibot.output(u"Getting page '%s'" % entry.title())
    if entry.namespace() != 0:
        pywikibot.output(u"Not an article")
        return
    text = entry.get()

    if not any(marker in text for marker in _PT_LANGUAGE_HEADERS):
        pywikibot.output(u'No appropriate PT language header')
        return

    # A check that the page contains a translation header (==Tradução==,
    # ==Traduções== or =={{tradu}}==) was disabled in the original code and
    # remains disabled.

    intrans = False
    # No part of speech is known until a recognised header has been seen.
    pos = None
    gloss = ''
    for line in text.splitlines():
        mo = _RE_HEADER.match(line)
        if mo:
            header = mo.group(1).strip()
            pywikibot.output(u'Current header: %s' % header)
            if header in _TRANSLATION_HEADERS:
                if not _RE_HEADER_L3.match(line):
                    pywikibot.output(u'Header not on level 3: skipping')
                    return
                intrans = True
                pywikibot.output(u'INTRANS')
                gloss = ''
            else:
                intrans = False
            if header in partsOfSpeech and _RE_HEADER_L2.match(line):
                pos = convertOrtography(header)
                pywikibot.output("PoS: %s" % pos)
            if header in stops and _RE_HEADER_L2.match(line):
                pos = ''
            continue

        if not intrans:
            continue

        mo = _RE_GLOSS.match(line)
        if mo:
            gloss = _tune_gloss(mo.group(1))
            pywikibot.output("Gloss: %s" % gloss)
            continue

        lang, transls = _parse_translations(line)
        if not transls:
            continue

        pywikibot.output(u"Found translations '%s' for language '%s'" % (transls, lang))
        if lang in ignoreLangs:
            pywikibot.output(u"Skipping translations for ignored language '%s'" % lang)
            continue

        if pos is None:
            # Previously this situation crashed with UnboundLocalError.
            pywikibot.output(u"No part of speech header seen yet: skipping translations for language '%s'" % lang)
            continue

        for transl in transls:
            if transl:
                createFLentry(transl, lang, pos, entry.title(), gloss)

    pywikibot.output(u'Done %s' % pageToProcess)


# Script entry point: run the bot only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether main() returns normally or raises.
        pywikibot.stopme()