How do I find excessively suppressed redirects in MediaWiki?

If you're able to write some lines in Python you should give the pywikibot framework a try.

You should take a look at redirect.py. pywikibot is especially powerful if you start using it interactively, e.g. through ipython. With wiki.allpages(includeredirects=True) you can get a list of all pages, and you can then do, e.g.:

    # Sketch: find every redirect title, then rewrite the pages that mention one.
    # Titles are collected into real lists (not one-shot iterators) because
    # excl_redir is used twice below; the set makes the membership test O(1).
    incl_redir = [p.title() for p in wiki.allpages(includeredirects=True)]
    excl_redir = [p.title() for p in wiki.allpages(includeredirects=False)]
    non_redirect_titles = set(excl_redir)
    redirects = [t for t in incl_redir if t not in non_redirect_titles]
    # We now have a list of all redirects.

    processed_redirects = ... # Here you need to find the exact strings of the redirects and turn them into a list of dicts (each with at least a 'from' key)

    for p_title in excl_redir:
        page = Page(wiki, p_title)
        txt = page.get()
        changed = False
        for r in processed_redirects:
            if r['from'] in txt:
                # Here you want to process your txt
                changed = True
        if changed:
            # ... and then save once per page, not once per matching redirect
            page.put(txt, "Processed redirect")

After some experiments I've ended up with the following Python script for Pywikibot. I'm very new to Python, so the following code might not look good. I tested it only on a very tiny test wiki, so I don't know what the real performance of the script is. By the way, page.put(...) takes extremely long for me (~15 s or so) -- I don't know what the reason could be. Maybe it helps someone else too.

Note: This script modifies pages (when run with --modify) and should be considered not well tested.

#!/usr/bin/python
# -*- coding: utf-8  -*-

import pywikibot
import re
import sys

try:
    # Win32
    from msvcrt import getch
except ImportError:
    # UNIX
    def getch():
        """Read and return a single character from standard input.

        When stdin is a terminal it is switched to raw mode for the read, so
        the key is returned without waiting for Enter, and the previous
        terminal settings are restored afterwards.  When stdin is not a
        terminal (piped or redirected input) there are no terminal settings
        to change, so one character is simply read from the stream; an empty
        string is returned at end of input.
        """
        import sys

        stdin = sys.stdin
        if not stdin.isatty():
            # termios calls fail on a non-terminal; fall back to a plain read.
            return stdin.read(1)

        import termios
        import tty

        fd = stdin.fileno()
        saved_settings = termios.tcgetattr(fd)
        try:
            tty.setraw(fd)
            return stdin.read(1)
        finally:
            # Always leave the terminal the way we found it.
            termios.tcsetattr(fd, termios.TCSADRAIN, saved_settings)

# Matches piped wiki links of the form [[target|label]].
_PIPED_LINK_RE = re.compile(r'\[\[\s*([^\|\]]+)\s*\|\s*([^\]]+)\s*\]\]')


def _normalize_title(raw):
    """Return raw trimmed, with underscores replaced by spaces."""
    return raw.strip().replace('_', ' ')


def _collapse_piped_redirects(text, redirects_index):
    """Rewrite [[target|label]] as [[label]] where label redirects to target.

    redirects_index maps a redirect page title to the title of its target.
    Returns a (new_text, collapsed) tuple; collapsed is a list of
    (target, label) pairs, one per link that was rewritten, in text order.
    """
    collapsed = []

    def fix_redirect(match_object):
        # The capture groups can include surrounding whitespace, so trim it
        # before comparing against page titles.
        target = match_object.group(1).strip()
        label = match_object.group(2).strip()
        # Look the label up under the same normalized key that is compared;
        # a missing key yields None, which never equals a target title.
        if redirects_index.get(_normalize_title(label)) == _normalize_title(target):
            collapsed.append((target, label))
            return '[[' + label + ']]'
        return match_object.group(0)

    return _PIPED_LINK_RE.sub(fix_redirect, text), collapsed


def process_excessive_redirects(modify = False, pause = False):
    """Report, and optionally fix, piped links that needlessly bypass a redirect.

    A link [[target|label]] is "excessive" when a redirect page named label
    already points to target; it can then be written simply as [[label]].

    modify -- when true, save each changed page back to the wiki.
    pause  -- when true (and modify is true), wait for a key press after
              each saved page.

    Progress is printed to standard output.
    """
    wiki = pywikibot.Site()

    # Map every redirect title to the title of the page it redirects to.
    redirects_index = {}
    print 'Parsing redirects:'
    for redirect in wiki.allpages(filterredir = True):
        source = redirect.title()
        print '\t', source.encode('utf8'), '->',
        redirects_index[source] = redirect.getRedirectTarget().title()
        print redirects_index[source].encode('utf8')

    print 'Processing:'
    for page in wiki.allpages(filterredir = False):
        print '\t', page.title().encode('utf8'), '-',
        text, collapsed = _collapse_piped_redirects(page.get(), redirects_index)
        if not collapsed:
            print 'clean!'
            continue

        # Finish the "<title> -" line before listing the findings.
        print
        for target, label in collapsed:
            # Encode like every other title printed here (assumes page text
            # is unicode, as the titles above are).
            shown_target = target.encode('utf8')
            print '\t\texcessive redirect', shown_target, '~~~>', label.encode('utf8'), '~~~>', shown_target
        print "\t\t", len(collapsed), 'excessive redirect(s) detected.',
        if modify:
            print 'Fixing redirects...',
            page.put(text, str(len(collapsed)) + ' excessive redirect(s) fixed')
            if pause:
                print 'Press any key . . .'
                getch()
            else:
                # Terminate the status line so the next page starts on its own line.
                print
        else:
            print

def main(*args):
    """Parse command-line options and run the redirect clean-up.

    Arguments are first passed through pywikibot.handleArgs; of what it
    returns, '--modify' enables saving changed pages and '--pause' enables
    waiting for a key press after each save.  Any other argument is ignored.
    """
    local_args = set(pywikibot.handleArgs(*args))
    process_excessive_redirects(
        modify = '--modify' in local_args,
        pause = '--pause' in local_args,
    )


if __name__ == '__main__':
    main()

Tags:

Mediawiki