remove_backup_email.py 8.2 KB
#!/usr/bin/python

"""
The main goal of this script is to replace every email address found in the
original backup with a placeholder address. This is really important to
avoid spamming real users.

How to use it:
1 - Uncompress the main file of backup (the one generated by the
    command rake backup).
2 - Copy this script to the uncompressed folder.
3 - Execute: python <script_name>
4 - Wait... It takes some minutes... 
"""

import pickle
import glob
import os
import subprocess
import re
import shutil
import pickle

# Maps each original email address found in the backup to its placeholder
# address; filled by update_email_list() and read by the replace functions.
allEmailDict = dict()
# Sequence number used to build the next placeholder address
# ("email<N>@example.com").
globalCount = 1

# Most recently modified file matching '*_gitlab_backup.tar' in the current
# directory. NOTE: evaluated when the module is loaded; raises IndexError
# when no file matches the pattern.
last_gitlab = sorted(glob.glob('*_gitlab_backup.tar'),
                     key=os.path.getmtime)[-1]


"""
Uncompress: gitlab, noosfero, and mailman.
"""
def decompress():
    """Extract the gitlab, noosfero and mailman archives.

    Each archive is unpacked with ``tar`` into a folder named after the tool
    (``gitlab``, ``noosfero``, ``mailman``) inside the current working
    directory; the folders are created first with ``mkdir -p``. The gitlab
    archive is the one named by the module-level ``last_gitlab``.

    The exit status of the external commands is not checked.
    """
    # (target folder, archive) pairs, in the order they are extracted.
    archives = [
        ("gitlab", last_gitlab),
        ("noosfero", "noosfero_backup.tar.gz"),
        ("mailman", "mailman_backup.tar.gz"),
    ]

    print("=" * 50)
    print("This gonna take some time...")

    # Commands are passed as argument lists (no shell), so an archive name
    # containing spaces or shell metacharacters is handed to tar verbatim.
    for folder, _archive in archives:
        subprocess.call(["mkdir", "-p", folder])

    for step, (folder, archive) in enumerate(archives, 1):
        print("[{}] Extracting {}...".format(step, folder))
        subprocess.call(["tar", "-xaf", archive, "-C", folder])


def compress():
    """Re-create the three archives and remove the working folders.

    For gitlab, noosfero and mailman in turn, a shell command enters the
    tool's folder, archives its content with ``tar``, moves the archive one
    level up and deletes the folder. The steps are chained with ``&&``, so
    the folder is only removed when the previous steps succeeded. The exit
    status of each command is not checked here.
    """
    print("=" * 50)
    print("Compressing things again...")

    gitlab_cmd = ("cd gitlab && tar -cpf {} * &&  mv {} ../ && cd .. && "
                  "rm -rf gitlab").format(last_gitlab, last_gitlab)
    noosfero_cmd = ("cd noosfero && tar -czpf noosfero_backup.tar.gz * && "
                    "mv noosfero_backup.tar.gz ../ && cd .. && "
                    "rm -rf noosfero")
    mailman_cmd = ("cd mailman && tar -czpf mailman_backup.tar.gz * &&  "
                   "mv mailman_backup.tar.gz ../ && cd .. && "
                   "rm -rf mailman")

    steps = (
        ("[1] Compressing gitlab...", gitlab_cmd),
        ("[2] Compressing noosfero...", noosfero_cmd),
        ("[3] Compressing mailman...", mailman_cmd),
    )
    for banner, shell_line in steps:
        print(banner)
        # The shell is required here: the commands rely on `cd`, `&&` and
        # the `*` glob.
        subprocess.call([shell_line], shell=True)


"""
Register an email in the global dictionary, assigning it a placeholder.
"""
def update_email_list(pEmailKey):
    """Register ``pEmailKey`` in the global email dictionary.

    An address that is not registered yet is mapped to
    ``email<N>@example.com``, where ``<N>`` is the current value of
    ``globalCount``; the counter is then incremented. An address that is
    already registered is left untouched.
    """
    global globalCount

    if pEmailKey in allEmailDict:
        return

    allEmailDict[pEmailKey] = "email{}@example.com".format(globalCount)
    globalCount += 1


"""
Open a file and look for emails in it. Every new email found is registered
in a global dictionary that holds all emails. This is important because we
want to change all emails while keeping them consistent across all tools.
"""
def find_and_register_email_inside_text_file(pFile):
    """Register every email address found in the text file ``pFile``.

    The file is read line by line; each match of the email pattern is
    handed to update_email_list().
    """
    email_regex = re.compile(
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')

    with open(pFile) as handle:
        for text_line in handle:
            for hit in email_regex.finditer(text_line):
                update_email_list(hit.group(0))


"""
Go through nested lists and dictionaries recursively, looking for emails.
Every new email found is registered.
"""
def go_through_dictionary_value_mailman(pValue):
    """Recursively register every email address found inside ``pValue``.

    * ``str``: every match of the email pattern is registered.
    * ``list``: each element is inspected recursively.
    * ``dict``: all keys are inspected first, then all values.

    Values of any other type are ignored.
    """
    if isinstance(pValue, str):
        email_regex = re.compile(
            r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')
        for address in email_regex.findall(pValue):
            update_email_list(address)
        return

    # Containers are handled recursively.
    if isinstance(pValue, list):
        for item in pValue:
            go_through_dictionary_value_mailman(item)
        return

    if isinstance(pValue, dict):
        # Keys before values: the visiting order decides which placeholder
        # number each address receives.
        for member in list(pValue.keys()) + list(pValue.values()):
            go_through_dictionary_value_mailman(member)


"""
Take the content of mailman's config.pck and go through its keys and values,
registering every email found in the dictionary.
"""
def find_and_register_mailman_emails(pDictionary):
    """Register the emails found in the keys and values of ``pDictionary``.

    Every key is inspected first, then every value, each through
    go_through_dictionary_value_mailman().
    """
    for group in (pDictionary.keys(), pDictionary.values()):
        for entry in group:
            go_through_dictionary_value_mailman(entry)


"""
Replace key in the dictionary, if key is an email.
"""
def replace_key(pDictionary, key):
    """Rename ``key`` in ``pDictionary`` to its placeholder address.

    Nothing happens when ``key`` is not registered in the global email
    dictionary. Otherwise the value stored under ``key`` is moved to the
    placeholder key, in place.

    Raises:
        KeyError: if ``key`` is a registered email but is not present in
            ``pDictionary``.
    """
    if key in allEmailDict:
        # Remove the old entry *before* inserting the new one: when the
        # placeholder is identical to the original key (the backup already
        # contained e.g. "email1@example.com"), inserting first and deleting
        # afterwards would drop the entry altogether.
        pDictionary[allEmailDict[key]] = pDictionary.pop(key)


"""
Return the replacement registered for an email in the global dictionary.
A value with no registered email is returned as is.
"""
def swap_email(pOriginal):
    """Return ``pOriginal`` with its registered emails replaced.

    A string that is itself a registered email is swapped for its
    placeholder. Otherwise every registered email embedded in the string
    (for instance ``"Owner <a@b.com>"``) is replaced in a single pass; text
    that holds no registered email is returned unchanged.

    The embedded case matters because the registration step collects emails
    with the same pattern from inside longer strings; an exact-match lookup
    alone would leave those addresses in the output.
    """
    if pOriginal in allEmailDict:
        return allEmailDict[pOriginal]

    pattern = re.compile(
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')
    # Unregistered matches are written back untouched.
    return pattern.sub(
        lambda match: allEmailDict.get(match.group(0), match.group(0)),
        pOriginal)


"""
Replace value in config.pck on mailman.
"""
def recursive_replace_mailman(pKey, pValue, pOrigin):
    """Return a copy of ``pValue`` with registered emails replaced.

    * ``str``: the result of swap_email().
    * ``list``: a new list whose elements were processed recursively.
    * ``dict``: a new dictionary whose values were processed recursively
      and whose keys were renamed through replace_key().
    * anything else: returned as is.

    ``pKey`` and ``pOrigin`` are only passed along to the recursive calls;
    they do not influence the result.
    """
    # Strings end the recursion.
    if isinstance(pValue, str):
        return swap_email(pValue)

    if isinstance(pValue, list):
        return [recursive_replace_mailman(pKey, item, pValue)
                for item in pValue]

    if isinstance(pValue, dict):
        replaced = {}
        for innerKey, innerValue in pValue.items():
            replaced[innerKey] = recursive_replace_mailman(
                innerKey, innerValue, pOrigin)
            replace_key(replaced, innerKey)
        return replaced

    # Any other type also ends the recursion.
    return pValue


"""
Take config.pck and find all emails inside it and replace.
"""
def replace_emails_inside_mailman(pDictionary):
    """Replace, in place, every registered email inside ``pDictionary``.

    Each value is rebuilt through recursive_replace_mailman() and each key
    that is a registered email is renamed through replace_key().
    """
    # Iterate over a snapshot of the entries: replace_key() inserts and
    # deletes keys of pDictionary, and changing a dictionary while iterating
    # over it can skip entries, visit them twice or raise RuntimeError.
    for key, value in list(pDictionary.items()):
        pDictionary[key] = recursive_replace_mailman(key, value, pDictionary)
        replace_key(pDictionary, key)


"""
Open string files (for example: colab.sql, noosfero.sql, etc) and replace
all emails.
"""
def replace_emails_inside_files(pFile):
    """Replace every registered email inside the text file ``pFile``.

    The whole file is read, each email matched by the pattern is swapped for
    its placeholder from the global email dictionary, and the result is
    written to a temporary file that then replaces ``pFile``. Addresses that
    are not registered are left untouched.
    """
    pattern = re.compile(
        r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')

    with open(pFile) as current:
        contents = current.read()

    # One pass over the text, replacing whole matches only. Chaining
    # str.replace() once per registered address would corrupt an address
    # that contains another one ("aa@b.com" vs "a@b.com") and could replace
    # a placeholder that an earlier replacement had just written.
    contents = pattern.sub(
        lambda match: allEmailDict.get(match.group(0), match.group(0)),
        contents)

    # The temporary file sits next to the target file.
    tmp_file = pFile + '.tmp'
    with open(tmp_file, 'w') as target:
        target.write(contents)
    shutil.move(tmp_file, pFile)


"""
Build two lists: one with the SQL/dump files, and a second one with the
mailman files.
"""
def find_set_of_files_to_be_changed():
    """Collect the backup files whose emails must be replaced.

    Returns:
        A pair ``(text_files, mailman_files)``: ``text_files`` holds the
        ``.sql`` files of ``noosfero/tmp/backup/`` followed by
        ``colab.dump`` and ``gitlab/db/database.sql``; ``mailman_files``
        holds every file under ``mailman/lists/`` whose name ends with
        ``config.pck``. All paths are relative to the current directory.
    """
    # Noosfero
    noosfero_dir = "noosfero/tmp/backup/"
    text_files = [os.path.join(noosfero_dir, entry)
                  for entry in os.listdir(noosfero_dir)
                  if entry.endswith(".sql")]

    # Colab and gitlab
    text_files.extend(["colab.dump", "gitlab/db/database.sql"])

    # Mailman
    mailman_files = []
    for folder, _subdirs, names in os.walk("mailman/lists/"):
        mailman_files.extend(os.path.join(folder, name)
                             for name in names
                             if name.endswith("config.pck"))

    return text_files, mailman_files


"""
Unserialize config.pck
"""
def unserializable_and_replace(pMailconfig):
    """Load and return the object pickled in the file ``pMailconfig``.

    The file is opened in binary mode and closed before returning.
    """
    # NOTE(security): pickle.load() can execute arbitrary code while
    # loading; only run this on backups coming from a trusted source.
    with open(pMailconfig, "rb") as pickled:
        return pickle.load(pickled)


"""
Serialize config.pck
"""
def serializable_new_config(swap, mailconfig):
    """Pickle ``swap`` into the file ``mailconfig``, overwriting it.

    The file is closed before returning, so the data is fully written by the
    time the caller goes on (for instance to archive the file).
    """
    with open(mailconfig, "wb") as target:
        pickle.dump(swap, target)


if __name__ == "__main__":
    # Overall flow: extract the archives, build the email -> placeholder
    # mapping from every file, rewrite every file with the placeholders,
    # then rebuild the archives.
    decompress()
    others, mailMan = find_set_of_files_to_be_changed()

    # Others: colab, gitlab, and Noosfero
    print("=" * 30)
    print("Creating mapping for all emails: pass through all applications")
    for application in others:
        find_and_register_email_inside_text_file(application)

    print("Passing through mailman files...")
    for mailconfig in mailMan:
        print("Passing through: {}".format(mailconfig))
        swap = unserializable_and_replace(mailconfig)
        find_and_register_mailman_emails(swap)

    print("+" * 30)
    print("Start replacing...")
    for application in others:
        print("Working on: " + application)
        replace_emails_inside_files(application)

    print("Working on: Mailman")
    for mailconfig in mailMan:
        print("File: " + mailconfig)
        # Load the file that is being rewritten. Loading the variable left
        # over from the registration loop would write the configuration of
        # the last list into every list.
        swap = unserializable_and_replace(mailconfig)
        replace_emails_inside_mailman(swap)
        serializable_new_config(swap, mailconfig)

    compress()