#!/usr/bin/python """ The main goal of this script is to replace all original emails from the original backup. This is really important for avoiding "spams". How to use it: 1 - Uncompress the main file of backup (the one generated by the command rake backup). 2 - Copy this script to the uncompressed folder. 3 - Execute: python 4 - Wait... It takes some minutes... """ import pickle import glob import os import subprocess import re import shutil import pickle allEmailDict = dict() globalCount = 1 last_gitlab = sorted(glob.glob('*_gitlab_backup.tar'), key=os.path.getmtime)[-1] """ Uncompress: gitlab, noosfero, and mailman. """ def decompress(): global last_gitlab files = {} files['noosfero'] = ["tar -xaf noosfero_backup.tar.gz -C noosfero"] files['gitlab'] = ["tar -xaf {} -C gitlab".format(last_gitlab)] files['mailman'] = ["tar -xaf mailman_backup.tar.gz -C mailman"] print "=" * 50 print ("This gonna take some time...") subprocess.call(["mkdir -p gitlab"], shell=True) subprocess.call(["mkdir -p noosfero"], shell=True) subprocess.call(["mkdir -p mailman"], shell=True) print ("[1] Extracting gitlab...") subprocess.call(files['gitlab'], shell=True) print ("[2] Extracting noosfero...") subprocess.call(files['noosfero'], shell=True) print ("[3] Extracting mailman...") subprocess.call(files['mailman'], shell=True) def compress(): print "=" * 50 print "Compressing things again..." print "[1] Compressing gitlab..." command = "cd gitlab && tar -cpf {} * && mv {} ../ && cd .. && " \ "rm -rf gitlab".format(last_gitlab, last_gitlab) subprocess.call([command], shell=True) print "[2] Compressing noosfero..." command = "cd noosfero && tar -czpf noosfero_backup.tar.gz * && " \ "mv noosfero_backup.tar.gz ../ && cd .. && rm -rf noosfero" subprocess.call([command], shell=True) print "[3] Compressing mailman..." command = "cd mailman && tar -czpf mailman_backup.tar.gz * && " \ "mv mailman_backup.tar.gz ../ && cd .. && rm -rf mailman" subprocess.call([command], shell=True) """ This function, register an email to the global dictionary. """ def update_email_list(pEmailKey): global globalCount global allEmailDict if not pEmailKey in allEmailDict: allEmailDict[pEmailKey] = "email{}@example.com".format(globalCount) globalCount += 1 """ This function open a file, and try to find emails. If found any new email, register it in a global dictionary with all emails inside it. This is important, because we want to change all emails but keep the consistence between all tools. """ def find_and_register_email_inside_text_file(pFile): global globalCount global allEmailDict # Match email pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}') # Build a hash with all emails with open(pFile) as current: for line in current: listOfEmail = pattern.findall(line) for email in listOfEmail: update_email_list(email) """ Go through all dictionaries recursively, and try to find a new email. If found a new email, just register it. """ def go_through_dictionary_value_mailman(pValue): if isinstance(pValue, str): pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}') listOfEmail = pattern.findall(pValue) if listOfEmail: for email in listOfEmail: update_email_list(email) # Both cases below must to be handled recursively. elif isinstance(pValue, list): for eachElement in pValue: go_through_dictionary_value_mailman(eachElement) elif isinstance(pValue, dict): setOfKeys = pValue.keys() setOfValues = pValue.values() for eachKey in setOfKeys: go_through_dictionary_value_mailman(eachKey) for eachValue in setOfValues: go_through_dictionary_value_mailman(eachValue) """ Take config.pck from mailman, and got through key and value. Finally, register all email found in the dictionary. """ def find_and_register_mailman_emails(pDictionary): # Inspect all keys for key in pDictionary.keys(): go_through_dictionary_value_mailman(key) # Inspect all values for values in pDictionary.values(): go_through_dictionary_value_mailman(values) """ Replace key in the dictionary, if key is an email. """ def replace_key(pDictionary, key): if key in allEmailDict: pDictionary[allEmailDict[key]] = pDictionary[key] del pDictionary[key] """ Look at the dictionary, if find an email there just return the value. Otherwise, return the same value. """ def swap_email(pOriginal): if pOriginal in allEmailDict: return allEmailDict[pOriginal] return pOriginal """ Replace value in config.pck on mailman. """ def recursive_replace_mailman(pKey, pValue, pOrigin): # First stop condition if isinstance(pValue, str): return swap_email(pValue) elif isinstance(pValue, list): newList = [] for element in pValue: newList.append(recursive_replace_mailman(pKey, element, pValue)) return newList elif isinstance(pValue, dict): newDict = {} for keyElement, valueElement in pValue.iteritems(): newDict[keyElement] = recursive_replace_mailman(keyElement, valueElement, pOrigin) replace_key(newDict, keyElement) return newDict else: # Second stop condition return pValue """ Take config.pck and find all emails inside it and replace. """ def replace_emails_inside_mailman(pDictionary): for key, value in pDictionary.iteritems(): pDictionary[key] = recursive_replace_mailman(key, value, pDictionary) replace_key(pDictionary, key) """ Open string files (for example: colab.sql, noosfero.sql, etc) and replace all emails. """ def replace_emails_inside_files(pFile): tmp_file = 'tmp_file' with open(pFile) as current: contents = current.read() for key, value in allEmailDict.iteritems(): contents = contents.replace(key, value) target = open(tmp_file, 'w+') target.write(contents) target.close() shutil.move(tmp_file, pFile) """ Build two list: one with sql files, and the second one with mailman files. """ def find_set_of_files_to_be_changed(): listOfbkpFiles = [] # Noosfero for file in os.listdir("noosfero/tmp/backup/"): if file.endswith(".sql"): listOfbkpFiles.append(os.path.join("noosfero/tmp/backup/", file)) # Colab and gitlab listOfbkpFiles += ["colab.dump", "gitlab/db/database.sql"] # Mailman mailman = [] for root, dirs, files in os.walk("mailman/lists/"): for file in files: if file.endswith("config.pck"): mailman.append(os.path.join(root, file)) return listOfbkpFiles, mailman """ Unserialize config.pck """ def unserializable_and_replace(pMailconfig): emailsDict = pickle.load(open(pMailconfig, "rb")) return emailsDict """ Serialize config.pck """ def serializable_new_config(swap, mailconfig): pickle.dump(swap, open(mailconfig, "wb")) if __name__ == "__main__": decompress() others, mailMan = find_set_of_files_to_be_changed() # Others: colab, gitlab, and Noosfero print "=" * 30 print ("Creating mapping for all emails: pass through all applications") for applications in others: find_and_register_email_inside_text_file(applications) print ("Passing through mailman files...") for configMailman in mailMan: print "Passing through: {}".format(configMailman) swap = unserializable_and_replace(configMailman) find_and_register_mailman_emails(swap) print "+" * 30 print "Start replacing..." for application in others: print ("Working on: " + application) replace_emails_inside_files(application) print ("Working on: Mailman") for mailconfig in mailMan: print ("File: " + mailconfig) swap = unserializable_and_replace(configMailman) replace_emails_inside_mailman(swap) serializable_new_config(swap, mailconfig) compress()