From 0476e31df14395830d2e3652ac410572b2e3287c Mon Sep 17 00:00:00 2001 From: rodrigosiqueira Date: Sun, 6 Sep 2015 12:43:53 -0300 Subject: [PATCH] Script for replace real emails. --- utils/remove_backup_email.py | 274 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------------------------------------------------ 1 file changed, 208 insertions(+), 66 deletions(-) diff --git a/utils/remove_backup_email.py b/utils/remove_backup_email.py index 3e7a6e5..e3400a1 100644 --- a/utils/remove_backup_email.py +++ b/utils/remove_backup_email.py @@ -1,6 +1,24 @@ #!/usr/bin/python -import pickle, glob, os, subprocess, re, shutil, pickle +""" +The main goal of this script is to replace all original emails from the +original backup. This is really important for avoiding "spams". + +How to use it: +1 - Uncompress the main file of backup (the one generated by the + command rake backup). +2 - Copy this script to the uncompressed folder. +3 - Execute: python +4 - Wait... It takes some minutes... +""" + +import pickle +import glob +import os +import subprocess +import re +import shutil +import pickle allEmailDict = dict() globalCount = 1 @@ -8,77 +26,184 @@ globalCount = 1 last_gitlab = sorted(glob.glob('*_gitlab_backup.tar'), key=os.path.getmtime)[-1] + +""" +Uncompress: gitlab, noosfero, and mailman. +""" def decompress(): - print "=" * 30 - print "This gonna take some time..." 
+ global last_gitlab + files = {} + files['noosfero'] = ["tar -xaf noosfero_backup.tar.gz -C noosfero"] + files['gitlab'] = ["tar -xaf {} -C gitlab".format(last_gitlab)] + files['mailman'] = ["tar -xaf mailman_backup.tar.gz -C mailman"] + + print "=" * 50 + print ("This gonna take some time...") + subprocess.call(["mkdir -p gitlab"], shell=True) subprocess.call(["mkdir -p noosfero"], shell=True) subprocess.call(["mkdir -p mailman"], shell=True) - print "[1] Extracting gitlab..." - subprocess.call(["tar -xaf " + last_gitlab + " -C gitlab"], shell=True) - print "[2] Extracting noosfero..." - subprocess.call(["tar -xaf noosfero_backup.tar.gz -C noosfero"], shell=True) - print "[3] Extracting mailman..." - subprocess.call(["tar -xaf mailman_backup.tar.gz -C mailman"], shell=True) + + print ("[1] Extracting gitlab...") + subprocess.call(files['gitlab'], shell=True) + + print ("[2] Extracting noosfero...") + subprocess.call(files['noosfero'], shell=True) + + print ("[3] Extracting mailman...") + subprocess.call(files['mailman'], shell=True) + def compress(): - print "=" * 30 + print "=" * 50 print "Compressing things again..." - print "[1] Compressing gitlab..." - command = "cd gitlab && tar -cpf " + last_gitlab + " * && mv " + last_gitlab + " ../ && cd .. && rm -rf gitlab" + + command = "cd gitlab && tar -cpf {} * && mv {} ../ && cd .. && " \ + "rm -rf gitlab".format(last_gitlab, last_gitlab) subprocess.call([command], shell=True) print "[2] Compressing noosfero..." - command = 'cd noosfero && tar -czpf noosfero_backup.tar.gz * && mv noosfero_backup.tar.gz ../ && cd .. && rm -rf noosfero' + command = "cd noosfero && tar -czpf noosfero_backup.tar.gz * && " \ + "mv noosfero_backup.tar.gz ../ && cd .. && rm -rf noosfero" subprocess.call([command], shell=True) print "[3] Compressing mailman..." - command = 'cd mailman && tar -czpf mailman_backup.tar.gz * && mv mailman_backup.tar.gz ../ && cd .. 
&& rm -rf mailman' + command = "cd mailman && tar -czpf mailman_backup.tar.gz * && " \ + "mv mailman_backup.tar.gz ../ && cd .. && rm -rf mailman" subprocess.call([command], shell=True) -def create_hashes_from_file (pFile): + +""" +This function, register an email to the global dictionary. +""" +def update_email_list(pEmailKey): + global globalCount + global allEmailDict + if not pEmailKey in allEmailDict: + allEmailDict[pEmailKey] = "email{}@example.com".format(globalCount) + globalCount += 1 + + +""" +This function open a file, and try to find emails. If found any new email, +register it in a global dictionary with all emails inside it. This is +important, because we want to change all emails but keep the consistence +between all tools. +""" +def find_and_register_email_inside_text_file(pFile): global globalCount global allEmailDict + # Match email pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}') + # Build a hash with all emails with open(pFile) as current: for line in current: listOfEmail = pattern.findall(line) for email in listOfEmail: - if not email in allEmailDict: - allEmailDict[email] = "email" + `globalCount` + "@example.com" - globalCount += 1 + update_email_list(email) -def create_hashes_for_mailman (pDictionary): - global globalCount - global allEmailDict - pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}') + +""" +Go through all dictionaries recursively, and try to find a new email. If found +a new email, just register it. +""" +def go_through_dictionary_value_mailman(pValue): + + if isinstance(pValue, str): + pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}') + listOfEmail = pattern.findall(pValue) + if listOfEmail: + for email in listOfEmail: + update_email_list(email) + # Both cases below must to be handled recursively. 
+ elif isinstance(pValue, list): + for eachElement in pValue: + go_through_dictionary_value_mailman(eachElement) + elif isinstance(pValue, dict): + setOfKeys = pValue.keys() + setOfValues = pValue.values() + for eachKey in setOfKeys: + go_through_dictionary_value_mailman(eachKey) + for eachValue in setOfValues: + go_through_dictionary_value_mailman(eachValue) + + +""" +Take config.pck from mailman, and got through key and value. Finally, +register all email found in the dictionary. +""" +def find_and_register_mailman_emails(pDictionary): + # Inspect all keys + for key in pDictionary.keys(): + go_through_dictionary_value_mailman(key) + + # Inspect all values + for values in pDictionary.values(): + go_through_dictionary_value_mailman(values) + + +""" +Replace key in the dictionary, if key is an email. +""" +def replace_key(pDictionary, key): + if key in allEmailDict: + pDictionary[allEmailDict[key]] = pDictionary[key] + del pDictionary[key] + + +""" +Look at the dictionary, if find an email there just return the value. +Otherwise, return the same value. +""" +def swap_email(pOriginal): + if pOriginal in allEmailDict: + return allEmailDict[pOriginal] + return pOriginal + + +""" +Replace value in config.pck on mailman. +""" +def recursive_replace_mailman(pKey, pValue, pOrigin): + + # First stop condition + if isinstance(pValue, str): + return swap_email(pValue) + elif isinstance(pValue, list): + newList = [] + for element in pValue: + newList.append(recursive_replace_mailman(pKey, element, pValue)) + return newList + elif isinstance(pValue, dict): + newDict = {} + for keyElement, valueElement in pValue.iteritems(): + newDict[keyElement] = recursive_replace_mailman(keyElement, + valueElement, pOrigin) + replace_key(newDict, keyElement) + return newDict + else: + # Second stop condition + return pValue + + +""" +Take config.pck and find all emails inside it and replace. 
+""" +def replace_emails_inside_mailman(pDictionary): for key, value in pDictionary.iteritems(): - if isinstance(value, basestring): - matchValue = pattern.search(value) - if matchValue: - allEmailDict[matchValue.group(0)] = "email{}@example.com".format(globalCount) - globalCount += 1 - - if isinstance(key, basestring): - matchKey = pattern.search(key) - if matchKey: - allEmailDict[matchValue.group(0)] = "email{}@example.com".format(globalCount) - globalCount += 1 - -def replace_mailman(pDictionary): - for key, value in pDictionary: - if key in allEmailDict: - pDictionary[allEmailDict[key]] = pDictionary[key] - del pDictionary[key] - if value in allEmailDict: - pDictionary[key] = allEmailDict[value] - return pDictionary - -def replace_hashes_in_file (pFile): + pDictionary[key] = recursive_replace_mailman(key, value, pDictionary) + replace_key(pDictionary, key) + + +""" +Open string files (for example: colab.sql, noosfero.sql, etc) and replace +all emails. +""" +def replace_emails_inside_files(pFile): tmp_file = 'tmp_file' with open(pFile) as current: contents = current.read() @@ -89,14 +214,20 @@ def replace_hashes_in_file (pFile): target.close() shutil.move(tmp_file, pFile) -def build_backup_list(): + +""" +Build two list: one with sql files, and the second one with mailman files. 
+""" +def find_set_of_files_to_be_changed(): listOfbkpFiles = [] + # Noosfero for file in os.listdir("noosfero/tmp/backup/"): if file.endswith(".sql"): listOfbkpFiles.append(os.path.join("noosfero/tmp/backup/", file)) # Colab and gitlab listOfbkpFiles += ["colab.dump", "gitlab/db/database.sql"] + # Mailman mailman = [] for root, dirs, files in os.walk("mailman/lists/"): @@ -105,38 +236,49 @@ def build_backup_list(): mailman.append(os.path.join(root, file)) return listOfbkpFiles, mailman + +""" +Unserialize config.pck +""" def unserializable_and_replace(pMailconfig): - emailsDict = pickle.load(open(pMailconfig, "rb" )) + emailsDict = pickle.load(open(pMailconfig, "rb")) return emailsDict + +""" +Serialize config.pck +""" def serializable_new_config(swap, mailconfig): - pass + pickle.dump(swap, open(mailconfig, "wb")) + if __name__ == "__main__": - #decompress() - #compress() - others, mailMan = build_backup_list() + decompress() + others, mailMan = find_set_of_files_to_be_changed() - #Others: colab, gitlab, and Noosfero - print ("=" * 30) + # Others: colab, gitlab, and Noosfero + print "=" * 30 print ("Creating mapping for all emails: pass through all applications") for applications in others: - create_hashes_from_file(applications) - print ("now, mailman...") + find_and_register_email_inside_text_file(applications) + + print ("Passing through mailman files...") for configMailman in mailMan: - print configMailman + print "Passing through: {}".format(configMailman) swap = unserializable_and_replace(configMailman) - create_hashes_for_mailman(swap) - print allEmailDict + find_and_register_mailman_emails(swap) - - #for application in others: - # print ("Working on: " + application) - # create_hashes_from_file(application) - # replace_hashes_in_file(application) + print "+" * 30 + print "Start replacing..." 
+ for application in others: + print ("Working on: " + application) + replace_emails_inside_files(application) - #print ("Working on: Mailman") - #for mailconfig in mailMan: - # print ("File: " + mailconfig) - # serializable_new_config(swap, mailconfig) + print ("Working on: Mailman") + for mailconfig in mailMan: + print ("File: " + mailconfig) + swap = unserializable_and_replace(mailconfig) + replace_emails_inside_mailman(swap) + serializable_new_config(swap, mailconfig) + compress() -- libgit2 0.21.2