Commit 0476e31df14395830d2e3652ac410572b2e3287c

Authored by Rodrigo Siqueira de Melo
1 parent 97a60399

Script for replacing real emails.

This script was written in Python because of mailman's config.pck. With Python,
we can open the .pck file and write it back while keeping the data consistent
across all the tools.
Showing 1 changed file with 208 additions and 66 deletions   Show diff stats
utils/remove_backup_email.py
1 1 #!/usr/bin/python
2 2  
3   -import pickle, glob, os, subprocess, re, shutil, pickle
  3 +"""
  4 +The main goal of this script is to replace every real email address found in
  5 +the original backup. This is really important to avoid sending spam.
  6 +
  7 +How to use it:
  8 +1 - Uncompress the main file of backup (the one generated by the
  9 + command rake backup).
  10 +2 - Copy this script to the uncompressed folder.
  11 +3 - Execute: python <script_name>
  12 +4 - Wait... It takes some minutes...
  13 +"""
  14 +
  15 +import pickle
  16 +import glob
  17 +import os
  18 +import subprocess
  19 +import re
  20 +import shutil
  21 +import pickle
4 22  
5 23 allEmailDict = dict()
6 24 globalCount = 1
... ... @@ -8,77 +26,184 @@ globalCount = 1
8 26 last_gitlab = sorted(glob.glob('*_gitlab_backup.tar'),
9 27 key=os.path.getmtime)[-1]
10 28  
  29 +
def decompress():
    """Unpack the gitlab, noosfero and mailman archives.

    Each archive is extracted into a directory with the same name as the
    tool (``gitlab``, ``noosfero``, ``mailman``) under the current working
    directory. The gitlab archive name is read from the module-level
    ``last_gitlab``.

    Raises:
        subprocess.CalledProcessError: if ``tar`` exits with a non-zero
            status, so a broken extraction is not silently ignored.
        OSError: if a target directory cannot be created.
    """
    archives = [
        ("gitlab", last_gitlab),
        ("noosfero", "noosfero_backup.tar.gz"),
        ("mailman", "mailman_backup.tar.gz"),
    ]

    print("=" * 50)
    print("This gonna take some time...")

    # Create every target directory first, like the former `mkdir -p` calls.
    for target, _ in archives:
        if not os.path.isdir(target):
            os.makedirs(target)

    for step, (target, archive) in enumerate(archives, 1):
        print("[{}] Extracting {}...".format(step, target))
        # Argument list and no shell: the archive name is passed verbatim,
        # so spaces or shell metacharacters in it cannot break the command.
        subprocess.check_call(["tar", "-xaf", archive, "-C", target])
  55 +
23 56  
def compress():
    """Repack the working directories into their backup archives.

    For gitlab, noosfero and mailman, in that order: archive the content of
    the directory, move the archive one level up and remove the directory.
    The gitlab archive name is read from the module-level ``last_gitlab``.

    Raises:
        subprocess.CalledProcessError: if any step of a shell pipeline
            fails. The ``&&`` chain stops at the first failure, so the
            working directory is left in place for inspection.
    """
    # NOTE(review): `*` is expanded by the shell, so top-level dotfiles are
    # not archived, and `last_gitlab` is interpolated unquoted.
    template = ("cd {folder} && {tar} {archive} * && mv {archive} ../ && "
                "cd .. && rm -rf {folder}")
    steps = [
        ("gitlab", "tar -cpf", last_gitlab),
        ("noosfero", "tar -czpf", "noosfero_backup.tar.gz"),
        ("mailman", "tar -czpf", "mailman_backup.tar.gz"),
    ]

    print("=" * 50)
    print("Compressing things again...")

    for step, (folder, tar, archive) in enumerate(steps, 1):
        print("[{}] Compressing {}...".format(step, folder))
        command = template.format(folder=folder, tar=tar, archive=archive)
        # With shell=True the command is passed as a single string.
        subprocess.check_call(command, shell=True)
39 75  
40   -def create_hashes_from_file (pFile):
  76 +
def update_email_list(pEmailKey):
    """Register an email in the global mapping if it is not there yet.

    A new email is mapped to ``email<N>@example.com``, where ``N`` is the
    current value of the module-level ``globalCount``; the counter is then
    incremented. An email that is already registered keeps its mapping.

    Args:
        pEmailKey: the original email address, used as the dictionary key.
    """
    # Only the counter is rebound; allEmailDict is mutated in place and
    # therefore needs no `global` declaration.
    global globalCount
    if pEmailKey not in allEmailDict:
        allEmailDict[pEmailKey] = "email{}@example.com".format(globalCount)
        globalCount += 1
  86 +
  87 +
def find_and_register_email_inside_text_file(pFile):
    """Scan a text file and register every email address found in it.

    The file is read line by line and each match is handed to
    ``update_email_list``, which keeps one global mapping so the same
    address gets the same replacement in every tool.

    Args:
        pFile: path of the text file to scan (for example an SQL dump).
    """
    # NOTE(review): the final part of the pattern only accepts 2 to 4
    # letters, so an address with a longer top-level domain is matched only
    # up to its first four letters - confirm whether that is intended.
    pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')

    with open(pFile) as current:
        for line in current:
            for email in pattern.findall(line):
                update_email_list(email)
53 107  
54   -def create_hashes_for_mailman (pDictionary):
55   - global globalCount
56   - global allEmailDict
57   - pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')
  108 +
def go_through_dictionary_value_mailman(pValue):
    """Walk a value recursively and register every email found in it.

    Strings are searched for email addresses and each match is passed to
    ``update_email_list``. Lists and tuples are walked element by element.
    For dictionaries all keys are walked first, then all values. Any other
    type is ignored.

    Args:
        pValue: a string, list, tuple, dict or any other object.
    """
    if isinstance(pValue, str):
        pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}'
        for email in re.findall(pattern, pValue):
            update_email_list(email)
    # The container cases below are handled recursively.
    elif isinstance(pValue, (list, tuple)):
        for eachElement in pValue:
            go_through_dictionary_value_mailman(eachElement)
    elif isinstance(pValue, dict):
        # Keys before values: the visiting order decides which number each
        # new address receives.
        for eachKey in pValue:
            go_through_dictionary_value_mailman(eachKey)
        for eachValue in pValue.values():
            go_through_dictionary_value_mailman(eachValue)
  132 +
  133 +
def find_and_register_mailman_emails(pDictionary):
    """Register every email found in a dictionary loaded from config.pck.

    All keys are inspected first and then all values; each one is handed to
    ``go_through_dictionary_value_mailman``, which recurses into nested
    containers.

    Args:
        pDictionary: the dictionary to inspect.
    """
    # Inspect all keys
    for key in pDictionary:
        go_through_dictionary_value_mailman(key)

    # Inspect all values
    for value in pDictionary.values():
        go_through_dictionary_value_mailman(value)
  146 +
  147 +
def replace_key(pDictionary, key):
    """Rename a dictionary key to its replacement email, when it has one.

    Nothing happens if ``key`` is not a registered email in the global
    ``allEmailDict`` or is not present in ``pDictionary``.

    Args:
        pDictionary: dictionary modified in place.
        key: the key to look up and possibly rename.
    """
    if key in allEmailDict and key in pDictionary:
        # Pop before assigning: if an address maps to itself, assigning and
        # then deleting would remove the entry altogether.
        pDictionary[allEmailDict[key]] = pDictionary.pop(key)
  155 +
  156 +
def swap_email(pOriginal):
    """Return the value with every registered email replaced.

    If the whole value is a registered email, its replacement is returned.
    Otherwise, for a string, every registered address embedded in it is
    replaced, because registration also collects addresses found inside
    longer strings. Any value without a registered email is returned
    unchanged.

    Args:
        pOriginal: the value to translate, normally a string.
    """
    if pOriginal in allEmailDict:
        return allEmailDict[pOriginal]
    try:
        return re.sub(
            r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}',
            lambda match: allEmailDict.get(match.group(0), match.group(0)),
            pOriginal)
    except TypeError:
        # Not a string the pattern can be applied to: keep it untouched.
        return pOriginal
  165 +
  166 +
def recursive_replace_mailman(pKey, pValue, pOrigin):
    """Return a copy of a config.pck value with the emails replaced.

    Strings go through ``swap_email``. Lists, plain tuples and dictionaries
    are rebuilt with every element translated recursively; dictionary keys
    are renamed through ``replace_key``. Any other value is returned as is.
    The input containers are not modified.

    Args:
        pKey: key under which ``pValue`` is stored; only passed along.
        pValue: the value to translate.
        pOrigin: container that holds ``pValue``; only passed along.

    Returns:
        The translated value.
    """
    # First stop condition
    if isinstance(pValue, str):
        return swap_email(pValue)
    if isinstance(pValue, list):
        return [recursive_replace_mailman(pKey, element, pValue)
                for element in pValue]
    # Exact type check: a tuple subclass (e.g. a namedtuple) cannot be
    # rebuilt from a plain sequence, so it falls through untouched.
    if type(pValue) is tuple:
        return tuple(recursive_replace_mailman(pKey, element, pValue)
                     for element in pValue)
    if isinstance(pValue, dict):
        newDict = {}
        for keyElement, valueElement in pValue.items():
            newDict[keyElement] = recursive_replace_mailman(keyElement,
                                                            valueElement,
                                                            pOrigin)
            # Renames the entry just written into the new dictionary; the
            # dictionary being iterated is left alone.
            replace_key(newDict, keyElement)
        return newDict
    # Second stop condition
    return pValue
  190 +
  191 +
def replace_emails_inside_mailman(pDictionary):
    """Replace, in place, every email in a dictionary loaded from config.pck.

    Each value is translated with ``recursive_replace_mailman`` and each
    key that is a registered email is renamed with ``replace_key``.

    Args:
        pDictionary: the dictionary to rewrite.
    """
    # Iterate over a snapshot: replace_key() removes and inserts keys, and
    # changing a dictionary while iterating it directly is not safe.
    for key, value in list(pDictionary.items()):
        pDictionary[key] = recursive_replace_mailman(key, value, pDictionary)
        replace_key(pDictionary, key)
  200 +
  201 +
  202 +"""
  203 +Open string files (for example: colab.sql, noosfero.sql, etc) and replace
  204 +all emails.
  205 +"""
  206 +def replace_emails_inside_files(pFile):
82 207 tmp_file = 'tmp_file'
83 208 with open(pFile) as current:
84 209 contents = current.read()
... ... @@ -89,14 +214,20 @@ def replace_hashes_in_file (pFile):
89 214 target.close()
90 215 shutil.move(tmp_file, pFile)
91 216  
92   -def build_backup_list():
  217 +
  218 +"""
  219 +Build two list: one with sql files, and the second one with mailman files.
  220 +"""
  221 +def find_set_of_files_to_be_changed():
93 222 listOfbkpFiles = []
  223 +
94 224 # Noosfero
95 225 for file in os.listdir("noosfero/tmp/backup/"):
96 226 if file.endswith(".sql"):
97 227 listOfbkpFiles.append(os.path.join("noosfero/tmp/backup/", file))
98 228 # Colab and gitlab
99 229 listOfbkpFiles += ["colab.dump", "gitlab/db/database.sql"]
  230 +
100 231 # Mailman
101 232 mailman = []
102 233 for root, dirs, files in os.walk("mailman/lists/"):
... ... @@ -105,38 +236,49 @@ def build_backup_list():
105 236 mailman.append(os.path.join(root, file))
106 237 return listOfbkpFiles, mailman
107 238  
  239 +
def unserializable_and_replace(pMailconfig):
    """Load and return the object pickled in a mailman config.pck file.

    Args:
        pMailconfig: path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only run this on backups
    # that come from a trusted source.
    with open(pMailconfig, "rb") as source:
        return pickle.load(source)
111 246  
  247 +
def serializable_new_config(swap, mailconfig):
    """Pickle an object into a mailman config.pck file, overwriting it.

    Args:
        swap: the object to serialize.
        mailconfig: path of the file to write.
    """
    # The context manager closes the file, so the data is on disk before
    # the directory is archived again.
    with open(mailconfig, "wb") as target:
        pickle.dump(swap, target)
  253 +
114 254  
if __name__ == "__main__":
    decompress()
    others, mailMan = find_set_of_files_to_be_changed()

    # First pass: build the mapping for every email before replacing
    # anything, so the same address gets the same replacement everywhere.
    # Others: colab, gitlab, and Noosfero
    print("=" * 30)
    print("Creating mapping for all emails: pass through all applications")
    for applications in others:
        find_and_register_email_inside_text_file(applications)

    print("Passing through mailman files...")
    for configMailman in mailMan:
        print("Passing through: {}".format(configMailman))
        swap = unserializable_and_replace(configMailman)
        find_and_register_mailman_emails(swap)

    # Second pass: apply the mapping.
    print("+" * 30)
    print("Start replacing...")
    for application in others:
        print("Working on: " + application)
        replace_emails_inside_files(application)

    print("Working on: Mailman")
    for mailconfig in mailMan:
        print("File: " + mailconfig)
        # Load the file being processed. Loading `configMailman` here would
        # reuse the last file of the previous loop and overwrite every list
        # configuration with that one list's data.
        swap = unserializable_and_replace(mailconfig)
        replace_emails_inside_mailman(swap)
        serializable_new_config(swap, mailconfig)

    compress()
... ...