Commit 0476e31df14395830d2e3652ac410572b2e3287c
1 parent
97a60399
Script for replacing real emails.
This script was written in Python because of Mailman's config.pck. With Python, we can open a .pck file and restore it, with the advantage of keeping the data consistent.
Showing
1 changed file
with
208 additions
and
66 deletions
Show diff stats
utils/remove_backup_email.py
1 | 1 | #!/usr/bin/python |
2 | 2 | |
3 | -import pickle, glob, os, subprocess, re, shutil, pickle | |
3 | +""" | |
4 | +The main goal of this script is to replace all original emails in the | |
5 | +original backup. This is really important for avoiding spam. | |
6 | + | |
7 | +How to use it: | |
8 | +1 - Uncompress the main file of backup (the one generated by the | |
9 | + command rake backup). | |
10 | +2 - Copy this script to the uncompressed folder. | |
11 | +3 - Execute: python <script_name> | |
12 | +4 - Wait... It takes some minutes... | |
13 | +""" | |
14 | + | |
15 | +import pickle | |
16 | +import glob | |
17 | +import os | |
18 | +import subprocess | |
19 | +import re | |
20 | +import shutil | |
21 | +import pickle | |
4 | 22 | |
5 | 23 | allEmailDict = dict() |
6 | 24 | globalCount = 1 |
... | ... | @@ -8,77 +26,184 @@ globalCount = 1 |
8 | 26 | last_gitlab = sorted(glob.glob('*_gitlab_backup.tar'), |
9 | 27 | key=os.path.getmtime)[-1] |
10 | 28 | |
29 | + | |
30 | +""" | |
31 | +Uncompress: gitlab, noosfero, and mailman. | |
32 | +""" | |
11 | 33 | def decompress(): |
12 | - print "=" * 30 | |
13 | - print "This gonna take some time..." | |
34 | + global last_gitlab | |
35 | + files = {} | |
36 | + files['noosfero'] = ["tar -xaf noosfero_backup.tar.gz -C noosfero"] | |
37 | + files['gitlab'] = ["tar -xaf {} -C gitlab".format(last_gitlab)] | |
38 | + files['mailman'] = ["tar -xaf mailman_backup.tar.gz -C mailman"] | |
39 | + | |
40 | + print "=" * 50 | |
41 | + print ("This gonna take some time...") | |
42 | + | |
14 | 43 | subprocess.call(["mkdir -p gitlab"], shell=True) |
15 | 44 | subprocess.call(["mkdir -p noosfero"], shell=True) |
16 | 45 | subprocess.call(["mkdir -p mailman"], shell=True) |
17 | - print "[1] Extracting gitlab..." | |
18 | - subprocess.call(["tar -xaf " + last_gitlab + " -C gitlab"], shell=True) | |
19 | - print "[2] Extracting noosfero..." | |
20 | - subprocess.call(["tar -xaf noosfero_backup.tar.gz -C noosfero"], shell=True) | |
21 | - print "[3] Extracting mailman..." | |
22 | - subprocess.call(["tar -xaf mailman_backup.tar.gz -C mailman"], shell=True) | |
46 | + | |
47 | + print ("[1] Extracting gitlab...") | |
48 | + subprocess.call(files['gitlab'], shell=True) | |
49 | + | |
50 | + print ("[2] Extracting noosfero...") | |
51 | + subprocess.call(files['noosfero'], shell=True) | |
52 | + | |
53 | + print ("[3] Extracting mailman...") | |
54 | + subprocess.call(files['mailman'], shell=True) | |
55 | + | |
23 | 56 | |
24 | 57 | def compress(): |
25 | - print "=" * 30 | |
58 | + print "=" * 50 | |
26 | 59 | print "Compressing things again..." |
27 | - | |
28 | 60 | print "[1] Compressing gitlab..." |
29 | - command = "cd gitlab && tar -cpf " + last_gitlab + " * && mv " + last_gitlab + " ../ && cd .. && rm -rf gitlab" | |
61 | + | |
62 | + command = "cd gitlab && tar -cpf {} * && mv {} ../ && cd .. && " \ | |
63 | + "rm -rf gitlab".format(last_gitlab, last_gitlab) | |
30 | 64 | subprocess.call([command], shell=True) |
31 | 65 | |
32 | 66 | print "[2] Compressing noosfero..." |
33 | - command = 'cd noosfero && tar -czpf noosfero_backup.tar.gz * && mv noosfero_backup.tar.gz ../ && cd .. && rm -rf noosfero' | |
67 | + command = "cd noosfero && tar -czpf noosfero_backup.tar.gz * && " \ | |
68 | + "mv noosfero_backup.tar.gz ../ && cd .. && rm -rf noosfero" | |
34 | 69 | subprocess.call([command], shell=True) |
35 | 70 | |
36 | 71 | print "[3] Compressing mailman..." |
37 | - command = 'cd mailman && tar -czpf mailman_backup.tar.gz * && mv mailman_backup.tar.gz ../ && cd .. && rm -rf mailman' | |
72 | + command = "cd mailman && tar -czpf mailman_backup.tar.gz * && " \ | |
73 | + "mv mailman_backup.tar.gz ../ && cd .. && rm -rf mailman" | |
38 | 74 | subprocess.call([command], shell=True) |
39 | 75 | |
40 | -def create_hashes_from_file (pFile): | |
76 | + | |
77 | +""" | |
78 | +This function registers an email in the global dictionary. | |
79 | +""" | |
80 | +def update_email_list(pEmailKey): | |
81 | + global globalCount | |
82 | + global allEmailDict | |
83 | + if not pEmailKey in allEmailDict: | |
84 | + allEmailDict[pEmailKey] = "email{}@example.com".format(globalCount) | |
85 | + globalCount += 1 | |
86 | + | |
87 | + | |
88 | +""" | |
89 | +This function opens a file and tries to find emails. Any new email found | |
90 | +is registered in a global dictionary that holds all emails. This is | |
91 | +important because we want to change all emails but keep them consistent | |
92 | +across all tools. | |
93 | +""" | |
94 | +def find_and_register_email_inside_text_file(pFile): | |
41 | 95 | global globalCount |
42 | 96 | global allEmailDict |
97 | + | |
43 | 98 | # Match email |
44 | 99 | pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}') |
100 | + | |
45 | 101 | # Build a hash with all emails |
46 | 102 | with open(pFile) as current: |
47 | 103 | for line in current: |
48 | 104 | listOfEmail = pattern.findall(line) |
49 | 105 | for email in listOfEmail: |
50 | - if not email in allEmailDict: | |
51 | - allEmailDict[email] = "email" + `globalCount` + "@example.com" | |
52 | - globalCount += 1 | |
106 | + update_email_list(email) | |
53 | 107 | |
54 | -def create_hashes_for_mailman (pDictionary): | |
55 | - global globalCount | |
56 | - global allEmailDict | |
57 | - pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}') | |
108 | + | |
109 | +""" | |
110 | +Go through all dictionaries recursively and try to find new emails. If a | |
111 | +new email is found, just register it. | |
112 | +""" | |
113 | +def go_through_dictionary_value_mailman(pValue): | |
114 | + | |
115 | + if isinstance(pValue, str): | |
116 | + pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}') | |
117 | + listOfEmail = pattern.findall(pValue) | |
118 | + if listOfEmail: | |
119 | + for email in listOfEmail: | |
120 | + update_email_list(email) | |
121 | + # Both cases below must be handled recursively. | |
122 | + elif isinstance(pValue, list): | |
123 | + for eachElement in pValue: | |
124 | + go_through_dictionary_value_mailman(eachElement) | |
125 | + elif isinstance(pValue, dict): | |
126 | + setOfKeys = pValue.keys() | |
127 | + setOfValues = pValue.values() | |
128 | + for eachKey in setOfKeys: | |
129 | + go_through_dictionary_value_mailman(eachKey) | |
130 | + for eachValue in setOfValues: | |
131 | + go_through_dictionary_value_mailman(eachValue) | |
132 | + | |
133 | + | |
134 | +""" | |
135 | +Take config.pck from mailman and go through its keys and values. Finally, | |
136 | +register all emails found in the dictionary. | |
137 | +""" | |
138 | +def find_and_register_mailman_emails(pDictionary): | |
139 | + # Inspect all keys | |
140 | + for key in pDictionary.keys(): | |
141 | + go_through_dictionary_value_mailman(key) | |
142 | + | |
143 | + # Inspect all values | |
144 | + for values in pDictionary.values(): | |
145 | + go_through_dictionary_value_mailman(values) | |
146 | + | |
147 | + | |
148 | +""" | |
149 | +Replace a key in the dictionary if the key is a registered email. | |
150 | +""" | |
151 | +def replace_key(pDictionary, key): | |
152 | + if key in allEmailDict: | |
153 | + pDictionary[allEmailDict[key]] = pDictionary[key] | |
154 | + del pDictionary[key] | |
155 | + | |
156 | + | |
157 | +""" | |
158 | +Look up the value in the global email dictionary. If it is found there, | |
159 | +return its replacement; otherwise, return the value unchanged. | |
160 | +""" | |
161 | +def swap_email(pOriginal): | |
162 | + if pOriginal in allEmailDict: | |
163 | + return allEmailDict[pOriginal] | |
164 | + return pOriginal | |
165 | + | |
166 | + | |
167 | +""" | |
168 | +Replace value in config.pck on mailman. | |
169 | +""" | |
170 | +def recursive_replace_mailman(pKey, pValue, pOrigin): | |
171 | + | |
172 | + # First stop condition | |
173 | + if isinstance(pValue, str): | |
174 | + return swap_email(pValue) | |
175 | + elif isinstance(pValue, list): | |
176 | + newList = [] | |
177 | + for element in pValue: | |
178 | + newList.append(recursive_replace_mailman(pKey, element, pValue)) | |
179 | + return newList | |
180 | + elif isinstance(pValue, dict): | |
181 | + newDict = {} | |
182 | + for keyElement, valueElement in pValue.iteritems(): | |
183 | + newDict[keyElement] = recursive_replace_mailman(keyElement, | |
184 | + valueElement, pOrigin) | |
185 | + replace_key(newDict, keyElement) | |
186 | + return newDict | |
187 | + else: | |
188 | + # Second stop condition | |
189 | + return pValue | |
190 | + | |
191 | + | |
192 | +""" | |
193 | +Take config.pck, find all emails inside it, and replace them. | |
194 | +""" | |
195 | +def replace_emails_inside_mailman(pDictionary): | |
58 | 196 | |
59 | 197 | for key, value in pDictionary.iteritems(): |
60 | - if isinstance(value, basestring): | |
61 | - matchValue = pattern.search(value) | |
62 | - if matchValue: | |
63 | - allEmailDict[matchValue.group(0)] = "email{}@example.com".format(globalCount) | |
64 | - globalCount += 1 | |
65 | - | |
66 | - if isinstance(key, basestring): | |
67 | - matchKey = pattern.search(key) | |
68 | - if matchKey: | |
69 | - allEmailDict[matchValue.group(0)] = "email{}@example.com".format(globalCount) | |
70 | - globalCount += 1 | |
71 | - | |
72 | -def replace_mailman(pDictionary): | |
73 | - for key, value in pDictionary: | |
74 | - if key in allEmailDict: | |
75 | - pDictionary[allEmailDict[key]] = pDictionary[key] | |
76 | - del pDictionary[key] | |
77 | - if value in allEmailDict: | |
78 | - pDictionary[key] = allEmailDict[value] | |
79 | - return pDictionary | |
80 | - | |
81 | -def replace_hashes_in_file (pFile): | |
198 | + pDictionary[key] = recursive_replace_mailman(key, value, pDictionary) | |
199 | + replace_key(pDictionary, key) | |
200 | + | |
201 | + | |
202 | +""" | |
203 | +Open string files (for example: colab.sql, noosfero.sql, etc) and replace | |
204 | +all emails. | |
205 | +""" | |
206 | +def replace_emails_inside_files(pFile): | |
82 | 207 | tmp_file = 'tmp_file' |
83 | 208 | with open(pFile) as current: |
84 | 209 | contents = current.read() |
... | ... | @@ -89,14 +214,20 @@ def replace_hashes_in_file (pFile): |
89 | 214 | target.close() |
90 | 215 | shutil.move(tmp_file, pFile) |
91 | 216 | |
92 | -def build_backup_list(): | |
217 | + | |
218 | +""" | |
219 | +Build two lists: one with sql files, and a second one with mailman files. | |
220 | +""" | |
221 | +def find_set_of_files_to_be_changed(): | |
93 | 222 | listOfbkpFiles = [] |
223 | + | |
94 | 224 | # Noosfero |
95 | 225 | for file in os.listdir("noosfero/tmp/backup/"): |
96 | 226 | if file.endswith(".sql"): |
97 | 227 | listOfbkpFiles.append(os.path.join("noosfero/tmp/backup/", file)) |
98 | 228 | # Colab and gitlab |
99 | 229 | listOfbkpFiles += ["colab.dump", "gitlab/db/database.sql"] |
230 | + | |
100 | 231 | # Mailman |
101 | 232 | mailman = [] |
102 | 233 | for root, dirs, files in os.walk("mailman/lists/"): |
... | ... | @@ -105,38 +236,49 @@ def build_backup_list(): |
105 | 236 | mailman.append(os.path.join(root, file)) |
106 | 237 | return listOfbkpFiles, mailman |
107 | 238 | |
239 | + | |
240 | +""" | |
241 | +Unserialize config.pck | |
242 | +""" | |
108 | 243 | def unserializable_and_replace(pMailconfig): |
109 | - emailsDict = pickle.load(open(pMailconfig, "rb" )) | |
244 | + emailsDict = pickle.load(open(pMailconfig, "rb")) | |
110 | 245 | return emailsDict |
111 | 246 | |
247 | + | |
248 | +""" | |
249 | +Serialize config.pck | |
250 | +""" | |
112 | 251 | def serializable_new_config(swap, mailconfig): |
113 | - pass | |
252 | + pickle.dump(swap, open(mailconfig, "wb")) | |
253 | + | |
114 | 254 | |
115 | 255 | if __name__ == "__main__": |
116 | - #decompress() | |
117 | - #compress() | |
118 | - others, mailMan = build_backup_list() | |
256 | + decompress() | |
257 | + others, mailMan = find_set_of_files_to_be_changed() | |
119 | 258 | |
120 | - #Others: colab, gitlab, and Noosfero | |
121 | - print ("=" * 30) | |
259 | + # Others: colab, gitlab, and Noosfero | |
260 | + print "=" * 30 | |
122 | 261 | print ("Creating mapping for all emails: pass through all applications") |
123 | 262 | for applications in others: |
124 | - create_hashes_from_file(applications) | |
125 | - print ("now, mailman...") | |
263 | + find_and_register_email_inside_text_file(applications) | |
264 | + | |
265 | + print ("Passing through mailman files...") | |
126 | 266 | for configMailman in mailMan: |
127 | - print configMailman | |
267 | + print "Passing through: {}".format(configMailman) | |
128 | 268 | swap = unserializable_and_replace(configMailman) |
129 | - create_hashes_for_mailman(swap) | |
130 | - print allEmailDict | |
269 | + find_and_register_mailman_emails(swap) | |
131 | 270 | |
132 | - | |
133 | - #for application in others: | |
134 | - # print ("Working on: " + application) | |
135 | - # create_hashes_from_file(application) | |
136 | - # replace_hashes_in_file(application) | |
271 | + print "+" * 30 | |
272 | + print "Start replacing..." | |
273 | + for application in others: | |
274 | + print ("Working on: " + application) | |
275 | + replace_emails_inside_files(application) | |
137 | 276 | |
138 | - #print ("Working on: Mailman") | |
139 | - #for mailconfig in mailMan: | |
140 | - # print ("File: " + mailconfig) | |
141 | - # serializable_new_config(swap, mailconfig) | |
277 | + print ("Working on: Mailman") | |
278 | + for mailconfig in mailMan: | |
279 | + print ("File: " + mailconfig) | |
280 | + swap = unserializable_and_replace(configMailman) | |
281 | + replace_emails_inside_mailman(swap) | |
282 | + serializable_new_config(swap, mailconfig) | |
142 | 283 | |
284 | + compress() | ... | ... |