remove_backup_email.py
8.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#!/usr/bin/python
"""
The main goal of this script is to replace all original emails from the
original backup. This is really important for avoiding "spams".
How to use it:
1 - Uncompress the main file of backup (the one generated by the
command rake backup).
2 - Copy this script to the uncompressed folder.
3 - Execute: python <script_name>
4 - Wait... It takes some minutes...
"""
import pickle
import glob
import os
import subprocess
import re
import shutil
import pickle
# Maps every original e-mail address registered so far to its placeholder
# ("email<N>@example.com"); filled in by update_email_list().
allEmailDict = dict()
# Number used to build the next placeholder address; the first one is 1.
globalCount = 1
# Most recently modified "*_gitlab_backup.tar" in the current directory.
# Evaluated at import time: the [-1] raises IndexError when nothing matches.
last_gitlab = sorted(glob.glob('*_gitlab_backup.tar'),
key=os.path.getmtime)[-1]
"""
Uncompress: gitlab, noosfero, and mailman.
"""
def decompress():
global last_gitlab
files = {}
files['noosfero'] = ["tar -xaf noosfero_backup.tar.gz -C noosfero"]
files['gitlab'] = ["tar -xaf {} -C gitlab".format(last_gitlab)]
files['mailman'] = ["tar -xaf mailman_backup.tar.gz -C mailman"]
print "=" * 50
print ("This gonna take some time...")
subprocess.call(["mkdir -p gitlab"], shell=True)
subprocess.call(["mkdir -p noosfero"], shell=True)
subprocess.call(["mkdir -p mailman"], shell=True)
print ("[1] Extracting gitlab...")
subprocess.call(files['gitlab'], shell=True)
print ("[2] Extracting noosfero...")
subprocess.call(files['noosfero'], shell=True)
print ("[3] Extracting mailman...")
subprocess.call(files['mailman'], shell=True)
def _repack_folder(folder, tar_flags, archive):
    """Archive the visible top-level entries of *folder*, then remove it.

    The archive is created inside *folder*, moved to the current working
    directory (replacing a file of the same name) and the folder is deleted.
    If the folder is missing or ``tar`` fails, the folder is left untouched.
    """
    if not os.path.isdir(folder):
        print("Skipping {}: folder not found".format(folder))
        return

    # Same selection the shell glob `*` makes: no dot-files.
    members = sorted(
        entry for entry in os.listdir(folder) if not entry.startswith(".")
    )
    # "--" keeps an entry whose name starts with "-" from being read as an
    # option by tar.
    status = subprocess.call(
        ["tar", tar_flags, archive, "--"] + members, cwd=folder)
    if status != 0:
        print("tar failed for {}; leaving the folder in place".format(folder))
        return

    shutil.move(os.path.join(folder, archive), archive)
    shutil.rmtree(folder)


def compress():
    """Re-create the three backup archives from the working folders.

    ``gitlab`` is packed uncompressed (``tar -cpf``) under the name stored in
    the module-level ``last_gitlab``; ``noosfero`` and ``mailman`` are packed
    gzip-compressed (``tar -czpf``) as ``noosfero_backup.tar.gz`` and
    ``mailman_backup.tar.gz``. Each archive ends up in the current working
    directory and its source folder is removed afterwards.

    No shell is involved, so archive names are never re-parsed.
    """
    jobs = (
        ("gitlab", "-cpf", last_gitlab),
        ("noosfero", "-czpf", "noosfero_backup.tar.gz"),
        ("mailman", "-czpf", "mailman_backup.tar.gz"),
    )

    print("=" * 50)
    print("Compressing things again...")
    for step, (folder, tar_flags, archive) in enumerate(jobs, 1):
        print("[{}] Compressing {}...".format(step, folder))
        _repack_folder(folder, tar_flags, archive)
"""
This function, register an email to the global dictionary.
"""
def update_email_list(pEmailKey):
global globalCount
global allEmailDict
if not pEmailKey in allEmailDict:
allEmailDict[pEmailKey] = "email{}@example.com".format(globalCount)
globalCount += 1
"""
This function open a file, and try to find emails. If found any new email,
register it in a global dictionary with all emails inside it. This is
important, because we want to change all emails but keep the consistence
between all tools.
"""
def find_and_register_email_inside_text_file(pFile):
global globalCount
global allEmailDict
# Match email
pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')
# Build a hash with all emails
with open(pFile) as current:
for line in current:
listOfEmail = pattern.findall(line)
for email in listOfEmail:
update_email_list(email)
"""
Go through all dictionaries recursively, and try to find a new email. If found
a new email, just register it.
"""
def go_through_dictionary_value_mailman(pValue):
if isinstance(pValue, str):
pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')
listOfEmail = pattern.findall(pValue)
if listOfEmail:
for email in listOfEmail:
update_email_list(email)
# Both cases below must to be handled recursively.
elif isinstance(pValue, list):
for eachElement in pValue:
go_through_dictionary_value_mailman(eachElement)
elif isinstance(pValue, dict):
setOfKeys = pValue.keys()
setOfValues = pValue.values()
for eachKey in setOfKeys:
go_through_dictionary_value_mailman(eachKey)
for eachValue in setOfValues:
go_through_dictionary_value_mailman(eachValue)
"""
Take config.pck from mailman, and got through key and value. Finally,
register all email found in the dictionary.
"""
def find_and_register_mailman_emails(pDictionary):
# Inspect all keys
for key in pDictionary.keys():
go_through_dictionary_value_mailman(key)
# Inspect all values
for values in pDictionary.values():
go_through_dictionary_value_mailman(values)
"""
Replace key in the dictionary, if key is an email.
"""
def replace_key(pDictionary, key):
if key in allEmailDict:
pDictionary[allEmailDict[key]] = pDictionary[key]
del pDictionary[key]
"""
Look at the dictionary, if find an email there just return the value.
Otherwise, return the same value.
"""
def swap_email(pOriginal):
if pOriginal in allEmailDict:
return allEmailDict[pOriginal]
return pOriginal
"""
Replace value in config.pck on mailman.
"""
def recursive_replace_mailman(pKey, pValue, pOrigin):
# First stop condition
if isinstance(pValue, str):
return swap_email(pValue)
elif isinstance(pValue, list):
newList = []
for element in pValue:
newList.append(recursive_replace_mailman(pKey, element, pValue))
return newList
elif isinstance(pValue, dict):
newDict = {}
for keyElement, valueElement in pValue.iteritems():
newDict[keyElement] = recursive_replace_mailman(keyElement,
valueElement, pOrigin)
replace_key(newDict, keyElement)
return newDict
else:
# Second stop condition
return pValue
"""
Take config.pck and find all emails inside it and replace.
"""
def replace_emails_inside_mailman(pDictionary):
for key, value in pDictionary.iteritems():
pDictionary[key] = recursive_replace_mailman(key, value, pDictionary)
replace_key(pDictionary, key)
"""
Open string files (for example: colab.sql, noosfero.sql, etc) and replace
all emails.
"""
def replace_emails_inside_files(pFile):
tmp_file = 'tmp_file'
with open(pFile) as current:
contents = current.read()
for key, value in allEmailDict.iteritems():
contents = contents.replace(key, value)
target = open(tmp_file, 'w+')
target.write(contents)
target.close()
shutil.move(tmp_file, pFile)
"""
Build two list: one with sql files, and the second one with mailman files.
"""
def find_set_of_files_to_be_changed():
listOfbkpFiles = []
# Noosfero
for file in os.listdir("noosfero/tmp/backup/"):
if file.endswith(".sql"):
listOfbkpFiles.append(os.path.join("noosfero/tmp/backup/", file))
# Colab and gitlab
listOfbkpFiles += ["colab.dump", "gitlab/db/database.sql"]
# Mailman
mailman = []
for root, dirs, files in os.walk("mailman/lists/"):
for file in files:
if file.endswith("config.pck"):
mailman.append(os.path.join(root, file))
return listOfbkpFiles, mailman
"""
Unserialize config.pck
"""
def unserializable_and_replace(pMailconfig):
emailsDict = pickle.load(open(pMailconfig, "rb"))
return emailsDict
"""
Serialize config.pck
"""
def serializable_new_config(swap, mailconfig):
pickle.dump(swap, open(mailconfig, "wb"))
def main():
    """Run the whole anonymization: unpack, map, replace, repack.

    1. Extract the three archives.
    2. First pass: register every e-mail address found in the text dumps and
       in the mailman config.pck files, so that all tools share one mapping.
    3. Second pass: rewrite the text dumps and each config.pck with the
       placeholders.
    4. Rebuild the archives.
    """
    decompress()
    others, mailMan = find_set_of_files_to_be_changed()

    # Others: colab, gitlab, and Noosfero
    print("=" * 30)
    print("Creating mapping for all emails: pass through all applications")
    for application in others:
        find_and_register_email_inside_text_file(application)

    print("Passing through mailman files...")
    for configMailman in mailMan:
        print("Passing through: {}".format(configMailman))
        find_and_register_mailman_emails(
            unserializable_and_replace(configMailman))

    print("+" * 30)
    print("Start replacing...")
    for application in others:
        print("Working on: " + application)
        replace_emails_inside_files(application)

    print("Working on: Mailman")
    for mailconfig in mailMan:
        print("File: " + mailconfig)
        # Load the file that is about to be rewritten. Loading the leftover
        # loop variable of the first pass instead would overwrite every
        # list's configuration with the contents of the last list.
        swap = unserializable_and_replace(mailconfig)
        replace_emails_inside_mailman(swap)
        serializable_new_config(swap, mailconfig)

    compress()


if __name__ == "__main__":
    main()