Commit 444e7283c19f1f33980800fe77ce1e846107d55d

Authored by Rodrigo Siqueira de Melo
1 parent a9629926

Changed import script.

I decided to rewrite the script in Python, because Mailman uses Python
serialization (pickle).
The last remaining step is to fix the code so that it works on the Mailman data.
Showing 1 changed file with 142 additions and 0 deletions   Show diff stats
utils/remove_backup_email.py 0 → 100644
... ... @@ -0,0 +1,142 @@
  1 +#!/usr/bin/python
  2 +
  3 +import pickle, glob, os, subprocess, re, shutil, pickle
  4 +
# Maps every real e-mail address seen so far to its placeholder
# ("email<N>@example.com"). Filled in by the create_hashes_* functions.
allEmailDict = dict()
# Next <N> to hand out; bumped every time a new placeholder is created.
globalCount = 1

# Most recently modified "*_gitlab_backup.tar" in the current directory, or
# None when there is no such file. Falling back to None (instead of indexing
# an empty list) lets the module load for runs that never touch the GitLab
# archive; decompress()/compress() still require a real file name.
_gitlab_backups = sorted(glob.glob('*_gitlab_backup.tar'),
                         key=os.path.getmtime)
last_gitlab = _gitlab_backups[-1] if _gitlab_backups else None
  10 +
def decompress():
    """Unpack the three backup archives into per-application directories.

    Creates ``gitlab/``, ``noosfero/`` and ``mailman/`` in the current
    directory, then extracts ``last_gitlab``, ``noosfero_backup.tar.gz`` and
    ``mailman_backup.tar.gz`` into them.

    Commands are passed to ``subprocess`` as argument lists (no shell), so a
    file name containing spaces or shell metacharacters is handled literally.

    Raises:
        subprocess.CalledProcessError: if ``mkdir`` or ``tar`` exits with a
            non-zero status (previously such failures were silently ignored).
    """
    print("=" * 30)
    print("This gonna take some time...")

    archives = [
        ("gitlab", last_gitlab),
        ("noosfero", "noosfero_backup.tar.gz"),
        ("mailman", "mailman_backup.tar.gz"),
    ]

    # Create every target directory before extracting anything.
    for directory, _ in archives:
        subprocess.check_call(["mkdir", "-p", directory])

    for step, (directory, archive) in enumerate(archives, 1):
        print("[{0}] Extracting {1}...".format(step, directory))
        subprocess.check_call(["tar", "-xaf", archive, "-C", directory])
  23 +
def _repack(directory, archive, tar_flags):
    """Archive the contents of *directory* as ./*archive*, then delete *directory*."""
    # os.listdir also returns dot-files, which the shell's "*" skipped; those
    # entries used to be left out of the archive and then removed with the
    # directory.
    members = sorted(os.listdir(directory))
    destination = os.path.abspath(archive)
    # Write under a temporary name so an interrupted tar never replaces an
    # existing archive with a truncated one.
    partial = destination + ".partial"
    subprocess.check_call(
        ["tar", tar_flags, partial, "-C", directory, "--"] + members)
    os.rename(partial, destination)
    # Only reached when tar and the rename succeeded (same guarantee the old
    # "&&" chain gave).
    shutil.rmtree(directory)


def compress():
    """Re-create the three backup archives and remove the working directories.

    Packs ``gitlab/`` into ``last_gitlab`` (plain tar), ``noosfero/`` into
    ``noosfero_backup.tar.gz`` and ``mailman/`` into ``mailman_backup.tar.gz``
    (gzip), each written to the current directory. A working directory is
    deleted only after its archive was written successfully.

    Raises:
        subprocess.CalledProcessError: if ``tar`` exits with a non-zero status.
        OSError: if a working directory is missing or cannot be removed.
    """
    print("=" * 30)
    print("Compressing things again...")

    print("[1] Compressing gitlab...")
    _repack("gitlab", last_gitlab, "-cpf")

    print("[2] Compressing noosfero...")
    _repack("noosfero", "noosfero_backup.tar.gz", "-czpf")

    print("[3] Compressing mailman...")
    _repack("mailman", "mailman_backup.tar.gz", "-czpf")
  39 +
def create_hashes_from_file (pFile):
    """Register a placeholder for every e-mail address found in *pFile*.

    The file is read line by line. Each address not yet present in the
    module-level ``allEmailDict`` is mapped to ``"email<N>@example.com"``,
    where N is the current ``globalCount``, which is then incremented.
    Addresses that are already mapped keep their placeholder.

    Args:
        pFile: path of the text file to scan.
    """
    global globalCount
    global allEmailDict
    # Match email.
    # NOTE(review): the final group accepts 2-4 letters only, so an address
    # whose last label is longer is captured truncated -- confirm that this is
    # acceptable for the data being processed.
    pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')
    # Build a hash with all emails
    with open(pFile) as current:
        for line in current:
            for email in pattern.findall(line):
                if email not in allEmailDict:
                    allEmailDict[email] = "email{0}@example.com".format(globalCount)
                    globalCount += 1
  53 +
def create_hashes_for_mailman (pDictionary):
    """Register placeholders for e-mail addresses in a dict's keys and values.

    Every top-level key and value of *pDictionary* that is a string is
    searched for addresses. Each address not yet in the module-level
    ``allEmailDict`` is mapped to ``"email<N>@example.com"`` (N being the
    current ``globalCount``, incremented afterwards). Addresses that already
    have a placeholder -- e.g. from create_hashes_from_file -- keep it, so one
    address always maps to one placeholder.

    Non-string keys and values are skipped.
    NOTE(review): nested containers (lists or dicts used as values) are not
    traversed; confirm whether the loaded configuration needs that.

    Args:
        pDictionary: mapping to scan; it is not modified.
    """
    global globalCount
    global allEmailDict
    pattern = re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}')

    # "basestring" exists on Python 2 only; fall back to str elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    for key, value in pDictionary.items():
        # The value is examined before the key, as before.
        for candidate in (value, key):
            if not isinstance(candidate, string_types):
                continue
            for email in pattern.findall(candidate):
                if email not in allEmailDict:
                    allEmailDict[email] = "email{}@example.com".format(globalCount)
                    globalCount += 1
  71 +
def replace_mailman(pDictionary):
    """Swap known e-mail addresses in *pDictionary* for their placeholders.

    A top-level key that is exactly an address in the module-level
    ``allEmailDict`` is renamed to its placeholder; a top-level string value
    that is exactly such an address is replaced by its placeholder. Anything
    else is left untouched.

    Args:
        pDictionary: mapping to rewrite; it is modified in place.

    Returns:
        The same *pDictionary* object.
    """
    # "basestring" exists on Python 2 only; fall back to str elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Iterate over a snapshot: entries are added and removed while looping.
    for key, value in list(pDictionary.items()):
        new_key = allEmailDict.get(key, key)
        new_value = value
        # Only strings can equal an address; checking the type first also
        # avoids a TypeError on unhashable values such as lists.
        if isinstance(value, string_types) and value in allEmailDict:
            new_value = allEmailDict[value]

        if new_key == key and new_value is value:
            continue
        if new_key != key:
            del pDictionary[key]
        pDictionary[new_key] = new_value
    return pDictionary
  80 +
def replace_hashes_in_file (pFile):
    """Rewrite *pFile* with every known address replaced by its placeholder.

    Every occurrence of a key of the module-level ``allEmailDict`` in the
    file is replaced by the corresponding value. All replacements happen in a
    single pass, trying longer addresses first, so an address that contains
    another one (``aa@b.com`` / ``a@b.com``) is replaced as a whole and
    already-inserted placeholders are never rewritten again.

    The result is written to a temporary file next to *pFile*, given the same
    permission bits, and then moved over the original.

    Args:
        pFile: path of the text file to rewrite.
    """
    import tempfile  # local import: not among the module-level imports

    if not allEmailDict:
        # Nothing to replace; an empty alternation would match everywhere.
        return

    longest_first = sorted(allEmailDict, key=len, reverse=True)
    matcher = re.compile("|".join(re.escape(address)
                                  for address in longest_first))

    with open(pFile) as current:
        contents = current.read()
    contents = matcher.sub(lambda found: allEmailDict[found.group(0)],
                           contents)

    # A unique temp file in the target's directory avoids clobbering an
    # unrelated "tmp_file" in the working directory.
    directory = os.path.dirname(os.path.abspath(pFile))
    handle, tmp_path = tempfile.mkstemp(dir=directory)
    try:
        with os.fdopen(handle, 'w') as target:
            target.write(contents)
        shutil.copymode(pFile, tmp_path)
        shutil.move(tmp_path, pFile)
    finally:
        # Still present only if something above failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
  91 +
def build_backup_list(noosfero_dir="noosfero/tmp/backup/",
                      mailman_dir="mailman/lists/"):
    """Collect the backup files that have to be processed.

    Args:
        noosfero_dir: directory whose ``*.sql`` files are listed
            (non-recursive).
        mailman_dir: directory searched recursively for files whose name ends
            with ``config.pck``.

    Returns:
        A ``(listOfbkpFiles, mailman)`` tuple of path lists. The first holds
        the ``.sql`` files from *noosfero_dir* followed by ``"colab.dump"``
        and ``"gitlab/db/database.sql"``; the second holds the matching files
        under *mailman_dir*. Both are in sorted, reproducible order, so the
        placeholder numbers handed out while scanning them do not depend on
        directory listing order.

    Raises:
        OSError: if *noosfero_dir* cannot be listed.
    """
    # Noosfero
    listOfbkpFiles = [
        os.path.join(noosfero_dir, name)
        for name in sorted(os.listdir(noosfero_dir))
        if name.endswith(".sql")
    ]
    # Colab and gitlab
    listOfbkpFiles += ["colab.dump", "gitlab/db/database.sql"]
    # Mailman
    mailman = []
    for root, dirs, files in os.walk(mailman_dir):
        dirs.sort()  # in place, so os.walk descends in a stable order
        for name in sorted(files):
            if name.endswith("config.pck"):
                mailman.append(os.path.join(root, name))
    return listOfbkpFiles, mailman
  107 +
def unserializable_and_replace(pMailconfig):
    """Load and return the object pickled in the file *pMailconfig*.

    Despite the name, nothing is replaced here; the unpickled object is
    returned as is.

    Args:
        pMailconfig: path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    # SECURITY: unpickling can execute arbitrary code. Only run this on
    # backup files that come from a trusted source.
    with open(pMailconfig, "rb") as config_file:
        emailsDict = pickle.load(config_file)
    return emailsDict
  111 +
def serializable_new_config(swap, mailconfig):
    """Pickle *swap* into the file *mailconfig*, replacing its contents.

    Counterpart of unserializable_and_replace: that function reads an object
    from a pickle file, this one writes an object back.

    Args:
        swap: the object to serialize (presumably the rewritten configuration
            dict -- confirm against the caller once it is wired up).
        mailconfig: path of the pickle file to (over)write.
    """
    with open(mailconfig, "wb") as config_file:
        # Protocol 2 is the highest pickle protocol Python 2 can read.
        pickle.dump(swap, config_file, 2)
  114 +
if __name__ == "__main__":
    # decompress() and compress() are deliberately not called yet; the backup
    # directories must already be present in the working directory.
    plain_files, mailman_configs = build_backup_list()

    # First pass: every non-Mailman backup (colab, gitlab and Noosfero) is
    # scanned line by line to build the address -> placeholder mapping.
    separator = "=" * 30
    print(separator)
    print("Creating mapping for all emails: pass through all applications")
    for dump_path in plain_files:
        create_hashes_from_file(dump_path)

    # Second pass: each Mailman configuration is unpickled and scanned.
    print("now, mailman...")
    for config_path in mailman_configs:
        print(config_path)
        loaded_config = unserializable_and_replace(config_path)
        create_hashes_for_mailman(loaded_config)
    print(allEmailDict)

    # TODO: the rewriting stage is not wired up yet. Planned steps:
    #   * for each entry of plain_files, announce "Working on: <file>" and
    #     call replace_hashes_in_file() on it;
    #   * for each Mailman configuration, announce "File: <file>" and write
    #     the rewritten configuration back with serializable_new_config().
  142 +
... ...