From 7676d903a3b5205c3f583279243b2266d6dd5d38 Mon Sep 17 00:00:00 2001 From: GeorgBerner Date: Thu, 3 Mar 2016 14:12:44 +0100 Subject: [PATCH 1/3] Added python scripts for google group download and preparation --- codeface/google_groups_py/crawler.py | 271 +++++++++++++++++++++ codeface/google_groups_py/post_download.py | 111 +++++++++ 2 files changed, 382 insertions(+) create mode 100755 codeface/google_groups_py/crawler.py create mode 100755 codeface/google_groups_py/post_download.py diff --git a/codeface/google_groups_py/crawler.py b/codeface/google_groups_py/crawler.py new file mode 100755 index 00000000..5471a152 --- /dev/null +++ b/codeface/google_groups_py/crawler.py @@ -0,0 +1,271 @@ +import re +import os.path +import urllib2 +import time +import sys +import concurrent.futures + + +class Crawler(object): + + def __init__(self): + self.group = "" + self.output = "" + self.link_topic = "https://groups.google.com/forum/?_escaped_fragment_=topic/" + self.link_msg = "https://groups.google.com/forum/message/raw?msg=" + self.link_forum = "https://groups.google.com/forum/?_escaped_fragment_=forum/" + self.opener = urllib2.build_opener() + self.opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox')] + self.current_last_post_date = "00.00.00" + self.max_worker_threads = 1 + self.group_is_set = False + self.output_is_set = False + self.start_date_is_set = False + print "----------------------------------------------------" + print "------- Google Group Crawler -------\n" + + # end execution if arguments aren't complete or help is displayed + def handle_arguments(self, argv): + if len(argv) is 1: + self.print_help() + sys.exit(0) + for i in xrange(len(argv)): + if argv[i] == "-h" or argv[i] == "--h" or argv[i] == "-help": + self.print_help() + sys.exit(0) + if argv[i] == "-output": + self.set_output(argv[i+1]) + self.output_is_set = True + if argv[i] == "-group": + self.set_group(argv[i+1]) + self.group_is_set = True + if argv[i] == "-threads": + self.set_max_worker_threads(argv[i+1]) + if argv[i] == "-start": + self.set_current_last_post_date(argv[i+1]) + if self.check_date_format(self.current_last_post_date) is False: + print"Error: date wrong format" + self.print_help() + sys.exit(0) + self.start_date_is_set = True + if self.group_is_set is False: + print "Error: missing group" + self.print_help() + sys.exit(0) + if self.output_is_set is False: + print "Error: missing output" + self.print_help() + sys.exit(0) + + @staticmethod + def print_help(): + print "################################################################################" + print "Help output:" + print "################################################################################" + print "crawler.py -output \"path\" -group \"groupname\" [-threads \"#threads\",\n -start \"DD.MM.YY\"]" + print "################################################################################" + print "-output: output path for ggc" + print "-group: name of the google group" + print "-threads: number of threads for simultaneous download (default = 1) (only for dl)" + print "-start: date from which all messages of a group will be downloaded, \"DD.MM.YY\" (only for dl)" + print "" + print "If group has been downloaded before only new messages will be downloaded" + print "To download the entire group again delete the threads.txt file or the complete directory" + print "################################################################################" + + # checks if date is in 
format DD.MM.YY + @staticmethod + def check_date_format(date): + result = re.findall("\\d\\d.\\d\\d.\\d\\d", date) + if len(result) is 1: + return True + return False + + def set_group(self, group): + self.group = group + + def set_current_last_post_date(self, start): + self.current_last_post_date = start + + def set_output(self, output): + if str(output).endswith("/") or str(output).endswith("\\"): + self.output = output + else: + self.output = output + "/" + + def set_max_worker_threads(self, max_worker_threads): + self.max_worker_threads = max_worker_threads + + def start(self): + try: + self.opener.open(self.link_forum + self.group) + except urllib2.HTTPError: + print "\nError: could not locate group" + sys.exit(0) + if self.start_date_is_set is True: + self.get_group_from_start() + if os.path.isfile(self.output + "threads.txt"): + self.update() + else: + self.get_complete_group() + + def get_group_from_start(self): + self.make_dir() + with open(self.output + "threads.txt", 'w') as file_threads: + file_threads.write("XXX," + self.current_last_post_date + ",\n") + self.get_threads() + + def get_complete_group(self): + print "Entered group: " + self.group + print "complete group will be downloaded\n" + self.make_dir() + self.get_threads() + + def update(self): + print "update" + # load threads file, get first last_post_date + first_line = open(self.output + "threads.txt", "r").readline() + self.current_last_post_date = first_line.split(",")[1] + # compare first last_post_date with last_post_date from file + if self.threads_have_changes(self.output + "/threads.txt"): + self.get_threads() + else: + print "no changes in " + self.group + # if equal -> done(no new posts... possibly posts done on the same day as last download...) + + def get_threads(self): + html = self.opener.open(self.link_forum + self.group) + with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_worker_threads) as executor: + with open(self.output + "threads.txt", 'w') as file_threads: + while True: + next_page = self.get_threads_and_date(html, file_threads, executor) + if next_page == "": + break + html = self.opener.open(next_page) + + def get_threads_and_date(self, html, file_threads, executor): + next_page = "" + current_id = "" + for line in html: + last_post_date = re.findall("(?<=\"lastPostDate\">)([0-9\.:]*)", line) + if len(last_post_date) > 0: + if ":" in last_post_date[0]: + last_post_date[0] = time.strftime("%d.%m.%y") + if self.compare_date(last_post_date[0], self.current_last_post_date) == -1: + file_threads.write("XXXX," + self.current_last_post_date + ",\n") + next_page = "" + break + if last_post_date[0] == self.current_last_post_date: + return "" + file_threads.write(current_id + "," + last_post_date[0] + ",\n") + executor.submit(self.get_messages, current_id) + # self.get_messages(current_id) + temp = re.findall("(href=\")(.*?)(\")", line) + if len(temp) == 0: + continue + topic_id = re.findall("(?<=" + self.group + "/)(.*)", temp[0][1]) + if "topic" in temp[0][1]: + current_id = topic_id[0] + if "forum" in temp[0][1]: + next_page = temp[0][1] + return next_page + + # gets executed in own thread + def get_messages(self, topic_id): + counter = 0 + print "getting thread " + topic_id + try: + html = self.opener.open(self.link_topic + self.group + "/" + topic_id) + except urllib2.HTTPError: + print "skipping deleted thread " + topic_id + return + msg_id = "" + for line in html: + last_post_date = re.findall("(?<=\"lastPostDate\">)([0-9\.]*)", line) + if len(last_post_date) > 0: + if 
len(self.current_last_post_date) > 0: + if self.compare_date(last_post_date[0], self.current_last_post_date) < 1: + continue + self.get_mbox(topic_id, msg_id) + counter += 1 + temp = re.findall("(href=\")(.*?)(\")", line) + if len(temp) == 0: + continue + if "/msg/" + self.group + "/" in temp[0][1]: + msg_id = re.findall("(?<=" + topic_id + "/)(.*)", temp[0][1])[0] + if len(last_post_date) > 0: + if last_post_date == self.current_last_post_date: + print str(counter) + " messages for topic " + topic_id + " downloaded." + return "" + print str(counter) + " messages for topic " + topic_id + " downloaded." + + def get_mbox(self, topic_id, msg_id): + print "\tdownloading msg: " + msg_id + try: + mbox = self.opener.open(self.link_msg + self.group + "/" + topic_id + "/" + msg_id).read() + except urllib2.HTTPError: + print "skipping deleted message " + msg_id + return + with open(self.output + "mbox/" + msg_id + ".mbox", "w") as file_mbox: + file_mbox.write(mbox) + + @staticmethod + def compare_date(date1, date2): + # date format: dd.mm.yy + date1_split = date1.split(".") + date2_split = date2.split(".") + if int(date1_split[2]) == int(date2_split[2]): + # year1 == year2 + if int(date1_split[1]) == int(date2_split[1]): + # month1 == month + if int(date1_split[0]) < int(date2_split[0]): + # day1 < day2 + return -1 + if int(date1_split[0]) == int(date2_split[0]): + # day1 == day2 + return 0 + else: + # day1 > day2 + return 1 + if int(date1_split[1]) < int(date2_split[1]): + return -1 + else: + return 1 + if int(date1_split[2]) < int(date2_split[2]): + return -1 + else: + return 1 + + def threads_have_changes(self, path_file): + html = self.opener.open(self.link_forum + self.group) + for line in html: + if "lastPostDate" in line: + last_post_date = re.findall("(?<=\"lastPostDate\">)([0-9.:]*)", line)[0] + if ":" in last_post_date: + last_post_date = time.strftime("%d.%m.%y,\n") + if last_post_date == self.current_last_post_date: + with open(path_file, "w") as file_threads: + # to save the last update date for next update try + file_threads.write("XXXX," + self.current_last_post_date + ",\n") + return False + return True + + # creates subdirectories if they don't exist + def make_dir(self): + if not os.path.isdir(self.output + "mbox"): + print "creating directories\n" + os.makedirs(self.output + "mbox") + + +def main(): + argv = sys.argv + # argv = ['crawlerThreads.py', '-output', 'output/', '-group', 'phaser3-dev', '-threads', '40'] + # argv = ['crawlerThreads.py', '-output', 'output/', '-group', 'artspace', '-threads', '10'] + # argv = ['crawlerThreads.py', '-output', 'output/', '-group', 'artspace', '-threads', '10', '-start', '01.01.10'] + # argv = ['crawlerThreads.py', '-output', 'output/pants-devel2', '-group', 'pants-devel', '-threads', '200'] + crawler = Crawler() + crawler.handle_arguments(argv) + crawler.start() + + +main() diff --git a/codeface/google_groups_py/post_download.py b/codeface/google_groups_py/post_download.py new file mode 100755 index 00000000..922955db --- /dev/null +++ b/codeface/google_groups_py/post_download.py @@ -0,0 +1,111 @@ +import os.path +import sys +import re + + +class PostDownload(object): + + def __init__(self): + self.group = "" + self.output = "" + self.group_is_set = False + self.output_is_set = False + + @staticmethod + def print_help(): + print "################################################################################" + print "Help output:" + print "################################################################################" + print 
"post_download.py -output \"path\" -group \"groupname\"" + print "# # # # # # # # # # # # # # # # # # # # # # # # # # # " + print "-output: output path pointing to the directory containing the group's directory" + print "-group: name of the google group" + print "" + print "Multiple mbox files will be combined into one file" + print "Certain lines will be edited to be understandable by Codeface" + print "example: \"post_download.py -output output\\ -group pants\"" + print "################################################################################" + + # end execution if arguments aren't complete or help is displayed + def handle_arguments(self, argv): + if len(argv) is 1: + self.print_help() + sys.exit(0) + for i in xrange(len(argv)): + if argv[i] == "-h" or argv[i] == "--h" or argv[i] == "-help": + self.print_help() + sys.exit(0) + if argv[i] == "-output": + self.set_output(argv[i+1]) + self.output_is_set = True + if argv[i] == "-group": + self.group = argv[i+1] + self.group_is_set = True + if self.group_is_set is False: + print "Error: missing group" + self.print_help() + sys.exit(0) + if self.output_is_set is False: + print "Error: missing output" + self.print_help() + sys.exit(0) + + def set_output(self, output): + if str(output).endswith("/") or str(output).endswith("\\"): + self.output = output + else: + self.output = output + "/" + + def start(self): + self.merge_to_single_file() + + def merge_to_single_file(self): + with open(self.output + self.group + ".mbox", "w") as final_mbox: + for root, dirs, files in os.walk(self.output + "/mbox"): + for file in files: + if file.endswith(".mbox"): + with open(self.output + "mbox/" + file, "r") as single_mbox: + self.fix_content(single_mbox, final_mbox) + final_mbox.write("\n\n\n") + + def fix_content(self, single_mbox, final_mbox): + text = single_mbox.read().split("\n") + if "X-Received " in text[0]: + text[0] = "From GoogleGroups" + else: + text = ["From GoogleGroups"] + text + for line in text: + if "From: " in line: + line = self.handle_from_line(line) + final_mbox.write(line) + + @staticmethod + def handle_from_line(line): + from_line = re.findall("(?<=From: )\"?[ A-Za-z]*", line) + if len(from_line) == 1: + name = from_line[0].replace('"', '') + if name.endswith(" "): + name = name[:-1] + email = re.findall("<.*>", line) + if len(email) > 0: + if "..." 
in email[0]: + email_suffix = re.findall("@[a-zA-Z0-1\-.]*", line) + if len(email_suffix) != 0: + email_suffix = email_suffix[0] + else: + email_suffix = "@unknown.com" + return "From: " + name + " <" + name.replace(" ", "_") + email_suffix + ">\n" + else: + return "From: " + name + " " + email[0] + "\n" + return line + + +def main(): + argv = sys.argv + argv = ["-output", "../test/pants", "-group", "pants-devel"] + post_download = PostDownload() + post_download.handle_arguments(argv) + post_download.start() + + +main() \ No newline at end of file From ca69124d9c94fd2aed6d2000c25aa6ada4588ebc Mon Sep 17 00:00:00 2001 From: Georg Berner Date: Mon, 11 Apr 2016 00:37:40 +0200 Subject: [PATCH 2/3] Refractored and renamed the Google Groups Group Download (now named g3d) added UnitTests to run g3d use the run_g3d.py file --- codeface/google_groups_py/crawler.py | 271 ----------------- codeface/google_groups_py/g3d.py | 334 +++++++++++++++++++++ codeface/google_groups_py/post_download.py | 111 ------- codeface/google_groups_py/run_g3d.py | 12 + codeface/google_groups_py/utest_g3d.py | 228 ++++++++++++++ 5 files changed, 574 insertions(+), 382 deletions(-) delete mode 100755 codeface/google_groups_py/crawler.py create mode 100644 codeface/google_groups_py/g3d.py delete mode 100755 codeface/google_groups_py/post_download.py create mode 100644 codeface/google_groups_py/run_g3d.py create mode 100644 codeface/google_groups_py/utest_g3d.py diff --git a/codeface/google_groups_py/crawler.py b/codeface/google_groups_py/crawler.py deleted file mode 100755 index 5471a152..00000000 --- a/codeface/google_groups_py/crawler.py +++ /dev/null @@ -1,271 +0,0 @@ -import re -import os.path -import urllib2 -import time -import sys -import concurrent.futures - - -class Crawler(object): - - def __init__(self): - self.group = "" - self.output = "" - self.link_topic = "https://groups.google.com/forum/?_escaped_fragment_=topic/" - self.link_msg = "https://groups.google.com/forum/message/raw?msg=" - self.link_forum = "https://groups.google.com/forum/?_escaped_fragment_=forum/" - self.opener = urllib2.build_opener() - self.opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox')] - self.current_last_post_date = "00.00.00" - self.max_worker_threads = 1 - self.group_is_set = False - self.output_is_set = False - self.start_date_is_set = False - print "----------------------------------------------------" - print "------- Google Group Crawler -------\n" - - # end execution if arguments aren't complete or help is displayed - def handle_arguments(self, argv): - if len(argv) is 1: - self.print_help() - sys.exit(0) - for i in xrange(len(argv)): - if argv[i] == "-h" or argv[i] == "--h" or argv[i] == "-help": - self.print_help() - sys.exit(0) - if argv[i] == "-output": - self.set_output(argv[i+1]) - self.output_is_set = True - if argv[i] == "-group": - self.set_group(argv[i+1]) - self.group_is_set = True - if argv[i] == "-threads": - self.set_max_worker_threads(argv[i+1]) - if argv[i] == "-start": - self.set_current_last_post_date(argv[i+1]) - if self.check_date_format(self.current_last_post_date) is False: - print"Error: date wrong format" - self.print_help() - sys.exit(0) - self.start_date_is_set = True - if self.group_is_set is False: - print "Error: missing group" - self.print_help() - sys.exit(0) - if self.output_is_set is False: - print "Error: missing output" - self.print_help() - sys.exit(0) - - @staticmethod - def print_help(): - print 
"################################################################################" - print "Help output:" - print "################################################################################" - print "crawler.py -output \"path\" -group \"groupname\" [-threads \"#threads\",\n -start \"DD.MM.YY\"]" - print "################################################################################" - print "-output: output path for ggc" - print "-group: name of the google group" - print "-threads: number of threads for simultaneous download (default = 1) (only for dl)" - print "-start: date from which all messages of a group will be downloaded, \"DD.MM.YY\" (only for dl)" - print "" - print "If group has been downloaded before only new messages will be downloaded" - print "To download the entire group again delete the threads.txt file or the complete directory" - print "################################################################################" - - # checks if date is in format DD.MM.YY - @staticmethod - def check_date_format(date): - result = re.findall("\\d\\d.\\d\\d.\\d\\d", date) - if len(result) is 1: - return True - return False - - def set_group(self, group): - self.group = group - - def set_current_last_post_date(self, start): - self.current_last_post_date = start - - def set_output(self, output): - if str(output).endswith("/") or str(output).endswith("\\"): - self.output = output - else: - self.output = output + "/" - - def set_max_worker_threads(self, max_worker_threads): - self.max_worker_threads = max_worker_threads - - def start(self): - try: - self.opener.open(self.link_forum + self.group) - except urllib2.HTTPError: - print "\nError: could not locate group" - sys.exit(0) - if self.start_date_is_set is True: - self.get_group_from_start() - if os.path.isfile(self.output + "threads.txt"): - self.update() - else: - self.get_complete_group() - - def get_group_from_start(self): - self.make_dir() - with open(self.output + "threads.txt", 'w') as file_threads: - file_threads.write("XXX," + self.current_last_post_date + ",\n") - self.get_threads() - - def get_complete_group(self): - print "Entered group: " + self.group - print "complete group will be downloaded\n" - self.make_dir() - self.get_threads() - - def update(self): - print "update" - # load threads file, get first last_post_date - first_line = open(self.output + "threads.txt", "r").readline() - self.current_last_post_date = first_line.split(",")[1] - # compare first last_post_date with last_post_date from file - if self.threads_have_changes(self.output + "/threads.txt"): - self.get_threads() - else: - print "no changes in " + self.group - # if equal -> done(no new posts... possibly posts done on the same day as last download...) 
- - def get_threads(self): - html = self.opener.open(self.link_forum + self.group) - with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_worker_threads) as executor: - with open(self.output + "threads.txt", 'w') as file_threads: - while True: - next_page = self.get_threads_and_date(html, file_threads, executor) - if next_page == "": - break - html = self.opener.open(next_page) - - def get_threads_and_date(self, html, file_threads, executor): - next_page = "" - current_id = "" - for line in html: - last_post_date = re.findall("(?<=\"lastPostDate\">)([0-9\.:]*)", line) - if len(last_post_date) > 0: - if ":" in last_post_date[0]: - last_post_date[0] = time.strftime("%d.%m.%y") - if self.compare_date(last_post_date[0], self.current_last_post_date) == -1: - file_threads.write("XXXX," + self.current_last_post_date + ",\n") - next_page = "" - break - if last_post_date[0] == self.current_last_post_date: - return "" - file_threads.write(current_id + "," + last_post_date[0] + ",\n") - executor.submit(self.get_messages, current_id) - # self.get_messages(current_id) - temp = re.findall("(href=\")(.*?)(\")", line) - if len(temp) == 0: - continue - topic_id = re.findall("(?<=" + self.group + "/)(.*)", temp[0][1]) - if "topic" in temp[0][1]: - current_id = topic_id[0] - if "forum" in temp[0][1]: - next_page = temp[0][1] - return next_page - - # gets executed in own thread - def get_messages(self, topic_id): - counter = 0 - print "getting thread " + topic_id - try: - html = self.opener.open(self.link_topic + self.group + "/" + topic_id) - except urllib2.HTTPError: - print "skipping deleted thread " + topic_id - return - msg_id = "" - for line in html: - last_post_date = re.findall("(?<=\"lastPostDate\">)([0-9\.]*)", line) - if len(last_post_date) > 0: - if len(self.current_last_post_date) > 0: - if self.compare_date(last_post_date[0], self.current_last_post_date) < 1: - continue - self.get_mbox(topic_id, msg_id) - counter += 1 - temp = re.findall("(href=\")(.*?)(\")", line) - if len(temp) == 0: - continue - if "/msg/" + self.group + "/" in temp[0][1]: - msg_id = re.findall("(?<=" + topic_id + "/)(.*)", temp[0][1])[0] - if len(last_post_date) > 0: - if last_post_date == self.current_last_post_date: - print str(counter) + " messages for topic " + topic_id + " downloaded." - return "" - print str(counter) + " messages for topic " + topic_id + " downloaded." 
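
The date handling in the hunk above carries over unchanged into g3d.py below: compare_date() is a static method that takes two "DD.MM.YY" strings and returns -1, 0 or 1 depending on whether the first date lies before, on or after the second. A small illustrative check, assuming g3d.py is importable and using made-up dates:

# illustrative only, not part of the patches
from g3d import Crawler

print Crawler.compare_date("15.01.16", "01.02.16")   # -1: first date is earlier
print Crawler.compare_date("01.02.16", "15.01.16")   # 1: first date is later
print Crawler.compare_date("11.11.11", "11.11.11")   # 0: dates are equal
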
- - def get_mbox(self, topic_id, msg_id): - print "\tdownloading msg: " + msg_id - try: - mbox = self.opener.open(self.link_msg + self.group + "/" + topic_id + "/" + msg_id).read() - except urllib2.HTTPError: - print "skipping deleted message " + msg_id - return - with open(self.output + "mbox/" + msg_id + ".mbox", "w") as file_mbox: - file_mbox.write(mbox) - - @staticmethod - def compare_date(date1, date2): - # date format: dd.mm.yy - date1_split = date1.split(".") - date2_split = date2.split(".") - if int(date1_split[2]) == int(date2_split[2]): - # year1 == year2 - if int(date1_split[1]) == int(date2_split[1]): - # month1 == month - if int(date1_split[0]) < int(date2_split[0]): - # day1 < day2 - return -1 - if int(date1_split[0]) == int(date2_split[0]): - # day1 == day2 - return 0 - else: - # day1 > day2 - return 1 - if int(date1_split[1]) < int(date2_split[1]): - return -1 - else: - return 1 - if int(date1_split[2]) < int(date2_split[2]): - return -1 - else: - return 1 - - def threads_have_changes(self, path_file): - html = self.opener.open(self.link_forum + self.group) - for line in html: - if "lastPostDate" in line: - last_post_date = re.findall("(?<=\"lastPostDate\">)([0-9.:]*)", line)[0] - if ":" in last_post_date: - last_post_date = time.strftime("%d.%m.%y,\n") - if last_post_date == self.current_last_post_date: - with open(path_file, "w") as file_threads: - # to save the last update date for next update try - file_threads.write("XXXX," + self.current_last_post_date + ",\n") - return False - return True - - # creates subdirectories if they don't exist - def make_dir(self): - if not os.path.isdir(self.output + "mbox"): - print "creating directories\n" - os.makedirs(self.output + "mbox") - - -def main(): - argv = sys.argv - # argv = ['crawlerThreads.py', '-output', 'output/', '-group', 'phaser3-dev', '-threads', '40'] - # argv = ['crawlerThreads.py', '-output', 'output/', '-group', 'artspace', '-threads', '10'] - # argv = ['crawlerThreads.py', '-output', 'output/', '-group', 'artspace', '-threads', '10', '-start', '01.01.10'] - # argv = ['crawlerThreads.py', '-output', 'output/pants-devel2', '-group', 'pants-devel', '-threads', '200'] - crawler = Crawler() - crawler.handle_arguments(argv) - crawler.start() - - -main() diff --git a/codeface/google_groups_py/g3d.py b/codeface/google_groups_py/g3d.py new file mode 100644 index 00000000..38c9045c --- /dev/null +++ b/codeface/google_groups_py/g3d.py @@ -0,0 +1,334 @@ +import re +import os.path +import urllib2 +import time +import sys +import threading +import concurrent.futures + + +class Crawler(object): + + def __init__(self): + self.group = "" + self.output = "" + self.link_topic = "https://groups.google.com/forum/?_escaped_fragment_=topic/" + self.link_msg = "https://groups.google.com/forum/message/raw?msg=" + self.link_forum = "https://groups.google.com/forum/?_escaped_fragment_=forum/" + self.opener = urllib2.build_opener() + self.opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.5) Gecko/20091102 Firefox')] + self.current_last_post_date = "00.00.00" + self.max_worker_threads = 1 + self.group_is_set = False + self.output_is_set = False + self.start_date_is_set = False + self.verbose = False + self.execute = False + self.lock = threading.Lock() + self.counter = 0 + self.elapsed_time = 0 + print "------------------------g3d-------------------------" + print "------- Google Groups Group Downloader -----\n" + + # end execution if arguments aren't complete or help is displayed + def 
handle_arguments(self, argv): + if len(argv) is 1: + self.print_help() + return + for i in xrange(len(argv)): + if argv[i] == "-h" or argv[i] == "--h" or argv[i] == "-help": + self.print_help() + return + if argv[i] == "-v": + self.verbose = True + continue + if argv[i] == "-output": + if i+1 == len(argv): + break + self.set_output(argv[i+1]) + self.output_is_set = True + continue + if argv[i] == "-group": + if i+1 == len(argv): + break + self.group = argv[i+1] + self.group_is_set = True + continue + if argv[i] == "-threads": + if i+1 == len(argv): + print "Error: amount of threads missing" + return + try: + self.max_worker_threads = int(argv[i+1]) + except ValueError: + print "\nError: argument following -threads is not a number" + return + continue + if argv[i] == "-start": + if i+1 == len(argv): + print "Error: start date missing" + return + self.current_last_post_date = argv[i+1] + if self.check_date_format(self.current_last_post_date) is False: + print"Error: date wrong format" + return + self.start_date_is_set = True + continue + if self.group_is_set is False: + print "Error: missing group" + return + if self.output_is_set is False: + print "Warning: missing output" + self.set_output("./") + self.output_is_set = True + self.execute = True + + @staticmethod + def print_help(): + print "################################################################################" + print "Help output:" + print "################################################################################" + print "crawler.py -output \"path\" -group \"groupname\" [-threads \"#threads\",\n -start \"DD.MM.YY\"]" + print "################################################################################" + print "-output: output path for ggc" + print "-group: name of the google group" + print "-threads: number of threads for simultaneous download (default = 1) (only for dl)" + print "-start: date from which all messages of a group will be downloaded, \"DD.MM.YY\" (only for dl)" + print "-v: verbose output" + print "################################################################################" + + # checks if date is in format DD.MM.YY + @staticmethod + def check_date_format(date): + result = re.findall("^\\d\\d.\\d\\d.\\d\\d$", date) + if len(result) is 1: + return True + return False + + def set_output(self, output): + if str(output).endswith("/") or str(output).endswith("\\"): + self.output = output + else: + self.output = output + "/" + + def start(self): + start_time = time.time() + try: + self.opener.open(self.link_forum + self.group) + except urllib2.HTTPError: + print "\nError: could not locate group" + sys.exit(0) + if self.start_date_is_set is True: + self.get_group_from_start() + else: + self.get_complete_group() + self.elapsed_time = time.time()-start_time + + def get_group_from_start(self): + self.make_dir() + with open(self.output + self.group + ".mbox", "w"): + print "creating output file" + with open(self.output + "threads.txt", 'w') as file_threads: + file_threads.write("XXX," + self.current_last_post_date + ",\n") + self.get_threads() + + def get_complete_group(self): + print "Entered group: " + self.group + print "complete group will be downloaded\n" + self.make_dir() + with open(self.output + self.group + ".mbox", "w"): + print "creating output file" + if self.verbose is False: + print "starting download, please wait..." 
+ self.get_threads() + + def get_threads(self): + try: + html = self.opener.open(self.link_forum + self.group) + except urllib2.HTTPError: + return + with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_worker_threads) as executor: + while True: + next_page = self.get_threads_and_date(html, executor) + if next_page == "": + break + html = self.opener.open(next_page) + + def get_threads_and_date(self, html, executor): + next_page = "" + current_id = "" + for line in html: + last_post_date = self.search_last_post_date(line) + if len(last_post_date) > 0: + if ":" in last_post_date: + last_post_date = time.strftime("%d.%m.%y") + if self.compare_date(last_post_date, self.current_last_post_date) == -1: + next_page = "" + break + if last_post_date == self.current_last_post_date: + return "" + executor.submit(self.get_messages, current_id) + href = self.search_href(line) + if len(href) == 0: + continue + topic_id = self.search_topic_id(self.group, href) + if "topic" in href: + current_id = topic_id + if "forum" in href: + next_page = href + return next_page + + @staticmethod + def search_topic_id(group, line): + temp = re.findall("(?<=" + group + "/)(.*)", line) + if len(temp) == 0: + return "" + return temp[0] + + @staticmethod + def search_href(line): + temp = re.findall("(href=\")(.*?)(\")", line) + if len(temp) == 0: + return "" + return temp[0][1] + + @staticmethod + def search_last_post_date(line): + temp = re.findall("(?<=\"lastPostDate\">)([0-9\.:]*)", line) + if len(temp) == 0: + return "" + return temp[0] + + # gets executed in own thread + def get_messages(self, topic_id): + counter = 0 + if self.verbose is True: + print "getting thread " + topic_id + try: + html = self.opener.open(self.link_topic + self.group + "/" + topic_id) + except urllib2.HTTPError: + if self.verbose is True: + print "skipping deleted thread " + topic_id + return + msg_id = "" + for line in html: + last_post_date = self.search_last_post_date(line) + if len(last_post_date) > 0: + if len(self.current_last_post_date) > 0: + if self.compare_date(last_post_date, self.current_last_post_date) < 1: + continue + self.get_mbox(topic_id, msg_id) + counter += 1 + href = self.search_href(line) + if len(href) == 0: + continue + if "/msg/" + self.group + "/" in href: + msg_id = self.search_msg_id(topic_id, href) + if len(last_post_date) > 0: + if last_post_date == self.current_last_post_date: + if self.verbose is True: + print str(counter) + " messages for topic " + topic_id + " downloaded." + return "" + if self.verbose is True: + print str(counter) + " messages for topic " + topic_id + " downloaded." 
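
For readers skimming the flattened diff: get_threads() opens the forum index and builds a concurrent.futures.ThreadPoolExecutor, get_threads_and_date() submits one get_messages() call per topic to that executor, and every downloaded message is later appended to the single group mbox under self.lock (see add_to_mbox() further down), so parallel workers cannot interleave their writes. A minimal standalone sketch of that pattern, with a made-up worker function and topic ids, relying on the same futures backport for Python 2 that g3d.py already imports:

# illustrative only, not part of the patches
import threading
import concurrent.futures

lock = threading.Lock()

def download_topic(topic_id):      # hypothetical stand-in for Crawler.get_messages
    with lock:                     # serialise output, as add_to_mbox() does for file writes
        print "downloaded topic " + topic_id

with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    for topic_id in ["topic-a", "topic-b", "topic-c"]:   # placeholder ids
        executor.submit(download_topic, topic_id)
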
+ + @staticmethod + def search_msg_id(topic_id, href): + return re.findall("(?<=" + topic_id + "/)(.*)", href)[0] + + def get_mbox(self, topic_id, msg_id): + if self.verbose is True: + print "\tdownloading -> topic: " + topic_id + " msg: " + msg_id + try: + mbox = self.opener.open(self.link_msg + self.group + "/" + topic_id + "/" + msg_id).read() + except urllib2.HTTPError: + if self.verbose is True: + print "skipping deleted message " + msg_id + return + self.add_to_mbox(mbox) + + @staticmethod + def compare_date(date1, date2): + # date format: dd.mm.yy + date1_split = date1.split(".") + date2_split = date2.split(".") + if int(date1_split[2]) == int(date2_split[2]): + # year1 == year2 + if int(date1_split[1]) == int(date2_split[1]): + # month1 == month + if int(date1_split[0]) < int(date2_split[0]): + # day1 < day2 + return -1 + if int(date1_split[0]) == int(date2_split[0]): + # day1 == day2 + return 0 + else: + # day1 > day2 + return 1 + if int(date1_split[1]) < int(date2_split[1]): + return -1 + else: + return 1 + if int(date1_split[2]) < int(date2_split[2]): + return -1 + else: + return 1 + + # creates subdirectories if they don't exist + def make_dir(self): + if not os.path.isdir(self.output): + print "creating directory\n" + os.makedirs(self.output) + + def add_to_mbox(self, raw): + clean_mbox = "\n\n\nFrom Google Groups\n" + self.check_mbox(raw) + with self.lock: + with open(self.output + self.group + ".mbox", "a") as final_mbox: + final_mbox.write(clean_mbox) + self.counter += 1 + + def check_mbox(self, raw): + text = raw.split("\n") + clean = "" + for line in text: + if line.startswith("From "): + line = " " + line + if line.startswith("From: "): + line = self.handle_from_line(line) + "\n" + clean += line + return clean + + @staticmethod + def search_name(line): + temp = re.findall("(?<=From: )\"?[ A-Za-z]*", line) + if len(temp) > 0: + name = temp[0].replace('"', '') + if name.endswith(" "): + name = name[:-1] + return name + return "" + + @staticmethod + def search_email(line, name): + email = re.findall("<.*>", line) + if len(email) > 0: + if "..." in email[0]: + email_suffix = re.findall("@[a-zA-Z0-1\-.]*", line) + if len(email_suffix) != 0: + email_suffix = email_suffix[0] + else: + email_suffix = "@unknown.com" + return "<" + name.replace(" ", "_") + email_suffix + ">" + return "" + + def handle_from_line(self, line): + name = self.search_name(line) + if len(name) != 0: + email = self.search_email(line, name) + if len(email) > 0: + return "From: " + name + " " + email + return line + + def statistic(self): + print "download complete" + print "g3d downloaded " + str(self.counter) + " messages in " + str(int(self.elapsed_time)) + " seconds." 
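
That is the end of g3d.py. One detail worth spelling out is the From-line rewriting: Google Groups obfuscates sender addresses in the raw messages (something like jan...@example.com), so handle_from_line() rebuilds a parseable address from the sender name plus the still-visible domain, falling back to @unknown.com when no domain survives, which keeps the merged mbox understandable for Codeface. A quick illustration with an invented sender (constructing the Crawler also prints its start-up banner):

# illustrative only, not part of the patches
from g3d import Crawler

crawler = Crawler()
header = 'From: "Jane Doe" <jan...@example.com>'   # hypothetical obfuscated header
print crawler.handle_from_line(header)
# prints: From: Jane Doe <Jane_Doe@example.com>
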
diff --git a/codeface/google_groups_py/post_download.py b/codeface/google_groups_py/post_download.py deleted file mode 100755 index 922955db..00000000 --- a/codeface/google_groups_py/post_download.py +++ /dev/null @@ -1,111 +0,0 @@ -import os.path -import sys -import re - - -class PostDownload(object): - - def __init__(self): - self.group = "" - self.output = "" - self.group_is_set = False - self.output_is_set = False - - @staticmethod - def print_help(): - print "################################################################################" - print "Help output:" - print "################################################################################" - print "post_download.py -output \"path\" -group \"groupname\"" - print "# # # # # # # # # # # # # # # # # # # # # # # # # # # " - print "-output: output path pointing to the directory containing the group's directory" - print "-group: name of the google group" - print "" - print "Multiple mbox files will be combined into one file" - print "Certain lines will be edited to be understandable by Codeface" - print "example: \"post_download.py -output output\\ -group pants\"" - print "################################################################################" - - # end execution if arguments aren't complete or help is displayed - def handle_arguments(self, argv): - if len(argv) is 1: - self.print_help() - sys.exit(0) - for i in xrange(len(argv)): - if argv[i] == "-h" or argv[i] == "--h" or argv[i] == "-help": - self.print_help() - sys.exit(0) - if argv[i] == "-output": - self.set_output(argv[i+1]) - self.output_is_set = True - if argv[i] == "-group": - self.group = argv[i+1] - self.group_is_set = True - if self.group_is_set is False: - print "Error: missing group" - self.print_help() - sys.exit(0) - if self.output_is_set is False: - print "Error: missing output" - self.print_help() - sys.exit(0) - - def set_output(self, output): - if str(output).endswith("/") or str(output).endswith("\\"): - self.output = output - else: - self.output = output + "/" - - def start(self): - self.merge_to_single_file() - - def merge_to_single_file(self): - with open(self.output + self.group + ".mbox", "w") as final_mbox: - for root, dirs, files in os.walk(self.output + "/mbox"): - for file in files: - if file.endswith(".mbox"): - with open(self.output + "mbox/" + file, "r") as single_mbox: - self.fix_content(single_mbox, final_mbox) - final_mbox.write("\n\n\n") - - def fix_content(self, single_mbox, final_mbox): - text = single_mbox.read().split("\n") - if "X-Received " in text[0]: - text[0] = "From GoogleGroups" - else: - text = ["From GoogleGroups"] + text - for line in text: - if "From: " in line: - line = self.handle_from_line(line) - final_mbox.write(line) - - @staticmethod - def handle_from_line(line): - from_line = re.findall("(?<=From: )\"?[ A-Za-z]*", line) - if len(from_line) == 1: - name = from_line[0].replace('"', '') - if name.endswith(" "): - name = name[:-1] - email = re.findall("<.*>", line) - if len(email) > 0: - if "..." 
in email[0]: - email_suffix = re.findall("@[a-zA-Z0-1\-.]*", line) - if len(email_suffix) != 0: - email_suffix = email_suffix[0] - else: - email_suffix = "@unknown.com" - return "From: " + name + " <" + name.replace(" ", "_") + email_suffix + ">\n" - else: - return "From: " + name + " " + email[0] + "\n" - return line - - -def main(): - argv = sys.argv - argv = ["-output", "../test/pants", "-group", "pants-devel"] - post_download = PostDownload() - post_download.handle_arguments(argv) - post_download.start() - - -main() \ No newline at end of file diff --git a/codeface/google_groups_py/run_g3d.py b/codeface/google_groups_py/run_g3d.py new file mode 100644 index 00000000..2ef20e06 --- /dev/null +++ b/codeface/google_groups_py/run_g3d.py @@ -0,0 +1,12 @@ +from g3d import Crawler +import sys + +def main(): + argv = sys.argv + crawler = Crawler() + crawler.handle_arguments(argv) + if crawler.execute is True: + crawler.start() + crawler.statistic() + +main() \ No newline at end of file diff --git a/codeface/google_groups_py/utest_g3d.py b/codeface/google_groups_py/utest_g3d.py new file mode 100644 index 00000000..4f8ddd21 --- /dev/null +++ b/codeface/google_groups_py/utest_g3d.py @@ -0,0 +1,228 @@ +from g3d import Crawler +import unittest + + +class argumentHandlingTests(unittest.TestCase): + # no errors + def test1(self): + argv = ["-group", "test_group", "-start", "11.11.11", "-output", "./", "-threads", "50"] + crawler = Crawler() + crawler.handle_arguments(argv) + assert crawler.execute == True + assert crawler.group_is_set == True + assert crawler.group == "test_group" + assert crawler.max_worker_threads == 50 + assert crawler.output_is_set == True + assert crawler.output == "./" + assert crawler.verbose == False + + # -output missing -> should work + def test2(self): + argv = ["-group", "test_group", "-threads", "50"] + crawler = Crawler() + crawler.handle_arguments(argv) + assert crawler.execute == True + assert crawler.output_is_set == True + assert crawler.output == "./" + + # -group missing + def test3(self): + argv = ["-output", "./", "-threads", "50"] + crawler = Crawler() + crawler.handle_arguments(argv) + assert crawler.execute == False + assert crawler.group_is_set == False + + # -threads number missing + def test4(self): + argv = ["-group", "test_group", "-threads", "-output", "./"] + crawler = Crawler() + crawler.handle_arguments(argv) + assert crawler.execute == False + assert crawler.max_worker_threads == 1 + + # -threads number missing at end + def test5(self): + argv = ["", "-threads"] + crawler = Crawler() + crawler.handle_arguments(argv) + assert crawler.execute == False + assert crawler.max_worker_threads == 1 + + # -group follow up missing at end + def test6(self): + argv = ["", "-group"] + crawler = Crawler() + crawler.handle_arguments(argv) + assert crawler.execute == False + assert crawler.group_is_set == False + + + # -output follow up missing at end + def test7(self): + argv = ["-group", "test_group", "-output"] + crawler = Crawler() + crawler.handle_arguments(argv) + assert crawler.execute == True + assert crawler.output_is_set == True + assert crawler.output == "./" + + + # -v is set + def test8(self): + argv = ["", "-v"] + crawler = Crawler() + crawler.handle_arguments(argv) + assert crawler.verbose == True + + # -start follow up missing + def test9(self): + argv = ["-group", "test_group", "-start", "-output", "./"] + crawler = Crawler() + crawler.handle_arguments(argv) + assert crawler.start_date_is_set == False + + + # start format check + def test10(self): + 
+        crawler = Crawler()
+        assert crawler.check_date_format("11.11.11") == True
+        assert crawler.check_date_format("111.11.11") == False
+        assert crawler.check_date_format("11.111.11") == False
+        assert crawler.check_date_format("11.11.111") == False
+
+
+class checkRegexFunction(unittest.TestCase):
+    # href
+    def test1(self):
+        crawler = Crawler()
+        line = '27.04.10
Date: Tue, 12 Apr 2016 09:27:23 +0200
Subject: [PATCH 3/3] add copyright header and update help output

---
 codeface/google_groups_py/g3d.py       | 22 +++++++++++++++++++---
 codeface/google_groups_py/run_g3d.py   | 19 ++++++++++++++++++-
 codeface/google_groups_py/utest_g3d.py | 18 +++++++++++++++++-
 3 files changed, 54 insertions(+), 5 deletions(-)

diff --git a/codeface/google_groups_py/g3d.py b/codeface/google_groups_py/g3d.py
index 38c9045c..462f789d 100644
--- a/codeface/google_groups_py/g3d.py
+++ b/codeface/google_groups_py/g3d.py
@@ -1,3 +1,19 @@
+## This file is part of Codeface. Codeface is free software: you can
+## redistribute it and/or modify it under the terms of the GNU General Public
+## License as published by the Free Software Foundation, version 2.
+##
+## This program is distributed in the hope that it will be useful, but WITHOUT
+## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+## details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+##
+## Copyright 2016 by Georg Berner
+## All Rights Reserved.
+
 import re
 import os.path
 import urllib2
@@ -88,12 +104,12 @@ def print_help():
         print "################################################################################"
         print "Help output:"
         print "################################################################################"
-        print "crawler.py -output \"path\" -group \"groupname\" [-threads \"#threads\",\n -start \"DD.MM.YY\"]"
+        print "run_g3d.py -output \"path\" -group \"groupname\" [-threads \"#threads\",\n -start \"DD.MM.YY\"]"
         print "################################################################################"
         print "-output: output path for ggc"
         print "-group: name of the google group"
-        print "-threads: number of threads for simultaneous download (default = 1) (only for dl)"
-        print "-start: date from which all messages of a group will be downloaded, \"DD.MM.YY\" (only for dl)"
+        print "-threads: number of threads for simultaneous download (default = 1)"
+        print "-start: date from which all messages of a group will be downloaded, \"DD.MM.YY\""
         print "-v: verbose output"
         print "################################################################################"
 
diff --git a/codeface/google_groups_py/run_g3d.py b/codeface/google_groups_py/run_g3d.py
index 2ef20e06..9be6c263 100644
--- a/codeface/google_groups_py/run_g3d.py
+++ b/codeface/google_groups_py/run_g3d.py
@@ -1,3 +1,20 @@
+#! /usr/bin/env python
+## This file is part of Codeface. Codeface is free software: you can
+## redistribute it and/or modify it under the terms of the GNU General Public
+## License as published by the Free Software Foundation, version 2.
+##
+## This program is distributed in the hope that it will be useful, but WITHOUT
+## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+## details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+##
+## Copyright 2016 by Georg Berner
+## All Rights Reserved.
+
 from g3d import Crawler
 import sys
 
@@ -9,4 +26,4 @@ def main():
         crawler.start()
         crawler.statistic()
 
-main()
\ No newline at end of file
+main()
diff --git a/codeface/google_groups_py/utest_g3d.py b/codeface/google_groups_py/utest_g3d.py
index 4f8ddd21..82e77740 100644
--- a/codeface/google_groups_py/utest_g3d.py
+++ b/codeface/google_groups_py/utest_g3d.py
@@ -1,3 +1,19 @@
+## This file is part of Codeface. Codeface is free software: you can
+## redistribute it and/or modify it under the terms of the GNU General Public
+## License as published by the Free Software Foundation, version 2.
+##
+## This program is distributed in the hope that it will be useful, but WITHOUT
+## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+## FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+## details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program; if not, write to the Free Software
+## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+##
+## Copyright 2016 by Georg Berner
+## All Rights Reserved.
+
 from g3d import Crawler
 import unittest
 
@@ -225,4 +241,4 @@ def test7(self):
         date1 = "11.11.11"
         date2 = "11.11.11"
         result = crawler.compare_date(date1, date2)
-        assert result == 0
\ No newline at end of file
+        assert result == 0
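
Across the three patches, run_g3d.py is the intended entry point and the help text above documents its flags. As a quick start, the downloader can also be driven directly from Python 2; a sketch with a placeholder output path and group name (network access to groups.google.com is required, and the result lands in out/pants-devel.mbox):

# illustrative only, not part of the patches
from g3d import Crawler

crawler = Crawler()
# equivalent to: python run_g3d.py -output "out/" -group "pants-devel" -threads 10 -v
crawler.handle_arguments(["run_g3d.py", "-output", "out/", "-group", "pants-devel",
                          "-threads", "10", "-v"])
if crawler.execute is True:
    crawler.start()      # download every message of the group into one mbox file
    crawler.statistic()  # report message count and elapsed time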