diff --git a/.bin/wordlist-updaters/sources.json b/.bin/wordlist-updaters/sources.json index b7aeb216f16..d4a29678e29 100644 --- a/.bin/wordlist-updaters/sources.json +++ b/.bin/wordlist-updaters/sources.json @@ -5,7 +5,7 @@ "source": "https://raw.githubusercontent.com/wallarm/jwt-secrets/master/jwt.secrets.list", "output": "Passwords/scraped-JWT-secrets.txt", "post_run_script": "", - "frequency": "6h" + "frequency": "3d" }, { "name": "Trickest wordlist update", @@ -19,4 +19,4 @@ "Discovery/Web-Content/CMS/trickest-cms-wordlist/" ] } -] \ No newline at end of file +] diff --git a/.bin/wordlist-updaters/updater.py b/.bin/wordlist-updaters/updater.py index 1f1e8c8c5b0..c92c3d8cafb 100755 --- a/.bin/wordlist-updaters/updater.py +++ b/.bin/wordlist-updaters/updater.py @@ -7,7 +7,7 @@ import subprocess from datetime import datetime, timedelta -# TODO Summary file +# TODO Summary file # TODO Advanced crontab syntax BASE_PATH = ".bin/wordlist-updaters" @@ -17,20 +17,21 @@ VALID_TYPES = ["file", "git_dir"] TIME_NOW = datetime.now() -def request_wrapper(url): - for i in range(1,4): +def request_wrapper(url): + for i in range(1, 4): r = requests.get(url) if r.status_code == 200: # print("[+] Got %s successfully!"%(url)) break if i == 3: - print("[!] Failed to get %s."%(url)) + print("[!] Failed to get %s." % (url)) exit(2) - print("[!] Getting %s failed(%i/3)"%(url,i)) + print("[!] Getting %s failed(%i/3)" % (url, i)) return r.text + # Check if the files exists if not os.path.isfile(SOURCE_PATH): print("[!] Sources.json is missing!") @@ -49,20 +50,20 @@ def request_wrapper(url): task_name = source["name"] source_keys = source.keys() - if not task_name in STATUS.keys(): + if task_name not in STATUS.keys(): print(f"[+] Queuing task {task_name} as task was never checked before") to_check.append(source) continue - if not "output" in source_keys or not isinstance(source["output"], str): + if "output" not in source_keys or not isinstance(source["output"], str): print(f"[!] Skipping task {task_name} as output field is missing/invalid") continue - if not "type" in source_keys or not isinstance(source["type"], str): + if "type" not in source_keys or not isinstance(source["type"], str): print(f"[!] Skipping task {task_name} as type field is missing/invalid") continue - if not source["type"] in VALID_TYPES: + if source["type"] not in VALID_TYPES: print(f"[!] Skipping task {task_name} as type is invalid") continue @@ -71,45 +72,62 @@ def request_wrapper(url): continue if source["type"].startswith("git_") and not source["source"].endswith(".git"): - print(f"[!] Skipping task {task_name} as a git task was defined with a non git url.") + print( + f"[!] Skipping task {task_name} as a git task was defined with a non git url." + ) continue - if not "last_update" in STATUS[task_name].keys() or not isinstance(STATUS[task_name]["last_update"], int): + if "last_update" not in STATUS[task_name].keys() or not isinstance( + STATUS[task_name]["last_update"], int + ): print(f"[!] Queuing task {task_name} as last_update field is missing/invalid") to_check.append(source) continue if not ("frequency" in source_keys) ^ ("update_time" in source_keys): - print(f"[!] Skipping task {task_name} as only frequency or update_time can be specified") + print( + f"[!] Skipping task {task_name} as only frequency or update_time can be specified" + ) continue if "frequency" in source_keys and isinstance(source["frequency"], str): regex_match = re.search(FREQUENCY_REGEX, source["frequency"]) if not regex_match: - print(f"[!] Skipping task {task_name} as frequency field contains invalid formatting of days and hours") + print( + f"[!] Skipping task {task_name} as frequency field contains invalid formatting of days and hours" + ) continue days, _, hours, _ = regex_match.groups() - days = bool(days) | 0 - hours = bool(hours) | 0 + days = int(days or 0) + hours = int(hours or 0) - next_update_time = datetime.fromtimestamp(STATUS[task_name]["last_update"]) + timedelta(days=days, hours=hours) - time_from_update = TIME_NOW - next_update_time - time_to_update = next_update_time - TIME_NOW + next_update_time = datetime.fromtimestamp( + STATUS[task_name]["last_update"] + ) + timedelta(days=days, hours=hours) - if TIME_NOW < next_update_time: - if time_to_update.seconds <= 300: - print(f"[+] Queuing task {task_name} as it is less than 5 minutes to update. ({time_to_update.seconds} seconds to update)") - to_check.append(source) - continue + time_to_update = int((next_update_time - TIME_NOW).total_seconds()) - print(f"[!] Skipping task {task_name} as it is more than 5 minutes to update ({time_to_update.seconds} seconds to update)") + if TIME_NOW > next_update_time: + print( + f"[+] Queuing task {task_name} as it is {time_to_update} seconds after scheduled update time." + ) + to_check.append(source) continue - print(f"[+] Queuing task {task_name} as it is {time_to_update.seconds} seconds after scheduled update time.") - to_check.append(source) + elif time_to_update <= 300: + print( + f"[+] Queuing task {task_name} as it is less than 5 minutes to update. ({time_to_update} seconds to update)" + ) + to_check.append(source) + continue + + print( + f"[!] Skipping task {task_name} as it is more than 5 minutes to update ({time_to_update} seconds to update)" + ) + continue elif "update_time" in source_keys and isinstance(source["update_time"], str): update_time = source["update_time"] @@ -121,26 +139,31 @@ def request_wrapper(url): hours = int(update_time[:2]) minutes = int(update_time[2:]) - if not hours in range(1, 25): + if hours not in range(1, 25): print(f"[!] Skipping task {task_name} as hours is not in range 1-24.") continue - if not minutes in range(1, 61): + if minutes not in range(1, 61): print(f"[!] Skipping task {task_name} as minutes is not in range 1-60.") continue scheduled_update_time = TIME_NOW.replace(hour=hours, minute=minutes) - if TIME_NOW <= scheduled_update_time and TIME_NOW + timedelta(hours=1) >= scheduled_update_time: - print(f"[+] Queuing task {task_name} as update time is within the next hour") + if ( + TIME_NOW <= scheduled_update_time + and TIME_NOW + timedelta(hours=1) >= scheduled_update_time + ): + print( + f"[+] Queuing task {task_name} as update time is within the next hour" + ) to_check.append(source) continue else: print(f"[!] Skipping task {task_name} as update_time field is invalid") continue - + if len(to_check) == 0: - print(f"[!] No task were queued. Exiting.") + print("[!] No task were queued. Exiting.") exit() print(f"[+] Queued a total of {len(to_check)} tasks to run.") @@ -148,31 +171,33 @@ def request_wrapper(url): for task in to_check: print(f"[+] Starting task {task['name']}") - if not task["name"] in STATUS.keys(): - STATUS[task["name"]] = {} - + if task["name"] not in STATUS.keys(): + STATUS[task["name"]] = {} + task_type = task["type"] if task_type == "file": content = request_wrapper(task["source"]) open(task["output"], "w").write(content) - print(f"[+] Saved file to output location") - + print("[+] Saved file to output location") + STATUS[task["name"]]["last_update"] = int(datetime.now().timestamp()) elif task_type == "git_dir": - if not os.path.exists(task['output']): + if not os.path.exists(task["output"]): print(f"[+] Making directory {task['output']}") os.makedirs(task["output"]) - subprocess.run(["git", "clone", "-q", "--depth=1", task["source"]], cwd=task["output"]) + subprocess.run( + ["git", "clone", "-q", "--depth=1", task["source"]], cwd=task["output"] + ) STATUS[task["name"]]["last_update"] = int(datetime.now().timestamp()) if task["post_run_script"]: print("[+] Running post run script") subprocess.run(task["post_run_script"]) print("[+] Finished running post run script") - + print(f"[+] Finished task {task['name']}") -json.dump(STATUS, open(STATUS_PATH, "w"), indent=4) \ No newline at end of file +json.dump(STATUS, open(STATUS_PATH, "w"), indent=4)