Skip to content

Commit 586a4aa

Browse files
committed
Basic Requirements done
1 parent 9722a3d commit 586a4aa

File tree

18 files changed

+488
-377
lines changed

18 files changed

+488
-377
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1-
# Codebrewers-hackathon
1+
# Codebrewers-hackathon
2+
![Alt text](assets/image.png)

Scanner/convert.py

Lines changed: 0 additions & 7 deletions
This file was deleted.

Scanner/cpp/Tangerine.cpp

Lines changed: 0 additions & 34 deletions
This file was deleted.

Scanner/cpp/scan.cpp

Lines changed: 0 additions & 134 deletions
This file was deleted.

Scanner/cpp/scan.h

Lines changed: 0 additions & 33 deletions
This file was deleted.

Scanner/diskutil.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

Scanner/duplicates.py

Lines changed: 58 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
import os
22
import sys
3-
import hashlib
3+
# import the 5 hash functions to benchmark (3 from hashlib, 2 from xxhash)
4+
import timeit
5+
from hashlib import md5, sha1, blake2b
6+
from xxhash import xxh64, xxh128
47

8+
hashfunc = 0
9+
import_module = "import random"
510

6-
def find_duplicates(folders):
11+
def duplicates(folders):
712
dup_size = {}
813
for i in folders:
914
if os.path.exists(i):
@@ -17,14 +22,12 @@ def find_duplicates(folders):
1722
for dup_list in dup_size.values():
1823
if len(dup_list) > 1:
1924
join_dicts(dups, find_duplicate_hash(dup_list))
20-
print_results(dups)
21-
return dups
2225

2326

2427
def find_duplicate_size(parent_dir):
2528
dups = {} # format {size:[filepaths]}
2629
for dirName, subdirs, fileList in os.walk(parent_dir):
27-
# print(dirName, subdirs, fileList)
30+
print(dirName, subdirs, fileList)
2831
print('Scanning %s ' % dirName)
2932
for filename in fileList:
3033
path = os.path.join(dirName, filename)
@@ -62,7 +65,24 @@ def join_dicts(dict1, dict2):
6265

6366
def hashfile(path, blocksize=65536):
6467
file = open(path, 'rb')
65-
hasher = hashlib.md5()
68+
69+
hasher = md5()
70+
# use switch case for hash functions
71+
match hashfunc:
72+
case 0:
73+
hasher = md5()
74+
case 1:
75+
hasher = sha1()
76+
case 2:
77+
hasher= blake2b()
78+
case 3:
79+
hasher= xxh64()
80+
case 4:
81+
hasher= xxh128()
82+
case _:
83+
print("Invalid hash function")
84+
sys.exit(1)
85+
6686
buf = file.read(blocksize)
6787
while len(buf) > 0:
6888
hasher.update(buf)
@@ -88,21 +108,45 @@ def print_results(dict1):
88108

89109

90110
def find_duplicates(dir):
    """Scan a single directory for duplicate files.

    Wraps *dir* in a one-element list and forwards it to ``duplicates``,
    returning whatever that call returns.
    """
    return duplicates([dir])
98113

99114
def remove_duplicates(dups):
    """Delete every duplicate file except the first one of each group.

    Args:
        dups: Mapping whose values are sequences of file paths. The first
            path of each sequence is kept; the remaining ones are removed
            from disk. May be empty or ``None``.

    Returns:
        ``True`` if a non-empty mapping was processed, ``False`` otherwise.

    Raises:
        OSError: If ``os.remove`` fails for one of the paths.
    """
    # Truthiness instead of len(): a None result from the scan must be
    # reported as "nothing deleted" rather than crash with a TypeError.
    if not dups:
        print("Duplicates not deleted")
        return False

    for key in dups:
        # Skip index 0 so one copy of every duplicated file survives.
        for path in dups[key][1:]:
            os.remove(path)
    print("Duplicates deleted")
    return True
107124

108-
125+
def main():
    """Benchmark the five supported hash functions on a fixed set of folders.

    For each hash-function index 0..4 the module-level ``hashfunc`` selector
    is set, every directory is scanned with ``find_duplicates`` and the total
    wall-clock time of that pass is recorded. A summary of all timings is
    printed at the end.
    """
    # Without this declaration the assignment in the loop would only create
    # a local variable; ``hashfile`` would keep reading the module-level
    # value (0) and every pass would time md5.
    global hashfunc

    directories = [
        "D:\\GitHub\\codebrewers-hackathon",
        "D:\\GitHub\\climateview",
        "D:\\GitHub\\js-samples",
        "D:\\GitHub\\MemoryGrid",
        "D:\\GitHub\\portfolio",
    ]
    # Labels listed in the same order as the indices matched in hashfile.
    labels = ("md5", "sha1", "blake2b", "xxh64", "xxh128")

    results = []
    for i in range(len(labels)):
        print("Hash function", i, "is being used")
        hashfunc = i
        starttime = timeit.default_timer()
        for directory in directories:
            find_duplicates(directory)
        results.append(timeit.default_timer() - starttime)

    print("\nTime taken for 5 hash functions: ")
    for label, elapsed in zip(labels, results):
        print(label + ": ", elapsed, " seconds")
149+
150+
151+
if __name__ == '__main__':
152+
main()

Scanner/largefiles.py

Lines changed: 0 additions & 12 deletions
This file was deleted.

assets/icon.png

5.84 KB
Loading

assets/image.png

7.88 KB
Loading

0 commit comments

Comments
 (0)