Skip to content

Commit

Permalink
Merge pull request langchain-ai#6 from daxa-ai/rahul_initial_6
Browse files: browse the repository at this point in the history
Add file/dir size for local sources.
  • Loading branch information
rahul-trip committed Jan 19, 2024
2 parents 9579f41 + 53048b5 commit 38d0651
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions libs/community/langchain_community/document_loaders/daxa.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -31,11 +31,13 @@ def __init__(self, langchain_loader: BaseLoader, app_id: str, owner: str, descri
self.source_owner = DaxaSafeLoader.get_file_owner_from_path(self.source_path)
self.docs = []
loader_name = str(type(self.loader)).split(".")[-1].split("'")[0]
source_type = get_loader_type(loader_name)
self.source_type = get_loader_type(loader_name)
self.source_size = self.get_source_size()
self.loader_details = {
"loader": loader_name,
"source_path": self.source_path,
"source_type": source_type
"source_type": self.source_type,
"source_size": self.source_size,
}
#generate app
self.app = self._get_app_details()
Expand Down Expand Up @@ -125,4 +127,17 @@ def get_file_owner_from_path(file_path: str) -> str:
file_owner_name = pwd.getpwuid(file_owner_uid).pw_name
except Exception:
file_owner_name = 'unknown'
return file_owner_name
return file_owner_name

def get_source_size(self) -> int:
    """Return the size, in bytes, of the loader's local source.

    Dispatches on ``self.source_type``:

    * ``"file"`` -- size of the file at ``self.source_path``.
    * ``"dir"`` -- combined size of every file found while walking
      ``self.source_path``; symbolic links are not counted.
    * any other value -- ``0``.

    Returns:
        The size in bytes, or 0 when the source type is neither
        ``"file"`` nor ``"dir"``.

    Raises:
        OSError: If a file being measured does not exist or is
            inaccessible.
    """
    # Start from 0 so a source type other than "file"/"dir" produces a
    # defined result instead of an UnboundLocalError at the return.
    size = 0
    if self.source_type == "file":
        size = os.path.getsize(self.source_path)
    elif self.source_type == "dir":
        for dirpath, _, filenames in os.walk(self.source_path):
            for filename in filenames:
                file_path = os.path.join(dirpath, filename)
                # Symlinks are skipped -- presumably so broken links and
                # targets outside the tree do not affect the total.
                if not os.path.islink(file_path):
                    size += os.path.getsize(file_path)
    return size

0 comments on commit 38d0651

Please sign in to comment.