Skip to content

Commit

Permalink
Update search_link_node.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
VinciGit00 committed Sep 22, 2024
1 parent c5a3f89 commit 9b3695d
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions scrapegraphai/nodes/search_link_node.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,8 +3,8 @@
"""
from typing import List, Optional
import re
from tqdm import tqdm
from urllib.parse import urlparse, parse_qs
from tqdm import tqdm
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel
Expand Down Expand Up @@ -74,10 +74,11 @@ def _is_language_url(self, url):
parsed_url = urlparse(url)
query_params = parse_qs(parsed_url.query)

return any(indicator in parsed_url.path.lower() or indicator in query_params for indicator in lang_indicators)
return any(indicator in parsed_url.path.lower() \
or indicator in query_params for indicator in lang_indicators)
def _is_potentially_irrelevant(self, url):
    """
    Report whether a URL contains any configured irrelevant keyword.

    Args:
        url (str): The URL to inspect.

    Returns:
        bool: False when ``self.filter_links`` is falsy or when no
        irrelevant keywords are configured; otherwise True if any entry
        of ``self.filter_config["irrelevant_keywords"]`` occurs as a
        substring of the lower-cased URL.
    """
    if not self.filter_links:
        return False

    irrelevant_keywords = self.filter_config.get("irrelevant_keywords", [])
    if not irrelevant_keywords:
        # Nothing to match against (missing, empty, or None in the config).
        return False

    # Lower-case the URL once instead of once per keyword. Keywords are
    # compared exactly as configured, so only lower-case keywords can match.
    lowered_url = url.lower()
    return any(keyword in lowered_url for keyword in irrelevant_keywords)
Expand Down

0 comments on commit 9b3695d

Please sign in to comment.