diff --git a/.direnv/python-3.10.9/lib/python3.10/site-packages/_distutils_hack/__pycache__/__init__.cpython-310.pyc b/.direnv/python-3.10.9/lib/python3.10/site-packages/_distutils_hack/__pycache__/__init__.cpython-310.pyc
index 6af80fb0..96bf7565 100644
Binary files a/.direnv/python-3.10.9/lib/python3.10/site-packages/_distutils_hack/__pycache__/__init__.cpython-310.pyc and b/.direnv/python-3.10.9/lib/python3.10/site-packages/_distutils_hack/__pycache__/__init__.cpython-310.pyc differ
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4ffd9068..c4349131 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,15 @@
+## v1.1.0 (2023-07-24)
+
+### Feat
+
+- set source simplified or traditional search_query for automatic translation
+- create general series directory if not exist in base
+- **config**: created basic config setup
+
+### Refactor
+
+- Cleanup code
+
 ## v1.0.1 (2023-07-12)
 
 ### Refactor
diff --git a/pyproject.toml b/pyproject.toml
index 614fd842..f09052a5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,7 +70,7 @@ namespaces = false # to disable scanning PEP 420 namespaces (true by default)
 [tool.commitizen]
 name = "cz_conventional_commits"
 version_scheme = "pep440"
-version = "1.0.1"
+version = "1.1.0"
 tag_format = "v$version"
 version_files = [
     "VERSION",
diff --git a/requirements.txt b/requirements.txt
index 5261597d..95534162 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,56 +1,95 @@
 appdirs==1.4.4
+argcomplete==3.1.1
 bandit==1.7.5
 beautifulsoup4==4.12.2
+black==23.7.0
 bs4==0.0.1
+build==0.10.0
 certifi==2023.5.7
+cffi==1.15.1
+cfgv==3.3.1
 charset-normalizer==3.2.0
+chinese-converter==1.0.2
 click==8.1.4
 cloudscraper==1.2.71
+colorama==0.4.6
+commitizen==3.5.0
+cryptography==41.0.2
 cssselect==1.2.0
+decli==0.6.1
+distlib==0.3.6
 fake-useragent==1.1.3
+filelock==3.12.2
+ghp-import==2.1.0
 gitdb==4.0.10
 GitPython==3.1.31
+griffe==0.31.0
+identify==2.5.24
 idna==3.4
 importlib-metadata==6.8.0
 iso8601==2.0.0
+Jinja2==3.1.2
 lxml==4.9.3
 m3u8==3.5.0
 m3u8-To-MP4==0.1.11
+Markdown==3.3.7
 markdown-it-py==3.0.0
+MarkupSafe==2.1.3
 mdurl==0.1.2
+mergedeep==1.3.4
+mkdocs==1.4.3
+mkdocs-autorefs==0.4.1
+mkdocs-material==9.1.18
+mkdocs-material-extensions==1.1.1
+mkdocstrings==0.22.0
+mkdocstrings-python==1.1.2
 multipledispatch==1.0.0
+mypy-extensions==1.0.0
+ndg-httpsclient==0.5.1
+nodeenv==1.8.0
 overloading==0.5.0
+packaging==23.1
 parse==1.19.1
+pathspec==0.11.1
 pbr==5.11.1
+pip-tools==6.14.0
+platformdirs==3.8.1
+pre-commit==3.3.3
+prompt-toolkit==3.0.39
+pyasn1==0.5.0
+pycparser==2.21
 pycryptodome==3.18.0
 pyee==8.2.2
 Pygments==2.15.1
+pymdown-extensions==10.0.1
+pyOpenSSL==23.2.0
 pyparsing==3.1.0
 pyppeteer==1.0.2
+pyproject_hooks==1.0.0
 pyquery==2.0.0
+python-dateutil==2.8.2
 python-dotenv==1.0.0
 PyYAML==6.0
+pyyaml_env_tag==0.1
+questionary==1.10.0
+regex==2023.6.3
 requests==2.31.0
 requests-html==0.10.0
 requests-toolbelt==1.0.0
 rich==13.4.2
+six==1.16.0
 smmap==5.0.0
 soupsieve==2.4.1
 stevedore==5.1.0
+termcolor==2.3.0
+tomlkit==0.11.8
 tqdm==4.65.0
 typer==0.9.0
 typing_extensions==4.7.1
 urllib3==1.26.16
+virtualenv==20.23.1
 w3lib==2.1.1
-websockets~=10.0
+watchdog==3.0.0
+wcwidth==0.2.6
+websockets==10.4
 zipp==3.16.0
-
-mkdocstrings==0.22.0
-mkdocs==1.4.3
-mkdocs-material==9.1.18
-mkdocstrings[python]~=0.22.0
-
-black==23.7.0
-commitizen==3.5.0
-pip-tools==6.14.0
-pre-commit==3.3.3
\ No newline at end of file
diff --git a/tvsd/__init__.py b/tvsd/__init__.py
index b716b24a..071f0863 100644
--- a/tvsd/__init__.py
+++ b/tvsd/__init__.py
@@ -1,6 +1,14 @@
 __app_name__ = "tvsd"
 __version__ = "1.0.0-a.1"
+import os
+from typer import Typer
+
+
+app = Typer(name=__app_name__, rich_markup_mode="rich")
+state = {"verbose": False}
+current_dir = os.getcwd()
+
 
 
 (
     SUCCESS,
diff --git a/tvsd/actions.py b/tvsd/actions.py
index 0d8267e1..651b9ce4 100644
--- a/tvsd/actions.py
+++ b/tvsd/actions.py
@@ -1,10 +1,11 @@
 # -*- coding: utf-8 -*-
+import logging
 import sys
 
 from tvsd.config import BASE_PATH, TEMP_BASE_PATH
 from tvsd.download import Download
 from tvsd.search import SearchQuery
-from tvsd.utils import check_dir_mounted, LOGGER
+from tvsd.utils import check_dir_mounted
 
 
 def search_media_and_download(query: str):
@@ -13,14 +14,14 @@ def search_media_and_download(query: str):
     Args:
         query (str): query string
     """
-    LOGGER.info(f"Checking if {BASE_PATH} is mounted...")
+    logging.info(f"Checking if {BASE_PATH} is mounted...")
     if not check_dir_mounted(path=BASE_PATH):
         sys.exit()
-    LOGGER.debug("Base path: %s", BASE_PATH)
+    logging.debug("Base path: %s", BASE_PATH)
 
     # Search
     query_instance = SearchQuery(query)
-    LOGGER.info(f"Searching for {query}...")
+    logging.info(f"Searching for {query}...")
     query_instance.find_show(BASE_PATH)
 
     # Download
@@ -29,5 +30,5 @@ def search_media_and_download(query: str):
         base_path=BASE_PATH,
         temp_path=TEMP_BASE_PATH,
     )
-    LOGGER.info(f"Starting {query_instance.chosen_show.title} guided download...")
+    logging.info(f"Starting {query_instance.chosen_show.title} guided download...")
     download_instance.guided_download()
diff --git a/tvsd/cli.py b/tvsd/cli.py
index b328b3d3..a1bbc719 100644
--- a/tvsd/cli.py
+++ b/tvsd/cli.py
@@ -1,3 +1,4 @@
+import logging
 import os
 from pathlib import Path
 import shutil
@@ -6,20 +7,11 @@
 import typer
 from rich import print
 
-from tvsd import ERRORS, __app_name__, __version__, database
+from tvsd import ERRORS, __app_name__, __version__, database, app, state
 from tvsd.actions import search_media_and_download
 from tvsd.config import init_app, TEMP_BASE_PATH, validate_config_file
 
 
-app = typer.Typer()
-
-
-def _version_callback(value: bool) -> None:
-    if value:
-        typer.echo(f"{__app_name__} v{__version__}")
-        raise typer.Exit()
-
-
 @app.command()
 def init(
     db_path: str = typer.Option(
@@ -48,18 +40,33 @@ def init(
     typer.secho(f"The TVSD database is {db_path}", fg=typer.colors.GREEN)
 
 
+def _version_callback(value: bool) -> None:
+    if value:
+        typer.echo(f"{__app_name__} v{__version__}")
+        raise typer.Exit()
+
+
 @app.callback()
 def main(
-    version: Optional[bool] = typer.Option(
+    _: Optional[bool] = typer.Option(
         None,
         "--version",
         "-v",
         help="Show the application's version and exit.",
         callback=_version_callback,
        is_eager=True,
-    )
+    ),
+    verbose: Optional[bool] = False,
 ) -> None:
-    return
+    """
+    Options to update state of the application.
+    """
+    if verbose:
+        print("Will write verbose output")
+        state["verbose"] = True
+        logging.basicConfig(level=logging.DEBUG)
+    else:
+        logging.basicConfig(level=logging.INFO)
 
 
 @app.command()
@@ -93,6 +100,6 @@ def clean_temp():
         if confirm.capitalize() == "Y":
             shutil.rmtree(TEMP_BASE_PATH, ignore_errors=True)
             os.mkdir(TEMP_BASE_PATH)
-            print("All files deleted")
+            logging.info("All files deleted")
     except FileNotFoundError:
-        print(f"Temp directory {TEMP_BASE_PATH} does not exist")
+        logging.info(f"Temp directory {TEMP_BASE_PATH} does not exist")
diff --git a/tvsd/config.py b/tvsd/config.py
index 84fd22dc..4f5cea68 100644
--- a/tvsd/config.py
+++ b/tvsd/config.py
@@ -18,6 +18,7 @@ class Config:
 
     def __init__(self):
+        print(CONFIG_FILE_PATH)
         config_parser.read(CONFIG_FILE_PATH)
 
     @classmethod
@@ -50,7 +51,7 @@ def validate_config_file(self) -> None:
         if not config_parser.has_section("General"):
             config_parser["General"] = {
                 "base_path": "/Volumes/Viewable",
-                "temp_base_path": "~/Movies/temp-parts",
+                "temp_base_path": f"{os.path.expanduser('~')}/Movies/temp-parts",
                 "series_dir": "TV Series",
                 "specials_dir": "Specials",
             }
diff --git a/tvsd/download.py b/tvsd/download.py
index ba7f48d1..456635f5 100644
--- a/tvsd/download.py
+++ b/tvsd/download.py
@@ -1,3 +1,4 @@
+import logging
 import os
 import shutil
 from typing import Literal
@@ -11,7 +12,7 @@
 from tvsd.show import Show
 from tvsd.season import Season
 from tvsd.episode import Episode
-from tvsd.utils import LOGGER, mkdir_if_no
+from tvsd.utils import mkdir_if_no
 
 
 class Download:
@@ -68,14 +69,14 @@ def choose_download(self, season: "Season"):
     @dispatch(Show)
     def download_all(self, show: Show):
         """Download all episodes in a show"""
-        LOGGER.debug("Downloading all episodes in show")
+        logging.debug("Downloading all episodes in show")
         for season in show.seasons:
             self.download_all(season)
 
     @dispatch(Season)
     def download_all(self, season: Literal["Season"]):
         """Download all episodes in a season"""
-        LOGGER.info("Downloading all episodes in season")
+        logging.info("Downloading all episodes in season")
 
         # reset episode index
         self._specials_index = 1
@@ -132,7 +133,7 @@ def download_episode(self, episode: "Episode"):
         absolute_dest_dir = os.path.join(
             self._base_path, episode.relative_destination_dir
         )
-        LOGGER.info(absolute_dest_dir)
+        logging.info(absolute_dest_dir)
         mkdir_if_no(absolute_dest_dir)
 
         if episode.file_exists_locally:
diff --git a/tvsd/search.py b/tvsd/search.py
index ee1988ea..ba0af0a9 100644
--- a/tvsd/search.py
+++ b/tvsd/search.py
@@ -1,5 +1,6 @@
 from difflib import SequenceMatcher
 import inspect
+import logging
 import os
 import sys
 from typing import TYPE_CHECKING, Any, Literal, Union, List
@@ -14,7 +15,6 @@
 
 from tvsd import sources
 from tvsd.source import Source
-from tvsd.utils import LOGGER
 
 if TYPE_CHECKING:
     from tvsd.season import Season
@@ -85,10 +85,10 @@ def find_shows_online(self):
         query_results: List[Literal["Season"]] = []
 
         # TODO: Search in db first / or put db results first
-        LOGGER.debug("Searching for %s", self._query)
+        logging.debug("Searching for %s", self._query)
 
         for file in dir(sources):
-            LOGGER.debug(f"Found {file}...")
+            logging.debug(f"Found {file}...")
             if not file.startswith("__"):
                 for cls_name, cls_obj in inspect.getmembers(
                     sys.modules[f"tvsd.sources.{file}"]
@@ -100,7 +100,7 @@ def find_shows_online(self):
                         and issubclass(cls_obj, Source)
                         and cls_obj().__status__ == "active"
                     ):
-                        LOGGER.info(f"Searching {cls_name}...")
+                        logging.info(f"Searching {cls_name}...")
                         query_results += cls_obj().query_from_source(self._query)
 
         table = Table("index", "Title", "Source", "Note")
diff --git a/tvsd/season.py b/tvsd/season.py
index 4989aa38..42adb2d3 100644
--- a/tvsd/season.py
+++ b/tvsd/season.py
@@ -1,3 +1,4 @@
+import logging
 import os
 from typing import List, Callable, TYPE_CHECKING, Union
 from bs4 import Tag
@@ -8,7 +9,6 @@
 
 from tvsd.show import Show
 from tvsd.episode import Episode
-from tvsd.utils import LOGGER
 
 
 if TYPE_CHECKING:
@@ -152,7 +152,7 @@ def season_index(self) -> int:
         if self._index is None:
             self.determine_season_index(self._title)
 
-        return self._index
+        return self._index or 1
 
     @property
     def relative_season_dir(self) -> str:
@@ -182,6 +182,7 @@ def determine_show_begin_year(self) -> str:
         Returns:
             int: Begin year of the show
         """
+        print(self.year)
         season_year = int(self.year)
         show_year = season_year
 
@@ -203,7 +204,7 @@ def year(self) -> str:
         Returns:
             str: year of the season
         """
-        LOGGER.info(self)
+        logging.info(self)
         return self._year
 
     @property
diff --git a/tvsd/source.py b/tvsd/source.py
index b8121668..4ffb7220 100644
--- a/tvsd/source.py
+++ b/tvsd/source.py
@@ -1,13 +1,18 @@
 import json
+import logging
 import os
 from typing import Any, List
 
 from bs4 import BeautifulSoup, ResultSet, Tag
 from abc import ABC, abstractmethod
 
-from tvsd.custom_types import EpisodeDetailsFromURL, SeasonDetailsFromURL
+import chinese_converter
+from tvsd.custom_types import EpisodeDetailsFromURL, SeasonDetailsFromURL
+from socket import error as SocketError
+import errno
+from tvsd.episode import Episode
 from tvsd.season import Season
-from tvsd.utils import LOGGER, SCRAPER
+from tvsd.utils import SCRAPER
 
 
 def load_source_details(season_dir: str):
@@ -54,6 +59,12 @@ def __init__(self) -> None:
         self.__status__ = "parent"
 
+        self._domains: List[str] = []
+        self._domain_index: int = 0
+
+        self._is_simplified: bool = False
+        self._is_traditional: bool = False
+
     # @classmethod
     # def parse_from_json(cls, json_content):
    #     return cls(json_content)
 
@@ -66,16 +77,27 @@ def query_from_source(self, search_query: str) -> List[Season]:
         Returns:
             Union(List[Show, Season], []): List of shows or seasons
         """
+        if self._is_simplified:
+            search_query = chinese_converter.to_simplified(search_query)
+        if self._is_traditional:
+            search_query = chinese_converter.to_traditional(search_query)
+        print(search_query)
         search_url = self._search_url(search_query)
-        LOGGER.debug(f"Searching for {search_query} in {search_url}")
+        logging.debug(f"Searching for {search_query} in {search_url}")
         query_result_soup = self.get_query_result_soup(search_url)
-        query_results = self._get_query_results(query_result_soup)
-        # Below are same for all
-        self._result_list: List[Season] = []
+        if query_result_soup is not None:
+            query_results = self._get_query_results(query_result_soup)
+            # Below are same for all
+            self._result_list: List[Season] = []
+
+            for result in query_results:
+                show = self.parse_from_query(result)
+                self._result_list.append(show)
+
+            if len(self._result_list) == 0 and len(self._domains) > self._domain_index + 1:
+                self._domain_index += 1
+                return self.query_from_source(search_query)
 
-        for result in query_results:
-            show = self.parse_from_query(result)
-            self._result_list.append(show)
         return self._result_list
 
     @abstractmethod
@@ -111,11 +133,18 @@ def get_query_result_soup(self, search_url: str) -> BeautifulSoup:
         Returns:
             BeautifulSoup: Query result soup
         """
-        search_result_page = SCRAPER.get(search_url).content
-        query_result_soup: BeautifulSoup = BeautifulSoup(
-            search_result_page, "html.parser"
-        )
-        return query_result_soup
+        try:
+            search_result_page = SCRAPER.get(search_url).content
+            query_result_soup: BeautifulSoup = BeautifulSoup(
+                search_result_page, "html.parser"
+            )
+            return query_result_soup
+        except ConnectionResetError as error:
+            if error.errno != errno.ECONNRESET:
+                raise  # Not error we are looking for
+            logging.error("Connection reset by peer")
+        except:
+            logging.error("Error in getting query result soup")
 
 
     ##### PARSE EPISODE DETAILS FROM URL #####
@@ -129,7 +158,7 @@ def parse_episode_details_from_li(self, soup: Tag) -> "EpisodeDetailsFromURL":
             Episode: Episode object
         """
 
-        episode_details = {
+        episode_details: EpisodeDetailsFromURL = {
             "title": self._set_episode_title(soup),
             "url": self._set_relative_episode_url(soup),
         }
@@ -212,7 +241,7 @@ def _get_result_source_id(self, query_result: BeautifulSoup) -> str:
         return ""
 
     @abstractmethod
-    def _get_result_details_url(self, query_result: str) -> str:
+    def _get_result_details_url(self, query_result: BeautifulSoup) -> str:
         """Gets the result details url
 
         Args:
@@ -235,7 +264,7 @@ def parse_season_from_details_url(self, season_url: str) -> "SeasonDetailsFromUR
             dict: Details found on the details page
         """
         soup = self.fetch_details_soup(season_url)
-        details = {
+        details: SeasonDetailsFromURL = {
             "title": self._set_season_title(soup),
             "description": self._set_season_description(soup),
             "episodes": self._set_season_episodes(soup),
@@ -368,3 +397,7 @@ def source_name(self) -> str:
             str: Name of the class
         """
         return self.__class__.__name__
+
+    @property
+    def _domain(self):
+        return self._domains[self._domain_index]
diff --git a/tvsd/sources/olevod.py b/tvsd/sources/olevod.py
index 9b941161..b6df88a1 100644
--- a/tvsd/sources/olevod.py
+++ b/tvsd/sources/olevod.py
@@ -4,7 +4,7 @@
 from bs4 import BeautifulSoup, ResultSet, Tag
 
 from tvsd.source import Source
-from tvsd.utils import LOGGER
+import logging
 
 
 class OLEVOD(Source):
@@ -13,15 +13,26 @@ class OLEVOD(Source):
     def __init__(self):
         super().__init__()  # Call parent constructor
         self.__status__ = "active"
+        # self._domains = ["https://olevod.com", "https://olevod1.com"]
+        self._domains = ["https://olevod.com"]
+        self._is_simplified = True
 
     ### SEARCHING FOR A SHOW ###
 
     def _search_url(self, search_query: str) -> str:
-        return f"https://www.olevod.com/index.php/vod/search.html?wd={search_query}&submit="
+        return f"{self._domain}/index.php/vod/search.html?wd={search_query}&submit="
 
     def _get_query_results(self, query_result_soup: BeautifulSoup) -> ResultSet[Any]:
         # TODO: Auto JS Guard
-        LOGGER.debug(query_result_soup)
+        logging.debug(query_result_soup)
+        # query_result = []
+        # query_result += query_result_soup.find_all(
+        #     "li", attrs={"class": "searchlist_item"}
+        # )
+        # query_result += query_result_soup.find_all(
+        #     "li", attrs={"class": "hl-list-item"}
+        # )
+        # return query_result
         return query_result_soup.find_all("li", attrs={"class": "searchlist_item"})
 
     ##### PARSE EPISODE DETAILS FROM URL #####
@@ -31,6 +42,7 @@ def _set_relative_episode_url(self, soup: Tag) -> str:
 
     def _set_season_title(self, soup: BeautifulSoup):
         return soup.find("h2", attrs={"class": "title"}).get_text()
+        # return soup.find("h2", attrs={"class": "hl-dc-title"}).get_text()
 
     ##### PARSE SEASON FROM QUERY RESULT #####
 
diff --git a/tvsd/sources/ssstv.py b/tvsd/sources/ssstv.py
index 9648b095..cfa9d926 100644
--- a/tvsd/sources/ssstv.py
+++ b/tvsd/sources/ssstv.py
@@ -9,16 +9,15 @@ class SSSTV(Source):
     """777tv class"""
 
-    source_url = "https://777tv.tw"
-
     def __init__(self):
         super().__init__()
         self.__status__ = "active"
+        self._domains = ["https://777tv.tw"]
 
     ### SEARCHING FOR A SHOW ###
 
     def _search_url(self, search_query: str) -> str:
-        return f"{self.source_url}/vodsearch/-------------.html?wd={search_query}"
+        return f"{self._domain}/vodsearch/-------------.html?wd={search_query}"
 
     def _get_query_results(self, query_result_soup: BeautifulSoup) -> ResultSet[Any]:
         return query_result_soup.find_all("div", attrs={"class": "module-search-item"})
@@ -40,7 +39,7 @@ def _get_result_note(self, query_result: BeautifulSoup) -> str:
 
     def _get_result_details_url(self, query_result: BeautifulSoup) -> str:
         relative_url = query_result.find("a", attrs={"class": "video-serial"})["href"]
-        return f"{self.source_url}{relative_url}"
+        return f"{self._domain}{relative_url}"
 
     #### PARSE SEASON DETAILS FROM DETAILS URL ####
 
@@ -73,7 +72,7 @@ def _set_season_year(self, soup: BeautifulSoup):
     ######## FETCH EPISODE M3U8 ########
 
     def _episode_url(self, relative_episode_url: str) -> str:
-        return self.source_url + relative_episode_url
+        return self._domain + relative_episode_url
 
     def _set_episode_script(self, episode_soup: BeautifulSoup) -> str:
         return str(
diff --git a/tvsd/sources/xiao_bao.py b/tvsd/sources/xiao_bao.py
index 1c3c7b93..07615e77 100644
--- a/tvsd/sources/xiao_bao.py
+++ b/tvsd/sources/xiao_bao.py
@@ -12,24 +12,24 @@ class XiaoBao(Source):
     def __init__(self):
         super().__init__()  # Call parent constructor
         self.__status__ = "active"
+        self._domains = ["https://xiaoheimi.net"]
+        self._is_simplified = True
 
     ### SEARCHING FOR A SHOW ###
 
     def _search_url(self, search_query: str) -> str:
-        return (
-            f"https://xiaoheimi.net/index.php/vod/search.html?wd={search_query}&submit="
-        )
+        return f"{self._domain}/index.php/vod/search.html?wd={search_query}&submit="
 
     def _get_query_results(self, query_result_soup: BeautifulSoup) -> ResultSet[Any]:
         return query_result_soup.find_all("li", attrs={"class": "clearfix"})
 
     ##### PARSE EPISODE DETAILS FROM URL #####
 
-    def _set_season_title(self, soup: BeautifulSoup):
-        return str(soup.title.string).replace(" - 小宝影院 - 在线视频", "") or None
+    def _set_episode_title(self, soup: Tag) -> str:
+        return soup.find("a").get_text()
 
     def _set_relative_episode_url(self, soup: Tag) -> str:
-        return soup.find("a", attrs={"class": "btn btn-default"})["href"]
+        return soup.find("a")["href"]
 
     ##### PARSE SEASON FROM QUERY RESULT #####
 
@@ -46,11 +46,11 @@ def _get_result_source_id(self, query_result: BeautifulSoup) -> str:
 
     def _get_result_details_url(self, query_result: BeautifulSoup) -> str:
         source_id = self._get_result_source_id(query_result=query_result)
-        return f"https://xiaoheimi.net/index.php/vod/detail/id/{source_id}.html"
+        return f"{self._domain}/index.php/vod/detail/id/{source_id}.html"
 
     #### PARSE SEASON DETAILS FROM DETAILS URL ####
 
-    def _set_episode_title(self, soup: Tag) -> str:
+    def _set_season_title(self, soup: BeautifulSoup) -> str:
         return str(soup.title.string).replace(" - 小宝影院 - 在线视频", "") or None
 
     def _set_season_description(self, soup: BeautifulSoup):
@@ -69,7 +69,7 @@ def _set_season_year(self, soup: BeautifulSoup):
     ######## FETCH EPISODE M3U8 ########
 
     def _episode_url(self, relative_episode_url: str) -> str:
-        return f"https://xiaoheimi.net{relative_episode_url}"
+        return f"{self._domain}{relative_episode_url}"
 
     def _set_episode_script(self, episode_soup: BeautifulSoup) -> str:
         return str(
diff --git a/tvsd/utils/__init__.py b/tvsd/utils/__init__.py
index 5438d808..5328d605 100644
--- a/tvsd/utils/__init__.py
+++ b/tvsd/utils/__init__.py
@@ -3,19 +3,23 @@
 import re
 from typing import List
 
 import cloudscraper
+import typer
 
 from tvsd.config import BASE_PATH, SERIES_DIR, SPECIALS_DIR
 
 SCRAPER = cloudscraper.create_scraper(
     delay=10,
+    # browser={
+    #     "custom": "ScraperBot/1.0",
+    # },
     browser={
-        "custom": "ScraperBot/1.0",
+        "browser": "chrome",
+        "platform": "windows",
+        "desktop": True,
+        "mobile": False,
     },
 )
 
-LOGGER = logging
-LOGGER.basicConfig(level=logging.DEBUG)
-
 def mkdir_if_no(check_dir: str, recursive: bool = True):
     """Creates a directory if it does not exist
@@ -30,7 +34,7 @@
     try:
         os.mkdir(check_dir)
     except FileNotFoundError:
-        LOGGER.error(
+        logging.error(
             f"Parent directory does not exist, cannot create directory {check_dir}"
         )
 
@@ -116,7 +120,18 @@ def check_dir_mounted(path: str) -> bool:
         bool: True if directory exists
     """
 
-    if os.path.ismount(path) and os.path.isdir(os.path.join(path, SERIES_DIR)):
-        return True
-    print(path, "has not been mounted yet. Exiting...")
-    return False
+    if not os.path.ismount(path):
+        print(path, "has not been mounted yet. Exiting...")
+        return False
+    if not os.path.isdir(os.path.join(path, SERIES_DIR)):
+        print(f"{path} does not contain a {SERIES_DIR} directory.")
+        if (
+            typer.prompt(
+                "Would you like to create it? [y/n]", default="y", show_default=True
+            ).capitalize()
+            == "Y"
+        ):
+            mkdir_if_no(os.path.join(path, SERIES_DIR))
+        else:
+            return False
+    return True
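
Note (illustration only, not part of the patch): the new `chinese-converter` dependency and the per-source `_is_simplified` / `_is_traditional` flags let `Source.query_from_source` translate the search query before the search URL is built, which is the "automatic translation" changelog entry above. A minimal sketch of that conversion, using a made-up query string, looks like this:

    # Sketch of the conversion step added in Source.query_from_source.
    # The query string below is an arbitrary example; XiaoBao and OLEVOD set
    # _is_simplified = True, so they convert the query to simplified characters
    # before formatting it into the source's search URL.
    import chinese_converter

    query = "愛的迫降"  # hypothetical user input in traditional Chinese
    simplified = chinese_converter.to_simplified(query)  # -> "爱的迫降"
    traditional = chinese_converter.to_traditional(simplified)  # back to "愛的迫降"
    print(simplified, traditional)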