diff --git a/.github/workflows/fetch-audit-logs.yaml b/.github/workflows/fetch-audit-logs.yaml new file mode 100644 index 00000000..db46474b --- /dev/null +++ b/.github/workflows/fetch-audit-logs.yaml @@ -0,0 +1,41 @@ +name: + Extract Audit Logs with CS Tools. + +on: + workflow_dispatch: + schedule: + # Runs every day at 5:10 AM UTC + - cron: "10 5 * * *" + +jobs: + extract_data_from_thoughtspot: + + # Configure Environment Variables for CS Tools configuration + # Configure Environment Variables for CS Tools configuration + env: + CS_TOOLS_THOUGHTSPOT__URL: ${{ secrets.THOUGHTSPOT_URL }} + CS_TOOLS_THOUGHTSPOT__USERNAME: ${{ secrets.THOUGHTSPOT_USERNAME }} + CS_TOOLS_THOUGHTSPOT__SECRET_KEY: ${{ secrets.THOUGHTSPOT_SECRET_KEY }} + # CS_TOOLS_TEMP_DIR: ... + + DECLARATIVE_SYNCER_SYNTAX: account_name=${{ secrets.SNOWFLAKE_ACCOUNT }}&username=${{ secrets.SNOWFLAKE_USERNAME }}&secret=${{ secrets.SNOWFLAKE_PASSWORD }}&warehouse=${{ secrets.SNOWFLAKE_WAREHOUSE }}&role=${{ secrets.SNOWFLAKE_ROLE }}&database=${{ secrets.SNOWFLAKE_DATABASE }}&schema=${{ secrets.SNOWFLAKE_SCHEMA }}&authentication=basic + + runs-on: ubuntu-latest + steps: + - name: Check out the repository main branch + uses: actions/checkout@v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@v4 + with: + python-version: 3.12 + + - name: Install a specific version of CS Tools + run: python -m pip install -e .[cli] + + - name: Install Snowflake Syncer requirements + run: python -m pip install snowflake-sqlalchemy>=1.6.1 cryptography + + # --config ENV: tells CS Tools to pull the information from environment variables. 
+ - name: Grab N-7 days of Audit Logs Data + run: "cs_tools tools searchable audit-logs --last-k-days 7 --syncer 'snowflake://${{ env.DECLARATIVE_SYNCER_SYNTAX }}&load_strategy=UPSERT' --config ENV:" diff --git a/cs_tools/__project__.py b/cs_tools/__project__.py index 54764c26..d4e49152 100644 --- a/cs_tools/__project__.py +++ b/cs_tools/__project__.py @@ -1,4 +1,4 @@ -__version__ = "1.5.9" +__version__ = "1.5.10" __docs__ = "https://thoughtspot.github.io/cs_tools/" __repo__ = "https://github.com/thoughtspot/cs_tools" __help__ = f"{__repo__}/discussions/" diff --git a/cs_tools/cli/_analytics.py b/cs_tools/cli/_analytics.py index 2311b782..7ea4fa65 100644 --- a/cs_tools/cli/_analytics.py +++ b/cs_tools/cli/_analytics.py @@ -3,12 +3,14 @@ This file localizes all the analytics activities that CS Tools performs. """ + from __future__ import annotations from typing import Annotated, Any, Optional import datetime as dt import json import logging +import os import platform import shutil import sysconfig @@ -72,6 +74,10 @@ def prompt_for_opt_in() -> None: if meta.analytics.is_opted_in is not None: return + if meta.environment.is_ci: + log.info("Analytics is enabled for CI installs. 
Set CS_TOOLS_ANALYTICS_OPT_OUT to disable.") + return + rich_console.print() prompt = Panel.fit( @@ -102,6 +108,9 @@ def prompt_for_opt_in() -> None: def maybe_send_analytics_data() -> None: """If registered for analytics, regularly send information about the experience.""" + if meta.environment.is_ci and meta.analytics.is_opted_in is None: + meta.analytics.is_opted_in = "CS_TOOLS_ANALYTICS_OPT_OUT" not in os.environ + if not meta.analytics.is_opted_in or meta.environment.is_dev: return diff --git a/cs_tools/cli/commands/main.py b/cs_tools/cli/commands/main.py index bd1647d3..b06e812b 100644 --- a/cs_tools/cli/commands/main.py +++ b/cs_tools/cli/commands/main.py @@ -98,9 +98,10 @@ def run() -> int: try: return_code = app(standalone_mode=False) + return_code = 0 if return_code is None else return_code - except (click.Abort, typer.Abort): - return_code = 0 + except (click.exceptions.Abort, click.exceptions.Exit, typer.Abort, typer.Exit) as e: + return_code = getattr(e, "exit_code", 0) rich_console.print("[b yellow]Stopping -- cancelled by user..\n") except click.ClickException as e: @@ -108,6 +109,7 @@ def run() -> int: this_run_data["is_known_error"] = True this_run_data["traceback"] = utils.anonymize("\n".join(format_exception(type(e), e, e.__traceback__, limit=5))) log.error(e) + log.debug("more info..", exc_info=True) except errors.CSToolsError as e: return_code = 1 diff --git a/cs_tools/cli/commands/self.py b/cs_tools/cli/commands/self.py index c715bfaf..fb4fb7d8 100644 --- a/cs_tools/cli/commands/self.py +++ b/cs_tools/cli/commands/self.py @@ -57,24 +57,23 @@ def update( """ Upgrade CS Tools. 
""" - requires = ["cs_tools[cli]"] - log.info("Determining if CS Tools is globally installed.") cs_tools_venv.check_if_globally_installed(remove=True) if offline is not None: log.info(f"Using the offline binary found at [b magenta]{offline}") cs_tools_venv.with_offline_mode(find_links=offline) + requires = ["cs_tools[cli]"] elif dev is not None: log.info("Installing locally using the development environment.") - requires.extend(f"-e {dev.as_posix()}".split(" ")) + requires = [f"cs_tools[cli] -e {dev.as_posix()}"] else: log.info(f"Getting the latest CS Tools {'beta ' if beta else ''}release.") release = get_latest_cs_tools_release(allow_beta=beta) log.info(f"Found version: [b cyan]{release['tag_name']}") - requires.extend(f" @ https://github.com/thoughtspot/cs_tools/archive/{release['tag_name']}.zip".split(" ")) + requires = [f"cs_tools[cli] @ https://github.com/thoughtspot/cs_tools/archive/{release['tag_name']}.zip"] if AwesomeVersion(release["tag_name"]) <= AwesomeVersion(__version__): log.info(f"CS Tools is [b green]already up to date[/]! 
(your version: {__version__})") @@ -200,18 +199,26 @@ def download( log.info("Freezing existing virtual environment") frozen = { r - for r in cs_tools_venv.pip("freeze", "--quiet", visible_output=False).stdout.decode().split("\n") + for r in cs_tools_venv.pip("freeze", "--quiet", should_stream_output=False).stdout.decode().split("\n") if "cs_tools" not in r } # add in the latest release frozen.add(f"cs_tools @ https://github.com/thoughtspot/cs_tools/archive/{release_tag}.zip") + # DESIRED OUTPUT + # + # Running command /usr/bin/python /tmp/pip-standalone-pip-ccumgmp2/__env_pip__.zip/pip install --ignore-installed --no-user --prefix /tmp/pip-build-env-dtapuowm/overlay --no-warn-script-location --no-binary :none: --only-binary :none: -i https://pypi.org/simple -- 'setuptools>=42' 'setuptools_scm[toml]>=6.2' + # + # add packaging stuff since we'll use --no-deps frozen.add("pip >= 23.1") frozen.add("setuptools >= 42") frozen.add("setuptools_scm >= 6.2") frozen.add("wheel") + # rust-based build tools + frozen.add("semantic-version >= 2.10.0") + frozen.add("setuptools-rust >= 1.4.0") frozen.add("maturin >= 1, < 2") # fmt: off diff --git a/cs_tools/cli/tools/archiver/_extended_rest_api_v1.py b/cs_tools/cli/tools/archiver/_extended_rest_api_v1.py index 181ea3a6..a11ae1db 100644 --- a/cs_tools/cli/tools/archiver/_extended_rest_api_v1.py +++ b/cs_tools/cli/tools/archiver/_extended_rest_api_v1.py @@ -3,16 +3,18 @@ from typing import TYPE_CHECKING import json +from cs_tools.api._rest_api_v1 import RESTAPIv1 + if TYPE_CHECKING: import httpx from cs_tools.types import GUID, MetadataObjectType -def metadata_delete(ts_client: httpx.Client, *, metadata_type: MetadataObjectType, guids: list[GUID]) -> httpx.Response: +def metadata_delete(ts_client: RESTAPIv1, *, metadata_type: MetadataObjectType, guids: list[GUID]) -> httpx.Response: """ DELETE metadata """ d = {"type": metadata_type, "id": json.dumps(guids)} - r = ts_client.post("callosum/v1/metadata/delete", data=d) + r = 
ts_client.request(method="POST", endpoint="callosum/v1/metadata/delete", data=d) return r diff --git a/cs_tools/cli/tools/bulk-deleter/_extended_rest_api_v1.py b/cs_tools/cli/tools/bulk-deleter/_extended_rest_api_v1.py index 181ea3a6..e4ae2f7f 100644 --- a/cs_tools/cli/tools/bulk-deleter/_extended_rest_api_v1.py +++ b/cs_tools/cli/tools/bulk-deleter/_extended_rest_api_v1.py @@ -3,16 +3,18 @@ from typing import TYPE_CHECKING import json +from cs_tools.api._rest_api_v1 import RESTAPIv1 + if TYPE_CHECKING: import httpx from cs_tools.types import GUID, MetadataObjectType -def metadata_delete(ts_client: httpx.Client, *, metadata_type: MetadataObjectType, guids: list[GUID]) -> httpx.Response: +def metadata_delete(ts_client: RESTAPIv1, *, metadata_type: MetadataObjectType, guids: list[GUID]) -> httpx.Response: """ DELETE metadata """ d = {"type": metadata_type, "id": json.dumps(guids)} - r = ts_client.post("callosum/v1/metadata/delete", data=d) + r = ts_client.request(method="POST", endpoint="callosum/v1/metadata/delete", data=d) return r diff --git a/cs_tools/cli/tools/falcon-sharding/_extended_rest_api_v1.py b/cs_tools/cli/tools/falcon-sharding/_extended_rest_api_v1.py index a0971815..08b5f691 100644 --- a/cs_tools/cli/tools/falcon-sharding/_extended_rest_api_v1.py +++ b/cs_tools/cli/tools/falcon-sharding/_extended_rest_api_v1.py @@ -2,11 +2,13 @@ from typing import TYPE_CHECKING +from cs_tools.api._rest_api_v1 import RESTAPIv1 + if TYPE_CHECKING: import httpx -def periscope_sage_combined_table_info(ts_client: httpx.Client) -> httpx.Response: +def periscope_sage_combined_table_info(ts_client: RESTAPIv1) -> httpx.Response: """ The API call powers the Falcon Table Usage info. 
diff --git a/cs_tools/cli/tools/searchable/app.py b/cs_tools/cli/tools/searchable/app.py index f9b04239..b9b48f3e 100644 --- a/cs_tools/cli/tools/searchable/app.py +++ b/cs_tools/cli/tools/searchable/app.py @@ -194,6 +194,9 @@ def audit_logs( .replace(hour=0, minute=0, second=0, microsecond=0) ) + # THOUGHTSPOT API SEEMS TO HAVE ISSUES WITH TIMEZONES AND CAUSES DUPLICATION OF DATA + everseen = set() + renamed = [] for days_to_fetch in range(last_k_days): @@ -211,12 +214,18 @@ def audit_logs( rows = r.json() if not rows: - log.info(f"Found no data for NOW - {last_k_days} days ({utc_start} -> {utc_end})") + log.info(f"Found no data for [NOW - {days_to_fetch} DAYS] ({utc_start.date()} -> {utc_end.date()})") continue for row in rows: data = json.loads(row["log"]) + # THOUGHTSPOT API SEEMS TO HAVE ISSUES WITH TIMEZONES AND CAUSES DUPLICATION OF DATA + if f"{ts.session_context.thoughtspot.cluster_id}-{data['id']}" in everseen: + continue + + everseen.add(f"{ts.session_context.thoughtspot.cluster_id}-{data['id']}") + renamed.append( models.AuditLogs.validated_init( **{ @@ -313,7 +322,7 @@ def bi_server( with tasks["gather_search"]: data = ts.search(SEARCH_TOKENS, worksheet="TS: BI Server") - # SEARCH DATA API SEEMS TO HAVE ISSUES WITH TIMEZONES AND CAUSES DUPLICATION OF DATA + # THOUGHTSPOT API SEEMS TO HAVE ISSUES WITH TIMEZONES AND CAUSES DUPLICATION OF DATA data = [dict(t) for t in {tuple(sorted(d.items())) for d in data}] # CLUSTER BY --> TIMESTAMP .. everything else is irrelevant after TS. diff --git a/cs_tools/settings.py b/cs_tools/settings.py index 6d1d61f5..608318cd 100644 --- a/cs_tools/settings.py +++ b/cs_tools/settings.py @@ -3,6 +3,7 @@ This is the supply-side of information which determines the runtime environment. 
""" + from __future__ import annotations from typing import Annotated, Any, Optional, Union @@ -270,10 +271,10 @@ def ensure_only_netloc(cls, data) -> str: netloc = data.host if data.scheme == "http" and data.port != 80: - netloc += str(data.port) + netloc += f":{data.port}" if data.scheme == "https" and data.port != 443: - netloc += str(data.port) + netloc += f":{data.port}" return f"{data.scheme}://{netloc}" diff --git a/cs_tools/sync/base.py b/cs_tools/sync/base.py index 251ab879..d48ecbf9 100644 --- a/cs_tools/sync/base.py +++ b/cs_tools/sync/base.py @@ -72,12 +72,6 @@ def __ensure_pip_requirements__(self) -> None: for pip_requirement in self.requirements: log.debug(f"Processing requirement: {pip_requirement}") - - if cs_tools_venv.is_package_installed(pip_requirement.requirement.name): - log.debug("Requirement satisfied, no install necessary") - continue - - log.info(f"Installing package: {pip_requirement}") cs_tools_venv.pip("install", f"{pip_requirement.requirement}", *pip_requirement.pip_args) diff --git a/cs_tools/sync/databricks/syncer.py b/cs_tools/sync/databricks/syncer.py index 19d2b8c1..efed8d70 100644 --- a/cs_tools/sync/databricks/syncer.py +++ b/cs_tools/sync/databricks/syncer.py @@ -125,7 +125,7 @@ def dump(self, tablename: str, *, data: TableRows) -> None: table = self.metadata.tables[f"{self.schema_}.{tablename}"] - if self.load_strategy == "APPEND" + if self.load_strategy == "APPEND": if self.use_experimental_dataload: stage = self.stage_and_put(tablename=tablename, data=data) self.copy_into(from_=stage, into=tablename) diff --git a/cs_tools/thoughtspot.py b/cs_tools/thoughtspot.py index 9592803f..4627c7f2 100644 --- a/cs_tools/thoughtspot.py +++ b/cs_tools/thoughtspot.py @@ -42,7 +42,6 @@ class ThoughtSpot: def __init__(self, config: CSToolsConfig, auto_login: bool = False): self.config = config self._session_context: Optional[SessionContext] = None - self.config = config self.api = RESTAPIClient( ts_url=str(config.thoughtspot.url), verify=not 
config.thoughtspot.disable_ssl, diff --git a/cs_tools/updater/_updater.py b/cs_tools/updater/_updater.py index 409df258..36350c76 100644 --- a/cs_tools/updater/_updater.py +++ b/cs_tools/updater/_updater.py @@ -98,41 +98,47 @@ def get_venv_path(self) -> pathlib.Path: return cs_tools_venv_dir.resolve() @staticmethod - def run(*args, raise_on_failure: bool = True, visible_output: bool = True, **kwargs) -> sp.CompletedProcess: + def run(*args, raise_on_failure: bool = True, should_stream_output: bool = True, **kwargs) -> sp.CompletedProcess: """Run a SHELL command.""" - levels = {"ERROR": log.error, "WARNING": log.warning} - output = [] - errors = [] - - with sp.Popen(args, stdout=sp.PIPE, stderr=sp.STDOUT, close_fds=False, **kwargs) as proc: - assert proc.stdout is not None - - for line_as_bytes in proc.stdout.readlines(): - line = line_as_bytes.decode().strip() - - if line.startswith(tuple(levels)): - log_level, _, line = line.partition(": ") - logger = levels[log_level] + final_stdout: list[str] = [] + final_stderr: list[str] = [] + # DEV NOTE @boonhapus, 2024/02/15 + # We want to capture and stream the output of the subprocess as it comes in, + # so that the user doesn't think it's frozen. 
+ # + popen_streaming_options = { + "stdout": sp.PIPE, + "stderr": sp.STDOUT, + "text": True, + "bufsize": 1, + } + + with sp.Popen(args, **popen_streaming_options, encoding="utf-8", **kwargs) as proc: # type: ignore[call-overload] + for line in iter(proc.stdout.readline, ""): + if line.startswith(("ERROR", "WARNING")): + level, _, line = line.partition(": ") + buffer = final_stderr else: - logger = log.info + level = "INFO" + buffer = final_stdout - if visible_output and line: - logger(line) + if should_stream_output and line: + log.log(level=getattr(logging, level), msg=line.strip()) - output.append(line) if logger == log.info else errors.append(line) + buffer.append(line) if raise_on_failure and proc.returncode != 0: cmd = " ".join(arg.replace(" ", "") for arg in args) raise RuntimeError(f"Failed with exit code: {proc.returncode}\n\nPIP COMMAND BELOW\n{cmd}") - output_as_bytes = "\n".join(output).encode() - errors_as_bytes = "\n".join(errors).encode() + output_as_bytes = "\n".join(final_stdout).encode() + errors_as_bytes = "\n".join(final_stderr).encode() return sp.CompletedProcess(proc.args, proc.returncode, stdout=output_as_bytes, stderr=errors_as_bytes) def is_package_installed(self, package_name: str, with_system_python: bool = False) -> bool: """Check if a package is installed. 
This should be called only within CS Tools.""" - cp = self.pip("list", "--format", "json", visible_output=False, with_system_python=with_system_python) + cp = self.pip("list", "--format", "json", should_stream_output=False, with_system_python=with_system_python) for installed in json.loads(cp.stdout.decode()): if installed["name"] == package_name: @@ -250,6 +256,7 @@ def make(self) -> None: self.run(python, "-m", "venv", self.venv_path.as_posix()) # Ensure `pip` is at least V23.1 so that backjumping is available + self.python("-m", "ensurepip") self.pip("install", "pip >= 23.1", "--upgrade") def reset(self) -> None: diff --git a/cs_tools/utils.py b/cs_tools/utils.py index 201e5651..b637ecef 100644 --- a/cs_tools/utils.py +++ b/cs_tools/utils.py @@ -13,6 +13,7 @@ import itertools as it import json import logging +import os import pathlib import site import threading @@ -187,6 +188,9 @@ def run(self) -> None: def determine_editable_install() -> bool: """Determine if the current CS Tools context is an editable install.""" + if "FAKE_EDITABLE" in os.environ: + return True + for directory in site.getsitepackages(): try: site_directory = pathlib.Path(directory) diff --git a/pyproject.toml b/pyproject.toml index 3cad08a2..cf9d89e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,6 +47,7 @@ dependencies = [ "sqlmodel >= 0.0.16", "toml", "packaging", + "betterproto[compile] < 2.0.0b7", # version specific "strenum >= 0.4.9; python_version < '3.11.0'",