Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
boonhapus committed Sep 16, 2024
2 parents 775561b + f00ea02 commit fcf2ffe
Show file tree
Hide file tree
Showing 16 changed files with 126 additions and 46 deletions.
41 changes: 41 additions & 0 deletions .github/workflows/fetch-audit-logs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Scheduled extraction of ThoughtSpot audit logs into Snowflake using CS Tools.
name: Extract Audit Logs with CS Tools.

on:
  # Allows the workflow to be started by hand.
  workflow_dispatch:
  schedule:
    # Runs every day at 5:10 AM UTC
    - cron: "10 5 * * *"

jobs:
  extract_data_from_thoughtspot:

    # Configure Environment Variables for CS Tools configuration
    env:
      CS_TOOLS_THOUGHTSPOT__URL: ${{ secrets.THOUGHTSPOT_URL }}
      CS_TOOLS_THOUGHTSPOT__USERNAME: ${{ secrets.THOUGHTSPOT_USERNAME }}
      CS_TOOLS_THOUGHTSPOT__SECRET_KEY: ${{ secrets.THOUGHTSPOT_SECRET_KEY }}
      # CS_TOOLS_TEMP_DIR: ...

      # Query-string portion of the Snowflake syncer definition. The final step prefixes it with
      # `snowflake://` and appends the load strategy.
      DECLARATIVE_SYNCER_SYNTAX: account_name=${{ secrets.SNOWFLAKE_ACCOUNT }}&username=${{ secrets.SNOWFLAKE_USERNAME }}&secret=${{ secrets.SNOWFLAKE_PASSWORD }}&warehouse=${{ secrets.SNOWFLAKE_WAREHOUSE }}&role=${{ secrets.SNOWFLAKE_ROLE }}&database=${{ secrets.SNOWFLAKE_DATABASE }}&schema=${{ secrets.SNOWFLAKE_SCHEMA }}&authentication=basic

    runs-on: ubuntu-latest
    steps:
      - name: Check out the repository main branch
        uses: actions/checkout@v4

      - name: Set up Python 3.12
        uses: actions/setup-python@v4
        with:
          # Quoted so YAML always reads the version as a string, never as a float.
          python-version: "3.12"

      # Editable install of the checked-out sources. The extra is quoted so the shell cannot
      # treat `.[cli]` as a glob pattern.
      - name: Install a specific version of CS Tools
        run: python -m pip install -e ".[cli]"

      # The requirement is quoted: unquoted, the shell treats `>` as an output redirect, which
      # drops the version pin and writes pip's output to a file named `=1.6.1`.
      - name: Install Snowflake Syncer requirements
        run: python -m pip install "snowflake-sqlalchemy>=1.6.1" cryptography

      # --config ENV: tells CS Tools to pull the information from environment variables.
      #
      # The syncer definition is read from the job-level environment variable by the shell itself,
      # rather than being pasted into the script text, so quotes or other shell metacharacters
      # inside a secret cannot break (or inject into) the command line.
      - name: Grab N-7 days of Audit Logs Data
        run: >-
          cs_tools tools searchable audit-logs
          --last-k-days 7
          --syncer "snowflake://${DECLARATIVE_SYNCER_SYNTAX}&load_strategy=UPSERT"
          --config ENV:
2 changes: 1 addition & 1 deletion cs_tools/__project__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "1.5.9"
__version__ = "1.5.10"
__docs__ = "https://thoughtspot.github.io/cs_tools/"
__repo__ = "https://github.com/thoughtspot/cs_tools"
__help__ = f"{__repo__}/discussions/"
Expand Down
9 changes: 9 additions & 0 deletions cs_tools/cli/_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
This file localizes all the analytics activities that CS Tools performs.
"""

from __future__ import annotations

from typing import Annotated, Any, Optional
import datetime as dt
import json
import logging
import os
import platform
import shutil
import sysconfig
Expand Down Expand Up @@ -72,6 +74,10 @@ def prompt_for_opt_in() -> None:
if meta.analytics.is_opted_in is not None:
return

if meta.environment.is_ci:
log.info("Analytics is enabled for CI installs. Set CS_TOOLS_ANALYTICS_OPT_OUT to disable.")
return

rich_console.print()

prompt = Panel.fit(
Expand Down Expand Up @@ -102,6 +108,9 @@ def prompt_for_opt_in() -> None:

def maybe_send_analytics_data() -> None:
"""If registered for analytics, regularly send information about the experience."""
if meta.environment.is_ci and meta.analytics.is_opted_in is None:
meta.analytics.is_opted_in = "CS_TOOLS_ANALYTICS_OPT_OUT" not in os.environ

if not meta.analytics.is_opted_in or meta.environment.is_dev:
return

Expand Down
6 changes: 4 additions & 2 deletions cs_tools/cli/commands/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,18 @@ def run() -> int:

try:
return_code = app(standalone_mode=False)
return_code = 0 if return_code is None else return_code

except (click.Abort, typer.Abort):
return_code = 0
except (click.exceptions.Abort, click.exceptions.Exit, typer.Abort, typer.Exit) as e:
return_code = getattr(e, "exit_code", 0)
rich_console.print("[b yellow]Stopping -- cancelled by user..\n")

except click.ClickException as e:
return_code = 1
this_run_data["is_known_error"] = True
this_run_data["traceback"] = utils.anonymize("\n".join(format_exception(type(e), e, e.__traceback__, limit=5)))
log.error(e)
log.debug("more info..", exc_info=True)

except errors.CSToolsError as e:
return_code = 1
Expand Down
17 changes: 12 additions & 5 deletions cs_tools/cli/commands/self.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,24 +57,23 @@ def update(
"""
Upgrade CS Tools.
"""
requires = ["cs_tools[cli]"]

log.info("Determining if CS Tools is globally installed.")
cs_tools_venv.check_if_globally_installed(remove=True)

if offline is not None:
log.info(f"Using the offline binary found at [b magenta]{offline}")
cs_tools_venv.with_offline_mode(find_links=offline)
requires = ["cs_tools[cli]"]

elif dev is not None:
log.info("Installing locally using the development environment.")
requires.extend(f"-e {dev.as_posix()}".split(" "))
requires = [f"cs_tools[cli] -e {dev.as_posix()}"]

else:
log.info(f"Getting the latest CS Tools {'beta ' if beta else ''}release.")
release = get_latest_cs_tools_release(allow_beta=beta)
log.info(f"Found version: [b cyan]{release['tag_name']}")
requires.extend(f" @ https://github.com/thoughtspot/cs_tools/archive/{release['tag_name']}.zip".split(" "))
requires = [f"cs_tools[cli] @ https://github.com/thoughtspot/cs_tools/archive/{release['tag_name']}.zip"]

if AwesomeVersion(release["tag_name"]) <= AwesomeVersion(__version__):
log.info(f"CS Tools is [b green]already up to date[/]! (your version: {__version__})")
Expand Down Expand Up @@ -200,18 +199,26 @@ def download(
log.info("Freezing existing virtual environment")
frozen = {
r
for r in cs_tools_venv.pip("freeze", "--quiet", visible_output=False).stdout.decode().split("\n")
for r in cs_tools_venv.pip("freeze", "--quiet", should_stream_output=False).stdout.decode().split("\n")
if "cs_tools" not in r
}

# add in the latest release
frozen.add(f"cs_tools @ https://github.com/thoughtspot/cs_tools/archive/{release_tag}.zip")

# DESIRED OUTPUT
#
# Running command /usr/bin/python /tmp/pip-standalone-pip-ccumgmp2/__env_pip__.zip/pip install --ignore-installed --no-user --prefix /tmp/pip-build-env-dtapuowm/overlay --no-warn-script-location --no-binary :none: --only-binary :none: -i https://pypi.org/simple -- 'setuptools>=42' 'setuptools_scm[toml]>=6.2'
#

# add packaging stuff since we'll use --no-deps
frozen.add("pip >= 23.1")
frozen.add("setuptools >= 42")
frozen.add("setuptools_scm >= 6.2")
frozen.add("wheel")
# rust-based build tools
frozen.add("semantic-version >= 2.10.0")
frozen.add("setuptools-rust >= 1.4.0")
frozen.add("maturin >= 1, < 2")

# fmt: off
Expand Down
6 changes: 4 additions & 2 deletions cs_tools/cli/tools/archiver/_extended_rest_api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@
from typing import TYPE_CHECKING
import json

from cs_tools.api._rest_api_v1 import RESTAPIv1

if TYPE_CHECKING:
import httpx

from cs_tools.types import GUID, MetadataObjectType


def metadata_delete(ts_client: RESTAPIv1, *, metadata_type: MetadataObjectType, guids: list[GUID]) -> httpx.Response:
    """
    DELETE metadata

    Issues a POST against the callosum/v1/metadata/delete endpoint through
    the given client. The form data carries the object type as-is and the
    guids serialized as a JSON array string.
    """
    form_data = {"type": metadata_type, "id": json.dumps(guids)}
    return ts_client.request(method="POST", endpoint="callosum/v1/metadata/delete", data=form_data)
6 changes: 4 additions & 2 deletions cs_tools/cli/tools/bulk-deleter/_extended_rest_api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,18 @@
from typing import TYPE_CHECKING
import json

from cs_tools.api._rest_api_v1 import RESTAPIv1

if TYPE_CHECKING:
import httpx

from cs_tools.types import GUID, MetadataObjectType


def metadata_delete(ts_client: RESTAPIv1, *, metadata_type: MetadataObjectType, guids: list[GUID]) -> httpx.Response:
    """
    DELETE metadata

    Issues a POST against the callosum/v1/metadata/delete endpoint through
    the given client. The form data carries the object type as-is and the
    guids serialized as a JSON array string.
    """
    d = {"type": metadata_type, "id": json.dumps(guids)}
    # The keyword is `method` (previously misspelled as `meothd`), matching the
    # archiver tool's copy of this helper.
    r = ts_client.request(method="POST", endpoint="callosum/v1/metadata/delete", data=d)
    return r
4 changes: 3 additions & 1 deletion cs_tools/cli/tools/falcon-sharding/_extended_rest_api_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

from typing import TYPE_CHECKING

from cs_tools.api._rest_api_v1 import RESTAPIv1

if TYPE_CHECKING:
import httpx


def periscope_sage_combined_table_info(ts_client: httpx.Client) -> httpx.Response:
def periscope_sage_combined_table_info(ts_client: RESTAPIv1) -> httpx.Response:
"""
The API call powers the Falcon Table Usage info.
Expand Down
13 changes: 11 additions & 2 deletions cs_tools/cli/tools/searchable/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ def audit_logs(
.replace(hour=0, minute=0, second=0, microsecond=0)
)

# THOUGHTSPOT API SEEMS TO HAVE ISSUES WITH TIMEZONES AND CAUSES DUPLICATION OF DATA
everseen = set()

renamed = []

for days_to_fetch in range(last_k_days):
Expand All @@ -211,12 +214,18 @@ def audit_logs(
rows = r.json()

if not rows:
log.info(f"Found no data for NOW - {last_k_days} days ({utc_start} -> {utc_end})")
log.info(f"Found no data for [NOW - {days_to_fetch} DAYS] ({utc_start.date()} -> {utc_end.date()})")
continue

for row in rows:
data = json.loads(row["log"])

# THOUGHTSPOT API SEEMS TO HAVE ISSUES WITH TIMEZONES AND CAUSES DUPLICATION OF DATA
if f"{ts.session_context.thoughtspot.cluster_id}-{data['id']}" in everseen:
continue

everseen.add(f"{ts.session_context.thoughtspot.cluster_id}-{data['id']}")

renamed.append(
models.AuditLogs.validated_init(
**{
Expand Down Expand Up @@ -313,7 +322,7 @@ def bi_server(
with tasks["gather_search"]:
data = ts.search(SEARCH_TOKENS, worksheet="TS: BI Server")

# SEARCH DATA API SEEMS TO HAVE ISSUES WITH TIMEZONES AND CAUSES DUPLICATION OF DATA
# THOUGHTSPOT API SEEMS TO HAVE ISSUES WITH TIMEZONES AND CAUSES DUPLICATION OF DATA
data = [dict(t) for t in {tuple(sorted(d.items())) for d in data}]

# CLUSTER BY --> TIMESTAMP .. everything else is irrelevant after TS.
Expand Down
5 changes: 3 additions & 2 deletions cs_tools/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
This is the supply-side of information which determines the runtime environment.
"""

from __future__ import annotations

from typing import Annotated, Any, Optional, Union
Expand Down Expand Up @@ -270,10 +271,10 @@ def ensure_only_netloc(cls, data) -> str:
netloc = data.host

if data.scheme == "http" and data.port != 80:
netloc += str(data.port)
netloc += f":{data.port}"

if data.scheme == "https" and data.port != 443:
netloc += str(data.port)
netloc += f":{data.port}"

return f"{data.scheme}://{netloc}"

Expand Down
6 changes: 0 additions & 6 deletions cs_tools/sync/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,6 @@ def __ensure_pip_requirements__(self) -> None:

for pip_requirement in self.requirements:
log.debug(f"Processing requirement: {pip_requirement}")

if cs_tools_venv.is_package_installed(pip_requirement.requirement.name):
log.debug("Requirement satisfied, no install necessary")
continue

log.info(f"Installing package: {pip_requirement}")
cs_tools_venv.pip("install", f"{pip_requirement.requirement}", *pip_requirement.pip_args)


Expand Down
2 changes: 1 addition & 1 deletion cs_tools/sync/databricks/syncer.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def dump(self, tablename: str, *, data: TableRows) -> None:

table = self.metadata.tables[f"{self.schema_}.{tablename}"]

if self.load_strategy == "APPEND"
if self.load_strategy == "APPEND":
if self.use_experimental_dataload:
stage = self.stage_and_put(tablename=tablename, data=data)
self.copy_into(from_=stage, into=tablename)
Expand Down
1 change: 0 additions & 1 deletion cs_tools/thoughtspot.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ class ThoughtSpot:
def __init__(self, config: CSToolsConfig, auto_login: bool = False):
self.config = config
self._session_context: Optional[SessionContext] = None
self.config = config
self.api = RESTAPIClient(
ts_url=str(config.thoughtspot.url),
verify=not config.thoughtspot.disable_ssl,
Expand Down
49 changes: 28 additions & 21 deletions cs_tools/updater/_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,41 +98,47 @@ def get_venv_path(self) -> pathlib.Path:
return cs_tools_venv_dir.resolve()

@staticmethod
def run(*args, raise_on_failure: bool = True, should_stream_output: bool = True, **kwargs) -> sp.CompletedProcess:
    """
    Run a SHELL command.

    The child's stderr is redirected into its stdout and the combined stream
    is read line by line while the process runs. Lines of the form
    "ERROR: <message>" or "WARNING: <message>" are logged at that level and
    collected as stderr (without the prefix); every other line is logged at
    INFO and collected as stdout.

    Parameters
    ----------
    *args
      the command and its arguments, handed to subprocess.Popen as a sequence

    raise_on_failure : bool
      raise a RuntimeError when the process exits with a non-zero code

    should_stream_output : bool
      log each non-empty line as it arrives

    **kwargs
      forwarded to subprocess.Popen

    Returns
    -------
    A CompletedProcess whose stdout / stderr are the collected lines, joined
    with a single newline and encoded to bytes.
    """
    levels = {"ERROR": logging.ERROR, "WARNING": logging.WARNING, "INFO": logging.INFO}
    final_stdout: list[str] = []
    final_stderr: list[str] = []

    # DEV NOTE @boonhapus, 2024/02/15
    # We want to capture and stream the output of the subprocess as it comes in,
    # so that the user doesn't think it's frozen.
    #
    popen_streaming_options = {
        "stdout": sp.PIPE,
        "stderr": sp.STDOUT,
        "text": True,
        "bufsize": 1,
    }

    with sp.Popen(args, **popen_streaming_options, encoding="utf-8", **kwargs) as proc:  # type: ignore[call-overload]
        assert proc.stdout is not None  # guaranteed by stdout=PIPE, narrows the Optional for type checkers

        for raw_line in iter(proc.stdout.readline, ""):
            # readline() keeps the line terminator. Drop it here, otherwise the
            # "\n".join() below would double every newline in the result.
            line = raw_line.rstrip("\n")

            # Only an exact "LEVEL: message" prefix is treated as a leveled line,
            # so text such as "ERRORS found: 3" can't yield an invalid level name.
            level, separator, message = line.partition(": ")

            if separator and level in ("ERROR", "WARNING"):
                buffer = final_stderr
            else:
                level, message, buffer = "INFO", line, final_stdout

            text = message.strip()

            if should_stream_output and text:
                log.log(level=levels[level], msg=text)

            buffer.append(message)

    if raise_on_failure and proc.returncode != 0:
        cmd = " ".join(arg.replace(" ", "") for arg in args)
        raise RuntimeError(f"Failed with exit code: {proc.returncode}\n\nPIP COMMAND BELOW\n{cmd}")

    output_as_bytes = "\n".join(final_stdout).encode()
    errors_as_bytes = "\n".join(final_stderr).encode()
    return sp.CompletedProcess(proc.args, proc.returncode, stdout=output_as_bytes, stderr=errors_as_bytes)

def is_package_installed(self, package_name: str, with_system_python: bool = False) -> bool:
"""Check if a package is installed. This should be called only within CS Tools."""
cp = self.pip("list", "--format", "json", visible_output=False, with_system_python=with_system_python)
cp = self.pip("list", "--format", "json", should_stream_output=False, with_system_python=with_system_python)

for installed in json.loads(cp.stdout.decode()):
if installed["name"] == package_name:
Expand Down Expand Up @@ -250,6 +256,7 @@ def make(self) -> None:
self.run(python, "-m", "venv", self.venv_path.as_posix())

# Ensure `pip` is at least V23.1 so that backjumping is available
self.python("-m", "ensurepip")
self.pip("install", "pip >= 23.1", "--upgrade")

def reset(self) -> None:
Expand Down
Loading

0 comments on commit fcf2ffe

Please sign in to comment.