diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 00000000..116b56d8 Binary files /dev/null and b/.DS_Store differ diff --git a/.github/workflows/fetch-bi-data.yaml b/.github/workflows/fetch-bi-data.yaml index c9d45549..a38745e7 100644 --- a/.github/workflows/fetch-bi-data.yaml +++ b/.github/workflows/fetch-bi-data.yaml @@ -1,5 +1,5 @@ name: - Extract data with CS Tools. + Extract BI Server with CS Tools. on: workflow_dispatch: @@ -26,7 +26,7 @@ jobs: run: echo "days_ago_1=$(date -d "-1 days" +'%Y-%m-%d')" >> $GITHUB_ENV - name: Check out the repository main branch - - uses: actions/checkout@v4 + uses: actions/checkout@v4 - name: Set up Python 3.12 uses: actions/setup-python@v4 diff --git a/.github/workflows/fetch-metdata.yaml b/.github/workflows/fetch-metdata.yaml index 7c1dcfb3..7d6aa62d 100644 --- a/.github/workflows/fetch-metdata.yaml +++ b/.github/workflows/fetch-metdata.yaml @@ -1,5 +1,5 @@ name: - Extract data with CS Tools. + Extract Metadata with CS Tools. on: workflow_dispatch: @@ -19,7 +19,8 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - name: Check out the repository main branch + uses: actions/checkout@v4 - name: Set up Python 3.12 uses: actions/setup-python@v4 @@ -31,4 +32,4 @@ jobs: # --config ENV: tells CS Tools to pull the information from environment variables. - name: Refresh Metadata from ThoughtSpot - run: "cs_tools tools seachable metadata --syncer ${{ secrets.SYNCER_DECLARATIVE_STRING }} --config ENV:" + run: "cs_tools tools searchable metadata --syncer ${{ secrets.SYNCER_DECLARATIVE_STRING }} --config ENV:" diff --git a/.github/workflows/test-bootstrapper.yaml b/.github/workflows/test-bootstrapper.yaml index a38f5991..b50f0245 100644 --- a/.github/workflows/test-bootstrapper.yaml +++ b/.github/workflows/test-bootstrapper.yaml @@ -1,10 +1,10 @@ name: Test CS Tools Bootstrapper on: + workflow_dispatch: push: branches: - master - - dev jobs: test-bootstrapper: diff --git a/.gitignore b/.gitignore index 5ee5f413..3975dd0b 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,9 @@ venv.bak/ # ignore coverage file .coverage +# ignore linting +.mypy_cache/ + # generated documentation docs/terminal-screenshots/*.svg diff --git a/cs_tools/__project__.py b/cs_tools/__project__.py index 894b26dc..ba13ea4f 100644 --- a/cs_tools/__project__.py +++ b/cs_tools/__project__.py @@ -1,4 +1,4 @@ -__version__ = "1.5.1" +__version__ = "1.5.2" __docs__ = "https://thoughtspot.github.io/cs_tools/" __repo__ = "https://github.com/thoughtspot/cs_tools" __help__ = f"{__repo__}/discussions/" diff --git a/cs_tools/api/_utils.py b/cs_tools/api/_utils.py index 89e84030..5ce61676 100644 --- a/cs_tools/api/_utils.py +++ b/cs_tools/api/_utils.py @@ -34,7 +34,7 @@ def is_valid_guid(to_test: str) -> bool: return str(guid) == to_test -def scrub_undefined_sentinel(inp: Any, *, null: Union[UNDEFINED, None]) -> Any: +def scrub_undefined_sentinel(inp: Any, *, null: Union[type[UNDEFINED], None]) -> Any: """ Remove sentinel values from input parameters. @@ -59,7 +59,7 @@ def obfuscate_sensitive_data(request_query: httpx.QueryParams) -> dict[str, Any] httpx.QueryParams.items() returns only the first specified parameter. If the user specifies the parameter multiple times, we'd have to switch to .multi_items(). 
""" - SAFEWORDS = ("password", "access_token") + SAFEWORDS = ("auth_token", "secret_key", "password", "access_token") # don't modify the actual keywords we want to build into the request secure = copy.deepcopy({k: v for k, v in request_query.items() if k not in ("file", "files")}) diff --git a/cs_tools/api/middlewares/logical_table.py b/cs_tools/api/middlewares/logical_table.py index ec8a5c6b..ed56a12b 100644 --- a/cs_tools/api/middlewares/logical_table.py +++ b/cs_tools/api/middlewares/logical_table.py @@ -3,7 +3,6 @@ from typing import TYPE_CHECKING, Optional, Union import logging -from cs_tools import utils from cs_tools.api import _utils from cs_tools.errors import ContentDoesNotExist from cs_tools.types import GUID, MetadataCategory, TableRowsFormat @@ -75,6 +74,12 @@ def all( # noqa: A003 if exclude_system_content: to_extend = [table for table in to_extend if table.get("authorName") not in _utils.SYSTEM_USERS] + # Fake the .type for Models. + to_extend = [ + {**table, "type": "MODEL" if table.get("worksheetVersion") == "V2" else table["type"]} + for table in to_extend + ] + tables.extend([{"metadata_type": "LOGICAL_TABLE", **table} for table in to_extend]) if not tables and raise_on_error: @@ -98,7 +103,9 @@ def all( # noqa: A003 if include_data_source: for table in tables: connection_guid = self.ts.metadata.find_data_source_of_logical_table(guid=table["id"]) - source_details = self.ts.metadata.fetch_data_source_info(guid=connection_guid) + source_details = self.ts.metadata.fetch_header_and_extras( + metadata_type="DATA_SOURCE", guid=connection_guid + ) table["data_source"] = source_details["header"] table["data_source"]["type"] = source_details["type"] @@ -108,8 +115,9 @@ def columns(self, guids: list[GUID], *, include_hidden: bool = False, chunksize: """ """ columns = [] - for chunk in utils.batched(guids, n=chunksize): - r = self.ts.api.v1.metadata_details(guids=chunk, show_hidden=include_hidden) + # for chunk in utils.batched(guids, n=chunksize): + for guid in guids: + r = self.ts.metadata.fetch_header_and_extras(metadata_type="LOGICAL_TABLE", guid=guid) for logical_table in r.json()["storables"]: for column in logical_table.get("columns", []): diff --git a/cs_tools/api/middlewares/metadata.py b/cs_tools/api/middlewares/metadata.py index 1b158443..0dabfaca 100644 --- a/cs_tools/api/middlewares/metadata.py +++ b/cs_tools/api/middlewares/metadata.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional, Union +from typing import TYPE_CHECKING, Optional import functools as ft import logging @@ -8,7 +8,6 @@ from cs_tools.errors import CSToolsError from cs_tools.types import ( GUID, - MetadataObjectSubtype, MetadataObjectType, MetadataParent, PermissionType, @@ -32,31 +31,18 @@ def permissions( self, guids: list[GUID], *, - type: Union[MetadataObjectType, MetadataObjectSubtype], # noqa: A002 + metadata_type: MetadataObjectType, permission_type: PermissionType = PermissionType.explicit, chunksize: int = 25, ) -> TableRowsFormat: - """ """ - type_to_supertype = { - "FORMULA": "LOGICAL_COLUMN", - "CALENDAR_TABLE": "LOGICAL_COLUMN", - "LOGICAL_COLUMN": "LOGICAL_COLUMN", - "QUESTION_ANSWER_BOOK": "QUESTION_ANSWER_BOOK", - "PINBOARD_ANSWER_BOOK": "PINBOARD_ANSWER_BOOK", - "ONE_TO_ONE_LOGICAL": "LOGICAL_TABLE", - "USER_DEFINED": "LOGICAL_TABLE", - "WORKSHEET": "LOGICAL_TABLE", - "AGGR_WORKSHEET": "LOGICAL_TABLE", - "MATERIALIZED_VIEW": "LOGICAL_TABLE", - "SQL_VIEW": "LOGICAL_TABLE", - "LOGICAL_TABLE": "LOGICAL_TABLE", - } - + """ + Fetch permissions 
for a given object type. + """ sharing_access = [] group_guids = [group["id"] for group in self.ts.group.all()] for chunk in utils.batched(guids, n=chunksize): - r = self.ts.api.v1.security_metadata_permissions(metadata_type=type_to_supertype[type], guids=chunk) + r = self.ts.api.v1.security_metadata_permissions(metadata_type=metadata_type, guids=chunk) for data in r.json().values(): for shared_to_principal_guid, permission in data["permissions"].items(): @@ -233,23 +219,36 @@ def objects_exist(self, metadata_type: MetadataObjectType, guids: list[GUID]) -> existence = {header["id"] for header in r.json()["headers"]} return {guid: guid in existence for guid in guids} - @ft.lru_cache(maxsize=1000) # noqa: B019 - def fetch_data_source_info(self, guid: GUID) -> GUID: + @ft.cache # noqa: B019 + def fetch_header_and_extras(self, metadata_type: MetadataObjectType, guid: GUID) -> dict: """ METADATA DETAILS is expensive. Here's our shortcut. """ - r = self.ts.api.v1.metadata_details(metadata_type="DATA_SOURCE", guids=[guid], show_hidden=True) - return r.json()["storables"][0] + r = self.ts.api.v1.metadata_details(metadata_type=metadata_type, guids=[guid], show_hidden=True) + + d = r.json()["storables"][0] + + header_and_extras = { + "metadata_type": metadata_type, + "header": d["header"], + "type": d.get("type"), # READ: .subtype (eg. ONE_TO_ONE_LOGICAL, WORKSHEET, etc..) + # LOGICAL_TABLE extras + "dataSourceId": d.get("dataSourceId"), + "columns": d.get("columns"), + # VIZ extras (answer, liveboard) + "reportContent": d.get("reportContent"), + } + + return header_and_extras - @ft.lru_cache(maxsize=1000) # noqa: B019 def find_data_source_of_logical_table(self, guid: GUID) -> GUID: """ METADATA DETAILS is expensive. Here's our shortcut. """ - r = self.ts.api.v1.metadata_details(metadata_type="LOGICAL_TABLE", guids=[guid], show_hidden=True) - storable = r.json()["storables"][0] - return storable["dataSourceId"] + info = self.fetch_header_and_extras(metadata_type="LOGICAL_TABLE", guid=guid) + return info["dataSourceId"] + @ft.cache # noqa: B019 def table_references(self, guid: GUID, *, tml_type: str, hidden: bool = False) -> list[MetadataParent]: """ Returns a mapping of parent LOGICAL_TABLEs @@ -265,7 +264,7 @@ def table_references(self, guid: GUID, *, tml_type: str, hidden: bool = False) - """ metadata_type = TMLSupportedContent.from_friendly_type(tml_type) - r = self.ts.api.v1.metadata_details(metadata_type=metadata_type, guids=[guid], show_hidden=hidden) + r = self.fetch_header_and_extras(metadata_type=metadata_type, guids=guid, show_hidden=hidden) mappings: list[MetadataParent] = [] if "storables" not in r.json(): diff --git a/cs_tools/cli/commands/config.py b/cs_tools/cli/commands/config.py index 9b20fbcf..79c98609 100644 --- a/cs_tools/cli/commands/config.py +++ b/cs_tools/cli/commands/config.py @@ -40,7 +40,9 @@ def create( config: str = typer.Option(..., help="config file identifier", metavar="NAME"), url: str = typer.Option(..., help="your thoughtspot url or IP"), username: str = typer.Option(..., help="username when logging into ThoughtSpot"), - password: str = typer.Option(None, help="the password you type when using the ThoughtSpot login screen"), + password: str = typer.Option( + None, help="the password you type on the ThoughtSpot login screen, use [b magenta]prompt[/] to type it hidden" + ), secret: str = typer.Option(None, help="the trusted authentication secret key, found in the developer tab"), token: str = typer.Option(None, help="the V2 API bearer token"), default_org: int = 
typer.Option(None, help="org ID to sign into by default"), @@ -50,6 +52,7 @@ def create( disable_ssl: bool = typer.Option( False, "--disable-ssl", help="whether or not to turn off checking the SSL certificate" ), + proxy: str = typer.Option(None, help="proxy server to use to connect to ThoughtSpot"), default: bool = typer.Option(False, "--default", help="whether or not to make this the default configuration"), verbose: bool = typer.Option(False, "--verbose", "-v", help="enable verbose logging"), ): @@ -57,15 +60,18 @@ def create( Create a new config file. """ + if not any((password, secret, token)): + log.error("You must specify at least one authentication method (--password, --secret, or --token)") + raise typer.Exit() + if CSToolsConfig.exists(name=config): log.warning(f'[b yellow]Configuration file "{config}" already exists.') if not Confirm.ask("\nDo you want to overwrite it?", console=rich_console): raise typer.Abort() - if all(safe is None for safe in (password, secret, token)): - log.error("You must specify at least one authentication method") - return -1 + if password == "prompt": + password = rich_console.input("\nType your password [b yellow](your input is hidden)\n", password=True) data = { "name": config, @@ -77,6 +83,7 @@ def create( "bearer_token": token, "default_org": default_org, "disable_ssl": disable_ssl, + "proxy": proxy, }, "verbose": verbose, "temp_dir": temp_dir or cs_tools_venv.tmp_dir, @@ -112,13 +119,19 @@ def modify( config: str = typer.Option(None, help="config file identifier", metavar="NAME"), url: str = typer.Option(None, help="your thoughtspot server"), username: str = typer.Option(None, help="username when logging into ThoughtSpot"), - password: str = typer.Option(None, help="the password you type when using the ThoughtSpot login screen"), + password: str = typer.Option( + None, help="the password you type on the ThoughtSpot login screen, use [b magenta]prompt[/] to type it hidden" + ), secret: str = typer.Option(None, help="the trusted authentication secret key"), token: str = typer.Option(None, help="the V2 API bearer token"), + temp_dir: pathlib.Path = typer.Option( + None, help="the temporary directory to use for uploading files", click_type=Directory() + ), disable_ssl: bool = typer.Option( None, "--disable-ssl", help="whether or not to turn off checking the SSL certificate" ), default_org: int = typer.Option(None, help="org ID to sign into by default"), + proxy: str = typer.Option(None, help="proxy server to use to connect to ThoughtSpot"), default: bool = typer.Option( None, "--default / --remove-default", @@ -140,7 +153,7 @@ def modify( data["thoughtspot"]["default_org"] = default_org if password == "prompt": - password = rich_console.input("[b yellow]Type your password (your input is hidden)\n", password=True) + password = rich_console.input("\nType your password [b yellow](your input is hidden)\n", password=True) if password is not None: data["thoughtspot"]["password"] = password @@ -151,9 +164,15 @@ def modify( if token is not None: data["thoughtspot"]["bearer_token"] = token + if temp_dir is not None: + data["temp_dir"] = temp_dir + if disable_ssl is not None: data["thoughtspot"]["disable_ssl"] = disable_ssl + if proxy is not None: + data["thoughtspot"]["proxy"] = proxy + if default is not None: meta.default_config_name = config meta.save() diff --git a/cs_tools/cli/commands/main.py b/cs_tools/cli/commands/main.py index 3aa5a810..72fbd68d 100644 --- a/cs_tools/cli/commands/main.py +++ b/cs_tools/cli/commands/main.py @@ -172,7 +172,7 @@ def run() 
-> int: this_run = _analytics.CommandExecution.validated_init(**this_run_data, context=app.info.context_settings["obj"]) # Add the analytics to the local database - if not (CURRENT_RUNTIME.is_ci or CURRENT_RUNTIME.is_dev): + if not CURRENT_RUNTIME.is_dev: try: with db.begin() as transaction: stmt = sa.insert(_analytics.CommandExecution).values([this_run.model_dump()]) diff --git a/cs_tools/cli/commands/self.py b/cs_tools/cli/commands/self.py index 12304edb..c4ce7328 100644 --- a/cs_tools/cli/commands/self.py +++ b/cs_tools/cli/commands/self.py @@ -28,6 +28,7 @@ {meta.newer_version_string()} """, + no_args_is_help=True, invoke_without_command=True, ) diff --git a/cs_tools/cli/commands/tools.py b/cs_tools/cli/commands/tools.py index 38de9d49..831a886b 100644 --- a/cs_tools/cli/commands/tools.py +++ b/cs_tools/cli/commands/tools.py @@ -18,6 +18,7 @@ which aren't native to ThoughtSpot or advanced functionality for clients who have a well-adopted platform. """, + no_args_is_help=True, subcommand_metavar="", invoke_without_command=True, epilog=( diff --git a/cs_tools/cli/dependencies/thoughtspot.py b/cs_tools/cli/dependencies/thoughtspot.py index 5d22222a..5268c833 100644 --- a/cs_tools/cli/dependencies/thoughtspot.py +++ b/cs_tools/cli/dependencies/thoughtspot.py @@ -115,7 +115,8 @@ def __enter__(self): command_params = [p.name for p in ctx.command.params] overrides = {k: ctx.params.pop(k) for k in ctx.params.copy() if k not in command_params} - log.debug(f"Command Overrides: {' '.join(overrides)}") + if overrides: + log.debug(f"Command Overrides: {' '.join(overrides)}") cfg = CSToolsConfig.from_name(config, **overrides, automigrate=True) @@ -128,7 +129,7 @@ def __enter__(self): def _send_analytics_in_background(self) -> None: """Send analyics in the background.""" - if meta.analytics.is_opted_in is not True or meta.environment.is_dev: + if meta.analytics.is_opted_in is not True or meta.environment.is_dev or meta.environment.is_ci: return # AVOID CIRCULAR IMPORTS WITH cli.ux diff --git a/cs_tools/cli/grid.py b/cs_tools/cli/grid.py new file mode 100644 index 00000000..ca03a0fc --- /dev/null +++ b/cs_tools/cli/grid.py @@ -0,0 +1,239 @@ +from __future__ import annotations + +from collections.abc import Collection +from typing import Literal, Optional +import datetime as dt +import itertools as it +import time +import uuid + +from rich import box +from rich.align import Align +from rich.console import Console, Group, RenderableType +from rich.live import Live +from rich.table import Column, Table + +DEFAULT_PROGRESS_INDICATOR = ( + ")======", + "=)=====", + "==)====", + "===)===", + "====)==", + "=====)=", + "======)", + "======(", + "=====(=", + "====(==", + "===(===", + "==(====", + "=(=====", + "(======", +) + + +class TableItem: + """ """ + + def __init__( + self, name: str, *, id: Optional[str] = None, progression: Collection[str] = DEFAULT_PROGRESS_INDICATOR + ): + assert all(len(segment) % 2 != 0 for segment in progression) + self.id = id or uuid.uuid4().hex.upper()[0:6] + self.name = name + self.progression = list(progression) + self.indicator = it.cycle(progression) + self.started_at: Optional[float] = None + self._duration: float = 0 + self.state: Literal["NEVER_STARTED", "ACTIVE", "STOPPED", "ERRORED"] = "NEVER_STARTED" + + @property + def duration(self) -> float: + if self.state == "NEVER_STARTED": + return self._duration + + if self.state == "ACTIVE": + assert self.started_at is not None + return time.perf_counter() - self.started_at + + return self._duration + + def __rich__(self) -> str: + if
self.state == "NEVER_STARTED": + return "" + + if self.state == "ERRORED": + return ":thumbs_down:" + + if self.state == "STOPPED": + return ":thumbs_up:" + + return next(self.indicator) + + def __enter__(self) -> TableItem: + self.started_at = time.perf_counter() + self.state = "ACTIVE" + return self + + def __exit__(self, exc_type, exc, trace) -> None: + assert self.started_at is not None + self._duration = self._duration + (time.perf_counter() - self.started_at) + self.started_at = None + self.state = "STOPPED" if exc is None else "ERRORED" + + +class VerticalProgressTable: + """ """ + + def __init__(self, items: Collection[TableItem], *, justify: str = "CENTER", console=None): + self.items = items + self.justify = justify + self.console = console if console is not None else Console() + self.started_at: float = 0 + self.live = Live(get_renderable=self.get_renderable, console=self.console) + + def start(self) -> None: + """Begin the live rendering of this table.""" + self.started_at = time.perf_counter() + self.live.start() + + def stop(self) -> None: + """Stop the live rendering of this table.""" + self.live.stop() + + def get_renderable(self) -> RenderableType: + """Build the renderable.""" + max_width_of_progression = max(len(_) for i in self.items for _ in i.progression) + progress_table = Table( + Column(justify="center", width=max(3, max_width_of_progression)), # PROGRESS INDICATOR + Column(justify="left"), # COLUMN NAME + Column(justify="right", width=len("XXXX.00 s")), # DURATION INDICATOR + box=box.SIMPLE_HEAD, + ) + + for row in self.items: + renderables = (row, row.name, "" if row.state == "NEVER_STARTED" else f"{row.duration:.2f} s") + progress_table.add_row(*renderables) + + summary_table = Table( + Column(width=max(3, max_width_of_progression)), + Column(justify="right"), + Column(justify="left"), + box=None, + ) + summary_table.add_row( + "", "Total Elapsed", f"{dt.timedelta(seconds=int(time.perf_counter() - self.started_at))}" + ) + + grouped = Group(progress_table, summary_table) + return Align(grouped, align=self.justify.lower(), width=150) + + +class HorizontalProgressTable: + """ """ + + def __init__(self, rows: dict[str, Collection[TableItem]], *, justify: str = "CENTER", console=None): + self.row_width = len(next(iter(rows.values()))) + assert all(len(items) == self.row_width for items in rows.values()), "Row widths should be homogenous" + self.rows = rows + self.justify = justify + self.console = console if console is not None else Console() + self.started_at: float = 0 + self.live = Live(get_renderable=self.get_renderable, console=self.console) + + def start(self) -> None: + """Begin the live rendering of this table.""" + self.started_at = time.perf_counter() + self.live.start() + + def stop(self) -> None: + """Stop the live rendering of this table.""" + self.live.stop() + + def get_renderable(self) -> RenderableType: + """Build the renderable.""" + first_row = next(iter(self.rows.values())) + max_width_of_progression = max(len(_) for i in first_row for _ in i.progression) + max_width_of_headers = max(len(i.name) for i in first_row) + + progress_table = Table( + Column(justify="right"), + *( + Column(header=item.name, justify="center", width=max(3, max_width_of_headers, max_width_of_progression)) + for item in first_row + ), + box=box.SIMPLE_HEAD, + ) + + for row_header, row_values in self.rows.items(): + renderables = (row_header, *row_values) + progress_table.add_row(*renderables) + + summary_table = Table( + Column(width=max(3, max_width_of_headers, 
max_width_of_progression)), + Column(justify="right"), + Column(justify="left"), + box=None, + ) + summary_table.add_row( + "", "Total Elapsed", f"{dt.timedelta(seconds=int(time.perf_counter() - self.started_at))}" + ) + + grouped = Group(progress_table, summary_table) + return Align(grouped, align=self.justify.lower(), width=150) + + +# if __name__ == "__main__": +# import random +# import time + +# console = Console() + +# PROGRESSION = ("😊", "🙂", "😀", "😄", "😆", "😁", "ðŸĪŠ", "ðŸĪŠ") + +# if 1 == 2: +# tasks = [ +# TableItem(name="Collecting data from [b blue]TS: BI Server", progression=PROGRESSION), +# TableItem(name="Taking our time to really get things right", progression=PROGRESSION), +# TableItem(name="Doing the needful", progression=PROGRESSION), +# TableItem(name="Oh you're still here?", progression=PROGRESSION), +# TableItem(name="Writing rows to [b blue]CSV Syncer", progression=PROGRESSION), +# ] + +# table = VerticalProgressTable(items=tasks, console=console) +# iterable = tasks + +# else: +# tasks = { # type: ignore +# "Org One": [ +# TableItem(name="A", progression=PROGRESSION), +# TableItem(name="BB", progression=PROGRESSION), +# TableItem(name="CCC", progression=PROGRESSION), +# TableItem(name="DDDD", progression=PROGRESSION), +# TableItem(name="EE", progression=PROGRESSION), +# ], +# "Org Two": [ +# TableItem(name="A", progression=PROGRESSION), +# TableItem(name="BB", progression=PROGRESSION), +# TableItem(name="CCC", progression=PROGRESSION), +# TableItem(name="DDDD", progression=PROGRESSION), +# TableItem(name="EE", progression=PROGRESSION), +# ], +# "Org Three": [ +# TableItem(name="A", progression=PROGRESSION), +# TableItem(name="BB", progression=PROGRESSION), +# TableItem(name="CCC", progression=PROGRESSION), +# TableItem(name="DDDD", progression=PROGRESSION), +# TableItem(name="EE", progression=PROGRESSION), +# ], +# } + +# table = HorizontalProgressTable(rows=tasks, console=console) # type: ignore +# iterable = (_ for _ in it.chain.from_iterable(tasks.values())) # type: ignore + +# table.start() + +# for task in iterable: +# with task: +# time.sleep(random.randint(1, 5)) + +# table.stop() diff --git a/cs_tools/cli/tools/bulk-sharing/app.py b/cs_tools/cli/tools/bulk-sharing/app.py index d6bbfae8..ca4014ae 100644 --- a/cs_tools/cli/tools/bulk-sharing/app.py +++ b/cs_tools/cli/tools/bulk-sharing/app.py @@ -37,7 +37,7 @@ def cls_ui(ctx: typer.Context, webserver_port: int = typer.Option(5000, help="po _scoped["ts"] = ts - rich_console.print("starting webserver..." 
f"\nplease visit [green]http://{visit_ip}:5000/[/] in your browser") + log.info(f"Starting webserver...\nplease visit [green]http://{visit_ip}:{webserver_port}/[/] in your browser\n\n") uvicorn.run( "cs_tools.cli.tools.bulk-sharing.web_app:web_app", diff --git a/cs_tools/cli/tools/bulk-sharing/web_app.py b/cs_tools/cli/tools/bulk-sharing/web_app.py index 68f7a2d6..a4a20e20 100644 --- a/cs_tools/cli/tools/bulk-sharing/web_app.py +++ b/cs_tools/cli/tools/bulk-sharing/web_app.py @@ -29,7 +29,11 @@ @web_app.get("/", response_class=HTMLResponse) async def read_index(request: Request): - data = {"request": request, "host": _scoped["ts"].platform.url, "user": _scoped["ts"].me.display_name} + data = { + "request": request, + "host": _scoped["ts"].session_context.thoughtspot.url, + "user": _scoped["ts"].session_context.user.display_name, + } return templates.TemplateResponse("index.html", data) @@ -52,7 +56,7 @@ async def _(type: str = Body(...), guids: list[str] = Body(...), permissions: di TSSetPermissionRequest """ permissions = {guid: data["shareMode"] for guid, data in permissions.items()} - r = _scoped["ts"].api.security_share(metadata_type=type, guids=guids, permissions=permissions) + r = _scoped["ts"].api.v1.security_share(metadata_type=type, guids=guids, permissions=permissions) try: return r.json() @@ -69,7 +73,7 @@ async def _(type: str = Body(...), guids: list[str] = Body(...)): # noqa: A002 defined_permissions = {column_guid: {"permissions": {}} for column_guid in guids} for chunk in utils.batched(guids, n=15): - r = _scoped["ts"].api.security_metadata_permissions(metadata_type=type, guids=list(chunk)) + r = _scoped["ts"].api.v1.security_metadata_permissions(metadata_type=type, guids=list(chunk)) try: r.raise_for_status() @@ -98,7 +102,7 @@ async def _(): """ TSGetUserGroupsRequest """ - r = _scoped["ts"].api.metadata_list( + r = _scoped["ts"].api.v1.metadata_list( metadata_type="USER_GROUP", category="ALL", sort="DEFAULT", offset=-1, auto_created=False ) return r.json() @@ -109,5 +113,5 @@ async def _(): """ TSGetTablesRequest """ - r = _scoped["ts"].api.metadata_list(metadata_type="LOGICAL_TABLE", subtypes=["ONE_TO_ONE_LOGICAL"]) + r = _scoped["ts"].api.v1.metadata_list(metadata_type="LOGICAL_TABLE", subtypes=["ONE_TO_ONE_LOGICAL"]) return r.json() diff --git a/cs_tools/cli/tools/scriptability/tmlfs.py b/cs_tools/cli/tools/scriptability/tmlfs.py index f1bfd48a..84958782 100644 --- a/cs_tools/cli/tools/scriptability/tmlfs.py +++ b/cs_tools/cli/tools/scriptability/tmlfs.py @@ -222,6 +222,7 @@ def load_tml_for_guid(self, guid: GUID) -> TML: app = typer.Typer( name="tmlsfs", help="TML Scriptability File System", + no_args_is_help=True, invoke_without_command=True, ) diff --git a/cs_tools/cli/tools/searchable/app.py b/cs_tools/cli/tools/searchable/app.py index 6cdceee6..9748402d 100644 --- a/cs_tools/cli/tools/searchable/app.py +++ b/cs_tools/cli/tools/searchable/app.py @@ -46,6 +46,7 @@ def deploy( ), show_default=False, ), + org_override: str = typer.Option(None, "--org", help="the org to fetch history from"), export: pathlib.Path = typer.Option( None, help="download the TML files of the SpotApp", @@ -58,6 +59,9 @@ def deploy( """ ts = ctx.obj.thoughtspot + if ts.session_context.thoughtspot.is_orgs_enabled and org_override is not None: + ts.org.switch(org=org_override) + tasks = [ ("connection_details", "Getting details for data source"), ("customize_spotapp", "Customizing [b blue]Searchable Worksheets[/] to your environment"), @@ -66,7 +70,6 @@ def deploy( with LiveTasks(tasks, 
console=rich_console) as tasks: with tasks["connection_details"] as this_task: - connection_guid: str = None connection_name: str = None dialect: str = None @@ -76,14 +79,13 @@ def deploy( else: try: - info = ts.metadata.fetch_data_source_info(connection_guid) - except AttributeError: - log.error(f"Could not find a connection with guid {connection_guid}") + info = ts.metadata.fetch_header_and_extras(metadata_type="DATA_SOURCE", guid=connection_guid) + except (KeyError, IndexError): + log.error(f"Could not find a connection with guid '{connection_guid}'") raise typer.Exit(1) from None - connection_guid = info["header"]["id"] connection_name = info["header"]["name"] - dialect = info["header"]["type"] + dialect = info["type"] # Care for UPPERCASE or lowercase identity convention in dialects should_upper = "SNOWFLAKE" in dialect @@ -180,13 +182,9 @@ def bi_server( # DEV NOTE: @boonhapus # As of 9.10.0.cl , TS: BI Server only resides in the Primary Org(0), so switch to it - if ts.session_context.thoughtspot.is_orgs_enabled: ts.org.switch(org=0) - elif org_override is not None: - org_override = None - SEARCH_DATA_DATE_FMT = "%m/%d/%Y" SEARCH_TOKENS = ( "[org id] [incident id] [timestamp].'detailed' [url] [http response code] " @@ -209,7 +207,7 @@ def bi_server( with tasks["gather_search"]: data = ts.search(SEARCH_TOKENS, worksheet="TS: BI Server") - # SEARCH DATA API SEEMS TO HATE ISSUES WITH TIMEZONES AND CAUSES DUPLICATION OF DATA + # SEARCH DATA API SEEMS TO HAVE ISSUES WITH TIMEZONES AND CAUSES DUPLICATION OF DATA data = [dict(t) for t in {tuple(sorted(d.items())) for d in data}] # CLUSTER BY --> TIMESTAMP .. everything else is irrelevant after TS. @@ -427,30 +425,18 @@ def metadata( # access_controls row[8] = ":fire:" - types = { - "QUESTION_ANSWER_BOOK": ("QUESTION_ANSWER_BOOK",), - "PINBOARD_ANSWER_BOOK": ("PINBOARD_ANSWER_BOOK",), - "LOGICAL_TABLE": ( - "ONE_TO_ONE_LOGICAL", - "USER_DEFINED", - "WORKSHEET", - "AGGR_WORKSHEET", - "MATERIALIZED_VIEW", - "SQL_VIEW", - "LOGICAL_TABLE", - ), - } + types = ["QUESTION_ANSWER_BOOK", "PINBOARD_ANSWER_BOOK", "LOGICAL_TABLE", "DATA_SOURCE"] if include_column_access: - types["LOGICAL_COLUMN"] = ("FORMULA", "CALENDAR_TABLE", "LOGICAL_COLUMN") + types.append("LOGICAL_COLUMN") # NOTE: # In the case the ThoughtSpot cluster has a high number of users, the # column access block will take an incredibly long amount of time to # complete. We can probably find a better algorithm. 
# - for metadata_type, metadata_subtypes in types.items(): - guids = [obj["id"] for obj in content if obj["metadata_type"] in metadata_subtypes] + for metadata_type in types: + guids = [obj["id"] for obj in content if obj["metadata_type"] == metadata_type] r = ts.metadata.permissions(guids, type=metadata_type) temp_sync.dump( models.SharingAccess.__tablename__, data=transform.to_sharing_access(r, cluster=cluster_uuid) diff --git a/cs_tools/cli/tools/searchable/models.py b/cs_tools/cli/tools/searchable/models.py index 4a05a4a3..ee86ab86 100644 --- a/cs_tools/cli/tools/searchable/models.py +++ b/cs_tools/cli/tools/searchable/models.py @@ -5,7 +5,7 @@ import logging from sqlalchemy.schema import Column -from sqlalchemy.types import BigInteger +from sqlalchemy.types import BigInteger, Text from sqlmodel import Field import pydantic @@ -59,7 +59,7 @@ class Group(ValidatedSQLModel, table=True): org_id: int = Field(primary_key=True) group_guid: str = Field(primary_key=True) group_name: str - description: Optional[str] + description: Optional[str] = Field(sa_column=Column(Text, info={"length_override": "MAX"})) display_name: str sharing_visibility: str created: dt.datetime @@ -141,7 +141,7 @@ class MetadataObject(ValidatedSQLModel, table=True): org_id: int = Field(primary_key=True) object_guid: str = Field(primary_key=True) name: str - description: Optional[str] + description: Optional[str] = Field(sa_column=Column(Text, info={"length_override": "MAX"})) author_guid: str created: dt.datetime modified: dt.datetime @@ -168,7 +168,7 @@ class MetadataColumn(ValidatedSQLModel, table=True): column_guid: str = Field(primary_key=True) object_guid: str column_name: str - description: Optional[str] + description: Optional[str] = Field(sa_column=Column(Text, info={"length_override": "MAX"})) data_type: str column_type: str additive: bool @@ -219,7 +219,7 @@ class DependentObject(ValidatedSQLModel, table=True): dependent_guid: str = Field(primary_key=True) column_guid: str = Field(primary_key=True) name: str - description: Optional[str] + description: Optional[str] = Field(sa_column=Column(Text, info={"length_override": "MAX"})) author_guid: str created: dt.datetime modified: dt.datetime @@ -270,10 +270,10 @@ class BIServer(ValidatedSQLModel, table=True): client_type: Optional[str] client_id: Optional[str] answer_book_guid: Optional[str] - viz_id: Optional[str] + viz_id: Optional[str] = Field(sa_column=Column(Text, info={"length_override": "MAX"})) user_id: Optional[str] user_action: Optional[str] - query_text: Optional[str] + query_text: Optional[str] = Field(sa_column=Column(Text, info={"length_override": "MAX"})) response_size: Optional[int] = Field(sa_column=Column(BigInteger)) latency_us: Optional[int] = Field(sa_column=Column(BigInteger)) impressions: Optional[float] diff --git a/cs_tools/cli/tools/searchable/static/CS Tools - Worksheet Column Utilization.worksheet.tml b/cs_tools/cli/tools/searchable/static/CS Tools - Worksheet Column Utilization.worksheet.tml index 8877091e..10324247 100644 --- a/cs_tools/cli/tools/searchable/static/CS Tools - Worksheet Column Utilization.worksheet.tml +++ b/cs_tools/cli/tools/searchable/static/CS Tools - Worksheet Column Utilization.worksheet.tml @@ -172,7 +172,7 @@ worksheet: expr: "concat ( '{caption}' , [TS_DEPENDENT_OBJECT_1::Dependent] , '{/caption}' , [TS_CLUSTER_2::ThoughtSpot URL] , if ( [TS_DEPENDENT_OBJECT_1::Dependent Type] = 'question_answer_book' ) then '#/saved-answer/' else if ( [TS_DEPENDENT_OBJECT_1::Dependent Type] = 'pinboard_answer_book' ) then 
\"#/pinboard/\" else \"#/data/tables/\" , [TS_DEPENDENT_OBJECT_1::Dependent GUID] )" was_auto_generated: false - name: "Filtered: Column Parent is Worksheet" - expr: "[TS_METADATA_OBJECT_1::Object Type] = 'logical_table' and [TS_METADATA_OBJECT_1::Object Subtype] = 'worksheet'" + expr: "[TS_METADATA_OBJECT_1::Object Type] = 'logical_table' and [TS_METADATA_OBJECT_1::Object Subtype] in { 'worksheet', 'model' }" was_auto_generated: false - name: Has Synonyms expr: "count ( [TS_COLUMN_SYNONYM_1::Column Synonym] ) > 0" diff --git a/cs_tools/cli/tools/searchable/static/CS Tools Worksheet Column Utilization.liveboard.tml b/cs_tools/cli/tools/searchable/static/CS Tools Worksheet Column Utilization.liveboard.tml index 3e6fe713..9a528968 100644 --- a/cs_tools/cli/tools/searchable/static/CS Tools Worksheet Column Utilization.liveboard.tml +++ b/cs_tools/cli/tools/searchable/static/CS Tools Worksheet Column Utilization.liveboard.tml @@ -1271,13 +1271,3 @@ liveboard: "y": 0 height: 4 width: 6 - views: - - view_guid: b2f594b9-6eae-40ea-ab48-cb9390e5dd63 - name: GTM - view_filters: - - column: - - Worksheet - oper: in - values: - - GTM - is_public: true diff --git a/cs_tools/cli/tools/searchable/transform.py b/cs_tools/cli/tools/searchable/transform.py index c25c4502..c70e019a 100644 --- a/cs_tools/cli/tools/searchable/transform.py +++ b/cs_tools/cli/tools/searchable/transform.py @@ -12,7 +12,7 @@ ArbitraryJsonFormat = list[dict[str, Any]] -def to_cluster(data: SessionContext) -> list[TableRowsFormat]: +def to_cluster(data: SessionContext) -> TableRowsFormat: """ Extract information from the active session. @@ -26,13 +26,13 @@ def to_cluster(data: SessionContext) -> list[TableRowsFormat]: return [info.model_dump()] -def to_org(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_org(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Simple field renaming. SOURCE: /tspublic/v1/orgs """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] out.append( models.Org.validated_init( cluster_guid=cluster, @@ -44,13 +44,13 @@ def to_org(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: return [model.model_dump() for model in out] -def to_group(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_group(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Mostly simple field renaming, flattening of orgs. SOURCE: /tspublic/v1/group """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: for org_id in row["header"].get("orgIds", [0]): @@ -72,13 +72,13 @@ def to_group(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: return [model.model_dump() for model in out] -def to_group_privilege(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_group_privilege(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Mostly simple field renaming, flattening of privileges. SOURCE: /tspublic/v1/group . privileges """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: for privilege in row["privileges"]: @@ -91,13 +91,13 @@ def to_group_privilege(data: ArbitraryJsonFormat, cluster: str) -> list[TableRow return [model.model_dump() for model in out] -def to_user(data: ArbitraryJsonFormat, ever_seen: set[str], cluster: str) -> list[TableRowsFormat]: +def to_user(data: ArbitraryJsonFormat, ever_seen: set[str], cluster: str) -> TableRowsFormat: """ Mostly simple field renaming. 
SOURCE: /tspublic/v1/user """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: if row["header"]["id"] in ever_seen: @@ -120,15 +120,13 @@ def to_user(data: ArbitraryJsonFormat, ever_seen: set[str], cluster: str) -> lis return [model.model_dump() for model in out] -def to_org_membership( - data: ArbitraryJsonFormat, cluster: str, ever_seen: set[tuple[str, ...]] -) -> list[TableRowsFormat]: +def to_org_membership(data: ArbitraryJsonFormat, cluster: str, ever_seen: set[tuple[str, ...]]) -> TableRowsFormat: """ Mostly simple field renaming, flattening of assigned orgs. SOURCE: /tspublic/v1/user . orgIds """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: for org_id in row["header"]["orgIds"]: @@ -144,13 +142,13 @@ def to_org_membership( return [model.model_dump() for model in out] -def to_group_membership(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_group_membership(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Mostly simple field renaming, flattening of assigned groups. SOURCE: /tspublic/v1/group . assignedGroups """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: for group in row["assignedGroups"]: @@ -163,13 +161,13 @@ def to_group_membership(data: ArbitraryJsonFormat, cluster: str) -> list[TableRo return [model.model_dump() for model in out] -def to_tag(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_tag(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Mostly simple field renaming, flattening of orgs. SOURCE: /tspublic/v1/metadata/list ? type = TAG """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: for org_id in row.get("orgIds", [0]): @@ -189,14 +187,14 @@ def to_tag(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: return [model.model_dump() for model in out] -def to_data_source(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_data_source(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Mostly simple field renaming, flattening of orgs. SOURCE: /tspublic/v1/metadata/list ? details = LOGICAL_TABLE """ ever_seen: set[tuple[str]] = set() - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: if row.get("metadata_type", None) != "LOGICAL_TABLE": @@ -221,15 +219,13 @@ def to_data_source(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFor return [model.model_dump() for model in out] -def to_metadata_object( - data: ArbitraryJsonFormat, cluster: str, ever_seen: set[tuple[str, ...]] -) -> list[TableRowsFormat]: +def to_metadata_object(data: ArbitraryJsonFormat, cluster: str, ever_seen: set[tuple[str, ...]]) -> TableRowsFormat: """ Mostly simple field renaming, flattening of orgs. SOURCE: /tspublic/v1/metadata/list ? type = { LOGICAL_TABLE|QUESTION_ANSWER_BOOK|PINBOARD_ANSWER_BOOK } """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: for org_id in row.get("orgIds", [0]): @@ -260,13 +256,13 @@ def to_metadata_object( return [model.model_dump() for model in out] -def to_metadata_column(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_metadata_column(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Mostly simple field renaming. SOURCE: /tspublic/v1/metadata/list ? 
type = LOGICAL_COLUMN (cs_tools.middleswares.metadata.columns) """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: out.append(models.MetadataColumn.validated_init(cluster_guid=cluster, **row)) @@ -274,7 +270,7 @@ def to_metadata_column(data: ArbitraryJsonFormat, cluster: str) -> list[TableRow return [model.model_dump() for model in out] -def to_column_synonym(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_column_synonym(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Clean and de-duplicate synonyms. @@ -287,11 +283,7 @@ def to_column_synonym(data: ArbitraryJsonFormat, cluster: str) -> list[TableRows for row in data: for synonym in row["synonyms"]: - if synonym is None: - log.warning( - f"{row['column_guid']} has a NULL synonym, this shouldn't be possible, please share details with " - f"the CS Tools team." - ) + if synonym is None or not synonym: continue model = models.ColumnSynonym.validated_init( @@ -311,13 +303,13 @@ def to_column_synonym(data: ArbitraryJsonFormat, cluster: str) -> list[TableRows return [row.model_dump() for row in sanitized] -def to_tagged_object(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_tagged_object(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Mostly simple field renaming. SOURCE: /tspublic/v1/metadata/list ? type = TAG """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: for tag in row["tags"]: @@ -332,13 +324,13 @@ def to_tagged_object(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsF return [model.model_dump() for model in out] -def to_dependent_object(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_dependent_object(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Mostly simple field renaming. SOURCE: /tspublic/v1/dependency/listdependents """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: out.append( @@ -360,13 +352,13 @@ def to_dependent_object(data: ArbitraryJsonFormat, cluster: str) -> list[TableRo return [model.model_dump() for model in out] -def to_sharing_access(data: ArbitraryJsonFormat, cluster: str) -> list[TableRowsFormat]: +def to_sharing_access(data: ArbitraryJsonFormat, cluster: str) -> TableRowsFormat: """ Mostly simple field renaming. SOURCE: /tspublic/v1/security/metadata/permissions """ - out: list[TableRowsFormat] = [] + out: TableRowsFormat = [] for row in data: PK = (row["object_guid"], row.get("shared_to_user_guid", "NULL"), row.get("shared_to_group_guid", "NULL")) diff --git a/cs_tools/cli/tools/user-management/app.py b/cs_tools/cli/tools/user-management/app.py index 814f3489..0b4df2ca 100644 --- a/cs_tools/cli/tools/user-management/app.py +++ b/cs_tools/cli/tools/user-management/app.py @@ -1,10 +1,8 @@ from __future__ import annotations -import datetime as dt import itertools as it import json import logging -import pathlib import httpx import typer @@ -16,9 +14,8 @@ from cs_tools.cli.types import MetadataType, MultipleChoiceType, SyncerProtocolType from cs_tools.cli.ux import CSToolsApp, rich_console from cs_tools.errors import CSToolsError -from cs_tools.updater import cs_tools_venv -from . import layout, work +from . 
import layout, models, work log = logging.getLogger(__name__) app = CSToolsApp(help="""Manage Users and Groups in bulk.""") @@ -111,7 +108,7 @@ def rename( to_username: str = typer.Option(None, "--to", help="new username"), syncer: DSyncer = typer.Option( None, - click_type=SyncerProtocolType(), + click_type=SyncerProtocolType(models=models.USER_MODELS), help="protocol and path for options to pass to the syncer", rich_help_panel="Syncer Options", ), @@ -181,9 +178,7 @@ def rename( responses[from_username] = r # back up the state of users - filename = f"user-rename-{dt.datetime.now(tz=dt.timezone.utc)::%Y%m%dT%H%M%S}" - with pathlib.Path(cs_tools_venv.app_dir / ".cache" / f"{filename}.json").open("w") as f: - json.dump([r.text for r in responses], f, indent=4) + work._backup_security([r.text for r in responses.values()]) with tasks["update_users"]: for from_username, r in responses.items(): @@ -216,7 +211,7 @@ def sync( ), syncer: DSyncer = typer.Option( ..., - click_type=SyncerProtocolType(), + click_type=SyncerProtocolType(models=models.USER_MODELS), help="protocol and path for options to pass to the syncer", rich_help_panel="Syncer Options", ), @@ -308,9 +303,7 @@ def sync( # back up the state of users u, g, x = work._get_current_security(ts) - filename = f"user-sync-{dt.datetime.now(tz=dt.timezone.utc):%Y%m%dT%H%M%S}" - with pathlib.Path(cs_tools_venv.app_dir / ".cache" / f"{filename}.json").open("w") as f: - json.dump({"users": u, "groups": g, "memberships": x}, f, indent=4) + work._backup_security({"users": u, "groups": g, "memberships": x}) try: r = ts.api.v1.user_sync( diff --git a/cs_tools/cli/tools/user-management/models.py b/cs_tools/cli/tools/user-management/models.py new file mode 100644 index 00000000..a0e6edd5 --- /dev/null +++ b/cs_tools/cli/tools/user-management/models.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from typing import Any, Optional +import logging + +from sqlalchemy.schema import Column +from sqlalchemy.types import Text +from sqlmodel import Field +import pydantic + +from cs_tools.datastructures import ValidatedSQLModel + +log = logging.getLogger(__name__) + + +class AuthUser(ValidatedSQLModel, table=True): + __tablename__ = "ts_auth_sync_users" + username: str = Field(primary_key=True) + email: Optional[str] + display_name: str + sharing_visibility: str + user_type: str + + +class AuthGroup(ValidatedSQLModel, table=True): + __tablename__ = "ts_auth_sync_groups" + group_name: str = Field(primary_key=True) + description: Optional[str] = Field(sa_column=Column(Text, info={"length_override": "MAX"})) + display_name: str + sharing_visibility: str + group_type: str + + @pydantic.field_validator("description", mode="before") + @classmethod + def remove_leading_trailing_spaces(cls, value: Any) -> str: + return None if value is None else value.strip() + + +class AuthGroupMembership(ValidatedSQLModel, table=True): + __tablename__ = "ts_auth_sync_xref" + principal_name: str = Field(primary_key=True) + principal_type: str = Field(primary_key=True) + group_name: str = Field(primary_key=True) + + +USER_MODELS = [AuthUser, AuthGroup, AuthGroupMembership] diff --git a/cs_tools/cli/tools/user-management/work.py b/cs_tools/cli/tools/user-management/work.py index 56ed9ce0..514447e0 100644 --- a/cs_tools/cli/tools/user-management/work.py +++ b/cs_tools/cli/tools/user-management/work.py @@ -2,57 +2,55 @@ from typing import TYPE_CHECKING import collections +import datetime as dt +import json +import pathlib -from cs_tools._compat import TypedDict +from 
cs_tools.updater import cs_tools_venv + +from . import models if TYPE_CHECKING: from cs_tools.thoughtspot import ThoughtSpot from cs_tools.types import SecurityPrincipal -class _UserInfo(TypedDict): - username: str - email: str - display_name: str - visibility: str # one of: DEFAULT, NOT_SHAREABLE - type: str # principal # noqa: A003 - - -class _GroupInfo(TypedDict): - username: str - email: str - display_name: str - visibility: str # one of: DEFAULT, NOT_SHAREABLE - type: str # principal # noqa: A003 - - -class _AssociationInfo(TypedDict): - principal_name: str - principal_type: str - group_name: str - - -def _get_current_security(ts: ThoughtSpot) -> tuple[list[_UserInfo], list[_GroupInfo], list[_AssociationInfo]]: +def _get_current_security( + ts: ThoughtSpot, +) -> tuple[list[models.AuthUser], list[models.AuthGroup], list[models.AuthGroupMembership]]: """ """ users_and_groups: list[SecurityPrincipal] = ts.api.v1.user_list().json() - users: list[_UserInfo] = [] - groups: list[_GroupInfo] = [] - associations: list[_AssociationInfo] = [] + users: list[models.AuthUser] = [] + groups: list[models.AuthGroup] = [] + associations: list[models.AuthGroupMembership] = [] for principal in users_and_groups: data = { "display_name": principal["displayName"], - "visibility": principal["visibility"], - "type": principal["principalTypeEnum"], + "sharing_visibility": principal["visibility"], } if "USER" in principal["principalTypeEnum"]: type_ = "USER" - users.append({"username": principal["name"], "email": principal["mail"], **data}) + users.append( + { + "username": principal["name"], + "email": principal["mail"], + "user_type": principal["principalTypeEnum"], + **data, + } + ) if "GROUP" in principal["principalTypeEnum"]: type_ = "GROUP" - groups.append({"group_name": principal["name"], "description": principal.get("description"), **data}) + groups.append( + { + "group_name": principal["name"], + "description": principal.get("description"), + "group_type": principal["principalTypeEnum"], + **data, + } + ) for group in principal["groupNames"]: associations.append({"principal_name": principal["name"], "principal_type": type_, "group_name": group}) @@ -61,7 +59,7 @@ def _get_current_security(ts: ThoughtSpot) -> tuple[list[_UserInfo], list[_Group def _form_principals( - users: list[_UserInfo], groups: list[_GroupInfo], xref: list[_AssociationInfo] + users: list[models.AuthUser], groups: list[models.AuthGroup], xref: list[models.AuthGroupMembership] ) -> list[SecurityPrincipal]: principals = [] principals_groups = collections.defaultdict(list) @@ -75,9 +73,9 @@ def _form_principals( "name": group["group_name"], "displayName": group["display_name"], "description": group["description"], - "principalTypeEnum": group["type"], + "principalTypeEnum": group["group_type"], "groupNames": principals_groups[group["group_name"]], - "visibility": group["visibility"], + "visibility": group["sharing_visibility"], } ) @@ -87,10 +85,17 @@ def _form_principals( "name": user["username"], "displayName": user["display_name"], "mail": user["email"], - "principalTypeEnum": user["type"], + "principalTypeEnum": user["user_type"], "groupNames": principals_groups[user["username"]], - "visibility": user["visibility"], + "visibility": user["sharing_visibility"], } ) return principals + + +def _backup_security(data) -> None: + filename = f"user-sync-{dt.datetime.now(tz=dt.timezone.utc):%Y%m%dT%H%M%S}" + + with pathlib.Path(cs_tools_venv.app_dir / ".cache" / f"{filename}.json").open("w") as f: + json.dump(data, f, indent=4) diff --git 
a/cs_tools/datastructures.py b/cs_tools/datastructures.py index e2d91666..456cb21a 100644 --- a/cs_tools/datastructures.py +++ b/cs_tools/datastructures.py @@ -142,6 +142,18 @@ class LocalSystemInfo(_GlobalModel): python: validators.CoerceVersion = AwesomeVersion(platform.python_version()) ran_at: Annotated[pydantic.AwareDatetime, validators.ensure_datetime_is_utc] = dt.datetime.now(tz=dt.timezone.utc) + @property + def is_linux(self) -> bool: + return self.system.startswith("Linux") + + @property + def is_mac_osx(self) -> bool: + return self.system.startswith("Darwin") + + @property + def is_windows(self) -> bool: + return self.system.startswith("Windows") + class UserInfo(_GlobalModel): """Information about the logged in user.""" diff --git a/cs_tools/errors.py b/cs_tools/errors.py index 0075bbb3..ac10d77b 100644 --- a/cs_tools/errors.py +++ b/cs_tools/errors.py @@ -176,7 +176,7 @@ class SyncerInitError(CSToolsCLIError): │ │ │ Mitigation │ │ Check the Syncer's documentation page for more information. │ - │ https://thoughtspot.github.io/cs_tools/syncer/Starburst/ │ + │ https://thoughtspot.github.io/cs_tools/syncer/starburst/ │ ╰─────────────────────────────────────────────────────────────â•Ŋ """ @@ -184,13 +184,13 @@ class SyncerInitError(CSToolsCLIError): mitigation = ( # fmt: off "Check the Syncer's documentation page for more information." - "\n[b blue]https://thoughtspot.github.io/cs_tools/syncer/{proto}/" + "\n[b blue]https://thoughtspot.github.io/cs_tools/syncer/{proto_lower}/" # fmt: on ) def __init__(self, pydantic_error: pydantic.ValidationError, *, proto: str): self.pydantic_error = pydantic_error - self.error_info = {"proto": proto or pydantic_error.title} + self.error_info = {"proto": proto, "proto_lower": proto.lower()} @property def reason(self) -> str: # type: ignore[override] diff --git a/cs_tools/settings.py b/cs_tools/settings.py index d205890f..6d1d61f5 100644 --- a/cs_tools/settings.py +++ b/cs_tools/settings.py @@ -15,6 +15,7 @@ import urllib import urllib.error import uuid +import zlib from awesomeversion import AwesomeVersion import pydantic @@ -63,9 +64,9 @@ class MetaConfig(_GlobalModel): install_uuid: uuid.UUID = pydantic.Field(default_factory=uuid.uuid4) default_config_name: Optional[str] = None - remote: Optional[RemoteRepositoryInfo] = RemoteRepositoryInfo() - analytics: Optional[AnalyticsOptIn] = AnalyticsOptIn() - environment: Optional[ExecutionEnvironment] = ExecutionEnvironment() + remote: RemoteRepositoryInfo = RemoteRepositoryInfo() + analytics: AnalyticsOptIn = AnalyticsOptIn() + environment: ExecutionEnvironment = ExecutionEnvironment() created_in_cs_tools_version: validators.CoerceVersion = __version__ _new_version_notified_ack: bool = False @@ -139,6 +140,9 @@ def load(cls) -> Self: def save(self) -> None: """Store the meta-config.""" + if self.environment.is_ci: + return + full_path = cs_tools_venv.app_dir / ".meta-config.json" # Don't save extra data. 
@@ -176,7 +180,7 @@ def check_remote_version(self) -> None: log.debug("Could not save to .meta-config.json") except Exception as e: - log.warning(f"Could not fetch release url: {e}") + log.debug(f"Could not fetch release url: {e}", exc_info=True) def newer_version_string(self) -> str: """Return the CLI new version media string.""" @@ -229,6 +233,7 @@ class ThoughtSpotConfiguration(_GlobalSettings): bearer_token: Optional[types.GUID] = pydantic.Field(default=None) default_org: Optional[int] = None disable_ssl: bool = False + proxy: Optional[str] = None # See: https://www.python-httpx.org/advanced/proxies/ @pydantic.model_validator(mode="before") @classmethod @@ -249,7 +254,7 @@ def encode_password(cls, data: Any) -> Optional[str]: try: utils.reveal(data.encode()).decode() - except binascii.Error: + except (binascii.Error, zlib.error): pass else: return data @@ -356,12 +361,12 @@ def from_name(cls, name: str, automigrate: bool = False, **overrides) -> CSTools @classmethod def from_environment(cls, name: str = "ENV", *, dotfile: Optional[pathlib.Path] = None) -> CSToolsConfig: """Read in a config from environment variables.""" - extra = {} + config = {"name": name} if dotfile is not None: - extra["_env_file"] = pathlib.Path(dotfile).as_posix() + config["_env_file"] = pathlib.Path(dotfile).as_posix() - return cls(name=name, thoughtspot={}, **extra) + return cls.model_validate(config) @classmethod def from_toml(cls, path: pathlib.Path, automigrate: bool = False) -> CSToolsConfig: diff --git a/cs_tools/sync/csv/syncer.py b/cs_tools/sync/csv/syncer.py index bc02fd3d..d5d798b7 100644 --- a/cs_tools/sync/csv/syncer.py +++ b/cs_tools/sync/csv/syncer.py @@ -36,6 +36,18 @@ class CSV(Syncer): _written_header: dict[str, bool] = {} # noqa: RUF012 """Whether or not the header has been written for a given file already""" + @pydantic.field_validator("directory", mode="after") + @classmethod + def _ensure_directory_exists(cls, value: Union[pydantic.DirectoryPath, pydantic.NewPath]) -> pydantic.DirectoryPath: + if value.is_file(): + raise ValueError(f"{value.resolve().as_posix()} is a file, not a directory.") + + if not value.exists(): + log.warning(f"The directory '{value.resolve().as_posix()}' does not yet exist, creating it..") + value.mkdir(parents=True, exist_ok=True) + + return value + @pydantic.field_validator("delimiter", "escape_character", mode="after") @classmethod def _only_single_characters_allowed(cls, value: str, info: pydantic.ValidationInfo) -> str: diff --git a/cs_tools/sync/databricks/MANIFEST.json b/cs_tools/sync/databricks/MANIFEST.json new file mode 100644 index 00000000..37e6d057 --- /dev/null +++ b/cs_tools/sync/databricks/MANIFEST.json @@ -0,0 +1,5 @@ +{ + "name": "databricks", + "syncer_class": "Databricks", + "requirements": ["databricks-sql-connector >= 2.9.3"] +} diff --git a/cs_tools/sync/databricks/__init__.py b/cs_tools/sync/databricks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cs_tools/sync/databricks/syncer.py b/cs_tools/sync/databricks/syncer.py new file mode 100644 index 00000000..385f7e3f --- /dev/null +++ b/cs_tools/sync/databricks/syncer.py @@ -0,0 +1,82 @@ +from __future__ import annotations + +from typing import Any, Optional +import logging +import pathlib + +import pydantic +import sqlalchemy as sa +import sqlmodel + +from cs_tools.sync import utils as sync_utils +from cs_tools.sync.base import DatabaseSyncer +from cs_tools.sync.types import TableRows + +log = logging.getLogger(__name__) + + +class Databricks(DatabaseSyncer): + """Interact 
with a Databricks database.""" + + __manifest_path__ = pathlib.Path(__file__).parent / "MANIFEST.json" + __syncer_name__ = "Databricks" + + server_hostname: str + http_path: str + access_token: str + catalog: str + schema_: Optional[str] = pydantic.Field(default="default", alias="schema") + port: Optional[int] = 443 + temp_dir: Optional[pydantic.DirectoryPath] = pathlib.Path(".") + + @pydantic.field_validator("access_token", mode="before") + @classmethod + def ensure_dapi_prefix(cls, value: Any) -> str: + if not str(value).startswith("dapi"): + raise ValueError("Access Token should start with 'dapi'") + return value + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._engine = sa.create_engine(self.make_url(), future=True) + self.metadata = sqlmodel.MetaData(schema=self.schema_) + + def make_url(self) -> str: + """Create a connection string for the Databricks JDBC driver.""" + username = "token" + password = self.access_token + host = self.server_hostname + port = self.port + query = f"http_path={self.http_path}&catalog={self.catalog}&schema={self.schema_}" + return f"databricks://{username}:{password}@{host}:{port}?{query}" + + def __repr__(self): + return f"" + + def load(self, tablename: str) -> TableRows: + """SELECT rows from Databricks""" + table = self.metadata.tables[f"{self.schema_}.{tablename}"] + rows = self.session.execute(table.select()) + return [row.model_dump() for row in rows] + + def dump(self, tablename: str, *, data: TableRows) -> None: + """INSERT rows into Databricks.""" + if not data: + log.warning(f"no data to write to syncer {self}") + return + + table = self.metadata.tables[f"{self.schema_}.{tablename}"] + + if self.load_strategy == "APPEND": + sync_utils.batched(table.insert().values, session=self.session, data=data, max_parameters=250) + + if self.load_strategy == "TRUNCATE": + self.session.execute(table.delete()) + sync_utils.batched(table.insert().values, session=self.session, data=data, max_parameters=250) + + if self.load_strategy == "UPSERT": + # TODO: @sameerjain901, 2024/02/10 + # need to investigate COPY->MERGE INTO functionality, similar to how we have in Snowflake syncer + # https://docs.databricks.com/en/sql/language-manual/delta-merge-into.html + # + sync_utils.generic_upsert(table, session=self.session, data=data, max_params=250) diff --git a/cs_tools/sync/json/syncer.py b/cs_tools/sync/json/syncer.py index 98dccc90..10a0ed2b 100644 --- a/cs_tools/sync/json/syncer.py +++ b/cs_tools/sync/json/syncer.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Literal, Optional +from typing import TYPE_CHECKING, Literal, Optional, Union import json import logging import pathlib @@ -21,9 +21,21 @@ class JSON(Syncer): __manifest_path__ = pathlib.Path(__file__).parent / "MANIFEST.json" __syncer_name__ = "json" - directory: pydantic.DirectoryPath + directory: Union[pydantic.DirectoryPath, pydantic.NewPath] encoding: Optional[Literal["UTF-8"]] = None + @pydantic.field_validator("directory", mode="after") + @classmethod + def _ensure_directory_exists(cls, value: Union[pydantic.DirectoryPath, pydantic.NewPath]) -> pydantic.DirectoryPath: + if value.is_file(): + raise ValueError(f"{value.resolve().as_posix()} is a file, not a directory.") + + if not value.exists(): + log.warning(f"The directory '{value.resolve().as_posix()}' does not yet exist, creating it..") + value.mkdir(parents=True, exist_ok=True) + + return value + def __repr__(self): return f"" diff --git a/cs_tools/sync/mock/MANIFEST.json 
b/cs_tools/sync/mock/MANIFEST.json new file mode 100644 index 00000000..2d36da4a --- /dev/null +++ b/cs_tools/sync/mock/MANIFEST.json @@ -0,0 +1,4 @@ +{ + "name": "mock", + "syncer_class": "Mock" +} diff --git a/cs_tools/sync/mock/__init__.py b/cs_tools/sync/mock/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cs_tools/sync/mock/syncer.py b/cs_tools/sync/mock/syncer.py new file mode 100644 index 00000000..c8f5f169 --- /dev/null +++ b/cs_tools/sync/mock/syncer.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING +import logging +import pathlib + +import sqlalchemy as sa + +from cs_tools.sync.base import DatabaseSyncer + +if TYPE_CHECKING: + from cs_tools.sync.types import TableRows + +log = logging.getLogger(__name__) + + +class Mock(DatabaseSyncer): + """Pretend to interact with a particular syncer.""" + + __manifest_path__ = pathlib.Path(__file__).parent / "MANIFEST.json" + __syncer_name__ = "mock" + + # dialect: Literal["Databricks", "Falcon", "Redshift", "Snowflake", "SQLite", "Starburst", "Trino"] + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._engine = sa.engine.create_mock_engine("sqlite://", self.sql_query_to_log) # type: ignore[assignment] + + def __finalize__(self) -> None: + log.warning("[b yellow]THESE DDL ARE APPROXIMATE, YOU WILL NEED TO CUSTOMIZE TO YOUR DIALECT.") + super().__finalize__() + log.info("ALL SCOPED TABLES HAVE BEEN PRINTED, EXITING..") + raise SystemExit(-1) + + def sql_query_to_log(self, query: sa.schema.ExecutableDDLElement, *_multiparams, **_params): + """Convert a SQL query into a string.""" + compiled = query.compile(dialect=self.engine.dialect) # type: ignore[attr-defined] + log.info(f"\n{compiled.string.strip()}\n") + + def __repr__(self): + # return f"" + return "" + + # MANDATORY PROTOCOL MEMBERS + + def load(self, tablename: str): + pass + + def dump(self, tablename: str, *, data: TableRows): + pass diff --git a/cs_tools/sync/parquet/syncer.py b/cs_tools/sync/parquet/syncer.py index aa91e117..1cedccf1 100644 --- a/cs_tools/sync/parquet/syncer.py +++ b/cs_tools/sync/parquet/syncer.py @@ -25,6 +25,18 @@ class Parquet(Syncer): directory: Union[pydantic.DirectoryPath, pydantic.NewPath] compression: Literal["GZIP", "SNAPPY"] = "GZIP" + @pydantic.field_validator("directory", mode="after") + @classmethod + def _ensure_directory_exists(cls, value: Union[pydantic.DirectoryPath, pydantic.NewPath]) -> pydantic.DirectoryPath: + if value.is_file(): + raise ValueError(f"{value.resolve().as_posix()} is a file, not a directory.") + + if not value.exists(): + log.warning(f"The directory '{value.resolve().as_posix()}' does not yet exist, creating it..") + value.mkdir(parents=True, exist_ok=True) + + return value + def __repr__(self): return f"" diff --git a/cs_tools/sync/redshift/MANIFEST.json b/cs_tools/sync/redshift/MANIFEST.json index b5ed5ca5..894750da 100644 --- a/cs_tools/sync/redshift/MANIFEST.json +++ b/cs_tools/sync/redshift/MANIFEST.json @@ -2,8 +2,8 @@ "name": "redshift", "syncer_class": "Redshift", "requirements": [ - "sqlalchemy-redshift>=1.4.1", - "redshift-connector>=2.0.905", - "s3fs>=2022.2.0" + ["sqlalchemy-redshift >= 1.4.1", "--no-deps"], + "redshift_connector >= 2.1.0", + "psycopg2-binary >= 2.9.9" ] } diff --git a/cs_tools/sync/redshift/compiler.py b/cs_tools/sync/redshift/compiler.py new file mode 100644 index 00000000..9a192a65 --- /dev/null +++ b/cs_tools/sync/redshift/compiler.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import logging + +from 
sqlalchemy import event +from sqlalchemy.types import String, TypeDecorator +import sqlalchemy as sa + +log = logging.getLogger(__name__) + +# https://docs.aws.amazon.com/redshift/latest/dg/r_Character_types.html#r_Character_types-varchar-or-character-varying +MAX_VARCHAR_LENGTH = 65535 + + +class TrimmedString(TypeDecorator): + impl = String + + def process_bind_param(self, value, dialect): + """Customize how the value is processed before being sent to the database.""" + if value is None: + pass + + elif len(value) > MAX_VARCHAR_LENGTH: + log.warning(f"Incoming data value Redshift maxsize: {len(value)} chars, see logs for full value..") + log.debug(value) + # MAX_LENGTH minus 4 because.. + # - python is zero indexed + # - we want to leave 3 characters for the truncation indicator + # + value = value[: MAX_VARCHAR_LENGTH - 4] + "..." + + return value + + +@event.listens_for(sa.MetaData, "before_create") +def before_create(metadata, connection, **kw): + """Customize how the Table is configured before CREATE TABLE ran in Redshift.""" + for table in metadata.tables.values(): + for column in table.columns: + try: + column.info["length_override"] + except KeyError: + continue + + column.type = TrimmedString(length=MAX_VARCHAR_LENGTH) diff --git a/cs_tools/sync/redshift/syncer.py b/cs_tools/sync/redshift/syncer.py index 53963d38..468a23e9 100644 --- a/cs_tools/sync/redshift/syncer.py +++ b/cs_tools/sync/redshift/syncer.py @@ -1,131 +1,74 @@ from __future__ import annotations -from typing import Any -import csv -import enum -import io +from typing import Literal import logging +import pathlib -from pydantic.dataclasses import dataclass -from sqlalchemy_redshift import dialect -import s3fs import sqlalchemy as sa -log = logging.getLogger(__name__) +from cs_tools.sync import utils as sync_utils +from cs_tools.sync.base import DatabaseSyncer +from cs_tools.sync.types import TableRows + +from . import compiler # noqa: F401 +log = logging.getLogger(__name__) -class AuthType(enum.Enum): - local = "local" - okta = "okta" +class Redshift(DatabaseSyncer): + """Interact with a Redshift Database.""" -@dataclass -class Redshift: - """ - Interact with an AWS Redshift database. 
- """ + __manifest_path__ = pathlib.Path(__file__).parent + __syncer_name__ = "Redshift" + host: str + port: int = 5439 username: str - password: str + secret: str database: str - aws_access_key: str # for S3 data load - aws_secret_key: str # for S3 data load - aws_endpoint: str # FMT: .xxxxxx..redshift.amazonaws.com - port: int = 5439 - auth_type: AuthType = AuthType.local - # okta_account_name: str = None - # okta_app_id: str = None - truncate_on_load: bool = True - - # DATABASE ATTRIBUTES - __is_database__ = True - - def __post_init_post_parse__(self): - if self.auth_type == AuthType.local: - connect_args = {} - url = sa.engine.URL.create( - drivername="redshift+redshift_connector", - host=self.aws_endpoint, - port=self.port, - database=self.database, - username=self.username, - password=self.password, - ) - - elif self.auth_type == AuthType.okta: - # aws_cluster_id, _, aws_region, *_ = self.aws_endpoint.split('.') - # connect_args = { - # 'credentials_provider': 'OktaCredentialsProvider', - # 'idp_host': '.okta.com', - # 'app_id': '', - # 'app_name': 'amazon_aws_redshift', - # 'cluster_identifier': aws_cluster_id, - # 'region': aws_region, - # 'ssl_insecure': False, - # **connect_args - # } - # url = sa.engine.URL.create( - # drivername='redshift+redshift_connector', - # database=self.database, - # username=self.username, - # password=self.password - # ) - raise NotImplementedError( - "our implementation is best-effort, but lacks testing.. see the source " - "code for ideas on how to implement MFA to Okta." - ) - - self.engine = sa.create_engine(url, connect_args=connect_args) - self.cnxn = self.engine.connect() - - # decorators must be declared here, SQLAlchemy doesn't care about instances - sa.event.listen(sa.schema.MetaData, "after_create", self.capture_metadata) - - def capture_metadata(self, metadata, cnxn, **kw): - self.metadata = metadata - - def __repr__(self): - return f"" - - # MANDATORY PROTOCOL MEMBERS + authentication: Literal["basic"] = "basic" - @property - def name(self) -> str: - return "redshift" + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._engine = sa.create_engine(self.make_url(), future=True) - def load(self, table: str) -> list[dict[str, Any]]: - t = self.metadata.tables[table] + def make_url(self) -> str: + """Create a connection string for the Redshift JDBC driver.""" + host = self.host + port = self.port + username = self.username + secret = self.secret + database = self.database + return f"redshift+psycopg2://{username}:{secret}@{host}:{port}/{database}" - with self.cnxn.begin(): - r = self.cnxn.execute(t.select()) + def __repr__(self): + return f"" - return [dict(_) for _ in r] + def load(self, tablename: str) -> TableRows: + """SELECT rows from Redshift.""" + table = self.metadata.tables[tablename] + rows = self.session.execute(table.select()) + return [row.model_dump() for row in rows] - def dump(self, table: str, *, data: list[dict[str, Any]]) -> None: + def dump(self, tablename: str, *, data: TableRows) -> None: + """INSERT rows into Redshift.""" if not data: - log.warning(f"no data to write to syncer {self}") + log.warning(f"No data to write to syncer {self}") return - t = self.metadata.tables[table] - - if self.truncate_on_load: - with self.cnxn.begin(): - self.cnxn.execute(table.delete().where(True)) - - # 1. 
Load file to S3 - fs = s3fs.S3FileSystem(key=self.aws_access_key, secret=self.aws_secret_key) - fp = f"s3://{self.s3_bucket_name}/ts_{table}.csv" - - with io.StringIO() as buf, fs.open(fp, "w") as f: - header = list(data[0].keys()) - writer = csv.DictWriter(buf, fieldnames=header, dialect="excel", delimiter="|") - writer.writeheader() - writer.writerows(data) + table = self.metadata.tables[tablename] - f.write(buf.getvalue()) + if self.load_strategy == "APPEND": + sync_utils.batched(table.insert().values, session=self.session, data=data, max_parameters=250) + self.session.commit() - # 2. Perform a COPY operation - q = dialect.CopyCommand(t, data_location=fp, ignore_header=0) + if self.load_strategy == "TRUNCATE": + self.session.execute(table.delete()) + sync_utils.batched(table.insert().values, session=self.session, data=data, max_parameters=250) - with self.cnxn.begin(): - self.cnxn.execute(q) # .execution_options(autocommit=True) + if self.load_strategy == "UPSERT": + # TODO: @saurabhsingh1608, 2024/04/09 + # need to investigate COPY->MERGE INTO functionality, similar to how we have in Snowflake syncer + # https://docs.aws.amazon.com/redshift/latest/dg/r_MERGE.html + # + sync_utils.generic_upsert(table, session=self.session, data=data, max_params=250) diff --git a/cs_tools/sync/snowflake/MANIFEST.json b/cs_tools/sync/snowflake/MANIFEST.json index 7bfff421..0a3694ad 100644 --- a/cs_tools/sync/snowflake/MANIFEST.json +++ b/cs_tools/sync/snowflake/MANIFEST.json @@ -4,7 +4,6 @@ "requirements": [ "setuptools", "wheel", - "cryptography >= 40.0.2 , < 41.0.0", "snowflake-connector-python < 4.0.0", ["snowflake-sqlalchemy >= 1.5.1 , < 1.6.0", "--no-deps"] ] diff --git a/cs_tools/sync/snowflake/syncer.py b/cs_tools/sync/snowflake/syncer.py index c0dfded7..eff54cff 100644 --- a/cs_tools/sync/snowflake/syncer.py +++ b/cs_tools/sync/snowflake/syncer.py @@ -149,7 +149,6 @@ def stage_and_put(self, tablename: str, *, data: TableRows) -> str: FILE_FORMAT = ( TYPE = CSV FIELD_DELIMITER = '|' - NULL_IF = ( '\\N' ) FIELD_OPTIONALLY_ENCLOSED_BY = '"' EMPTY_FIELD_AS_NULL = TRUE ) diff --git a/cs_tools/sync/trino/syncer.py b/cs_tools/sync/trino/syncer.py index 43040b1a..8726b39d 100644 --- a/cs_tools/sync/trino/syncer.py +++ b/cs_tools/sync/trino/syncer.py @@ -29,7 +29,7 @@ class Trino(DatabaseSyncer): __syncer_name__ = "trino" host: pydantic.IPvAnyAddress - port: int = 8080 + port: Optional[int] = 8080 catalog: str schema_: Optional[str] = pydantic.Field(default="public", alias="schema") authentication: Literal["basic", "jwt"] diff --git a/cs_tools/sync/utils.py b/cs_tools/sync/utils.py index a11473a0..327a321d 100644 --- a/cs_tools/sync/utils.py +++ b/cs_tools/sync/utils.py @@ -67,9 +67,10 @@ def batched(prepared_statement, *, session: sa.orm.Session, data: TableRows, max rows = [] # Final commit, grab the rest of the data rows. - stmt = prepared_statement(rows) - session.execute(stmt) - session.commit() + if rows: + stmt = prepared_statement(rows) + session.execute(stmt) + session.commit() def generic_upsert( @@ -86,7 +87,7 @@ def generic_upsert( Performs multiple queries to classify and then properly add records. 
""" if unique_key is None and not target.primary_key: - raise ValueError() + raise ValueError(f"No unique key was supplied for {target}") log.debug(f" TABLE: {target}") log.debug(f"DATA IN: {len(data): >7,} rows") diff --git a/cs_tools/thoughtspot.py b/cs_tools/thoughtspot.py index 49ec56fc..bb3d2fa4 100644 --- a/cs_tools/thoughtspot.py +++ b/cs_tools/thoughtspot.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Optional import collections +import json import logging import httpx @@ -22,7 +23,7 @@ TSLoadMiddleware, UserMiddleware, ) -from cs_tools.datastructures import SessionContext +from cs_tools.datastructures import LocalSystemInfo, SessionContext from cs_tools.errors import AuthenticationError, ThoughtSpotUnavailable if TYPE_CHECKING: @@ -40,8 +41,13 @@ class ThoughtSpot: def __init__(self, config: CSToolsConfig, auto_login: bool = False): self.config = config - self.api = RESTAPIClient(ts_url=str(config.thoughtspot.url), verify=not config.thoughtspot.disable_ssl) self._session_context: Optional[SessionContext] = None + self.config = config + self.api = RESTAPIClient( + ts_url=str(config.thoughtspot.url), + verify=not config.thoughtspot.disable_ssl, + proxy=config.thoughtspot.proxy, + ) # ============================================================================================================== # API MIDDLEWARES: logically grouped API interactions within ThoughtSpot @@ -86,11 +92,18 @@ def _attempt_do_authenticate(self, authentication_method, **authentication_keywo except (httpx.ConnectError, httpx.ConnectTimeout) as e: if "SSL: CERTIFICATE_VERIFY_FAILED" in str(e): reason = "Outdated Python default certificate detected." + # fmt: off mitigation = ( - f"Quick fix: run [b blue]cs_tools config modify --config {self.config.name} --disable_ssl[/] " - f"and try again.\n\nLonger fix: try running [b blue]cs_tools self pip install certifi " - f"--upgrade[/] and try again." + f"Quick fix: run [b blue]cs_tools config modify --config {self.config.name} --disable_ssl[/] and " + f"try again." ) + # fmt: on + + if LocalSystemInfo().is_mac_osx: + mitigation = ( + "\n\nLonger fix: install (double click) the pre-bundled python certificates located at " + "[b blue]/Applications/Python x.y/Install Certificates.command[/]" + ) else: reason = ( f"Cannot connect to ThoughtSpot ( [b blue]{self.config.thoughtspot.url}[/] ) from your " f"computer" @@ -207,7 +220,7 @@ def login(self) -> None: login_info.pop("password") try: - log.debug(f"Session context\n{self.session_context.model_dump()}") + log.debug(f"SESSION CONTEXT\n{json.dumps(self.session_context.model_dump(mode='json'), indent=4)}") except errors.NoSessionEstablished: for method, r in attempted_auth_method.items(): log.info(f"Attempted {method.title().replace('_', ' ')}: HTTP {r.status_code}, see logs for details..") diff --git a/cs_tools/updater/_bootstrapper.py b/cs_tools/updater/_bootstrapper.py index 09ae2887..afc785b9 100644 --- a/cs_tools/updater/_bootstrapper.py +++ b/cs_tools/updater/_bootstrapper.py @@ -7,8 +7,6 @@ possible so we don't need to worry about something not being available on an under-supported version of Python. 
""" -from __future__ import annotations - from argparse import RawTextHelpFormatter import argparse import datetime as dt @@ -60,7 +58,7 @@ def cli(): operation.add_argument( "-i", "--install", - help=f"install cs_tools to your system {_GREEN}(default option){_RESET}", + help="install cs_tools to your system {c}(default option){x}".format(c=_GREEN, x=_RESET), dest="install", action="store_true", default=False, @@ -89,12 +87,19 @@ def cli(): action="store_true", default=False, ) + parser.add_argument( + "--no-clean", + help="don't remove existing BOOTSTRAPPER files", + dest="pre_clean", + action="store_false", + default=True, + ) args = parser.parse_args() _setup_logging(args.verbose) # remove any pre-existing work from a historical install - if not args.offline_mode: + if args.pre_clean: _cleanup() log.info( @@ -146,7 +151,7 @@ def cli(): requires = "cs_tools[cli]" if args.offline_mode: - log.info("Using the offline binary found at {p}{v.find_links}{x}".format(p=_PURPLE, x=_RESET, v=venv)) + log.info("Using the offline binary found at {c}{fp}{x}".format(c=_PURPLE, fp=venv.find_links, x=_RESET)) elif args.dev: log.info("Installing locally using the development environment.") @@ -154,7 +159,7 @@ def cli(): dir_updater = os.path.dirname(here) dir_library = os.path.dirname(dir_updater) dir_package = os.path.dirname(dir_library) - requires = "{local}[cli]".format(local=dir_package) + requires = "{pdir}[cli]".format(pdir=dir_package) else: log.info("Getting the latest CS Tools {beta}release.".format(beta="beta " if args.beta else "")) @@ -173,7 +178,7 @@ def cli(): shutil.rmtree(venv.venv_path, ignore_errors=True) path.unset() - log.info("{g}Done!{x} Thank you for trying CS Tools.".format(g=_GREEN, x=_RESET)) + log.info("{c}Done!{x} Thank you for trying CS Tools.".format(c=_GREEN, x=_RESET)) if args.install or args.reinstall: log.info( @@ -216,7 +221,7 @@ def _create_color_code(color, bold=False): } if color not in foreground_color_map: - raise ValueError(f"invalid terminal color code: '{color}'") + raise ValueError("invalid terminal color code: '{c}'".format(c=color)) to_bold = int(bold) # 0 = reset , 1 = bold to_color = foreground_color_map[color] @@ -235,8 +240,8 @@ def _setup_logging(verbose=True): import pathlib import tempfile - random_dir = tempfile.NamedTemporaryFile().name - random_path = pathlib.Path.cwd().joinpath(f"cs_tools-bootstrap-error-{pathlib.Path(random_dir).name}.log") + random_name = tempfile.NamedTemporaryFile().name + random_path = pathlib.Path.cwd().joinpath("cs_tools-bootstrap-error-{n}.log".format(n=random_name)) config = { "version": 1, @@ -353,7 +358,7 @@ def format(self, record, *a, **kw): s = self.formatMessage(record) prefix, _, _ = s.partition(record.msg[:10]) prefix = prefix.replace(formatted_time, len(formatted_time) * " ") - record.msg = record.msg.replace("\n", f"\n{prefix}") + record.msg = record.msg.replace("\n", "\n{p}".format(p=prefix)) return super().format(record, *a, **kw) @@ -416,10 +421,10 @@ def cli_type_filepath(fp): path = pathlib.Path(fp) if not path.exists(): - raise argparse.ArgumentTypeError(f"path '{fp}' does not exist") + raise argparse.ArgumentTypeError("path '{fp}' does not exist".format(fp=path)) if path.is_file(): - raise argparse.ArgumentTypeError(f"path must be a directory, got '{fp}'") + raise argparse.ArgumentTypeError("path must be a directory, got '{fp}'".format(fp=path)) return path @@ -433,14 +438,14 @@ def get_cs_tools_venv(find_links): updater_py = here / "_updater.py" if not updater_py.exists(): - log.info(f"Missing 
'{updater_py}', downloading from GitHub") + log.info("Missing '{py}', downloading from GitHub".format(py=updater_py)) url = "https://api.github.com/repos/thoughtspot/cs_tools/contents/cs_tools/updater/_updater.py" data = http_request(url, to_json=True) assert isinstance(data, dict) data = http_request(data["download_url"], to_json=False) assert isinstance(data, bytes) updater_py.write_text(data.decode()) - log.info(f"Downloaded as '{updater_py}'") + log.info("Downloaded as '{py}'".format(py=updater_py)) # Hack the PATH var so we can import from _updater sys.path.insert(0, here.as_posix()) @@ -452,7 +457,7 @@ def get_cs_tools_venv(find_links): "Unable to find the CS Tools _updater.py, try getting at " "{b}https://github.com/thoughtspot/cs_tools/releases/latest{x}".format(b=_BLUE, x=_RESET) ) - raise SystemExit(1) from None + raise SystemExit(1) # noqa: B904 if find_links is not None: cs_tools_venv.with_offline_mode(find_links=find_links) @@ -555,7 +560,7 @@ def main(): except Exception as e: disk_handler = next(h for h in log.root.handlers if isinstance(h, InMemoryUntilErrorHandler)) disk_handler.drain_buffer() - log.debug(f"Error found: {e}", exc_info=True) + log.debug("Error found: {err}".format(err=e), exc_info=True) log.warning( "Unexpected error in bootstrapper, see {b}{logfile}{x} for details..".format( b=_BLUE, logfile=disk_handler.baseFilename, x=_RESET @@ -565,42 +570,74 @@ def main(): return return_code + +if __name__ == "__main__": + # ===================== # VERSION CHECK FAILED # ===================== + if sys.version_info <= __minimum_python_version__: + args = " ".join(map(str, sys.argv)) + py_vers = ".".join(map(str, sys.version_info[:2])) - args = " ".join(map(str, sys.argv)) - py_vers = ".".join(map(str, sys.version_info[:2])) + msg = ( + "\n{y}It looks like you are running {r}Python v{version}{y}!{x}" + "\n" + "\nCS Tools supports {b}python version {minimum_support}{x} or greater." + ) - msg = ( - "{y}It looks like you are running {r}Python v{version}{y}!{x}" - "\n" - "\nCS Tools supports {b}python version {minimum_support}{x} or greater." - ) + if sys.platform != "win32": + msg += ( + "\n" + "{b}Please re-run the following command..{x}" + "\n" + "\npython3 {args}" + "\n" + ) + else: + msg += ( + "\n" + "\n{y}Python installers are available for download for all versions at..{x}" + "\n{b}https://www.python.org/downloads/{x}" + "\n" + ) - if sys.version_info <= (2, 7, 99) and not (sys.platform == "win32"): - msg += "\n" "{b}Please re-run the following command..{x}" "\n" "\npython3 {args}" "\n" - else: - msg += ( + formatting = { + "b": _BLUE, + "r": _RED, + "y": _YELLOW, + "x": _RESET, + "version": py_vers, + "minimum_support": ".".join(map(str, __minimum_python_version__)), + "args": args, + } + + print(msg.format(**formatting)) # noqa: T201 + raise SystemExit(1) + + elif "CONDA_ENV_PATH" in os.environ and "CS_TOOLS_IGNORE_CONDA_PATH" not in os.environ: + msg = ( + "\n{y}It looks like you are running in an Anaconda environment!{x}" "\n" - "\n{y}Python installers are available for download for all versions at..{x}" - "\n{b}https://www.python.org/downloads/{x}" + "\n{r}Installation within conda is not tested and may lead to issues.{x}" + "\n" + "\nPlease deactivate the environment and run again." + "\n {g}See{x} https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#deactivating-an-environment" + "\n" + "\nTo ignore this warning, set the environment variable {b}CS_TOOLS_IGNORE_CONDA_PATH{x} to any value." 
"\n" ) - formatting = { - "b": _BLUE, - "r": _RED, - "y": _YELLOW, - "x": _RESET, - "version": py_vers, - "minimum_support": ".".join(__minimum_python_version__), - "args": args, - } - - print(msg.format(**formatting)) # noqa: T201 - return 1 + formatting = { + "b": _BLUE, + "g": _GREEN, + "r": _RED, + "y": _YELLOW, + "x": _RESET, + } + print(msg.format(**formatting)) # noqa: T201 + raise SystemExit(1) -if __name__ == "__main__": - raise SystemExit(main()) + else: + raise SystemExit(main()) diff --git a/cs_tools/updater/_updater.py b/cs_tools/updater/_updater.py index 7cf3d9ed..45124a0c 100644 --- a/cs_tools/updater/_updater.py +++ b/cs_tools/updater/_updater.py @@ -180,24 +180,15 @@ def make(self) -> None: if self.exists: return - sys_pydir = pathlib.Path(sys.base_prefix) - directory = "Scripts" if self.IS_WINDOWS else "bin" - exec_name = "python.exe" if self.IS_WINDOWS else "python" - - for python in (sys_pydir / exec_name, sys_pydir / directory / exec_name): - if python.exists(): - break - else: - log.error("Could not find global python executable. Do you have python installed?") - raise SystemExit(1) - - # if "pyenv" in sys_pydir.as_posix(): - # python = sys_pydir / exec_name - # else: - # python = sys_pydir / directory / exec_name + # TODO: need to replace this with an implementation which finds the system python executable, apparently + # sys.base_prefix is NOT the way. + # + # LIKELY: https://virtualenv.pypa.io/en/16.7.9/reference.html#compatibility-with-the-stdlib-venv-module + python = sys.executable # Run with global/system python , equivalent to.. # python -m venv $USER_DIR/cs_tools/.cs_tools + log.debug(f"Executing venv creation: {python} -m venv {self.venv_path}") self.run(python, "-m", "venv", self.venv_path.as_posix()) # Ensure `pip` is at least V23.1 so that backjumping is available @@ -205,21 +196,11 @@ def make(self) -> None: def reset(self) -> None: """Reset the virtual environment to base.""" - cp = self.pip("freeze", visible_output=False) - frozen = [ - line - for line in cp.stdout.decode().split("\n") - for version_specifier in ("==", "@") - if version_specifier in line - ] - - installed = self.venv_path.joinpath("INSTALLED-REQUIREMENTS.txt") - installed.write_text("\n".join(frozen)) - - if installed.stat().st_size: - self.pip("uninstall", "-r", installed.as_posix(), "-y") + # Destroy the venv. + shutil.rmtree(self.venv_path, ignore_errors=True) - installed.unlink() + # Re-make the venv. + self.make() cs_tools_venv = CSToolsVirtualEnvironment() diff --git a/cs_tools/utils.py b/cs_tools/utils.py index 0f100ee8..dfe14550 100644 --- a/cs_tools/utils.py +++ b/cs_tools/utils.py @@ -56,7 +56,7 @@ def obscure(data: bytes) -> bytes: if isinstance(data, str): data = str.encode(data) - return b64e(zlib.compress(data, 9)) + return b64e(zlib.compress(data, level=9)) def reveal(obscured: bytes) -> bytes: @@ -67,7 +67,6 @@ def reveal(obscured: bytes) -> bytes: """ if obscured is None: return - return zlib.decompress(b64d(obscured)) @@ -190,13 +189,21 @@ def run(self) -> None: def determine_editable_install() -> bool: """Determine if the current CS Tools context is an editable install.""" - for site_directory in site.getsitepackages(): - for path in pathlib.Path(site_directory).iterdir(): + for directory in site.getsitepackages(): + try: + site_directory = pathlib.Path(directory) + + # not all distros will bundle python the same (eg. 
ubuntu-slim) + except FileNotFoundError: + continue + + for path in site_directory.iterdir(): if not path.is_file(): continue if "__editable__.cs_tools" in path.as_posix(): return True + return False diff --git a/docs/syncer/csv.md b/docs/syncer/csv.md index 5136e10c..37e281bf 100644 --- a/docs/syncer/csv.md +++ b/docs/syncer/csv.md @@ -1,4 +1,5 @@ --- +icon: material/file hide: - toc --- diff --git a/docs/syncer/databricks.md b/docs/syncer/databricks.md new file mode 100644 index 00000000..6eeddc29 --- /dev/null +++ b/docs/syncer/databricks.md @@ -0,0 +1,74 @@ +--- +icon: material/database +hide: + - toc +--- + +Databricks is a cloud-based data platform that helps companies manage and analyze large amounts of data from various sources. + +Databricks was originally created as a way to easily run Apache Spark, a powerful open-source data processing engine, without having to worry about the underlying infrastructure. It provided a user-friendly "notebook" interface where you could write code and run it on a scalable, distributed computing cluster in the cloud. + +!!! note "Databricks parameters" + + ### __Required__ parameters are in __red__{ .fc-red } and __Optional__ parameters are in __blue__{ .fc-blue }. + + --- + + - [X] __server_hostname__{ .fc-red }, _your SQL Warehouse's host name_ +
_this can be found on the Connection Details tab_ + + --- + + - [X] __http_path__{ .fc-red }, _your SQL Warehouse's path_ +
_this can be found on the Connection Details tab_ + + --- + + - [X] __access_token__{ .fc-red }, _generate a personal access token from your SQL Warehouse_ +
_this can be generated on the Connection Details tab_ + + --- + + - [X] __catalog__{ .fc-red }, _the catalog to write new data to_ +
___if tables do not exist in the catalog.schema location already, we'll auto-create them___{ .fc-green } + + --- + + - [ ] __schema__{ .fc-blue }, _the schema to write new data to_ +
___if tables do not exist in the catalog.schema location already, we'll auto-create them___{ .fc-green } + + --- + + - [ ] __port__{ .fc-blue }, _the port number your Databricks instance is exposed on_ +
__default__{ .fc-gray }: `443` + + --- + + - [ ] __load_strategy__{ .fc-blue }, _how to write new data into existing tables_ +
__default__{ .fc-gray }: `APPEND` ( __allowed__{ .fc-green }: `APPEND`, `TRUNCATE`, `UPSERT` ) + + +??? question "How do I use the Databricks syncer in commands?" + + `cs_tools tools searchable bi-server --syncer "databricks://server_hostname=...&http_path=...&access_token=...&catalog=..."` + + __- or -__{ .fc-blue } + + `cs_tools tools searchable bi-server --syncer databricks://definition.toml` + + +## Definition TOML Example + +`definition.toml` +```toml +[configuration] +server_hostname = "..." +http_path = "..." +access_token = "..." +catalog = "..." +schema = 'CS_TOOLS' +load_strategy = 'truncate' +``` + +[snowflake-account-id]: https://docs.snowflake.com/en/user-guide/admin-account-identifier +[snowflake-auth]: https://docs.snowflake.com/en/developer-guide/node-js/nodejs-driver-authenticate \ No newline at end of file diff --git a/docs/syncer/excel.md b/docs/syncer/excel.md index e27395c8..74e3d6fe 100644 --- a/docs/syncer/excel.md +++ b/docs/syncer/excel.md @@ -1,4 +1,5 @@ --- +icon: material/file hide: - toc --- diff --git a/docs/syncer/falcon.md b/docs/syncer/falcon.md index 0e20984f..edc4a2d4 100644 --- a/docs/syncer/falcon.md +++ b/docs/syncer/falcon.md @@ -1,4 +1,5 @@ --- +icon: material/database hide: - toc --- diff --git a/docs/syncer/gsheets.md b/docs/syncer/gsheets.md index be140332..411a9a77 100644 --- a/docs/syncer/gsheets.md +++ b/docs/syncer/gsheets.md @@ -1,4 +1,5 @@ --- +icon: material/file hide: - toc --- diff --git a/docs/syncer/json.md b/docs/syncer/json.md index 6f38c776..f58010b9 100644 --- a/docs/syncer/json.md +++ b/docs/syncer/json.md @@ -1,4 +1,5 @@ --- +icon: material/file hide: - toc --- diff --git a/docs/syncer/parquet.md b/docs/syncer/parquet.md index f2619dbc..3454e676 100644 --- a/docs/syncer/parquet.md +++ b/docs/syncer/parquet.md @@ -1,4 +1,5 @@ --- +icon: material/file hide: - toc --- diff --git a/docs/syncer/redshift.md b/docs/syncer/redshift.md index 85a054f8..420eb75f 100644 --- a/docs/syncer/redshift.md +++ b/docs/syncer/redshift.md @@ -1 +1,70 @@ -Lorem Ipsum.. +--- +icon: material/database +hide: + - toc +--- + +Redshift is a powerful data warehousing service provided by Amazon Web Services (AWS). It's designed to help businesses quickly analyze large amounts of data. + +Redshift's combination of high performance, scalability, cost-effectiveness, ease of use, and advanced analytics capabilities make it a highly useful data warehousing solution for businesses of all sizes, especially those dealing with large and growing datasets. + +??? example "Setup instructions" + + If you face issues with connectivity to your Redshift cluster, make sure you can first access the cluster from your local machine. + + You can learn how to make your cluster accessible in the [__Redshift documentation__](https://repost.aws/knowledge-center/redshift-cluster-private-public). + + +!!! note "Redshift parameters" + + ### __Required__ parameters are in __red__{ .fc-red } and __Optional__ parameters are in __blue__{ .fc-blue }. + + --- + + - [X] __host__{ .fc-red }, _the URL of your Redshift database_ + + --- + + - [ ] __port__{ .fc-blue }, _the port number where your Redshift database is located_ +
__default__{ .fc-gray }: `5439` + + --- + + - [X] __database__{ .fc-red }, _the database to write new data to_ +
___if tables do not exist in the database location already, we'll auto-create them___{ .fc-green } + + --- + + - [X] __username__{ .fc-red }, _your Redshift username_ + + --- + + - [X] __secret__{ .fc-red }, _the secret value to pass to the authentication mechanism_ +
_this will be your __password__{ .fc-purple }_ + + --- + + - [ ] __load_strategy__{ .fc-blue }, _how to write new data into existing tables_ +
__default__{ .fc-gray }: `APPEND` ( __allowed__{ .fc-green }: `APPEND`, `TRUNCATE`, `UPSERT` ) + + +??? question "How do I use the Redshift syncer in commands?" + + `cs_tools tools searchable bi-server --syncer "redshift://host=0.0.0.0&database=...&username=admin&secret=...&load_strategy=upsert"` + + __- or -__{ .fc-blue } + + `cs_tools tools searchable bi-server --syncer redshift://definition.toml` + + +## Definition TOML Example + +`definition.toml` +```toml +[configuration] +host = "mycluster.us0I3nrnge4i.us-west-2.redshift.amazonaws.com" +database = "..." +username = "admin" +secret = "..." +load_strategy = "upsert" +``` diff --git a/docs/syncer/snowflake.md index a40403be..f16db11a 100644 --- a/docs/syncer/snowflake.md +++ b/docs/syncer/snowflake.md @@ -1,4 +1,5 @@ --- +icon: material/database hide: - toc --- @@ -39,7 +40,7 @@ Instead of having a single, monolithic database, Snowflake separates the storage --- - - [ ] __schema___{ .fc-blue }, _the schema to write new data to_ + - [ ] __schema__{ .fc-blue }, _the schema to write new data to_
___if tables do not exist in the database.schema location already, we'll auto-create them___{ .fc-green } --- diff --git a/docs/syncer/sqlite.md b/docs/syncer/sqlite.md index 4c974ae4..15a1dcbe 100644 --- a/docs/syncer/sqlite.md +++ b/docs/syncer/sqlite.md @@ -1,4 +1,5 @@ --- +icon: material/database hide: - toc --- diff --git a/docs/syncer/starburst.md b/docs/syncer/starburst.md index c97de720..e68fef70 100644 --- a/docs/syncer/starburst.md +++ b/docs/syncer/starburst.md @@ -1,4 +1,5 @@ --- +icon: material/database hide: - toc --- diff --git a/docs/syncer/trino.md b/docs/syncer/trino.md index 42844d31..4f223b36 100644 --- a/docs/syncer/trino.md +++ b/docs/syncer/trino.md @@ -1,4 +1,5 @@ --- +icon: material/database hide: - toc --- diff --git a/docs/tutorial/install.md b/docs/tutorial/install.md index 93c409d1..5293a77b 100644 --- a/docs/tutorial/install.md +++ b/docs/tutorial/install.md @@ -88,17 +88,14 @@ Follow the steps below to get __CS Tools__ installed on your platform. === ":fontawesome-brands-windows: Windows" Open up __Windows Terminal__ or __Powershell__. - + Find the copy button :material-content-copy: to the right of the code block. ```powershell - (Invoke-WebRequest `# (1)! - -Uri https://raw.githubusercontent.com/thoughtspot/cs_tools/master/cs_tools/updater/_bootstrapper.py ` - -UseBasicParsing ` - ).Content | python - --install # (2)! + powershell -c "IRM https://raw.githubusercontent.com/thoughtspot/cs_tools/master/cs_tools/updater/_bootstrapper.py | python - --install" ``` - 1. `Invoke-WebRequest` is like `curl`, but for Windows. It will download a file from the URL specified. - 2. The `IWR` response is sent or "piped" to `python` for install. + !!! example "To open Powershell" + Press the __Windows key__   ++windows++   and type __Powershell__{ .fc-purple }, then hit __enter__   ++enter++ === ":fontawesome-brands-apple: :fontawesome-brands-linux: Mac, Linux" @@ -194,6 +191,8 @@ Follow the steps below to get __CS Tools__ installed on your platform.

Try running CS Tools by typing..

+__cs_tools self info --anonymous__ + ~cs~tools ../.. cs_tools self info --anonymous !!! warning "Where can I reach out for help?" diff --git a/mkdocs.yml b/mkdocs.yml index 117b678f..df5b8b9c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -125,19 +125,19 @@ nav: - User Management: tools/user-management/README.md - Syncers: - "What's a Syncer?": syncer/what-is.md - # - Amazon Redshift: syncer/redshift.md - CSV: syncer/csv.md - # - Databricks: syncer/databricks.md + - Databricks: syncer/databricks.md + - Excel: syncer/excel.md + - Falcon: syncer/falcon.md # - Google BigQuery: syncer/bigquery.md - Google Sheets: syncer/gsheets.md - JSON: syncer/json.md - - Microsoft Excel: syncer/excel.md - Parquet: syncer/parquet.md # - Postgres: syncer/postgres.md + - Redshift: syncer/redshift.md - Snowflake: syncer/snowflake.md - SQLite: syncer/sqlite.md - Starburst: syncer/starburst.md - - ThoughtSpot Falcon: syncer/falcon.md - Trino: syncer/trino.md - Changelog: - v1.5.0: changelog/1.5.0.md diff --git a/pyproject.toml b/pyproject.toml index e22c7ec2..bbe32e11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,6 +114,9 @@ cstools = "cs_tools.cli.commands.main:run" version = {attr = "cs_tools.__project__.__version__"} [tool.mypy] +plugins = [ + "pydantic.mypy" +] warn_unused_configs = true warn_redundant_casts = true warn_unused_ignores = true @@ -153,10 +156,11 @@ exclude = [ ".nox", # ignore virtual environments # project specific ignores - "__init__.py", # ignore __init__.py - "__project__.py", # ignore project metadata - "_compat.py", # ignore compatibles - "const.py", # ignore constants + "__init__.py", # ignore __init__.py + "__project__.py", # ignore project metadata + "_compat.py", # ignore compatibles + "const.py", # ignore constants + "_bootstrapper.py", # ignore bootstrapper "noxfile.py", "setup.py", ]
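The new `proxy` field on the ThoughtSpot configuration is handed straight through to the underlying `httpx` client. As a rough sketch of what that wiring amounts to — the hostnames, token, and endpoint below are placeholders, and the exact keyword depends on the installed httpx version (`proxy=` from httpx 0.26 onward, `proxies=` before that):

```python
# A minimal sketch, not the CS Tools implementation: how a configured `proxy`
# value could reach httpx. All hostnames, paths, and values are placeholders.
import httpx

proxy_url = "http://corporate-proxy.example.com:8080"  # hypothetical value for the new `proxy` setting

client = httpx.Client(
    base_url="https://mycompany.thoughtspot.cloud",  # hypothetical ThoughtSpot URL
    verify=True,                                     # the equivalent of disable_ssl = False
    proxy=proxy_url,                                 # httpx >= 0.26; older versions take proxies=proxy_url
)

r = client.get("/callosum/v1/session/info")          # placeholder endpoint, for illustration only
print(r.status_code)
```

Whichever keyword applies, the proxy is configured once on the client rather than per request, which is why a single `Optional[str]` field on the configuration object is sufficient.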