Skip to content

Commit

Permalink
🔨 Move chart configs into chart_configs table
Browse files Browse the repository at this point in the history
  • Loading branch information
Marigold committed Jul 10, 2024
1 parent 9a81b4b commit da01cfc
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 26 deletions.
2 changes: 1 addition & 1 deletion apps/chart_sync/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def cli(
if diff.configs_are_equal():
log.info(
"chart_sync.skip",
slug=diff.target_chart.config["slug"],
slug=diff.target_chart.slug,
reason="identical chart already exists",
chart_id=chart_id,
)
Expand Down
10 changes: 7 additions & 3 deletions apps/metadata_migrate/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,11 @@ def cli(
assert not table_name, "specify either chart-slug or table-name, not both"

q = f"""
select config from charts
where slug = '{chart_slug}'
SELECT
cc.config
FROM charts c
JOIN chart_configs cc ON c.configId = cc.id
WHERE cc.config->>'$.slug' = '{chart_slug}';
"""
df = read_sql(q, engine)
if df.empty:
Expand Down Expand Up @@ -347,11 +350,12 @@ def _load_grapher_config(engine: Engine, col: str, ds_meta: DatasetMeta) -> Dict
"""TODO: This is work in progress! Update this function as you like."""
q = f"""
select
c.config
cc.config
from variables as v
join datasets as d on v.datasetId = d.id
join chart_dimensions as cd on v.id = cd.variableId
join charts as c on cd.chartId = c.id
join chart_configs as cc on c.configId = cc.id
where
v.shortName = '{col}' and
d.namespace = '{ds_meta.namespace}' and
Expand Down
5 changes: 5 additions & 0 deletions apps/utils/scan_chart_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,14 @@ def cli(dry_run: bool) -> None:
if "Unknown MySQL server host" in str(e):
log.warning("scan-chart-diff.unknown-host", pr=pr)
continue
# PRs with schema migrations
if "Unknown database" in str(e):
log.warning("scan-chart-diff.unknown-database", pr=pr)
continue
if "Unknown column" in str(e):
log.warning("scan-chart-diff.unknown-column", pr=pr)
continue
# Server is likely not ready yet
if "Can't connect to MySQL server" in str(e):
log.warning("scan-chart-diff.cant-connect", pr=pr)
continue
Expand Down
7 changes: 4 additions & 3 deletions apps/wizard/app_pages/chart_diff/chart_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,8 @@ def slug(self) -> str:
If the slug of the chart does not match between the target and source sessions, an error is displayed.
"""
if self.target_chart:
assert self.source_chart.config["slug"] == self.target_chart.config["slug"], "Slug mismatch!"
return self.source_chart.config.get("slug", "no-slug")
assert self.source_chart.slug == self.target_chart.slug, "Slug mismatch!"
return self.source_chart.slug or "no-slug"

@property
def in_conflict(self) -> bool:
Expand Down Expand Up @@ -586,11 +586,12 @@ def _modified_chart_configs_by_admin(
base_q = """
select
c.id as chartId,
MD5(c.config) as chartChecksum,
MD5(cc.config) as chartChecksum,
c.lastEditedByUserId as chartLastEditedByUserId,
c.publishedByUserId as chartPublishedByUserId,
c.lastEditedAt as chartLastEditedAt
from charts as c
join chart_configs as cc on c.configId = cc.id
where
"""
# NOTE: We assume that all changes on staging server are done by Admin user with ID = 1. This is
Expand Down
4 changes: 2 additions & 2 deletions apps/wizard/app_pages/indicator_upgrade/charts_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def get_affected_charts_and_preview(indicator_mapping: Dict[int, int]) -> List[g
# Build Series with slugs
slugs = pd.DataFrame(
{
"thumbnail": [OWID_ENV.thumb_url(chart.slug) for chart in charts],
"url": [OWID_ENV.chart_site(chart.slug) for chart in charts],
"thumbnail": [OWID_ENV.thumb_url(chart.slug) for chart in charts], # type: ignore
"url": [OWID_ENV.chart_site(chart.slug) for chart in charts], # type: ignore
}
)
st.dataframe(
Expand Down
6 changes: 4 additions & 2 deletions etl/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,8 +258,9 @@ def get_charts_slugs(db_conn: Optional[pymysql.Connection] = None) -> pd.DataFra
query = """\
SELECT
c.id AS chart_id,
c.slug AS chart_slug
cc.config->>'$.slug' AS chart_slug
FROM charts c
JOIN chart_configs cc ON c.configId = cc.id
LEFT JOIN chart_dimensions cd ON c.id = cd.chartId
LEFT JOIN variables v ON cd.variableId = v.id
WHERE
Expand Down Expand Up @@ -365,8 +366,9 @@ def get_info_for_etl_datasets(db_conn: Optional[pymysql.Connection] = None) -> p
JOIN variables v ON v.datasetId = d.id
JOIN chart_dimensions cd ON cd.variableId = v.id
JOIN charts c ON c.id = cd.chartId
JOIN chart_configs cc ON c.configId = cc.id
WHERE
json_extract(c.config, "$.isPublished") = TRUE
json_extract(cc.config, "$.isPublished") = TRUE
GROUP BY
d.id) q2
ON q1.datasetId = q2.datasetId
Expand Down
67 changes: 54 additions & 13 deletions etl/grapher_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
import structlog
from owid import catalog
from owid.catalog.meta import VARIABLE_TYPE
from sqlalchemy import JSON as _JSON
from sqlalchemy import (
BINARY,
BigInteger,
Computed,
Date,
Expand All @@ -32,12 +32,14 @@
Index,
Integer,
SmallInteger,
String,
and_,
func,
or_,
select,
text,
)
from sqlalchemy import JSON as _JSON
from sqlalchemy.dialects.mysql import (
ENUM,
LONGTEXT,
Expand All @@ -46,7 +48,15 @@
VARCHAR,
)
from sqlalchemy.exc import NoResultFound
from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, Session, mapped_column # type: ignore
from sqlalchemy.ext.hybrid import hybrid_property
from sqlalchemy.orm import ( # type: ignore
DeclarativeBase,
Mapped,
MappedAsDataclass,
Session,
mapped_column,
relationship,
)
from sqlalchemy.sql import Select
from typing_extensions import Self, TypedDict

Expand Down Expand Up @@ -225,9 +235,28 @@ class ChartRevisions(Base):
updatedAt: Mapped[Optional[datetime]] = mapped_column(DateTime, init=False)


class ChartConfig(Base):
    """ORM mapping for the `chart_configs` table, which holds chart configs as JSON.

    Rows are referenced from `charts.configId` (foreign key `charts_configId` declared on `Chart`),
    and `Chart.config` / `Chart.slug` read their values from the `config` column of this table.
    """

    __tablename__ = "chart_configs"
    # Unique index on the generated `uuid` column.
    __table_args__ = (Index("idx_chart_configs_uuid", "uuid", unique=True),)

    # 16-byte binary primary key; `uuid` below is its textual form.
    id: Mapped[bytes] = mapped_column(BINARY(16), primary_key=True)
    # NOTE(review): presumably a partial ("patch") config as opposed to the full one in `config`
    # -- not visible from this file, confirm against the schema owner.
    patchConfig: Mapped[dict] = mapped_column(JSON)
    # JSON config; this is the column `Chart.config` and `Chart.slug` are derived from.
    config: Mapped[dict] = mapped_column(JSON)
    # Filled in by the database (CURRENT_TIMESTAMP) when no value is supplied.
    createdAt: Mapped[datetime] = mapped_column(DateTime, server_default=text("CURRENT_TIMESTAMP"))
    # Generated column computed by the database from `id` via bin_to_uuid(id, 1);
    # `persisted=False` means the value is not stored (virtual generated column).
    uuid: Mapped[Optional[str]] = mapped_column(
        String(36, "utf8mb4_0900_as_cs"), Computed("(bin_to_uuid(`id`,1))", persisted=False)
    )
    updatedAt: Mapped[Optional[datetime]] = mapped_column(DateTime)

    # Reverse side of `Chart.chart_config`.
    # NOTE(review): the attribute name `chartss` looks like a typo for `charts`; renaming it requires
    # changing `back_populates="chartss"` on `Chart.chart_config` in the same commit.
    chartss: Mapped[List["Chart"]] = relationship("Chart", back_populates="chart_config")


class Chart(Base):
__tablename__ = "charts"
__table_args__ = (
ForeignKeyConstraint(
["configId"], ["chart_configs.id"], ondelete="RESTRICT", onupdate="RESTRICT", name="charts_configId"
),
ForeignKeyConstraint(
["lastEditedByUserId"],
["users.id"],
Expand All @@ -244,28 +273,40 @@ class Chart(Base):
),
Index("charts_lastEditedByUserId", "lastEditedByUserId"),
Index("charts_publishedByUserId", "publishedByUserId"),
Index("charts_slug", "slug"),
Index("configId", "configId", unique=True),
)

id: Mapped[int] = mapped_column(Integer, primary_key=True, init=False)
config: Mapped[dict] = mapped_column(JSON)
configId: Mapped[bytes] = mapped_column(BINARY(16))
createdAt: Mapped[datetime] = mapped_column(DateTime, server_default=text("CURRENT_TIMESTAMP"), init=False)
lastEditedAt: Mapped[datetime] = mapped_column(DateTime)
lastEditedByUserId: Mapped[int] = mapped_column(Integer)
is_indexable: Mapped[int] = mapped_column(TINYINT(1), server_default=text("'0'"))
slug: Mapped[str] = mapped_column(
VARCHAR(255), Computed("(json_unquote(json_extract(`config`,_utf8mb4'$.slug')))", persisted=False)
)
type: Mapped[Optional[str]] = mapped_column(
VARCHAR(255),
Computed(
"(coalesce(json_unquote(json_extract(`config`,_utf8mb4'$.type')),_utf8mb4'LineChart'))", persisted=False
),
)
updatedAt: Mapped[datetime] = mapped_column(DateTime, init=False)
publishedAt: Mapped[Optional[datetime]] = mapped_column(DateTime)
publishedByUserId: Mapped[Optional[int]] = mapped_column(Integer)

chart_config: Mapped["ChartConfig"] = relationship("ChartConfig", back_populates="chartss")

@hybrid_property
def config(self) -> dict[str, Any]: # type: ignore
return self.chart_config.config

@config.expression
def config(cls):
return select(ChartConfig.config).where(ChartConfig.id == cls.configId).scalar_subquery()

@hybrid_property
def slug(self) -> Optional[str]: # type: ignore
if self.chart_config and self.chart_config.config:
return self.chart_config.config.get("slug")
return None

@slug.expression
def slug(cls):
# NOTE: this is really slow because `slug` is in JSON and is not indexed
return select(ChartConfig.config["slug"]).where(ChartConfig.id == cls.configId).scalar_subquery()

@classmethod
def load_chart(cls, session: Session, chart_id: Optional[int] = None, slug: Optional[str] = None) -> "Chart":
"""Load chart with id `chart_id`."""
Expand Down
5 changes: 3 additions & 2 deletions etl/steps/data/garden/sdg/latest/generate_sdg_mapping.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,13 @@
" v.name as variable_name,\n",
" d.id as dataset_id,\n",
" d.name as dataset_name,\n",
" c.config->>\"$.slug\" as chart_slug\n",
" cc.config->>\"$.slug\" as chart_slug\n",
"from variables as v\n",
"join datasets as d on d.id = v.datasetId\n",
"join chart_dimensions as cd on cd.variableId = v.id\n",
"join charts as c on c.id = cd.chartId\n",
"where c.config->>\"$.slug\" in %(slugs)s\n",
"join chart_configs as cc on c.configId = cc.id\n",
"where cc.config->>\"$.slug\" in %(slugs)s\n",
" and d.isPrivate is false\n",
"\"\"\"\n",
"gf = pd.read_sql(q, engine, params={\"slugs\": df.chart_slug.tolist()})"
Expand Down

0 comments on commit da01cfc

Please sign in to comment.