Add artifacts interface (qiskit-community#1342)

### Summary This PR adds the artifacts interface following the design in https://github.com/Qiskit/rfcs/blob/master/0007-experiment-dataframe.md. ### Details and comments - Added the `ArtifactData` dataclass for representing artifacts. - Added `ExperimentData.artifacts()`, `.add_artifacts()`, and `delete_artifact()` for working with artifacts, which is stored in a thread safe list. Currently the `ScatterTable` and `CurveFitResult` objects are stored as artifacts, and experiment serialization data will be added in the future. - Artifacts are grouped by type and stored in a compressed format so that there aren't a huge number of individual files for composite experiments. As such, this PR depends on Qiskit-Extensions/qiskit-ibm-experiment#93 to allow `.zip` formats for uploading to the cloud service. Inside each zipped file is a list of JSON artifact files with the filename equal to their unique artifact ID. For composite experiments with `flatten_results=True`, all `ScatterTable` artifacts are stored in `curve_data.zip` in individual jsons and so forth. - Added a how-to for artifacts and updated documentation to demonstrate dataframe objects like AnalysisResults and the ScatterTable (`dataframe.css` is for styling these tables). - Deprecated accessing analysis results via numerical indices to anticipate removing the curve fit result from analysis results altogether in the next release. - Fixed bug where `figure_names` were being duplicated in a copied `ExperimentData` object. Example experiment with artifacts ([link](https://quantum.ibm.com/experiments/eaad518d-232f-4cab-b137-e480ff7f1cbb)): ![image](https://github.com/Qiskit-Extensions/qiskit-experiments/assets/3870315/a2929782-dfef-4535-b246-1167666ebfc9) --------- Co-authored-by: Naoki Kanazawa <nkanazawa1989@gmail.com> Co-authored-by: Will Shanks <wshaos@posteo.net>
wshanks · Feb 8, 2024 · a7d260a · a7d260a
1 parent 777e2d5
commit a7d260a
Show file tree

Hide file tree

Showing 48 changed files with 1,406 additions and 379 deletions.
diff --git a/docs/_static/dataframe.css b/docs/_static/dataframe.css
@@ -0,0 +1,35 @@
+/* Styling for pandas dataframes in documentation */
+
+div.output table {
+    border: none;
+    border-collapse: collapse;
+    border-spacing: 0;
+    color: black;
+    font-size: 12px;
+    table-layout: fixed;
+    width: 100%;
+}
+div.output thead {
+    border-bottom: 1px solid black;
+    vertical-align: bottom;
+}
+div.output tr,
+div.output th,
+div.output td {
+    text-align: right;
+    vertical-align: middle;
+    padding: 0.5em 0.5em;
+    line-height: normal;
+    white-space: normal;
+    max-width: none;
+    border: none;
+}
+div.output th {
+    font-weight: bold;
+}
+div.output tbody tr:nth-child(odd) {
+    background: #f5f5f5;
+}
+div.output tbody tr:hover {
+    background: rgba(66, 165, 245, 0.2);
+}
diff --git a/docs/conf.py b/docs/conf.py
@@ -80,9 +80,7 @@
 templates_path = ["_templates"]
 # Manually add the gallery CSS file for now
 # TODO: Figure out why the styling is not working by default
-html_css_files = [
-    "nbsphinx-gallery.css",
-]
+html_css_files = ["nbsphinx-gallery.css", "dataframe.css"]
 
 nbsphinx_timeout = 360
 nbsphinx_execute = os.getenv("QISKIT_DOCS_BUILD_TUTORIALS", "never")
@@ -171,6 +169,7 @@
     "matplotlib": ("https://matplotlib.org/stable/", None),
     "qiskit": ("https://docs.quantum.ibm.com/api/qiskit/", None),
     "uncertainties": ("https://pythonhosted.org/uncertainties", None),
+    "pandas": ("http://pandas.pydata.org/docs/", None),
     "qiskit_aer": ("https://qiskit.org/ecosystem/aer", None),
     "qiskit_dynamics": ("https://qiskit.org/ecosystem/dynamics/", None),
     "qiskit_ibm_runtime": ("https://docs.quantum.ibm.com/api/qiskit-ibm-runtime/", None),
@@ -236,6 +235,11 @@ def maybe_skip_member(app, what, name, obj, skip, options):
         "filter_kwargs",
         "fit_func",
         "signature",
+        "artifact_id",
+        "artifact_data",
+        "device_components",
+        "created_time",
+        "data",
     ]
     skip_members = [
         ParameterRepr.repr,

diff --git a/docs/howtos/artifacts.rst b/docs/howtos/artifacts.rst
@@ -0,0 +1,148 @@
+Work with experiment artifacts
+==============================
+
+Problem
+-------
+
+You want to view, add, remove, and save artifacts associated with your :class:`.ExperimentData` instance.
+
+Solution
+--------
+
+Artifacts are used to store auxiliary data for an experiment that don't fit neatly in the
+:class:`.AnalysisResult` model. Any data that can be serialized, such as fit data, can be added as
+:class:`.ArtifactData` artifacts to :class:`.ExperimentData`.
+
+For example, after an experiment that uses :class:`.CurveAnalysis` is run, its :class:`.ExperimentData`
+object is automatically populated with ``fit_summary`` and ``curve_data`` artifacts. The ``fit_summary``
+artifact has one or more :class:`.CurveFitResult` objects that contain parameters from the fit. The
+``curve_data`` artifact has a :class:`.ScatterTable` object that contains raw and fitted data in a pandas
+:class:`~pandas:pandas.DataFrame`.
+
+Viewing artifacts
+~~~~~~~~~~~~~~~~~
+
+Here we run a parallel experiment consisting of two :class:`.T1` experiments and then view the output
+artifacts as a list of :class:`.ArtifactData` objects accessed by :meth:`.ExperimentData.artifacts`:
+
+.. jupyter-execute::
+
+    from qiskit_ibm_runtime.fake_provider import FakePerth
+    from qiskit_aer import AerSimulator
+    from qiskit_experiments.library import T1
+    from qiskit_experiments.framework import ParallelExperiment
+    import numpy as np
+
+    backend = AerSimulator.from_backend(FakePerth())
+    exp1 = T1(physical_qubits=[0], delays=np.arange(1e-6, 6e-4, 5e-5))
+    exp2 = T1(physical_qubits=[1], delays=np.arange(1e-6, 6e-4, 5e-5))
+    data = ParallelExperiment([exp1, exp2], flatten_results=True).run(backend).block_for_results()
+    data.artifacts()
+
+Artifacts can be accessed using either the artifact ID, which has to be unique in each
+:class:`.ExperimentData` object, or the artifact name, which does not have to be unique and will return
+all artifacts with the same name:
+
+.. jupyter-execute::
+
+    print("Number of curve_data artifacts:", len(data.artifacts("curve_data")))
+    # retrieve by name and index
+    curve_data_id = data.artifacts("curve_data")[0].artifact_id
+    # retrieve by ID
+    scatter_table = data.artifacts(curve_data_id).data
+    print("The first curve_data artifact:\n")
+    scatter_table.dataframe
+
+In composite experiments, artifacts behave like analysis results and figures in that if
+``flatten_results`` isn't ``True``, they are accessible in the :meth:`.artifacts` method of each
+:meth:`.child_data`. The artifacts in a large composite experiment with ``flatten_results=True`` can be
+distinguished from each other using the :attr:`~.ArtifactData.experiment` and
+:attr:`~.ArtifactData.device_components`
+attributes.
+
+One useful pattern is to load raw or fitted data from ``curve_data`` for further data manipulation. You
+can work with the dataframe using standard pandas dataframe methods or the built-in
+:class:`.ScatterTable` methods:
+
+.. jupyter-execute::
+
+    import matplotlib.pyplot as plt
+
+    exp_type = data.artifacts(curve_data_id).experiment
+    component = data.artifacts(curve_data_id).device_components[0]
+
+    raw_data = scatter_table.filter(category="raw")
+    fitted_data = scatter_table.filter(category="fitted")
+
+    # visualize the data
+    plt.figure()
+    plt.errorbar(raw_data.x, raw_data.y, yerr=raw_data.y_err, capsize=5, label="raw data")
+    plt.errorbar(fitted_data.x, fitted_data.y, yerr=fitted_data.y_err, capsize=5, label="fitted data")
+    plt.title(f"{exp_type} experiment on {component}")
+    plt.xlabel('x')
+    plt.ylabel('y')    
+    plt.legend()
+    plt.show()
+
+Adding artifacts
+~~~~~~~~~~~~~~~~
+
+You can add arbitrary data as an artifact as long as it's serializable with :class:`.ExperimentEncoder`,
+which extends Python's default JSON serialization with support for other data types commonly used with
+Qiskit Experiments.
+
+.. jupyter-execute::
+
+    from qiskit_experiments.framework import ArtifactData
+
+    new_artifact = ArtifactData(name="experiment_notes", data={"content": "Testing some new ideas."})
+    data.add_artifacts(new_artifact)
+    data.artifacts("experiment_notes")
+
+.. jupyter-execute::
+
+    print(data.artifacts("experiment_notes").data)
+
+Saving and loading artifacts
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. note::
+    This feature is only for those who have access to the cloud service. You can 
+    check whether you do by logging into the IBM Quantum interface 
+    and seeing if you can see the `database <https://quantum.ibm.com/experiments>`__.
+
+Artifacts are saved to and loaded from the cloud service along with the rest of the
+:class:`.ExperimentData` object. Artifacts are stored as ``.zip`` files in the cloud service, grouped by
+artifact name. For example, the composite experiment above generates two artifact files, ``fit_summary.zip`` and
+``curve_data.zip``, and the ``experiment_notes`` artifact added above is stored in ``experiment_notes.zip``.
+Each zip file contains the serialized artifacts as JSON files named by their unique artifact ID:
+
+.. jupyter-execute::
+    :hide-code:
+
+    print("fit_summary.zip")
+    print(f"|- {data.artifacts('fit_summary')[0].artifact_id}.json")
+    print(f"|- {data.artifacts('fit_summary')[1].artifact_id}.json")
+    print("curve_data.zip")
+    print(f"|- {data.artifacts('curve_data')[0].artifact_id}.json")
+    print(f"|- {data.artifacts('curve_data')[1].artifact_id}.json")
+    print("experiment_notes.zip")
+    print(f"|- {data.artifacts('experiment_notes').artifact_id}.json")
+
+Note that for performance reasons, the auto save feature does not apply to artifacts. You must still
+call :meth:`.ExperimentData.save` once the experiment analysis has completed to upload artifacts to the
+cloud service.
+
+Note also that although individual artifacts can be deleted, artifact files currently cannot be removed
+from the cloud service. Instead, you can delete all artifacts of a given name
+using :meth:`~.delete_artifact` and then call :meth:`.ExperimentData.save`.
+This will save an empty file to the service, and the loaded experiment data will not contain
+these artifacts.
+
+See Also
+--------
+
+* :ref:`Curve Analysis: Data management with scatter table <data_management_with_scatter_table>` tutorial
+* :class:`.ArtifactData` API documentation
+* :class:`.ScatterTable` API documentation
+* :class:`.CurveFitResult` API documentation
diff --git a/docs/manuals/measurement/readout_mitigation.rst b/docs/manuals/measurement/readout_mitigation.rst
@@ -78,7 +78,7 @@ circuits, one for all “0” and one for all “1” results.
 
     exp.analysis.set_options(plot=True)
     result = exp.run(backend)
-    mitigator = result.analysis_results(0).value
+    mitigator = result.analysis_results("Local Readout Mitigator").value
 
 The resulting measurement matrix can be illustrated by comparing it to
 the identity.

diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
@@ -318,6 +318,8 @@ without an overhead of complex data management, as well as end-users with
 retrieving and reusing the intermediate data for their custom fitting workflow
 outside our curve fitting framework.
 Note that a :class:`ScatterTable` instance may be saved in the :class:`.ExperimentData` as an artifact.
+See the :doc:`Artifacts how-to </howtos/artifacts>` for more information.
+
 
 .. _curve_analysis_workflow:
 

diff --git a/docs/tutorials/getting_started.rst b/docs/tutorials/getting_started.rst
@@ -150,6 +150,9 @@ analysis, respectively:
     print(exp_data.job_status())
     print(exp_data.analysis_status())
 
+Figures
+-------
+
 Once the analysis is complete, figures are retrieved using the
 :meth:`~.ExperimentData.figure` method. See the :doc:`visualization module
 <visualization>` tutorial on how to customize figures for an experiment. For our
@@ -160,15 +163,22 @@ exponential decay model of the :math:`T_1` experiment:
 
     display(exp_data.figure(0))
 
-The fit results and associated parameters are accessed with
-:meth:`~.ExperimentData.analysis_results`:
+Analysis Results
+----------------
+
+The analysis results produced by the fit are accessed with :meth:`~.ExperimentData.analysis_results`:
 
 .. jupyter-execute::
 
     for result in exp_data.analysis_results():
         print(result)
 
-Results can be indexed numerically (starting from 0) or using their name.
+Results can be indexed by name or numerically (starting from 0), though numerical indexing is deprecated.
+Analysis results can also be retrieved as a pandas :class:`~pandas:pandas.DataFrame` by passing ``dataframe=True``:
+
+.. jupyter-execute::
+
+    exp_data.analysis_results(dataframe=True)
 
 .. note::
     See the :meth:`~.ExperimentData.analysis_results` API documentation for more 
@@ -186,6 +196,24 @@ value and standard deviation of each value can be accessed as follows:
 For further documentation on how to work with UFloats, consult the ``uncertainties``
 :external+uncertainties:doc:`user_guide`.
 
+Artifacts
+---------
+
+The curve fit data itself is stored in artifacts, which are accessed with :meth:`~.ExperimentData.artifacts`
+in an analogous manner. Artifacts for a standard experiment include both the curve fit data
+stored in ``artifacts("curve_data")`` and information on the fit stored in ``artifacts("fit_summary")``.
+Use the ``data`` attribute to access artifact data:
+
+.. jupyter-execute::
+
+    print(exp_data.artifacts("fit_summary").data)
+
+.. note::
+    See the :doc:`artifacts </howtos/artifacts>` how-to for more information on using artifacts.
+
+Circuit data and metadata
+-------------------------
+
 Raw circuit output data and its associated metadata can be accessed with the
 :meth:`~.ExperimentData.data` property. Data is indexed by the circuit it corresponds
 to. Depending on the measurement level set in the experiment, the raw data will either
@@ -210,6 +238,9 @@ Experiments also have global associated metadata accessed by the
 
     print(exp_data.metadata)
 
+Job information
+---------------
+
 The actual backend jobs that were executed for the experiment can be accessed with the
 :meth:`~.ExperimentData.jobs` method.
 
@@ -406,8 +437,7 @@ into one level:
     )
     parallel_data = parallel_exp.run(backend, seed_simulator=101).block_for_results()
 
-    for result in parallel_data.analysis_results():
-        print(result)
+    parallel_data.analysis_results(dataframe=True)
 
 Broadcasting analysis options to child experiments
 --------------------------------------------------

diff --git a/qiskit_experiments/curve_analysis/base_curve_analysis.py b/qiskit_experiments/curve_analysis/base_curve_analysis.py
@@ -98,13 +98,6 @@ class BaseCurveAnalysis(BaseAnalysis, ABC):
     This method creates analysis results for important fit parameters
     that might be defined by analysis options ``result_parameters``.
 
-    .. rubric:: _create_curve_data
-
-    This method creates analysis results for the formatted dataset, i.e. data used for the fitting.
-    Entries are created when the analysis option ``return_data_points`` is ``True``.
-    If analysis consists of multiple series, analysis result is created for
-    each curve data in the series definitions.
-
     .. rubric:: _create_figures
 
     This method creates figures by consuming the scatter table data.
@@ -162,9 +155,9 @@ def _default_options(cls) -> Options:
                 dataset without formatting, on canvas. This is ``False`` by default.
             plot (bool): Set ``True`` to create figure for fit result or ``False`` to
                 not create a figure. This overrides the behavior of ``generate_figures``.
-            return_fit_parameters (bool): Set ``True`` to return all fit model parameters
-                with details of the fit outcome. Default to ``True``.
-            return_data_points (bool): Set ``True`` to include in the analysis result
+            return_fit_parameters (bool): (Deprecated) Set ``True`` to return all fit model parameters
+                with details of the fit outcome. Default to ``False``.
+            return_data_points (bool): (Deprecated) Set ``True`` to include in the analysis result
                 the formatted data points given to the fitter. Default to ``False``.
             data_processor (Callable): A callback function to format experiment data.
                 This can be a :class:`.DataProcessor`
@@ -237,49 +230,6 @@ def _default_options(cls) -> Options:
 
         return options
 
-    def set_options(self, **fields):
-        """Set the analysis options for :meth:`run` method.
-
-        Args:
-            fields: The fields to update the options
-
-        Raises:
-            KeyError: When removed option ``curve_fitter`` is set.
-        """
-        # TODO remove this in Qiskit Experiments v0.5
-
-        if "curve_fitter_options" in fields:
-            warnings.warn(
-                "The option 'curve_fitter_options' is replaced with 'lmfit_options.' "
-                "This option will be removed in Qiskit Experiments 0.5.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-            fields["lmfit_options"] = fields.pop("curve_fitter_options")
-
-        # TODO remove this in Qiskit Experiments 0.6
-        if "curve_drawer" in fields:
-            warnings.warn(
-                "The option 'curve_drawer' is replaced with 'plotter'. "
-                "This option will be removed in Qiskit Experiments 0.6.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
-            # Set the plotter drawer to `curve_drawer`. If `curve_drawer` is the right type, set it
-            # directly. If not, wrap it in a compatibility drawer.
-            if isinstance(fields["curve_drawer"], BaseDrawer):
-                plotter = self.options.plotter
-                plotter.drawer = fields.pop("curve_drawer")
-                fields["plotter"] = plotter
-            else:
-                drawer = fields["curve_drawer"]
-                compat_drawer = LegacyCurveCompatDrawer(drawer)
-                plotter = self.options.plotter
-                plotter.drawer = compat_drawer
-                fields["plotter"] = plotter
-
-        super().set_options(**fields)
-
     @abstractmethod
     def _run_data_processing(
         self,