Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port Lilypond to i2s format #2472

Merged
merged 2 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,33 @@
from helm.benchmark.annotation.image2structure.image_compiler_annotator import ImageCompilerAnnotator, CompilationError
from helm.benchmark.adaptation.request_state import RequestState
from helm.common.cache import CacheConfig
from helm.common.optional_dependencies import handle_module_not_found_error
from helm.common.optional_dependencies import handle_module_not_found_error, OptionalDependencyNotInstalled

try:
from PIL import Image
from PIL import Image, ImageOps
except ModuleNotFoundError as ex:
handle_module_not_found_error(ex, suggestions=["images"])


class LilyPondAnnotator(ImageCompilerAnnotator):
class LilypondCompilerAnnotator(ImageCompilerAnnotator):
"""Annotator that compiles the text completions into a music sheet with LilyPond."""

name: str = "lilypond_compiler"
base_path = "/home/josselin/installs/lilypond-2.24.3/bin"

def __int__(self, cache_config: CacheConfig, file_storage_path: str):
def __init__(self, cache_config: CacheConfig, file_storage_path: str):
super().__init__(cache_config, file_storage_path)
result = subprocess.run(["lilypond", "--version"], capture_output=True, text=True)
assert (
result.returncode == 0
), "LilyPond is not installed. Download and install it from https://lilypond.org/download.html"
try:
result = subprocess.run([f"{self.base_path}/lilypond", "--version"], capture_output=True, text=True)
if result.returncode != 0:
raise OptionalDependencyNotInstalled(
"LilyPond is not installed. Download and install it from https://lilypond.org/download.html"
)
except FileNotFoundError as e:
raise OptionalDependencyNotInstalled(
"LilyPond is not installed. Download and install it from https://lilypond.org/download.html.\n"
f"Original error: {e}"
) from e

def compile_completion_into_image(
self, request_state: RequestState, completion_text: str
Expand All @@ -43,18 +51,27 @@ def compile_completion_into_image(

try:
# Edits the LilyPond file to be compatible with the current version
result = subprocess.run(["convert-ly", "-e", ly_file_path], capture_output=True, text=True)
result = subprocess.run(
[f"{self.base_path}/convert-ly", "-e", ly_file_path], capture_output=True, text=True
)
assert result.returncode == 0, f"convert-ly failed: {result.stderr}"

# Generate PNG image from the LilyPond file
# LilyPond supports partial compilation, which means it attempts to produce an image
# for the correct portions of the code, even if there are errors elsewhere
subprocess.run(["lilypond", "--png", "-o", output_path, ly_file_path], capture_output=True, text=True)
subprocess.run(
[f"{self.base_path}/lilypond", "--png", "-o", output_path, ly_file_path], capture_output=True, text=True
)
# If an image file is not generated, we consider it an absolute compilation failure
assert os.path.exists(sheet_music_path), "lilypond did not generate the image"

# Load the image as a PIL Image object
image = Image.open(sheet_music_path)

# Crop the image to remove the white space around the music sheet
(w, h) = image.size
image = image.crop((0, 0, w, h - int(h * 0.2))) # Remove pagination
image = image.crop(ImageOps.invert(image).getbbox()) # Remove white border
except (AssertionError, RuntimeError) as e:
raise CompilationError(str(e)) from e
finally:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ entries: [
{description: "image2latex:subset=algorithm,model=vlm", priority: 1, groups: ["image2latex"]}

# sheetmusic2lilypond
{description: "sheetmusic2lilypond:model=vlm", priority: 1}
{description: "image2musicsheet:model=vlm", priority: 1, groups: ["image2musicsheet"]}

# webpages
{description: "image2webpage:subset=css,model=vlm", priority: 1, groups: ["image2webpage"]}
Expand Down
68 changes: 33 additions & 35 deletions src/helm/benchmark/run_specs/vlm_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ def get_image2structure_metric_specs(
metric_names = [
AnnotatedImageMetrics.PIXEL_SIMILARITY,
AnnotatedImageMetrics.FID_SIMILARITY,
AnnotatedImageMetrics.EDIT_SIMILARITY,
AnnotatedImageMetrics.EARTH_MOVER_SIMILARITY,
]
if include_edit_similarity:
Expand Down Expand Up @@ -268,6 +267,39 @@ def get_image2webpage_spec(subset: str, recompile_prompt: bool = False, args: Op
)


@run_spec_function("image2musicsheet")
def get_image2musicsheet_spec(args: Optional[Dict] = None) -> RunSpec:
    """Build the run spec for the ``image2musicsheet`` scenario.

    The spec wires together the music sheet scenario, a generation adapter,
    the image2structure metrics for the ``musicsheet`` generation type (edit
    similarity disabled) and the Lilypond compiler annotator.

    Args:
        args: Optional dictionary forwarded unchanged to
            ``get_image2structure_metric_specs``.

    Returns:
        A ``RunSpec`` named ``image2musicsheet`` whose only group is
        ``image2musicsheet``.
    """
    run_spec_name: str = "image2musicsheet"

    # There is only one subset for music sheets.
    scenario_args = {"subset": "music", "recompile_prompt": False}
    scenario_spec = ScenarioSpec(
        class_name="helm.benchmark.scenarios.vision_language.image2structure.musicsheet_scenario.MusicSheetScenario",
        args=scenario_args,
    )

    adapter_spec: AdapterSpec = get_generation_adapter_spec(
        instructions="Just give a short answer without answering in a complete sentence.",
        max_tokens=2000,
    )

    metric_specs: List[MetricSpec] = get_image2structure_metric_specs(
        generation_type="musicsheet",
        args=args,
        include_edit_similarity=False,  # No ground truth for music sheets
        size_handling_method="padding",
    )

    lilypond_annotator = AnnotatorSpec(
        class_name="helm.benchmark.annotation.image2structure.lilypond_compiler_annotator.LilypondCompilerAnnotator",  # noqa: E501
    )
    annotator_specs: List[AnnotatorSpec] = [lilypond_annotator]

    return RunSpec(
        name=run_spec_name,
        scenario_spec=scenario_spec,
        adapter_spec=adapter_spec,
        metric_specs=metric_specs,
        groups=[run_spec_name],
        annotators=annotator_specs,
    )


@run_spec_function("mmmu")
def get_mmmu_spec(subject: str, question_type: str) -> RunSpec:
scenario_spec = ScenarioSpec(
Expand Down Expand Up @@ -318,37 +350,3 @@ def get_heim_human_eval_spec(question_type: str) -> RunSpec:
metric_specs=metric_specs,
groups=[run_spec_name],
)


@run_spec_function("sheetmusic2lilypond")
def get_sheetmusic2lilypond_spec() -> RunSpec:
    """Build the run spec for the ``sheetmusic2lilypond`` scenario.

    The spec combines the sheet-music scenario (no scenario arguments), a
    generation adapter asking for LilyPond code only, the image2structure
    metrics for the ``lilypond`` generation type (edit similarity disabled)
    and the LilyPond annotator.

    Returns:
        A ``RunSpec`` named ``sheetmusic2lilypond`` whose only group is
        ``sheetmusic2lilypond``.
    """
    run_spec_name: str = "sheetmusic2lilypond"

    scenario_class_name = (
        "helm.benchmark.scenarios.vision_language.image2structure.sheetmusic2lilypond_scenario."
        "SheetMusic2LilyPondScenario"
    )
    scenario_spec = ScenarioSpec(class_name=scenario_class_name, args={})

    instructions = (
        "Generate the LilyPond code for the following sheet music. "
        "Just give the LilyPond code without any explanation."
    )
    adapter_spec: AdapterSpec = get_generation_adapter_spec(
        instructions=instructions,
        max_tokens=1500,
    )

    metric_specs: List[MetricSpec] = get_image2structure_metric_specs(
        generation_type="lilypond",
        include_edit_similarity=False,
    )

    lilypond_annotator = AnnotatorSpec(
        class_name="helm.benchmark.annotation.image2structure.lilypond_compiler_annotator.LilyPondAnnotator",
    )
    annotator_specs: List[AnnotatorSpec] = [lilypond_annotator]

    return RunSpec(
        name=run_spec_name,
        scenario_spec=scenario_spec,
        adapter_spec=adapter_spec,
        metric_specs=metric_specs,
        annotators=annotator_specs,
        groups=[run_spec_name],
    )
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ def get_instances(self, output_path: str) -> List[Instance]:
cache_dir=output_path,
)
):
question_id: str = row["num_id"]
question_uuid: str = row["uuid"]
if row["category"][1:-1] != self._subset:
hlog(
f"Skipping instance {question_id} as it belong in category"
f"Skipping instance {question_uuid} as it belong in category"
f" {row['category']} and not {self._subset}"
)
continue
Expand All @@ -114,11 +114,13 @@ def get_instances(self, output_path: str) -> List[Instance]:
row = self.preprocess_row(row, assets_path)

# Step 2: Save the image locally
image_path: str = os.path.join(images_path, f"{question_id}.png")
image_path: str = os.path.join(images_path, f"{question_uuid}.png")
if not os.path.exists(image_path):
if not self._recompile_prompt: # 2.a
row["image"].save(image_path)
else: # 2.b
if "structure" not in row:
raise ValueError("Cannot recompile prompt without structure")
structure: str = row["structure"]
text: str = self.compile_and_save(structure, assets_path, image_path)
row["text"] = text
Expand All @@ -135,28 +137,40 @@ def get_instances(self, output_path: str) -> List[Instance]:

# Step 5: Create the references
# 5.a Create the reference containing the structure and the associated image.
multimedia_object: MultimediaObject
if os.path.exists(row["structure"]):
# 5.a.1 The structure is a path, therefore represent it as a multimedia object
# containing the files used to compile the structure (such as a repository
# containing the HTML, CSS, and JavaScript files used to generate a webpage)
multimedia_object = MultimediaObject(
[image_object, MediaObject(location=row["structure"], content_type="path/path")]
reference: Reference
if "structure" in row:
multimedia_object: MultimediaObject
if os.path.exists(row["structure"]):
# 5.a.1 The structure is a path, therefore represent it as a multimedia object
# containing the files used to compile the structure (such as a repository
# containing the HTML, CSS, and JavaScript files used to generate a webpage)
multimedia_object = MultimediaObject(
[image_object, MediaObject(location=row["structure"], content_type="path/path")]
)
elif row["structure"] == PROCESSED:
# 5.a.2 The structure has been processed and is no longer present in the row
# This can be the case if the structure is a base64 encoding of an archive that
# has been extracted to a temporary path and processed, but the path no longer
# exists (it is deleted once the processing is done)
multimedia_object = MultimediaObject([image_object])
else:
# 5.a.3 The structure is not a path, therefore it is directly a valid string
# representing the structure (such as LaTeX code)
multimedia_object = MultimediaObject([image_object])
reference = Reference(
output=Output(text=row["text"], multimedia_content=multimedia_object),
tags=[CORRECT_TAG],
)
elif row["structure"] == PROCESSED:
# 5.a.2 The structure has been processed and is no longer present in the row
# This can be the case if the structure is a base64 encoding of an archive that
# has been extracted to a temporary path and processed, but the path no longer
# exists (it is deleted once the processing is done)
multimedia_object = MultimediaObject([image_object])
else:
# 5.a.3 The structure is not a path, therefore it is directly a valid string
# representing the structure (such as LaTeX code)
multimedia_object = MultimediaObject([image_object])
reference = Reference(
output=Output(text=row["text"], multimedia_content=multimedia_object),
tags=[CORRECT_TAG],
)
if "text" in row:
reference = Reference(
output=Output(text=row["text"], multimedia_content=MultimediaObject([image_object])),
tags=[CORRECT_TAG],
)
else:
reference = Reference(
output=Output(multimedia_content=MultimediaObject([image_object])), tags=[CORRECT_TAG]
)
references: List[Reference] = [reference]

# 5.b Create the reference containing the assets
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from helm.benchmark.scenarios.scenario import VALID_SPLIT
from helm.benchmark.scenarios.vision_language.image2structure.image2structure_scenario import Image2StructureScenario


class MusicSheetScenario(Image2StructureScenario):
    """Image2structure scenario that asks a model for Lilypond code reproducing a music sheet image.

    Music sheets come without a ground-truth structure, so the prompt image
    cannot be recompiled: ``compile_and_save`` always raises.
    """

    # Prompt text: the two sentences are separated by a single newline.
    BASE_PROMPT = (
        "Please generate the Lilypond code to generate a music sheet that looks like this image "
        "as much as feasible possible.\n"
        "This music sheet was created by me, and I would like to recreate it using Lilypond."
    )
    HUGGINGFACE_DATASET_NAME = "stanford-crfm/i2s-musicsheet"
    SUBSETS = ["music"]

    name = "image2musicsheet"
    description = "Evaluate multimodal models on Lilypond generation to recreate a provided image"

    def __init__(self, subset: str, recompile_prompt: bool = True, split: str = VALID_SPLIT):
        """Forward ``subset``, ``recompile_prompt`` and ``split`` to the parent scenario.

        NOTE(review): ``recompile_prompt`` defaults to ``True`` even though
        ``compile_and_save`` below always raises -- confirm that callers are
        expected to pass ``False`` explicitly.
        """
        super().__init__(subset, recompile_prompt, split)

    def compile_and_save(self, structure: str, assets_path: str, destination_path: str) -> str:
        """Always raise: music sheets have no ground truth to compile.

        Raises:
            Exception: unconditionally, whatever the arguments are.
        """
        raise Exception("Music sheets have no ground truth, compilation is not possible")

This file was deleted.

Loading