apache · AnandInguva · Dec 11, 2023 · Nov 29, 2023 · Nov 29, 2023 · Nov 29, 2023
diff --git a/sdks/python/apache_beam/ml/transforms/base.py b/sdks/python/apache_beam/ml/transforms/base.py
@@ -93,28 +93,17 @@ class PTransformProvider:
   Data processing transforms that are intended to be used with MLTransform
   should subclass PTransformProvider and implement the following methods:
   1. get_ptransform_for_processing()
-  2. requires_chaining()
 
   get_ptransform_for_processing() method should return a PTransform that can be
   used to process the data.
 
-  requires_chaining() method should return True if the data processing
-  transforms needs to be chained sequentially with compatible data processing
-  transforms.
   """
   @abc.abstractmethod
   def get_ptransform_for_processing(self, **kwargs) -> beam.PTransform:
     """
     Returns a PTransform that can be used to process the data.
     """
 
-  @abc.abstractmethod
-  def requires_chaining(self):
-    """
-    Returns True if the data processing transforms needs to be chained
-    sequentially with compatible data processing transforms.
-    """
-
   def get_counter(self):
     """
     Returns the counter name for the data processing transform.
@@ -196,10 +185,6 @@ def get_model_handler(self) -> ModelHandler:
     Return framework specific model handler.
     """
 
-  def requires_chaining(self):
-    # each embedding config requires a separate PTransform. so no chaining.
-    return False
-
   def get_columns_to_apply(self):
     return self.columns
 
@@ -495,15 +480,14 @@ def create_ptransform_list(self):
           artifact_location=os.path.join(
               self._parent_artifact_location, uuid.uuid4().hex[:6]),
           artifact_mode=self.artifact_mode)
-      # Determine if a new ptransform should be added to the list
-      is_different_type = (type(current_ptransform) != previous_ptransform_type)
-      if is_different_type or not transform.requires_chaining():
+      append_transform = hasattr(current_ptransform, 'append_transform')
+      if (type(current_ptransform) != previous_ptransform_type) or not append_transform:
         ptransform_list.append(current_ptransform)
         previous_ptransform_type = type(current_ptransform)
-
-      if hasattr(ptransform_list[-1], 'append_transform'):
+      # If the PTransform supports append_transform, add the transform to the
+      # last PTransform in the list (the new one or the one being chained).
+      if append_transform:
         ptransform_list[-1].append_transform(transform)
-
     return ptransform_list
 
   def save_transforms_in_artifact_location(self, ptransform_list):

diff --git a/sdks/python/apache_beam/ml/transforms/embeddings/sentence_transformer.py b/sdks/python/apache_beam/ml/transforms/embeddings/sentence_transformer.py
@@ -122,7 +122,4 @@ def get_model_handler(self):
   def get_ptransform_for_processing(self, **kwargs) -> beam.PTransform:
     # wrap the model handler in a _TextEmbeddingHandler since
     # the SentenceTransformerEmbeddings works on text input data.
-    return (RunInference(model_handler=_TextEmbeddingHandler(self)))
-
-  def requires_chaining(self):
-    return False
+    return (RunInference(model_handler=_TextEmbeddingHandler(self)))
diff --git a/sdks/python/apache_beam/ml/transforms/embeddings/tensorflow_hub.py b/sdks/python/apache_beam/ml/transforms/embeddings/tensorflow_hub.py
@@ -45,7 +45,7 @@ def __init__(self, preprocessing_url: Optional[str], *args, **kwargs):
   def load_model(self):
     # unable to load the models with tf.keras.models.load_model so
     # using hub.KerasLayer instead
-    model = hub.KerasLayer(self._model_uri)
+    model = hub.KerasLayer(self._model_uri, )
     return model
 
   def _convert_prediction_result_to_list(

diff --git a/sdks/python/apache_beam/ml/transforms/embeddings/tensorflow_hub_test.py b/sdks/python/apache_beam/ml/transforms/embeddings/tensorflow_hub_test.py
@@ -21,7 +21,7 @@
 import apache_beam as beam
 from apache_beam.ml.transforms.base import MLTransform
 
-hub_url = 'https://tfhub.dev/google/LEALLA/LEALLA-small/1'
+hub_url = 'https://tfhub.dev/google/nnlm-en-dim128/2'
 test_query_column = 'test_query'
 test_query = 'This is a test query'
 
@@ -134,12 +134,12 @@ def test_embeddings_with_read_artifact_location(self):
             pipeline=data, read_artifact_location=self.artifact_location)
 
         def assert_element(element):
-          assert round(element, 2) == 0.21
+          # Unrounded value: 0.29836970567703247
+          assert round(element, 2) == 0.3
 
         _ = (
             result_pcoll
             | beam.Map(lambda x: max(x[test_query_column]))
-            #  0.14797046780586243
             | beam.Map(assert_element))
 
   def test_with_int_data_types(self):
@@ -185,12 +185,12 @@ def test_with_gcs_artifact_location(self):
             pipeline=data, read_artifact_location=artifact_location)
 
         def assert_element(element):
-          assert round(element, 2) == 0.21
+          # Unrounded value: 0.29836970567703247
+          assert round(element, 2) == 0.3
 
         _ = (
             result_pcoll
             | beam.Map(lambda x: max(x[test_query_column]))
-            #  0.14797046780586243
             | beam.Map(assert_element))