feat: force to generate for non-required fields (#91)

* Allow creating strings of any length. If the generated string ends with a space, the space is changed to a dot (e.g. 'at.' instead of 'at '), just to avoid confusion. * Allow generating data even for non-required fields (similar to Polyfactory's allow_none_optionals). The value should be a float between 0.0 and 1.0, indicating the probability of getting a None value: 0.0 means "0% chance of getting a None", 1.0 means "100% chance of getting a None", 0.5 means "50% chance of getting a None", etc. The default is 0.5.
ghandic · Jan 28, 2024 · 4c935d6 · 4c935d6
1 parent 5cbcd0d
commit 4c935d6
Show file tree

Hide file tree

Showing 19 changed files with 136 additions and 42 deletions.
diff --git a/jsf/README.md b/jsf/README.md
@@ -85,7 +85,7 @@ fake_json = faker.generate()
 ```
 
 <details markdown="1">
-<summary>Or run stright from the <code>commandline</code>...</summary>
+<summary>Or run straight from the <code>commandline</code>...</summary>
 
 #### Native install
 

diff --git a/jsf/parser.py b/jsf/parser.py
@@ -5,6 +5,7 @@
 from copy import deepcopy
 from datetime import datetime
 from itertools import count
+from types import MappingProxyType
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 from faker import Faker
@@ -32,13 +33,16 @@ class JSF:
     def __init__(
         self,
         schema: Dict[str, Any],
-        context: Dict[str, Any] = {
-            "faker": faker,
-            "random": random,
-            "datetime": datetime,
-            "__internal__": {"List": List, "Union": Union, "Tuple": Tuple},
-        },
-        initial_state: Dict[str, Any] = {},
+        context: Dict[str, Any] = MappingProxyType(
+            {
+                "faker": faker,
+                "random": random,
+                "datetime": datetime,
+                "__internal__": {"List": List, "Union": Union, "Tuple": Tuple},
+            }
+        ),
+        initial_state: Dict[str, Any] = MappingProxyType({}),
+        allow_none_optionals: float = 0.5,
     ):
         self.root_schema = schema
         self.definitions = {}
@@ -48,18 +52,35 @@ def __init__(
             **initial_state,
         }
         self.base_context = context
+        self.allow_none_optionals = allow_none_optionals
 
         self.root = None
         self._parse(schema)
 
     def __parse_primitive(self, name: str, path: str, schema: Dict[str, Any]) -> PrimitiveTypes:
         item_type, is_nullable = self.__is_field_nullable(schema)
         cls = Primitives.get(item_type)
-        return cls.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema})
+        return cls.from_dict(
+            {
+                "name": name,
+                "path": path,
+                "is_nullable": is_nullable,
+                "allow_none_optionals": self.allow_none_optionals,
+                **schema,
+            }
+        )
 
     def __parse_object(self, name: str, path: str, schema: Dict[str, Any]) -> Object:
         _, is_nullable = self.__is_field_nullable(schema)
-        model = Object.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema})
+        model = Object.from_dict(
+            {
+                "name": name,
+                "path": path,
+                "is_nullable": is_nullable,
+                "allow_none_optionals": self.allow_none_optionals,
+                **schema,
+            }
+        )
         props = []
         for _name, definition in schema.get("properties", {}).items():
             props.append(self.__parse_definition(_name, path=f"{path}/{_name}", schema=definition))
@@ -75,13 +96,29 @@ def __parse_object(self, name: str, path: str, schema: Dict[str, Any]) -> Object
 
     def __parse_array(self, name: str, path: str, schema: Dict[str, Any]) -> Array:
         _, is_nullable = self.__is_field_nullable(schema)
-        arr = Array.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema})
+        arr = Array.from_dict(
+            {
+                "name": name,
+                "path": path,
+                "is_nullable": is_nullable,
+                "allow_none_optionals": self.allow_none_optionals,
+                **schema,
+            }
+        )
         arr.items = self.__parse_definition(name, name, schema["items"])
         return arr
 
     def __parse_tuple(self, name: str, path: str, schema: Dict[str, Any]) -> JSFTuple:
         _, is_nullable = self.__is_field_nullable(schema)
-        arr = JSFTuple.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema})
+        arr = JSFTuple.from_dict(
+            {
+                "name": name,
+                "path": path,
+                "is_nullable": is_nullable,
+                "allow_none_optionals": self.allow_none_optionals,
+                **schema,
+            }
+        )
         arr.items = []
         for i, item in enumerate(schema["items"]):
             arr.items.append(self.__parse_definition(name, path=f"{name}[{i}]", schema=item))
@@ -145,7 +182,13 @@ def __parse_definition(self, name: str, path: str, schema: Dict[str, Any]) -> Al
                 isinstance(item, (int, float, str, type(None))) for item in enum_list
             ), "Enum Type is not null, int, float or string"
             return JSFEnum.from_dict(
-                {"name": name, "path": path, "is_nullable": is_nullable, **schema}
+                {
+                    "name": name,
+                    "path": path,
+                    "is_nullable": is_nullable,
+                    "allow_none_optionals": self.allow_none_optionals,
+                    **schema,
+                }
             )
         elif "type" in schema:
             if item_type == "object" and "properties" in schema:

diff --git a/jsf/schema_types/_tuple.py b/jsf/schema_types/_tuple.py
@@ -15,7 +15,8 @@ class JSFTuple(BaseSchema):
     uniqueItems: Optional[bool] = False
     fixed: Optional[Union[int, str]] = Field(None, alias="$fixed")
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return JSFTuple(**d)
 
     def generate(self, context: Dict[str, Any]) -> Optional[List[Tuple]]:

diff --git a/jsf/schema_types/allof.py b/jsf/schema_types/allof.py
@@ -6,7 +6,8 @@
 class AllOf(BaseSchema):
     combined_schema: BaseSchema = None
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return AllOf(**d)
 
     def generate(self, context: Dict[str, Any]) -> Optional[Any]:

diff --git a/jsf/schema_types/anyof.py b/jsf/schema_types/anyof.py
@@ -7,7 +7,8 @@
 class AnyOf(BaseSchema):
     schemas: List[BaseSchema] = None
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return AnyOf(**d)
 
     def generate(self, context: Dict[str, Any]) -> Optional[Any]:

diff --git a/jsf/schema_types/array.py b/jsf/schema_types/array.py
@@ -14,7 +14,8 @@ class Array(BaseSchema):
     uniqueItems: Optional[bool] = False
     fixed: Optional[Union[int, str]] = Field(None, alias="$fixed")
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return Array(**d)
 
     def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]:
@@ -28,7 +29,7 @@ def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]:
 
             output = [
                 self.items.generate(context)
-                for _ in range(random.randint(self.minItems, self.maxItems))
+                for _ in range(random.randint(int(self.minItems), int(self.maxItems)))
             ]
             if self.uniqueItems and self.items.type == "object":
                 output = [dict(s) for s in {frozenset(d.items()) for d in output}]

diff --git a/jsf/schema_types/base.py b/jsf/schema_types/base.py
@@ -33,15 +33,17 @@ class BaseSchema(BaseModel):
     provider: Optional[str] = Field(None, alias="$provider")
     set_state: Optional[Dict[str, str]] = Field(None, alias="$state")
     is_nullable: bool = False
+    allow_none_optionals: float = Field(0.5, ge=0.0, le=1.0)
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         raise NotImplementedError  # pragma: no cover
 
     def generate(self, context: Dict[str, Any]) -> Any:
         if self.set_state is not None:
             context["state"][self.path] = {k: eval(v, context)() for k, v in self.set_state.items()}
 
-        if self.is_nullable and random.uniform(0, 1) < 0.9:
+        if self.is_nullable and random.uniform(0, 1) < self.allow_none_optionals:
             return None
         if self.provider is not None:
             return eval(self.provider, context)()
@@ -62,4 +64,4 @@ def to_pydantic(self, context, _type):
                 Optional[_type],
                 Field(..., description=self.description, example=example),
             )
-        return (_type, Field(..., description=self.description, example=example))
+        return _type, Field(..., description=self.description, example=example)
diff --git a/jsf/schema_types/boolean.py b/jsf/schema_types/boolean.py
@@ -14,5 +14,6 @@ def generate(self, context: Dict[str, Any]) -> Optional[bool]:
     def model(self, context: Dict[str, Any]):
         return self.to_pydantic(context, bool)
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return Boolean(**d)
diff --git a/jsf/schema_types/enum.py b/jsf/schema_types/enum.py
@@ -20,7 +20,8 @@ def generate(self, context: Dict[str, Any]) -> Optional[Union[str, int, float]]:
         except ProviderNotSetException:
             return random.choice(self.enum)
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return JSFEnum(**d)
 
     def model(self, context: Dict[str, Any]):

diff --git a/jsf/schema_types/null.py b/jsf/schema_types/null.py
@@ -13,5 +13,6 @@ def generate(self, context: Dict[str, Any]) -> None:
     def model(self, context: Dict[str, Any]):
         return self.to_pydantic(context, type(None))
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return Null(**d)
diff --git a/jsf/schema_types/number.py b/jsf/schema_types/number.py
@@ -40,7 +40,8 @@ def generate(self, context: Dict[str, Any]) -> Optional[float]:
     def model(self, context: Dict[str, Any]):
         return self.to_pydantic(context, float)
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return Number(**d)
 
 
@@ -52,5 +53,6 @@ def generate(self, context: Dict[str, Any]) -> Optional[int]:
     def model(self, context: Dict[str, Any]):
         return self.to_pydantic(context, int)
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return Integer(**d)
diff --git a/jsf/schema_types/object.py b/jsf/schema_types/object.py
@@ -28,13 +28,14 @@ class Object(BaseSchema):
     dependencies: Optional[Union[PropertyDependency, SchemaDependency]] = None
     patternProperties: Optional[Dict[str, BaseSchema]] = None
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: dict):
         return Object(**d)
 
     def should_keep(self, property_name: str) -> bool:
         if isinstance(self.required, list) and property_name in self.required:
             return True
-        return random.uniform(0, 1) < 0.5
+        return random.uniform(0, 1) > self.allow_none_optionals
 
     def generate(self, context: Dict[str, Any]) -> Optional[Dict[str, Any]]:
         try:

diff --git a/jsf/schema_types/oneof.py b/jsf/schema_types/oneof.py
@@ -7,7 +7,8 @@
 class OneOf(BaseSchema):
     schemas: List[BaseSchema] = None
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return OneOf(**d)
 
     def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]:

diff --git a/jsf/schema_types/string.py b/jsf/schema_types/string.py
@@ -67,8 +67,8 @@ def temporal_duration(
     return duration
 
 
-def mostly_zero_randint(min, max):
-    return 0 if random.random() > 0.8 else random.randint(min, max)
+def mostly_zero_randint(_min: int, _max: int) -> int:
+    return 0 if random.random() > 0.8 else random.randint(int(_min), int(_max))
 
 
 def fake_duration():
@@ -117,8 +117,8 @@ def fake_duration():
 
 
 class String(BaseSchema):
-    minLength: Optional[float] = 0
-    maxLength: Optional[float] = 50
+    minLength: Optional[int] = 0
+    maxLength: Optional[int] = 50
     pattern: Optional[str] = None
     format: Optional[str] = None
     # enum: Optional[List[Union[str, int, float]]] = None  # NOTE: Not used - enums go to enum class
@@ -151,5 +151,6 @@ def generate(self, context: Dict[str, Any]) -> Optional[str]:
     def model(self, context: Dict[str, Any]):
         return self.to_pydantic(context, str)
 
-    def from_dict(d):
+    @classmethod
+    def from_dict(cls, d: Dict):
         return String(**d)
diff --git a/jsf/schema_types/string_utils/content_encoding.py b/jsf/schema_types/string_utils/content_encoding.py
@@ -1,7 +1,6 @@
 import base64
 import quopri
 from enum import Enum
-from typing import Optional
 
 
 class ContentEncoding(str, Enum):
@@ -57,5 +56,5 @@ def b64_encoder(string: str) -> str:
 }
 
 
-def encode(string: str, encoding: Optional[ContentEncoding]) -> str:
+def encode(string: str, encoding: ContentEncoding) -> str:
     return Encoder.get(encoding, lambda s: s)(string)
diff --git a/jsf/schema_types/string_utils/content_type/text__plain.py b/jsf/schema_types/string_utils/content_type/text__plain.py
@@ -3,17 +3,23 @@
 LOREM = """Lorem ipsum dolor sit amet consectetur adipisicing elit.
 Hic molestias, esse veniam placeat officiis nobis architecto modi
 possimus reiciendis accusantium exercitationem quas illum libero odit magnam,
-reprehenderit ipsum, repellendus culpa!""".split()
+reprehenderit ipsum, repellendus culpa! Nullam vehicula ipsum a arcu cursus vitae congue.
+Enim nec dui nunc mattis enim ut tellus.""".split()
 
 
 def random_fixed_length_sentence(_min: int = 0, _max: int = 50) -> str:
+    if _min > _max:
+        raise ValueError("'_max' should be greater than '_min'")  # pragma: no cover
     output = ""
-    while len(output) < _max:
+    while True:
         remaining = _max - len(output)
-        valid_words = list(filter(lambda s: len(s) < remaining, LOREM))
+        valid_words = list(filter(lambda s: len(s) <= remaining, LOREM))
         if len(valid_words) == 0:
             break
-        output += random.choice(valid_words) + " "
-        if len(output) > _min and random.uniform(0, 1) > 0.9:
+        if len(output) >= _min and random.uniform(0, 1) > 0.9:
             break
-    return output.strip()
+        output += random.choice(valid_words) + " "
+    output = output.strip()
+    if len(output) < _min:
+        output = output + "."
+    return output
diff --git a/jsf/tests/data/object-with-optionals.json b/jsf/tests/data/object-with-optionals.json
@@ -0,0 +1,8 @@
+{
+    "type": "object",
+    "required": ["name"],
+    "properties": {
+        "name": { "type": "string" },
+        "credit_card": { "type": "number" }
+    }
+}
diff --git a/jsf/tests/data/string-max-min-length.json b/jsf/tests/data/string-max-min-length.json
@@ -0,0 +1,5 @@
+{
+    "type": "string",
+    "maxLength": 2,
+    "minLength": 2
+}
diff --git a/jsf/tests/test_default_fake.py b/jsf/tests/test_default_fake.py
@@ -110,6 +110,15 @@ def test_fake_string(TestData):
     assert len(fake_data) - len(set(fake_data)) < 50
 
 
+def test_fake_string_max_min_length(TestData):
+    with open(TestData / "string-max-min-length.json", "r") as file:
+        schema = json.load(file)
+    p = JSF(schema)
+    assert isinstance(p.generate(), str)
+    fake_data = [p.generate() for _ in range(10)]
+    assert all(len(fd) == 2 for fd in fake_data)
+
+
 def test_fake_string_content_encoding(TestData):
     with open(TestData / "string-content-encoding.json", "r") as file:
         schema = json.load(file)
@@ -426,3 +435,13 @@ def test_list_of_types(TestData):
     assert all(type(f["randTypeValue"]) in [bool, int, float, str] for f in fake_data), fake_data
     assert all(isinstance(f["int"], int) for f in fake_data), fake_data
     assert all(isinstance(f["null"], type(None)) for f in fake_data), fake_data
+
+
+def test_non_required_are_not_none(TestData):
+    with open(TestData / "object-with-optionals.json", "r") as file:
+        schema = json.load(file)
+    for _ in range(10):
+        fake_data = JSF(schema, allow_none_optionals=0.0).generate()
+
+        assert fake_data["name"] is not None
+        assert fake_data["credit_card"] is not None