Skip to content

Commit

Permalink
feat: force to generate for non-required fields (#91)
Browse files Browse the repository at this point in the history
* Allow creating strings of any length. If the generated string ends with a space, the space is replaced with a dot (e.g. 'at.' instead of 'at '), just to avoid confusion.
* Allow generating data even for non-required fields (similar to Polyfactory's allow_none_optionals). The value should be a float between 0.0 and 1.0 giving the probability of getting a None value: 0.0 means "0% chance of getting a None", 1.0 means "100% chance of getting a None", 0.5 means "50% chance of getting a None", etc. The default is 0.5.
  • Loading branch information
elecay authored Jan 28, 2024
1 parent 5cbcd0d commit 4c935d6
Show file tree
Hide file tree
Showing 19 changed files with 136 additions and 42 deletions.
2 changes: 1 addition & 1 deletion jsf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ fake_json = faker.generate()
```

<details markdown="1">
<summary>Or run stright from the <code>commandline</code>...</summary>
<summary>Or run straight from the <code>commandline</code>...</summary>

#### Native install

Expand Down
67 changes: 55 additions & 12 deletions jsf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from copy import deepcopy
from datetime import datetime
from itertools import count
from types import MappingProxyType
from typing import Any, Dict, List, Optional, Tuple, Union

from faker import Faker
Expand Down Expand Up @@ -32,13 +33,16 @@ class JSF:
def __init__(
self,
schema: Dict[str, Any],
context: Dict[str, Any] = {
"faker": faker,
"random": random,
"datetime": datetime,
"__internal__": {"List": List, "Union": Union, "Tuple": Tuple},
},
initial_state: Dict[str, Any] = {},
context: Dict[str, Any] = MappingProxyType(
{
"faker": faker,
"random": random,
"datetime": datetime,
"__internal__": {"List": List, "Union": Union, "Tuple": Tuple},
}
),
initial_state: Dict[str, Any] = MappingProxyType({}),
allow_none_optionals: float = 0.5,
):
self.root_schema = schema
self.definitions = {}
Expand All @@ -48,18 +52,35 @@ def __init__(
**initial_state,
}
self.base_context = context
self.allow_none_optionals = allow_none_optionals

self.root = None
self._parse(schema)

def __parse_primitive(self, name: str, path: str, schema: Dict[str, Any]) -> PrimitiveTypes:
item_type, is_nullable = self.__is_field_nullable(schema)
cls = Primitives.get(item_type)
return cls.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema})
return cls.from_dict(
{
"name": name,
"path": path,
"is_nullable": is_nullable,
"allow_none_optionals": self.allow_none_optionals,
**schema,
}
)

def __parse_object(self, name: str, path: str, schema: Dict[str, Any]) -> Object:
_, is_nullable = self.__is_field_nullable(schema)
model = Object.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema})
model = Object.from_dict(
{
"name": name,
"path": path,
"is_nullable": is_nullable,
"allow_none_optionals": self.allow_none_optionals,
**schema,
}
)
props = []
for _name, definition in schema.get("properties", {}).items():
props.append(self.__parse_definition(_name, path=f"{path}/{_name}", schema=definition))
Expand All @@ -75,13 +96,29 @@ def __parse_object(self, name: str, path: str, schema: Dict[str, Any]) -> Object

def __parse_array(self, name: str, path: str, schema: Dict[str, Any]) -> Array:
_, is_nullable = self.__is_field_nullable(schema)
arr = Array.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema})
arr = Array.from_dict(
{
"name": name,
"path": path,
"is_nullable": is_nullable,
"allow_none_optionals": self.allow_none_optionals,
**schema,
}
)
arr.items = self.__parse_definition(name, name, schema["items"])
return arr

def __parse_tuple(self, name: str, path: str, schema: Dict[str, Any]) -> JSFTuple:
_, is_nullable = self.__is_field_nullable(schema)
arr = JSFTuple.from_dict({"name": name, "path": path, "is_nullable": is_nullable, **schema})
arr = JSFTuple.from_dict(
{
"name": name,
"path": path,
"is_nullable": is_nullable,
"allow_none_optionals": self.allow_none_optionals,
**schema,
}
)
arr.items = []
for i, item in enumerate(schema["items"]):
arr.items.append(self.__parse_definition(name, path=f"{name}[{i}]", schema=item))
Expand Down Expand Up @@ -145,7 +182,13 @@ def __parse_definition(self, name: str, path: str, schema: Dict[str, Any]) -> Al
isinstance(item, (int, float, str, type(None))) for item in enum_list
), "Enum Type is not null, int, float or string"
return JSFEnum.from_dict(
{"name": name, "path": path, "is_nullable": is_nullable, **schema}
{
"name": name,
"path": path,
"is_nullable": is_nullable,
"allow_none_optionals": self.allow_none_optionals,
**schema,
}
)
elif "type" in schema:
if item_type == "object" and "properties" in schema:
Expand Down
3 changes: 2 additions & 1 deletion jsf/schema_types/_tuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ class JSFTuple(BaseSchema):
uniqueItems: Optional[bool] = False
fixed: Optional[Union[int, str]] = Field(None, alias="$fixed")

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return JSFTuple(**d)

def generate(self, context: Dict[str, Any]) -> Optional[List[Tuple]]:
Expand Down
3 changes: 2 additions & 1 deletion jsf/schema_types/allof.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
class AllOf(BaseSchema):
combined_schema: BaseSchema = None

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return AllOf(**d)

def generate(self, context: Dict[str, Any]) -> Optional[Any]:
Expand Down
3 changes: 2 additions & 1 deletion jsf/schema_types/anyof.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
class AnyOf(BaseSchema):
schemas: List[BaseSchema] = None

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return AnyOf(**d)

def generate(self, context: Dict[str, Any]) -> Optional[Any]:
Expand Down
5 changes: 3 additions & 2 deletions jsf/schema_types/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ class Array(BaseSchema):
uniqueItems: Optional[bool] = False
fixed: Optional[Union[int, str]] = Field(None, alias="$fixed")

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return Array(**d)

def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]:
Expand All @@ -28,7 +29,7 @@ def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]:

output = [
self.items.generate(context)
for _ in range(random.randint(self.minItems, self.maxItems))
for _ in range(random.randint(int(self.minItems), int(self.maxItems)))
]
if self.uniqueItems and self.items.type == "object":
output = [dict(s) for s in {frozenset(d.items()) for d in output}]
Expand Down
8 changes: 5 additions & 3 deletions jsf/schema_types/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,17 @@ class BaseSchema(BaseModel):
provider: Optional[str] = Field(None, alias="$provider")
set_state: Optional[Dict[str, str]] = Field(None, alias="$state")
is_nullable: bool = False
allow_none_optionals: float = Field(0.5, ge=0.0, le=1.0)

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
raise NotImplementedError # pragma: no cover

def generate(self, context: Dict[str, Any]) -> Any:
if self.set_state is not None:
context["state"][self.path] = {k: eval(v, context)() for k, v in self.set_state.items()}

if self.is_nullable and random.uniform(0, 1) < 0.9:
if self.is_nullable and random.uniform(0, 1) < self.allow_none_optionals:
return None
if self.provider is not None:
return eval(self.provider, context)()
Expand All @@ -62,4 +64,4 @@ def to_pydantic(self, context, _type):
Optional[_type],
Field(..., description=self.description, example=example),
)
return (_type, Field(..., description=self.description, example=example))
return _type, Field(..., description=self.description, example=example)
3 changes: 2 additions & 1 deletion jsf/schema_types/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ def generate(self, context: Dict[str, Any]) -> Optional[bool]:
def model(self, context: Dict[str, Any]):
return self.to_pydantic(context, bool)

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return Boolean(**d)
3 changes: 2 additions & 1 deletion jsf/schema_types/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def generate(self, context: Dict[str, Any]) -> Optional[Union[str, int, float]]:
except ProviderNotSetException:
return random.choice(self.enum)

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return JSFEnum(**d)

def model(self, context: Dict[str, Any]):
Expand Down
3 changes: 2 additions & 1 deletion jsf/schema_types/null.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,6 @@ def generate(self, context: Dict[str, Any]) -> None:
def model(self, context: Dict[str, Any]):
return self.to_pydantic(context, type(None))

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return Null(**d)
6 changes: 4 additions & 2 deletions jsf/schema_types/number.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ def generate(self, context: Dict[str, Any]) -> Optional[float]:
def model(self, context: Dict[str, Any]):
return self.to_pydantic(context, float)

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return Number(**d)


Expand All @@ -52,5 +53,6 @@ def generate(self, context: Dict[str, Any]) -> Optional[int]:
def model(self, context: Dict[str, Any]):
return self.to_pydantic(context, int)

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return Integer(**d)
5 changes: 3 additions & 2 deletions jsf/schema_types/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@ class Object(BaseSchema):
dependencies: Optional[Union[PropertyDependency, SchemaDependency]] = None
patternProperties: Optional[Dict[str, BaseSchema]] = None

def from_dict(d):
@classmethod
def from_dict(cls, d: dict):
return Object(**d)

def should_keep(self, property_name: str) -> bool:
if isinstance(self.required, list) and property_name in self.required:
return True
return random.uniform(0, 1) < 0.5
return random.uniform(0, 1) > self.allow_none_optionals

def generate(self, context: Dict[str, Any]) -> Optional[Dict[str, Any]]:
try:
Expand Down
3 changes: 2 additions & 1 deletion jsf/schema_types/oneof.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
class OneOf(BaseSchema):
schemas: List[BaseSchema] = None

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return OneOf(**d)

def generate(self, context: Dict[str, Any]) -> Optional[List[Any]]:
Expand Down
11 changes: 6 additions & 5 deletions jsf/schema_types/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ def temporal_duration(
return duration


def mostly_zero_randint(min, max):
return 0 if random.random() > 0.8 else random.randint(min, max)
def mostly_zero_randint(_min: int, _max: int) -> int:
return 0 if random.random() > 0.8 else random.randint(int(_min), int(_max))


def fake_duration():
Expand Down Expand Up @@ -117,8 +117,8 @@ def fake_duration():


class String(BaseSchema):
minLength: Optional[float] = 0
maxLength: Optional[float] = 50
minLength: Optional[int] = 0
maxLength: Optional[int] = 50
pattern: Optional[str] = None
format: Optional[str] = None
# enum: Optional[List[Union[str, int, float]]] = None # NOTE: Not used - enums go to enum class
Expand Down Expand Up @@ -151,5 +151,6 @@ def generate(self, context: Dict[str, Any]) -> Optional[str]:
def model(self, context: Dict[str, Any]):
return self.to_pydantic(context, str)

def from_dict(d):
@classmethod
def from_dict(cls, d: Dict):
return String(**d)
3 changes: 1 addition & 2 deletions jsf/schema_types/string_utils/content_encoding.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import base64
import quopri
from enum import Enum
from typing import Optional


class ContentEncoding(str, Enum):
Expand Down Expand Up @@ -57,5 +56,5 @@ def b64_encoder(string: str) -> str:
}


def encode(string: str, encoding: Optional[ContentEncoding]) -> str:
def encode(string: str, encoding: ContentEncoding) -> str:
return Encoder.get(encoding, lambda s: s)(string)
18 changes: 12 additions & 6 deletions jsf/schema_types/string_utils/content_type/text__plain.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,23 @@
LOREM = """Lorem ipsum dolor sit amet consectetur adipisicing elit.
Hic molestias, esse veniam placeat officiis nobis architecto modi
possimus reiciendis accusantium exercitationem quas illum libero odit magnam,
reprehenderit ipsum, repellendus culpa!""".split()
reprehenderit ipsum, repellendus culpa! Nullam vehicula ipsum a arcu cursus vitae congue.
Enim nec dui nunc mattis enim ut tellus.""".split()


def random_fixed_length_sentence(_min: int = 0, _max: int = 50) -> str:
if _min > _max:
raise ValueError("'_max' should be greater than '_min'") # pragma: no cover
output = ""
while len(output) < _max:
while True:
remaining = _max - len(output)
valid_words = list(filter(lambda s: len(s) < remaining, LOREM))
valid_words = list(filter(lambda s: len(s) <= remaining, LOREM))
if len(valid_words) == 0:
break
output += random.choice(valid_words) + " "
if len(output) > _min and random.uniform(0, 1) > 0.9:
if len(output) >= _min and random.uniform(0, 1) > 0.9:
break
return output.strip()
output += random.choice(valid_words) + " "
output = output.strip()
if len(output) < _min:
output = output + "."
return output
8 changes: 8 additions & 0 deletions jsf/tests/data/object-with-optionals.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"type": "object",
"required": ["name"],
"properties": {
"name": { "type": "string" },
"credit_card": { "type": "number" }
}
}
5 changes: 5 additions & 0 deletions jsf/tests/data/string-max-min-length.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"type": "string",
"maxLength": 2,
"minLength": 2
}
19 changes: 19 additions & 0 deletions jsf/tests/test_default_fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,15 @@ def test_fake_string(TestData):
assert len(fake_data) - len(set(fake_data)) < 50


def test_fake_string_max_min_length(TestData):
    """Check generated string length for the ``string-max-min-length.json`` schema.

    Loads the fixture, asserts that a generated value is a ``str``, then
    draws ten further samples and asserts that each has length 2.
    """
    with open(TestData / "string-max-min-length.json", "r") as file:
        schema = json.load(file)
    generator = JSF(schema)
    assert isinstance(generator.generate(), str)
    samples = []
    for _ in range(10):
        samples.append(generator.generate())
    assert all(len(sample) == 2 for sample in samples)


def test_fake_string_content_encoding(TestData):
with open(TestData / "string-content-encoding.json", "r") as file:
schema = json.load(file)
Expand Down Expand Up @@ -426,3 +435,13 @@ def test_list_of_types(TestData):
assert all(type(f["randTypeValue"]) in [bool, int, float, str] for f in fake_data), fake_data
assert all(isinstance(f["int"], int) for f in fake_data), fake_data
assert all(isinstance(f["null"], type(None)) for f in fake_data), fake_data


def test_non_required_are_not_none(TestData):
    """Check that ``allow_none_optionals=0.0`` yields values for every field.

    Loads the ``object-with-optionals.json`` fixture and, ten times over,
    builds a ``JSF`` generator with ``allow_none_optionals=0.0`` and
    generates one object. Each generated object must carry a non-``None``
    value for both ``name`` and ``credit_card``.
    """
    with open(TestData / "object-with-optionals.json", "r") as file:
        schema = json.load(file)
    for _ in range(10):
        fake_data = JSF(schema, allow_none_optionals=0.0).generate()

        # Assert inside the loop so every sample is validated, not only the
        # last one. ``.get`` plus the message turns a dropped key into a
        # readable assertion failure showing the generated object.
        assert fake_data.get("name") is not None, fake_data
        assert fake_data.get("credit_card") is not None, fake_data

0 comments on commit 4c935d6

Please sign in to comment.