diff --git a/ee/clickhouse/queries/funnels/base.py b/ee/clickhouse/queries/funnels/base.py index 1398c5a207638..dee01706287dc 100644 --- a/ee/clickhouse/queries/funnels/base.py +++ b/ee/clickhouse/queries/funnels/base.py @@ -272,6 +272,36 @@ def _build_filters(self, entity: Entity, index: int) -> str: return prop_filters return "" + def _get_funnel_person_step_condition(self): + step_num = self._filter.funnel_step + max_steps = len(self._filter.entities) + + if step_num is None: + raise ValueError("funnel_step should not be none") + + if step_num >= 0: + self.params.update({"step_num": [i for i in range(step_num, max_steps + 1)]}) + return "steps IN %(step_num)s" + else: + self.params.update({"step_num": abs(step_num) - 1}) + return "steps = %(step_num)s" + + def _get_count_columns(self, max_steps: int): + cols: List[str] = [] + + for i in range(max_steps): + cols.append(f"countIf(steps = {i + 1}) step_{i + 1}") + + return ", ".join(cols) + + def _get_step_time_avgs(self, max_steps: int): + conditions: List[str] = [] + for i in range(1, max_steps): + conditions.append(f"avg(step_{i}_average_conversion_time) step_{i}_average_conversion_time") + + formatted = ", ".join(conditions) + return f", {formatted}" if formatted else "" + @abstractmethod def get_query(self, format_properties): pass diff --git a/ee/clickhouse/queries/funnels/funnel.py b/ee/clickhouse/queries/funnels/funnel.py index d54773c441ec8..0fe00b7d4b223 100644 --- a/ee/clickhouse/queries/funnels/funnel.py +++ b/ee/clickhouse/queries/funnels/funnel.py @@ -11,28 +11,24 @@ def get_query(self, format_properties): class ClickhouseFunnelNew(ClickhouseFunnelBase): def get_query(self, format_properties): - return self.get_step_counts_query() - def get_step_counts_query(self): - - steps_per_person_query = self._get_steps_per_person_query() + steps_per_person_query = self.get_step_counts_query() max_steps = len(self._filter.entities) return f""" SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} FROM ( - SELECT person_id, max(steps) AS furthest {self._get_step_time_avgs(max_steps)} FROM ( {steps_per_person_query} - ) GROUP BY person_id ) SETTINGS allow_experimental_window_functions = 1 """ - def _get_count_columns(self, max_steps: int): - cols: List[str] = [] - - for i in range(max_steps): - cols.append(f"countIf(furthest = {i + 1}) step_{i + 1}") + def get_step_counts_query(self): + steps_per_person_query = self._get_steps_per_person_query() + max_steps = len(self._filter.entities) - return ", ".join(cols) + return f"""SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} FROM ( + {steps_per_person_query} + ) GROUP BY person_id + """ def _format_results(self, results): # Format of this is [step order, person count (that reached that step), array of person uuids] @@ -61,12 +57,3 @@ def _get_breakdown_prop(self) -> str: return ", prop" else: return "" - - # TODO: include in the inner query to handle time to convert - def _get_step_time_avgs(self, max_steps: int): - conditions: List[str] = [] - for i in range(1, max_steps): - conditions.append(f"avg(step_{i}_average_conversion_time) step_{i}_average_conversion_time") - - formatted = ", ".join(conditions) - return f", {formatted}" if formatted else "" diff --git a/ee/clickhouse/queries/funnels/funnel_persons.py b/ee/clickhouse/queries/funnels/funnel_persons.py index f5487810e68c0..4c8334922a28d 100644 --- a/ee/clickhouse/queries/funnels/funnel_persons.py +++ b/ee/clickhouse/queries/funnels/funnel_persons.py @@ -1,31 +1,16 @@ -from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase +from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnelNew from ee.clickhouse.sql.funnels.funnel import FUNNEL_PERSONS_BY_STEP_SQL from posthog.models import Person -class ClickhouseFunnelPersons(ClickhouseFunnelBase): +class ClickhouseFunnelPersons(ClickhouseFunnelNew): def get_query(self, format_properties): - steps_per_person_query = self._get_steps_per_person_query() return FUNNEL_PERSONS_BY_STEP_SQL.format( **format_properties, - steps_per_person_query=steps_per_person_query, + steps_per_person_query=self.get_step_counts_query(), persons_steps=self._get_funnel_person_step_condition() ) - def _get_funnel_person_step_condition(self): - step_num = self._filter.funnel_step - max_steps = len(self._filter.entities) - - if step_num is None: - raise ValueError("funnel_step should not be none") - - if step_num >= 0: - self.params.update({"step_num": [i for i in range(step_num, max_steps + 1)]}) - return "steps IN %(step_num)s" - else: - self.params.update({"step_num": abs(step_num) - 1}) - return "steps = %(step_num)s" - def _format_results(self, results): people = Person.objects.filter(team_id=self._team.pk, uuid__in=[val[0] for val in results]) diff --git a/ee/clickhouse/queries/funnels/funnel_trends.py b/ee/clickhouse/queries/funnels/funnel_trends.py index db5a28901c170..967361c2db9cf 100644 --- a/ee/clickhouse/queries/funnels/funnel_trends.py +++ b/ee/clickhouse/queries/funnels/funnel_trends.py @@ -1,7 +1,7 @@ from datetime import date, datetime, timedelta from typing import Union -from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnelNew +from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase from ee.clickhouse.queries.util import get_time_diff, get_trunc_func_ch from posthog.constants import BREAKDOWN from posthog.models.filters.filter import Filter @@ -15,7 +15,7 @@ HUMAN_READABLE_TIMESTAMP_FORMAT = "%a. %-d %b" -class ClickhouseFunnelTrends(ClickhouseFunnelNew): +class ClickhouseFunnelTrends(ClickhouseFunnelBase): """ ## Funnel trends assumptions diff --git a/ee/clickhouse/queries/funnels/funnel_unordered.py b/ee/clickhouse/queries/funnels/funnel_unordered.py index 15a0880c2a2b0..09a3eff4fee2c 100644 --- a/ee/clickhouse/queries/funnels/funnel_unordered.py +++ b/ee/clickhouse/queries/funnels/funnel_unordered.py @@ -24,7 +24,14 @@ class ClickhouseFunnelUnordered(ClickhouseFunnelBase): """ def get_query(self, format_properties): - return self.get_step_counts_query() + + max_steps = len(self._filter.entities) + + return f""" + SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} FROM ( + {self.get_step_counts_query()} + ) SETTINGS allow_experimental_window_functions = 1 + """ def get_step_counts_query(self): @@ -45,7 +52,7 @@ def get_step_counts_query(self): """ formatted_query = f""" - SELECT *, {sorting_condition} AS steps FROM ( + SELECT *, {sorting_condition} AS steps {self._get_step_times(max_steps)} FROM ( {inner_query} ) WHERE step_0 = 1""" @@ -56,11 +63,9 @@ def get_step_counts_query(self): union_formatted_query = " UNION ALL ".join(union_queries) return f""" - SELECT furthest, count(1), groupArray(100)(person_id) FROM ( - SELECT person_id, max(steps) AS furthest FROM ( + SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} FROM ( {union_formatted_query} - ) GROUP BY person_id - ) GROUP BY furthest SETTINGS allow_experimental_window_functions = 1 + ) GROUP BY person_id """ def get_sorting_condition(self, max_steps: int): @@ -72,3 +77,25 @@ def get_sorting_condition(self, max_steps: int): ) return f"arraySum([{','.join(basic_conditions)}, 1])" + + # TODO: copied from funnel.py. Once the new funnel query replaces old one, the base format_results function can use this + def _format_results(self, results): + # Format of this is [step order, person count (that reached that step), array of person uuids] + steps = [] + total_people = 0 + + for step in reversed(self._filter.entities): + + if results[0] and len(results[0]) > 0: + total_people += results[0][step.order] + + serialized_result = self._serialize_step(step, total_people, []) + if step.order > 0: + serialized_result.update( + {"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]} + ) + else: + serialized_result.update({"average_conversion_time": None}) + steps.append(serialized_result) + + return steps[::-1] #  reverse diff --git a/ee/clickhouse/queries/funnels/funnel_unordered_persons.py b/ee/clickhouse/queries/funnels/funnel_unordered_persons.py new file mode 100644 index 0000000000000..bd2b7bdb119b0 --- /dev/null +++ b/ee/clickhouse/queries/funnels/funnel_unordered_persons.py @@ -0,0 +1,19 @@ +from ee.clickhouse.queries.funnels.funnel_unordered import ClickhouseFunnelUnordered +from ee.clickhouse.sql.funnels.funnel import FUNNEL_PERSONS_BY_STEP_SQL +from posthog.models import Person + + +class ClickhouseFunnelUnorderedPersons(ClickhouseFunnelUnordered): + def get_query(self, format_properties): + return FUNNEL_PERSONS_BY_STEP_SQL.format( + **format_properties, + steps_per_person_query=self.get_step_counts_query(), + persons_steps=self._get_funnel_person_step_condition() + ) + + def _format_results(self, results): + people = Person.objects.filter(team_id=self._team.pk, uuid__in=[val[0] for val in results]) + + from posthog.api.person import PersonSerializer + + return PersonSerializer(people, many=True).data diff --git a/ee/clickhouse/queries/funnels/test/test_funnel.py b/ee/clickhouse/queries/funnels/test/test_funnel.py index 9a573db02fc42..5389c83627308 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel.py @@ -2,6 +2,7 @@ from ee.clickhouse.models.event import create_event from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnel, ClickhouseFunnelNew +from ee.clickhouse.queries.funnels.funnel_persons import ClickhouseFunnelPersons from ee.clickhouse.util import ClickhouseTestMixin from posthog.constants import INSIGHT_FUNNELS from posthog.models.action import Action @@ -40,6 +41,11 @@ class TestFunnel(ClickhouseTestMixin, funnel_test_factory(ClickhouseFunnel, _cre class TestFunnelNew(ClickhouseTestMixin, funnel_test_factory(ClickhouseFunnelNew, _create_event, _create_person)): # type: ignore + def _get_people_at_step(self, filter, funnel_step): + person_filter = filter.with_data({"funnel_step": funnel_step}) + result = ClickhouseFunnelPersons(person_filter, self.team)._exec_query() + return [row[0] for row in result] + def test_basic_funnel_default_funnel_days(self): filters = { "events": [ @@ -93,11 +99,20 @@ def test_basic_funnel_with_repeat_steps(self): person2_stopped_after_signup = _create_person(distinct_ids=["stopped_after_signup2"], team_id=self.team.pk) _create_event(team=self.team, event="user signed up", distinct_id="stopped_after_signup2") - with self.assertNumQueries(1): - result = funnel.run() + result = funnel.run() self.assertEqual(result[0]["name"], "user signed up") self.assertEqual(result[0]["count"], 2) + self.assertEqual(result[1]["count"], 1) + + self.assertCountEqual( + self._get_people_at_step(filter, 1), + [person1_stopped_after_two_signups.uuid, person2_stopped_after_signup.uuid], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 2), [person1_stopped_after_two_signups.uuid], + ) def test_advanced_funnel_with_repeat_steps(self): filters = { @@ -109,7 +124,6 @@ def test_advanced_funnel_with_repeat_steps(self): {"id": "$pageview", "type": "events", "order": 4}, ], "insight": INSIGHT_FUNNELS, - "funnel_window_days": 14, } filter = Filter(data=filters) @@ -158,6 +172,49 @@ def test_advanced_funnel_with_repeat_steps(self): self.assertEqual(result[1]["name"], "$pageview") self.assertEqual(result[4]["name"], "$pageview") self.assertEqual(result[0]["count"], 5) + self.assertEqual(result[1]["count"], 4) + self.assertEqual(result[2]["count"], 3) + self.assertEqual(result[3]["count"], 2) + self.assertEqual(result[4]["count"], 1) + # check ordering of people in every step + self.assertCountEqual( + self._get_people_at_step(filter, 1), + [ + person1_stopped_after_signup.uuid, + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_two_pageview.uuid, + person4_stopped_after_three_pageview.uuid, + person5_stopped_after_many_pageview.uuid, + ], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 2), + [ + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_two_pageview.uuid, + person4_stopped_after_three_pageview.uuid, + person5_stopped_after_many_pageview.uuid, + ], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 3), + [ + person3_stopped_after_two_pageview.uuid, + person4_stopped_after_three_pageview.uuid, + person5_stopped_after_many_pageview.uuid, + ], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 4), + [person4_stopped_after_three_pageview.uuid, person5_stopped_after_many_pageview.uuid], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 5), [person5_stopped_after_many_pageview.uuid], + ) def test_advanced_funnel_with_repeat_steps_out_of_order_events(self): filters = { @@ -225,13 +282,49 @@ def test_advanced_funnel_with_repeat_steps_out_of_order_events(self): _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_pageview5") _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_pageview5") - with self.assertNumQueries(1): - result = funnel.run() + result = funnel.run() self.assertEqual(result[0]["name"], "user signed up") self.assertEqual(result[1]["name"], "$pageview") self.assertEqual(result[4]["name"], "$pageview") self.assertEqual(result[0]["count"], 5) + self.assertEqual(result[1]["count"], 4) + self.assertEqual(result[2]["count"], 1) + self.assertEqual(result[3]["count"], 1) + self.assertEqual(result[4]["count"], 1) + # check ordering of people in every step + self.assertCountEqual( + self._get_people_at_step(filter, 1), + [ + person1_stopped_after_signup.uuid, + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_two_pageview.uuid, + person4_stopped_after_three_pageview.uuid, + person5_stopped_after_many_pageview.uuid, + ], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 2), + [ + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_two_pageview.uuid, + person4_stopped_after_three_pageview.uuid, + person5_stopped_after_many_pageview.uuid, + ], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 3), [person5_stopped_after_many_pageview.uuid], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 4), [person5_stopped_after_many_pageview.uuid], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 5), [person5_stopped_after_many_pageview.uuid], + ) def test_funnel_with_actions(self): @@ -247,7 +340,6 @@ def test_funnel_with_actions(self): {"id": sign_up_action.id, "math": "wau", "order": 1}, ], "insight": INSIGHT_FUNNELS, - "funnel_window_days": 14, } filter = Filter(data=filters) @@ -261,11 +353,20 @@ def test_funnel_with_actions(self): person2_stopped_after_signup = _create_person(distinct_ids=["stopped_after_signup2"], team_id=self.team.pk) _create_event(team=self.team, event="sign up", distinct_id="stopped_after_signup2", properties={"key": "val"}) - with self.assertNumQueries(1): - result = funnel.run() + result = funnel.run() self.assertEqual(result[0]["name"], "sign up") self.assertEqual(result[0]["count"], 2) + self.assertEqual(result[1]["count"], 1) + # check ordering of people in first step + self.assertCountEqual( + self._get_people_at_step(filter, 1), + [person1_stopped_after_two_signups.uuid, person2_stopped_after_signup.uuid], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 2), [person1_stopped_after_two_signups.uuid], + ) def test_funnel_with_actions_and_events(self): @@ -317,10 +418,31 @@ def test_funnel_with_actions_and_events(self): person5 = _create_person(distinct_ids=["person5"], team_id=self.team.pk) _create_event(team=self.team, event="sign up", distinct_id="person5", properties={"key": "val"}) - with self.assertNumQueries(1): - result = funnel.run() + result = funnel.run() self.assertEqual(result[0]["name"], "user signed up") + self.assertEqual(result[0]["count"], 4) + self.assertEqual(result[1]["count"], 4) + self.assertEqual(result[2]["count"], 3) + self.assertEqual(result[3]["count"], 1) + + # check ordering of people in steps + self.assertCountEqual( + self._get_people_at_step(filter, 1), + [person1_stopped_after_two_signups.uuid, person2_stopped_after_signup.uuid, person3.uuid, person4.uuid], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 2), + [person1_stopped_after_two_signups.uuid, person2_stopped_after_signup.uuid, person3.uuid, person4.uuid], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 3), + [person1_stopped_after_two_signups.uuid, person2_stopped_after_signup.uuid, person3.uuid,], + ) + + self.assertCountEqual(self._get_people_at_step(filter, 4), [person1_stopped_after_two_signups.uuid,]) def test_funnel_with_matching_properties(self): filters = { @@ -333,10 +455,7 @@ def test_funnel_with_matching_properties(self): "properties": {"$current_url": "aloha2.com"}, }, # different event to above {"id": "$pageview", "order": 3, "properties": {"$current_url": "aloha2.com"}}, - { - "id": "$pageview", - "order": 4, - }, # TODO(nk): does this supercede the above event? i.e. order 3 is subset of order 4? doesn't make sense to allow this in a funnel + {"id": "$pageview", "order": 4,}, ], "insight": INSIGHT_FUNNELS, "funnel_window_days": 14, @@ -438,13 +557,55 @@ def test_funnel_with_matching_properties(self): properties={"$current_url": "aloha2.com"}, ) - with self.assertNumQueries(1): - result = funnel.run() + result = funnel.run() self.assertEqual(result[0]["name"], "user signed up") self.assertEqual(result[1]["name"], "$pageview") self.assertEqual(result[4]["name"], "$pageview") self.assertEqual(result[0]["count"], 5) + self.assertEqual(result[1]["count"], 4) + self.assertEqual(result[2]["count"], 3) + self.assertEqual(result[3]["count"], 2) + self.assertEqual(result[4]["count"], 0) + # check ordering of people in every step + self.assertCountEqual( + self._get_people_at_step(filter, 1), + [ + person1_stopped_after_signup.uuid, + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_two_pageview.uuid, + person4_stopped_after_three_pageview.uuid, + person5_stopped_after_many_pageview.uuid, + ], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 2), + [ + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_two_pageview.uuid, + person4_stopped_after_three_pageview.uuid, + person5_stopped_after_many_pageview.uuid, + ], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 3), + [ + person3_stopped_after_two_pageview.uuid, + person4_stopped_after_three_pageview.uuid, + person5_stopped_after_many_pageview.uuid, + ], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 4), + [person4_stopped_after_three_pageview.uuid, person5_stopped_after_many_pageview.uuid], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 5), [], + ) def test_funnel_step_conversion_times(self): diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_unordered.py b/ee/clickhouse/queries/funnels/test/test_funnel_unordered.py index 8d8e5034151d5..a6d5e2f0ba0ab 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_unordered.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_unordered.py @@ -1,8 +1,8 @@ -from datetime import datetime, timedelta from uuid import uuid4 from ee.clickhouse.models.event import create_event from ee.clickhouse.queries.funnels.funnel_unordered import ClickhouseFunnelUnordered +from ee.clickhouse.queries.funnels.funnel_unordered_persons import ClickhouseFunnelUnorderedPersons from ee.clickhouse.util import ClickhouseTestMixin from posthog.constants import INSIGHT_FUNNELS from posthog.models.filters import Filter @@ -23,6 +23,11 @@ def _create_event(**kwargs): class TestFunnelUnorderedSteps(ClickhouseTestMixin, APIBaseTest): + def _get_people_at_step(self, filter, funnel_step): + person_filter = filter.with_data({"funnel_step": funnel_step}) + result = ClickhouseFunnelUnorderedPersons(person_filter, self.team)._exec_query() + return [row[0] for row in result] + def test_basic_unordered_funnel(self): filter = Filter( data={ @@ -86,12 +91,14 @@ def test_basic_unordered_funnel(self): result = funnel.run() self.assertEqual(result[0]["name"], "user signed up") + self.assertEqual(result[0]["count"], 8) self.assertEqual(result[1]["name"], "$pageview") + self.assertEqual(result[1]["count"], 5) self.assertEqual(result[2]["name"], "insight viewed") - self.assertEqual(result[0]["count"], 8) + self.assertEqual(result[2]["count"], 3) self.assertCountEqual( - result[0]["people"], + self._get_people_at_step(filter, 1), [ person1_stopped_after_signup.uuid, person2_stopped_after_one_pageview.uuid, @@ -105,7 +112,7 @@ def test_basic_unordered_funnel(self): ) self.assertCountEqual( - result[1]["people"], + self._get_people_at_step(filter, 2), [ person2_stopped_after_one_pageview.uuid, person3_stopped_after_insight_view.uuid, @@ -116,7 +123,12 @@ def test_basic_unordered_funnel(self): ) self.assertCountEqual( - result[2]["people"], + self._get_people_at_step(filter, -2), + [person1_stopped_after_signup.uuid, person6_did_only_insight_view.uuid, person7_did_only_pageview.uuid,], + ) + + self.assertCountEqual( + self._get_people_at_step(filter, 3), [ person3_stopped_after_insight_view.uuid, person4_stopped_after_insight_view_reverse_order.uuid, @@ -124,6 +136,11 @@ def test_basic_unordered_funnel(self): ], ) + self.assertCountEqual( + self._get_people_at_step(filter, -3), + [person2_stopped_after_one_pageview.uuid, person8_didnot_signup.uuid,], + ) + def test_big_multi_step_unordered_funnel(self): filter = Filter( data={ @@ -188,13 +205,16 @@ def test_big_multi_step_unordered_funnel(self): result = funnel.run() self.assertEqual(result[0]["name"], "user signed up") + self.assertEqual(result[0]["count"], 8) self.assertEqual(result[1]["name"], "$pageview") + self.assertEqual(result[1]["count"], 5) self.assertEqual(result[2]["name"], "insight viewed") + self.assertEqual(result[2]["count"], 3) self.assertEqual(result[3]["name"], "crying") - self.assertEqual(result[0]["count"], 8) + self.assertEqual(result[3]["count"], 1) self.assertCountEqual( - result[0]["people"], + self._get_people_at_step(filter, 1), [ person1_stopped_after_signup.uuid, person2_stopped_after_one_pageview.uuid, @@ -208,7 +228,7 @@ def test_big_multi_step_unordered_funnel(self): ) self.assertCountEqual( - result[1]["people"], + self._get_people_at_step(filter, 2), [ person2_stopped_after_one_pageview.uuid, person3_stopped_after_insight_view.uuid, @@ -219,7 +239,7 @@ def test_big_multi_step_unordered_funnel(self): ) self.assertCountEqual( - result[2]["people"], + self._get_people_at_step(filter, 3), [ person3_stopped_after_insight_view.uuid, person4_stopped_after_insight_view_reverse_order.uuid, @@ -228,5 +248,5 @@ def test_big_multi_step_unordered_funnel(self): ) self.assertCountEqual( - result[3]["people"], [person5_stopped_after_insight_view_random.uuid,], + self._get_people_at_step(filter, 4), [person5_stopped_after_insight_view_random.uuid,], ) diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_unordered_persons.py b/ee/clickhouse/queries/funnels/test/test_funnel_unordered_persons.py new file mode 100644 index 0000000000000..441f0ec317117 --- /dev/null +++ b/ee/clickhouse/queries/funnels/test/test_funnel_unordered_persons.py @@ -0,0 +1,141 @@ +from datetime import datetime, timedelta +from uuid import uuid4 + +import pytest + +from ee.clickhouse.models.event import create_event +from ee.clickhouse.queries.funnels.funnel_unordered import ClickhouseFunnelUnordered +from ee.clickhouse.queries.funnels.funnel_unordered_persons import ClickhouseFunnelUnorderedPersons +from ee.clickhouse.util import ClickhouseTestMixin +from posthog.constants import INSIGHT_FUNNELS +from posthog.models.filters import Filter +from posthog.models.person import Person +from posthog.test.base import APIBaseTest + +FORMAT_TIME = "%Y-%m-%d 00:00:00" + + +def _create_person(**kwargs): + person = Person.objects.create(**kwargs) + return Person(id=person.uuid, uuid=person.uuid) + + +def _create_event(**kwargs): + kwargs.update({"event_uuid": uuid4()}) + create_event(**kwargs) + + +class TestFunnelUnorderedStepsPersons(ClickhouseTestMixin, APIBaseTest): + def _create_sample_data_multiple_dropoffs(self): + for i in range(5): + _create_person(distinct_ids=[f"user_{i}"], team=self.team) + _create_event(event="step one", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-01 00:00:00") + _create_event(event="step three", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-03 00:00:00") + _create_event(event="step two", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-05 00:00:00") + + for i in range(5, 15): + _create_person(distinct_ids=[f"user_{i}"], team=self.team) + _create_event(event="step two", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-01 00:00:00") + _create_event(event="step one", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-03 00:00:00") + + for i in range(15, 35): + _create_person(distinct_ids=[f"user_{i}"], team=self.team) + _create_event(event="step one", distinct_id=f"user_{i}", team=self.team, timestamp="2021-05-01 00:00:00") + + def test_invalid_steps(self): + data = { + "insight": INSIGHT_FUNNELS, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "funnel_step": "blah", + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + filter = Filter(data=data) + with self.assertRaises(ValueError): + ClickhouseFunnelUnorderedPersons(filter, self.team).run() + + filter = filter.with_data({"funnel_step": -1}) + result = ClickhouseFunnelUnorderedPersons(filter, self.team).run() + self.assertEqual(0, len(result)) + + def test_first_step(self): + self._create_sample_data_multiple_dropoffs() + data = { + "insight": INSIGHT_FUNNELS, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "funnel_step": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + filter = Filter(data=data) + results = ClickhouseFunnelUnorderedPersons(filter, self.team).run() + self.assertEqual(35, len(results)) + + def test_last_step(self): + self._create_sample_data_multiple_dropoffs() + data = { + "insight": INSIGHT_FUNNELS, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "funnel_step": 3, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + filter = Filter(data=data) + results = ClickhouseFunnelUnorderedPersons(filter, self.team).run() + self.assertEqual(5, len(results)) + + def test_second_step_dropoff(self): + self._create_sample_data_multiple_dropoffs() + data = { + "insight": INSIGHT_FUNNELS, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "funnel_step": -2, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + filter = Filter(data=data) + results = ClickhouseFunnelUnorderedPersons(filter, self.team).run() + self.assertEqual(20, len(results)) + + def test_last_step_dropoff(self): + self._create_sample_data_multiple_dropoffs() + data = { + "insight": INSIGHT_FUNNELS, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 00:00:00", + "funnel_window_days": 7, + "funnel_step": -3, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + filter = Filter(data=data) + results = ClickhouseFunnelUnorderedPersons(filter, self.team).run() + self.assertEqual(10, len(results)) diff --git a/posthog/models/person.py b/posthog/models/person.py index b50dd94219c08..673701e94f4ba 100644 --- a/posthog/models/person.py +++ b/posthog/models/person.py @@ -79,22 +79,6 @@ def merge_people(self, people_to_merge: List["Person"]): # Has an index on properties -> email, built concurrently # See migration 0121 - @staticmethod - def get_distinct_ids_and_email_by_ids(person_ids, team_id): - decorated = [] - for person_id in person_ids: - distinct_ids, email = Person.get_distinct_ids_and_email_by_id(person_id, team_id) - decorated.append({"distinct_ids": distinct_ids, "email": email}) - return decorated - - @staticmethod - def get_distinct_ids_and_email_by_id(person_id, team_id): - person = Person.objects.get(uuid=person_id) - distinct_ids = PersonDistinctId.objects.filter(person_id=person.id, team_id=team_id) - flat_distinct_ids = [row.distinct_id for row in distinct_ids] - email = person.properties.get("email", "") - return flat_distinct_ids, email - class PersonDistinctId(models.Model): class Meta: