Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Setup Funnel Unordered persons and Testing #4943

Merged
merged 54 commits into from
Jul 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
537bdb3
wip: pagination for persons on clickhouse funnels
buwilliams Jun 18, 2021
605f9de
wip: added offset support for getting a list of persons; added suppor…
buwilliams Jun 19, 2021
a9a5f12
fixed mypy exception
buwilliams Jun 19, 2021
e952308
helper function to insert data for local testing
buwilliams Jun 20, 2021
ceda12e
moved generate code into separate class for more functionality later
buwilliams Jun 20, 2021
1a29fdb
corrected person_distinct_id to use the person id from postgres
buwilliams Jun 20, 2021
1990907
minor corrections to generate local class along with addition of data…
buwilliams Jun 20, 2021
7daeaaa
reduce the number of persons who make it to each step
buwilliams Jun 21, 2021
fba259e
moved funnel queries to a new folder for better organization; separat…
buwilliams Jun 21, 2021
cc24758
Merge branch 'master' into funnel-persons-pagination-conversion-window
EDsCODE Jun 22, 2021
142e7b6
funnel persons and tests
buwilliams Jun 22, 2021
df44123
Merge branch 'funnel-persons-pagination-conversion-window' of github.…
buwilliams Jun 22, 2021
589e937
initial implementation
EDsCODE Jun 22, 2021
ac6d601
invoke the funnel or funnel trends class respectively
buwilliams Jun 22, 2021
34d5bad
add a test
EDsCODE Jun 22, 2021
b483277
add breakdown handling and first test
EDsCODE Jun 22, 2021
9242414
add test stubs
EDsCODE Jun 23, 2021
3042e59
remove repeats
EDsCODE Jun 23, 2021
474ba42
mypy corrections and PR feedback
buwilliams Jun 23, 2021
4abecaf
run funnel test suite on new query implementation
EDsCODE Jun 23, 2021
62f2919
remove imports
EDsCODE Jun 23, 2021
4fb5a43
corrected tests
buwilliams Jun 23, 2021
74bfa1b
minor test updates
EDsCODE Jun 23, 2021
0aa36f9
Merge branch 'funnel-persons-pagination-conversion-window' into funne…
EDsCODE Jun 23, 2021
5c9dcd2
correct func name
EDsCODE Jun 23, 2021
5df0cad
fix types
EDsCODE Jun 23, 2021
d7d2e60
merge master
EDsCODE Jun 24, 2021
a9e9b7f
unordered step and test
neilkakkar Jun 24, 2021
0427159
func name change
EDsCODE Jun 24, 2021
b9cae01
move builder functions to funnel base
EDsCODE Jun 25, 2021
7c5e606
add test classe for new funnel
EDsCODE Jun 25, 2021
49f8c92
Merge branch 'master' into funnel-step-query-new
EDsCODE Jun 25, 2021
0938b92
resolve issues with unordered funnel
neilkakkar Jun 25, 2021
4420421
resolve merge conflicts
neilkakkar Jun 25, 2021
ae21c8c
oops
neilkakkar Jun 25, 2021
b3fa339
remove breakdown, fix mypy error
neilkakkar Jun 25, 2021
716e8af
Handle multiple same events in the funnel (#4863)
neilkakkar Jun 25, 2021
79cbea2
Merge branch 'funnel-step-query-new' of github.com:PostHog/posthog in…
neilkakkar Jun 25, 2021
6c4245f
from O(2^N) to O(N)
neilkakkar Jun 25, 2021
6d1f8f1
merge master
EDsCODE Jun 28, 2021
001a149
add query intuition blurb
neilkakkar Jun 29, 2021
1372f68
rm todo
neilkakkar Jun 29, 2021
a54db4e
wip persons
neilkakkar Jun 29, 2021
7857487
wip persons 2
neilkakkar Jun 29, 2021
74e72cd
Merge branch 'master' of github.com:PostHog/posthog into funnel-unord…
neilkakkar Jun 30, 2021
1bfa011
address comments
neilkakkar Jun 30, 2021
aa71b6d
merge master
neilkakkar Jun 30, 2021
9b8df58
Merge branch 'funnel-unordered-step' of github.com:PostHog/posthog in…
neilkakkar Jun 30, 2021
c46dc07
test things, fix bugs
neilkakkar Jun 30, 2021
a11eeed
resolve merge conflicts
neilkakkar Jul 1, 2021
f4d9b58
Merge branch 'master' into funnel-unordered-persons
neilkakkar Jul 1, 2021
a13add8
Merge branch 'master' into funnel-unordered-persons
EDsCODE Jul 1, 2021
d330e34
match result format to funnel.py
EDsCODE Jul 1, 2021
2d9ea97
Merge branch 'master' into funnel-unordered-persons
EDsCODE Jul 1, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions ee/clickhouse/queries/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,36 @@ def _build_filters(self, entity: Entity, index: int) -> str:
return prop_filters
return ""

def _get_funnel_person_step_condition(self):
step_num = self._filter.funnel_step
max_steps = len(self._filter.entities)

if step_num is None:
raise ValueError("funnel_step should not be none")

if step_num >= 0:
self.params.update({"step_num": [i for i in range(step_num, max_steps + 1)]})
return "steps IN %(step_num)s"
else:
self.params.update({"step_num": abs(step_num) - 1})
return "steps = %(step_num)s"

def _get_count_columns(self, max_steps: int):
cols: List[str] = []

for i in range(max_steps):
cols.append(f"countIf(steps = {i + 1}) step_{i + 1}")

return ", ".join(cols)

def _get_step_time_avgs(self, max_steps: int):
conditions: List[str] = []
for i in range(1, max_steps):
conditions.append(f"avg(step_{i}_average_conversion_time) step_{i}_average_conversion_time")

formatted = ", ".join(conditions)
return f", {formatted}" if formatted else ""

@abstractmethod
def get_query(self, format_properties):
pass
29 changes: 8 additions & 21 deletions ee/clickhouse/queries/funnels/funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,24 @@ def get_query(self, format_properties):

class ClickhouseFunnelNew(ClickhouseFunnelBase):
def get_query(self, format_properties):
return self.get_step_counts_query()

def get_step_counts_query(self):

steps_per_person_query = self._get_steps_per_person_query()
steps_per_person_query = self.get_step_counts_query()
max_steps = len(self._filter.entities)

return f"""
SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} FROM (
SELECT person_id, max(steps) AS furthest {self._get_step_time_avgs(max_steps)} FROM (
{steps_per_person_query}
) GROUP BY person_id
) SETTINGS allow_experimental_window_functions = 1
"""

def _get_count_columns(self, max_steps: int):
cols: List[str] = []

for i in range(max_steps):
cols.append(f"countIf(furthest = {i + 1}) step_{i + 1}")
def get_step_counts_query(self):
steps_per_person_query = self._get_steps_per_person_query()
max_steps = len(self._filter.entities)

return ", ".join(cols)
return f"""SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} FROM (
{steps_per_person_query}
) GROUP BY person_id
"""

def _format_results(self, results):
# Format of this is [step order, person count (that reached that step), array of person uuids]
Expand Down Expand Up @@ -61,12 +57,3 @@ def _get_breakdown_prop(self) -> str:
return ", prop"
else:
return ""

# TODO: include in the inner query to handle time to convert
def _get_step_time_avgs(self, max_steps: int):
conditions: List[str] = []
for i in range(1, max_steps):
conditions.append(f"avg(step_{i}_average_conversion_time) step_{i}_average_conversion_time")

formatted = ", ".join(conditions)
return f", {formatted}" if formatted else ""
21 changes: 3 additions & 18 deletions ee/clickhouse/queries/funnels/funnel_persons.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,16 @@
from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase
from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnelNew
from ee.clickhouse.sql.funnels.funnel import FUNNEL_PERSONS_BY_STEP_SQL
from posthog.models import Person


class ClickhouseFunnelPersons(ClickhouseFunnelBase):
class ClickhouseFunnelPersons(ClickhouseFunnelNew):
def get_query(self, format_properties):
steps_per_person_query = self._get_steps_per_person_query()
return FUNNEL_PERSONS_BY_STEP_SQL.format(
**format_properties,
steps_per_person_query=steps_per_person_query,
steps_per_person_query=self.get_step_counts_query(),
persons_steps=self._get_funnel_person_step_condition()
)

def _get_funnel_person_step_condition(self):
step_num = self._filter.funnel_step
max_steps = len(self._filter.entities)

if step_num is None:
raise ValueError("funnel_step should not be none")

if step_num >= 0:
self.params.update({"step_num": [i for i in range(step_num, max_steps + 1)]})
return "steps IN %(step_num)s"
else:
self.params.update({"step_num": abs(step_num) - 1})
return "steps = %(step_num)s"

def _format_results(self, results):
people = Person.objects.filter(team_id=self._team.pk, uuid__in=[val[0] for val in results])

Expand Down
4 changes: 2 additions & 2 deletions ee/clickhouse/queries/funnels/funnel_trends.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import date, datetime, timedelta
from typing import Union

from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnelNew
from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase
from ee.clickhouse.queries.util import get_time_diff, get_trunc_func_ch
from posthog.constants import BREAKDOWN
from posthog.models.filters.filter import Filter
Expand All @@ -15,7 +15,7 @@
HUMAN_READABLE_TIMESTAMP_FORMAT = "%a. %-d %b"


class ClickhouseFunnelTrends(ClickhouseFunnelNew):
class ClickhouseFunnelTrends(ClickhouseFunnelBase):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Eventually, I think visualisation like Trends should inherit from the different types of ordered funnels, and use their now-new get_step_counts_query() as the seed query, but now's not the time for it. ( Want atleast a pattern of 2 before we generalise, so we don't create the wrong abstractions)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup, agreed

"""
## Funnel trends assumptions

Expand Down
39 changes: 33 additions & 6 deletions ee/clickhouse/queries/funnels/funnel_unordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@ class ClickhouseFunnelUnordered(ClickhouseFunnelBase):
"""

def get_query(self, format_properties):
return self.get_step_counts_query()

max_steps = len(self._filter.entities)

return f"""
SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} FROM (
{self.get_step_counts_query()}
) SETTINGS allow_experimental_window_functions = 1
"""

def get_step_counts_query(self):

Expand All @@ -45,7 +52,7 @@ def get_step_counts_query(self):
"""

formatted_query = f"""
SELECT *, {sorting_condition} AS steps FROM (
SELECT *, {sorting_condition} AS steps {self._get_step_times(max_steps)} FROM (
{inner_query}
) WHERE step_0 = 1"""

Expand All @@ -56,11 +63,9 @@ def get_step_counts_query(self):
union_formatted_query = " UNION ALL ".join(union_queries)

return f"""
SELECT furthest, count(1), groupArray(100)(person_id) FROM (
SELECT person_id, max(steps) AS furthest FROM (
SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} FROM (
{union_formatted_query}
) GROUP BY person_id
) GROUP BY furthest SETTINGS allow_experimental_window_functions = 1
) GROUP BY person_id
"""

def get_sorting_condition(self, max_steps: int):
Expand All @@ -72,3 +77,25 @@ def get_sorting_condition(self, max_steps: int):
)

return f"arraySum([{','.join(basic_conditions)}, 1])"

# TODO: copied from funnel.py. Once the new funnel query replaces old one, the base format_results function can use this
def _format_results(self, results):
# Format of this is [step order, person count (that reached that step), array of person uuids]
steps = []
total_people = 0

for step in reversed(self._filter.entities):

if results[0] and len(results[0]) > 0:
total_people += results[0][step.order]

serialized_result = self._serialize_step(step, total_people, [])
if step.order > 0:
serialized_result.update(
{"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]}
)
else:
serialized_result.update({"average_conversion_time": None})
steps.append(serialized_result)

return steps[::-1] #  reverse
19 changes: 19 additions & 0 deletions ee/clickhouse/queries/funnels/funnel_unordered_persons.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from ee.clickhouse.queries.funnels.funnel_unordered import ClickhouseFunnelUnordered
from ee.clickhouse.sql.funnels.funnel import FUNNEL_PERSONS_BY_STEP_SQL
from posthog.models import Person


class ClickhouseFunnelUnorderedPersons(ClickhouseFunnelUnordered):
def get_query(self, format_properties):
return FUNNEL_PERSONS_BY_STEP_SQL.format(
**format_properties,
steps_per_person_query=self.get_step_counts_query(),
persons_steps=self._get_funnel_person_step_condition()
)

def _format_results(self, results):
people = Person.objects.filter(team_id=self._team.pk, uuid__in=[val[0] for val in results])

from posthog.api.person import PersonSerializer

return PersonSerializer(people, many=True).data
Loading