{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metric_DefinitionBaseline_Val
0Unique cookies to view course overview page pe...40000.000000
1Unique cookies to click \"Start free trial\" per...3200.000000
2Enrollments per day:660.000000
3Click-through-probability on \"Start free trial\":0.080000
4Probability of enrolling, given click:0.206250
5Probability of payment, given enroll:0.530000
6Probability of payment, given click0.109313
\n", "
" ], "text/plain": [ " Metric_Definition Baseline_Val\n", "0 Unique cookies to view course overview page pe... 40000.000000\n", "1 Unique cookies to click \"Start free trial\" per... 3200.000000\n", "2 Enrollments per day: 660.000000\n", "3 Click-through-probability on \"Start free trial\": 0.080000\n", "4 Probability of enrolling, given click: 0.206250\n", "5 Probability of payment, given enroll: 0.530000\n", "6 Probability of payment, given click 0.109313" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd, numpy as np\n", "df = pd.read_csv('data/Final_Project_Baseline_Values.csv', \n", " index_col = False, \n", " header = None, \n", " names= ['Metric_Definition', 'Baseline_Val'] )\n", "df" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metric_DefinitionBaseline_ValMetric
0Unique cookies to view course overview page pe...40000.000000Cookies
1Unique cookies to click \"Start free trial\" per...3200.000000Clicks
2Enrollments per day:660.000000Enrollments (Clicks * Gross_Conversion)
3Click-through-probability on \"Start free trial\":0.080000CTP (Clicks/Cookies)
4Probability of enrolling, given click:0.206250Gross_Conversion (Enrollments/Clicks)
5Probability of payment, given enroll:0.530000Retention (Paid/Enrollments)
6Probability of payment, given click0.109313Net_Conversion(Paid/Clicks)
\n", "
" ], "text/plain": [ " Metric_Definition Baseline_Val \\\n", "0 Unique cookies to view course overview page pe... 40000.000000 \n", "1 Unique cookies to click \"Start free trial\" per... 3200.000000 \n", "2 Enrollments per day: 660.000000 \n", "3 Click-through-probability on \"Start free trial\": 0.080000 \n", "4 Probability of enrolling, given click: 0.206250 \n", "5 Probability of payment, given enroll: 0.530000 \n", "6 Probability of payment, given click 0.109313 \n", "\n", " Metric \n", "0 Cookies \n", "1 Clicks \n", "2 Enrollments (Clicks * Gross_Conversion) \n", "3 CTP (Clicks/Cookies) \n", "4 Gross_Conversion (Enrollments/Clicks) \n", "5 Retention (Paid/Enrollments) \n", "6 Net_Conversion(Paid/Clicks) " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Labelling the metrics for ease of reference\n", "df['Metric'] = ['Cookies', 'Clicks', 'Enrollments (Clicks * Gross_Conversion)',\n", " 'CTP (Clicks/Cookies)', \n", " 'Gross_Conversion (Enrollments/Clicks)', \n", " 'Retention (Paid/Enrollments)','Net_Conversion(Paid/Clicks)']\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metric_DefinitionBaseline_ValMetricdmin
0Unique cookies to view course overview page pe...40000.000000Cookies3000.0000
1Unique cookies to click \"Start free trial\" per...3200.000000Clicks240.0000
2Enrollments per day:660.000000Enrollments (Clicks * Gross_Conversion)50.0000
3Click-through-probability on \"Start free trial\":0.080000CTP (Clicks/Cookies)0.0100
4Probability of enrolling, given click:0.206250Gross_Conversion (Enrollments/Clicks)0.0100
5Probability of payment, given enroll:0.530000Retention (Paid/Enrollments)0.0100
6Probability of payment, given click0.109313Net_Conversion(Paid/Clicks)0.0075
\n", "
" ], "text/plain": [ " Metric_Definition Baseline_Val \\\n", "0 Unique cookies to view course overview page pe... 40000.000000 \n", "1 Unique cookies to click \"Start free trial\" per... 3200.000000 \n", "2 Enrollments per day: 660.000000 \n", "3 Click-through-probability on \"Start free trial\": 0.080000 \n", "4 Probability of enrolling, given click: 0.206250 \n", "5 Probability of payment, given enroll: 0.530000 \n", "6 Probability of payment, given click 0.109313 \n", "\n", " Metric dmin \n", "0 Cookies 3000.0000 \n", "1 Clicks 240.0000 \n", "2 Enrollments (Clicks * Gross_Conversion) 50.0000 \n", "3 CTP (Clicks/Cookies) 0.0100 \n", "4 Gross_Conversion (Enrollments/Clicks) 0.0100 \n", "5 Retention (Paid/Enrollments) 0.0100 \n", "6 Net_Conversion(Paid/Clicks) 0.0075 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Add Practical Significance level column\n", "df['dmin'] = [3000,240,50,0.01,0.01,0.01,0.0075]\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Scaling collected data:\n", "For all the calculations to follow we need to scale our collected counts estimates of metrics with the sample size we specified for variance estimation. In this case, from 40000 unique cookies to visit the course overview page per day, to 5000." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Measuring Standard Deviation\n", "$$SD= \\sqrt \\frac{\\ \\hat{p}(1−\\hat{p})}{\\ n}$$\n", "\n", "In order to estimate variance analytically, we can assume metrics which are probabilities ( $\\hat{p}$ ) are binomially distributed, so we can use this formula for the standard deviation: \n", "\n", "This assumption is only valid when the unit of diversion of the experiment is equal to the unit of analysis (the denominator of the metric formula). 
In the cases when this is not valid, the actual variance might be different and it is recommended to estimate it empirically.\n", "\n", "For each metric, we need to plug two variables into the formula: \n", "
$ \\hat{p} $ = baseline probability of the event to occur\n", "
n = sample size" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Metric_DefinitionBaseline_ValMetricdminSD
0Unique cookies to view course overview page pe...40000.000000Cookies3000.0000NA
1Unique cookies to click \"Start free trial\" per...3200.000000Clicks240.0000NA
2Enrollments per day:660.000000Enrollments (Clicks * Gross_Conversion)50.0000NA
3Click-through-probability on \"Start free trial\":0.080000CTP (Clicks/Cookies)0.0100NA
4Probability of enrolling, given click:0.206250Gross_Conversion (Enrollments/Clicks)0.01000.0202
5Probability of payment, given enroll:0.530000Retention (Paid/Enrollments)0.01000.05495
6Probability of payment, given click0.109313Net_Conversion(Paid/Clicks)0.00750.0156
\n", "
" ], "text/plain": [ " Metric_Definition Baseline_Val \\\n", "0 Unique cookies to view course overview page pe... 40000.000000 \n", "1 Unique cookies to click \"Start free trial\" per... 3200.000000 \n", "2 Enrollments per day: 660.000000 \n", "3 Click-through-probability on \"Start free trial\": 0.080000 \n", "4 Probability of enrolling, given click: 0.206250 \n", "5 Probability of payment, given enroll: 0.530000 \n", "6 Probability of payment, given click 0.109313 \n", "\n", " Metric dmin SD \n", "0 Cookies 3000.0000 NA \n", "1 Clicks 240.0000 NA \n", "2 Enrollments (Clicks * Gross_Conversion) 50.0000 NA \n", "3 CTP (Clicks/Cookies) 0.0100 NA \n", "4 Gross_Conversion (Enrollments/Clicks) 0.0100 0.0202 \n", "5 Retention (Paid/Enrollments) 0.0100 0.05495 \n", "6 Net_Conversion(Paid/Clicks) 0.0075 0.0156 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Predetermined from baseline values\n", "ctp = 0.08 \n", "gross_conv = 0.206250\n", "retention = 0.53\n", "net_conv = 0.109313\n", "\n", "#scale data based on sample size 5000:\n", "sum_cookies = 5000\n", "sum_clicks = ctp * sum_cookies # n size for Gross Conversion & Net Conversion\n", "sum_enrolled = sum_clicks * gross_conv # n size for Retention\n", "\n", "# Calculate SD for evaluation metrics:\n", "sd_gross_conv = round(np.sqrt((gross_conv * (1-gross_conv))/sum_clicks),4)\n", "sd_retention = round(np.sqrt((retention * (1-retention))/sum_enrolled),5)\n", "sd_net_conv = round(np.sqrt((net_conv * (1-net_conv))/sum_clicks),4)\n", "\n", "df[\"SD\"] = ['NA','NA', 'NA', 'NA', sd_gross_conv, sd_retention, sd_net_conv]\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sizing\n", "To calculate the sample size required for the experiment, the largest sample size required for one of the evaluation metrics will effectively be the size to go with. 
I plug in the following values into the [online calculator](http://www.evanmiller.org/ab-testing/sample-size.html) for sample size:\n", "- Baseline conversion rate: probability of each metric\n", "- Minimum detectable effect: dmin of each metric\n", "- beta: 0.2\n", "- alpha: 0.05" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "df_eval = df.iloc[4: , 1:]\n", "df_eval.set_index(['Metric'], inplace = True)\n", "df_eval.rename(columns = {'Baseline_Val': 'p'} , inplace = True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pdminSDsample_size
Metric
Gross_Conversion (Enrollments/Clicks)0.2062500.01000.020225835
Retention (Paid/Enrollments)0.5300000.01000.0549539155
Net_Conversion(Paid/Clicks)0.1093130.00750.015627413
\n", "
" ], "text/plain": [ " p dmin SD sample_size\n", "Metric \n", "Gross_Conversion (Enrollments/Clicks) 0.206250 0.0100 0.0202 25835\n", "Retention (Paid/Enrollments) 0.530000 0.0100 0.05495 39155\n", "Net_Conversion(Paid/Clicks) 0.109313 0.0075 0.0156 27413" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Sample size using the online calculator\n", "df_eval ['sample_size'] = [25835, 39155, 27413]\n", "df_eval" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Total pageviews required = Total unique cookies accessing course overview page \n", "\n", "- Gross Conversion\n", "
Total cookies required in order to have **25835 clicks** per group (control and experiment):\n", "
$$ \\frac {Clicks * 2}{\\ ctp} $$\n", "\n", "\n", "- Retention\n", "
Total cookies required in order to have **39155 enrollments** per group (control and experiment):\n", "
$$ \\frac{Enrollments * 2}{\\ GrossConversion * ctp} $$\n", "\n", "- Net Conversion\n", "
Total cookies required in order to have **27413 clicks** per group (control and experiment):\n", "
$$ \\frac{Clicks * 2}{\\ ctp} $$" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pdminSDsample_sizepage_views
Metric
Gross_Conversion (Enrollments/Clicks)0.2062500.01000.020225835645875
Retention (Paid/Enrollments)0.5300000.01000.05495391554746061
Net_Conversion(Paid/Clicks)0.1093130.00750.015627413685325
\n", "
" ], "text/plain": [ " p dmin SD sample_size \\\n", "Metric \n", "Gross_Conversion (Enrollments/Clicks) 0.206250 0.0100 0.0202 25835 \n", "Retention (Paid/Enrollments) 0.530000 0.0100 0.05495 39155 \n", "Net_Conversion(Paid/Clicks) 0.109313 0.0075 0.0156 27413 \n", "\n", " page_views \n", "Metric \n", "Gross_Conversion (Enrollments/Clicks) 645875 \n", "Retention (Paid/Enrollments) 4746061 \n", "Net_Conversion(Paid/Clicks) 685325 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pageviews_gc = round(25835 * 2 / 0.08) \n", "pageviews_ret = round(39155 * 2 / (0.206250 * 0.08))\n", "pageviews_nc = round(27413 * 2 / 0.08)\n", "df_eval ['page_views'] = [pageviews_gc,pageviews_ret,pageviews_nc]\n", "df_eval" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Duration vs Exposure\n", "The roughly 4.7 million page views required for Retention are far beyond the estimated 40K views we get on average daily. That would take us about 119 days at full traffic to collect the data, and typically any experiment taking longer than a few weeks is not reasonable, hence I decided to drop Retention as a metric. 
**Net Conversion** has the largest number of page views of the remaining 2 evaluation metrics.\n", "\n", "Now, let's calculate the duration at different exposure rates:" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Duration at 100% exposure: 17 days\n", "Duration at 75% exposure: 23 days\n", "Duration at 50% exposure: 34 days\n" ] } ], "source": [ "Duration100 = round(pageviews_nc / 40000)\n", "Duration75 = round(pageviews_nc / (40000 * 0.75))\n", "Duration50 = round(pageviews_nc / (40000 * 0.5))\n", "print ('Duration at 100% exposure: {} days'.format(Duration100))\n", "print ('Duration at 75% exposure: {} days'.format(Duration75))\n", "print ('Duration at 50% exposure: {} days'.format(Duration50))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I decided to go with **75% exposure** since a 3 week duration to run the experiment is a reasonable length. 50% exposure rate with over a month long duration is not necessary as the risk is low since we do not expect a big drop in net conversion which may impact the company's revenue. I personally try to avoid 100% exposure as I find that sometimes there are some business risks or technology issues resulting from running the experiment, and it is always good to hold back some traffic from the change.\n", "\n", "There are similar analyses using different exposure rates than mine with good justifications as well, so definitely do your own reasoning to choose the right exposure." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Analyzing results from the experiment\n", "### 1. Sanity Check\n", "\n", "Before analyzing results from the experiment, sanity checks should be performed. These checks help to verify if the experiment was conducted as expected and that other factors did not influence the data which we collected. 
This also makes sure that data collection was correct.\n", "\n", "For invariant metrics we expect equal diversion into the experiment and control groups. We will test this at the 95% confidence level.\n", "\n", "Two of these metrics are simple counts like number of cookies or number of clicks and the third is a probability (CTP). We will use two different ways of checking whether these observed values are within expectations.\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DatePageviewsClicksEnrollmentsPayments
0Sat, Oct 117723687134.070.0
1Sun, Oct 129102779147.070.0
2Mon, Oct 1310511909167.095.0
3Tue, Oct 149871836156.0105.0
4Wed, Oct 1510014837163.064.0
\n", "
" ], "text/plain": [ " Date Pageviews Clicks Enrollments Payments\n", "0 Sat, Oct 11 7723 687 134.0 70.0\n", "1 Sun, Oct 12 9102 779 147.0 70.0\n", "2 Mon, Oct 13 10511 909 167.0 95.0\n", "3 Tue, Oct 14 9871 836 156.0 105.0\n", "4 Wed, Oct 15 10014 837 163.0 64.0" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load experiment results into dataframe\n", "df_ctr = pd.read_csv('data/Final_Project_Results_Control.csv')\n", "df_ctr.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DatePageviewsClicksEnrollmentsPayments
0Sat, Oct 117716686105.034.0
1Sun, Oct 129288785116.091.0
2Mon, Oct 1310480884145.079.0
3Tue, Oct 149867827138.092.0
4Wed, Oct 159793832140.094.0
\n", "
" ], "text/plain": [ " Date Pageviews Clicks Enrollments Payments\n", "0 Sat, Oct 11 7716 686 105.0 34.0\n", "1 Sun, Oct 12 9288 785 116.0 91.0\n", "2 Mon, Oct 13 10480 884 145.0 79.0\n", "3 Tue, Oct 14 9867 827 138.0 92.0\n", "4 Wed, Oct 15 9793 832 140.0 94.0" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_exp = pd.read_csv('data/Final_Project_Results_Experiment.csv')\n", "df_exp.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Assigning variable names to total of each columns in the experiment results\n", "pageviews_exp = df_exp['Pageviews'].sum()\n", "pageviews_ctr = df_ctr['Pageviews'].sum()\n", "clicks_exp = df_exp['Clicks'].sum()\n", "clicks_ctr = df_ctr['Clicks'].sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1.1 Sanity Checks for differences between counts \n", "I use a binomial distribution (p = 0.5) to check whether the split of cookies and of clicks between the two groups falls within the margin of error at the 95% confidence level, since cookies are randomly assigned to either the control or the experiment group.\n", "\n", "What we want to test is whether our observed fraction, $\\hat {p}$ (number of samples in control or experiment group divided by total number of samples in both groups) is not significantly different from p = 0.5. If the observed $\\hat {p}$ is **within** the margin of error range acceptable at a 95% confidence level, **we passed the sanity checks!** =)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ExperimentControl
Number of Cookies344660345543
Number of Clicks2832528378
\n", "
" ], "text/plain": [ " Experiment Control\n", "Number of Cookies 344660 345543\n", "Number of Clicks 28325 28378" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_inv = pd.DataFrame({\n", " 'Experiment': [pageviews_exp, clicks_exp],\n", " 'Control': [pageviews_ctr, clicks_ctr]\n", " }, index = ['Number of Cookies', 'Number of Clicks'])\n", "df_inv" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ExperimentControlSDMOELower_BoundUpper_Bound
Number of Cookies3446603455430.0006020.0011800.4988200.501180
Number of Clicks28325283780.0021000.0041160.4958840.504116
\n", "
" ], "text/plain": [ " Experiment Control SD MOE Lower_Bound \\\n", "Number of Cookies 344660 345543 0.000602 0.001180 0.498820 \n", "Number of Clicks 28325 28378 0.002100 0.004116 0.495884 \n", "\n", " Upper_Bound \n", "Number of Cookies 0.501180 \n", "Number of Clicks 0.504116 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p = 0.5\n", "alpha = 0.05\n", "\n", "#Standard Deviation using binomial probability p = 0.5\n", "sd_cookies = np.sqrt((0.5*(1-0.5))/(pageviews_exp + pageviews_ctr))\n", "sd_clicks = np.sqrt((0.5*(1-0.5))/(clicks_exp + clicks_ctr))\n", "df_inv['SD'] = [sd_cookies, sd_clicks]\n", "\n", "#Margin of Error = Z score * Standard Deviation\n", "#z score is 1.96 at 95% confidence interval\n", "moe_cookies = 1.96 * sd_cookies\n", "moe_clicks = 1.96 * sd_clicks\n", "df_inv['MOE'] = [1.96 * sd_cookies, 1.96 * sd_clicks]\n", "\n", "#Lower and Upper Bound (p +- MOE)\n", "df_inv['Lower_Bound'] = [0.5 - moe_cookies, 0.5 - moe_clicks]\n", "df_inv['Upper_Bound'] = [0.5 + moe_cookies, 0.5 + moe_clicks]\n", "\n", "df_inv" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ExperimentControlSDMOELower_BoundUpper_Boundp_observedPass_Sanity
Number of Cookies3446603455430.0006020.0011800.4988200.5011800.499360True
Number of Clicks28325283780.0021000.0041160.4958840.5041160.499533True
\n", "
" ], "text/plain": [ " Experiment Control SD MOE Lower_Bound \\\n", "Number of Cookies 344660 345543 0.000602 0.001180 0.498820 \n", "Number of Clicks 28325 28378 0.002100 0.004116 0.495884 \n", "\n", " Upper_Bound p_observed Pass_Sanity \n", "Number of Cookies 0.501180 0.499360 True \n", "Number of Clicks 0.504116 0.499533 True " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# observed fraction, p_observed using either experiment or control group (I ran the calcs with experiment group)\n", "p_observed_cookies = pageviews_exp/(pageviews_exp+pageviews_ctr)\n", "p_observed_clicks = clicks_exp/(clicks_exp+clicks_ctr)\n", "\n", "df_inv['p_observed'] = [p_observed_cookies,p_observed_clicks]\n", "df_inv[\"Pass_Sanity\"] = df_inv.apply(lambda x: (x['p_observed'] > x['Lower_Bound']) \n", " and (x['p_observed'] < x['Upper_Bound']),axis = 'columns' )\n", "df_inv" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ExperimentControlSDMOELower_BoundUpper_Boundp_observedPass_Sanity
Number of Cookies3446603455430.0006020.0011800.4988200.5011800.500640True
Number of Clicks28325283780.0021000.0041160.4958840.5041160.500467True
\n", "
" ], "text/plain": [ " Experiment Control SD MOE Lower_Bound \\\n", "Number of Cookies 344660 345543 0.000602 0.001180 0.498820 \n", "Number of Clicks 28325 28378 0.002100 0.004116 0.495884 \n", "\n", " Upper_Bound p_observed Pass_Sanity \n", "Number of Cookies 0.501180 0.500640 True \n", "Number of Clicks 0.504116 0.500467 True " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same calc as above but using control group to calc observed fraction , p_observed :\n", "p_observed_cookies = pageviews_ctr/(pageviews_exp+pageviews_ctr)\n", "p_observed_clicks = clicks_ctr/(clicks_exp+clicks_ctr)\n", "\n", "df_inv['p_observed'] = [p_observed_cookies,p_observed_clicks]\n", "\n", "df_inv[\"Pass_Sanity\"] = df_inv.apply(lambda x: (x['p_observed'] > x['Lower_Bound']) \n", " and (x['p_observed'] < x['Upper_Bound']),axis = 'columns' )\n", "df_inv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I ran both sets of calculations to show you that the result is the same: **Observed fraction is within bounds, passing sanity checks for both metrics.**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1.2 Sanity Checks for differences between probabilities\n", "**Click-through-probability of the Free Trial Button**\n", "\n", "In this case, we want to make sure the proportion of clicks given a pageview (our observed CTP) is about the same in both groups (since this was not expected to change due to the experiment). In order to check this out we will calculate the CTP in each group and calculate a confidence interval for the expected difference between them.\n", "In other words, we expect to see no difference ( CTPexp−CTPcont=0 ), with an acceptable margin of error, dictated by our calculated confidence interval. 
**The changes we should notice are for the calculation of the standard error, which in this case is a pooled standard error.**\n", "\n", "$$ SD_{pool}= \\sqrt {\\hat {p_{pool}}(1−\\hat {p_{pool}})(\\frac{\\ 1}{Ncont}+\\frac{\\ 1}{Nexp})}$$\n", "\n", "$$ \\hat {p_{pool}} = \\frac{\\ X_{cont} + X_{exp}}{N_{cont}+ N_{exp}} $$\n", "\n", "We should understand that CTP is a proportion in a population (amount of events x in a population n) like the amount of clicks out of the amount of pageviews." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CTP_ExperimentCTP_ControlPpoolDiff_in_CTPSDpoolMOEpool
Click through Probability0.08220.08210.08220.00010.00070.0014
\n", "
" ], "text/plain": [ " CTP_Experiment CTP_Control Ppool Diff_in_CTP \\\n", "Click through Probability 0.0822 0.0821 0.0822 0.0001 \n", "\n", " SDpool MOEpool \n", "Click through Probability 0.0007 0.0014 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# CTP probability per group\n", "ctp_ctr = round(clicks_ctr/pageviews_ctr,4)\n", "ctp_exp = round(clicks_exp/pageviews_exp,4)\n", "\n", "ctp_diff = round(ctp_exp - ctp_ctr,4)\n", "\n", "#pooled CTP probability\n", "ctp_pool = round((clicks_ctr + clicks_exp) / (pageviews_ctr + pageviews_exp),4)\n", "SD_pool = round(np.sqrt ( (ctp_pool*(1-ctp_pool)/pageviews_ctr) + (ctp_pool*(1-ctp_pool)/pageviews_exp)),4)\n", "MOE = round(1.96* SD_pool,4)\n", "\n", "df_ctp = pd.DataFrame({\n", " 'CTP_Experiment': [ctp_exp],\n", " 'CTP_Control': [ctp_ctr],\n", " 'Ppool': [ctp_pool],\n", " 'Diff_in_CTP': [ctp_diff],\n", " 'SDpool':[SD_pool],\n", " 'MOEpool': [MOE]\n", " }, index = ['Click through Probability'])\n", "df_ctp" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CTP_ExperimentCTP_ControlPpoolDiff_in_CTPSDpoolMOEpoolLower_BoundUpper_BoundPass_Sanity
Click through Probability0.08220.08210.08220.00010.00070.00140.08070.0835True
\n", "
" ], "text/plain": [ " CTP_Experiment CTP_Control Ppool Diff_in_CTP \\\n", "Click through Probability 0.0822 0.0821 0.0822 0.0001 \n", "\n", " SDpool MOEpool Lower_Bound Upper_Bound \\\n", "Click through Probability 0.0007 0.0014 0.0807 0.0835 \n", "\n", " Pass_Sanity \n", "Click through Probability True " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Lower & Upper Bound with p using either ctp control or experiment group (let's use ctp control as example)\n", "df_ctp['Lower_Bound'] = round(ctp_ctr - MOE, 4)\n", "df_ctp['Upper_Bound'] = round(ctp_ctr + MOE,4)\n", "df_ctp[\"Pass_Sanity\"] = df_ctp.apply(lambda x: (x['CTP_Control'] > x['Lower_Bound']) \n", " and (x['CTP_Control'] < x['Upper_Bound']),axis = 'columns' )\n", "df_ctp" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CTP_ExperimentCTP_ControlPpoolDiff_in_CTPSDpoolMOEpoolLower_BoundUpper_BoundPass_Sanity
Click through Probability0.08220.08210.08220.00010.00070.00140.08080.0836True
\n", "
" ], "text/plain": [ " CTP_Experiment CTP_Control Ppool Diff_in_CTP \\\n", "Click through Probability 0.0822 0.0821 0.0822 0.0001 \n", "\n", " SDpool MOEpool Lower_Bound Upper_Bound \\\n", "Click through Probability 0.0007 0.0014 0.0808 0.0836 \n", "\n", " Pass_Sanity \n", "Click through Probability True " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#If we use ctp experiment instead:\n", "df_ctp['Lower_Bound'] = round(ctp_exp - MOE, 4)\n", "df_ctp['Upper_Bound'] = round(ctp_exp + MOE,4)\n", "df_ctp[\"Pass_Sanity\"] = df_ctp.apply(lambda x: (x['CTP_Experiment'] > x['Lower_Bound']) \n", " and (x['CTP_Experiment'] < x['Upper_Bound']),axis = 'columns' )\n", "df_ctp" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**We passed the sanity check** for the click-through-probability metric: the observed difference in CTP between the groups (0.0001) is well within the margin of error (0.0014), and the CTP of either group is within bounds. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2. Check for Practical and Statistical Significance\n", "Based on the experiment results, we have 23 days of enrollment, so to calculate the probability of the evaluation metrics, the data points should be retrieved from those 23 days only. \n", "\n", "The next step is looking at the changes between the control and experiment groups with regard to our evaluation metrics to make sure the difference is there, that it is statistically significant and most importantly practically significant (the difference is \"big\" enough to make the experimented change beneficial to the company).\n", "\n", "Now, all that is left is to measure for each evaluation metric, the difference between the values from both groups. Then, we compute the confidence interval for that difference and test whether or not this confidence interval is both statistically and practically significant." 
] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Date 37\n", "Pageviews 37\n", "Clicks 37\n", "Enrollments 23\n", "Payments 23\n", "dtype: int64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_ctr.notnull().sum()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Date 37\n", "Pageviews 37\n", "Clicks 37\n", "Enrollments 23\n", "Payments 23\n", "dtype: int64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_exp.notnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Based on the experiment results, we have 23 days of enrollment and payment data, so to calculate the probability of the evaluation metrics, we should use the corresponding pageviews and clicks from those days, and not all of them." ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "cond = (df_exp['Enrollments'].notnull()) & (df_exp['Payments'].notnull())\n", "df_exp23 = df_exp[cond]\n", "cond = (df_ctr['Enrollments'].notnull()) & (df_ctr['Payments'].notnull())\n", "df_ctr23 = df_ctr[cond]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ExperimentControl
Pageviews211362.0212163.0
Clicks17260.017293.0
Enrollments3423.03785.0
Payments1945.02033.0
\n", "
" ], "text/plain": [ " Experiment Control\n", "Pageviews 211362.0 212163.0\n", "Clicks 17260.0 17293.0\n", "Enrollments 3423.0 3785.0\n", "Payments 1945.0 2033.0" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_23 = pd.DataFrame({\n", " 'Experiment': df_exp23[['Pageviews','Clicks','Enrollments','Payments']].sum(), \n", " 'Control': df_ctr23[['Pageviews','Clicks','Enrollments','Payments']].sum()\n", " } )\n", "\n", "df_23" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "df_23.loc['Gross_Conversion'] = df_23.loc['Enrollments']/df_23.loc['Clicks']\n", "df_23.loc['Net_Conversion'] = df_23.loc['Payments']/df_23.loc['Clicks']" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ExperimentControlTotal
Pageviews211362.000000212163.000000423525.0
Clicks17260.00000017293.00000034553.0
Enrollments3423.0000003785.0000007208.0
Payments1945.0000002033.0000003978.0
Gross_Conversion0.1983200.218875NaN
Net_Conversion0.1126880.117562NaN
\n", "
" ], "text/plain": [ " Experiment Control Total\n", "Pageviews 211362.000000 212163.000000 423525.0\n", "Clicks 17260.000000 17293.000000 34553.0\n", "Enrollments 3423.000000 3785.000000 7208.0\n", "Payments 1945.000000 2033.000000 3978.0\n", "Gross_Conversion 0.198320 0.218875 NaN\n", "Net_Conversion 0.112688 0.117562 NaN" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_23['Total'] = df_23['Experiment'].iloc[:4] + df_23['Control'].iloc[:4] \n", "df_23" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Just in case you need to refer back to the formula to calculate pooled probability and std deviation pool:\n", "$$ SD_{pool}= \\sqrt {\\hat {p_{pool}}(1−\\hat{p_{pool}})(\\frac{\\ 1}{Ncont}+\\frac{\\ 1}{Nexp})}$$\n", "\n", "$$ \\hat {p_{pool}} = \\frac{\\ X_{cont} + X_{exp}}{N_{cont}+ N_{exp}} $$" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ExperimentControlTotalPpooldminSDpoolMOE
Pageviews211362.000000212163.000000423525.0NaN3000.0000NaNNaN
Clicks17260.00000017293.00000034553.0NaN240.0000NaNNaN
Enrollments3423.0000003785.0000007208.0NaN50.0000NaNNaN
Payments1945.0000002033.0000003978.0NaNNaNNaNNaN
Gross_Conversion0.1983200.218875NaN0.2086070.01000.0043720.008569
Net_Conversion0.1126880.117562NaN0.1151270.00750.0034340.006731
\n", "
" ], "text/plain": [ " Experiment Control Total Ppool dmin \\\n", "Pageviews 211362.000000 212163.000000 423525.0 NaN 3000.0000 \n", "Clicks 17260.000000 17293.000000 34553.0 NaN 240.0000 \n", "Enrollments 3423.000000 3785.000000 7208.0 NaN 50.0000 \n", "Payments 1945.000000 2033.000000 3978.0 NaN NaN \n", "Gross_Conversion 0.198320 0.218875 NaN 0.208607 0.0100 \n", "Net_Conversion 0.112688 0.117562 NaN 0.115127 0.0075 \n", "\n", " SDpool MOE \n", "Pageviews NaN NaN \n", "Clicks NaN NaN \n", "Enrollments NaN NaN \n", "Payments NaN NaN \n", "Gross_Conversion 0.004372 0.008569 \n", "Net_Conversion 0.003434 0.006731 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Add Ppool as a row:\n", "df_23['Ppool'] = np.nan\n", "Ppool_gc = df_23.loc['Enrollments']['Total'] / df_23.loc['Clicks']['Total']\n", "Ppool_nc = df_23.loc['Payments']['Total'] / df_23.loc['Clicks']['Total']\n", "df_23['Ppool'].loc['Gross_Conversion'] = Ppool_gc\n", "df_23['Ppool'].loc['Net_Conversion'] = Ppool_nc\n", "\n", "df_23['dmin'] = [3000,240,50,np.nan,0.01,0.0075]\n", "\n", "# Std Deviation pool\n", "SDpool_gc = round(np.sqrt( (Ppool_gc*(1-Ppool_gc)/df_23.loc['Clicks']['Experiment']) + \n", " (Ppool_gc*(1-Ppool_gc)/df_23.loc['Clicks']['Control'])\n", " ),6)\n", "SDpool_nc = round(np.sqrt( (Ppool_nc*(1-Ppool_nc)/df_23.loc['Clicks']['Experiment']) + \n", " (Ppool_nc*(1-Ppool_nc)/df_23.loc['Clicks']['Control'])\n", " ),6)\n", "df_23['SDpool'] = [np.nan,np.nan,np.nan,np.nan, SDpool_gc, SDpool_nc]\n", "\n", "# Add Margin of Error at 95% Confidence Interval, z-score is 1.96\n", "df_23['MOE'] = 1.96 * df_23['SDpool']\n", "df_23" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 2.1 Practical Significance Test:\n", "To determine the practical significance level, the probability of difference(Pdiff) between the control and experiment group has to be larger than dmin.
\n", "Compute difference between Gross Conversion Experiment and Control group. Repeat the same for Net Conversion. " ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ExperimentControlTotalPpooldminSDpoolMOEPdiff
Pageviews211362.000000212163.000000423525.0NaN3000.0000NaNNaNNaN
Clicks17260.00000017293.00000034553.0NaN240.0000NaNNaNNaN
Enrollments3423.0000003785.0000007208.0NaN50.0000NaNNaNNaN
Payments1945.0000002033.0000003978.0NaNNaNNaNNaNNaN
Gross_Conversion0.1983200.218875NaN0.2086070.01000.0043720.008569-0.020555
Net_Conversion0.1126880.117562NaN0.1151270.00750.0034340.006731-0.004874
\n", "
" ], "text/plain": [ " Experiment Control Total Ppool dmin \\\n", "Pageviews 211362.000000 212163.000000 423525.0 NaN 3000.0000 \n", "Clicks 17260.000000 17293.000000 34553.0 NaN 240.0000 \n", "Enrollments 3423.000000 3785.000000 7208.0 NaN 50.0000 \n", "Payments 1945.000000 2033.000000 3978.0 NaN NaN \n", "Gross_Conversion 0.198320 0.218875 NaN 0.208607 0.0100 \n", "Net_Conversion 0.112688 0.117562 NaN 0.115127 0.0075 \n", "\n", " SDpool MOE Pdiff \n", "Pageviews NaN NaN NaN \n", "Clicks NaN NaN NaN \n", "Enrollments NaN NaN NaN \n", "Payments NaN NaN NaN \n", "Gross_Conversion 0.004372 0.008569 -0.020555 \n", "Net_Conversion 0.003434 0.006731 -0.004874 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_23['Pdiff'] = df_23['Experiment'] - df_23['Control']\n", "df_23['Pdiff'].iloc[:4] = np.nan\n", "df_23" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Gross Conversion metric is practically significant** as the probability difference between experiment and control group, **Pdiff is -2%**, which is greater than the 1% dmin change.
\n", "**Net Conversion metric is NOT practically significant** as the probability difference between experiment and control group, **Pdiff is -0.49%**, whose magnitude is smaller than the 0.75% dmin change." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 2.2 Statistical Significance Test:\n", "Statistically significant means a result is unlikely to be due to chance. The p-value is the probability of obtaining the difference we saw from a sample (or a larger one) if there really isn’t a difference for all users.

\n", "Statistical significance doesn’t mean practical significance. Only by considering context can we determine whether a difference is practically significant; that is, whether it requires action.

\n", "A result is statistically significant if:\n", "* The confidence interval for the difference does not include zero.\n", "* The observed difference is larger in magnitude than the margin of error, i.e. zero lies outside the CI range.\n", "* The p-value is less than alpha (a conventional (and arbitrary) threshold, 0.05 for the 95% confidence level used here, below which a result is considered highly unlikely to have occurred by chance)\n", "\n", "With large sample sizes, you’re virtually certain to see statistically significant results; in such situations it’s important to interpret the size of the difference.
\n", "Small sample sizes often do not yield statistical significance; when they do, the differences themselves tend also to be practically significant; that is, meaningful enough to warrant action.\n", "\n", "*refer further details here: https://measuringu.com/statistically-significant/*" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ExperimentControlTotalPpooldminSDpoolMOEPdiffLowerUpper
Pageviews211362.000000212163.000000423525.0NaN3000.0000NaNNaNNaNNaNNaN
Clicks17260.00000017293.00000034553.0NaN240.0000NaNNaNNaNNaNNaN
Enrollments3423.0000003785.0000007208.0NaN50.0000NaNNaNNaNNaNNaN
Payments1945.0000002033.0000003978.0NaNNaNNaNNaNNaNNaNNaN
Gross_Conversion0.1983200.218875NaN0.2086070.01000.0043720.008569-0.020555-0.029124-0.011986
Net_Conversion0.1126880.117562NaN0.1151270.00750.0034340.006731-0.004874-0.0116040.001857
\n", "
" ], "text/plain": [ " Experiment Control Total Ppool dmin \\\n", "Pageviews 211362.000000 212163.000000 423525.0 NaN 3000.0000 \n", "Clicks 17260.000000 17293.000000 34553.0 NaN 240.0000 \n", "Enrollments 3423.000000 3785.000000 7208.0 NaN 50.0000 \n", "Payments 1945.000000 2033.000000 3978.0 NaN NaN \n", "Gross_Conversion 0.198320 0.218875 NaN 0.208607 0.0100 \n", "Net_Conversion 0.112688 0.117562 NaN 0.115127 0.0075 \n", "\n", " SDpool MOE Pdiff Lower Upper \n", "Pageviews NaN NaN NaN NaN NaN \n", "Clicks NaN NaN NaN NaN NaN \n", "Enrollments NaN NaN NaN NaN NaN \n", "Payments NaN NaN NaN NaN NaN \n", "Gross_Conversion 0.004372 0.008569 -0.020555 -0.029124 -0.011986 \n", "Net_Conversion 0.003434 0.006731 -0.004874 -0.011604 0.001857 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Find the confidence interval range\n", "df_23['Lower'] = df_23['Pdiff'] - df_23['MOE'] \n", "df_23['Upper'] = df_23['Pdiff'] + df_23['MOE'] \n", "df_23" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Gross Conversion metric is statistically significant** since **Pdiff is -0.02**, which is within the 95% confidence interval range: [-0.029124, -0.011986] and the CI does not include 0.
\n", "**Net Conversion metric is NOT statistically significant** since the 95% confidence interval range: [-0.011604, 0.001857] does include 0. **Pdiff is -0.0049**, a decrease that is smaller in magnitude than the 0.006731 margin of error." ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "### 3. Sign Test\n", "\n", "In a sign test, we check if the trend of change we observed (increase or decrease) was evident in the daily data. \n", "\n", "Based on the analysis above, I expect to see the experiment group with lower gross conversion rate and net conversion rate than the control group. Compute the Gross Conversion and Net Conversion daily per group, then count how many days each metric was lower in the experiment group; this count will be the number of successes for the binomial test used to calculate the two-tail P value. \n", "\n", "I use an [online binomial tool](https://www.graphpad.com/quickcalcs/binomial1/) to calculate the two-tail P value. You can implement the calculations behind it by referring to [Tammy Rotem's kaggle solution](https://www.kaggle.com/tammyrotem/ab-tests-with-python/notebook)." ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DatePageviews_xClicks_xEnrollments_xPayments_xPageviews_yClicks_yEnrollments_yPayments_yGC_expNC_expGC_ctrNC_ctr
0Sat, Oct 117716686105.034.07723687134.070.00.1530610.0495630.1950510.101892
1Sun, Oct 129288785116.091.09102779147.070.00.1477710.1159240.1887030.089859
2Mon, Oct 1310480884145.079.010511909167.095.00.1640270.0893670.1837180.104510
3Tue, Oct 149867827138.092.09871836156.0105.00.1668680.1112450.1866030.125598
4Wed, Oct 159793832140.094.010014837163.064.00.1682690.1129810.1947430.076464
\n", "
" ], "text/plain": [ " Date Pageviews_x Clicks_x Enrollments_x Payments_x Pageviews_y \\\n", "0 Sat, Oct 11 7716 686 105.0 34.0 7723 \n", "1 Sun, Oct 12 9288 785 116.0 91.0 9102 \n", "2 Mon, Oct 13 10480 884 145.0 79.0 10511 \n", "3 Tue, Oct 14 9867 827 138.0 92.0 9871 \n", "4 Wed, Oct 15 9793 832 140.0 94.0 10014 \n", "\n", " Clicks_y Enrollments_y Payments_y GC_exp NC_exp GC_ctr NC_ctr \n", "0 687 134.0 70.0 0.153061 0.049563 0.195051 0.101892 \n", "1 779 147.0 70.0 0.147771 0.115924 0.188703 0.089859 \n", "2 909 167.0 95.0 0.164027 0.089367 0.183718 0.104510 \n", "3 836 156.0 105.0 0.166868 0.111245 0.186603 0.125598 \n", "4 837 163.0 64.0 0.168269 0.112981 0.194743 0.076464 " ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Merge both groups of data with date as join using the 23 day worth of data established previously:\n", "df_sign = pd.merge(df_exp23, df_ctr23, on = 'Date')\n", "\n", "#Experiment group:\n", "df_sign['GC_exp'] = df_sign['Enrollments_x']/df_sign['Clicks_x']\n", "df_sign['NC_exp'] = df_sign['Payments_x']/df_sign['Clicks_x']\n", "\n", "#Control group:\n", "df_sign['GC_ctr'] = df_sign['Enrollments_y']/df_sign['Clicks_y']\n", "df_sign['NC_ctr'] = df_sign['Payments_y']/df_sign['Clicks_y']\n", "\n", "df_sign.head()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DateGC_expGC_ctrNC_expNC_ctr
0Sat, Oct 110.1530610.1950510.0495630.101892
1Sun, Oct 120.1477710.1887030.1159240.089859
2Mon, Oct 130.1640270.1837180.0893670.104510
3Tue, Oct 140.1668680.1866030.1112450.125598
4Wed, Oct 150.1682690.1947430.1129810.076464
\n", "
" ], "text/plain": [ " Date GC_exp GC_ctr NC_exp NC_ctr\n", "0 Sat, Oct 11 0.153061 0.195051 0.049563 0.101892\n", "1 Sun, Oct 12 0.147771 0.188703 0.115924 0.089859\n", "2 Mon, Oct 13 0.164027 0.183718 0.089367 0.104510\n", "3 Tue, Oct 14 0.166868 0.186603 0.111245 0.125598\n", "4 Wed, Oct 15 0.168269 0.194743 0.112981 0.076464" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Select only relevant columns for easier read across:\n", "df_sign = df_sign[['Date','GC_exp', 'GC_ctr', 'NC_exp', 'NC_ctr']]\n", "df_sign.head()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True 19\n", "False 4\n", "Name: GC_sign_result, dtype: int64" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Gross conversion sign test: True (pass or success) if GC Experiment is lower \n", "df_sign['GC_sign_result'] = df_sign['GC_exp'] < df_sign['GC_ctr']\n", "df_sign['GC_sign_result'].value_counts()\n" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True 13\n", "False 10\n", "Name: NC_sign_result, dtype: int64" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Net conversion sign test: True (pass or success) if NC Experiment is lower \n", "df_sign['NC_sign_result'] = df_sign['NC_exp'] < df_sign['NC_ctr']\n", "df_sign['NC_sign_result'].value_counts()" ] }, { "attachments": { "image.png": { "image/png": "" } }, "cell_type": "markdown", "metadata": {}, "source": [ "#### Gross Conversion comparison: 4 failures, 19 successes\n", "![image.png](attachment:image.png)\n", "\n", "So, P-Value of the test is 0.0026. Since the probability to pass the test daily is 1-0.0026 = 0.9974 which is greater than 95%, this result does not happen by chance (statistically significant) and **it passes the sign test**." 
] }, { "attachments": { "image.png": { "image/png": "" } }, "cell_type": "markdown", "metadata": {}, "source": [ "#### Net Conversion comparison: 10 failures , 13 successes\n", "![image.png](attachment:image.png)\n", "\n", "So, the two-tail P-Value of the test is 0.6776. Since this P-Value is greater than alpha = 0.05, a split of 13 successes out of 23 days could easily happen by chance, so **it does NOT pass the sign test.** The experiment does not have a statistically significant impact on Net Conversion." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 2 }