{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Experiments on Synthetic Data #\n", "\n", "For the details of the experimental setup and results please see Section 5.1 of the main text." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/dennisshasha/anaconda/envs/py36/lib/python3.6/site-packages/numexpr/cpuinfo.py:76: UserWarning: [Errno 2] No such file or directory: 'sysctl': 'sysctl'\n", " stacklevel=stacklevel + 1):\n" ] } ], "source": [ "# IMPORTS\n", "\n", "# BASICS\n", "%matplotlib inline\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from random import random\n", "from math import exp, sqrt, log, tanh\n", "from copy import deepcopy\n", "from tqdm import tqdm\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", "\n", "# STYLE (optional)\n", "np.set_printoptions(formatter={'float': lambda x: \"{0:0.3f}\".format(x)})\n", "plt.rcParams[\"font.family\"] = \"Times New Roman\"\n", "plt.style.use('seaborn-whitegrid')\n", "plt.style.use('seaborn-poster')\n", "plt.style.use('seaborn-dark-palette')\n", "plt.rcParams[\"mathtext.fontset\"] = \"cm\"\n", "\n", "\n", "# SCIKIT-LEARN\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn import svm\n", "from sklearn import datasets\n", "from sklearn.calibration import CalibratedClassifierCV\n", "from sklearn.utils import shuffle\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.model_selection import KFold" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# ADAPTIVE SAFEPREDICT \n", "\n", "class 
SafePredict:\n",
"    \"\"\"Adaptive SafePredict: exponential weights over a dummy D and a base predictor P.\n",
"\n",
"    self.w holds [w_D, w_P]. At every step the dummy is charged the constant\n",
"    loss eps and the predictor is charged the observed loss lP; update()\n",
"    returns w_P, the prediction probability.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, eps = 0.1, w0 = 0.5, alpha = 0, beta = 1, horizon = 1):\n",
"        \"\"\"Store the parameters and start epoch k = 1 of the learning-rate schedule.\n",
"\n",
"        eps     -- target error rate\n",
"        w0      -- initial weight of the dummy (the predictor starts at 1 - w0)\n",
"        alpha   -- adaptivity parameter: w_P >= alpha (default value 0)\n",
"        beta    -- adaptivity parameter: w_P <= beta (default value 1)\n",
"        horizon -- time horizon T\n",
"        \"\"\"\n",
"        self.eps, self.w0 = eps, w0\n",
"        self.alpha, self.beta, self.T = alpha, beta, horizon\n",
"\n",
"        # Weights are kept in the order [dummy, predictor]\n",
"        self.w = [w0, 1-w0]\n",
"        # Running sum of w_P*w_D; update() compares it against 2**k\n",
"        self.wPs = (1-w0)*w0\n",
"\n",
"        # The learning rate of epoch k is C / 2**(k/2)\n",
"        self.k = 1\n",
"        self.C = sqrt(-log(w0) - (self.T-1)*log(1-self.alpha)) / (1-self.eps)\n",
"        self.eta = self.C / 2**(self.k/2)\n",
"\n",
"    def update(self, lP):\n",
"        \"\"\"Process the loss lP of one data point and return the new predictor weight w_P.\"\"\"\n",
"        if not (self.wPs < 2**self.k):\n",
"            # Threshold 2**k reached: reset the weights and open epoch k+1 with\n",
"            # the learning rate shrunk by sqrt(2). lP is not used on such a step.\n",
"            self.w[0] = self.w0\n",
"            self.w[1] = 1-self.w0\n",
"            self.k += 1\n",
"            self.eta = self.eta / sqrt(2)\n",
"            self.wPs = (1-self.w0)*self.w0\n",
"        else:\n",
"            # Exponential-weights step followed by normalisation\n",
"            self.w[0] = self.w[0]*exp(-self.eta * self.eps)\n",
"            self.w[1] = self.w[1]*exp(-self.eta * lP)\n",
"            total = sum(self.w)\n",
"            self.w[0] /= total\n",
"            self.w[1] /= total\n",
"\n",
"        # Mix the weights (maps w_P from [0, 1] into [alpha, beta])\n",
"        spread = self.beta-self.alpha\n",
"        self.w[0] = self.w[0]*spread + 1 - self.beta\n",
"        self.w[1] = self.w[1]*spread + self.alpha\n",
"\n",
"        # Accumulate w_P*w_D, which drives the learning-rate schedule\n",
"        self.wPs += self.w[1]*self.w[0]\n",
"\n",
"        # Prediction probability\n",
"        return self.w[1]\n",
" " ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# GENERATION OF SYNTHETIC DATA\n",
"\n",
"epsilon = 0.05\n",
"w0 = 0.5\n",
"HORIZON = 50000\n",
"CHANGE_POINTS = 10\n",
"LOW_NOISE = epsilon*0.8\n",
"HIGH_NOISE = epsilon*2\n",
"\n",
"PP = [LOW_NOISE + (HIGH_NOISE-LOW_NOISE) * ((i//(HORIZON/(CHANGE_POINTS+1)))%2) for i in range(HORIZON)]\n",
"LP = [random()" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
"# ERROR AND EFFICIENCY PLOTS \n",
"WW = 12; HH = 16; ll = 2; ms = 10; me = 3000\n",
"# width, height, 
line_width, marker_size, mark_every\n",
"\n",
"# One entry per SafePredict run: (prediction weights, legend label, marker, marker size)\n",
"runs = [(Wp1, r'$ \\alpha = 0$', 'o', ms),\n",
"        (Wp2, r'$ \\alpha = 1/T$', '*', ms+2),\n",
"        (Wp3, r'$ \\alpha = 5/T$', 'P', ms),\n",
"        (Wp4, r'$ \\alpha = 10/T$', 's', ms)]\n",
"\n",
"# Number of points seen so far (1..HORIZON), the denominator of a running mean.\n",
"# np.arange(HORIZON) starts at 0: it divides by zero at t=0 and is off by one afterwards.\n",
"steps = np.arange(1, HORIZON+1)\n",
"\n",
"plt.figure(figsize=(WW,HH))\n",
"\n",
"# Top panel: weighted cumulative error rate, sum(Wp*LP) / sum(Wp)\n",
"ax = plt.subplot(2,1,1)\n",
"for Wp, lbl, mk, mk_size in runs:\n",
"    plt.plot(np.cumsum([Wp[i]*LP[i] for i in range(HORIZON)])/np.cumsum(Wp), label = lbl, linestyle='-', linewidth=ll, marker = mk, markevery=me, markersize = mk_size)\n",
"plt.plot(np.cumsum([LP[i]*(PP[i] <= epsilon) for i in range(HORIZON)])/np.cumsum([PP[i] <= epsilon for i in range(HORIZON)]),'k', label = \"oracle\", linestyle='--', linewidth=ll*1.5 )\n",
"plt.plot([0, HORIZON-1],[epsilon, epsilon],'r', label = \"target\", linestyle='--', linewidth=ll*2 )\n",
"plt.ylabel(\"Error Rate\", fontsize= 18, x = -0.05)\n",
"plt.yticks(fontsize = 15)\n",
"plt.ylim([0, 2*epsilon])\n",
"lg = ax.legend(loc='best', ncol = 2, fontsize =18)\n",
"lg.draw_frame(True)\n",
"\n",
"\n",
"# Bottom panel: efficiency, the running mean of the prediction weights\n",
"ax = plt.subplot(2,1,2)\n",
"for Wp, lbl, mk, mk_size in runs:\n",
"    plt.plot(np.cumsum(Wp)/steps, label = lbl, linestyle='-', linewidth=ll, marker = mk, markevery=me, markersize = mk_size)\n",
"plt.plot(np.cumsum([p <= epsilon for p in PP])/steps,'k', label = \"oracle\", linestyle='--', linewidth=ll*1.5 )\n",
"plt.ylabel(\"Efficiency\", fontsize= 18, x = -0.05)\n",
"plt.ylim([0, 1.1])\n",
"# Tie the x-range to the configured horizon instead of repeating the literal 50000\n",
"plt.xlim([0, HORIZON])\n",
"plt.yticks(fontsize = 15)\n",
"lg = ax.legend(loc='best', ncol = 1, fontsize =18)\n",
"lg.draw_frame(True)\n",
"plt.xlabel(\"Time\", fontsize = 18)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda env:py36]", "language": "python", "name": "conda-env-py36-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }