TabularPandas on Titanic dataset.

Create a Learner, TabularPandas on Titanic’s dataset.
fastpages
jupyter
Author

Eric Vincent

Published

September 30, 2022

iskaggle = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', '')
if iskaggle: path = Path('../input/titanic')
else:
    path = Path('titanic')
    if not path.exists():
        import zipfile,kaggle
        kaggle.api.competition_download_cli(str(path))
        zipfile.ZipFile(f'{path}.zip').extractall(path)
import torch, numpy as np, pandas as pd
np.set_printoptions(linewidth=140)
torch.set_printoptions(linewidth=140, sci_mode=False, edgeitems=7)
pd.set_option('display.width', 140)
!pip install fastbook
import fastbook
fastbook.setup_book()
from fastbook import *
import torch.nn.functional as F
Collecting fastbook
  Downloading fastbook-0.0.28-py3-none-any.whl (719 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 719.8/719.8 kB 30.6 MB/s eta 0:00:00
Requirement already satisfied: graphviz in /usr/local/lib/python3.9/dist-packages (from fastbook) (0.20.1)
Requirement already satisfied: packaging in /usr/local/lib/python3.9/dist-packages (from fastbook) (21.3)
Requirement already satisfied: pip in /usr/local/lib/python3.9/dist-packages (from fastbook) (22.2.2)
Requirement already satisfied: datasets in /usr/local/lib/python3.9/dist-packages (from fastbook) (2.3.2)
Requirement already satisfied: sentencepiece in /usr/local/lib/python3.9/dist-packages (from fastbook) (0.1.96)
Collecting fastai>=2.6
  Downloading fastai-2.7.9-py3-none-any.whl (225 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 225.5/225.5 kB 28.8 MB/s eta 0:00:00
Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (from fastbook) (1.4.3)
Requirement already satisfied: transformers in /usr/local/lib/python3.9/dist-packages (from fastbook) (4.20.1)
Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from fastbook) (2.28.1)
Requirement already satisfied: spacy<4 in /usr/local/lib/python3.9/dist-packages (from fastai>=2.6->fastbook) (3.4.0)
Requirement already satisfied: scikit-learn in /usr/local/lib/python3.9/dist-packages (from fastai>=2.6->fastbook) (1.1.1)
Requirement already satisfied: torchvision>=0.8.2 in /usr/local/lib/python3.9/dist-packages (from fastai>=2.6->fastbook) (0.13.0+cu116)
Requirement already satisfied: scipy in /usr/local/lib/python3.9/dist-packages (from fastai>=2.6->fastbook) (1.8.1)
Requirement already satisfied: pyyaml in /usr/local/lib/python3.9/dist-packages (from fastai>=2.6->fastbook) (5.4.1)
Collecting fastprogress>=0.2.4
  Downloading fastprogress-1.0.3-py3-none-any.whl (12 kB)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.9/dist-packages (from fastai>=2.6->fastbook) (3.5.2)
Requirement already satisfied: pillow>6.0.0 in /usr/local/lib/python3.9/dist-packages (from fastai>=2.6->fastbook) (9.2.0)
Collecting fastdownload<2,>=0.0.5
  Downloading fastdownload-0.0.7-py3-none-any.whl (12 kB)
Requirement already satisfied: torch<1.14,>=1.7 in /usr/local/lib/python3.9/dist-packages (from fastai>=2.6->fastbook) (1.12.0+cu116)
Collecting fastcore<1.6,>=1.4.5
  Downloading fastcore-1.5.27-py3-none-any.whl (67 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 67.1/67.1 kB 12.2 MB/s eta 0:00:00
Requirement already satisfied: aiohttp in /usr/local/lib/python3.9/dist-packages (from datasets->fastbook) (3.8.1)
Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from datasets->fastbook) (1.23.1)
Requirement already satisfied: xxhash in /usr/local/lib/python3.9/dist-packages (from datasets->fastbook) (3.0.0)
Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.9/dist-packages (from datasets->fastbook) (4.64.0)
Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.9/dist-packages (from datasets->fastbook) (2022.5.0)
Requirement already satisfied: dill<0.3.6 in /usr/local/lib/python3.9/dist-packages (from datasets->fastbook) (0.3.5.1)
Requirement already satisfied: multiprocess in /usr/local/lib/python3.9/dist-packages (from datasets->fastbook) (0.70.13)
Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.9/dist-packages (from datasets->fastbook) (0.18.0)
Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.9/dist-packages (from datasets->fastbook) (8.0.0)
Requirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /usr/local/lib/python3.9/dist-packages (from datasets->fastbook) (0.8.1)
Requirement already satisfied: certifi>=2017.4.17 in /usr/lib/python3/dist-packages (from requests->fastbook) (2019.11.28)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->fastbook) (1.26.10)
Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.9/dist-packages (from requests->fastbook) (2.1.0)
Requirement already satisfied: idna<4,>=2.5 in /usr/lib/python3/dist-packages (from requests->fastbook) (2.8)
Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from packaging->fastbook) (3.0.9)
Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas->fastbook) (2022.1)
Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas->fastbook) (2.8.2)
Requirement already satisfied: tokenizers!=0.11.3,<0.13,>=0.11.1 in /usr/local/lib/python3.9/dist-packages (from transformers->fastbook) (0.12.1)
Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers->fastbook) (2022.7.9)
Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers->fastbook) (3.7.1)
Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.9/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets->fastbook) (4.3.0)
Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.1->pandas->fastbook) (1.14.0)
Requirement already satisfied: wasabi<1.1.0,>=0.9.1 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (0.9.1)
Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (3.3.0)
Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (2.0.7)
Requirement already satisfied: thinc<8.2.0,>=8.1.0 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (8.1.0)
Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (2.0.6)
Requirement already satisfied: pathy>=0.3.5 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (0.6.2)
Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (3.0.6)
Requirement already satisfied: setuptools in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (63.1.0)
Requirement already satisfied: typer<0.5.0,>=0.3.0 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (0.4.2)
Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (2.4.3)
Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.10.0,>=1.7.4 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (1.9.1)
Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (1.0.7)
Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (3.1.2)
Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (1.0.2)
Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.9 in /usr/local/lib/python3.9/dist-packages (from spacy<4->fastai>=2.6->fastbook) (3.0.9)
Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets->fastbook) (1.2.0)
Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets->fastbook) (4.0.2)
Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets->fastbook) (1.3.0)
Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets->fastbook) (6.0.2)
Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets->fastbook) (18.2.0)
Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.9/dist-packages (from aiohttp->datasets->fastbook) (1.7.2)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib->fastai>=2.6->fastbook) (1.4.3)
Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib->fastai>=2.6->fastbook) (4.34.4)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.9/dist-packages (from matplotlib->fastai>=2.6->fastbook) (0.11.0)
Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.9/dist-packages (from scikit-learn->fastai>=2.6->fastbook) (3.1.0)
Requirement already satisfied: joblib>=1.0.0 in /usr/local/lib/python3.9/dist-packages (from scikit-learn->fastai>=2.6->fastbook) (1.1.0)
Requirement already satisfied: smart-open<6.0.0,>=5.2.1 in /usr/local/lib/python3.9/dist-packages (from pathy>=0.3.5->spacy<4->fastai>=2.6->fastbook) (5.2.1)
Requirement already satisfied: blis<0.8.0,>=0.7.8 in /usr/local/lib/python3.9/dist-packages (from thinc<8.2.0,>=8.1.0->spacy<4->fastai>=2.6->fastbook) (0.7.8)
Requirement already satisfied: click<9.0.0,>=7.1.1 in /usr/local/lib/python3.9/dist-packages (from typer<0.5.0,>=0.3.0->spacy<4->fastai>=2.6->fastbook) (8.1.3)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->spacy<4->fastai>=2.6->fastbook) (2.1.1)
Installing collected packages: fastprogress, fastcore, fastdownload, fastai, fastbook
Successfully installed fastai-2.7.9 fastbook-0.0.28 fastcore-1.5.27 fastdownload-0.0.7 fastprogress-1.0.3
WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv
from fastai.tabular.all import *

pd.options.display.float_format = '{:.2f}'.format
set_seed(42)
df = pd.read_csv(path/'train.csv')
def add_features(df):
    df['LogFare'] = np.log1p(df['Fare'])
    df['Deck'] = df.Cabin.str[0].map(dict(A="ABC", B="ABC", C="ABC", D="DE", E="DE", F="FG", G="FG"))
    df['Family'] = df.SibSp+df.Parch
    df['Alone'] = df.Family==0
    df['TicketFreq'] = df.groupby('Ticket')['Ticket'].transform('count')
    df['Title'] = df.Name.str.split(', ', expand=True)[1].str.split('.', expand=True)[0]
    df['Title'] = df.Title.map(dict(Mr="Mr",Miss="Miss",Mrs="Mrs",Master="Master"))

add_features(df)
df.groupby('Sex')['Fare'].mean()
Sex
female   44.48
male     25.52
Name: Fare, dtype: float64
splits = RandomSplitter(seed=42)(df)
dls = TabularPandas(
    df, splits=splits,
    procs = [Categorify, FillMissing, Normalize],
    cat_names=["Sex","Pclass","Embarked","Deck", "Title"],
    cont_names=['Age', 'SibSp', 'Parch', 'LogFare', 'Alone', 'TicketFreq', 'Family'],
    y_names="Survived", y_block = CategoryBlock(),
).dataloaders(path=".")
learn = tabular_learner(dls, metrics=accuracy, layers=[10,10])
# suggest a learning rate using fastai's lr_find function
learn.lr_find(suggest_funcs=(slide, valley))
SuggestedLRs(slide=0.05754399299621582, valley=0.013182567432522774)