{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\ivypa\\Anaconda3\\lib\\site-packages\\pandas\\computation\\__init__.py:19: UserWarning: The installed version of numexpr 2.4.4 is not supported in pandas and will be not be used\n", "\n", " UserWarning)\n" ] } ], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data = pd.read_csv('responses.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TimestampФамилия Имя ОтчествоE-mail адресРоль на курсеПолГруппаДата рожденияGPA (Средний балл)Родной городИмели опыт анализа данных до курса?...Сколько параметров имеет нормальное распределение?Какой статистический критерий примените для проверки различия между двумя связанными выборками, в случае, если нельзя сделать предположение о виде распределения выборки?Сколько листьев имеет полное двоичное дерево высоты 3?Характеризуйте тремя предложениями, почему решили пойти на курс?Какими социальными сетями пользуетесь?Ссылка на личный вебсайтСсылка на профиль в LinkedInСсылка на страницу в FacebookСогласие на обработку данныхФакультет
01/11/2016 21:16:50Шестаков Андрей Владимировичshestakoffandrey@gmail.comСеминаристмужскойИАД-35/17/19917.00г. Бор, Нижегородская областьда...2.0Критерий знаковых рангов Уилкоксона8For. The. Science!Facebook, VK.com (ВКонтакте), LinkedIn, Github...NaNNaNNaNЯ заполнил форму полностью и даю согласие на о...Компьютерных Наук
11/11/2016 22:13:51Кашницкий Юрий Савельевичyury.kashnitsky@gmail.comСеминаристмужскойаспирант 3 года11/1/19904.53Москвада...2.0Критерий знаковых рангов Уилкоксона8Хочу получить опыт преподавания не только Pyth...Facebook, VK.com (ВКонтакте), LinkedIn, Githubhttp://www.hse.ru/staff/ykashnitskyhttps://www.linkedin.com/profile/view?id=19224...https://www.facebook.com/festlineЯ заполнил форму полностью и даю согласие на о...Факультет Компьютерных Наук
21/12/2016 9:28:50Захарова Елена Сергеевна1583253@gmail.comСтудентженский2011/19/1996NaNМоскванет...NaNНе знаюНе знаю, что такое двоичное деревоПолезно. Для. Основной специальности.VK.com (ВКонтакте), InstagramNaNNaNNaNЯ заполнил форму полностью и даю согласие на о...Гуманитарных наук/Лингвистика
31/12/2016 9:30:32Михайлин Анатолий Владимировичmehanat1996@gmail.comСтудентмужской135/26/19967.20Москвада...2.0Не знаюНе знаю, что такое двоичное деревоЗанимаюсь разработкой web-приложений (на языке...VK.com (ВКонтакте)vk.com/mehanatNaNNaNЯ заполнил форму полностью и даю согласие на о...ГиМУ
41/12/2016 9:33:02Мельник Анастасия Александровнаmelnik-a-a@mail.ruСтудентженский208/3/20169.00Москванет...2.0Не знаю8Я учусь на прикладной лингвистике, а в програм...Facebook, VK.com (ВКонтакте), Instagramhttps://vk.com/feedNaNNaNЯ заполнил форму полностью и даю согласие на о...Лингвистики
\n", "

5 rows × 27 columns

\n", "
" ], "text/plain": [ " Timestamp Фамилия Имя Отчество \\\n", "0 1/11/2016 21:16:50 Шестаков Андрей Владимирович \n", "1 1/11/2016 22:13:51 Кашницкий Юрий Савельевич \n", "2 1/12/2016 9:28:50 Захарова Елена Сергеевна \n", "3 1/12/2016 9:30:32 Михайлин Анатолий Владимирович \n", "4 1/12/2016 9:33:02 Мельник Анастасия Александровна \n", "\n", " E-mail адрес Роль на курсе Пол Группа \\\n", "0 shestakoffandrey@gmail.com Семинарист мужской ИАД-3 \n", "1 yury.kashnitsky@gmail.com Семинарист мужской аспирант 3 года \n", "2 1583253@gmail.com Студент женский 20 \n", "3 mehanat1996@gmail.com Студент мужской 13 \n", "4 melnik-a-a@mail.ru Студент женский 20 \n", "\n", " Дата рождения GPA (Средний балл) Родной город \\\n", "0 5/17/1991 7.00 г. Бор, Нижегородская область \n", "1 11/1/1990 4.53 Москва \n", "2 11/19/1996 NaN Москва \n", "3 5/26/1996 7.20 Москва \n", "4 8/3/2016 9.00 Москва \n", "\n", " Имели опыт анализа данных до курса? ... \\\n", "0 да ... \n", "1 да ... \n", "2 нет ... \n", "3 да ... \n", "4 нет ... \n", "\n", " Сколько параметров имеет нормальное распределение? \\\n", "0 2.0 \n", "1 2.0 \n", "2 NaN \n", "3 2.0 \n", "4 2.0 \n", "\n", " Какой статистический критерий примените для проверки различия между двумя связанными выборками, в случае, если нельзя сделать предположение о виде распределения выборки? \\\n", "0 Критерий знаковых рангов Уилкоксона \n", "1 Критерий знаковых рангов Уилкоксона \n", "2 Не знаю \n", "3 Не знаю \n", "4 Не знаю \n", "\n", " Сколько листьев имеет полное двоичное дерево высоты 3? \\\n", "0 8 \n", "1 8 \n", "2 Не знаю, что такое двоичное дерево \n", "3 Не знаю, что такое двоичное дерево \n", "4 8 \n", "\n", " Характеризуйте тремя предложениями, почему решили пойти на курс? \\\n", "0 For. The. Science! \n", "1 Хочу получить опыт преподавания не только Pyth... \n", "2 Полезно. Для. Основной специальности. \n", "3 Занимаюсь разработкой web-приложений (на языке... \n", "4 Я учусь на прикладной лингвистике, а в програм... 
\n", "\n", " Какими социальными сетями пользуетесь? \\\n", "0 Facebook, VK.com (ВКонтакте), LinkedIn, Github... \n", "1 Facebook, VK.com (ВКонтакте), LinkedIn, Github \n", "2 VK.com (ВКонтакте), Instagram \n", "3 VK.com (ВКонтакте) \n", "4 Facebook, VK.com (ВКонтакте), Instagram \n", "\n", " Ссылка на личный вебсайт \\\n", "0 NaN \n", "1 http://www.hse.ru/staff/ykashnitsky \n", "2 NaN \n", "3 vk.com/mehanat \n", "4 https://vk.com/feed \n", "\n", " Ссылка на профиль в LinkedIn \\\n", "0 NaN \n", "1 https://www.linkedin.com/profile/view?id=19224... \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Ссылка на страницу в Facebook \\\n", "0 NaN \n", "1 https://www.facebook.com/festline \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", " Согласие на обработку данных \\\n", "0 Я заполнил форму полностью и даю согласие на о... \n", "1 Я заполнил форму полностью и даю согласие на о... \n", "2 Я заполнил форму полностью и даю согласие на о... \n", "3 Я заполнил форму полностью и даю согласие на о... \n", "4 Я заполнил форму полностью и даю согласие на о... 
\n", "\n", " Факультет \n", "0 Компьютерных Наук \n", "1 Факультет Компьютерных Наук \n", "2 Гуманитарных наук/Лингвистика \n", "3 ГиМУ \n", "4 Лингвистики \n", "\n", "[5 rows x 27 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(112, 27)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.shape" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index(['Timestamp', 'Фамилия Имя Отчество', 'E-mail адрес', 'Роль на курсе',\n", " 'Пол', 'Группа', 'Дата рождения', 'GPA (Средний балл)', 'Родной город',\n", " 'Имели опыт анализа данных до курса?',\n", " 'Есть ли у вас научные публикации / доклады?',\n", " 'Есть ли у вас публикации в ненаучных изданиях (журналы, тематические блоги и т.д.)? Перечислите издания через запятую.',\n", " 'Какой ваш уровень владения английским языком?',\n", " 'Как называется изучаемый на курсе предмет по-английски?',\n", " 'Какими языками программирования владеете?', 'Сколько будет 2+2?',\n", " 'После удержания налога на доходы Мария Константиновна получила 16530 рублей. 
Сколько рублей составляет заработная плата Марии Константиновны?',\n", " 'Сколько параметров имеет нормальное распределение?',\n", " 'Какой статистический критерий примените для проверки различия между двумя связанными выборками, в случае, если нельзя сделать предположение о виде распределения выборки?',\n", " 'Сколько листьев имеет полное двоичное дерево высоты 3?',\n", " 'Характеризуйте тремя предложениями, почему решили пойти на курс?',\n", " 'Какими социальными сетями пользуетесь?', 'Ссылка на личный вебсайт',\n", " 'Ссылка на профиль в LinkedIn', 'Ссылка на страницу в Facebook',\n", " 'Согласие на обработку данных', 'Факультет'],\n", " dtype='object')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.columns" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(76, 27)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Survey fields that must be filled in for a response to be kept.\n", "required_columns = [\n", "    'E-mail адрес', 'Пол', 'Дата рождения', 'GPA (Средний балл)',\n", "    'Есть ли у вас научные публикации / доклады?',\n", "    'Какой ваш уровень владения английским языком?',\n", "    'Как называется изучаемый на курсе предмет по-английски?',\n", "    'Сколько параметров имеет нормальное распределение?',\n", "    'Какой статистический критерий примените для проверки различия между двумя связанными выборками, в случае, если нельзя сделать предположение о виде распределения выборки?',\n", "]\n", "\n", "# Start again from the raw CSV, remove the row labelled 104, then drop every\n", "# response that has a missing value in any of the required columns.\n", "# NOTE(review): the reason row 104 is excluded is not recorded - confirm and document.\n", "data = (\n", "    pd.read_csv('responses.csv')\n", "    .drop(104)\n", "    .dropna(subset=required_columns)\n", ")\n", "data.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "new_data = pd.DataFrame()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
email
0gmail.com
1gmail.com
2gmail.com
3mail.ru
4edu.hse.ru
\n", "
" ], "text/plain": [ " email\n", "0 gmail.com\n", "1 gmail.com\n", "2 gmail.com\n", "3 mail.ru\n", "4 edu.hse.ru" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data['email'] = [email.split('@')[1] for email in data['E-mail адрес']]\n", "new_data.head()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
emailcourse_rolegender
0gmail.comСеминаристмужской
1gmail.comСеминаристмужской
2gmail.comСтудентмужской
3mail.ruСтудентженский
4edu.hse.ruСтудентженский
\n", "
" ], "text/plain": [ " email course_role gender\n", "0 gmail.com Семинарист мужской\n", "1 gmail.com Семинарист мужской\n", "2 gmail.com Студент мужской\n", "3 mail.ru Студент женский\n", "4 edu.hse.ru Студент женский" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data.insert(1, 'course_role', data[\"Роль на курсе\"].values)\n", "new_data.insert(2, 'gender', data[\"Пол\"].values)\n", "new_data.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
emailcourse_rolegenderbirth_date
0gmail.comСеминаристмужской1991
1gmail.comСеминаристмужской1990
2gmail.comСтудентмужской1996
3mail.ruСтудентженский2016
4edu.hse.ruСтудентженский1996
\n", "
" ], "text/plain": [ " email course_role gender birth_date\n", "0 gmail.com Семинарист мужской 1991\n", "1 gmail.com Семинарист мужской 1990\n", "2 gmail.com Студент мужской 1996\n", "3 mail.ru Студент женский 2016\n", "4 edu.hse.ru Студент женский 1996" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data.insert(3, 'birth_date', [date.split('/')[2] for date in data['Дата рождения']])\n", "new_data.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array(['2', '1', '2', '3', '3', '3', '2', '3', '3', '3', '3', '3', '3',\n", " '3', '3', '3', '2', '3', '3', '1', '3', '3', '3', '2', '3', '3',\n", " '2', '2', '1', '2', '3', '2', '2', '2', '2', '3', '3', '3', '3',\n", " '3', '2', '2', '2', '2', '2', '2', '2', '2', '2', '3', '2', '2',\n", " '3', '2', '3', '3', '3', '3', '3', '3', '3', '2', '3', '3', '3',\n", " '3', '2', '3', '3', '3', '3', '3', '2', '3', '2', '3'], dtype=object)" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tmp_data = np.empty(data['GPA (Средний балл)'].values.shape, 'object') # [0..2.5, 2.5..5, 5..7.5, 7.5..10]\n", "tmp_data[data['GPA (Средний балл)'].values < 2.5] = '0'\n", "tmp_data[np.bitwise_and(data['GPA (Средний балл)'].values >= 2.5, data['GPA (Средний балл)'].values < 5.0)] = '1'\n", "tmp_data[np.bitwise_and(data['GPA (Средний балл)'].values >= 5.0, data['GPA (Средний балл)'].values < 7.5)] = '2'\n", "tmp_data[data['GPA (Средний балл)'].values >= 7.5] = '3'\n", "tmp_data" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
emailcourse_rolegenderbirth_dategpa
0gmail.comСеминаристмужской19912
1gmail.comСеминаристмужской19901
2gmail.comСтудентмужской19962
3mail.ruСтудентженский20163
4edu.hse.ruСтудентженский19963
\n", "
" ], "text/plain": [ " email course_role gender birth_date gpa\n", "0 gmail.com Семинарист мужской 1991 2\n", "1 gmail.com Семинарист мужской 1990 1\n", "2 gmail.com Студент мужской 1996 2\n", "3 mail.ru Студент женский 2016 3\n", "4 edu.hse.ru Студент женский 1996 3" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data.insert(4, 'gpa', tmp_data)\n", "new_data.head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'доклады на топовых конференциях по некоторой тематике',\n", " 'есть какие-то',\n", " 'нет'}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(data['Есть ли у вас научные публикации / доклады?'])" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
emailcourse_rolegenderbirth_dategpapapers
0gmail.comСеминаристмужской19912some
1gmail.comСеминаристмужской19901many
2gmail.comСтудентмужской19962none
3mail.ruСтудентженский20163none
4edu.hse.ruСтудентженский19963none
\n", "
" ], "text/plain": [ " email course_role gender birth_date gpa papers\n", "0 gmail.com Семинарист мужской 1991 2 some\n", "1 gmail.com Семинарист мужской 1990 1 many\n", "2 gmail.com Студент мужской 1996 2 none\n", "3 mail.ru Студент женский 2016 3 none\n", "4 edu.hse.ru Студент женский 1996 3 none" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data.insert(5, 'papers', ['many' if val[0] == 'д' else 'some' if val[0] == 'е' else 'none' \n", " for val in data['Есть ли у вас научные публикации / доклады?']])\n", "new_data.head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'Advanced',\n", " 'Beginner',\n", " 'Full proficiency',\n", " 'Intermediate',\n", " 'Upper-intermediate'}" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(data['Какой ваш уровень владения английским языком?'])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
emailcourse_rolegenderbirth_dategpapapersenglish_level
0gmail.comСеминаристмужской19912someUpper-intermediate
1gmail.comСеминаристмужской19901manyUpper-intermediate
2gmail.comСтудентмужской19962noneIntermediate
3mail.ruСтудентженский20163noneUpper-intermediate
4edu.hse.ruСтудентженский19963noneUpper-intermediate
\n", "
" ], "text/plain": [ " email course_role gender birth_date gpa papers english_level\n", "0 gmail.com Семинарист мужской 1991 2 some Upper-intermediate\n", "1 gmail.com Семинарист мужской 1990 1 many Upper-intermediate\n", "2 gmail.com Студент мужской 1996 2 none Intermediate\n", "3 mail.ru Студент женский 2016 3 none Upper-intermediate\n", "4 edu.hse.ru Студент женский 1996 3 none Upper-intermediate" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data.insert(6, 'english_level', data['Какой ваш уровень владения английским языком?'].values)\n", "new_data.head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'Data Analysis', 'Data Mining', 'Data Science', 'Machine Learning'}" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(data['Как называется изучаемый на курсе предмет по-английски?'])" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
emailcourse_rolegenderbirth_dategpapapersenglish_levelcourse_name
0gmail.comСеминаристмужской19912someUpper-intermediateData Analysis
1gmail.comСеминаристмужской19901manyUpper-intermediateData Analysis
2gmail.comСтудентмужской19962noneIntermediateData Analysis
3mail.ruСтудентженский20163noneUpper-intermediateData Mining
4edu.hse.ruСтудентженский19963noneUpper-intermediateData Mining
\n", "
" ], "text/plain": [ " email course_role gender birth_date gpa papers english_level \\\n", "0 gmail.com Семинарист мужской 1991 2 some Upper-intermediate \n", "1 gmail.com Семинарист мужской 1990 1 many Upper-intermediate \n", "2 gmail.com Студент мужской 1996 2 none Intermediate \n", "3 mail.ru Студент женский 2016 3 none Upper-intermediate \n", "4 edu.hse.ru Студент женский 1996 3 none Upper-intermediate \n", "\n", " course_name \n", "0 Data Analysis \n", "1 Data Analysis \n", "2 Data Analysis \n", "3 Data Mining \n", "4 Data Mining " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data.insert(7, 'course_name', data['Как называется изучаемый на курсе предмет по-английски?'].values)\n", "new_data.head()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{1.0, 2.0, 3.0, 42.0}" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(data['Сколько параметров имеет нормальное распределение?'])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
emailcourse_rolegenderbirth_dategpapapersenglish_levelcourse_namenormal_params
0gmail.comСеминаристмужской19912someUpper-intermediateData Analysis2.0
1gmail.comСеминаристмужской19901manyUpper-intermediateData Analysis2.0
2gmail.comСтудентмужской19962noneIntermediateData Analysis2.0
3mail.ruСтудентженский20163noneUpper-intermediateData Mining2.0
4edu.hse.ruСтудентженский19963noneUpper-intermediateData Mining2.0
\n", "
" ], "text/plain": [ " email course_role gender birth_date gpa papers english_level \\\n", "0 gmail.com Семинарист мужской 1991 2 some Upper-intermediate \n", "1 gmail.com Семинарист мужской 1990 1 many Upper-intermediate \n", "2 gmail.com Студент мужской 1996 2 none Intermediate \n", "3 mail.ru Студент женский 2016 3 none Upper-intermediate \n", "4 edu.hse.ru Студент женский 1996 3 none Upper-intermediate \n", "\n", " course_name normal_params \n", "0 Data Analysis 2.0 \n", "1 Data Analysis 2.0 \n", "2 Data Analysis 2.0 \n", "3 Data Mining 2.0 \n", "4 Data Mining 2.0 " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data.insert(8, 'normal_params', list(map(str, data['Сколько параметров имеет нормальное распределение?'])))\n", "new_data.head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'Критерий Мана-Уитни',\n", " 'Критерий Стьюдента',\n", " 'Критерий Стьюдента для связанных выборок',\n", " 'Критерий знаковых рангов Уилкоксона',\n", " 'Не знаю'}" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "set(data['Какой статистический критерий примените для проверки различия между двумя связанными выборками, в случае, если нельзя сделать предположение о виде распределения выборки?'])" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
emailcourse_rolegenderbirth_dategpapapersenglish_levelcourse_namenormal_paramsstat_crit
0gmail.comСеминаристмужской19912someUpper-intermediateData Analysis2.0Критерий знаковых рангов Уилкоксона
1gmail.comСеминаристмужской19901manyUpper-intermediateData Analysis2.0Критерий знаковых рангов Уилкоксона
2gmail.comСтудентмужской19962noneIntermediateData Analysis2.0Не знаю
3mail.ruСтудентженский20163noneUpper-intermediateData Mining2.0Не знаю
4edu.hse.ruСтудентженский19963noneUpper-intermediateData Mining2.0Критерий Стьюдента для связанных выборок
\n", "
" ], "text/plain": [ " email course_role gender birth_date gpa papers english_level \\\n", "0 gmail.com Семинарист мужской 1991 2 some Upper-intermediate \n", "1 gmail.com Семинарист мужской 1990 1 many Upper-intermediate \n", "2 gmail.com Студент мужской 1996 2 none Intermediate \n", "3 mail.ru Студент женский 2016 3 none Upper-intermediate \n", "4 edu.hse.ru Студент женский 1996 3 none Upper-intermediate \n", "\n", " course_name normal_params stat_crit \n", "0 Data Analysis 2.0 Критерий знаковых рангов Уилкоксона \n", "1 Data Analysis 2.0 Критерий знаковых рангов Уилкоксона \n", "2 Data Analysis 2.0 Не знаю \n", "3 Data Mining 2.0 Не знаю \n", "4 Data Mining 2.0 Критерий Стьюдента для связанных выборок " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data.insert(9, 'stat_crit', data['Какой статистический критерий примените для проверки различия между двумя связанными выборками, в случае, если нельзя сделать предположение о виде распределения выборки?'].values)\n", "new_data.head()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# ..." ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
email_bk.ruemail_edu.hse.ruemail_gmail.comemail_inbox.ruemail_mail.ruemail_outlook.comemail_yande.ruemail_yandex.rucourse_role_Лекторcourse_role_Семинарист...course_name_Machine Learningnormal_params_1.0normal_params_2.0normal_params_3.0normal_params_42.0stat_crit_Критерий Мана-Уитниstat_crit_Критерий Стьюдентаstat_crit_Критерий Стьюдента для связанных выборокstat_crit_Критерий знаковых рангов Уилкоксонаstat_crit_Не знаю
00.00.01.00.00.00.00.00.00.01.0...0.00.01.00.00.00.00.00.01.00.0
10.00.01.00.00.00.00.00.00.01.0...0.00.01.00.00.00.00.00.01.00.0
20.00.01.00.00.00.00.00.00.00.0...0.00.01.00.00.00.00.00.00.01.0
30.00.00.00.01.00.00.00.00.00.0...0.00.01.00.00.00.00.00.00.01.0
40.01.00.00.00.00.00.00.00.00.0...0.00.01.00.00.00.00.01.00.00.0
\n", "

5 rows × 45 columns

\n", "
" ], "text/plain": [ " email_bk.ru email_edu.hse.ru email_gmail.com email_inbox.ru \\\n", "0 0.0 0.0 1.0 0.0 \n", "1 0.0 0.0 1.0 0.0 \n", "2 0.0 0.0 1.0 0.0 \n", "3 0.0 0.0 0.0 0.0 \n", "4 0.0 1.0 0.0 0.0 \n", "\n", " email_mail.ru email_outlook.com email_yande.ru email_yandex.ru \\\n", "0 0.0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 \n", "3 1.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 \n", "\n", " course_role_Лектор course_role_Семинарист ... \\\n", "0 0.0 1.0 ... \n", "1 0.0 1.0 ... \n", "2 0.0 0.0 ... \n", "3 0.0 0.0 ... \n", "4 0.0 0.0 ... \n", "\n", " course_name_Machine Learning normal_params_1.0 normal_params_2.0 \\\n", "0 0.0 0.0 1.0 \n", "1 0.0 0.0 1.0 \n", "2 0.0 0.0 1.0 \n", "3 0.0 0.0 1.0 \n", "4 0.0 0.0 1.0 \n", "\n", " normal_params_3.0 normal_params_42.0 stat_crit_Критерий Мана-Уитни \\\n", "0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 \n", "\n", " stat_crit_Критерий Стьюдента \\\n", "0 0.0 \n", "1 0.0 \n", "2 0.0 \n", "3 0.0 \n", "4 0.0 \n", "\n", " stat_crit_Критерий Стьюдента для связанных выборок \\\n", "0 0.0 \n", "1 0.0 \n", "2 0.0 \n", "3 0.0 \n", "4 1.0 \n", "\n", " stat_crit_Критерий знаковых рангов Уилкоксона stat_crit_Не знаю \n", "0 1.0 0.0 \n", "1 1.0 0.0 \n", "2 0.0 1.0 \n", "3 0.0 1.0 \n", "4 0.0 0.0 \n", "\n", "[5 rows x 45 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data = pd.get_dummies(new_data)\n", "new_data.head()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(76, 45)" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data.shape" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# conda install orange3\n", "# pip install orange3-associate\n", "# https://orange3-associate.readthedocs.io/en/latest/\n", "\n", "# 
conda create --name py27 python=2.7 anaconda (if Python 3 is the default interpreter)\n", "# activate py27\n", "# conda install orange\n", "# http://docs.orange.biolab.si/2/reference/rst/Orange.associate.html\n", "# deactivate (when finished)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from orangecontrib.associate.fpgrowth import frequent_itemsets, association_rules" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Minimum support passed to frequent_itemsets; as a float it is read as a fraction of the rows.\n", "min_support = 0.5\n", "# Map each frequent itemset (a frozenset of column positions in new_data) to its support count.\n", "itemsets = dict(frequent_itemsets(new_data.values, min_support))" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{frozenset({10, 12}): 38,\n", " frozenset({10, 23, 25}): 39,\n", " frozenset({37}): 63,\n", " frozenset({25}): 65,\n", " frozenset({25, 37}): 53,\n", " frozenset({17, 25}): 46,\n", " frozenset({10}): 73,\n", " frozenset({10, 25}): 65,\n", " frozenset({10, 23}): 45,\n", " frozenset({10, 25, 37}): 53,\n", " frozenset({23}): 45,\n", " frozenset({23, 37}): 40,\n", " frozenset({33}): 41,\n", " frozenset({10, 33}): 40,\n", " frozenset({17}): 53,\n", " frozenset({17, 37}): 44,\n", " frozenset({23, 25}): 39,\n", " frozenset({10, 17}): 53,\n", " frozenset({10, 17, 25}): 46,\n", " frozenset({17, 25, 37}): 38,\n", " frozenset({10, 37}): 60,\n", " frozenset({10, 17, 37}): 44,\n", " frozenset({10, 17, 25, 37}): 38,\n", " frozenset({10, 23, 37}): 40,\n", " frozenset({31}): 40,\n", " frozenset({12}): 41,\n", " frozenset({10, 31}): 38}" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "itemsets" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 email_bk.ru\n", "1 email_edu.hse.ru\n", "2 email_gmail.com\n", "3 email_inbox.ru\n", "4 email_mail.ru\n", "5 email_outlook.com\n", "6 email_yande.ru\n", "7 
email_yandex.ru\n", "8 course_role_Лектор\n", "9 course_role_Семинарист\n", "10 course_role_Студент\n", "11 gender_женский\n", "12 gender_мужской\n", "13 birth_date_1990\n", "14 birth_date_1991\n", "15 birth_date_1994\n", "16 birth_date_1995\n", "17 birth_date_1996\n", "18 birth_date_1997\n", "19 birth_date_2016\n", "20 birth_date_2041\n", "21 gpa_1\n", "22 gpa_2\n", "23 gpa_3\n", "24 papers_many\n", "25 papers_none\n", "26 papers_some\n", "27 english_level_Advanced\n", "28 english_level_Beginner\n", "29 english_level_Full proficiency\n", "30 english_level_Intermediate\n", "31 english_level_Upper-intermediate\n", "32 course_name_Data Analysis\n", "33 course_name_Data Mining\n", "34 course_name_Data Science\n", "35 course_name_Machine Learning\n", "36 normal_params_1.0\n", "37 normal_params_2.0\n", "38 normal_params_3.0\n", "39 normal_params_42.0\n", "40 stat_crit_Критерий Мана-Уитни\n", "41 stat_crit_Критерий Стьюдента\n", "42 stat_crit_Критерий Стьюдента для связанных выборок\n", "43 stat_crit_Критерий знаковых рангов Уилкоксона\n", "44 stat_crit_Не знаю\n" ] } ], "source": [ "for i, item in enumerate(new_data.columns):\n", " print(i, item)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[(frozenset({17, 25, 37}), frozenset({10}), 38, 1.0),\n", " (frozenset({23, 25}), frozenset({10}), 39, 1.0),\n", " (frozenset({25, 37}), frozenset({10}), 53, 1.0),\n", " (frozenset({17, 25}), frozenset({10}), 46, 1.0),\n", " (frozenset({23, 37}), frozenset({10}), 40, 1.0),\n", " (frozenset({17, 37}), frozenset({10}), 44, 1.0),\n", " (frozenset({25}), frozenset({10}), 65, 1.0),\n", " (frozenset({23}), frozenset({10}), 45, 1.0),\n", " (frozenset({17}), frozenset({10}), 53, 1.0),\n", " (frozenset({33}), frozenset({10}), 40, 0.975609756097561),\n", " (frozenset({37}), frozenset({10}), 60, 0.9523809523809523),\n", " (frozenset({31}), frozenset({10}), 38, 0.95),\n", " (frozenset({12}), frozenset({10}), 38, 
0.926829268292683),\n", " (frozenset({10}), frozenset({25}), 65, 0.8904109589041096),\n", " (frozenset({10, 23}), frozenset({37}), 40, 0.8888888888888888),\n", " (frozenset({23}), frozenset({10, 37}), 40, 0.8888888888888888),\n", " (frozenset({23}), frozenset({37}), 40, 0.8888888888888888),\n", " (frozenset({10, 37}), frozenset({25}), 53, 0.8833333333333333),\n", " (frozenset({10, 17}), frozenset({25}), 46, 0.8679245283018868),\n", " (frozenset({17}), frozenset({10, 25}), 46, 0.8679245283018868),\n", " (frozenset({17}), frozenset({25}), 46, 0.8679245283018868),\n", " (frozenset({10, 23}), frozenset({25}), 39, 0.8666666666666667),\n", " (frozenset({23}), frozenset({10, 25}), 39, 0.8666666666666667),\n", " (frozenset({23}), frozenset({25}), 39, 0.8666666666666667),\n", " (frozenset({10, 17, 37}), frozenset({25}), 38, 0.8636363636363636),\n", " (frozenset({17, 37}), frozenset({10, 25}), 38, 0.8636363636363636),\n", " (frozenset({17, 37}), frozenset({25}), 38, 0.8636363636363636),\n", " (frozenset({37}), frozenset({10, 25}), 53, 0.8412698412698413),\n", " (frozenset({37}), frozenset({25}), 53, 0.8412698412698413),\n", " (frozenset({10, 17}), frozenset({37}), 44, 0.8301886792452831),\n", " (frozenset({17}), frozenset({10, 37}), 44, 0.8301886792452831),\n", " (frozenset({17}), frozenset({37}), 44, 0.8301886792452831),\n", " (frozenset({10, 17, 25}), frozenset({37}), 38, 0.8260869565217391),\n", " (frozenset({17, 25}), frozenset({10, 37}), 38, 0.8260869565217391),\n", " (frozenset({17, 25}), frozenset({37}), 38, 0.8260869565217391),\n", " (frozenset({10}), frozenset({37}), 60, 0.821917808219178),\n", " (frozenset({10, 25}), frozenset({37}), 53, 0.8153846153846154),\n", " (frozenset({25}), frozenset({10, 37}), 53, 0.8153846153846154),\n", " (frozenset({25}), frozenset({37}), 53, 0.8153846153846154)]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted(list(association_rules(itemsets, 0.8)), key=lambda x: -x[3])" ] }, { 
"cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{frozenset({10, 17, 31}): 30,\n", " frozenset({10, 17, 23, 25}): 28,\n", " frozenset({10, 17, 23, 37}): 30,\n", " frozenset({10, 17, 44}): 24,\n", " frozenset({10, 23, 33, 37}): 24,\n", " frozenset({11, 23, 37}): 24,\n", " frozenset({10, 23}): 45,\n", " frozenset({22}): 28,\n", " frozenset({17, 37}): 44,\n", " frozenset({17, 23, 37}): 30,\n", " frozenset({10, 44}): 31,\n", " frozenset({2, 10, 37}): 23,\n", " frozenset({17, 23, 25}): 28,\n", " frozenset({23, 31}): 25,\n", " frozenset({23, 31, 37}): 23,\n", " frozenset({25, 37}): 53,\n", " frozenset({23, 25, 37}): 35,\n", " frozenset({10, 23, 25, 37}): 35,\n", " frozenset({33}): 41,\n", " frozenset({17, 33}): 26,\n", " frozenset({10, 17, 25, 31}): 24,\n", " frozenset({25, 37, 44}): 23,\n", " frozenset({2, 25}): 25,\n", " frozenset({12}): 41,\n", " frozenset({12, 25, 37}): 25,\n", " frozenset({10, 12, 25, 37}): 25,\n", " frozenset({12, 17, 25}): 24,\n", " frozenset({10, 17, 37}): 44,\n", " frozenset({10, 25, 33, 37}): 31,\n", " frozenset({12, 17, 37}): 23,\n", " frozenset({11}): 35,\n", " frozenset({11, 37}): 31,\n", " frozenset({10, 23, 31, 37}): 23,\n", " frozenset({10, 42}): 24,\n", " frozenset({11, 25}): 32,\n", " frozenset({10, 12}): 38,\n", " frozenset({25, 31, 37}): 28,\n", " frozenset({10, 31, 37}): 34,\n", " frozenset({17}): 53,\n", " frozenset({10, 17, 25}): 46,\n", " frozenset({10, 17, 23, 25, 37}): 25,\n", " frozenset({32}): 24,\n", " frozenset({10, 33}): 40,\n", " frozenset({10, 22}): 26,\n", " frozenset({10, 11}): 35,\n", " frozenset({10, 25, 31, 37}): 28,\n", " frozenset({10, 23, 33}): 25,\n", " frozenset({10, 23, 25, 33}): 23,\n", " frozenset({23, 33, 37}): 24,\n", " frozenset({17, 25, 33}): 23,\n", " frozenset({12, 17}): 28,\n", " frozenset({10, 33, 37}): 34,\n", " frozenset({11, 25, 37}): 28,\n", " frozenset({22, 25}): 24,\n", " frozenset({23, 25, 33}): 23,\n", " frozenset({31}): 
40,\n", " frozenset({17, 25, 31}): 24,\n", " frozenset({10, 31}): 38,\n", " frozenset({10, 23, 31}): 25,\n", " frozenset({10, 12, 17}): 28,\n", " frozenset({25}): 65,\n", " frozenset({10, 23, 25}): 39,\n", " frozenset({25, 44}): 28,\n", " frozenset({10, 11, 25}): 32,\n", " frozenset({17, 25}): 46,\n", " frozenset({10, 22, 25}): 24,\n", " frozenset({10, 12, 17, 37}): 23,\n", " frozenset({12, 25}): 33,\n", " frozenset({10, 25, 37}): 53,\n", " frozenset({11, 17}): 25,\n", " frozenset({10, 25, 37, 44}): 23,\n", " frozenset({17, 44}): 24,\n", " frozenset({10}): 73,\n", " frozenset({10, 17}): 53,\n", " frozenset({10, 12, 17, 25}): 24,\n", " frozenset({37}): 63,\n", " frozenset({10, 37}): 60,\n", " frozenset({33, 37}): 35,\n", " frozenset({10, 17, 33, 37}): 23,\n", " frozenset({42}): 24,\n", " frozenset({23, 25}): 39,\n", " frozenset({44}): 31,\n", " frozenset({10, 37, 44}): 26,\n", " frozenset({17, 23}): 34,\n", " frozenset({10, 17, 23}): 34,\n", " frozenset({17, 31}): 30,\n", " frozenset({10, 11, 17}): 25,\n", " frozenset({17, 25, 37}): 38,\n", " frozenset({23, 37}): 40,\n", " frozenset({10, 11, 23, 37}): 24,\n", " frozenset({2}): 31,\n", " frozenset({2, 10}): 28,\n", " frozenset({10, 25}): 65,\n", " frozenset({10, 25, 44}): 28,\n", " frozenset({25, 33}): 37,\n", " frozenset({10, 25, 33}): 37,\n", " frozenset({10, 11, 25, 37}): 28,\n", " frozenset({12, 37}): 32,\n", " frozenset({10, 17, 25, 33}): 23,\n", " frozenset({2, 37}): 26,\n", " frozenset({10, 12, 25}): 33,\n", " frozenset({23, 33}): 25,\n", " frozenset({11, 23}): 25,\n", " frozenset({25, 33, 37}): 31,\n", " frozenset({10, 17, 25, 37}): 38,\n", " frozenset({17, 31, 37}): 27,\n", " frozenset({10, 17, 33}): 26,\n", " frozenset({10, 12, 37}): 29,\n", " frozenset({2, 10, 25}): 25,\n", " frozenset({23}): 45,\n", " frozenset({17, 23, 25, 37}): 25,\n", " frozenset({10, 11, 37}): 31,\n", " frozenset({10, 23, 37}): 40,\n", " frozenset({10, 25, 31}): 31,\n", " frozenset({37, 44}): 26,\n", " frozenset({25, 31}): 31,\n", " 
frozenset({17, 33, 37}): 23,\n", " frozenset({10, 11, 23}): 25,\n", " frozenset({31, 37}): 36,\n", " frozenset({10, 17, 31, 37}): 27}" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "itemsets = dict(frequent_itemsets(new_data.values, 0.3))\n", "itemsets" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 email_bk.ru\n", "1 email_edu.hse.ru\n", "2 email_gmail.com\n", "3 email_inbox.ru\n", "4 email_mail.ru\n", "5 email_outlook.com\n", "6 email_yande.ru\n", "7 email_yandex.ru\n", "8 course_role_Лектор\n", "9 course_role_Семинарист\n", "10 course_role_Студент\n", "11 gender_женский\n", "12 gender_мужской\n", "13 birth_date_1990\n", "14 birth_date_1991\n", "15 birth_date_1994\n", "16 birth_date_1995\n", "17 birth_date_1996\n", "18 birth_date_1997\n", "19 birth_date_2016\n", "20 birth_date_2041\n", "21 gpa_1\n", "22 gpa_2\n", "23 gpa_3\n", "24 papers_many\n", "25 papers_none\n", "26 papers_some\n", "27 english_level_Advanced\n", "28 english_level_Beginner\n", "29 english_level_Full proficiency\n", "30 english_level_Intermediate\n", "31 english_level_Upper-intermediate\n", "32 course_name_Data Analysis\n", "33 course_name_Data Mining\n", "34 course_name_Data Science\n", "35 course_name_Machine Learning\n", "36 normal_params_1.0\n", "37 normal_params_2.0\n", "38 normal_params_3.0\n", "39 normal_params_42.0\n", "40 stat_crit_Критерий Мана-Уитни\n", "41 stat_crit_Критерий Стьюдента\n", "42 stat_crit_Критерий Стьюдента для связанных выборок\n", "43 stat_crit_Критерий знаковых рангов Уилкоксона\n", "44 stat_crit_Не знаю\n" ] } ], "source": [ "for i, item in enumerate(new_data.columns):\n", " print(i, item)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['birth_date_1996', 'normal_params_2.0', 'papers_none', 'gpa_3'] 
--> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['birth_date_1996', 'papers_none', 'gpa_3'] --> ['course_role_Студент'] with supp = 28, conf = 1.0\n", "['birth_date_1996', 'normal_params_2.0', 'gpa_3'] --> ['course_role_Студент'] with supp = 30, conf = 1.0\n", "['course_name_Data Mining', 'normal_params_2.0', 'gpa_3'] --> ['course_role_Студент'] with supp = 24, conf = 1.0\n", "['papers_none', 'normal_params_2.0', 'gpa_3'] --> ['course_role_Студент'] with supp = 35, conf = 1.0\n", "['birth_date_1996', 'papers_none', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 24, conf = 1.0\n", "['papers_none', 'gender_мужской', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['papers_none', 'normal_params_2.0', 'course_name_Data Mining'] --> ['course_role_Студент'] with supp = 31, conf = 1.0\n", "['gpa_3', 'normal_params_2.0', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 23, conf = 1.0\n", "['papers_none', 'normal_params_2.0', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 28, conf = 1.0\n", "['papers_none', 'gpa_3', 'course_name_Data Mining'] --> ['course_role_Студент'] with supp = 23, conf = 1.0\n", "['birth_date_1996', 'gender_мужской', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 23, conf = 1.0\n", "['papers_none', 'stat_crit_Не знаю', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 23, conf = 1.0\n", "['birth_date_1996', 'gender_мужской', 'papers_none'] --> ['course_role_Студент'] with supp = 24, conf = 1.0\n", "['birth_date_1996', 'normal_params_2.0', 'course_name_Data Mining'] --> ['course_role_Студент'] with supp = 23, conf = 1.0\n", "['gender_женский', 'normal_params_2.0', 'gpa_3'] --> ['course_role_Студент'] with supp = 24, conf = 1.0\n", "['papers_none', 'gender_женский', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 28, conf = 1.0\n", "['birth_date_1996', 'papers_none', 'course_name_Data 
Mining'] --> ['course_role_Студент'] with supp = 23, conf = 1.0\n", "['birth_date_1996', 'normal_params_2.0', 'papers_none'] --> ['course_role_Студент'] with supp = 38, conf = 1.0\n", "['birth_date_1996', 'normal_params_2.0', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 27, conf = 1.0\n", "['birth_date_1996', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 30, conf = 1.0\n", "['birth_date_1996', 'stat_crit_Не знаю'] --> ['course_role_Студент'] with supp = 24, conf = 1.0\n", "['birth_date_1996', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 44, conf = 1.0\n", "['course_name_Data Mining', 'gpa_3'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['birth_date_1996', 'papers_none'] --> ['course_role_Студент'] with supp = 46, conf = 1.0\n", "['gpa_3', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['birth_date_1996', 'gender_мужской'] --> ['course_role_Студент'] with supp = 28, conf = 1.0\n", "['papers_none', 'gpa_3'] --> ['course_role_Студент'] with supp = 39, conf = 1.0\n", "['papers_none', 'gender_женский'] --> ['course_role_Студент'] with supp = 32, conf = 1.0\n", "['papers_none', 'gpa_2'] --> ['course_role_Студент'] with supp = 24, conf = 1.0\n", "['papers_none', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 53, conf = 1.0\n", "['stat_crit_Не знаю', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 26, conf = 1.0\n", "['birth_date_1996', 'gpa_3'] --> ['course_role_Студент'] with supp = 34, conf = 1.0\n", "['birth_date_1996', 'gender_женский'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['papers_none', 'stat_crit_Не знаю'] --> ['course_role_Студент'] with supp = 28, conf = 1.0\n", "['papers_none', 'course_name_Data Mining'] --> ['course_role_Студент'] with supp = 37, conf = 1.0\n", "['papers_none', 'gender_мужской'] --> ['course_role_Студент'] with supp = 33, conf = 1.0\n", 
"['birth_date_1996', 'course_name_Data Mining'] --> ['course_role_Студент'] with supp = 26, conf = 1.0\n", "['papers_none', 'email_gmail.com'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['gender_женский', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 31, conf = 1.0\n", "['normal_params_2.0', 'gpa_3'] --> ['course_role_Студент'] with supp = 40, conf = 1.0\n", "['papers_none', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 31, conf = 1.0\n", "['gender_женский', 'gpa_3'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['gpa_3'] --> ['course_role_Студент'] with supp = 45, conf = 1.0\n", "['stat_crit_Не знаю'] --> ['course_role_Студент'] with supp = 31, conf = 1.0\n", "['stat_crit_Критерий Стьюдента для связанных выборок'] --> ['course_role_Студент'] with supp = 24, conf = 1.0\n", "['gender_женский'] --> ['course_role_Студент'] with supp = 35, conf = 1.0\n", "['birth_date_1996'] --> ['course_role_Студент'] with supp = 53, conf = 1.0\n", "['papers_none'] --> ['course_role_Студент'] with supp = 65, conf = 1.0\n", "['course_name_Data Mining'] --> ['course_role_Студент'] with supp = 40, conf = 0.975609756097561\n", "['course_name_Data Mining', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 34, conf = 0.9714285714285714\n", "['course_name_Data Mining', 'course_role_Студент', 'gpa_3'] --> ['normal_params_2.0'] with supp = 24, conf = 0.96\n", "['course_name_Data Mining', 'gpa_3'] --> ['course_role_Студент', 'normal_params_2.0'] with supp = 24, conf = 0.96\n", "['course_role_Студент', 'gender_женский', 'gpa_3'] --> ['normal_params_2.0'] with supp = 24, conf = 0.96\n", "['gender_женский', 'gpa_3'] --> ['course_role_Студент', 'normal_params_2.0'] with supp = 24, conf = 0.96\n", "['gender_женский', 'gpa_3'] --> ['normal_params_2.0'] with supp = 24, conf = 0.96\n", "['course_name_Data Mining', 'gpa_3'] --> ['normal_params_2.0'] with supp = 24, conf = 0.96\n", "['normal_params_2.0'] --> 
['course_role_Студент'] with supp = 60, conf = 0.9523809523809523\n", "['english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 38, conf = 0.95\n", "['normal_params_2.0', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 34, conf = 0.9444444444444444\n", "['gpa_2'] --> ['course_role_Студент'] with supp = 26, conf = 0.9285714285714286\n", "['gender_мужской'] --> ['course_role_Студент'] with supp = 38, conf = 0.926829268292683\n", "['course_name_Data Mining', 'course_role_Студент'] --> ['papers_none'] with supp = 37, conf = 0.925\n", "['course_role_Студент', 'gpa_2'] --> ['papers_none'] with supp = 24, conf = 0.9230769230769231\n", "['course_role_Студент', 'gpa_3', 'english_level_Upper-intermediate'] --> ['normal_params_2.0'] with supp = 23, conf = 0.92\n", "['english_level_Upper-intermediate', 'gpa_3'] --> ['course_role_Студент', 'normal_params_2.0'] with supp = 23, conf = 0.92\n", "['course_name_Data Mining', 'course_role_Студент', 'gpa_3'] --> ['papers_none'] with supp = 23, conf = 0.92\n", "['course_name_Data Mining', 'gpa_3'] --> ['papers_none', 'course_role_Студент'] with supp = 23, conf = 0.92\n", "['gpa_3', 'english_level_Upper-intermediate'] --> ['normal_params_2.0'] with supp = 23, conf = 0.92\n", "['course_name_Data Mining', 'gpa_3'] --> ['papers_none'] with supp = 23, conf = 0.92\n", "['course_role_Студент', 'gender_женский'] --> ['papers_none'] with supp = 32, conf = 0.9142857142857143\n", "['gender_женский'] --> ['papers_none', 'course_role_Студент'] with supp = 32, conf = 0.9142857142857143\n", "['gender_женский'] --> ['papers_none'] with supp = 32, conf = 0.9142857142857143\n", "['course_name_Data Mining', 'course_role_Студент', 'normal_params_2.0'] --> ['papers_none'] with supp = 31, conf = 0.9117647058823529\n", "['gender_мужской', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 29, conf = 0.90625\n", "['papers_none', 'course_role_Студент', 'english_level_Upper-intermediate'] --> 
['normal_params_2.0'] with supp = 28, conf = 0.9032258064516129\n", "['papers_none', 'english_level_Upper-intermediate'] --> ['course_role_Студент', 'normal_params_2.0'] with supp = 28, conf = 0.9032258064516129\n", "['course_role_Студент', 'gender_женский', 'normal_params_2.0'] --> ['papers_none'] with supp = 28, conf = 0.9032258064516129\n", "['gender_женский', 'normal_params_2.0'] --> ['papers_none', 'course_role_Студент'] with supp = 28, conf = 0.9032258064516129\n", "['papers_none', 'english_level_Upper-intermediate'] --> ['normal_params_2.0'] with supp = 28, conf = 0.9032258064516129\n", "['gender_женский', 'normal_params_2.0'] --> ['papers_none'] with supp = 28, conf = 0.9032258064516129\n", "['course_role_Студент', 'stat_crit_Не знаю'] --> ['papers_none'] with supp = 28, conf = 0.9032258064516129\n", "['stat_crit_Не знаю'] --> ['papers_none', 'course_role_Студент'] with supp = 28, conf = 0.9032258064516129\n", "['stat_crit_Не знаю'] --> ['papers_none'] with supp = 28, conf = 0.9032258064516129\n", "['email_gmail.com'] --> ['course_role_Студент'] with supp = 28, conf = 0.9032258064516129\n", "['course_name_Data Mining'] --> ['papers_none', 'course_role_Студент'] with supp = 37, conf = 0.9024390243902439\n", "['course_name_Data Mining'] --> ['papers_none'] with supp = 37, conf = 0.9024390243902439\n", "['birth_date_1996', 'course_role_Студент', 'english_level_Upper-intermediate'] --> ['normal_params_2.0'] with supp = 27, conf = 0.9\n", "['birth_date_1996', 'english_level_Upper-intermediate'] --> ['course_role_Студент', 'normal_params_2.0'] with supp = 27, conf = 0.9\n", "['birth_date_1996', 'english_level_Upper-intermediate'] --> ['normal_params_2.0'] with supp = 27, conf = 0.9\n", "['english_level_Upper-intermediate'] --> ['normal_params_2.0'] with supp = 36, conf = 0.9\n" ] } ], "source": [ "for rule in sorted(list(association_rules(itemsets, 0.9)), key=lambda x: -x[3]):\n", " print(\"{} --> {} with supp = {}, conf = {}\".format([new_data.columns[ftr] for 
ftr in rule[0]],\n", " [new_data.columns[ftr] for ftr in rule[1]],\n", " rule[2], rule[3]))" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['gpa_3'] --> ['course_role_Студент'] with supp = 45, conf = 1.0\n", "['stat_crit_Не знаю'] --> ['course_role_Студент'] with supp = 31, conf = 1.0\n", "['stat_crit_Критерий Стьюдента для связанных выборок'] --> ['course_role_Студент'] with supp = 24, conf = 1.0\n", "['gender_женский'] --> ['course_role_Студент'] with supp = 35, conf = 1.0\n", "['birth_date_1996'] --> ['course_role_Студент'] with supp = 53, conf = 1.0\n", "['papers_none'] --> ['course_role_Студент'] with supp = 65, conf = 1.0\n", "['course_name_Data Mining'] --> ['course_role_Студент'] with supp = 40, conf = 0.975609756097561\n", "['normal_params_2.0'] --> ['course_role_Студент'] with supp = 60, conf = 0.9523809523809523\n", "['english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 38, conf = 0.95\n", "['gpa_2'] --> ['course_role_Студент'] with supp = 26, conf = 0.9285714285714286\n", "['gender_мужской'] --> ['course_role_Студент'] with supp = 38, conf = 0.926829268292683\n", "['gender_женский'] --> ['papers_none', 'course_role_Студент'] with supp = 32, conf = 0.9142857142857143\n", "['gender_женский'] --> ['papers_none'] with supp = 32, conf = 0.9142857142857143\n", "['stat_crit_Не знаю'] --> ['papers_none', 'course_role_Студент'] with supp = 28, conf = 0.9032258064516129\n", "['stat_crit_Не знаю'] --> ['papers_none'] with supp = 28, conf = 0.9032258064516129\n", "['email_gmail.com'] --> ['course_role_Студент'] with supp = 28, conf = 0.9032258064516129\n", "['course_name_Data Mining'] --> ['papers_none', 'course_role_Студент'] with supp = 37, conf = 0.9024390243902439\n", "['course_name_Data Mining'] --> ['papers_none'] with supp = 37, conf = 0.9024390243902439\n", "['english_level_Upper-intermediate'] --> ['normal_params_2.0'] with 
supp = 36, conf = 0.9\n" ] } ], "source": [ "for rule in sorted(list(association_rules(itemsets, 0.9)), key=lambda x: -x[3]):\n", " if len(rule[0]) > 1:\n", " continue\n", " print(\"{} --> {} with supp = {}, conf = {}\".format([new_data.columns[ftr] for ftr in rule[0]],\n", " [new_data.columns[ftr] for ftr in rule[1]],\n", " rule[2], rule[3]))" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['birth_date_1996', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 30, conf = 1.0\n", "['birth_date_1996', 'stat_crit_Не знаю'] --> ['course_role_Студент'] with supp = 24, conf = 1.0\n", "['birth_date_1996', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 44, conf = 1.0\n", "['course_name_Data Mining', 'gpa_3'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['birth_date_1996', 'papers_none'] --> ['course_role_Студент'] with supp = 46, conf = 1.0\n", "['gpa_3', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['birth_date_1996', 'gender_мужской'] --> ['course_role_Студент'] with supp = 28, conf = 1.0\n", "['papers_none', 'gpa_3'] --> ['course_role_Студент'] with supp = 39, conf = 1.0\n", "['papers_none', 'gender_женский'] --> ['course_role_Студент'] with supp = 32, conf = 1.0\n", "['papers_none', 'gpa_2'] --> ['course_role_Студент'] with supp = 24, conf = 1.0\n", "['papers_none', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 53, conf = 1.0\n", "['stat_crit_Не знаю', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 26, conf = 1.0\n", "['birth_date_1996', 'gpa_3'] --> ['course_role_Студент'] with supp = 34, conf = 1.0\n", "['birth_date_1996', 'gender_женский'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['papers_none', 'stat_crit_Не знаю'] --> ['course_role_Студент'] with supp = 28, conf = 1.0\n", "['papers_none', 'course_name_Data 
Mining'] --> ['course_role_Студент'] with supp = 37, conf = 1.0\n", "['papers_none', 'gender_мужской'] --> ['course_role_Студент'] with supp = 33, conf = 1.0\n", "['birth_date_1996', 'course_name_Data Mining'] --> ['course_role_Студент'] with supp = 26, conf = 1.0\n", "['papers_none', 'email_gmail.com'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['gender_женский', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 31, conf = 1.0\n", "['normal_params_2.0', 'gpa_3'] --> ['course_role_Студент'] with supp = 40, conf = 1.0\n", "['papers_none', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 31, conf = 1.0\n", "['gender_женский', 'gpa_3'] --> ['course_role_Студент'] with supp = 25, conf = 1.0\n", "['course_name_Data Mining', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 34, conf = 0.9714285714285714\n", "['course_name_Data Mining', 'gpa_3'] --> ['course_role_Студент', 'normal_params_2.0'] with supp = 24, conf = 0.96\n", "['gender_женский', 'gpa_3'] --> ['course_role_Студент', 'normal_params_2.0'] with supp = 24, conf = 0.96\n", "['gender_женский', 'gpa_3'] --> ['normal_params_2.0'] with supp = 24, conf = 0.96\n", "['course_name_Data Mining', 'gpa_3'] --> ['normal_params_2.0'] with supp = 24, conf = 0.96\n", "['normal_params_2.0', 'english_level_Upper-intermediate'] --> ['course_role_Студент'] with supp = 34, conf = 0.9444444444444444\n", "['course_name_Data Mining', 'course_role_Студент'] --> ['papers_none'] with supp = 37, conf = 0.925\n", "['course_role_Студент', 'gpa_2'] --> ['papers_none'] with supp = 24, conf = 0.9230769230769231\n", "['english_level_Upper-intermediate', 'gpa_3'] --> ['course_role_Студент', 'normal_params_2.0'] with supp = 23, conf = 0.92\n", "['course_name_Data Mining', 'gpa_3'] --> ['papers_none', 'course_role_Студент'] with supp = 23, conf = 0.92\n", "['gpa_3', 'english_level_Upper-intermediate'] --> ['normal_params_2.0'] with supp = 23, conf = 0.92\n", "['course_name_Data 
Mining', 'gpa_3'] --> ['papers_none'] with supp = 23, conf = 0.92\n", "['course_role_Студент', 'gender_женский'] --> ['papers_none'] with supp = 32, conf = 0.9142857142857143\n", "['gender_мужской', 'normal_params_2.0'] --> ['course_role_Студент'] with supp = 29, conf = 0.90625\n", "['papers_none', 'english_level_Upper-intermediate'] --> ['course_role_Студент', 'normal_params_2.0'] with supp = 28, conf = 0.9032258064516129\n", "['gender_женский', 'normal_params_2.0'] --> ['papers_none', 'course_role_Студент'] with supp = 28, conf = 0.9032258064516129\n", "['papers_none', 'english_level_Upper-intermediate'] --> ['normal_params_2.0'] with supp = 28, conf = 0.9032258064516129\n", "['gender_женский', 'normal_params_2.0'] --> ['papers_none'] with supp = 28, conf = 0.9032258064516129\n", "['course_role_Студент', 'stat_crit_Не знаю'] --> ['papers_none'] with supp = 28, conf = 0.9032258064516129\n", "['birth_date_1996', 'english_level_Upper-intermediate'] --> ['course_role_Студент', 'normal_params_2.0'] with supp = 27, conf = 0.9\n", "['birth_date_1996', 'english_level_Upper-intermediate'] --> ['normal_params_2.0'] with supp = 27, conf = 0.9\n" ] } ], "source": [ "for rule in sorted(list(association_rules(itemsets, 0.9)), key=lambda x: -x[3]):\n", " if len(rule[0]) != 2:\n", " continue\n", " print(\"{} --> {} with supp = {}, conf = {}\".format([new_data.columns[ftr] for ftr in rule[0]],\n", " [new_data.columns[ftr] for ftr in rule[1]],\n", " rule[2], rule[3]))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 }